1//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ABIInfo.h"
14#include "CGCUDARuntime.h"
15#include "CGCXXABI.h"
16#include "CGObjCRuntime.h"
17#include "CGOpenCLRuntime.h"
18#include "CGRecordLayout.h"
19#include "CodeGenFunction.h"
20#include "CodeGenModule.h"
21#include "ConstantEmitter.h"
22#include "PatternInit.h"
23#include "TargetInfo.h"
25#include "clang/AST/Attr.h"
26#include "clang/AST/Decl.h"
27#include "clang/AST/OSLog.h"
34#include "llvm/ADT/APFloat.h"
35#include "llvm/ADT/APInt.h"
36#include "llvm/ADT/FloatingPointMode.h"
37#include "llvm/ADT/SmallPtrSet.h"
38#include "llvm/ADT/StringExtras.h"
39#include "llvm/Analysis/ValueTracking.h"
40#include "llvm/IR/DataLayout.h"
41#include "llvm/IR/InlineAsm.h"
42#include "llvm/IR/Intrinsics.h"
43#include "llvm/IR/IntrinsicsAArch64.h"
44#include "llvm/IR/IntrinsicsAMDGPU.h"
45#include "llvm/IR/IntrinsicsARM.h"
46#include "llvm/IR/IntrinsicsBPF.h"
47#include "llvm/IR/IntrinsicsDirectX.h"
48#include "llvm/IR/IntrinsicsHexagon.h"
49#include "llvm/IR/IntrinsicsNVPTX.h"
50#include "llvm/IR/IntrinsicsPowerPC.h"
51#include "llvm/IR/IntrinsicsR600.h"
52#include "llvm/IR/IntrinsicsRISCV.h"
53#include "llvm/IR/IntrinsicsS390.h"
54#include "llvm/IR/IntrinsicsVE.h"
55#include "llvm/IR/IntrinsicsWebAssembly.h"
56#include "llvm/IR/IntrinsicsX86.h"
57#include "llvm/IR/MDBuilder.h"
58#include "llvm/IR/MatrixBuilder.h"
59#include "llvm/Support/ConvertUTF.h"
60#include "llvm/Support/MathExtras.h"
61#include "llvm/Support/ScopedPrinter.h"
62#include "llvm/TargetParser/AArch64TargetParser.h"
63#include "llvm/TargetParser/X86TargetParser.h"
64#include <optional>
65#include <sstream>
66
67using namespace clang;
68using namespace CodeGen;
69using namespace llvm;
70
71static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
72 Align AlignmentInBytes) {
73 ConstantInt *Byte;
74 switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
75 case LangOptions::TrivialAutoVarInitKind::Uninitialized:
76 // Nothing to initialize.
77 return;
78 case LangOptions::TrivialAutoVarInitKind::Zero:
79 Byte = CGF.Builder.getInt8(0x00);
80 break;
81 case LangOptions::TrivialAutoVarInitKind::Pattern: {
82 llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
83 Byte = llvm::dyn_cast<llvm::ConstantInt>(
84 initializationPatternFor(CGF.CGM, Int8));
85 break;
86 }
87 }
88 if (CGF.CGM.stopAutoInit())
89 return;
90 auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
91 I->addAnnotationMetadata("auto-init");
92}
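// Illustrative sketch of the helper above (not exhaustive): under
// -ftrivial-auto-var-init=zero, memory returned by __builtin_alloca(n) is
// filled with a memset of 0x00 tagged with the "auto-init" annotation; under
// =pattern the fill byte comes from initializationPatternFor(); under
// =uninitialized (or once stopAutoInit() fires) nothing is emitted.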
93
94/// getBuiltinLibFunction - Given a builtin id for a function like
95/// "__builtin_fabsf", return a Function* for "fabsf".
96llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
97 unsigned BuiltinID) {
98 assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
99
100 // Get the name, skip over the __builtin_ prefix (if necessary).
101 StringRef Name;
102 GlobalDecl D(FD);
103
104 // TODO: This list should be expanded or refactored after all GCC-compatible
105 // std libcall builtins are implemented.
106 static SmallDenseMap<unsigned, StringRef, 64> F128Builtins{
107 {Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"},
108 {Builtin::BI__builtin___printf_chk, "__printf_chkieee128"},
109 {Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"},
110 {Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"},
111 {Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"},
112 {Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"},
113 {Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"},
114 {Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"},
115 {Builtin::BI__builtin_fprintf, "__fprintfieee128"},
116 {Builtin::BI__builtin_printf, "__printfieee128"},
117 {Builtin::BI__builtin_snprintf, "__snprintfieee128"},
118 {Builtin::BI__builtin_sprintf, "__sprintfieee128"},
119 {Builtin::BI__builtin_vfprintf, "__vfprintfieee128"},
120 {Builtin::BI__builtin_vprintf, "__vprintfieee128"},
121 {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"},
122 {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"},
123 {Builtin::BI__builtin_fscanf, "__fscanfieee128"},
124 {Builtin::BI__builtin_scanf, "__scanfieee128"},
125 {Builtin::BI__builtin_sscanf, "__sscanfieee128"},
126 {Builtin::BI__builtin_vfscanf, "__vfscanfieee128"},
127 {Builtin::BI__builtin_vscanf, "__vscanfieee128"},
128 {Builtin::BI__builtin_vsscanf, "__vsscanfieee128"},
129 {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},
130 };
131
132 // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit
133 // IBM 'long double' (i.e. __ibm128). Map them to the 'double' versions
134 // when 'long double' is in 64-bit mode.
135 static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{
136 {Builtin::BI__builtin_frexpl, "frexp"},
137 {Builtin::BI__builtin_ldexpl, "ldexp"},
138 {Builtin::BI__builtin_modfl, "modf"},
139 };
140
141 // If the builtin has been declared explicitly with an assembler label,
142 // use the mangled name. This differs from the plain label on platforms
143 // that prefix labels.
144 if (FD->hasAttr<AsmLabelAttr>())
145 Name = getMangledName(D);
146 else {
147 // TODO: This mutation should also be applied to targets other than
148 // PPC, once the backend supports IEEE 128-bit style libcalls.
149 if (getTriple().isPPC64() &&
150 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
151 F128Builtins.contains(BuiltinID))
152 Name = F128Builtins[BuiltinID];
153 else if (getTriple().isOSAIX() &&
154 &getTarget().getLongDoubleFormat() ==
155 &llvm::APFloat::IEEEdouble() &&
156 AIXLongDouble64Builtins.contains(BuiltinID))
157 Name = AIXLongDouble64Builtins[BuiltinID];
158 else
159 Name = Context.BuiltinInfo.getName(BuiltinID).substr(10);
160 }
161
162 llvm::FunctionType *Ty =
163 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
164
165 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
166}
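// Illustrative examples of the name mapping above (assuming no asm label):
//
//   __builtin_fabsf(x)    -> call to the library function "fabsf"
//   __builtin_printf(...) -> "__printfieee128" on PPC64 when 'long double' is
//                            IEEE quad (via F128Builtins)
//   __builtin_frexpl(...) -> "frexp" on AIX when 'long double' is 64 bits
//                            (via AIXLongDouble64Builtins)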
167
168/// Emit the conversions required to turn the given value into an
169/// integer of the given size.
170static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
171 QualType T, llvm::IntegerType *IntType) {
172 V = CGF.EmitToMemory(V, T);
173
174 if (V->getType()->isPointerTy())
175 return CGF.Builder.CreatePtrToInt(V, IntType);
176
177 assert(V->getType() == IntType);
178 return V;
179}
180
181static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
182 QualType T, llvm::Type *ResultType) {
183 V = CGF.EmitFromMemory(V, T);
184
185 if (ResultType->isPointerTy())
186 return CGF.Builder.CreateIntToPtr(V, ResultType);
187
188 assert(V->getType() == ResultType);
189 return V;
190}
191
192static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E) {
193 ASTContext &Ctx = CGF.getContext();
194 Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
195 unsigned Bytes = Ptr.getElementType()->isPointerTy()
196 ? Ctx.getTypeSizeInChars(Ctx.VoidPtrTy).getQuantity()
197 : Ptr.getElementType()->getScalarSizeInBits() / 8;
198 unsigned Align = Ptr.getAlignment().getQuantity();
199 if (Align % Bytes != 0) {
200 DiagnosticsEngine &Diags = CGF.CGM.getDiags();
201 Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned);
202 // Force address to be at least naturally-aligned.
203 return Ptr.withAlignment(CharUnits::fromQuantity(Bytes));
204 }
205 return Ptr;
206}
207
208/// Utility to insert an atomic instruction based on Intrinsic::ID
209/// and the expression node.
210static Value *MakeBinaryAtomicValue(
211 CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
212 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
213
214 QualType T = E->getType();
215 assert(E->getArg(0)->getType()->isPointerType());
216 assert(CGF.getContext().hasSameUnqualifiedType(T,
217 E->getArg(0)->getType()->getPointeeType()));
218 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
219
220 Address DestAddr = CheckAtomicAlignment(CGF, E);
221
222 llvm::IntegerType *IntType = llvm::IntegerType::get(
223 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
224
225 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
226 llvm::Type *ValueType = Val->getType();
227 Val = EmitToInt(CGF, Val, T, IntType);
228
229 llvm::Value *Result =
230 CGF.Builder.CreateAtomicRMW(Kind, DestAddr, Val, Ordering);
231 return EmitFromInt(CGF, Result, T, ValueType);
232}
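// Illustrative sketch (loose IR; exact types depend on the source operands):
//
//   long old = __sync_fetch_and_add(&counter, 1);   // counter: long *
//
// reaches MakeBinaryAtomicValue with Kind == AtomicRMWInst::Add and becomes
// roughly
//
//   %old = atomicrmw add ptr %counter, i64 1 seq_cst
//
// with the result converted back to the builtin's value type.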
233
234static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
235 Value *Val = CGF.EmitScalarExpr(E->getArg(0));
236 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(1));
237
238 Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
239 LValue LV = CGF.MakeAddrLValue(Addr, E->getArg(0)->getType());
240 LV.setNontemporal(true);
241 CGF.EmitStoreOfScalar(Val, LV, false);
242 return nullptr;
243}
244
245static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
246 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(0));
247
248 LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
249 LV.setNontemporal(true);
250 return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
251}
252
253static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
254 llvm::AtomicRMWInst::BinOp Kind,
255 const CallExpr *E) {
256 return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
257}
258
259/// Utility to insert an atomic instruction based on Intrinsic::ID and
260/// the expression node, where the return value is the result of the
261/// operation.
262static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
263 llvm::AtomicRMWInst::BinOp Kind,
264 const CallExpr *E,
265 Instruction::BinaryOps Op,
266 bool Invert = false) {
267 QualType T = E->getType();
268 assert(E->getArg(0)->getType()->isPointerType());
269 assert(CGF.getContext().hasSameUnqualifiedType(T,
270 E->getArg(0)->getType()->getPointeeType()));
271 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
272
273 Address DestAddr = CheckAtomicAlignment(CGF, E);
274
275 llvm::IntegerType *IntType = llvm::IntegerType::get(
276 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
277
278 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
279 llvm::Type *ValueType = Val->getType();
280 Val = EmitToInt(CGF, Val, T, IntType);
281
282 llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
283 Kind, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent);
284 Result = CGF.Builder.CreateBinOp(Op, Result, Val);
285 if (Invert)
286 Result =
287 CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
288 llvm::ConstantInt::getAllOnesValue(IntType));
289 Result = EmitFromInt(CGF, Result, T, ValueType);
290 return RValue::get(Result);
291}
292
293/// Utility to insert an atomic cmpxchg instruction.
294///
295/// @param CGF The current codegen function.
296/// @param E Builtin call expression to convert to cmpxchg.
297/// arg0 - address to operate on
298/// arg1 - value to compare with
299/// arg2 - new value
300/// @param ReturnBool Specifies whether to return success flag of
301/// cmpxchg result or the old value.
302///
303/// @returns result of cmpxchg, according to ReturnBool
304///
305/// Note: To lower Microsoft's _InterlockedCompareExchange* intrinsics,
306/// invoke the function EmitAtomicCmpXchgForMSIntrin instead.
307static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
308 bool ReturnBool) {
309 QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
310 Address DestAddr = CheckAtomicAlignment(CGF, E);
311
312 llvm::IntegerType *IntType = llvm::IntegerType::get(
313 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
314
315 Value *Cmp = CGF.EmitScalarExpr(E->getArg(1));
316 llvm::Type *ValueType = Cmp->getType();
317 Cmp = EmitToInt(CGF, Cmp, T, IntType);
318 Value *New = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
319
320 Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
321 DestAddr, Cmp, New, llvm::AtomicOrdering::SequentiallyConsistent,
322 llvm::AtomicOrdering::SequentiallyConsistent);
323 if (ReturnBool)
324 // Extract boolean success flag and zext it to int.
325 return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
326 CGF.ConvertType(E->getType()));
327 else
328 // Extract old value and emit it using the same type as compare value.
329 return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
330 ValueType);
331}
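// Illustrative sketch of the two ReturnBool modes (loose IR):
//
//   bool ok  = __sync_bool_compare_and_swap(p, oldv, newv);  // ReturnBool=true
//   int  old = __sync_val_compare_and_swap(p, oldv, newv);   // ReturnBool=false
//
//   %pair = cmpxchg ptr %p, i32 %oldv, i32 %newv seq_cst seq_cst
//   ; ReturnBool=true  -> zext(extractvalue %pair, 1) as the int result
//   ; ReturnBool=false -> extractvalue %pair, 0, converted back to the value type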
332
333/// This function should be invoked to emit atomic cmpxchg for Microsoft's
334/// _InterlockedCompareExchange* intrinsics which have the following signature:
335/// T _InterlockedCompareExchange(T volatile *Destination,
336/// T Exchange,
337/// T Comparand);
338///
339/// Whereas the llvm 'cmpxchg' instruction has the following syntax:
340/// cmpxchg *Destination, Comparand, Exchange.
341/// So we need to swap Comparand and Exchange when invoking
342/// CreateAtomicCmpXchg. That is why we cannot use the utility function
343/// MakeAtomicCmpXchgValue above, since it expects the arguments to already
344/// be swapped.
345
346static
347Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
348 AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
349 assert(E->getArg(0)->getType()->isPointerType());
350 assert(CGF.getContext().hasSameUnqualifiedType(
351 E->getType(), E->getArg(0)->getType()->getPointeeType()));
352 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
353 E->getArg(1)->getType()));
354 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
355 E->getArg(2)->getType()));
356
357 Address DestAddr = CheckAtomicAlignment(CGF, E);
358
359 auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
360 auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
361
362 // For Release ordering, the failure ordering should be Monotonic.
363 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
364 AtomicOrdering::Monotonic :
365 SuccessOrdering;
366
367 // The atomic instruction is marked volatile for consistency with MSVC. This
368 // blocks the few atomics optimizations that LLVM has. If we want to optimize
369 // _Interlocked* operations in the future, we will have to remove the volatile
370 // marker.
371 auto *Result = CGF.Builder.CreateAtomicCmpXchg(
372 DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering);
373 Result->setVolatile(true);
374 return CGF.Builder.CreateExtractValue(Result, 0);
375}
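// Illustrative sketch of the operand swap described above (loose IR):
//
//   long prev = _InterlockedCompareExchange(&Dest, Exchange, Comparand);
//
// becomes (note that Comparand precedes Exchange in the instruction):
//
//   %pair = cmpxchg volatile ptr %Dest, i32 %Comparand, i32 %Exchange seq_cst seq_cst
//   %prev = extractvalue { i32, i1 } %pair, 0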
376
377// 64-bit Microsoft platforms support 128 bit cmpxchg operations. They are
378// prototyped like this:
379//
380// unsigned char _InterlockedCompareExchange128...(
381// __int64 volatile * _Destination,
382// __int64 _ExchangeHigh,
383// __int64 _ExchangeLow,
384// __int64 * _ComparandResult);
385//
386// Note that Destination is assumed to be at least 16-byte aligned, despite
387// being typed int64.
388
389static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
390 const CallExpr *E,
391 AtomicOrdering SuccessOrdering) {
392 assert(E->getNumArgs() == 4);
393 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
394 llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
395 llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
396 Address ComparandAddr = CGF.EmitPointerWithAlignment(E->getArg(3));
397
398 assert(DestPtr->getType()->isPointerTy());
399 assert(!ExchangeHigh->getType()->isPointerTy());
400 assert(!ExchangeLow->getType()->isPointerTy());
401
402 // For Release ordering, the failure ordering should be Monotonic.
403 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
404 ? AtomicOrdering::Monotonic
405 : SuccessOrdering;
406
407 // Convert to i128 pointers and values. Alignment is also overridden for
408 // destination pointer.
409 llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
410 Address DestAddr(DestPtr, Int128Ty,
411 CGF.getContext().toCharUnitsFromBits(128));
412 ComparandAddr = ComparandAddr.withElementType(Int128Ty);
413
414 // (((i128)hi) << 64) | ((i128)lo)
415 ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
416 ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
417 ExchangeHigh =
418 CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
419 llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);
420
421 // Load the comparand for the instruction.
422 llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandAddr);
423
424 auto *CXI = CGF.Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
425 SuccessOrdering, FailureOrdering);
426
427 // The atomic instruction is marked volatile for consistency with MSVC. This
428 // blocks the few atomics optimizations that LLVM has. If we want to optimize
429 // _Interlocked* operations in the future, we will have to remove the volatile
430 // marker.
431 CXI->setVolatile(true);
432
433 // Store the result as an outparameter.
434 CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
435 ComparandAddr);
436
437 // Get the success boolean and zero extend it to i8.
438 Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
439 return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
440}
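// Illustrative sketch: for
//
//   unsigned char ok = _InterlockedCompareExchange128(Dest, Hi, Lo, &Expected);
//
// the exchange value is packed as ((i128)Hi << 64) | (i128)Lo, the comparand is
// loaded from *Expected, a volatile cmpxchg is issued on Dest (treated as a
// 16-byte-aligned i128), the old value is stored back to *Expected, and the
// success flag is returned zero-extended to i8.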
441
442static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
443 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
444 assert(E->getArg(0)->getType()->isPointerType());
445
446 auto *IntTy = CGF.ConvertType(E->getType());
447 Address DestAddr = CheckAtomicAlignment(CGF, E);
448 auto *Result = CGF.Builder.CreateAtomicRMW(
449 AtomicRMWInst::Add, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
450 return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
451}
452
453static Value *EmitAtomicDecrementValue(
454 CodeGenFunction &CGF, const CallExpr *E,
455 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
456 assert(E->getArg(0)->getType()->isPointerType());
457
458 auto *IntTy = CGF.ConvertType(E->getType());
459 Address DestAddr = CheckAtomicAlignment(CGF, E);
460 auto *Result = CGF.Builder.CreateAtomicRMW(
461 AtomicRMWInst::Sub, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
462 return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
463}
464
465// Build a plain volatile load.
466static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) {
467 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
468 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
469 CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
470 llvm::Type *ITy =
471 llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
472 llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);
473 Load->setVolatile(true);
474 return Load;
475}
476
477// Build a plain volatile store.
478static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
479 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
480 Value *Value = CGF.EmitScalarExpr(E->getArg(1));
481 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
482 CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
483 llvm::StoreInst *Store =
484 CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
485 Store->setVolatile(true);
486 return Store;
487}
488
489// Emit a simple mangled intrinsic that has 1 argument and a return type
490// matching the argument type. Depending on mode, this may be a constrained
491// floating-point intrinsic.
492static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
493 const CallExpr *E, unsigned IntrinsicID,
494 unsigned ConstrainedIntrinsicID) {
495 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
496
497 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
498 if (CGF.Builder.getIsFPConstrained()) {
499 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
500 return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
501 } else {
502 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
503 return CGF.Builder.CreateCall(F, Src0);
504 }
505}
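// Illustrative sketch: in the default FP environment a call such as
// __builtin_sqrt(x) on a double is lowered through this kind of helper to
// llvm.sqrt.f64(x); under strict FP semantics (e.g.
// -ffp-exception-behavior=strict) the constrained variant
// llvm.experimental.constrained.sqrt.f64 is emitted instead, carrying the
// rounding/exception metadata added by CreateConstrainedFPCall.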
506
507// Emit an intrinsic that has 2 operands of the same type as its result.
508// Depending on mode, this may be a constrained floating-point intrinsic.
509static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
510 const CallExpr *E, unsigned IntrinsicID,
511 unsigned ConstrainedIntrinsicID) {
512 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
513 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
514
515 if (CGF.Builder.getIsFPConstrained()) {
516 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
517 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
518 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
519 } else {
520 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
521 return CGF.Builder.CreateCall(F, { Src0, Src1 });
522 }
523}
524
525// Like the binary case above, but the second operand's type is also mangled
526// into the intrinsic name.
526static Value *emitBinaryExpMaybeConstrainedFPBuiltin(
527 CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID,
528 llvm::Intrinsic::ID ConstrainedIntrinsicID) {
529 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
530 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
531
532 if (CGF.Builder.getIsFPConstrained()) {
533 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
534 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
535 {Src0->getType(), Src1->getType()});
536 return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
537 }
538
539 Function *F =
540 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
541 return CGF.Builder.CreateCall(F, {Src0, Src1});
542}
543
544// Emit an intrinsic that has 3 operands of the same type as its result.
545// Depending on mode, this may be a constrained floating-point intrinsic.
546static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
547 const CallExpr *E, unsigned IntrinsicID,
548 unsigned ConstrainedIntrinsicID) {
549 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
550 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
551 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
552
553 if (CGF.Builder.getIsFPConstrained()) {
554 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
555 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
556 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
557 } else {
558 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
559 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
560 }
561}
562
563// Emit an intrinsic where all operands are of the same type as the result.
564// Depending on mode, this may be a constrained floating-point intrinsic.
565static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
566 unsigned IntrinsicID,
567 unsigned ConstrainedIntrinsicID,
568 llvm::Type *Ty,
569 ArrayRef<Value *> Args) {
570 Function *F;
571 if (CGF.Builder.getIsFPConstrained())
572 F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
573 else
574 F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
575
576 if (CGF.Builder.getIsFPConstrained())
577 return CGF.Builder.CreateConstrainedFPCall(F, Args);
578 else
579 return CGF.Builder.CreateCall(F, Args);
580}
581
582// Emit a simple mangled intrinsic that has 1 argument and a return type
583// matching the argument type.
584static Value *emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E,
585 unsigned IntrinsicID,
586 llvm::StringRef Name = "") {
587 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
588
589 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
590 return CGF.Builder.CreateCall(F, Src0, Name);
591}
592
593// Emit an intrinsic that has 2 operands of the same type as its result.
594static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
595 const CallExpr *E,
596 unsigned IntrinsicID) {
597 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
598 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
599
600 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
601 return CGF.Builder.CreateCall(F, { Src0, Src1 });
602}
603
604// Emit an intrinsic that has 3 operands of the same type as its result.
605static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
606 const CallExpr *E,
607 unsigned IntrinsicID) {
608 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
609 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
610 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
611
612 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
613 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
614}
615
616// Emit an intrinsic that has 1 float or double operand, and 1 integer.
617static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
618 const CallExpr *E,
619 unsigned IntrinsicID) {
620 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
621 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
622
623 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
624 return CGF.Builder.CreateCall(F, {Src0, Src1});
625}
626
627// Emit an intrinsic that has overloaded integer result and fp operand.
628static Value *
629emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E,
630 unsigned IntrinsicID,
631 unsigned ConstrainedIntrinsicID) {
632 llvm::Type *ResultType = CGF.ConvertType(E->getType());
633 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
634
635 if (CGF.Builder.getIsFPConstrained()) {
636 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
637 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
638 {ResultType, Src0->getType()});
639 return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
640 } else {
641 Function *F =
642 CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});
643 return CGF.Builder.CreateCall(F, Src0);
644 }
645}
646
647static Value *emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E,
648 llvm::Intrinsic::ID IntrinsicID) {
649 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
650 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
651
652 QualType IntPtrTy = E->getArg(1)->getType()->getPointeeType();
653 llvm::Type *IntTy = CGF.ConvertType(IntPtrTy);
654 llvm::Function *F =
655 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), IntTy});
656 llvm::Value *Call = CGF.Builder.CreateCall(F, Src0);
657
658 llvm::Value *Exp = CGF.Builder.CreateExtractValue(Call, 1);
659 LValue LV = CGF.MakeNaturalAlignAddrLValue(Src1, IntPtrTy);
660 CGF.EmitStoreOfScalar(Exp, LV);
661
662 return CGF.Builder.CreateExtractValue(Call, 0);
663}
664
665/// EmitFAbs - Emit a call to @llvm.fabs().
666static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
667 Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
668 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
669 Call->setDoesNotAccessMemory();
670 return Call;
671}
672
673/// Emit the computation of the sign bit for a floating point value. Returns
674/// the i1 sign bit value.
675static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
676 LLVMContext &C = CGF.CGM.getLLVMContext();
677
678 llvm::Type *Ty = V->getType();
679 int Width = Ty->getPrimitiveSizeInBits();
680 llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
681 V = CGF.Builder.CreateBitCast(V, IntTy);
682 if (Ty->isPPC_FP128Ty()) {
683 // We want the sign bit of the higher-order double. The bitcast we just
684 // did works as if the double-double was stored to memory and then
685 // read as an i128. The "store" will put the higher-order double in the
686 // lower address in both little- and big-Endian modes, but the "load"
687 // will treat those bits as a different part of the i128: the low bits in
688 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
689 // we need to shift the high bits down to the low before truncating.
690 Width >>= 1;
691 if (CGF.getTarget().isBigEndian()) {
692 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
693 V = CGF.Builder.CreateLShr(V, ShiftCst);
694 }
695 // We are truncating value in order to extract the higher-order
696 // double, which we will be using to extract the sign from.
697 IntTy = llvm::IntegerType::get(C, Width);
698 V = CGF.Builder.CreateTrunc(V, IntTy);
699 }
700 Value *Zero = llvm::Constant::getNullValue(IntTy);
701 return CGF.Builder.CreateICmpSLT(V, Zero);
702}
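// Illustrative sketch for an ordinary IEEE double (the ppc_fp128 case above is
// the exception): the value is bitcast to an integer of equal width and the
// sign is tested with a signed compare, roughly
//
//   %bits = bitcast double %v to i64
//   %sign = icmp slt i64 %bits, 0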
703
704static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
705 const CallExpr *E, llvm::Constant *calleeValue) {
706 CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
707 return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
708}
709
710/// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
711/// depending on IntrinsicID.
712///
713/// \arg CGF The current codegen function.
714/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
715/// \arg X The first argument to the llvm.*.with.overflow.*.
716/// \arg Y The second argument to the llvm.*.with.overflow.*.
717/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
718/// \returns The result (i.e. sum/product) returned by the intrinsic.
719static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
720 const llvm::Intrinsic::ID IntrinsicID,
721 llvm::Value *X, llvm::Value *Y,
722 llvm::Value *&Carry) {
723 // Make sure we have integers of the same width.
724 assert(X->getType() == Y->getType() &&
725 "Arguments must be the same type. (Did you forget to make sure both "
726 "arguments have the same integer width?)");
727
728 Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
729 llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
730 Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
731 return CGF.Builder.CreateExtractValue(Tmp, 0);
732}
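// Illustrative sketch (loose IR): for unsigned 32-bit operands,
// EmitOverflowIntrinsic(CGF, llvm::Intrinsic::uadd_with_overflow, X, Y, Carry)
// emits roughly
//
//   %res = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %X, i32 %Y)
//
// and returns extractvalue %res, 0 while placing extractvalue %res, 1 in Carry.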
733
734static Value *emitRangedBuiltin(CodeGenFunction &CGF,
735 unsigned IntrinsicID,
736 int low, int high) {
737 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
738 llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
739 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
740 llvm::Instruction *Call = CGF.Builder.CreateCall(F);
741 Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
742 Call->setMetadata(llvm::LLVMContext::MD_noundef,
743 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
744 return Call;
745}
746
747namespace {
748 struct WidthAndSignedness {
749 unsigned Width;
750 bool Signed;
751 };
752}
753
754static WidthAndSignedness
755getIntegerWidthAndSignedness(const clang::ASTContext &context,
756 const clang::QualType Type) {
757 assert(Type->isIntegerType() && "Given type is not an integer.");
758 unsigned Width = Type->isBooleanType() ? 1
759 : Type->isBitIntType() ? context.getIntWidth(Type)
760 : context.getTypeInfo(Type).Width;
761 bool Signed = Type->isSignedIntegerType();
762 return {Width, Signed};
763}
764
765// Given one or more integer types, this function produces an integer type that
766// encompasses them: any value in one of the given types could be expressed in
767// the encompassing type.
768static struct WidthAndSignedness
769EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
770 assert(Types.size() > 0 && "Empty list of types.");
771
772 // If any of the given types is signed, we must return a signed type.
773 bool Signed = false;
774 for (const auto &Type : Types) {
775 Signed |= Type.Signed;
776 }
777
778 // The encompassing type must have a width greater than or equal to the width
779 // of the specified types. Additionally, if the encompassing type is signed,
780 // its width must be strictly greater than the width of any unsigned types
781 // given.
782 unsigned Width = 0;
783 for (const auto &Type : Types) {
784 unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
785 if (Width < MinWidth) {
786 Width = MinWidth;
787 }
788 }
789
790 return {Width, Signed};
791}
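// Worked example of the rule above: for {unsigned, 32 bits} and {signed,
// 16 bits}, the result must be signed, and the unsigned 32-bit member then
// needs one extra bit, so EncompassingIntegerType returns {Width=33, Signed=true}.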
792
793Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
794 Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
795 return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
796}
797
798/// Checks if using the result of __builtin_object_size(p, @p From) in place of
799/// __builtin_object_size(p, @p To) is correct
800static bool areBOSTypesCompatible(int From, int To) {
801 // Note: Our __builtin_object_size implementation currently treats Type=0 and
802 // Type=2 identically. Encoding this implementation detail here may make
803 // improving __builtin_object_size difficult in the future, so it's omitted.
804 return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
805}
806
807static llvm::Value *
808getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
809 return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
810}
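// Illustrative sketch of the defaults above: when the size cannot be
// determined, __builtin_object_size(p, 0) and (p, 1) fold to (size_t)-1,
// while (p, 2) and (p, 3) fold to 0, matching GCC's behavior.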
811
812llvm::Value *
813CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
814 llvm::IntegerType *ResType,
815 llvm::Value *EmittedE,
816 bool IsDynamic) {
817 uint64_t ObjectSize;
818 if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
819 return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
820 return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
821}
822
823const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberField(
824 ASTContext &Ctx, const RecordDecl *RD, StringRef Name, uint64_t &Offset) {
825 const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
826 getLangOpts().getStrictFlexArraysLevel();
827 unsigned FieldNo = 0;
828 bool IsUnion = RD->isUnion();
829
830 for (const Decl *D : RD->decls()) {
831 if (const auto *Field = dyn_cast<FieldDecl>(D);
832 Field && (Name.empty() || Field->getNameAsString() == Name) &&
833 Decl::isFlexibleArrayMemberLike(
834 Ctx, Field, Field->getType(), StrictFlexArraysLevel,
835 /*IgnoreTemplateOrMacroSubstitution=*/true)) {
836 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
837 Offset += Layout.getFieldOffset(FieldNo);
838 return Field;
839 }
840
841 if (const auto *Record = dyn_cast<RecordDecl>(D))
842 if (const FieldDecl *Field =
843 FindFlexibleArrayMemberField(Ctx, Record, Name, Offset)) {
844 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
845 Offset += Layout.getFieldOffset(FieldNo);
846 return Field;
847 }
848
849 if (!IsUnion && isa<FieldDecl>(D))
850 ++FieldNo;
851 }
852
853 return nullptr;
854}
855
856static unsigned CountCountedByAttrs(const RecordDecl *RD) {
857 unsigned Num = 0;
858
859 for (const Decl *D : RD->decls()) {
860 if (const auto *FD = dyn_cast<FieldDecl>(D);
861 FD && FD->hasAttr<CountedByAttr>()) {
862 return ++Num;
863 }
864
865 if (const auto *Rec = dyn_cast<RecordDecl>(D))
866 Num += CountCountedByAttrs(Rec);
867 }
868
869 return Num;
870}
871
872llvm::Value *
873CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
874 llvm::IntegerType *ResType) {
875 // The code generated here calculates the size of a struct with a flexible
876 // array member that uses the counted_by attribute. There are two instances
877 // we handle:
878 //
879 // struct s {
880 // unsigned long flags;
881 // int count;
882 // int array[] __attribute__((counted_by(count)));
883 // }
884 //
885 // 1) bdos of the flexible array itself:
886 //
887 // __builtin_dynamic_object_size(p->array, 1) ==
888 // p->count * sizeof(*p->array)
889 //
890 // 2) bdos of a pointer into the flexible array:
891 //
892 // __builtin_dynamic_object_size(&p->array[42], 1) ==
893 // (p->count - 42) * sizeof(*p->array)
894 //
895 // 3) bdos of the whole struct, including the flexible array:
896 //
897 // __builtin_dynamic_object_size(p, 1) ==
898 // max(sizeof(struct s),
899 // offsetof(struct s, array) + p->count * sizeof(*p->array))
900 //
901 ASTContext &Ctx = getContext();
902 const Expr *Base = E->IgnoreParenImpCasts();
903 const Expr *Idx = nullptr;
904
905 if (const auto *UO = dyn_cast<UnaryOperator>(Base);
906 UO && UO->getOpcode() == UO_AddrOf) {
907 Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts();
908 if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(SubExpr)) {
909 Base = ASE->getBase()->IgnoreParenImpCasts();
910 Idx = ASE->getIdx()->IgnoreParenImpCasts();
911
912 if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) {
913 int64_t Val = IL->getValue().getSExtValue();
914 if (Val < 0)
915 return getDefaultBuiltinObjectSizeResult(Type, ResType);
916
917 if (Val == 0)
918 // The index is 0, so we don't need to take it into account.
919 Idx = nullptr;
920 }
921 } else {
922 // Potential pointer to another element in the struct.
923 Base = SubExpr;
924 }
925 }
926
927 // Get the flexible array member Decl.
928 const RecordDecl *OuterRD = nullptr;
929 std::string FAMName;
930 if (const auto *ME = dyn_cast<MemberExpr>(Base)) {
931 // Check if \p Base is referencing the FAM itself.
932 const ValueDecl *VD = ME->getMemberDecl();
933 OuterRD = VD->getDeclContext()->getOuterLexicalRecordContext();
934 FAMName = VD->getNameAsString();
935 } else if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
936 // Check if we're pointing to the whole struct.
937 QualType Ty = DRE->getDecl()->getType();
938 if (Ty->isPointerType())
939 Ty = Ty->getPointeeType();
940 OuterRD = Ty->getAsRecordDecl();
941
942 // If we have a situation like this:
943 //
944 // struct union_of_fams {
945 // int flags;
946 // union {
947 // signed char normal_field;
948 // struct {
949 // int count1;
950 // int arr1[] __counted_by(count1);
951 // };
952 // struct {
953 // signed char count2;
954 // int arr2[] __counted_by(count2);
955 // };
956 // };
957 // };
958 //
959 // We don't know which 'count' to use in this scenario:
960 //
961 // size_t get_size(struct union_of_fams *p) {
962 // return __builtin_dynamic_object_size(p, 1);
963 // }
964 //
965 // Instead of calculating a wrong number, we give up.
966 if (OuterRD && CountCountedByAttrs(OuterRD) > 1)
967 return nullptr;
968 }
969
970 if (!OuterRD)
971 return nullptr;
972
973 uint64_t Offset = 0;
974 const FieldDecl *FAMDecl =
975 FindFlexibleArrayMemberField(Ctx, OuterRD, FAMName, Offset);
976 Offset = Ctx.toCharUnitsFromBits(Offset).getQuantity();
977
978 if (!FAMDecl || !FAMDecl->hasAttr<CountedByAttr>())
979 // No flexible array member found or it doesn't have the "counted_by"
980 // attribute.
981 return nullptr;
982
983 const FieldDecl *CountedByFD = FindCountedByField(FAMDecl);
984 if (!CountedByFD)
985 // Can't find the field referenced by the "counted_by" attribute.
986 return nullptr;
987
988 // Build a load of the counted_by field.
989 bool IsSigned = CountedByFD->getType()->isSignedIntegerType();
990 Value *CountedByInst = EmitCountedByFieldExpr(Base, FAMDecl, CountedByFD);
991 if (!CountedByInst)
992 return getDefaultBuiltinObjectSizeResult(Type, ResType);
993
994 CountedByInst = Builder.CreateIntCast(CountedByInst, ResType, IsSigned);
995
996 // Build a load of the index and subtract it from the count.
997 Value *IdxInst = nullptr;
998 if (Idx) {
999 if (Idx->HasSideEffects(getContext()))
1000 // We can't have side-effects.
1001 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1002
1003 bool IdxSigned = Idx->getType()->isSignedIntegerType();
1004 IdxInst = EmitAnyExprToTemp(Idx).getScalarVal();
1005 IdxInst = Builder.CreateIntCast(IdxInst, ResType, IdxSigned);
1006
1007 // We go ahead with the calculation here. If the index turns out to be
1008 // negative, we'll catch it at the end.
1009 CountedByInst =
1010 Builder.CreateSub(CountedByInst, IdxInst, "", !IsSigned, IsSigned);
1011 }
1012
1013 // Calculate how large the flexible array member is in bytes.
1014 const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType());
1015 CharUnits Size = Ctx.getTypeSizeInChars(ArrayTy->getElementType());
1016 llvm::Constant *ElemSize =
1017 llvm::ConstantInt::get(ResType, Size.getQuantity(), IsSigned);
1018 Value *FAMSize =
1019 Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned);
1020 FAMSize = Builder.CreateIntCast(FAMSize, ResType, IsSigned);
1021 Value *Res = FAMSize;
1022
1023 if (isa<DeclRefExpr>(Base)) {
1024 // The whole struct is specified in the __bdos.
1025 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(OuterRD);
1026
1027 // Get the offset of the FAM.
1028 llvm::Constant *FAMOffset = ConstantInt::get(ResType, Offset, IsSigned);
1029 Value *OffsetAndFAMSize =
1030 Builder.CreateAdd(FAMOffset, Res, "", !IsSigned, IsSigned);
1031
1032 // Get the full size of the struct.
1033 llvm::Constant *SizeofStruct =
1034 ConstantInt::get(ResType, Layout.getSize().getQuantity(), IsSigned);
1035
1036 // max(sizeof(struct s),
1037 // offsetof(struct s, array) + p->count * sizeof(*p->array))
1038 Res = IsSigned
1039 ? Builder.CreateBinaryIntrinsic(llvm::Intrinsic::smax,
1040 OffsetAndFAMSize, SizeofStruct)
1041 : Builder.CreateBinaryIntrinsic(llvm::Intrinsic::umax,
1042 OffsetAndFAMSize, SizeofStruct);
1043 }
1044
1045 // A negative \p IdxInst or \p CountedByInst means that the index lands
1046 // outside of the flexible array member. If that's the case, we want to
1047 // return 0.
1048 Value *Cmp = Builder.CreateIsNotNeg(CountedByInst);
1049 if (IdxInst)
1050 Cmp = Builder.CreateAnd(Builder.CreateIsNotNeg(IdxInst), Cmp);
1051
1052 return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned));
1053}
1054
1055/// Returns a Value corresponding to the size of the given expression.
1056/// This Value may be either of the following:
1057/// - A llvm::Argument (if E is a param with the pass_object_size attribute on
1058/// it)
1059/// - A call to the @llvm.objectsize intrinsic
1060///
1061/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
1062/// and we wouldn't otherwise try to reference a pass_object_size parameter,
1063/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
1064llvm::Value *
1065CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
1066 llvm::IntegerType *ResType,
1067 llvm::Value *EmittedE, bool IsDynamic) {
1068 // We need to reference an argument if the pointer is a parameter with the
1069 // pass_object_size attribute.
1070 if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
1071 auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
1072 auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
1073 if (Param != nullptr && PS != nullptr &&
1074 areBOSTypesCompatible(PS->getType(), Type)) {
1075 auto Iter = SizeArguments.find(Param);
1076 assert(Iter != SizeArguments.end());
1077
1078 const ImplicitParamDecl *D = Iter->second;
1079 auto DIter = LocalDeclMap.find(D);
1080 assert(DIter != LocalDeclMap.end());
1081
1082 return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
1083 getContext().getSizeType(), E->getBeginLoc());
1084 }
1085 }
1086
1087 if (IsDynamic) {
1088 // Emit special code for a flexible array member with the "counted_by"
1089 // attribute.
1090 if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType))
1091 return V;
1092 }
1093
1094 // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
1095 // evaluate E for side-effects. In either case, we shouldn't lower to
1096 // @llvm.objectsize.
1097 if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
1098 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1099
1100 Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
1101 assert(Ptr->getType()->isPointerTy() &&
1102 "Non-pointer passed to __builtin_object_size?");
1103
1104 Function *F =
1105 CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
1106
1107 // LLVM only supports 0 and 2; make sure we pass that along as a boolean.
1108 Value *Min = Builder.getInt1((Type & 2) != 0);
1109 // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
1110 Value *NullIsUnknown = Builder.getTrue();
1111 Value *Dynamic = Builder.getInt1(IsDynamic);
1112 return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
1113}
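// Illustrative sketch of the pass_object_size path above (hypothetical code):
//
//   void fill(char *buf __attribute__((pass_object_size(0))), char c);
//
// Inside 'fill', __builtin_object_size(buf, 0) does not lower to
// @llvm.objectsize; it loads the hidden size argument the caller passed along
// with 'buf' (looked up through SizeArguments/LocalDeclMap above).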
1114
1115namespace {
1116/// A struct to generically describe a bit test intrinsic.
1117struct BitTest {
1118 enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
1119 enum InterlockingKind : uint8_t {
1120 Unlocked,
1121 Sequential,
1122 Acquire,
1123 Release,
1124 NoFence
1125 };
1126
1127 ActionKind Action;
1128 InterlockingKind Interlocking;
1129 bool Is64Bit;
1130
1131 static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
1132};
1133} // namespace
1134
1135BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
1136 switch (BuiltinID) {
1137 // Main portable variants.
1138 case Builtin::BI_bittest:
1139 return {TestOnly, Unlocked, false};
1140 case Builtin::BI_bittestandcomplement:
1141 return {Complement, Unlocked, false};
1142 case Builtin::BI_bittestandreset:
1143 return {Reset, Unlocked, false};
1144 case Builtin::BI_bittestandset:
1145 return {Set, Unlocked, false};
1146 case Builtin::BI_interlockedbittestandreset:
1147 return {Reset, Sequential, false};
1148 case Builtin::BI_interlockedbittestandset:
1149 return {Set, Sequential, false};
1150
1151 // X86-specific 64-bit variants.
1152 case Builtin::BI_bittest64:
1153 return {TestOnly, Unlocked, true};
1154 case Builtin::BI_bittestandcomplement64:
1155 return {Complement, Unlocked, true};
1156 case Builtin::BI_bittestandreset64:
1157 return {Reset, Unlocked, true};
1158 case Builtin::BI_bittestandset64:
1159 return {Set, Unlocked, true};
1160 case Builtin::BI_interlockedbittestandreset64:
1161 return {Reset, Sequential, true};
1162 case Builtin::BI_interlockedbittestandset64:
1163 return {Set, Sequential, true};
1164
1165 // ARM/AArch64-specific ordering variants.
1166 case Builtin::BI_interlockedbittestandset_acq:
1167 return {Set, Acquire, false};
1168 case Builtin::BI_interlockedbittestandset_rel:
1169 return {Set, Release, false};
1170 case Builtin::BI_interlockedbittestandset_nf:
1171 return {Set, NoFence, false};
1172 case Builtin::BI_interlockedbittestandreset_acq:
1173 return {Reset, Acquire, false};
1174 case Builtin::BI_interlockedbittestandreset_rel:
1175 return {Reset, Release, false};
1176 case Builtin::BI_interlockedbittestandreset_nf:
1177 return {Reset, NoFence, false};
1178 }
1179 llvm_unreachable("expected only bittest intrinsics");
1180}
1181
1182static char bitActionToX86BTCode(BitTest::ActionKind A) {
1183 switch (A) {
1184 case BitTest::TestOnly: return '\0';
1185 case BitTest::Complement: return 'c';
1186 case BitTest::Reset: return 'r';
1187 case BitTest::Set: return 's';
1188 }
1189 llvm_unreachable("invalid action");
1190}
1191
1192static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
1193 BitTest BT,
1194 const CallExpr *E, Value *BitBase,
1195 Value *BitPos) {
1196 char Action = bitActionToX86BTCode(BT.Action);
1197 char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
1198
1199 // Build the assembly.
1200 SmallString<64> Asm;
1201 raw_svector_ostream AsmOS(Asm);
1202 if (BT.Interlocking != BitTest::Unlocked)
1203 AsmOS << "lock ";
1204 AsmOS << "bt";
1205 if (Action)
1206 AsmOS << Action;
1207 AsmOS << SizeSuffix << " $2, ($1)";
1208
1209 // Build the constraints. FIXME: We should support immediates when possible.
1210 std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";
1211 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1212 if (!MachineClobbers.empty()) {
1213 Constraints += ',';
1214 Constraints += MachineClobbers;
1215 }
1216 llvm::IntegerType *IntType = llvm::IntegerType::get(
1217 CGF.getLLVMContext(),
1218 CGF.getContext().getTypeSize(E->getArg(1)->getType()));
1219 llvm::FunctionType *FTy =
1220 llvm::FunctionType::get(CGF.Int8Ty, {CGF.UnqualPtrTy, IntType}, false);
1221
1222 llvm::InlineAsm *IA =
1223 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1224 return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
1225}
1226
1227static llvm::AtomicOrdering
1228getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
1229 switch (I) {
1230 case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic;
1231 case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
1232 case BitTest::Acquire: return llvm::AtomicOrdering::Acquire;
1233 case BitTest::Release: return llvm::AtomicOrdering::Release;
1234 case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic;
1235 }
1236 llvm_unreachable("invalid interlocking");
1237}
1238
1239/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
1240/// bits and a bit position and read and optionally modify the bit at that
1241/// position. The position index can be arbitrarily large, i.e. it can be larger
1242/// than 31 or 63, so we need an indexed load in the general case.
1243static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
1244 unsigned BuiltinID,
1245 const CallExpr *E) {
1246 Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
1247 Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
1248
1249 BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
1250
1251 // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
1252 // indexing operation internally. Use them if possible.
1253 if (CGF.getTarget().getTriple().isX86())
1254 return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
1255
1256 // Otherwise, use generic code to load one byte and test the bit. Use all but
1257 // the bottom three bits as the array index, and the bottom three bits to form
1258 // a mask.
1259 // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
1260 Value *ByteIndex = CGF.Builder.CreateAShr(
1261 BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
1262 Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
1263 Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
1264 ByteIndex, "bittest.byteaddr"),
1265 CGF.Int8Ty, CharUnits::One());
1266 Value *PosLow =
1267 CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
1268 llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
1269
1270 // The updating instructions will need a mask.
1271 Value *Mask = nullptr;
1272 if (BT.Action != BitTest::TestOnly) {
1273 Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
1274 "bittest.mask");
1275 }
1276
1277 // Check the action and ordering of the interlocked intrinsics.
1278 llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
1279
1280 Value *OldByte = nullptr;
1281 if (Ordering != llvm::AtomicOrdering::NotAtomic) {
1282 // Emit a combined atomicrmw load/store operation for the interlocked
1283 // intrinsics.
1284 llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
1285 if (BT.Action == BitTest::Reset) {
1286 Mask = CGF.Builder.CreateNot(Mask);
1287 RMWOp = llvm::AtomicRMWInst::And;
1288 }
1289 OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr, Mask, Ordering);
1290 } else {
1291 // Emit a plain load for the non-interlocked intrinsics.
1292 OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
1293 Value *NewByte = nullptr;
1294 switch (BT.Action) {
1295 case BitTest::TestOnly:
1296 // Don't store anything.
1297 break;
1298 case BitTest::Complement:
1299 NewByte = CGF.Builder.CreateXor(OldByte, Mask);
1300 break;
1301 case BitTest::Reset:
1302 NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
1303 break;
1304 case BitTest::Set:
1305 NewByte = CGF.Builder.CreateOr(OldByte, Mask);
1306 break;
1307 }
1308 if (NewByte)
1309 CGF.Builder.CreateStore(NewByte, ByteAddr);
1310 }
1311
1312 // However we loaded the old byte, either by plain load or atomicrmw, shift
1313 // the bit into the low position and mask it to 0 or 1.
1314 Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
1315 return CGF.Builder.CreateAnd(
1316 ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
1317}
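// Illustrative sketch of the generic (non-x86) path above: for
//
//   unsigned char r = _bittestandset(flags, 40);   // flags: long *
//
// the byte at index 40 >> 3 == 5 is loaded, the mask 1 << (40 & 7) == 1 is
// OR'ed in and stored back, and the original bit is shifted down and masked
// so the result is 0 or 1.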
1318
1319static llvm::Value *EmitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
1320 unsigned BuiltinID,
1321 const CallExpr *E) {
1322 Value *Addr = CGF.EmitScalarExpr(E->getArg(0));
1323
1324 SmallString<64> Asm;
1325 raw_svector_ostream AsmOS(Asm);
1326 llvm::IntegerType *RetType = CGF.Int32Ty;
1327
1328 switch (BuiltinID) {
1329 case clang::PPC::BI__builtin_ppc_ldarx:
1330 AsmOS << "ldarx ";
1331 RetType = CGF.Int64Ty;
1332 break;
1333 case clang::PPC::BI__builtin_ppc_lwarx:
1334 AsmOS << "lwarx ";
1335 RetType = CGF.Int32Ty;
1336 break;
1337 case clang::PPC::BI__builtin_ppc_lharx:
1338 AsmOS << "lharx ";
1339 RetType = CGF.Int16Ty;
1340 break;
1341 case clang::PPC::BI__builtin_ppc_lbarx:
1342 AsmOS << "lbarx ";
1343 RetType = CGF.Int8Ty;
1344 break;
1345 default:
1346 llvm_unreachable("Expected only PowerPC load reserve intrinsics");
1347 }
1348
1349 AsmOS << "$0, ${1:y}";
1350
1351 std::string Constraints = "=r,*Z,~{memory}";
1352 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1353 if (!MachineClobbers.empty()) {
1354 Constraints += ',';
1355 Constraints += MachineClobbers;
1356 }
1357
1358 llvm::Type *PtrType = CGF.UnqualPtrTy;
1359 llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false);
1360
1361 llvm::InlineAsm *IA =
1362 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1363 llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr});
1364 CI->addParamAttr(
1365 0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));
1366 return CI;
1367}
1368
1369namespace {
1370enum class MSVCSetJmpKind {
1371 _setjmpex,
1372 _setjmp3,
1373 _setjmp
1374};
1375}
1376
1377/// MSVC handles setjmp a bit differently on different platforms. On every
1378/// architecture except 32-bit x86, the frame address is passed. On x86, extra
1379/// parameters can be passed as variadic arguments, but we always pass none.
1380static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
1381 const CallExpr *E) {
1382 llvm::Value *Arg1 = nullptr;
1383 llvm::Type *Arg1Ty = nullptr;
1384 StringRef Name;
1385 bool IsVarArg = false;
1386 if (SJKind == MSVCSetJmpKind::_setjmp3) {
1387 Name = "_setjmp3";
1388 Arg1Ty = CGF.Int32Ty;
1389 Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
1390 IsVarArg = true;
1391 } else {
1392 Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
1393 Arg1Ty = CGF.Int8PtrTy;
1394 if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
1395 Arg1 = CGF.Builder.CreateCall(
1396 CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
1397 } else
1398 Arg1 = CGF.Builder.CreateCall(
1399 CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
1400 llvm::ConstantInt::get(CGF.Int32Ty, 0));
1401 }
1402
1403 // Mark the call site and declaration with ReturnsTwice.
1404 llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
1405 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
1406 CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
1407 llvm::Attribute::ReturnsTwice);
1408 llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
1409 llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
1410 ReturnsTwiceAttr, /*Local=*/true);
1411
1412 llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
1413 CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
1414 llvm::Value *Args[] = {Buf, Arg1};
1415 llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
1416 CB->setAttributes(ReturnsTwiceAttr);
1417 return RValue::get(CB);
1418}
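// Illustrative sketch of the helper above: for the _setjmp3 kind the emitted
// call is "_setjmp3(buf, 0)" (variadic, with no extra arguments), while for
// _setjmp/_setjmpex the second argument is the frame address (or the result of
// llvm.sponentry on AArch64); both the call site and the callee are marked
// returns_twice.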
1419
1420// Many of the MSVC builtins are available on x64, ARM, and AArch64; to avoid
1421// repeating code, we handle them here.
1461 __fastfail,
1462};
1463
1464static std::optional<CodeGenFunction::MSVCIntrin>
1465translateArmToMsvcIntrin(unsigned BuiltinID) {
1466 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1467 switch (BuiltinID) {
1468 default:
1469 return std::nullopt;
1470 case clang::ARM::BI_BitScanForward:
1471 case clang::ARM::BI_BitScanForward64:
1472 return MSVCIntrin::_BitScanForward;
1473 case clang::ARM::BI_BitScanReverse:
1474 case clang::ARM::BI_BitScanReverse64:
1475 return MSVCIntrin::_BitScanReverse;
1476 case clang::ARM::BI_InterlockedAnd64:
1477 return MSVCIntrin::_InterlockedAnd;
1478 case clang::ARM::BI_InterlockedExchange64:
1479 return MSVCIntrin::_InterlockedExchange;
1480 case clang::ARM::BI_InterlockedExchangeAdd64:
1481 return MSVCIntrin::_InterlockedExchangeAdd;
1482 case clang::ARM::BI_InterlockedExchangeSub64:
1483 return MSVCIntrin::_InterlockedExchangeSub;
1484 case clang::ARM::BI_InterlockedOr64:
1485 return MSVCIntrin::_InterlockedOr;
1486 case clang::ARM::BI_InterlockedXor64:
1487 return MSVCIntrin::_InterlockedXor;
1488 case clang::ARM::BI_InterlockedDecrement64:
1489 return MSVCIntrin::_InterlockedDecrement;
1490 case clang::ARM::BI_InterlockedIncrement64:
1491 return MSVCIntrin::_InterlockedIncrement;
1492 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
1493 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
1494 case clang::ARM::BI_InterlockedExchangeAdd_acq:
1495 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
1496 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1497 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
1498 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
1499 case clang::ARM::BI_InterlockedExchangeAdd_rel:
1500 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
1501 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1502 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
1503 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
1504 case clang::ARM::BI_InterlockedExchangeAdd_nf:
1505 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
1506 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1507 case clang::ARM::BI_InterlockedExchange8_acq:
1508 case clang::ARM::BI_InterlockedExchange16_acq:
1509 case clang::ARM::BI_InterlockedExchange_acq:
1510 case clang::ARM::BI_InterlockedExchange64_acq:
1511 return MSVCIntrin::_InterlockedExchange_acq;
1512 case clang::ARM::BI_InterlockedExchange8_rel:
1513 case clang::ARM::BI_InterlockedExchange16_rel:
1514 case clang::ARM::BI_InterlockedExchange_rel:
1515 case clang::ARM::BI_InterlockedExchange64_rel:
1516 return MSVCIntrin::_InterlockedExchange_rel;
1517 case clang::ARM::BI_InterlockedExchange8_nf:
1518 case clang::ARM::BI_InterlockedExchange16_nf:
1519 case clang::ARM::BI_InterlockedExchange_nf:
1520 case clang::ARM::BI_InterlockedExchange64_nf:
1521 return MSVCIntrin::_InterlockedExchange_nf;
1522 case clang::ARM::BI_InterlockedCompareExchange8_acq:
1523 case clang::ARM::BI_InterlockedCompareExchange16_acq:
1524 case clang::ARM::BI_InterlockedCompareExchange_acq:
1525 case clang::ARM::BI_InterlockedCompareExchange64_acq:
1526 return MSVCIntrin::_InterlockedCompareExchange_acq;
1527 case clang::ARM::BI_InterlockedCompareExchange8_rel:
1528 case clang::ARM::BI_InterlockedCompareExchange16_rel:
1529 case clang::ARM::BI_InterlockedCompareExchange_rel:
1530 case clang::ARM::BI_InterlockedCompareExchange64_rel:
1531 return MSVCIntrin::_InterlockedCompareExchange_rel;
1532 case clang::ARM::BI_InterlockedCompareExchange8_nf:
1533 case clang::ARM::BI_InterlockedCompareExchange16_nf:
1534 case clang::ARM::BI_InterlockedCompareExchange_nf:
1535 case clang::ARM::BI_InterlockedCompareExchange64_nf:
1536 return MSVCIntrin::_InterlockedCompareExchange_nf;
1537 case clang::ARM::BI_InterlockedOr8_acq:
1538 case clang::ARM::BI_InterlockedOr16_acq:
1539 case clang::ARM::BI_InterlockedOr_acq:
1540 case clang::ARM::BI_InterlockedOr64_acq:
1541 return MSVCIntrin::_InterlockedOr_acq;
1542 case clang::ARM::BI_InterlockedOr8_rel:
1543 case clang::ARM::BI_InterlockedOr16_rel:
1544 case clang::ARM::BI_InterlockedOr_rel:
1545 case clang::ARM::BI_InterlockedOr64_rel:
1546 return MSVCIntrin::_InterlockedOr_rel;
1547 case clang::ARM::BI_InterlockedOr8_nf:
1548 case clang::ARM::BI_InterlockedOr16_nf:
1549 case clang::ARM::BI_InterlockedOr_nf:
1550 case clang::ARM::BI_InterlockedOr64_nf:
1551 return MSVCIntrin::_InterlockedOr_nf;
1552 case clang::ARM::BI_InterlockedXor8_acq:
1553 case clang::ARM::BI_InterlockedXor16_acq:
1554 case clang::ARM::BI_InterlockedXor_acq:
1555 case clang::ARM::BI_InterlockedXor64_acq:
1556 return MSVCIntrin::_InterlockedXor_acq;
1557 case clang::ARM::BI_InterlockedXor8_rel:
1558 case clang::ARM::BI_InterlockedXor16_rel:
1559 case clang::ARM::BI_InterlockedXor_rel:
1560 case clang::ARM::BI_InterlockedXor64_rel:
1561 return MSVCIntrin::_InterlockedXor_rel;
1562 case clang::ARM::BI_InterlockedXor8_nf:
1563 case clang::ARM::BI_InterlockedXor16_nf:
1564 case clang::ARM::BI_InterlockedXor_nf:
1565 case clang::ARM::BI_InterlockedXor64_nf:
1566 return MSVCIntrin::_InterlockedXor_nf;
1567 case clang::ARM::BI_InterlockedAnd8_acq:
1568 case clang::ARM::BI_InterlockedAnd16_acq:
1569 case clang::ARM::BI_InterlockedAnd_acq:
1570 case clang::ARM::BI_InterlockedAnd64_acq:
1571 return MSVCIntrin::_InterlockedAnd_acq;
1572 case clang::ARM::BI_InterlockedAnd8_rel:
1573 case clang::ARM::BI_InterlockedAnd16_rel:
1574 case clang::ARM::BI_InterlockedAnd_rel:
1575 case clang::ARM::BI_InterlockedAnd64_rel:
1576 return MSVCIntrin::_InterlockedAnd_rel;
1577 case clang::ARM::BI_InterlockedAnd8_nf:
1578 case clang::ARM::BI_InterlockedAnd16_nf:
1579 case clang::ARM::BI_InterlockedAnd_nf:
1580 case clang::ARM::BI_InterlockedAnd64_nf:
1581 return MSVCIntrin::_InterlockedAnd_nf;
1582 case clang::ARM::BI_InterlockedIncrement16_acq:
1583 case clang::ARM::BI_InterlockedIncrement_acq:
1584 case clang::ARM::BI_InterlockedIncrement64_acq:
1585 return MSVCIntrin::_InterlockedIncrement_acq;
1586 case clang::ARM::BI_InterlockedIncrement16_rel:
1587 case clang::ARM::BI_InterlockedIncrement_rel:
1588 case clang::ARM::BI_InterlockedIncrement64_rel:
1589 return MSVCIntrin::_InterlockedIncrement_rel;
1590 case clang::ARM::BI_InterlockedIncrement16_nf:
1591 case clang::ARM::BI_InterlockedIncrement_nf:
1592 case clang::ARM::BI_InterlockedIncrement64_nf:
1593 return MSVCIntrin::_InterlockedIncrement_nf;
1594 case clang::ARM::BI_InterlockedDecrement16_acq:
1595 case clang::ARM::BI_InterlockedDecrement_acq:
1596 case clang::ARM::BI_InterlockedDecrement64_acq:
1597 return MSVCIntrin::_InterlockedDecrement_acq;
1598 case clang::ARM::BI_InterlockedDecrement16_rel:
1599 case clang::ARM::BI_InterlockedDecrement_rel:
1600 case clang::ARM::BI_InterlockedDecrement64_rel:
1601 return MSVCIntrin::_InterlockedDecrement_rel;
1602 case clang::ARM::BI_InterlockedDecrement16_nf:
1603 case clang::ARM::BI_InterlockedDecrement_nf:
1604 case clang::ARM::BI_InterlockedDecrement64_nf:
1605 return MSVCIntrin::_InterlockedDecrement_nf;
1606 }
1607 llvm_unreachable("must return from switch");
1608}
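// Illustrative note (not part of the upstream source): the _acq/_rel/_nf
// suffixes on the MSVC interlocked builtins select the memory ordering used
// when the intrinsic is emitted, as seen in EmitMSVCBuiltinExpr below:
//   _InterlockedAnd_acq -> atomicrmw and, AtomicOrdering::Acquire
//   _InterlockedAnd_rel -> atomicrmw and, AtomicOrdering::Release
//   _InterlockedAnd_nf  -> atomicrmw and, AtomicOrdering::Monotonic ("no fence")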
1609
1610static std::optional<CodeGenFunction::MSVCIntrin>
1611translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
1612 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1613 switch (BuiltinID) {
1614 default:
1615 return std::nullopt;
1616 case clang::AArch64::BI_BitScanForward:
1617 case clang::AArch64::BI_BitScanForward64:
1618 return MSVCIntrin::_BitScanForward;
1619 case clang::AArch64::BI_BitScanReverse:
1620 case clang::AArch64::BI_BitScanReverse64:
1621 return MSVCIntrin::_BitScanReverse;
1622 case clang::AArch64::BI_InterlockedAnd64:
1623 return MSVCIntrin::_InterlockedAnd;
1624 case clang::AArch64::BI_InterlockedExchange64:
1625 return MSVCIntrin::_InterlockedExchange;
1626 case clang::AArch64::BI_InterlockedExchangeAdd64:
1627 return MSVCIntrin::_InterlockedExchangeAdd;
1628 case clang::AArch64::BI_InterlockedExchangeSub64:
1629 return MSVCIntrin::_InterlockedExchangeSub;
1630 case clang::AArch64::BI_InterlockedOr64:
1631 return MSVCIntrin::_InterlockedOr;
1632 case clang::AArch64::BI_InterlockedXor64:
1633 return MSVCIntrin::_InterlockedXor;
1634 case clang::AArch64::BI_InterlockedDecrement64:
1635 return MSVCIntrin::_InterlockedDecrement;
1636 case clang::AArch64::BI_InterlockedIncrement64:
1637 return MSVCIntrin::_InterlockedIncrement;
1638 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
1639 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
1640 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
1641 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
1642 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1643 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
1644 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
1645 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
1646 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
1647 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1648 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
1649 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
1650 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
1651 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
1652 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1653 case clang::AArch64::BI_InterlockedExchange8_acq:
1654 case clang::AArch64::BI_InterlockedExchange16_acq:
1655 case clang::AArch64::BI_InterlockedExchange_acq:
1656 case clang::AArch64::BI_InterlockedExchange64_acq:
1657 return MSVCIntrin::_InterlockedExchange_acq;
1658 case clang::AArch64::BI_InterlockedExchange8_rel:
1659 case clang::AArch64::BI_InterlockedExchange16_rel:
1660 case clang::AArch64::BI_InterlockedExchange_rel:
1661 case clang::AArch64::BI_InterlockedExchange64_rel:
1662 return MSVCIntrin::_InterlockedExchange_rel;
1663 case clang::AArch64::BI_InterlockedExchange8_nf:
1664 case clang::AArch64::BI_InterlockedExchange16_nf:
1665 case clang::AArch64::BI_InterlockedExchange_nf:
1666 case clang::AArch64::BI_InterlockedExchange64_nf:
1667 return MSVCIntrin::_InterlockedExchange_nf;
1668 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
1669 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
1670 case clang::AArch64::BI_InterlockedCompareExchange_acq:
1671 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
1672 return MSVCIntrin::_InterlockedCompareExchange_acq;
1673 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
1674 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
1675 case clang::AArch64::BI_InterlockedCompareExchange_rel:
1676 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
1677 return MSVCIntrin::_InterlockedCompareExchange_rel;
1678 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
1679 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
1680 case clang::AArch64::BI_InterlockedCompareExchange_nf:
1681 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
1682 return MSVCIntrin::_InterlockedCompareExchange_nf;
1683 case clang::AArch64::BI_InterlockedCompareExchange128:
1684 return MSVCIntrin::_InterlockedCompareExchange128;
1685 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
1686 return MSVCIntrin::_InterlockedCompareExchange128_acq;
1687 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
1688 return MSVCIntrin::_InterlockedCompareExchange128_nf;
1689 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
1690 return MSVCIntrin::_InterlockedCompareExchange128_rel;
1691 case clang::AArch64::BI_InterlockedOr8_acq:
1692 case clang::AArch64::BI_InterlockedOr16_acq:
1693 case clang::AArch64::BI_InterlockedOr_acq:
1694 case clang::AArch64::BI_InterlockedOr64_acq:
1695 return MSVCIntrin::_InterlockedOr_acq;
1696 case clang::AArch64::BI_InterlockedOr8_rel:
1697 case clang::AArch64::BI_InterlockedOr16_rel:
1698 case clang::AArch64::BI_InterlockedOr_rel:
1699 case clang::AArch64::BI_InterlockedOr64_rel:
1700 return MSVCIntrin::_InterlockedOr_rel;
1701 case clang::AArch64::BI_InterlockedOr8_nf:
1702 case clang::AArch64::BI_InterlockedOr16_nf:
1703 case clang::AArch64::BI_InterlockedOr_nf:
1704 case clang::AArch64::BI_InterlockedOr64_nf:
1705 return MSVCIntrin::_InterlockedOr_nf;
1706 case clang::AArch64::BI_InterlockedXor8_acq:
1707 case clang::AArch64::BI_InterlockedXor16_acq:
1708 case clang::AArch64::BI_InterlockedXor_acq:
1709 case clang::AArch64::BI_InterlockedXor64_acq:
1710 return MSVCIntrin::_InterlockedXor_acq;
1711 case clang::AArch64::BI_InterlockedXor8_rel:
1712 case clang::AArch64::BI_InterlockedXor16_rel:
1713 case clang::AArch64::BI_InterlockedXor_rel:
1714 case clang::AArch64::BI_InterlockedXor64_rel:
1715 return MSVCIntrin::_InterlockedXor_rel;
1716 case clang::AArch64::BI_InterlockedXor8_nf:
1717 case clang::AArch64::BI_InterlockedXor16_nf:
1718 case clang::AArch64::BI_InterlockedXor_nf:
1719 case clang::AArch64::BI_InterlockedXor64_nf:
1720 return MSVCIntrin::_InterlockedXor_nf;
1721 case clang::AArch64::BI_InterlockedAnd8_acq:
1722 case clang::AArch64::BI_InterlockedAnd16_acq:
1723 case clang::AArch64::BI_InterlockedAnd_acq:
1724 case clang::AArch64::BI_InterlockedAnd64_acq:
1725 return MSVCIntrin::_InterlockedAnd_acq;
1726 case clang::AArch64::BI_InterlockedAnd8_rel:
1727 case clang::AArch64::BI_InterlockedAnd16_rel:
1728 case clang::AArch64::BI_InterlockedAnd_rel:
1729 case clang::AArch64::BI_InterlockedAnd64_rel:
1730 return MSVCIntrin::_InterlockedAnd_rel;
1731 case clang::AArch64::BI_InterlockedAnd8_nf:
1732 case clang::AArch64::BI_InterlockedAnd16_nf:
1733 case clang::AArch64::BI_InterlockedAnd_nf:
1734 case clang::AArch64::BI_InterlockedAnd64_nf:
1735 return MSVCIntrin::_InterlockedAnd_nf;
1736 case clang::AArch64::BI_InterlockedIncrement16_acq:
1737 case clang::AArch64::BI_InterlockedIncrement_acq:
1738 case clang::AArch64::BI_InterlockedIncrement64_acq:
1739 return MSVCIntrin::_InterlockedIncrement_acq;
1740 case clang::AArch64::BI_InterlockedIncrement16_rel:
1741 case clang::AArch64::BI_InterlockedIncrement_rel:
1742 case clang::AArch64::BI_InterlockedIncrement64_rel:
1743 return MSVCIntrin::_InterlockedIncrement_rel;
1744 case clang::AArch64::BI_InterlockedIncrement16_nf:
1745 case clang::AArch64::BI_InterlockedIncrement_nf:
1746 case clang::AArch64::BI_InterlockedIncrement64_nf:
1747 return MSVCIntrin::_InterlockedIncrement_nf;
1748 case clang::AArch64::BI_InterlockedDecrement16_acq:
1749 case clang::AArch64::BI_InterlockedDecrement_acq:
1750 case clang::AArch64::BI_InterlockedDecrement64_acq:
1751 return MSVCIntrin::_InterlockedDecrement_acq;
1752 case clang::AArch64::BI_InterlockedDecrement16_rel:
1753 case clang::AArch64::BI_InterlockedDecrement_rel:
1754 case clang::AArch64::BI_InterlockedDecrement64_rel:
1755 return MSVCIntrin::_InterlockedDecrement_rel;
1756 case clang::AArch64::BI_InterlockedDecrement16_nf:
1757 case clang::AArch64::BI_InterlockedDecrement_nf:
1758 case clang::AArch64::BI_InterlockedDecrement64_nf:
1759 return MSVCIntrin::_InterlockedDecrement_nf;
1760 }
1761 llvm_unreachable("must return from switch");
1762}
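// Illustrative note (not part of the upstream source): unlike the ARM table
// above, the AArch64 table also routes the 128-bit compare-exchange family
// (_InterlockedCompareExchange128[_acq|_rel|_nf]) to MSVCIntrin values, which
// EmitMSVCBuiltinExpr lowers via EmitAtomicCmpXchg128ForMSIntrin. A hedged
// usage sketch in C, assuming the MSVC <intrin.h> declarations:
//   __int64 DesiredHigh = ..., DesiredLow = ..., Comparand[2] = {...};
//   unsigned char ok =
//       _InterlockedCompareExchange128(Dest, DesiredHigh, DesiredLow, Comparand);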
1763
1764static std::optional<CodeGenFunction::MSVCIntrin>
1765translateX86ToMsvcIntrin(unsigned BuiltinID) {
1766 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1767 switch (BuiltinID) {
1768 default:
1769 return std::nullopt;
1770 case clang::X86::BI_BitScanForward:
1771 case clang::X86::BI_BitScanForward64:
1772 return MSVCIntrin::_BitScanForward;
1773 case clang::X86::BI_BitScanReverse:
1774 case clang::X86::BI_BitScanReverse64:
1775 return MSVCIntrin::_BitScanReverse;
1776 case clang::X86::BI_InterlockedAnd64:
1777 return MSVCIntrin::_InterlockedAnd;
1778 case clang::X86::BI_InterlockedCompareExchange128:
1779 return MSVCIntrin::_InterlockedCompareExchange128;
1780 case clang::X86::BI_InterlockedExchange64:
1781 return MSVCIntrin::_InterlockedExchange;
1782 case clang::X86::BI_InterlockedExchangeAdd64:
1783 return MSVCIntrin::_InterlockedExchangeAdd;
1784 case clang::X86::BI_InterlockedExchangeSub64:
1785 return MSVCIntrin::_InterlockedExchangeSub;
1786 case clang::X86::BI_InterlockedOr64:
1787 return MSVCIntrin::_InterlockedOr;
1788 case clang::X86::BI_InterlockedXor64:
1789 return MSVCIntrin::_InterlockedXor;
1790 case clang::X86::BI_InterlockedDecrement64:
1791 return MSVCIntrin::_InterlockedDecrement;
1792 case clang::X86::BI_InterlockedIncrement64:
1793 return MSVCIntrin::_InterlockedIncrement;
1794 }
1795 llvm_unreachable("must return from switch");
1796}
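// Illustrative sketch (not part of the upstream source): a target-specific
// builtin emitter can consult these translate*ToMsvcIntrin helpers before
// doing any other work; a hypothetical caller would look roughly like:
//   if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
//     return EmitMSVCBuiltinExpr(*MsvcIntId, E);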
1797
1798// Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
1799Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
1800 const CallExpr *E) {
1801 switch (BuiltinID) {
1802 case MSVCIntrin::_BitScanForward:
1803 case MSVCIntrin::_BitScanReverse: {
1804 Address IndexAddress(EmitPointerWithAlignment(E->getArg(0)));
1805 Value *ArgValue = EmitScalarExpr(E->getArg(1));
1806
1807 llvm::Type *ArgType = ArgValue->getType();
1808 llvm::Type *IndexType = IndexAddress.getElementType();
1809 llvm::Type *ResultType = ConvertType(E->getType());
1810
1811 Value *ArgZero = llvm::Constant::getNullValue(ArgType);
1812 Value *ResZero = llvm::Constant::getNullValue(ResultType);
1813 Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
1814
1815 BasicBlock *Begin = Builder.GetInsertBlock();
1816 BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
1817 Builder.SetInsertPoint(End);
1818 PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
1819
1820 Builder.SetInsertPoint(Begin);
1821 Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
1822 BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
1823 Builder.CreateCondBr(IsZero, End, NotZero);
1824 Result->addIncoming(ResZero, Begin);
1825
1826 Builder.SetInsertPoint(NotZero);
1827
1828 if (BuiltinID == MSVCIntrin::_BitScanForward) {
1829 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1830 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
1831 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
1832 Builder.CreateStore(ZeroCount, IndexAddress, false);
1833 } else {
1834 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
1835 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
1836
1837 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1838 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
1839 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
1840 Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
1841 Builder.CreateStore(Index, IndexAddress, false);
1842 }
1843 Builder.CreateBr(End);
1844 Result->addIncoming(ResOne, NotZero);
1845
1846 Builder.SetInsertPoint(End);
1847 return Result;
1848 }
1849 case MSVCIntrin::_InterlockedAnd:
1850 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
1851 case MSVCIntrin::_InterlockedExchange:
1852 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
1853 case MSVCIntrin::_InterlockedExchangeAdd:
1854 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
1855 case MSVCIntrin::_InterlockedExchangeSub:
1856 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
1857 case MSVCIntrin::_InterlockedOr:
1858 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
1859 case MSVCIntrin::_InterlockedXor:
1860 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
1861 case MSVCIntrin::_InterlockedExchangeAdd_acq:
1862 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1863 AtomicOrdering::Acquire);
1864 case MSVCIntrin::_InterlockedExchangeAdd_rel:
1865 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1866 AtomicOrdering::Release);
1867 case MSVCIntrin::_InterlockedExchangeAdd_nf:
1868 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1869 AtomicOrdering::Monotonic);
1870 case MSVCIntrin::_InterlockedExchange_acq:
1871 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1872 AtomicOrdering::Acquire);
1873 case MSVCIntrin::_InterlockedExchange_rel:
1874 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1875 AtomicOrdering::Release);
1876 case MSVCIntrin::_InterlockedExchange_nf:
1877 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1878 AtomicOrdering::Monotonic);
1879 case MSVCIntrin::_InterlockedCompareExchange_acq:
1880 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
1881 case MSVCIntrin::_InterlockedCompareExchange_rel:
1882 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
1883 case MSVCIntrin::_InterlockedCompareExchange_nf:
1884 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
1885 case MSVCIntrin::_InterlockedCompareExchange128:
1886 return EmitAtomicCmpXchg128ForMSIntrin(
1887 *this, E, AtomicOrdering::SequentiallyConsistent);
1888 case MSVCIntrin::_InterlockedCompareExchange128_acq:
1889 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
1890 case MSVCIntrin::_InterlockedCompareExchange128_rel:
1891 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
1892 case MSVCIntrin::_InterlockedCompareExchange128_nf:
1893 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
1894 case MSVCIntrin::_InterlockedOr_acq:
1895 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1896 AtomicOrdering::Acquire);
1897 case MSVCIntrin::_InterlockedOr_rel:
1898 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1899 AtomicOrdering::Release);
1900 case MSVCIntrin::_InterlockedOr_nf:
1901 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1902 AtomicOrdering::Monotonic);
1903 case MSVCIntrin::_InterlockedXor_acq:
1904 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1905 AtomicOrdering::Acquire);
1906 case MSVCIntrin::_InterlockedXor_rel:
1907 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1908 AtomicOrdering::Release);
1909 case MSVCIntrin::_InterlockedXor_nf:
1910 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1911 AtomicOrdering::Monotonic);
1912 case MSVCIntrin::_InterlockedAnd_acq:
1913 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1914 AtomicOrdering::Acquire);
1915 case MSVCIntrin::_InterlockedAnd_rel:
1916 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1917 AtomicOrdering::Release);
1918 case MSVCIntrin::_InterlockedAnd_nf:
1919 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1920 AtomicOrdering::Monotonic);
1921 case MSVCIntrin::_InterlockedIncrement_acq:
1922 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
1923 case MSVCIntrin::_InterlockedIncrement_rel:
1924 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
1925 case MSVCIntrin::_InterlockedIncrement_nf:
1926 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
1927 case MSVCIntrin::_InterlockedDecrement_acq:
1928 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
1929 case MSVCIntrin::_InterlockedDecrement_rel:
1930 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
1931 case MSVCIntrin::_InterlockedDecrement_nf:
1932 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
1933
1934 case MSVCIntrin::_InterlockedDecrement:
1935 return EmitAtomicDecrementValue(*this, E);
1936 case MSVCIntrin::_InterlockedIncrement:
1937 return EmitAtomicIncrementValue(*this, E);
1938
1939 case MSVCIntrin::__fastfail: {
1940 // Request immediate process termination from the kernel. The instruction
1941 // sequences to do this are documented on MSDN:
1942 // https://msdn.microsoft.com/en-us/library/dn774154.aspx
1943 llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
1944 StringRef Asm, Constraints;
1945 switch (ISA) {
1946 default:
1947 ErrorUnsupported(E, "__fastfail call for this architecture");
1948 break;
1949 case llvm::Triple::x86:
1950 case llvm::Triple::x86_64:
1951 Asm = "int $$0x29";
1952 Constraints = "{cx}";
1953 break;
1954 case llvm::Triple::thumb:
1955 Asm = "udf #251";
1956 Constraints = "{r0}";
1957 break;
1958 case llvm::Triple::aarch64:
1959 Asm = "brk #0xF003";
1960 Constraints = "{w0}";
1961 }
1962 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
1963 llvm::InlineAsm *IA =
1964 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1965 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
1966 getLLVMContext(), llvm::AttributeList::FunctionIndex,
1967 llvm::Attribute::NoReturn);
1968 llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
1969 CI->setAttributes(NoReturnAttr);
1970 return CI;
1971 }
1972 }
1973 llvm_unreachable("Incorrect MSVC intrinsic!");
1974}
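// Illustrative sketch (not part of the upstream source): for the bit-scan
// builtins the code above emits a zero test plus a cttz/ctlz call. Roughly,
// a 32-bit _BitScanReverse(&Index, Mask) produces IR of the shape:
//   %iszero = icmp eq i32 %mask, 0
//   br i1 %iszero, label %bitscan_end, label %bitscan_not_zero
// bitscan_not_zero:
//   %ctlz  = call i32 @llvm.ctlz.i32(i32 %mask, i1 true)
//   %index = sub nsw i32 31, %ctlz
//   ; %index is stored through the first argument; a phi in bitscan_end
//   ; selects 0 (input was zero) or 1 (bit found) as the return value.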
1975
1976namespace {
1977// ARC cleanup for __builtin_os_log_format
1978struct CallObjCArcUse final : EHScopeStack::Cleanup {
1979 CallObjCArcUse(llvm::Value *object) : object(object) {}
1980 llvm::Value *object;
1981
1982 void Emit(CodeGenFunction &CGF, Flags flags) override {
1983 CGF.EmitARCIntrinsicUse(object);
1984 }
1985};
1986}
1987
1988 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
1989 BuiltinCheckKind Kind) {
1990 assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
1991 && "Unsupported builtin check kind");
1992
1993 Value *ArgValue = EmitScalarExpr(E);
1994 if (!SanOpts.has(SanitizerKind::Builtin))
1995 return ArgValue;
1996
1997 SanitizerScope SanScope(this);
1998 Value *Cond = Builder.CreateICmpNE(
1999 ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
2000 EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
2001 SanitizerHandler::InvalidBuiltin,
2002 {EmitCheckSourceLocation(E->getExprLoc()),
2003 llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
2004 std::nullopt);
2005 return ArgValue;
2006}
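// Illustrative example (not from the upstream source): with -fsanitize=builtin,
// the check emitted above fires when a zero value reaches __builtin_clz or
// __builtin_ctz at run time, e.g.:
//   unsigned n = 0;
//   unsigned r = __builtin_ctz(n);  // reported via SanitizerHandler::InvalidBuiltin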
2007
2008static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) {
2009 return CGF.Builder.CreateBinaryIntrinsic(
2010 Intrinsic::abs, ArgValue,
2011 ConstantInt::get(CGF.Builder.getInt1Ty(), HasNSW));
2012}
2013
2014 static Value *EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E,
2015 bool SanitizeOverflow) {
2016 Value *ArgValue = CGF.EmitScalarExpr(E->getArg(0));
2017
2018 // Try to eliminate overflow check.
2019 if (const auto *VCI = dyn_cast<llvm::ConstantInt>(ArgValue)) {
2020 if (!VCI->isMinSignedValue())
2021 return EmitAbs(CGF, ArgValue, true);
2022 }
2023
2024 CodeGenFunction::SanitizerScope SanScope(&CGF);
2025
2026 Constant *Zero = Constant::getNullValue(ArgValue->getType());
2027 Value *ResultAndOverflow = CGF.Builder.CreateBinaryIntrinsic(
2028 Intrinsic::ssub_with_overflow, Zero, ArgValue);
2029 Value *Result = CGF.Builder.CreateExtractValue(ResultAndOverflow, 0);
2030 Value *NotOverflow = CGF.Builder.CreateNot(
2031 CGF.Builder.CreateExtractValue(ResultAndOverflow, 1));
2032
2033 // TODO: support -ftrapv-handler.
2034 if (SanitizeOverflow) {
2035 CGF.EmitCheck({{NotOverflow, SanitizerKind::SignedIntegerOverflow}},
2036 SanitizerHandler::NegateOverflow,
2037 {CGF.EmitCheckSourceLocation(E->getArg(0)->getExprLoc()),
2038 CGF.EmitCheckTypeDescriptor(E->getType())},
2039 {ArgValue});
2040 } else
2041 CGF.EmitTrapCheck(NotOverflow, SanitizerHandler::SubOverflow);
2042
2043 Value *CmpResult = CGF.Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
2044 return CGF.Builder.CreateSelect(CmpResult, Result, ArgValue, "abs");
2045}
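// Illustrative example (not from the upstream source): the only input that can
// overflow here is the minimum signed value, since |INT_MIN| is not
// representable. A sketch of what the code above emits for __builtin_abs(x):
//   %pair = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 0, i32 %x)
//   %neg  = extractvalue { i32, i1 } %pair, 0
//   %ovf  = extractvalue { i32, i1 } %pair, 1   ; checked or trapped on
//   %cmp  = icmp slt i32 %x, 0
//   %abs  = select i1 %cmp, i32 %neg, i32 %x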
2046
2047/// Get the argument type for arguments to os_log_helper.
2048 static CanQualType getOSLogArgType(ASTContext &C, int Size) {
2049 QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
2050 return C.getCanonicalType(UnsignedTy);
2051}
2052
2053 llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
2054 const analyze_os_log::OSLogBufferLayout &Layout,
2055 CharUnits BufferAlignment) {
2056 ASTContext &Ctx = getContext();
2057
2058 llvm::SmallString<64> Name;
2059 {
2060 raw_svector_ostream OS(Name);
2061 OS << "__os_log_helper";
2062 OS << "_" << BufferAlignment.getQuantity();
2063 OS << "_" << int(Layout.getSummaryByte());
2064 OS << "_" << int(Layout.getNumArgsByte());
2065 for (const auto &Item : Layout.Items)
2066 OS << "_" << int(Item.getSizeByte()) << "_"
2067 << int(Item.getDescriptorByte());
2068 }
2069
2070 if (llvm::Function *F = CGM.getModule().getFunction(Name))
2071 return F;
2072
2073 llvm::SmallVector<QualType, 4> ArgTys;
2074 FunctionArgList Args;
2075 Args.push_back(ImplicitParamDecl::Create(
2076 Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
2077 ImplicitParamKind::Other));
2078 ArgTys.emplace_back(Ctx.VoidPtrTy);
2079
2080 for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
2081 char Size = Layout.Items[I].getSizeByte();
2082 if (!Size)
2083 continue;
2084
2085 QualType ArgTy = getOSLogArgType(Ctx, Size);
2086 Args.push_back(ImplicitParamDecl::Create(
2087 Ctx, nullptr, SourceLocation(),
2088 &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
2089 ImplicitParamKind::Other));
2090 ArgTys.emplace_back(ArgTy);
2091 }
2092
2093 QualType ReturnTy = Ctx.VoidTy;
2094
2095 // The helper function has linkonce_odr linkage to enable the linker to merge
2096 // identical functions. To ensure the merging always happens, 'noinline' is
2097 // attached to the function when compiling with -Oz.
2098 const CGFunctionInfo &FI =
2099 CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
2100 llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
2101 llvm::Function *Fn = llvm::Function::Create(
2102 FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
2103 Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
2104 CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false);
2105 CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
2106 Fn->setDoesNotThrow();
2107
2108 // Attach 'noinline' at -Oz.
2109 if (CGM.getCodeGenOpts().OptimizeSize == 2)
2110 Fn->addFnAttr(llvm::Attribute::NoInline);
2111
2112 auto NL = ApplyDebugLocation::CreateEmpty(*this);
2113 StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args);
2114
2115 // Create a scope with an artificial location for the body of this function.
2116 auto AL = ApplyDebugLocation::CreateArtificial(*this);
2117
2118 CharUnits Offset;
2119 Address BufAddr =
2120 makeNaturalAddressForPointer(Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Ctx.VoidTy,
2121 BufferAlignment);
2122 Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
2123 Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2124 Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
2125 Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2126
2127 unsigned I = 1;
2128 for (const auto &Item : Layout.Items) {
2129 Builder.CreateStore(
2130 Builder.getInt8(Item.getDescriptorByte()),
2131 Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2132 Builder.CreateStore(
2133 Builder.getInt8(Item.getSizeByte()),
2134 Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2135
2136 CharUnits Size = Item.size();
2137 if (!Size.getQuantity())
2138 continue;
2139
2140 Address Arg = GetAddrOfLocalVar(Args[I]);
2141 Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
2142 Addr = Addr.withElementType(Arg.getElementType());
2143 Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
2144 Offset += Size;
2145 ++I;
2146 }
2147
2148 FinishFunction();
2149
2150 return Fn;
2151}
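// Illustrative note (not part of the upstream source): the helper emitted above
// serializes the os_log buffer in the layout implied by the stores, roughly:
//   byte 0         summary
//   byte 1         number of arguments
//   per argument:  1 descriptor byte, 1 size byte, then `size` bytes of data
// The helper's name encodes that layout (alignment, summary, numArgs and the
// per-item size/descriptor bytes), so identical layouts share a single
// linkonce_odr definition.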
2152
2153 RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
2154 assert(E.getNumArgs() >= 2 &&
2155 "__builtin_os_log_format takes at least 2 arguments");
2156 ASTContext &Ctx = getContext();
2157 analyze_os_log::OSLogBufferLayout Layout;
2158 analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
2159 Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
2160 llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2161
2162 // Ignore argument 1, the format string. It is not currently used.
2163 CallArgList Args;
2164 Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy);
2165
2166 for (const auto &Item : Layout.Items) {
2167 int Size = Item.getSizeByte();
2168 if (!Size)
2169 continue;
2170
2171 llvm::Value *ArgVal;
2172
2173 if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
2174 uint64_t Val = 0;
2175 for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
2176 Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
2177 ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
2178 } else if (const Expr *TheExpr = Item.getExpr()) {
2179 ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2180
2181 // If a temporary object that requires destruction after the full
2182 // expression is passed, push a lifetime-extended cleanup to extend its
2183 // lifetime to the end of the enclosing block scope.
2184 auto LifetimeExtendObject = [&](const Expr *E) {
2185 E = E->IgnoreParenCasts();
2186 // Extend lifetimes of objects returned by function calls and message
2187 // sends.
2188
2189 // FIXME: We should do this in other cases in which temporaries are
2190 // created including arguments of non-ARC types (e.g., C++
2191 // temporaries).
2192 if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))
2193 return true;
2194 return false;
2195 };
2196
2197 if (TheExpr->getType()->isObjCRetainableType() &&
2198 getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
2199 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2200 "Only scalar can be a ObjC retainable type");
2201 if (!isa<Constant>(ArgVal)) {
2202 CleanupKind Cleanup = getARCCleanupKind();
2203 QualType Ty = TheExpr->getType();
2204 Address Alloca = Address::invalid();
2205 Address Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca);
2206 ArgVal = EmitARCRetain(Ty, ArgVal);
2207 Builder.CreateStore(ArgVal, Addr);
2208 pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty,
2209 CodeGenFunction::destroyARCStrongPrecise,
2210 Cleanup & EHCleanup);
2211
2212 // Push a clang.arc.use call to ensure ARC optimizer knows that the
2213 // argument has to be alive.
2214 if (CGM.getCodeGenOpts().OptimizationLevel != 0)
2215 pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);
2216 }
2217 }
2218 } else {
2219 ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
2220 }
2221
2222 unsigned ArgValSize =
2223 CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
2224 llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
2225 ArgValSize);
2226 ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
2227 CanQualType ArgTy = getOSLogArgType(Ctx, Size);
2228 // If ArgVal has type x86_fp80, zero-extend ArgVal.
2229 ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
2230 Args.add(RValue::get(ArgVal), ArgTy);
2231 }
2232
2233 const CGFunctionInfo &FI =
2234 CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
2235 llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
2236 Layout, BufAddr.getAlignment());
2237 EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
2238 return RValue::get(BufAddr.getPointer());
2239}
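// Illustrative usage (not from the upstream source), assuming the usual pairing
// with __builtin_os_log_format_buffer_size; the first argument is the buffer,
// the second the format string:
//   char buf[__builtin_os_log_format_buffer_size("%d: %s", x, s)];
//   __builtin_os_log_format(buf, "%d: %s", x, s);
// The call packs x and s into buf via the helper generated above and returns
// the buffer pointer.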
2240
2241 static bool isSpecialUnsignedMultiplySignedResult(
2242 unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
2243 WidthAndSignedness ResultInfo) {
2244 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2245 Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
2246 !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
2247}
2248
2249 static RValue EmitCheckedUnsignedMultiplySignedResult(
2250 CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,
2251 const clang::Expr *Op2, WidthAndSignedness Op2Info,
2252 const clang::Expr *ResultArg, QualType ResultQTy,
2253 WidthAndSignedness ResultInfo) {
2254 assert(isSpecialUnsignedMultiplySignedResult(
2255 Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
2256 "Cannot specialize this multiply");
2257
2258 llvm::Value *V1 = CGF.EmitScalarExpr(Op1);
2259 llvm::Value *V2 = CGF.EmitScalarExpr(Op2);
2260
2261 llvm::Value *HasOverflow;
2262 llvm::Value *Result = EmitOverflowIntrinsic(
2263 CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);
2264
2265 // The intrinsic call will detect overflow when the value is > UINT_MAX,
2266 // however, since the original builtin had a signed result, we need to report
2267 // an overflow when the result is greater than INT_MAX.
2268 auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);
2269 llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax);
2270
2271 llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue);
2272 HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow);
2273
2274 bool isVolatile =
2275 ResultArg->getType()->getPointeeType().isVolatileQualified();
2276 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2277 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2278 isVolatile);
2279 return RValue::get(HasOverflow);
2280}
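// Illustrative example (not from the upstream source): the specialization above
// handles the unsigned*unsigned -> signed form of the overflow builtin, e.g.:
//   unsigned a, b; int r;
//   bool ovf = __builtin_mul_overflow(a, b, &r);
// which is lowered to llvm.umul.with.overflow plus the extra "result > INT_MAX"
// comparison emitted above.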
2281
2282/// Determine if a binop is a checked mixed-sign multiply we can specialize.
2283static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
2284 WidthAndSignedness Op1Info,
2285 WidthAndSignedness Op2Info,
2286 WidthAndSignedness ResultInfo) {
2287 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2288 std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
2289 Op1Info.Signed != Op2Info.Signed;
2290}
2291
2292/// Emit a checked mixed-sign multiply. This is a cheaper specialization of
2293/// the generic checked-binop irgen.
2294static RValue
2295 EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
2296 WidthAndSignedness Op1Info, const clang::Expr *Op2,
2297 WidthAndSignedness Op2Info,
2298 const clang::Expr *ResultArg, QualType ResultQTy,
2299 WidthAndSignedness ResultInfo) {
2300 assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
2301 Op2Info, ResultInfo) &&
2302 "Not a mixed-sign multipliction we can specialize");
2303
2304 // Emit the signed and unsigned operands.
2305 const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
2306 const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
2307 llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
2308 llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
2309 unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
2310 unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
2311
2312 // One of the operands may be smaller than the other. If so, [s|z]ext it.
2313 if (SignedOpWidth < UnsignedOpWidth)
2314 Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
2315 if (UnsignedOpWidth < SignedOpWidth)
2316 Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
2317
2318 llvm::Type *OpTy = Signed->getType();
2319 llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
2320 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2321 llvm::Type *ResTy = ResultPtr.getElementType();
2322 unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
2323
2324 // Take the absolute value of the signed operand.
2325 llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
2326 llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
2327 llvm::Value *AbsSigned =
2328 CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
2329
2330 // Perform a checked unsigned multiplication.
2331 llvm::Value *UnsignedOverflow;
2332 llvm::Value *UnsignedResult =
2333 EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
2334 Unsigned, UnsignedOverflow);
2335
2336 llvm::Value *Overflow, *Result;
2337 if (ResultInfo.Signed) {
2338 // Signed overflow occurs if the result is greater than INT_MAX or lesser
2339 // than INT_MIN, i.e when |Result| > (INT_MAX + IsNegative).
2340 auto IntMax =
2341 llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth);
2342 llvm::Value *MaxResult =
2343 CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
2344 CGF.Builder.CreateZExt(IsNegative, OpTy));
2345 llvm::Value *SignedOverflow =
2346 CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
2347 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
2348
2349 // Prepare the signed result (possibly by negating it).
2350 llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
2351 llvm::Value *SignedResult =
2352 CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
2353 Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
2354 } else {
2355 // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
2356 llvm::Value *Underflow = CGF.Builder.CreateAnd(
2357 IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
2358 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
2359 if (ResultInfo.Width < OpWidth) {
2360 auto IntMax =
2361 llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
2362 llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
2363 UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
2364 Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
2365 }
2366
2367 // Negate the product if it would be negative in infinite precision.
2368 Result = CGF.Builder.CreateSelect(
2369 IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
2370
2371 Result = CGF.Builder.CreateTrunc(Result, ResTy);
2372 }
2373 assert(Overflow && Result && "Missing overflow or result");
2374
2375 bool isVolatile =
2376 ResultArg->getType()->getPointeeType().isVolatileQualified();
2377 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2378 isVolatile);
2379 return RValue::get(Overflow);
2380}
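// Illustrative example (not from the upstream source): the mixed-sign path
// above covers calls such as
//   int s; unsigned u; unsigned r;
//   bool ovf = __builtin_mul_overflow(s, u, &r);
// where the signed operand is first made non-negative, the multiply is done as
// an unsigned llvm.umul.with.overflow, and the sign is re-applied afterwards.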
2381
2382static bool
2383 TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
2384 llvm::SmallPtrSetImpl<const Decl *> &Seen) {
2385 if (const auto *Arr = Ctx.getAsArrayType(Ty))
2386 Ty = Ctx.getBaseElementType(Arr);
2387
2388 const auto *Record = Ty->getAsCXXRecordDecl();
2389 if (!Record)
2390 return false;
2391
2392 // We've already checked this type, or are in the process of checking it.
2393 if (!Seen.insert(Record).second)
2394 return false;
2395
2396 assert(Record->hasDefinition() &&
2397 "Incomplete types should already be diagnosed");
2398
2399 if (Record->isDynamicClass())
2400 return true;
2401
2402 for (FieldDecl *F : Record->fields()) {
2403 if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
2404 return true;
2405 }
2406 return false;
2407}
2408
2409/// Determine if the specified type requires laundering by checking if it is a
2410/// dynamic class type or contains a subobject which is a dynamic class type.
2411 static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
2412 if (!CGM.getCodeGenOpts().StrictVTablePointers)
2413 return false;
2414 llvm::SmallPtrSet<const Decl *, 16> Seen;
2415 return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
2416}
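// Illustrative example (not from the upstream source): under
// -fstrict-vtable-pointers, __builtin_launder(p) only needs special lowering
// when *p is (or contains) a dynamic class, which is what this predicate
// decides, e.g.:
//   struct Base { virtual ~Base(); };
//   Base *q = __builtin_launder(p);   // requires laundering
//   int  *r = __builtin_launder(ip);  // no dynamic class: returned unchanged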
2417
2418RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
2419 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
2420 llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
2421
2422 // The builtin's shift arg may have a different type than the source arg and
2423 // result, but the LLVM intrinsic uses the same type for all values.
2424 llvm::Type *Ty = Src->getType();
2425 ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
2426
2427 // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
2428 unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2429 Function *F = CGM.getIntrinsic(IID, Ty);
2430 return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
2431}
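// Illustrative example (not from the upstream source): a rotate builtin such as
//   unsigned r = __builtin_rotateleft32(x, n);
// is emitted through the funnel-shift intrinsic with the source duplicated,
// i.e. roughly `call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %n)`, as done above.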
2432
2433// Map math builtins for long-double to f128 version.
2434static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
2435 switch (BuiltinID) {
2436#define MUTATE_LDBL(func) \
2437 case Builtin::BI__builtin_##func##l: \
2438 return Builtin::BI__builtin_##func##f128;
2469 MUTATE_LDBL(nans)
2470 MUTATE_LDBL(inf)
2489 MUTATE_LDBL(huge_val)
2499#undef MUTATE_LDBL
2500 default:
2501 return BuiltinID;
2502 }
2503}
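// Illustrative example (not from the upstream source): on a PPC64 target whose
// long double format is IEEE quad, EmitBuiltinExpr (below) runs builtin IDs
// through this remapping first, so e.g. __builtin_huge_vall() is treated as
// __builtin_huge_valf128() from this point on.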
2504
2505static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
2506 Value *V) {
2507 if (CGF.Builder.getIsFPConstrained() &&
2508 CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) {
2509 if (Value *Result =
2510 CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM))
2511 return Result;
2512 }
2513 return nullptr;
2514}
2515
2516 static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
2517 const FunctionDecl *FD) {
2518 auto Name = FD->getNameAsString() + "__hipstdpar_unsupported";
2519 auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD);
2520 auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
2521
2522 SmallVector<Value *, 16> Args;
2523 for (auto &&FormalTy : FnTy->params())
2524 Args.push_back(llvm::PoisonValue::get(FormalTy));
2525
2526 return RValue::get(CGF->Builder.CreateCall(UBF, Args));
2527}
2528
2529RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
2530 const CallExpr *E,
2531 ReturnValueSlot ReturnValue) {
2532 const FunctionDecl *FD = GD.getDecl()->getAsFunction();
2533 // See if we can constant fold this builtin. If so, don't emit it at all.
2534 // TODO: Extend this handling to all builtin calls that we can constant-fold.
2535 Expr::EvalResult Result;
2536 if (E->isPRValue() && E->EvaluateAsRValue(Result, CGM.getContext()) &&
2537 !Result.hasSideEffects()) {
2538 if (Result.Val.isInt())
2539 return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
2540 Result.Val.getInt()));
2541 if (Result.Val.isFloat())
2542 return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
2543 Result.Val.getFloat()));
2544 }
2545
2546 // If current long-double semantics is IEEE 128-bit, replace math builtins
2547 // of long-double with f128 equivalent.
2548 // TODO: This mutation should also be applied to targets other than PPC,
2549 // once the backend supports IEEE 128-bit style libcalls.
2550 if (getTarget().getTriple().isPPC64() &&
2551 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
2552 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
2553
2554 // If the builtin has been declared explicitly with an assembler label,
2555 // disable the specialized emitting below. Ideally we should communicate the
2556 // rename in IR, or at least avoid generating the intrinsic calls that are
2557 // likely to get lowered to the renamed library functions.
2558 const unsigned BuiltinIDIfNoAsmLabel =
2559 FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
2560
2561 std::optional<bool> ErrnoOverriden;
2562 // ErrnoOverriden is true if math-errno is overridden via the
2563 // '#pragma float_control(precise, on)'. This pragma disables fast-math,
2564 // which implies math-errno.
2565 if (E->hasStoredFPFeatures()) {
2566 FPOptionsOverride OP = E->getFPFeatures();
2567 if (OP.hasMathErrnoOverride())
2568 ErrnoOverriden = OP.getMathErrnoOverride();
2569 }
2570 // True if '__attribute__((optnone))' is used. This attribute overrides
2571 // fast-math, which implies math-errno.
2572 bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();
2573
2574 // True if we are compiling at -O2 and errno has been disabled
2575 // using the '#pragma float_control(precise, off)', and
2576 // attribute opt-none hasn't been seen.
2577 bool ErrnoOverridenToFalseWithOpt =
2578 ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone &&
2579 CGM.getCodeGenOpts().OptimizationLevel != 0;
2580
2581 // There are LLVM math intrinsics/instructions corresponding to math library
2582 // functions except the LLVM op will never set errno while the math library
2583 // might. Also, math builtins have the same semantics as their math library
2584 // twins. Thus, we can transform math library and builtin calls to their
2585 // LLVM counterparts if the call is marked 'const' (known to never set errno).
2586 // In case FP exceptions are enabled, the experimental versions of the
2587 // intrinsics model those.
2588 bool ConstAlways =
2589 getContext().BuiltinInfo.isConst(BuiltinID);
2590
2591 // There's a special case with the fma builtins where they are always const
2592 // if the target environment is GNU or the target OS is Windows and we're
2593 // targeting the MSVCRT.dll environment.
2594 // FIXME: This list can become outdated. Need to find a way to get it some
2595 // other way.
2596 switch (BuiltinID) {
2597 case Builtin::BI__builtin_fma:
2598 case Builtin::BI__builtin_fmaf:
2599 case Builtin::BI__builtin_fmal:
2600 case Builtin::BIfma:
2601 case Builtin::BIfmaf:
2602 case Builtin::BIfmal: {
2603 auto &Trip = CGM.getTriple();
2604 if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT())
2605 ConstAlways = true;
2606 break;
2607 }
2608 default:
2609 break;
2610 }
2611
2612 bool ConstWithoutErrnoAndExceptions =
2613 getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
2614 bool ConstWithoutExceptions =
2615 getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID);
2616
2617 // ConstAttr is enabled in fast-math mode. In fast-math mode, math-errno is
2618 // disabled.
2619 // Math intrinsics are generated only when math-errno is disabled. Any pragmas
2620 // or attributes that affect math-errno should prevent or allow math
2621 // intrinsics to be generated. Intrinsics are generated:
2622 // 1- In fast-math mode, unless math-errno is overridden
2623 // via '#pragma float_control(precise, on)', or via
2624 // '__attribute__((optnone))'.
2625 // 2- If math-errno was enabled on the command line but overridden
2626 // to false via '#pragma float_control(precise, off)' and
2627 // '__attribute__((optnone))' hasn't been used.
2628 // 3- If we are compiling with optimization and errno has been disabled
2629 // via '#pragma float_control(precise, off)', and
2630 // '__attribute__((optnone))' hasn't been used.
2631
2632 bool ConstWithoutErrnoOrExceptions =
2633 ConstWithoutErrnoAndExceptions || ConstWithoutExceptions;
2634 bool GenerateIntrinsics =
2635 (ConstAlways && !OptNone) ||
2636 (!getLangOpts().MathErrno &&
2637 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2638 if (!GenerateIntrinsics) {
2639 GenerateIntrinsics =
2640 ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions;
2641 if (!GenerateIntrinsics)
2642 GenerateIntrinsics =
2643 ConstWithoutErrnoOrExceptions &&
2644 (!getLangOpts().MathErrno &&
2645 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2646 if (!GenerateIntrinsics)
2647 GenerateIntrinsics =
2648 ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
2649 }
2650 if (GenerateIntrinsics) {
2651 switch (BuiltinIDIfNoAsmLabel) {
2652 case Builtin::BIceil:
2653 case Builtin::BIceilf:
2654 case Builtin::BIceill:
2655 case Builtin::BI__builtin_ceil:
2656 case Builtin::BI__builtin_ceilf:
2657 case Builtin::BI__builtin_ceilf16:
2658 case Builtin::BI__builtin_ceill:
2659 case Builtin::BI__builtin_ceilf128:
2660 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2661 Intrinsic::ceil,
2662 Intrinsic::experimental_constrained_ceil));
2663
2664 case Builtin::BIcopysign:
2665 case Builtin::BIcopysignf:
2666 case Builtin::BIcopysignl:
2667 case Builtin::BI__builtin_copysign:
2668 case Builtin::BI__builtin_copysignf:
2669 case Builtin::BI__builtin_copysignf16:
2670 case Builtin::BI__builtin_copysignl:
2671 case Builtin::BI__builtin_copysignf128:
2672 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
2673
2674 case Builtin::BIcos:
2675 case Builtin::BIcosf:
2676 case Builtin::BIcosl:
2677 case Builtin::BI__builtin_cos:
2678 case Builtin::BI__builtin_cosf:
2679 case Builtin::BI__builtin_cosf16:
2680 case Builtin::BI__builtin_cosl:
2681 case Builtin::BI__builtin_cosf128:
2682 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2683 Intrinsic::cos,
2684 Intrinsic::experimental_constrained_cos));
2685
2686 case Builtin::BIexp:
2687 case Builtin::BIexpf:
2688 case Builtin::BIexpl:
2689 case Builtin::BI__builtin_exp:
2690 case Builtin::BI__builtin_expf:
2691 case Builtin::BI__builtin_expf16:
2692 case Builtin::BI__builtin_expl:
2693 case Builtin::BI__builtin_expf128:
2694 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2695 Intrinsic::exp,
2696 Intrinsic::experimental_constrained_exp));
2697
2698 case Builtin::BIexp2:
2699 case Builtin::BIexp2f:
2700 case Builtin::BIexp2l:
2701 case Builtin::BI__builtin_exp2:
2702 case Builtin::BI__builtin_exp2f:
2703 case Builtin::BI__builtin_exp2f16:
2704 case Builtin::BI__builtin_exp2l:
2705 case Builtin::BI__builtin_exp2f128:
2706 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2707 Intrinsic::exp2,
2708 Intrinsic::experimental_constrained_exp2));
2709 case Builtin::BI__builtin_exp10:
2710 case Builtin::BI__builtin_exp10f:
2711 case Builtin::BI__builtin_exp10f16:
2712 case Builtin::BI__builtin_exp10l:
2713 case Builtin::BI__builtin_exp10f128: {
2714 // TODO: strictfp support
2715 if (Builder.getIsFPConstrained())
2716 break;
2717 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp10));
2718 }
2719 case Builtin::BIfabs:
2720 case Builtin::BIfabsf:
2721 case Builtin::BIfabsl:
2722 case Builtin::BI__builtin_fabs:
2723 case Builtin::BI__builtin_fabsf:
2724 case Builtin::BI__builtin_fabsf16:
2725 case Builtin::BI__builtin_fabsl:
2726 case Builtin::BI__builtin_fabsf128:
2727 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
2728
2729 case Builtin::BIfloor:
2730 case Builtin::BIfloorf:
2731 case Builtin::BIfloorl:
2732 case Builtin::BI__builtin_floor:
2733 case Builtin::BI__builtin_floorf:
2734 case Builtin::BI__builtin_floorf16:
2735 case Builtin::BI__builtin_floorl:
2736 case Builtin::BI__builtin_floorf128:
2737 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2738 Intrinsic::floor,
2739 Intrinsic::experimental_constrained_floor));
2740
2741 case Builtin::BIfma:
2742 case Builtin::BIfmaf:
2743 case Builtin::BIfmal:
2744 case Builtin::BI__builtin_fma:
2745 case Builtin::BI__builtin_fmaf:
2746 case Builtin::BI__builtin_fmaf16:
2747 case Builtin::BI__builtin_fmal:
2748 case Builtin::BI__builtin_fmaf128:
2749 return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,
2750 Intrinsic::fma,
2751 Intrinsic::experimental_constrained_fma));
2752
2753 case Builtin::BIfmax:
2754 case Builtin::BIfmaxf:
2755 case Builtin::BIfmaxl:
2756 case Builtin::BI__builtin_fmax:
2757 case Builtin::BI__builtin_fmaxf:
2758 case Builtin::BI__builtin_fmaxf16:
2759 case Builtin::BI__builtin_fmaxl:
2760 case Builtin::BI__builtin_fmaxf128:
2761 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
2762 Intrinsic::maxnum,
2763 Intrinsic::experimental_constrained_maxnum));
2764
2765 case Builtin::BIfmin:
2766 case Builtin::BIfminf:
2767 case Builtin::BIfminl:
2768 case Builtin::BI__builtin_fmin:
2769 case Builtin::BI__builtin_fminf:
2770 case Builtin::BI__builtin_fminf16:
2771 case Builtin::BI__builtin_fminl:
2772 case Builtin::BI__builtin_fminf128:
2773 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
2774 Intrinsic::minnum,
2775 Intrinsic::experimental_constrained_minnum));
2776
2777 // fmod() is a special-case. It maps to the frem instruction rather than an
2778 // LLVM intrinsic.
2779 case Builtin::BIfmod:
2780 case Builtin::BIfmodf:
2781 case Builtin::BIfmodl:
2782 case Builtin::BI__builtin_fmod:
2783 case Builtin::BI__builtin_fmodf:
2784 case Builtin::BI__builtin_fmodf16:
2785 case Builtin::BI__builtin_fmodl:
2786 case Builtin::BI__builtin_fmodf128: {
2787 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2788 Value *Arg1 = EmitScalarExpr(E->getArg(0));
2789 Value *Arg2 = EmitScalarExpr(E->getArg(1));
2790 return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
2791 }
2792
2793 case Builtin::BIlog:
2794 case Builtin::BIlogf:
2795 case Builtin::BIlogl:
2796 case Builtin::BI__builtin_log:
2797 case Builtin::BI__builtin_logf:
2798 case Builtin::BI__builtin_logf16:
2799 case Builtin::BI__builtin_logl:
2800 case Builtin::BI__builtin_logf128:
2801 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2802 Intrinsic::log,
2803 Intrinsic::experimental_constrained_log));
2804
2805 case Builtin::BIlog10:
2806 case Builtin::BIlog10f:
2807 case Builtin::BIlog10l:
2808 case Builtin::BI__builtin_log10:
2809 case Builtin::BI__builtin_log10f:
2810 case Builtin::BI__builtin_log10f16:
2811 case Builtin::BI__builtin_log10l:
2812 case Builtin::BI__builtin_log10f128:
2813 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2814 Intrinsic::log10,
2815 Intrinsic::experimental_constrained_log10));
2816
2817 case Builtin::BIlog2:
2818 case Builtin::BIlog2f:
2819 case Builtin::BIlog2l:
2820 case Builtin::BI__builtin_log2:
2821 case Builtin::BI__builtin_log2f:
2822 case Builtin::BI__builtin_log2f16:
2823 case Builtin::BI__builtin_log2l:
2824 case Builtin::BI__builtin_log2f128:
2825 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2826 Intrinsic::log2,
2827 Intrinsic::experimental_constrained_log2));
2828
2829 case Builtin::BInearbyint:
2830 case Builtin::BInearbyintf:
2831 case Builtin::BInearbyintl:
2832 case Builtin::BI__builtin_nearbyint:
2833 case Builtin::BI__builtin_nearbyintf:
2834 case Builtin::BI__builtin_nearbyintl:
2835 case Builtin::BI__builtin_nearbyintf128:
2836 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2837 Intrinsic::nearbyint,
2838 Intrinsic::experimental_constrained_nearbyint));
2839
2840 case Builtin::BIpow:
2841 case Builtin::BIpowf:
2842 case Builtin::BIpowl:
2843 case Builtin::BI__builtin_pow:
2844 case Builtin::BI__builtin_powf:
2845 case Builtin::BI__builtin_powf16:
2846 case Builtin::BI__builtin_powl:
2847 case Builtin::BI__builtin_powf128:
2848 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
2849 Intrinsic::pow,
2850 Intrinsic::experimental_constrained_pow));
2851
2852 case Builtin::BIrint:
2853 case Builtin::BIrintf:
2854 case Builtin::BIrintl:
2855 case Builtin::BI__builtin_rint:
2856 case Builtin::BI__builtin_rintf:
2857 case Builtin::BI__builtin_rintf16:
2858 case Builtin::BI__builtin_rintl:
2859 case Builtin::BI__builtin_rintf128:
2860 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2861 Intrinsic::rint,
2862 Intrinsic::experimental_constrained_rint));
2863
2864 case Builtin::BIround:
2865 case Builtin::BIroundf:
2866 case Builtin::BIroundl:
2867 case Builtin::BI__builtin_round:
2868 case Builtin::BI__builtin_roundf:
2869 case Builtin::BI__builtin_roundf16:
2870 case Builtin::BI__builtin_roundl:
2871 case Builtin::BI__builtin_roundf128:
2872 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2873 Intrinsic::round,
2874 Intrinsic::experimental_constrained_round));
2875
2876 case Builtin::BIroundeven:
2877 case Builtin::BIroundevenf:
2878 case Builtin::BIroundevenl:
2879 case Builtin::BI__builtin_roundeven:
2880 case Builtin::BI__builtin_roundevenf:
2881 case Builtin::BI__builtin_roundevenf16:
2882 case Builtin::BI__builtin_roundevenl:
2883 case Builtin::BI__builtin_roundevenf128:
2884 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2885 Intrinsic::roundeven,
2886 Intrinsic::experimental_constrained_roundeven));
2887
2888 case Builtin::BIsin:
2889 case Builtin::BIsinf:
2890 case Builtin::BIsinl:
2891 case Builtin::BI__builtin_sin:
2892 case Builtin::BI__builtin_sinf:
2893 case Builtin::BI__builtin_sinf16:
2894 case Builtin::BI__builtin_sinl:
2895 case Builtin::BI__builtin_sinf128:
2896 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2897 Intrinsic::sin,
2898 Intrinsic::experimental_constrained_sin));
2899
2900 case Builtin::BIsqrt:
2901 case Builtin::BIsqrtf:
2902 case Builtin::BIsqrtl:
2903 case Builtin::BI__builtin_sqrt:
2904 case Builtin::BI__builtin_sqrtf:
2905 case Builtin::BI__builtin_sqrtf16:
2906 case Builtin::BI__builtin_sqrtl:
2907 case Builtin::BI__builtin_sqrtf128:
2908 case Builtin::BI__builtin_elementwise_sqrt: {
2909 llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin(
2910 *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt);
2911 SetSqrtFPAccuracy(Call);
2912 return RValue::get(Call);
2913 }
2914 case Builtin::BItrunc:
2915 case Builtin::BItruncf:
2916 case Builtin::BItruncl:
2917 case Builtin::BI__builtin_trunc:
2918 case Builtin::BI__builtin_truncf:
2919 case Builtin::BI__builtin_truncf16:
2920 case Builtin::BI__builtin_truncl:
2921 case Builtin::BI__builtin_truncf128:
2922 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2923 Intrinsic::trunc,
2924 Intrinsic::experimental_constrained_trunc));
2925
2926 case Builtin::BIlround:
2927 case Builtin::BIlroundf:
2928 case Builtin::BIlroundl:
2929 case Builtin::BI__builtin_lround:
2930 case Builtin::BI__builtin_lroundf:
2931 case Builtin::BI__builtin_lroundl:
2932 case Builtin::BI__builtin_lroundf128:
2933 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
2934 *this, E, Intrinsic::lround,
2935 Intrinsic::experimental_constrained_lround));
2936
2937 case Builtin::BIllround:
2938 case Builtin::BIllroundf:
2939 case Builtin::BIllroundl:
2940 case Builtin::BI__builtin_llround:
2941 case Builtin::BI__builtin_llroundf:
2942 case Builtin::BI__builtin_llroundl:
2943 case Builtin::BI__builtin_llroundf128:
2944 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
2945 *this, E, Intrinsic::llround,
2946 Intrinsic::experimental_constrained_llround));
2947
2948 case Builtin::BIlrint:
2949 case Builtin::BIlrintf:
2950 case Builtin::BIlrintl:
2951 case Builtin::BI__builtin_lrint:
2952 case Builtin::BI__builtin_lrintf:
2953 case Builtin::BI__builtin_lrintl:
2954 case Builtin::BI__builtin_lrintf128:
2955 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
2956 *this, E, Intrinsic::lrint,
2957 Intrinsic::experimental_constrained_lrint));
2958
2959 case Builtin::BIllrint:
2960 case Builtin::BIllrintf:
2961 case Builtin::BIllrintl:
2962 case Builtin::BI__builtin_llrint:
2963 case Builtin::BI__builtin_llrintf:
2964 case Builtin::BI__builtin_llrintl:
2965 case Builtin::BI__builtin_llrintf128:
2966 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
2967 *this, E, Intrinsic::llrint,
2968 Intrinsic::experimental_constrained_llrint));
2969 case Builtin::BI__builtin_ldexp:
2970 case Builtin::BI__builtin_ldexpf:
2971 case Builtin::BI__builtin_ldexpl:
2972 case Builtin::BI__builtin_ldexpf16:
2973 case Builtin::BI__builtin_ldexpf128: {
2974 return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin(
2975 *this, E, Intrinsic::ldexp,
2976 Intrinsic::experimental_constrained_ldexp));
2977 }
2978 default:
2979 break;
2980 }
2981 }
2982
2983 // Check NonnullAttribute/NullabilityArg and Alignment.
2984 auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
2985 unsigned ParmNum) {
2986 Value *Val = A.getPointer();
2987 EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD,
2988 ParmNum);
2989
2990 if (SanOpts.has(SanitizerKind::Alignment)) {
2991 SanitizerSet SkippedChecks;
2992 SkippedChecks.set(SanitizerKind::All);
2993 SkippedChecks.clear(SanitizerKind::Alignment);
2994 SourceLocation Loc = Arg->getExprLoc();
2995 // Strip an implicit cast.
2996 if (auto *CE = dyn_cast<ImplicitCastExpr>(Arg))
2997 if (CE->getCastKind() == CK_BitCast)
2998 Arg = CE->getSubExpr();
2999 EmitTypeCheck(Kind, Loc, Val, Arg->getType(), A.getAlignment(),
3000 SkippedChecks);
3001 }
3002 };
3003
3004 switch (BuiltinIDIfNoAsmLabel) {
3005 default: break;
3006 case Builtin::BI__builtin___CFStringMakeConstantString:
3007 case Builtin::BI__builtin___NSStringMakeConstantString:
3008 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
3009 case Builtin::BI__builtin_stdarg_start:
3010 case Builtin::BI__builtin_va_start:
3011 case Builtin::BI__va_start:
3012 case Builtin::BI__builtin_va_end:
3013 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
3014 ? EmitScalarExpr(E->getArg(0))
3015 : EmitVAListRef(E->getArg(0)).getPointer(),
3016 BuiltinID != Builtin::BI__builtin_va_end);
3017 return RValue::get(nullptr);
3018 case Builtin::BI__builtin_va_copy: {
3019 Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
3020 Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
3021 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), {DstPtr, SrcPtr});
3022 return RValue::get(nullptr);
3023 }
3024 case Builtin::BIabs:
3025 case Builtin::BIlabs:
3026 case Builtin::BIllabs:
3027 case Builtin::BI__builtin_abs:
3028 case Builtin::BI__builtin_labs:
3029 case Builtin::BI__builtin_llabs: {
3030 bool SanitizeOverflow = SanOpts.has(SanitizerKind::SignedIntegerOverflow);
3031
3032 Value *Result;
3033 switch (getLangOpts().getSignedOverflowBehavior()) {
3034 case LangOptions::SOB_Defined:
3035 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), false);
3036 break;
3037 case LangOptions::SOB_Undefined:
3038 if (!SanitizeOverflow) {
3039 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), true);
3040 break;
3041 }
3042 [[fallthrough]];
3043 case LangOptions::SOB_Trapping:
3044 // TODO: Somehow handle the corner case when the address of abs is taken.
3045 Result = EmitOverflowCheckedAbs(*this, E, SanitizeOverflow);
3046 break;
3047 }
3048 return RValue::get(Result);
3049 }
3050 case Builtin::BI__builtin_complex: {
3051 Value *Real = EmitScalarExpr(E->getArg(0));
3052 Value *Imag = EmitScalarExpr(E->getArg(1));
3053 return RValue::getComplex({Real, Imag});
3054 }
3055 case Builtin::BI__builtin_conj:
3056 case Builtin::BI__builtin_conjf:
3057 case Builtin::BI__builtin_conjl:
3058 case Builtin::BIconj:
3059 case Builtin::BIconjf:
3060 case Builtin::BIconjl: {
3061 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3062 Value *Real = ComplexVal.first;
3063 Value *Imag = ComplexVal.second;
3064 Imag = Builder.CreateFNeg(Imag, "neg");
3065 return RValue::getComplex(std::make_pair(Real, Imag));
3066 }
3067 case Builtin::BI__builtin_creal:
3068 case Builtin::BI__builtin_crealf:
3069 case Builtin::BI__builtin_creall:
3070 case Builtin::BIcreal:
3071 case Builtin::BIcrealf:
3072 case Builtin::BIcreall: {
3073 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3074 return RValue::get(ComplexVal.first);
3075 }
3076
3077 case Builtin::BI__builtin_preserve_access_index: {
3078 // Only enable the preserved access index region when debuginfo
3079 // is available, as debuginfo is needed to preserve the user-level
3080 // access pattern.
3081 if (!getDebugInfo()) {
3082 CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");
3083 return RValue::get(EmitScalarExpr(E->getArg(0)));
3084 }
3085
3086 // Nested builtin_preserve_access_index() not supported
3087 if (IsInPreservedAIRegion) {
3088 CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");
3089 return RValue::get(EmitScalarExpr(E->getArg(0)));
3090 }
3091
3092 IsInPreservedAIRegion = true;
3093 Value *Res = EmitScalarExpr(E->getArg(0));
3094 IsInPreservedAIRegion = false;
3095 return RValue::get(Res);
3096 }
3097
3098 case Builtin::BI__builtin_cimag:
3099 case Builtin::BI__builtin_cimagf:
3100 case Builtin::BI__builtin_cimagl:
3101 case Builtin::BIcimag:
3102 case Builtin::BIcimagf:
3103 case Builtin::BIcimagl: {
3104 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3105 return RValue::get(ComplexVal.second);
3106 }
3107
3108 case Builtin::BI__builtin_clrsb:
3109 case Builtin::BI__builtin_clrsbl:
3110 case Builtin::BI__builtin_clrsbll: {
3111 // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
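// Editorial worked example (not part of the original source): for a 32-bit
// int, __builtin_clrsb(-2) takes the negative branch, so Tmp = ~x = 1,
// ctlz(1) = 31, and the result is 31 - 1 = 30 redundant sign bits.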
3112 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3113
3114 llvm::Type *ArgType = ArgValue->getType();
3115 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3116
3117 llvm::Type *ResultType = ConvertType(E->getType());
3118 Value *Zero = llvm::Constant::getNullValue(ArgType);
3119 Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
3120 Value *Inverse = Builder.CreateNot(ArgValue, "not");
3121 Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
3122 Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
3123 Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
3124 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3125 "cast");
3126 return RValue::get(Result);
3127 }
3128 case Builtin::BI__builtin_ctzs:
3129 case Builtin::BI__builtin_ctz:
3130 case Builtin::BI__builtin_ctzl:
3131 case Builtin::BI__builtin_ctzll: {
3132 Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
3133
3134 llvm::Type *ArgType = ArgValue->getType();
3135 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3136
3137 llvm::Type *ResultType = ConvertType(E->getType());
3138 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
3139 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3140 if (Result->getType() != ResultType)
3141 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3142 "cast");
3143 return RValue::get(Result);
3144 }
3145 case Builtin::BI__builtin_clzs:
3146 case Builtin::BI__builtin_clz:
3147 case Builtin::BI__builtin_clzl:
3148 case Builtin::BI__builtin_clzll: {
3149 Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
3150
3151 llvm::Type *ArgType = ArgValue->getType();
3152 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3153
3154 llvm::Type *ResultType = ConvertType(E->getType());
3155 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
3156 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3157 if (Result->getType() != ResultType)
3158 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3159 "cast");
3160 return RValue::get(Result);
3161 }
3162 case Builtin::BI__builtin_ffs:
3163 case Builtin::BI__builtin_ffsl:
3164 case Builtin::BI__builtin_ffsll: {
3165 // ffs(x) -> x ? cttz(x) + 1 : 0
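// Editorial worked example (not part of the original source): ffs(8) emits
// cttz(8) + 1 = 3 + 1 = 4, and the select below maps the x == 0 case to 0.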
3166 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3167
3168 llvm::Type *ArgType = ArgValue->getType();
3169 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3170
3171 llvm::Type *ResultType = ConvertType(E->getType());
3172 Value *Tmp =
3173 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
3174 llvm::ConstantInt::get(ArgType, 1));
3175 Value *Zero = llvm::Constant::getNullValue(ArgType);
3176 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3177 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
3178 if (Result->getType() != ResultType)
3179 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3180 "cast");
3181 return RValue::get(Result);
3182 }
3183 case Builtin::BI__builtin_parity:
3184 case Builtin::BI__builtin_parityl:
3185 case Builtin::BI__builtin_parityll: {
3186 // parity(x) -> ctpop(x) & 1
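// Editorial worked example (not part of the original source): parity(0b1011)
// emits ctpop = 3, and 3 & 1 = 1, i.e. an odd number of set bits.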
3187 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3188
3189 llvm::Type *ArgType = ArgValue->getType();
3190 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3191
3192 llvm::Type *ResultType = ConvertType(E->getType());
3193 Value *Tmp = Builder.CreateCall(F, ArgValue);
3194 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
3195 if (Result->getType() != ResultType)
3196 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3197 "cast");
3198 return RValue::get(Result);
3199 }
3200 case Builtin::BI__lzcnt16:
3201 case Builtin::BI__lzcnt:
3202 case Builtin::BI__lzcnt64: {
3203 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3204
3205 llvm::Type *ArgType = ArgValue->getType();
3206 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3207
3208 llvm::Type *ResultType = ConvertType(E->getType());
3209 Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
3210 if (Result->getType() != ResultType)
3211 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3212 "cast");
3213 return RValue::get(Result);
3214 }
3215 case Builtin::BI__popcnt16:
3216 case Builtin::BI__popcnt:
3217 case Builtin::BI__popcnt64:
3218 case Builtin::BI__builtin_popcount:
3219 case Builtin::BI__builtin_popcountl:
3220 case Builtin::BI__builtin_popcountll:
3221 case Builtin::BI__builtin_popcountg: {
3222 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3223
3224 llvm::Type *ArgType = ArgValue->getType();
3225 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3226
3227 llvm::Type *ResultType = ConvertType(E->getType());
3228 Value *Result = Builder.CreateCall(F, ArgValue);
3229 if (Result->getType() != ResultType)
3230 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3231 "cast");
3232 return RValue::get(Result);
3233 }
3234 case Builtin::BI__builtin_unpredictable: {
3235 // Always return the argument of __builtin_unpredictable. LLVM does not
3236 // handle this builtin. Metadata for this builtin should be added directly
3237 // to instructions such as branches or switches that use it.
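// Editorial note: e.g. 'if (__builtin_unpredictable(x))' results in the
// conditional branch being tagged with !unpredictable metadata instead of
// any intrinsic call being emitted here.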
3238 return RValue::get(EmitScalarExpr(E->getArg(0)));
3239 }
3240 case Builtin::BI__builtin_expect: {
3241 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3242 llvm::Type *ArgType = ArgValue->getType();
3243
3244 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3245 // Don't generate llvm.expect on -O0 as the backend won't use it for
3246 // anything.
3247 // Note, we still IRGen ExpectedValue because it could have side-effects.
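// Editorial note: past the -O0 early return below, the emitted IR is a call
// to llvm.expect, e.g. %expval = call i64 @llvm.expect.i64(i64 %x, i64 1)
// for __builtin_expect(x, 1) with a 64-bit 'long' argument.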
3248 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3249 return RValue::get(ArgValue);
3250
3251 Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
3252 Value *Result =
3253 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
3254 return RValue::get(Result);
3255 }
3256 case Builtin::BI__builtin_expect_with_probability: {
3257 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3258 llvm::Type *ArgType = ArgValue->getType();
3259
3260 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3261 llvm::APFloat Probability(0.0);
3262 const Expr *ProbArg = E->getArg(2);
3263 bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext());
3264 assert(EvalSucceed && "probability should be able to evaluate as float");
3265 (void)EvalSucceed;
3266 bool LoseInfo = false;
3267 Probability.convert(llvm::APFloat::IEEEdouble(),
3268 llvm::RoundingMode::Dynamic, &LoseInfo);
3269 llvm::Type *Ty = ConvertType(ProbArg->getType());
3270 Constant *Confidence = ConstantFP::get(Ty, Probability);
3271 // Don't generate llvm.expect.with.probability on -O0 as the backend
3272 // won't use it for anything.
3273 // Note, we still IRGen ExpectedValue because it could have side-effects.
3274 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3275 return RValue::get(ArgValue);
3276
3277 Function *FnExpect =
3278 CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);
3279 Value *Result = Builder.CreateCall(
3280 FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval");
3281 return RValue::get(Result);
3282 }
3283 case Builtin::BI__builtin_assume_aligned: {
3284 const Expr *Ptr = E->getArg(0);
3285 Value *PtrValue = EmitScalarExpr(Ptr);
3286 Value *OffsetValue =
3287 (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
3288
3289 Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
3290 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
3291 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
3292 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
3293 llvm::Value::MaximumAlignment);
3294
3295 emitAlignmentAssumption(PtrValue, Ptr,
3296 /*The expr loc is sufficient.*/ SourceLocation(),
3297 AlignmentCI, OffsetValue);
3298 return RValue::get(PtrValue);
3299 }
3300 case Builtin::BI__assume:
3301 case Builtin::BI__builtin_assume: {
3302 if (E->getArg(0)->HasSideEffects(getContext()))
3303 return RValue::get(nullptr);
3304
3305 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3306 Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
3307 Builder.CreateCall(FnAssume, ArgValue);
3308 return RValue::get(nullptr);
3309 }
3310 case Builtin::BI__builtin_assume_separate_storage: {
3311 const Expr *Arg0 = E->getArg(0);
3312 const Expr *Arg1 = E->getArg(1);
3313
3314 Value *Value0 = EmitScalarExpr(Arg0);
3315 Value *Value1 = EmitScalarExpr(Arg1);
3316
3317 Value *Values[] = {Value0, Value1};
3318 OperandBundleDefT<Value *> OBD("separate_storage", Values);
3319 Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD});
3320 return RValue::get(nullptr);
3321 }
3322 case Builtin::BI__arithmetic_fence: {
3323 // Create the builtin call if FastMath is selected and the target
3324 // supports the builtin; otherwise just return the argument.
3325 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3326 llvm::FastMathFlags FMF = Builder.getFastMathFlags();
3327 bool isArithmeticFenceEnabled =
3328 FMF.allowReassoc() &&
3329 getContext().getTargetInfo().checkArithmeticFenceSupported();
3330 QualType ArgType = E->getArg(0)->getType();
3331 if (ArgType->isComplexType()) {
3332 if (isArithmeticFenceEnabled) {
3333 QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();
3334 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3335 Value *Real = Builder.CreateArithmeticFence(ComplexVal.first,
3336 ConvertType(ElementType));
3337 Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second,
3338 ConvertType(ElementType));
3339 return RValue::getComplex(std::make_pair(Real, Imag));
3340 }
3341 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3342 Value *Real = ComplexVal.first;
3343 Value *Imag = ComplexVal.second;
3344 return RValue::getComplex(std::make_pair(Real, Imag));
3345 }
3346 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3347 if (isArithmeticFenceEnabled)
3348 return RValue::get(
3349 Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType)));
3350 return RValue::get(ArgValue);
3351 }
3352 case Builtin::BI__builtin_bswap16:
3353 case Builtin::BI__builtin_bswap32:
3354 case Builtin::BI__builtin_bswap64:
3355 case Builtin::BI_byteswap_ushort:
3356 case Builtin::BI_byteswap_ulong:
3357 case Builtin::BI_byteswap_uint64: {
3358 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
3359 }
3360 case Builtin::BI__builtin_bitreverse8:
3361 case Builtin::BI__builtin_bitreverse16:
3362 case Builtin::BI__builtin_bitreverse32:
3363 case Builtin::BI__builtin_bitreverse64: {
3364 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
3365 }
3366 case Builtin::BI__builtin_rotateleft8:
3367 case Builtin::BI__builtin_rotateleft16:
3368 case Builtin::BI__builtin_rotateleft32:
3369 case Builtin::BI__builtin_rotateleft64:
3370 case Builtin::BI_rotl8: // Microsoft variants of rotate left
3371 case Builtin::BI_rotl16:
3372 case Builtin::BI_rotl:
3373 case Builtin::BI_lrotl:
3374 case Builtin::BI_rotl64:
3375 return emitRotate(E, false);
3376
3377 case Builtin::BI__builtin_rotateright8:
3378 case Builtin::BI__builtin_rotateright16:
3379 case Builtin::BI__builtin_rotateright32:
3380 case Builtin::BI__builtin_rotateright64:
3381 case Builtin::BI_rotr8: // Microsoft variants of rotate right
3382 case Builtin::BI_rotr16:
3383 case Builtin::BI_rotr:
3384 case Builtin::BI_lrotr:
3385 case Builtin::BI_rotr64:
3386 return emitRotate(E, true);
3387
3388 case Builtin::BI__builtin_constant_p: {
3389 llvm::Type *ResultType = ConvertType(E->getType());
3390
3391 const Expr *Arg = E->getArg(0);
3392 QualType ArgType = Arg->getType();
3393 // FIXME: The allowance for Obj-C pointers and block pointers is historical
3394 // and likely a mistake.
3395 if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
3396 !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
3397 // Per the GCC documentation, only numeric constants are recognized after
3398 // inlining.
3399 return RValue::get(ConstantInt::get(ResultType, 0));
3400
3401 if (Arg->HasSideEffects(getContext()))
3402 // The argument is unevaluated, so be conservative if it might have
3403 // side-effects.
3404 return RValue::get(ConstantInt::get(ResultType, 0));
3405
3406 Value *ArgValue = EmitScalarExpr(Arg);
3407 if (ArgType->isObjCObjectPointerType()) {
3408 // Convert Objective-C objects to id because we cannot distinguish between
3409 // LLVM types for Obj-C classes as they are opaque.
3410 ArgType = CGM.getContext().getObjCIdType();
3411 ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
3412 }
3413 Function *F =
3414 CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
3415 Value *Result = Builder.CreateCall(F, ArgValue);
3416 if (Result->getType() != ResultType)
3417 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
3418 return RValue::get(Result);
3419 }
3420 case Builtin::BI__builtin_dynamic_object_size:
3421 case Builtin::BI__builtin_object_size: {
3422 unsigned Type =
3423 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
3424 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
3425
3426 // We pass this builtin onto the optimizer so that it can figure out the
3427 // object size in more complex cases.
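// Editorial note: the helper below emits a call to the llvm.objectsize
// intrinsic; its i1 flags select the minimum vs. maximum mode (from 'Type')
// and the dynamic mode (for __builtin_dynamic_object_size).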
3428 bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
3429 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
3430 /*EmittedE=*/nullptr, IsDynamic));
3431 }
3432 case Builtin::BI__builtin_prefetch: {
3433 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
3434 // FIXME: Technically these constants should be of type 'int', yes?
3435 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
3436 llvm::ConstantInt::get(Int32Ty, 0);
3437 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
3438 llvm::ConstantInt::get(Int32Ty, 3);
3439 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
3440 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
3441 Builder.CreateCall(F, {Address, RW, Locality, Data});
3442 return RValue::get(nullptr);
3443 }
3444 case Builtin::BI__builtin_readcyclecounter: {
3445 Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
3446 return RValue::get(Builder.CreateCall(F));
3447 }
3448 case Builtin::BI__builtin_readsteadycounter: {
3449 Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter);
3450 return RValue::get(Builder.CreateCall(F));
3451 }
3452 case Builtin::BI__builtin___clear_cache: {
3453 Value *Begin = EmitScalarExpr(E->getArg(0));
3454 Value *End = EmitScalarExpr(E->getArg(1));
3455 Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
3456 return RValue::get(Builder.CreateCall(F, {Begin, End}));
3457 }
3458 case Builtin::BI__builtin_trap:
3459 EmitTrapCall(Intrinsic::trap);
3460 return RValue::get(nullptr);
3461 case Builtin::BI__debugbreak:
3462 EmitTrapCall(Intrinsic::debugtrap);
3463 return RValue::get(nullptr);
3464 case Builtin::BI__builtin_unreachable: {
3465 EmitUnreachable(E->getExprLoc());
3466
3467 // We do need to preserve an insertion point.
3468 EmitBlock(createBasicBlock("unreachable.cont"));
3469
3470 return RValue::get(nullptr);
3471 }
3472
3473 case Builtin::BI__builtin_powi:
3474 case Builtin::BI__builtin_powif:
3475 case Builtin::BI__builtin_powil: {
3476 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
3477 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
3478
3479 if (Builder.getIsFPConstrained()) {
3480 // FIXME: llvm.powi has 2 mangling types,
3481 // llvm.experimental.constrained.powi has one.
3482 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3483 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
3484 Src0->getType());
3485 return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
3486 }
3487
3488 Function *F = CGM.getIntrinsic(Intrinsic::powi,
3489 { Src0->getType(), Src1->getType() });
3490 return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
3491 }
3492 case Builtin::BI__builtin_frexpl: {
3493 // Linux PPC will not be adding additional PPCDoubleDouble support.
3494 // WIP to switch default to IEEE long double. Will emit libcall for
3495 // frexpl instead of legalizing this type in the BE.
3496 if (&getTarget().getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble())
3497 break;
3498 LLVM_FALLTHROUGH;
3499 }
3500 case Builtin::BI__builtin_frexp:
3501 case Builtin::BI__builtin_frexpf:
3502 case Builtin::BI__builtin_frexpf128:
3503 case Builtin::BI__builtin_frexpf16:
3504 return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp));
3505 case Builtin::BI__builtin_isgreater:
3506 case Builtin::BI__builtin_isgreaterequal:
3507 case Builtin::BI__builtin_isless:
3508 case Builtin::BI__builtin_islessequal:
3509 case Builtin::BI__builtin_islessgreater:
3510 case Builtin::BI__builtin_isunordered: {
3511 // Ordered comparisons: we know the arguments to these are matching scalar
3512 // floating point values.
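// Editorial note: e.g. __builtin_isgreater(a, b) becomes an ordered
// 'fcmp ogt' and __builtin_isunordered(a, b) an unordered 'fcmp uno',
// each zero-extended to the int result type below.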
3513 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3514 Value *LHS = EmitScalarExpr(E->getArg(0));
3515 Value *RHS = EmitScalarExpr(E->getArg(1));
3516
3517 switch (BuiltinID) {
3518 default: llvm_unreachable("Unknown ordered comparison");
3519 case Builtin::BI__builtin_isgreater:
3520 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
3521 break;
3522 case Builtin::BI__builtin_isgreaterequal:
3523 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
3524 break;
3525 case Builtin::BI__builtin_isless:
3526 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
3527 break;
3528 case Builtin::BI__builtin_islessequal:
3529 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
3530 break;
3531 case Builtin::BI__builtin_islessgreater:
3532 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
3533 break;
3534 case Builtin::BI__builtin_isunordered:
3535 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
3536 break;
3537 }
3538 // ZExt bool to int type.
3539 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
3540 }
3541
3542 case Builtin::BI__builtin_isnan: {
3543 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3544 Value *V = EmitScalarExpr(E->getArg(0));
3545 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3546 return RValue::get(Result);
3547 return RValue::get(
3548 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNan),
3549 ConvertType(E->getType())));
3550 }
3551
3552 case Builtin::BI__builtin_issignaling: {
3553 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3554 Value *V = EmitScalarExpr(E->getArg(0));
3555 return RValue::get(
3556 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSNan),
3557 ConvertType(E->getType())));
3558 }
3559
3560 case Builtin::BI__builtin_isinf: {
3561 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3562 Value *V = EmitScalarExpr(E->getArg(0));
3563 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3564 return RValue::get(Result);
3565 return RValue::get(
3566 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcInf),
3567 ConvertType(E->getType())));
3568 }
3569
3570 case Builtin::BIfinite:
3571 case Builtin::BI__finite:
3572 case Builtin::BIfinitef:
3573 case Builtin::BI__finitef:
3574 case Builtin::BIfinitel:
3575 case Builtin::BI__finitel:
3576 case Builtin::BI__builtin_isfinite: {
3577 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3578 Value *V = EmitScalarExpr(E->getArg(0));
3579 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3580 return RValue::get(Result);
3581 return RValue::get(
3582 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcFinite),
3583 ConvertType(E->getType())));
3584 }
3585
3586 case Builtin::BI__builtin_isnormal: {
3587 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3588 Value *V = EmitScalarExpr(E->getArg(0));
3589 return RValue::get(
3590 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNormal),
3591 ConvertType(E->getType())));
3592 }
3593
3594 case Builtin::BI__builtin_issubnormal: {
3595 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3596 Value *V = EmitScalarExpr(E->getArg(0));
3597 return RValue::get(
3598 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSubnormal),
3599 ConvertType(E->getType())));
3600 }
3601
3602 case Builtin::BI__builtin_iszero: {
3603 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3604 Value *V = EmitScalarExpr(E->getArg(0));
3605 return RValue::get(
3606 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcZero),
3607 ConvertType(E->getType())));
3608 }
3609
3610 case Builtin::BI__builtin_isfpclass: {
3611 Expr::EvalResult Result;
3612 if (!E->getArg(1)->EvaluateAsInt(Result, CGM.getContext()))
3613 break;
3614 uint64_t Test = Result.Val.getInt().getLimitedValue();
3615 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3616 Value *V = EmitScalarExpr(E->getArg(0));
3617 return RValue::get(Builder.CreateZExt(Builder.createIsFPClass(V, Test),
3618 ConvertType(E->getType())));
3619 }
3620
3621 case Builtin::BI__builtin_nondeterministic_value: {
3622 llvm::Type *Ty = ConvertType(E->getArg(0)->getType());
3623
3624 Value *Result = PoisonValue::get(Ty);
3625 Result = Builder.CreateFreeze(Result);
3626
3627 return RValue::get(Result);
3628 }
3629
3630 case Builtin::BI__builtin_elementwise_abs: {
3631 Value *Result;
3632 QualType QT = E->getArg(0)->getType();
3633
3634 if (auto *VecTy = QT->getAs<VectorType>())
3635 QT = VecTy->getElementType();
3636 if (QT->isIntegerType())
3637 Result = Builder.CreateBinaryIntrinsic(
3638 llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),
3639 Builder.getFalse(), nullptr, "elt.abs");
3640 else
3641 Result = emitUnaryBuiltin(*this, E, llvm::Intrinsic::fabs, "elt.abs");
3642
3643 return RValue::get(Result);
3644 }
3645
3646 case Builtin::BI__builtin_elementwise_ceil:
3647 return RValue::get(
3648 emitUnaryBuiltin(*this, E, llvm::Intrinsic::ceil, "elt.ceil"));
3649 case Builtin::BI__builtin_elementwise_exp:
3650 return RValue::get(
3651 emitUnaryBuiltin(*this, E, llvm::Intrinsic::exp, "elt.exp"));
3652 case Builtin::BI__builtin_elementwise_exp2:
3653 return RValue::get(
3654 emitUnaryBuiltin(*this, E, llvm::Intrinsic::exp2, "elt.exp2"));
3655 case Builtin::BI__builtin_elementwise_log:
3656 return RValue::get(
3657 emitUnaryBuiltin(*this, E, llvm::Intrinsic::log, "elt.log"));
3658 case Builtin::BI__builtin_elementwise_log2:
3659 return RValue::get(
3660 emitUnaryBuiltin(*this, E, llvm::Intrinsic::log2, "elt.log2"));
3661 case Builtin::BI__builtin_elementwise_log10:
3662 return RValue::get(
3663 emitUnaryBuiltin(*this, E, llvm::Intrinsic::log10, "elt.log10"));
3664 case Builtin::BI__builtin_elementwise_pow: {
3665 return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::pow));
3666 }
3667 case Builtin::BI__builtin_elementwise_bitreverse:
3668 return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::bitreverse,
3669 "elt.bitreverse"));
3670 case Builtin::BI__builtin_elementwise_cos:
3671 return RValue::get(
3672 emitUnaryBuiltin(*this, E, llvm::Intrinsic::cos, "elt.cos"));
3673 case Builtin::BI__builtin_elementwise_floor:
3674 return RValue::get(
3675 emitUnaryBuiltin(*this, E, llvm::Intrinsic::floor, "elt.floor"));
3676 case Builtin::BI__builtin_elementwise_roundeven:
3677 return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::roundeven,
3678 "elt.roundeven"));
3679 case Builtin::BI__builtin_elementwise_round:
3680 return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::round,
3681 "elt.round"));
3682 case Builtin::BI__builtin_elementwise_rint:
3683 return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::rint,
3684 "elt.rint"));
3685 case Builtin::BI__builtin_elementwise_nearbyint:
3686 return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::nearbyint,
3687 "elt.nearbyint"));
3688 case Builtin::BI__builtin_elementwise_sin:
3689 return RValue::get(
3690 emitUnaryBuiltin(*this, E, llvm::Intrinsic::sin, "elt.sin"));
3691
3692 case Builtin::BI__builtin_elementwise_trunc:
3693 return RValue::get(
3694 emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc"));
3695 case Builtin::BI__builtin_elementwise_canonicalize:
3696 return RValue::get(
3697 emitUnaryBuiltin(*this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize"));
3698 case Builtin::BI__builtin_elementwise_copysign:
3699 return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::copysign));
3700 case Builtin::BI__builtin_elementwise_fma:
3701 return RValue::get(emitTernaryBuiltin(*this, E, llvm::Intrinsic::fma));
3702 case Builtin::BI__builtin_elementwise_add_sat:
3703 case Builtin::BI__builtin_elementwise_sub_sat: {
3704 Value *Op0 = EmitScalarExpr(E->getArg(0));
3705 Value *Op1 = EmitScalarExpr(E->getArg(1));
3706 Value *Result;
3707 assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");
3708 QualType Ty = E->getArg(0)->getType();
3709 if (auto *VecTy = Ty->getAs<VectorType>())
3710 Ty = VecTy->getElementType();
3711 bool IsSigned = Ty->isSignedIntegerType();
3712 unsigned Opc;
3713 if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
3714 Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
3715 else
3716 Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
3717 Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat");
3718 return RValue::get(Result);
3719 }
3720
3721 case Builtin::BI__builtin_elementwise_max: {
3722 Value *Op0 = EmitScalarExpr(E->getArg(0));
3723 Value *Op1 = EmitScalarExpr(E->getArg(1));
3724 Value *Result;
3725 if (Op0->getType()->isIntOrIntVectorTy()) {
3726 QualType Ty = E->getArg(0)->getType();
3727 if (auto *VecTy = Ty->getAs<VectorType>())
3728 Ty = VecTy->getElementType();
3729 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
3730 ? llvm::Intrinsic::smax
3731 : llvm::Intrinsic::umax,
3732 Op0, Op1, nullptr, "elt.max");
3733 } else
3734 Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
3735 return RValue::get(Result);
3736 }
3737 case Builtin::BI__builtin_elementwise_min: {
3738 Value *Op0 = EmitScalarExpr(E->getArg(0));
3739 Value *Op1 = EmitScalarExpr(E->getArg(1));
3740 Value *Result;
3741 if (Op0->getType()->isIntOrIntVectorTy()) {
3742 QualType Ty = E->getArg(0)->getType();
3743 if (auto *VecTy = Ty->getAs<VectorType>())
3744 Ty = VecTy->getElementType();
3745 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
3746 ? llvm::Intrinsic::smin
3747 : llvm::Intrinsic::umin,
3748 Op0, Op1, nullptr, "elt.min");
3749 } else
3750 Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
3751 return RValue::get(Result);
3752 }
3753
3754 case Builtin::BI__builtin_reduce_max: {
3755 auto GetIntrinsicID = [](QualType QT) {
3756 if (auto *VecTy = QT->getAs<VectorType>())
3757 QT = VecTy->getElementType();
3758 if (QT->isSignedIntegerType())
3759 return llvm::Intrinsic::vector_reduce_smax;
3760 if (QT->isUnsignedIntegerType())
3761 return llvm::Intrinsic::vector_reduce_umax;
3762 assert(QT->isFloatingType() && "must have a float here");
3763 return llvm::Intrinsic::vector_reduce_fmax;
3764 };
3765 return RValue::get(emitUnaryBuiltin(
3766 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
3767 }
3768
3769 case Builtin::BI__builtin_reduce_min: {
3770 auto GetIntrinsicID = [](QualType QT) {
3771 if (auto *VecTy = QT->getAs<VectorType>())
3772 QT = VecTy->getElementType();
3773 if (QT->isSignedIntegerType())
3774 return llvm::Intrinsic::vector_reduce_smin;
3775 if (QT->isUnsignedIntegerType())
3776 return llvm::Intrinsic::vector_reduce_umin;
3777 assert(QT->isFloatingType() && "must have a float here");
3778 return llvm::Intrinsic::vector_reduce_fmin;
3779 };
3780
3781 return RValue::get(emitUnaryBuiltin(
3782 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
3783 }
3784
3785 case Builtin::BI__builtin_reduce_add:
3786 return RValue::get(emitUnaryBuiltin(
3787 *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
3788 case Builtin::BI__builtin_reduce_mul:
3789 return RValue::get(emitUnaryBuiltin(
3790 *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
3791 case Builtin::BI__builtin_reduce_xor:
3792 return RValue::get(emitUnaryBuiltin(
3793 *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
3794 case Builtin::BI__builtin_reduce_or:
3795 return RValue::get(emitUnaryBuiltin(
3796 *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
3797 case Builtin::BI__builtin_reduce_and:
3798 return RValue::get(emitUnaryBuiltin(
3799 *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
3800
3801 case Builtin::BI__builtin_matrix_transpose: {
3802 auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
3803 Value *MatValue = EmitScalarExpr(E->getArg(0));
3804 MatrixBuilder MB(Builder);
3805 Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
3806 MatrixTy->getNumColumns());
3807 return RValue::get(Result);
3808 }
3809
3810 case Builtin::BI__builtin_matrix_column_major_load: {
3811 MatrixBuilder MB(Builder);
3812 // Emit everything that isn't dependent on the first parameter type
3813 Value *Stride = EmitScalarExpr(E->getArg(3));
3814 const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
3815 auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();
3816 assert(PtrTy && "arg0 must be of pointer type");
3817 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
3818
3819 Address Src = EmitPointerWithAlignment(E->getArg(0));
3820 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(0)->getType(),
3821 E->getArg(0)->getExprLoc(), FD, 0);
3822 Value *Result = MB.CreateColumnMajorLoad(
3823 Src.getElementType(), Src.getPointer(),
3824 Align(Src.getAlignment().getQuantity()), Stride, IsVolatile,
3825 ResultTy->getNumRows(), ResultTy->getNumColumns(),
3826 "matrix");
3827 return RValue::get(Result);
3828 }
3829
3830 case Builtin::BI__builtin_matrix_column_major_store: {
3831 MatrixBuilder MB(Builder);
3832 Value *Matrix = EmitScalarExpr(E->getArg(0));
3833 Address Dst = EmitPointerWithAlignment(E->getArg(1));
3834 Value *Stride = EmitScalarExpr(E->getArg(2));
3835
3836 const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
3837 auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();
3838 assert(PtrTy && "arg1 must be of pointer type");
3839 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
3840
3841 EmitNonNullArgCheck(RValue::get(Dst.getPointer()), E->getArg(1)->getType(),
3842 E->getArg(1)->getExprLoc(), FD, 0);
3843 Value *Result = MB.CreateColumnMajorStore(
3844 Matrix, Dst.getPointer(), Align(Dst.getAlignment().getQuantity()),
3845 Stride, IsVolatile, MatrixTy->getNumRows(), MatrixTy->getNumColumns());
3846 return RValue::get(Result);
3847 }
3848
3849 case Builtin::BI__builtin_isinf_sign: {
3850 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
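// Editorial worked example (not part of the original source):
// isinf_sign(-INFINITY) has fabs(x) == inf and the sign bit set, so the
// selects below yield -1; any finite or NaN input yields 0.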
3851 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3852 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
3853 Value *Arg = EmitScalarExpr(E->getArg(0));
3854 Value *AbsArg = EmitFAbs(*this, Arg);
3855 Value *IsInf = Builder.CreateFCmpOEQ(
3856 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
3857 Value *IsNeg = EmitSignBit(*this, Arg);
3858
3859 llvm::Type *IntTy = ConvertType(E->getType());
3860 Value *Zero = Constant::getNullValue(IntTy);
3861 Value *One = ConstantInt::get(IntTy, 1);
3862 Value *NegativeOne = ConstantInt::get(IntTy, -1);
3863 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
3864 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
3865 return RValue::get(Result);
3866 }
3867
3868 case Builtin::BI__builtin_flt_rounds: {
3869 Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);
3870
3871 llvm::Type *ResultType = ConvertType(E->getType());
3872 Value *Result = Builder.CreateCall(F);
3873 if (Result->getType() != ResultType)
3874 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3875 "cast");
3876 return RValue::get(Result);
3877 }
3878
3879 case Builtin::BI__builtin_set_flt_rounds: {
3880 Function *F = CGM.getIntrinsic(Intrinsic::set_rounding);
3881
3882 Value *V = EmitScalarExpr(E->getArg(0));
3883 Builder.CreateCall(F, V);
3884 return RValue::get(nullptr);
3885 }
3886
3887 case Builtin::BI__builtin_fpclassify: {
3888 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3889 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
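// Editorial summary: __builtin_fpclassify(nan, inf, normal, subnormal, zero, x)
// returns one of its first five arguments; the chain below checks x for zero,
// then NaN, then infinity, and finally normal vs. subnormal.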
3890 Value *V = EmitScalarExpr(E->getArg(5));
3891 llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
3892
3893 // Create Result
3894 BasicBlock *Begin = Builder.GetInsertBlock();
3895 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
3896 Builder.SetInsertPoint(End);
3897 PHINode *Result =
3898 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
3899 "fpclassify_result");
3900
3901 // if (V==0) return FP_ZERO
3902 Builder.SetInsertPoint(Begin);
3903 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
3904 "iszero");
3905 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
3906 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
3907 Builder.CreateCondBr(IsZero, End, NotZero);
3908 Result->addIncoming(ZeroLiteral, Begin);
3909
3910 // if (V != V) return FP_NAN
3911 Builder.SetInsertPoint(NotZero);
3912 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
3913 Value *NanLiteral = EmitScalarExpr(E->getArg(0));
3914 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
3915 Builder.CreateCondBr(IsNan, End, NotNan);
3916 Result->addIncoming(NanLiteral, NotZero);
3917
3918 // if (fabs(V) == infinity) return FP_INFINITY
3919 Builder.SetInsertPoint(NotNan);
3920 Value *VAbs = EmitFAbs(*this, V);
3921 Value *IsInf =
3922 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
3923 "isinf");
3924 Value *InfLiteral = EmitScalarExpr(E->getArg(1));
3925 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
3926 Builder.CreateCondBr(IsInf, End, NotInf);
3927 Result->addIncoming(InfLiteral, NotNan);
3928
3929 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
3930 Builder.SetInsertPoint(NotInf);
3931 APFloat Smallest = APFloat::getSmallestNormalized(
3932 getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
3933 Value *IsNormal =
3934 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
3935 "isnormal");
3936 Value *NormalResult =
3937 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
3938 EmitScalarExpr(E->getArg(3)));
3939 Builder.CreateBr(End);
3940 Result->addIncoming(NormalResult, NotInf);
3941
3942 // return Result
3943 Builder.SetInsertPoint(End);
3944 return RValue::get(Result);
3945 }
3946
3947 // An alloca will always return a pointer to the alloca (stack) address
3948 // space. This address space need not be the same as the AST / Language
3949 // default (e.g. in C / C++ auto vars are in the generic address space). At
3950 // the AST level this is handled within CreateTempAlloca et al., but for the
3951 // builtin / dynamic alloca we have to handle it here. We use an explicit cast
3952 // instead of passing an AS to CreateAlloca so as to not inhibit optimisation.
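// Editorial note: e.g. on AMDGPU the alloca lives in the private address
// space (addrspace(5)) while the result pointer is generic, so the
// performAddrSpaceCast call below inserts the required addrspacecast.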
3953 case Builtin::BIalloca:
3954 case Builtin::BI_alloca:
3955 case Builtin::BI__builtin_alloca_uninitialized:
3956 case Builtin::BI__builtin_alloca: {
3957 Value *Size = EmitScalarExpr(E->getArg(0));
3958 const TargetInfo &TI = getContext().getTargetInfo();
3959 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
3960 const Align SuitableAlignmentInBytes =
3961 CGM.getContext()
3962 .toCharUnitsFromBits(TI.getSuitableAlign())
3963 .getAsAlign();
3964 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
3965 AI->setAlignment(SuitableAlignmentInBytes);
3966 if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
3967 initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
3968 LangAS AAS = getASTAllocaAddressSpace();
3969 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
3970 if (AAS != EAS) {
3971 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
3972 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
3973 EAS, Ty));
3974 }
3975 return RValue::get(AI);
3976 }
3977
3978 case Builtin::BI__builtin_alloca_with_align_uninitialized:
3979 case Builtin::BI__builtin_alloca_with_align: {
3980 Value *Size = EmitScalarExpr(E->getArg(0));
3981 Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
3982 auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
3983 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
3984 const Align AlignmentInBytes =
3985 CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();
3986 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
3987 AI->setAlignment(AlignmentInBytes);
3988 if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
3989 initializeAlloca(*this, AI, Size, AlignmentInBytes);
3990 LangAS AAS = getASTAllocaAddressSpace();
3991 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
3992 if (AAS != EAS) {
3993 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
3994 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
3995 EAS, Ty));
3996 }
3997 return RValue::get(AI);
3998 }
3999
4000 case Builtin::BIbzero:
4001 case Builtin::BI__builtin_bzero: {
4002 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4003 Value *SizeVal = EmitScalarExpr(E->getArg(1));
4004 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
4005 E->getArg(0)->getExprLoc(), FD, 0);
4006 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
4007 return RValue::get(nullptr);
4008 }
4009
4010 case Builtin::BIbcopy:
4011 case Builtin::BI__builtin_bcopy: {
4012 Address Src = EmitPointerWithAlignment(E->getArg(0));
4013 Address Dest = EmitPointerWithAlignment(E->getArg(1));
4014 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4015 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(0)->getType(),
4016 E->getArg(0)->getExprLoc(), FD, 0);
4017 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(1)->getType(),
4018 E->getArg(1)->getExprLoc(), FD, 0);
4019 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4020 return RValue::get(nullptr);
4021 }
4022
4023 case Builtin::BImemcpy:
4024 case Builtin::BI__builtin_memcpy:
4025 case Builtin::BImempcpy:
4026 case Builtin::BI__builtin_mempcpy: {
4027 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4028 Address Src = EmitPointerWithAlignment(E->getArg(1));
4029 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4030 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4031 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4032 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4033 if (BuiltinID == Builtin::BImempcpy ||
4034 BuiltinID == Builtin::BI__builtin_mempcpy)
4035 return RValue::get(Builder.CreateInBoundsGEP(Dest.getElementType(),
4036 Dest.getPointer(), SizeVal));
4037 else
4038 return RValue::get(Dest.getPointer());
4039 }
4040
4041 case Builtin::BI__builtin_memcpy_inline: {
4042 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4043 Address Src = EmitPointerWithAlignment(E->getArg(1));
4044 uint64_t Size =
4045 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4046 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4047 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4048 Builder.CreateMemCpyInline(Dest, Src, Size);
4049 return RValue::get(nullptr);
4050 }
4051
4052 case Builtin::BI__builtin_char_memchr:
4053 BuiltinID = Builtin::BI__builtin_memchr;
4054 break;
4055
4056 case Builtin::BI__builtin___memcpy_chk: {
4057 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
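// Editorial example: __builtin___memcpy_chk(dst, src, 16, 32) is emitted as
// a plain 16-byte memcpy because the copy is known not to overflow the
// destination; otherwise we fall back to the library call.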
4058 Expr::EvalResult SizeResult, DstSizeResult;
4059 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4060 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4061 break;
4062 llvm::APSInt Size = SizeResult.Val.getInt();
4063 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4064 if (Size.ugt(DstSize))
4065 break;
4066 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4067 Address Src = EmitPointerWithAlignment(E->getArg(1));
4068 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4069 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4070 return RValue::get(Dest.getPointer());
4071 }
4072
4073 case Builtin::BI__builtin_objc_memmove_collectable: {
4074 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
4075 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
4076 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4077 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
4078 DestAddr, SrcAddr, SizeVal);
4079 return RValue::get(DestAddr.getPointer());
4080 }
4081
4082 case Builtin::BI__builtin___memmove_chk: {
4083 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
4084 Expr::EvalResult SizeResult, DstSizeResult;
4085 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4086 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4087 break;
4088 llvm::APSInt Size = SizeResult.Val.getInt();
4089 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4090 if (Size.ugt(DstSize))
4091 break;
4092 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4093 Address Src = EmitPointerWithAlignment(E->getArg(1));
4094 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4095 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4096 return RValue::get(Dest.getPointer());
4097 }
4098
4099 case Builtin::BImemmove:
4100 case Builtin::BI__builtin_memmove: {
4101 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4102 Address Src = EmitPointerWithAlignment(E->getArg(1));
4103 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4104 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4105 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4106 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4107 return RValue::get(Dest.getPointer());
4108 }
4109 case Builtin::BImemset:
4110 case Builtin::BI__builtin_memset: {
4111 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4112 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4113 Builder.getInt8Ty());
4114 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4115 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
4116 E->getArg(0)->getExprLoc(), FD, 0);
4117 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4118 return RValue::get(Dest.getPointer());
4119 }
4120 case Builtin::BI__builtin_memset_inline: {
4121 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4122 Value *ByteVal =
4123 Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty());
4124 uint64_t Size =
4125 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4126 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
4127 E->getArg(0)->getExprLoc(), FD, 0);
4128 Builder.CreateMemSetInline(Dest, ByteVal, Size);
4129 return RValue::get(nullptr);
4130 }
4131 case Builtin::BI__builtin___memset_chk: {
4132 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
4133 Expr::EvalResult SizeResult, DstSizeResult;
4134 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4135 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4136 break;
4137 llvm::APSInt Size = SizeResult.Val.getInt();
4138 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4139 if (Size.ugt(DstSize))
4140 break;
4141 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4142 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4143 Builder.getInt8Ty());
4144 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4145 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4146 return RValue::get(Dest.getPointer());
4147 }
4148 case Builtin::BI__builtin_wmemchr: {
4149 // The MSVC runtime library does not provide a definition of wmemchr, so we
4150 // need an inline implementation.
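// Editorial sketch of the loop emitted below:
//   while (size) { if (*s == c) return (wchar_t *)s; ++s; --size; }
//   return nullptr;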
4151 if (!getTarget().getTriple().isOSMSVCRT())
4152 break;
4153
4154 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4155 Value *Str = EmitScalarExpr(E->getArg(0));
4156 Value *Chr = EmitScalarExpr(E->getArg(1));
4157 Value *Size = EmitScalarExpr(E->getArg(2));
4158
4159 BasicBlock *Entry = Builder.GetInsertBlock();
4160 BasicBlock *CmpEq = createBasicBlock("wmemchr.eq");
4161 BasicBlock *Next = createBasicBlock("wmemchr.next");
4162 BasicBlock *Exit = createBasicBlock("wmemchr.exit");
4163 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4164 Builder.CreateCondBr(SizeEq0, Exit, CmpEq);
4165
4166 EmitBlock(CmpEq);
4167 PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2);
4168 StrPhi->addIncoming(Str, Entry);
4169 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4170 SizePhi->addIncoming(Size, Entry);
4171 CharUnits WCharAlign =
4172 getContext().getTypeAlignInChars(getContext().WCharTy);
4173 Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign);
4174 Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0);
4175 Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr);
4176 Builder.CreateCondBr(StrEqChr, Exit, Next);
4177
4178 EmitBlock(Next);
4179 Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1);
4180 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4181 Value *NextSizeEq0 =
4182 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4183 Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq);
4184 StrPhi->addIncoming(NextStr, Next);
4185 SizePhi->addIncoming(NextSize, Next);
4186
4187 EmitBlock(Exit);
4188 PHINode *Ret = Builder.CreatePHI(Str->getType(), 3);
4189 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry);
4190 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next);
4191 Ret->addIncoming(FoundChr, CmpEq);
4192 return RValue::get(Ret);
4193 }
4194 case Builtin::BI__builtin_wmemcmp: {
4195 // The MSVC runtime library does not provide a definition of wmemcmp, so we
4196 // need an inline implementation.
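// Editorial sketch of the loop emitted below (elements compare unsigned):
//   while (size) { if (*d > *s) return 1; if (*d < *s) return -1;
//                  ++d; ++s; --size; }
//   return 0;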
4197 if (!getTarget().getTriple().isOSMSVCRT())
4198 break;
4199
4200 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4201
4202 Value *Dst = EmitScalarExpr(E->getArg(0));
4203 Value *Src = EmitScalarExpr(E->getArg(1));
4204 Value *Size = EmitScalarExpr(E->getArg(2));
4205
4206 BasicBlock *Entry = Builder.GetInsertBlock();
4207 BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
4208 BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
4209 BasicBlock *Next = createBasicBlock("wmemcmp.next");
4210 BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
4211 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4212 Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
4213
4214 EmitBlock(CmpGT);
4215 PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
4216 DstPhi->addIncoming(Dst, Entry);
4217 PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
4218 SrcPhi->addIncoming(Src, Entry);
4219 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4220 SizePhi->addIncoming(Size, Entry);
4221 CharUnits WCharAlign =
4222 getContext().getTypeAlignInChars(getContext().WCharTy);
4223 Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
4224 Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
4225 Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
4226 Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
4227
4228 EmitBlock(CmpLT);
4229 Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
4230 Builder.CreateCondBr(DstLtSrc, Exit, Next);
4231
4232 EmitBlock(Next);
4233 Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
4234 Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
4235 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4236 Value *NextSizeEq0 =
4237 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4238 Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
4239 DstPhi->addIncoming(NextDst, Next);
4240 SrcPhi->addIncoming(NextSrc, Next);
4241 SizePhi->addIncoming(NextSize, Next);
4242
4243 EmitBlock(Exit);
4244 PHINode *Ret = Builder.CreatePHI(IntTy, 4);
4245 Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
4246 Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
4247 Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
4248 Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
4249 return RValue::get(Ret);
4250 }
4251 case Builtin::BI__builtin_dwarf_cfa: {
4252 // The offset in bytes from the first argument to the CFA.
4253 //
4254 // Why on earth is this in the frontend? Is there any reason at
4255 // all that the backend can't reasonably determine this while
4256 // lowering llvm.eh.dwarf.cfa()?
4257 //
4258 // TODO: If there's a satisfactory reason, add a target hook for
4259 // this instead of hard-coding 0, which is correct for most targets.
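// Editorial note: the emitted IR is simply
//   call ptr @llvm.eh.dwarf.cfa(i32 0)
// i.e. the canonical frame address at a zero offset.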
4260 int32_t Offset = 0;
4261
4262 Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
4263 return RValue::get(Builder.CreateCall(F,
4264 llvm::ConstantInt::get(Int32Ty, Offset)));
4265 }
4266 case Builtin::BI__builtin_return_address: {
4267 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4268 getContext().UnsignedIntTy);
4269 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4270 return RValue::get(Builder.CreateCall(F, Depth));
4271 }
4272 case Builtin::BI_ReturnAddress: {
4273 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4274 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
4275 }
4276 case Builtin::BI__builtin_frame_address: {
4277 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4278 getContext().UnsignedIntTy);
4279 Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
4280 return RValue::get(Builder.CreateCall(F, Depth));
4281 }
4282 case Builtin::BI__builtin_extract_return_addr: {
4283 Value *Address = EmitScalarExpr(E->getArg(0));
4284 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
4285 return RValue::get(Result);
4286 }
4287 case Builtin::BI__builtin_frob_return_addr: {
4288 Value *Address = EmitScalarExpr(E->getArg(0));
4289 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
4290 return RValue::get(Result);
4291 }
4292 case Builtin::BI__builtin_dwarf_sp_column: {
4293 llvm::IntegerType *Ty
4294 = cast<llvm::IntegerType>(ConvertType(E->getType()));
4295 int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
4296 if (Column == -1) {
4297 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
4298 return RValue::get(llvm::UndefValue::get(Ty));
4299 }
4300 return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
4301 }
4302 case Builtin::BI__builtin_init_dwarf_reg_size_table: {
4303 Value *Address = EmitScalarExpr(E->getArg(0));
4304 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
4305 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
4306 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
4307 }
4308 case Builtin::BI__builtin_eh_return: {
4309 Value *Int = EmitScalarExpr(E->getArg(0));
4310 Value *Ptr = EmitScalarExpr(E->getArg(1));
4311
4312 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
4313 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
4314 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
4315 Function *F =
4316 CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
4317 : Intrinsic::eh_return_i64);
4318 Builder.CreateCall(F, {Int, Ptr});
4319 Builder.CreateUnreachable();
4320
4321 // We do need to preserve an insertion point.
4322 EmitBlock(createBasicBlock("builtin_eh_return.cont"));
4323
4324 return RValue::get(nullptr);
4325 }
4326 case Builtin::BI__builtin_unwind_init: {
4327 Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
4328 Builder.CreateCall(F);
4329 return RValue::get(nullptr);
4330 }
4331 case Builtin::BI__builtin_extend_pointer: {
4332 // Extends a pointer to the size of an _Unwind_Word, which is
4333 // uint64_t on all platforms. Generally this gets poked into a
4334 // register and eventually used as an address, so if the
4335 // addressing registers are wider than pointers and the platform
4336 // doesn't implicitly ignore high-order bits when doing
4337 // addressing, we need to make sure we zext / sext based on
4338 // the platform's expectations.
4339 //
4340 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
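// Editorial note: e.g. on a 32-bit target the ptrtoint result is widened to
// i64 with sext or zext depending on getTargetHooks().extendPointerWithSExt();
// on 64-bit targets the ptrtoint result is returned unchanged.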
4341
4342 // Cast the pointer to intptr_t.
4343 Value *Ptr = EmitScalarExpr(E->getArg(0));
4344 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
4345
4346 // If that's 64 bits, we're done.
4347 if (IntPtrTy->getBitWidth() == 64)
4348 return RValue::get(Result);
4349
4350 // Otherwise, ask the codegen data what to do.
4351 if (getTargetHooks().extendPointerWithSExt())
4352 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
4353 else
4354 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
4355 }
4356 case Builtin::BI__builtin_setjmp: {
4357 // Buffer is a void**.
4358 Address Buf = EmitPointerWithAlignment(E->getArg(0));
4359
4360 // Store the frame pointer to the setjmp buffer.
4361 Value *FrameAddr = Builder.CreateCall(
4362 CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
4363 ConstantInt::get(Int32Ty, 0));
4364 Builder.CreateStore(FrameAddr, Buf);
4365
4366 // Store the stack pointer to the setjmp buffer.
4367 Value *StackAddr = Builder.CreateStackSave();
4368 assert(Buf.getPointer()->getType() == StackAddr->getType());
4369
4370 Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
4371 Builder.CreateStore(StackAddr, StackSaveSlot);
4372
4373 // Call LLVM's EH setjmp, which is lightweight.
4374 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4375 return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
4376 }
4377 case Builtin::BI__builtin_longjmp: {
4378 Value *Buf = EmitScalarExpr(E->getArg(0));
4379
4380 // Call LLVM's EH longjmp, which is lightweight.
4381 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
4382
4383 // longjmp doesn't return; mark this as unreachable.
4384 Builder.CreateUnreachable();
4385
4386 // We do need to preserve an insertion point.
4387 EmitBlock(createBasicBlock("longjmp.cont"));
4388
4389 return RValue::get(nullptr);
4390 }
4391 case Builtin::BI__builtin_launder: {
4392 const Expr *Arg = E->getArg(0);
4393 QualType ArgTy = Arg->getType()->getPointeeType();
4394 Value *Ptr = EmitScalarExpr(Arg);
4395 if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
4396 Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
4397
4398 return RValue::get(Ptr);
4399 }
4400 case Builtin::BI__sync_fetch_and_add:
4401 case Builtin::BI__sync_fetch_and_sub:
4402 case Builtin::BI__sync_fetch_and_or:
4403 case Builtin::BI__sync_fetch_and_and:
4404 case Builtin::BI__sync_fetch_and_xor:
4405 case Builtin::BI__sync_fetch_and_nand:
4406 case Builtin::BI__sync_add_and_fetch:
4407 case Builtin::BI__sync_sub_and_fetch:
4408 case Builtin::BI__sync_and_and_fetch:
4409 case Builtin::BI__sync_or_and_fetch:
4410 case Builtin::BI__sync_xor_and_fetch:
4411 case Builtin::BI__sync_nand_and_fetch:
4412 case Builtin::BI__sync_val_compare_and_swap:
4413 case Builtin::BI__sync_bool_compare_and_swap:
4414 case Builtin::BI__sync_lock_test_and_set:
4415 case Builtin::BI__sync_lock_release:
4416 case Builtin::BI__sync_swap:
4417 llvm_unreachable("Shouldn't make it through sema");
4418 case Builtin::BI__sync_fetch_and_add_1:
4419 case Builtin::BI__sync_fetch_and_add_2:
4420 case Builtin::BI__sync_fetch_and_add_4:
4421 case Builtin::BI__sync_fetch_and_add_8:
4422 case Builtin::BI__sync_fetch_and_add_16:
4423 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
4424 case Builtin::BI__sync_fetch_and_sub_1:
4425 case Builtin::BI__sync_fetch_and_sub_2:
4426 case Builtin::BI__sync_fetch_and_sub_4:
4427 case Builtin::BI__sync_fetch_and_sub_8:
4428 case Builtin::BI__sync_fetch_and_sub_16:
4429 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
4430 case Builtin::BI__sync_fetch_and_or_1:
4431 case Builtin::BI__sync_fetch_and_or_2:
4432 case Builtin::BI__sync_fetch_and_or_4:
4433 case Builtin::BI__sync_fetch_and_or_8:
4434 case Builtin::BI__sync_fetch_and_or_16:
4435 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
4436 case Builtin::BI__sync_fetch_and_and_1:
4437 case Builtin::BI__sync_fetch_and_and_2:
4438 case Builtin::BI__sync_fetch_and_and_4:
4439 case Builtin::BI__sync_fetch_and_and_8:
4440 case Builtin::BI__sync_fetch_and_and_16:
4441 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
4442 case Builtin::BI__sync_fetch_and_xor_1:
4443 case Builtin::BI__sync_fetch_and_xor_2:
4444 case Builtin::BI__sync_fetch_and_xor_4:
4445 case Builtin::BI__sync_fetch_and_xor_8:
4446 case Builtin::BI__sync_fetch_and_xor_16:
4447 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
4448 case Builtin::BI__sync_fetch_and_nand_1:
4449 case Builtin::BI__sync_fetch_and_nand_2:
4450 case Builtin::BI__sync_fetch_and_nand_4:
4451 case Builtin::BI__sync_fetch_and_nand_8:
4452 case Builtin::BI__sync_fetch_and_nand_16:
4453 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
4454
4455 // Clang extensions: not overloaded yet.
4456 case Builtin::BI__sync_fetch_and_min:
4457 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
4458 case Builtin::BI__sync_fetch_and_max:
4459 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
4460 case Builtin::BI__sync_fetch_and_umin:
4461 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
4462 case Builtin::BI__sync_fetch_and_umax:
4463 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
4464
4465 case Builtin::BI__sync_add_and_fetch_1:
4466 case Builtin::BI__sync_add_and_fetch_2:
4467 case Builtin::BI__sync_add_and_fetch_4:
4468 case Builtin::BI__sync_add_and_fetch_8:
4469 case Builtin::BI__sync_add_and_fetch_16:
4470 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
4471 llvm::Instruction::Add);
4472 case Builtin::BI__sync_sub_and_fetch_1:
4473 case Builtin::BI__sync_sub_and_fetch_2:
4474 case Builtin::BI__sync_sub_and_fetch_4:
4475 case Builtin::BI__sync_sub_and_fetch_8:
4476 case Builtin::BI__sync_sub_and_fetch_16:
4477 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
4478 llvm::Instruction::Sub);
4479 case Builtin::BI__sync_and_and_fetch_1:
4480 case Builtin::BI__sync_and_and_fetch_2:
4481 case Builtin::BI__sync_and_and_fetch_4:
4482 case Builtin::BI__sync_and_and_fetch_8:
4483 case Builtin::BI__sync_and_and_fetch_16:
4484 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
4485 llvm::Instruction::And);
4486 case Builtin::BI__sync_or_and_fetch_1:
4487 case Builtin::BI__sync_or_and_fetch_2:
4488 case Builtin::BI__sync_or_and_fetch_4:
4489 case Builtin::BI__sync_or_and_fetch_8:
4490 case Builtin::BI__sync_or_and_fetch_16:
4491 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
4492 llvm::Instruction::Or);
4493 case Builtin::BI__sync_xor_and_fetch_1:
4494 case Builtin::BI__sync_xor_and_fetch_2:
4495 case Builtin::BI__sync_xor_and_fetch_4:
4496 case Builtin::BI__sync_xor_and_fetch_8:
4497 case Builtin::BI__sync_xor_and_fetch_16:
4498 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
4499 llvm::Instruction::Xor);
4500 case Builtin::BI__sync_nand_and_fetch_1:
4501 case Builtin::BI__sync_nand_and_fetch_2:
4502 case Builtin::BI__sync_nand_and_fetch_4:
4503 case Builtin::BI__sync_nand_and_fetch_8:
4504 case Builtin::BI__sync_nand_and_fetch_16:
4505 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
4506 llvm::Instruction::And, true);
4507
4508 case Builtin::BI__sync_val_compare_and_swap_1:
4509 case Builtin::BI__sync_val_compare_and_swap_2:
4510 case Builtin::BI__sync_val_compare_and_swap_4:
4511 case Builtin::BI__sync_val_compare_and_swap_8:
4512 case Builtin::BI__sync_val_compare_and_swap_16:
4513 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
4514
4515 case Builtin::BI__sync_bool_compare_and_swap_1:
4516 case Builtin::BI__sync_bool_compare_and_swap_2:
4517 case Builtin::BI__sync_bool_compare_and_swap_4:
4518 case Builtin::BI__sync_bool_compare_and_swap_8:
4519 case Builtin::BI__sync_bool_compare_and_swap_16:
4520 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
4521
4522 case Builtin::BI__sync_swap_1:
4523 case Builtin::BI__sync_swap_2:
4524 case Builtin::BI__sync_swap_4:
4525 case Builtin::BI__sync_swap_8:
4526 case Builtin::BI__sync_swap_16:
4527 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
4528
4529 case Builtin::BI__sync_lock_test_and_set_1:
4530 case Builtin::BI__sync_lock_test_and_set_2:
4531 case Builtin::BI__sync_lock_test_and_set_4:
4532 case Builtin::BI__sync_lock_test_and_set_8:
4533 case Builtin::BI__sync_lock_test_and_set_16:
4534 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
4535
4536 case Builtin::BI__sync_lock_release_1:
4537 case Builtin::BI__sync_lock_release_2:
4538 case Builtin::BI__sync_lock_release_4:
4539 case Builtin::BI__sync_lock_release_8:
4540 case Builtin::BI__sync_lock_release_16: {
4541 Address Ptr = CheckAtomicAlignment(*this, E);
4542 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4543
4544 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4545 getContext().getTypeSize(ElTy));
4546 llvm::StoreInst *Store =
4547 Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
4548 Store->setAtomic(llvm::AtomicOrdering::Release);
4549 return RValue::get(nullptr);
4550 }
4551
4552 case Builtin::BI__sync_synchronize: {
4553 // We assume this is meant to correspond to a C++11-style
4554 // sequentially-consistent fence (i.e. it is only usable for
4555 // synchronization, not device I/O or anything like that). The builtin
4556 // is poorly specified: in theory there is no way to use it safely, but
4557 // in practice it is commonly paired with non-atomic loads and stores
4558 // to approximate acquire/release
4559 // semantics.
4560 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
4561 return RValue::get(nullptr);
4562 }
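// For illustration, __sync_synchronize() is emitted simply as
//   fence seq_cst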
4563
4564 case Builtin::BI__builtin_nontemporal_load:
4565 return RValue::get(EmitNontemporalLoad(*this, E));
4566 case Builtin::BI__builtin_nontemporal_store:
4567 return RValue::get(EmitNontemporalStore(*this, E));
4568 case Builtin::BI__c11_atomic_is_lock_free:
4569 case Builtin::BI__atomic_is_lock_free: {
4570 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
4571 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
4572 // _Atomic(T) is always properly-aligned.
4573 const char *LibCallName = "__atomic_is_lock_free";
4574 CallArgList Args;
4575 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
4576 getContext().getSizeType());
4577 if (BuiltinID == Builtin::BI__atomic_is_lock_free)
4578 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
4579 getContext().VoidPtrTy);
4580 else
4581 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
4582 getContext().VoidPtrTy);
4583 const CGFunctionInfo &FuncInfo =
4584 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
4585 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
4586 llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
4587 return EmitCall(FuncInfo, CGCallee::forDirect(Func),
4588 ReturnValueSlot(), Args);
4589 }
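// For illustration, __c11_atomic_is_lock_free(8) becomes roughly
//   call i1 @__atomic_is_lock_free(i64 8, ptr null)
// while the __atomic_is_lock_free form forwards its pointer argument instead
// of null.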
4590
4591 case Builtin::BI__atomic_test_and_set: {
4592 // Look at the argument type to determine whether this is a volatile
4593 // operation. The parameter type is always volatile.
4594 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
4595 bool Volatile =
4596 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
4597
4598 Address Ptr =
4599 EmitPointerWithAlignment(E->getArg(0)).withElementType(Int8Ty);
4600
4601 Value *NewVal = Builder.getInt8(1);
4602 Value *Order = EmitScalarExpr(E->getArg(1));
4603 if (isa<llvm::ConstantInt>(Order)) {
4604 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4605 AtomicRMWInst *Result = nullptr;
4606 switch (ord) {
4607 case 0: // memory_order_relaxed
4608 default: // invalid order
4609 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4610 llvm::AtomicOrdering::Monotonic);
4611 break;
4612 case 1: // memory_order_consume
4613 case 2: // memory_order_acquire
4614 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4615 llvm::AtomicOrdering::Acquire);
4616 break;
4617 case 3: // memory_order_release
4618 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4619 llvm::AtomicOrdering::Release);
4620 break;
4621 case 4: // memory_order_acq_rel
4622
4623 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4624 llvm::AtomicOrdering::AcquireRelease);
4625 break;
4626 case 5: // memory_order_seq_cst
4627 Result = Builder.CreateAtomicRMW(
4628 llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4629 llvm::AtomicOrdering::SequentiallyConsistent);
4630 break;
4631 }
4632 Result->setVolatile(Volatile);
4633 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
4634 }
4635
4636 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4637
4638 llvm::BasicBlock *BBs[5] = {
4639 createBasicBlock("monotonic", CurFn),
4640 createBasicBlock("acquire", CurFn),
4641 createBasicBlock("release", CurFn),
4642 createBasicBlock("acqrel", CurFn),
4643 createBasicBlock("seqcst", CurFn)
4644 };
4645 llvm::AtomicOrdering Orders[5] = {
4646 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
4647 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
4648 llvm::AtomicOrdering::SequentiallyConsistent};
4649
4650 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4651 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
4652
4653 Builder.SetInsertPoint(ContBB);
4654 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
4655
4656 for (unsigned i = 0; i < 5; ++i) {
4657 Builder.SetInsertPoint(BBs[i]);
4658 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
4659 Ptr, NewVal, Orders[i]);
4660 RMW->setVolatile(Volatile);
4661 Result->addIncoming(RMW, BBs[i]);
4662 Builder.CreateBr(ContBB);
4663 }
4664
4665 SI->addCase(Builder.getInt32(0), BBs[0]);
4666 SI->addCase(Builder.getInt32(1), BBs[1]);
4667 SI->addCase(Builder.getInt32(2), BBs[1]);
4668 SI->addCase(Builder.getInt32(3), BBs[2]);
4669 SI->addCase(Builder.getInt32(4), BBs[3]);
4670 SI->addCase(Builder.getInt32(5), BBs[4]);
4671
4672 Builder.SetInsertPoint(ContBB);
4673 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
4674 }
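// For illustration, with a constant ordering,
//   __atomic_test_and_set(p, __ATOMIC_ACQUIRE);
// is emitted roughly as
//   %old = atomicrmw xchg ptr %p, i8 1 acquire
//   %tobool = icmp ne i8 %old, 0
// Non-constant orderings take the switch over the five basic blocks above.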
4675
4676 case Builtin::BI__atomic_clear: {
4677 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
4678 bool Volatile =
4679 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
4680
4681 Address Ptr = EmitPointerWithAlignment(E->getArg(0));
4682 Ptr = Ptr.withElementType(Int8Ty);
4683 Value *NewVal = Builder.getInt8(0);
4684 Value *Order = EmitScalarExpr(E->getArg(1));
4685 if (isa<llvm::ConstantInt>(Order)) {
4686 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4687 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
4688 switch (ord) {
4689 case 0: // memory_order_relaxed
4690 default: // invalid order
4691 Store->setOrdering(llvm::AtomicOrdering::Monotonic);
4692 break;
4693 case 3: // memory_order_release
4694 Store->setOrdering(llvm::AtomicOrdering::Release);
4695 break;
4696 case 5: // memory_order_seq_cst
4697 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
4698 break;
4699 }
4700 return RValue::get(nullptr);
4701 }
4702
4703 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4704
4705 llvm::BasicBlock *BBs[3] = {
4706 createBasicBlock("monotonic", CurFn),
4707 createBasicBlock("release", CurFn),
4708 createBasicBlock("seqcst", CurFn)
4709 };
4710 llvm::AtomicOrdering Orders[3] = {
4711 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
4712 llvm::AtomicOrdering::SequentiallyConsistent};
4713
4714 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4715 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
4716
4717 for (unsigned i = 0; i < 3; ++i) {
4718 Builder.SetInsertPoint(BBs[i]);
4719 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
4720 Store->setOrdering(Orders[i]);
4721 Builder.CreateBr(ContBB);
4722 }
4723
4724 SI->addCase(Builder.getInt32(0), BBs[0]);
4725 SI->addCase(Builder.getInt32(3), BBs[1]);
4726 SI->addCase(Builder.getInt32(5), BBs[2]);
4727
4728 Builder.SetInsertPoint(ContBB);
4729 return RValue::get(nullptr);
4730 }
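// For illustration, with a constant ordering,
//   __atomic_clear(p, __ATOMIC_RELEASE);
// is emitted roughly as
//   store atomic i8 0, ptr %p release
// (marked volatile when the argument's pointee is volatile-qualified).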
4731
4732 case Builtin::BI__atomic_thread_fence:
4733 case Builtin::BI__atomic_signal_fence:
4734 case Builtin::BI__c11_atomic_thread_fence:
4735 case Builtin::BI__c11_atomic_signal_fence: {
4736 llvm::SyncScope::ID SSID;
4737 if (BuiltinID == Builtin::BI__atomic_signal_fence ||
4738 BuiltinID == Builtin::BI__c11_atomic_signal_fence)
4739 SSID = llvm::SyncScope::SingleThread;
4740 else
4741 SSID = llvm::SyncScope::System;
4742 Value *Order = EmitScalarExpr(E->getArg(0));
4743 if (isa<llvm::ConstantInt>(Order)) {
4744 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4745 switch (ord) {
4746 case 0: // memory_order_relaxed
4747 default: // invalid order
4748 break;
4749 case 1: // memory_order_consume
4750 case 2: // memory_order_acquire
4751 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
4752 break;
4753 case 3: // memory_order_release
4754 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
4755 break;
4756 case 4: // memory_order_acq_rel
4757 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
4758 break;
4759 case 5: // memory_order_seq_cst
4760 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
4761 break;
4762 }
4763 return RValue::get(nullptr);
4764 }
4765
4766 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
4767 AcquireBB = createBasicBlock("acquire", CurFn);
4768 ReleaseBB = createBasicBlock("release", CurFn);
4769 AcqRelBB = createBasicBlock("acqrel", CurFn);
4770 SeqCstBB = createBasicBlock("seqcst", CurFn);
4771 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4772
4773 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4774 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
4775
4776 Builder.SetInsertPoint(AcquireBB);
4777 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
4778 Builder.CreateBr(ContBB);
4779 SI->addCase(Builder.getInt32(1), AcquireBB);
4780 SI->addCase(Builder.getInt32(2), AcquireBB);
4781
4782 Builder.SetInsertPoint(ReleaseBB);
4783 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
4784 Builder.CreateBr(ContBB);
4785 SI->addCase(Builder.getInt32(3), ReleaseBB);
4786
4787 Builder.SetInsertPoint(AcqRelBB);
4788 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
4789 Builder.CreateBr(ContBB);
4790 SI->addCase(Builder.getInt32(4), AcqRelBB);
4791
4792 Builder.SetInsertPoint(SeqCstBB);
4793 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
4794 Builder.CreateBr(ContBB);
4795 SI->addCase(Builder.getInt32(5), SeqCstBB);
4796
4797 Builder.SetInsertPoint(ContBB);
4798 return RValue::get(nullptr);
4799 }
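// For illustration, with constant orderings the fences emitted above are:
//   __atomic_thread_fence(__ATOMIC_ACQUIRE)  ->  fence acquire
//   __atomic_signal_fence(__ATOMIC_SEQ_CST)  ->  fence syncscope("singlethread") seq_cst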
4800
4801 case Builtin::BI__builtin_signbit:
4802 case Builtin::BI__builtin_signbitf:
4803 case Builtin::BI__builtin_signbitl: {
4804 return RValue::get(
4805 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
4806 ConvertType(E->getType())));
4807 }
4808 case Builtin::BI__warn_memset_zero_len:
4809 return RValue::getIgnored();
4810 case Builtin::BI__annotation: {
4811 // Re-encode each wide string to UTF8 and make an MDString.
4812 SmallVector<Metadata *, 1> Strings;
4813 for (const Expr *Arg : E->arguments()) {
4814 const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
4815 assert(Str->getCharByteWidth() == 2);
4816 StringRef WideBytes = Str->getBytes();
4817 std::string StrUtf8;
4818 if (!convertUTF16ToUTF8String(
4819 ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
4820 CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
4821 continue;
4822 }
4823 Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
4824 }
4825
4826 // Build an MDTuple of MDStrings and emit the intrinsic call.
4827 llvm::Function *F =
4828 CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
4829 MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
4830 Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
4831 return RValue::getIgnored();
4832 }
4833 case Builtin::BI__builtin_annotation: {
4834 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
4835 llvm::Function *F =
4836 CGM.getIntrinsic(llvm::Intrinsic::annotation,
4837 {AnnVal->getType(), CGM.ConstGlobalsPtrTy});
4838
4839 // Get the annotation string, go through casts. Sema requires this to be a
4840 // non-wide string literal, potentially cast, so the cast<> is safe.
4841 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
4842 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
4843 return RValue::get(
4844 EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr));
4845 }
4846 case Builtin::BI__builtin_addcb:
4847 case Builtin::BI__builtin_addcs:
4848 case Builtin::BI__builtin_addc:
4849 case Builtin::BI__builtin_addcl:
4850 case Builtin::BI__builtin_addcll:
4851 case Builtin::BI__builtin_subcb:
4852 case Builtin::BI__builtin_subcs:
4853 case Builtin::BI__builtin_subc:
4854 case Builtin::BI__builtin_subcl:
4855 case Builtin::BI__builtin_subcll: {
4856
4857 // We translate all of these builtins from expressions of the form:
4858 // int x = ..., y = ..., carryin = ..., carryout, result;
4859 // result = __builtin_addc(x, y, carryin, &carryout);
4860 //
4861 // to LLVM IR of the form:
4862 //
4863 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
4864 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
4865 // %carry1 = extractvalue {i32, i1} %tmp1, 1
4866 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
4867 // i32 %carryin)
4868 // %result = extractvalue {i32, i1} %tmp2, 0
4869 // %carry2 = extractvalue {i32, i1} %tmp2, 1
4870 // %tmp3 = or i1 %carry1, %carry2
4871 // %tmp4 = zext i1 %tmp3 to i32
4872 // store i32 %tmp4, i32* %carryout
4873
4874 // Scalarize our inputs.
4875 llvm::Value *X = EmitScalarExpr(E->getArg(0));
4876 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
4877 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
4878 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
4879
4880 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
4881 llvm::Intrinsic::ID IntrinsicId;
4882 switch (BuiltinID) {
4883 default: llvm_unreachable("Unknown multiprecision builtin id.");
4884 case Builtin::BI__builtin_addcb:
4885 case Builtin::BI__builtin_addcs:
4886 case Builtin::BI__builtin_addc:
4887 case Builtin::BI__builtin_addcl:
4888 case Builtin::BI__builtin_addcll:
4889 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
4890 break;
4891 case Builtin::BI__builtin_subcb:
4892 case Builtin::BI__builtin_subcs:
4893 case Builtin::BI__builtin_subc:
4894 case Builtin::BI__builtin_subcl:
4895 case Builtin::BI__builtin_subcll:
4896 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
4897 break;
4898 }
4899
4900 // Construct our resulting LLVM IR expression.
4901 llvm::Value *Carry1;
4902 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
4903 X, Y, Carry1);
4904 llvm::Value *Carry2;
4905 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
4906 Sum1, Carryin, Carry2);
4907 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
4908 X->getType());
4909 Builder.CreateStore(CarryOut, CarryOutPtr);
4910 return RValue::get(Sum2);
4911 }
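// For illustration (operand names are hypothetical), the carry output can be
// chained to add wider values limb by limb:
//   unsigned carry;
//   unsigned lo = __builtin_addc(a_lo, b_lo, 0, &carry);
//   unsigned hi = __builtin_addc(a_hi, b_hi, carry, &carry);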
4912
4913 case Builtin::BI__builtin_add_overflow:
4914 case Builtin::BI__builtin_sub_overflow:
4915 case Builtin::BI__builtin_mul_overflow: {
4916 const clang::Expr *LeftArg = E->getArg(0);
4917 const clang::Expr *RightArg = E->getArg(1);
4918 const clang::Expr *ResultArg = E->getArg(2);
4919
4920 clang::QualType ResultQTy =
4921 ResultArg->getType()->castAs<PointerType>()->getPointeeType();
4922
4923 WidthAndSignedness LeftInfo =
4924 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
4925 WidthAndSignedness RightInfo =
4926 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
4927 WidthAndSignedness ResultInfo =
4928 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
4929
4930 // Handle mixed-sign multiplication as a special case, because adding
4931 // runtime or backend support for our generic irgen would be too expensive.
4932 if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
4933 return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
4934 RightInfo, ResultArg, ResultQTy,
4935 ResultInfo);
4936
4937 if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo,
4938 ResultInfo))
4939 return EmitCheckedUnsignedMultiplySignedResult(
4940 *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,
4941 ResultInfo);
4942
4943 WidthAndSignedness EncompassingInfo =
4944 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
4945
4946 llvm::Type *EncompassingLLVMTy =
4947 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
4948
4949 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
4950
4951 llvm::Intrinsic::ID IntrinsicId;
4952 switch (BuiltinID) {
4953 default:
4954 llvm_unreachable("Unknown overflow builtin id.");
4955 case Builtin::BI__builtin_add_overflow:
4956 IntrinsicId = EncompassingInfo.Signed
4957 ? llvm::Intrinsic::sadd_with_overflow
4958 : llvm::Intrinsic::uadd_with_overflow;
4959 break;
4960 case Builtin::BI__builtin_sub_overflow:
4961 IntrinsicId = EncompassingInfo.Signed
4962 ? llvm::Intrinsic::ssub_with_overflow
4963 : llvm::Intrinsic::usub_with_overflow;
4964 break;
4965 case Builtin::BI__builtin_mul_overflow:
4966 IntrinsicId = EncompassingInfo.Signed
4967 ? llvm::Intrinsic::smul_with_overflow
4968 : llvm::Intrinsic::umul_with_overflow;
4969 break;
4970 }
4971
4972 llvm::Value *Left = EmitScalarExpr(LeftArg);
4973 llvm::Value *Right = EmitScalarExpr(RightArg);
4974 Address ResultPtr = EmitPointerWithAlignment(ResultArg);
4975
4976 // Extend each operand to the encompassing type.
4977 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
4978 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
4979
4980 // Perform the operation on the extended values.
4981 llvm::Value *Overflow, *Result;
4982 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
4983
4984 if (EncompassingInfo.Width > ResultInfo.Width) {
4985 // The encompassing type is wider than the result type, so we need to
4986 // truncate it.
4987 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
4988
4989 // To see if the truncation caused an overflow, we will extend
4990 // the result and then compare it to the original result.
4991 llvm::Value *ResultTruncExt = Builder.CreateIntCast(
4992 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
4993 llvm::Value *TruncationOverflow =
4994 Builder.CreateICmpNE(Result, ResultTruncExt);
4995
4996 Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
4997 Result = ResultTrunc;
4998 }
4999
5000 // Finally, store the result using the pointer.
5001 bool isVolatile =
5002 ResultArg->getType()->getPointeeType().isVolatileQualified();
5003 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
5004
5005 return RValue::get(Overflow);
5006 }
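// For illustration, with int a, b, r the call
//   bool ovf = __builtin_add_overflow(a, b, &r);
// reduces to a single
//   call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
// with the i32 stored to r and the i1 returned; mixed operand types are first
// widened to the encompassing type and the truncation check above is OR'd in.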
5007
5008 case Builtin::BI__builtin_uadd_overflow:
5009 case Builtin::BI__builtin_uaddl_overflow:
5010 case Builtin::BI__builtin_uaddll_overflow:
5011 case Builtin::BI__builtin_usub_overflow:
5012 case Builtin::BI__builtin_usubl_overflow:
5013 case Builtin::BI__builtin_usubll_overflow:
5014 case Builtin::BI__builtin_umul_overflow:
5015 case Builtin::BI__builtin_umull_overflow:
5016 case Builtin::BI__builtin_umulll_overflow:
5017 case Builtin::BI__builtin_sadd_overflow:
5018 case Builtin::BI__builtin_saddl_overflow:
5019 case Builtin::BI__builtin_saddll_overflow:
5020 case Builtin::BI__builtin_ssub_overflow:
5021 case Builtin::BI__builtin_ssubl_overflow:
5022 case Builtin::BI__builtin_ssubll_overflow:
5023 case Builtin::BI__builtin_smul_overflow:
5024 case Builtin::BI__builtin_smull_overflow:
5025 case Builtin::BI__builtin_smulll_overflow: {
5026
5027 // We translate all of these builtins directly to the relevant llvm IR node.
5028
5029 // Scalarize our inputs.
5030 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5031 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5032 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
5033
5034 // Decide which of the overflow intrinsics we are lowering to:
5035 llvm::Intrinsic::ID IntrinsicId;
5036 switch (BuiltinID) {
5037 default: llvm_unreachable("Unknown overflow builtin id.");
5038 case Builtin::BI__builtin_uadd_overflow:
5039 case Builtin::BI__builtin_uaddl_overflow:
5040 case Builtin::BI__builtin_uaddll_overflow:
5041 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5042 break;
5043 case Builtin::BI__builtin_usub_overflow:
5044 case Builtin::BI__builtin_usubl_overflow:
5045 case Builtin::BI__builtin_usubll_overflow:
5046 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5047 break;
5048 case Builtin::BI__builtin_umul_overflow:
5049 case Builtin::BI__builtin_umull_overflow:
5050 case Builtin::BI__builtin_umulll_overflow:
5051 IntrinsicId = llvm::Intrinsic::umul_with_overflow;
5052 break;
5053 case Builtin::BI__builtin_sadd_overflow:
5054 case Builtin::BI__builtin_saddl_overflow:
5055 case Builtin::BI__builtin_saddll_overflow:
5056 IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
5057 break;
5058 case Builtin::BI__builtin_ssub_overflow:
5059 case Builtin::BI__builtin_ssubl_overflow:
5060 case Builtin::BI__builtin_ssubll_overflow:
5061 IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
5062 break;
5063 case Builtin::BI__builtin_smul_overflow:
5064 case Builtin::BI__builtin_smull_overflow:
5065 case Builtin::BI__builtin_smulll_overflow:
5066 IntrinsicId = llvm::Intrinsic::smul_with_overflow;
5067 break;
5068 }
5069
5070
5071 llvm::Value *Carry;
5072 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
5073 Builder.CreateStore(Sum, SumOutPtr);
5074
5075 return RValue::get(Carry);
5076 }
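// For illustration, __builtin_uaddll_overflow(x, y, &sum) maps directly to
//   @llvm.uadd.with.overflow.i64
// on targets where unsigned long long is 64 bits (assumed here).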
5077 case Builtin::BIaddressof:
5078 case Builtin::BI__addressof:
5079 case Builtin::BI__builtin_addressof:
5080 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5081 case Builtin::BI__builtin_function_start:
5082 return RValue::get(CGM.GetFunctionStart(
5083 E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext())));
5084 case Builtin::BI__builtin_operator_new:
5085 return EmitBuiltinNewDeleteCall(
5086 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
5087 case Builtin::BI__builtin_operator_delete:
5088 EmitBuiltinNewDeleteCall(
5089 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
5090 return RValue::get(nullptr);
5091
5092 case Builtin::BI__builtin_is_aligned:
5093 return EmitBuiltinIsAligned(E);
5094 case Builtin::BI__builtin_align_up:
5095 return EmitBuiltinAlignTo(E, true);
5096 case Builtin::BI__builtin_align_down:
5097 return EmitBuiltinAlignTo(E, false);
5098
5099 case Builtin::BI__noop:
5100 // __noop always evaluates to an integer literal zero.
5101 return RValue::get(ConstantInt::get(IntTy, 0));
5102 case Builtin::BI__builtin_call_with_static_chain: {
5103 const CallExpr *Call = cast<CallExpr>(E->getArg(0));
5104 const Expr *Chain = E->getArg(1);
5105 return EmitCall(Call->getCallee()->getType(),
5106 EmitCallee(Call->getCallee()), Call, ReturnValue,
5107 EmitScalarExpr(Chain));
5108 }
5109 case Builtin::BI_InterlockedExchange8:
5110 case Builtin::BI_InterlockedExchange16:
5111 case Builtin::BI_InterlockedExchange:
5112 case Builtin::BI_InterlockedExchangePointer:
5113 return RValue::get(
5114 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
5115 case Builtin::BI_InterlockedCompareExchangePointer:
5116 case Builtin::BI_InterlockedCompareExchangePointer_nf: {
5117 llvm::Type *RTy;
5118 llvm::IntegerType *IntType = IntegerType::get(
5119 getLLVMContext(), getContext().getTypeSize(E->getType()));
5120
5121 Address DestAddr = CheckAtomicAlignment(*this, E);
5122
5123 llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
5124 RTy = Exchange->getType();
5125 Exchange = Builder.CreatePtrToInt(Exchange, IntType);
5126
5127 llvm::Value *Comparand =
5128 Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
5129
5130 auto Ordering =
5131 BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
5132 AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;
5133
5134 auto Result = Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
5135 Ordering, Ordering);
5136 Result->setVolatile(true);
5137
5138 return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
5139 0),
5140 RTy));
5141 }
5142 case Builtin::BI_InterlockedCompareExchange8:
5143 case Builtin::BI_InterlockedCompareExchange16:
5144 case Builtin::BI_InterlockedCompareExchange:
5145 case Builtin::BI_InterlockedCompareExchange64:
5146 return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
5147 case Builtin::BI_InterlockedIncrement16:
5148 case Builtin::BI_InterlockedIncrement:
5149 return RValue::get(
5150 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
5151 case Builtin::BI_InterlockedDecrement16:
5152 case Builtin::BI_InterlockedDecrement:
5153 return RValue::get(
5154 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
5155 case Builtin::BI_InterlockedAnd8:
5156 case Builtin::BI_InterlockedAnd16:
5157 case Builtin::BI_InterlockedAnd:
5158 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
5159 case Builtin::BI_InterlockedExchangeAdd8:
5160 case Builtin::BI_InterlockedExchangeAdd16:
5161 case Builtin::BI_InterlockedExchangeAdd:
5162 return RValue::get(
5163 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
5164 case Builtin::BI_InterlockedExchangeSub8:
5165 case Builtin::BI_InterlockedExchangeSub16:
5166 case Builtin::BI_InterlockedExchangeSub:
5167 return RValue::get(
5168 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
5169 case Builtin::BI_InterlockedOr8:
5170 case Builtin::BI_InterlockedOr16:
5171 case Builtin::BI_InterlockedOr:
5172 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
5173 case Builtin::BI_InterlockedXor8:
5174 case Builtin::BI_InterlockedXor16:
5175 case Builtin::BI_InterlockedXor:
5176 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
5177
5178 case Builtin::BI_bittest64:
5179 case Builtin::BI_bittest:
5180 case Builtin::BI_bittestandcomplement64:
5181 case Builtin::BI_bittestandcomplement:
5182 case Builtin::BI_bittestandreset64:
5183 case Builtin::BI_bittestandreset:
5184 case Builtin::BI_bittestandset64:
5185 case Builtin::BI_bittestandset:
5186 case Builtin::BI_interlockedbittestandreset:
5187 case Builtin::BI_interlockedbittestandreset64:
5188 case Builtin::BI_interlockedbittestandset64:
5189 case Builtin::BI_interlockedbittestandset:
5190 case Builtin::BI_interlockedbittestandset_acq:
5191 case Builtin::BI_interlockedbittestandset_rel:
5192 case Builtin::BI_interlockedbittestandset_nf:
5193 case Builtin::BI_interlockedbittestandreset_acq:
5194 case Builtin::BI_interlockedbittestandreset_rel:
5195 case Builtin::BI_interlockedbittestandreset_nf:
5196 return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
5197
5198 // These builtins exist to emit regular volatile loads and stores not
5199 // affected by the -fms-volatile setting.
5200 case Builtin::BI__iso_volatile_load8:
5201 case Builtin::BI__iso_volatile_load16:
5202 case Builtin::BI__iso_volatile_load32:
5203 case Builtin::BI__iso_volatile_load64:
5204 return RValue::get(EmitISOVolatileLoad(*this, E));
5205 case Builtin::BI__iso_volatile_store8:
5206 case Builtin::BI__iso_volatile_store16:
5207 case Builtin::BI__iso_volatile_store32:
5208 case Builtin::BI__iso_volatile_store64:
5209 return RValue::get(EmitISOVolatileStore(*this, E));
5210
5211 case Builtin::BI__builtin_ptrauth_auth:
5212 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5213 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5214 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5215 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5216 case Builtin::BI__builtin_ptrauth_strip: {
5217 // Emit the arguments.
5218 SmallVector<llvm::Value *, 5> Args;
5219 for (auto argExpr : E->arguments())
5220 Args.push_back(EmitScalarExpr(argExpr));
5221
5222 // Cast the value to intptr_t, saving its original type.
5223 llvm::Type *OrigValueType = Args[0]->getType();
5224 if (OrigValueType->isPointerTy())
5225 Args[0] = Builder.CreatePtrToInt(Args[0], IntPtrTy);
5226
5227 switch (BuiltinID) {
5228 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5229 if (Args[4]->getType()->isPointerTy())
5230 Args[4] = Builder.CreatePtrToInt(Args[4], IntPtrTy);
5231 LLVM_FALLTHROUGH;
5232
5233 case Builtin::BI__builtin_ptrauth_auth:
5234 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5235 if (Args[2]->getType()->isPointerTy())
5236 Args[2] = Builder.CreatePtrToInt(Args[2], IntPtrTy);
5237 break;
5238
5239 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5240 if (Args[1]->getType()->isPointerTy())
5241 Args[1] = Builder.CreatePtrToInt(Args[1], IntPtrTy);
5242 break;
5243
5244 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5245 case Builtin::BI__builtin_ptrauth_strip:
5246 break;
5247 }
5248
5249 // Call the intrinsic.
5250 auto IntrinsicID = [&]() -> unsigned {
5251 switch (BuiltinID) {
5252 case Builtin::BI__builtin_ptrauth_auth:
5253 return llvm::Intrinsic::ptrauth_auth;
5254 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5255 return llvm::Intrinsic::ptrauth_resign;
5256 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5257 return llvm::Intrinsic::ptrauth_blend;
5258 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5259 return llvm::Intrinsic::ptrauth_sign_generic;
5260 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5261 return llvm::Intrinsic::ptrauth_sign;
5262 case Builtin::BI__builtin_ptrauth_strip:
5263 return llvm::Intrinsic::ptrauth_strip;
5264 }
5265 llvm_unreachable("bad ptrauth intrinsic");
5266 }();
5267 auto Intrinsic = CGM.getIntrinsic(IntrinsicID);
5268 llvm::Value *Result = EmitRuntimeCall(Intrinsic, Args);
5269
5270 if (BuiltinID != Builtin::BI__builtin_ptrauth_sign_generic_data &&
5271 BuiltinID != Builtin::BI__builtin_ptrauth_blend_discriminator &&
5272 OrigValueType->isPointerTy()) {
5273 Result = Builder.CreateIntToPtr(Result, OrigValueType);
5274 }
5275 return RValue::get(Result);
5276 }
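// For illustration (assuming a 64-bit target, so intptr_t is i64),
//   void *stripped = __builtin_ptrauth_strip(p, key);
// becomes roughly
//   %0 = ptrtoint ptr %p to i64
//   %1 = call i64 @llvm.ptrauth.strip(i64 %0, i32 <key>)
//   %2 = inttoptr i64 %1 to ptr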
5277
5278 case Builtin::BI__exception_code:
5279 case Builtin::BI_exception_code:
5280 return RValue::get(EmitSEHExceptionCode());
5281 case Builtin::BI__exception_info:
5282 case Builtin::BI_exception_info:
5283 return RValue::get(EmitSEHExceptionInfo());
5284 case Builtin::BI__abnormal_termination:
5285 case Builtin::BI_abnormal_termination:
5286 return RValue::get(EmitSEHAbnormalTermination());
5287 case Builtin::BI_setjmpex:
5288 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5289 E->getArg(0)->getType()->isPointerType())
5290 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5291 break;
5292 case Builtin::BI_setjmp:
5293 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5294 E->getArg(0)->getType()->isPointerType()) {
5295 if (getTarget().getTriple().getArch() == llvm::Triple::x86)
5296 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
5297 else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
5298 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5299 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
5300 }
5301 break;
5302
5303 // C++ std:: builtins.
5304 case Builtin::BImove:
5305 case Builtin::BImove_if_noexcept:
5306 case Builtin::BIforward:
5307 case Builtin::BIforward_like:
5308 case Builtin::BIas_const:
5309 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5310 case Builtin::BI__GetExceptionInfo: {
5311 if (llvm::GlobalVariable *GV =
5312 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
5313 return RValue::get(GV);
5314 break;
5315 }
5316
5317 case Builtin::BI__fastfail:
5318 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
5319
5320 case Builtin::BI__builtin_coro_id:
5321 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
5322 case Builtin::BI__builtin_coro_promise:
5323 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
5324 case Builtin::BI__builtin_coro_resume:
5325 EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
5326 return RValue::get(nullptr);
5327 case Builtin::BI__builtin_coro_frame:
5328 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
5329 case Builtin::BI__builtin_coro_noop:
5330 return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
5331 case Builtin::BI__builtin_coro_free:
5332 return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
5333 case Builtin::BI__builtin_coro_destroy:
5334 EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
5335 return RValue::get(nullptr);
5336 case Builtin::BI__builtin_coro_done:
5337 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
5338 case Builtin::BI__builtin_coro_alloc:
5339 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
5340 case Builtin::BI__builtin_coro_begin:
5341 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
5342 case Builtin::BI__builtin_coro_end:
5343 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
5344 case Builtin::BI__builtin_coro_suspend:
5345 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
5346 case Builtin::BI__builtin_coro_size:
5347 return EmitCoroutineIntrinsic(E, Intrinsic::coro_size);
5348 case Builtin::BI__builtin_coro_align:
5349 return EmitCoroutineIntrinsic(E, Intrinsic::coro_align);
5350
5351 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
5352 case Builtin::BIread_pipe:
5353 case Builtin::BIwrite_pipe: {
5354 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5355 *Arg1 = EmitScalarExpr(E->getArg(1));
5356 CGOpenCLRuntime OpenCLRT(CGM);
5357 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5358 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5359
5360 // Type of the generic packet parameter.
5361 unsigned GenericAS =
5362 getContext().getTargetAddressSpace(LangAS::opencl_generic);
5363 llvm::Type *I8PTy = llvm::PointerType::get(getLLVMContext(), GenericAS);
5364
5365 // Testing which overloaded version we should generate the call for.
5366 if (2U == E->getNumArgs()) {
5367 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
5368 : "__write_pipe_2";
5369 // Creating a generic function type to be able to call with any builtin or
5370 // user defined type.
5371 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
5372 llvm::FunctionType *FTy = llvm::FunctionType::get(
5373 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5374 Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
5375 return RValue::get(
5376 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5377 {Arg0, BCast, PacketSize, PacketAlign}));
5378 } else {
5379 assert(4 == E->getNumArgs() &&
5380 "Illegal number of parameters to pipe function");
5381 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
5382 : "__write_pipe_4";
5383
5384 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
5385 Int32Ty, Int32Ty};
5386 Value *Arg2 = EmitScalarExpr(E->getArg(2)),
5387 *Arg3 = EmitScalarExpr(E->getArg(3));
5388 llvm::FunctionType *FTy = llvm::FunctionType::get(
5389 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5390 Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
5391 // We know the third argument is an integer type, but we may need to cast
5392 // it to i32.
5393 if (Arg2->getType() != Int32Ty)
5394 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
5395 return RValue::get(
5396 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5397 {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
5398 }
5399 }
5400 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
5401 // functions
5402 case Builtin::BIreserve_read_pipe:
5403 case Builtin::BIreserve_write_pipe:
5404 case Builtin::BIwork_group_reserve_read_pipe:
5405 case Builtin::BIwork_group_reserve_write_pipe:
5406 case Builtin::BIsub_group_reserve_read_pipe:
5407 case Builtin::BIsub_group_reserve_write_pipe: {
5408 // Composing the mangled name for the function.
5409 const char *Name;
5410 if (BuiltinID == Builtin::BIreserve_read_pipe)
5411 Name = "__reserve_read_pipe";
5412 else if (BuiltinID == Builtin::BIreserve_write_pipe)
5413 Name = "__reserve_write_pipe";
5414 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
5415 Name = "__work_group_reserve_read_pipe";
5416 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
5417 Name = "__work_group_reserve_write_pipe";
5418 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
5419 Name = "__sub_group_reserve_read_pipe";
5420 else
5421 Name = "__sub_group_reserve_write_pipe";
5422
5423 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5424 *Arg1 = EmitScalarExpr(E->getArg(1));
5425 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
5426 CGOpenCLRuntime OpenCLRT(CGM);
5427 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5428 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5429
5430 // Building the generic function prototype.
5431 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
5432 llvm::FunctionType *FTy = llvm::FunctionType::get(
5433 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5434 // We know the second argument is an integer type, but we may need to cast
5435 // it to i32.
5436 if (Arg1->getType() != Int32Ty)
5437 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
5438 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5439 {Arg0, Arg1, PacketSize, PacketAlign}));
5440 }
5441 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
5442 // functions
5443 case Builtin::BIcommit_read_pipe:
5444 case Builtin::BIcommit_write_pipe:
5445 case Builtin::BIwork_group_commit_read_pipe:
5446 case Builtin::BIwork_group_commit_write_pipe:
5447 case Builtin::BIsub_group_commit_read_pipe:
5448 case Builtin::BIsub_group_commit_write_pipe: {
5449 const char *Name;
5450 if (BuiltinID == Builtin::BIcommit_read_pipe)
5451 Name = "__commit_read_pipe";
5452 else if (BuiltinID == Builtin::BIcommit_write_pipe)
5453 Name = "__commit_write_pipe";
5454 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
5455 Name = "__work_group_commit_read_pipe";
5456 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
5457 Name = "__work_group_commit_write_pipe";
5458 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
5459 Name = "__sub_group_commit_read_pipe";
5460 else
5461 Name = "__sub_group_commit_write_pipe";
5462
5463 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5464 *Arg1 = EmitScalarExpr(E->getArg(1));
5465 CGOpenCLRuntime OpenCLRT(CGM);
5466 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5467 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5468
5469 // Building the generic function prototype.
5470 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
5471 llvm::FunctionType *FTy =
5472 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
5473 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5474
5475 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5476 {Arg0, Arg1, PacketSize, PacketAlign}));
5477 }
5478 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
5479 case Builtin::BIget_pipe_num_packets:
5480 case Builtin::BIget_pipe_max_packets: {
5481 const char *BaseName;
5482 const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();
5483 if (BuiltinID == Builtin::BIget_pipe_num_packets)
5484 BaseName = "__get_pipe_num_packets";
5485 else
5486 BaseName = "__get_pipe_max_packets";
5487 std::string Name = std::string(BaseName) +
5488 std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
5489
5490 // Building the generic function prototype.
5491 Value *Arg0 = EmitScalarExpr(E->getArg(0));
5492 CGOpenCLRuntime OpenCLRT(CGM);
5493 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5494 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5495 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
5496 llvm::FunctionType *FTy = llvm::FunctionType::get(
5497 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5498
5499 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5500 {Arg0, PacketSize, PacketAlign}));
5501 }
5502
5503 // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
5504 case Builtin::BIto_global:
5505 case Builtin::BIto_local:
5506 case Builtin::BIto_private: {
5507 auto Arg0 = EmitScalarExpr(E->getArg(0));
5508 auto NewArgT = llvm::PointerType::get(
5509 getLLVMContext(),
5510 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
5511 auto NewRetT = llvm::PointerType::get(
5512 getLLVMContext(),
5513 CGM.getContext().getTargetAddressSpace(
5514 E->getType()->getPointeeType().getAddressSpace()));
5515 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
5516 llvm::Value *NewArg;
5517 if (Arg0->getType()->getPointerAddressSpace() !=
5518 NewArgT->getPointerAddressSpace())
5519 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
5520 else
5521 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
5522 auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
5523 auto NewCall =
5524 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
5525 return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
5526 ConvertType(E->getType())));
5527 }
5528
5529 // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
5530 // Table 6.13.17.1 specifies four overload forms of enqueue_kernel.
5531 // The code below expands the builtin call to a call to one of the following
5532 // functions that an OpenCL runtime library will have to provide:
5533 // __enqueue_kernel_basic
5534 // __enqueue_kernel_varargs
5535 // __enqueue_kernel_basic_events
5536 // __enqueue_kernel_events_varargs
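// For illustration, a four-argument call
//   enqueue_kernel(queue, flags, ndrange, block)
// maps to __enqueue_kernel_basic; blocks that take local-pointer arguments get
// their byte sizes appended and use the *_varargs forms, and calls that pass
// event lists use the *_events forms handled below.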
5537 case Builtin::BIenqueue_kernel: {
5538 StringRef Name; // Generated function call name
5539 unsigned NumArgs = E->getNumArgs();
5540
5541 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
5542 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5543 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5544
5545 llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
5546 llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
5547 LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
5548 llvm::Value *Range = NDRangeL.getAddress(*this).getPointer();
5549 llvm::Type *RangeTy = NDRangeL.getAddress(*this).getType();
5550
5551 if (NumArgs == 4) {
5552 // The most basic form of the call with parameters:
5553 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
5554 Name = "__enqueue_kernel_basic";
5555 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
5556 GenericVoidPtrTy};
5557 llvm::FunctionType *FTy = llvm::FunctionType::get(
5558 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5559
5560 auto Info =
5561 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
5562 llvm::Value *Kernel =
5563 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5564 llvm::Value *Block =
5565 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5566
5567 AttrBuilder B(Builder.getContext());
5568 B.addByValAttr(NDRangeL.getAddress(*this).getElementType());
5569 llvm::AttributeList ByValAttrSet =
5570 llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
5571
5572 auto RTCall =
5573 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
5574 {Queue, Flags, Range, Kernel, Block});
5575 RTCall->setAttributes(ByValAttrSet);
5576 return RValue::get(RTCall);
5577 }
5578 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
5579
5580 // Create a temporary array to hold the sizes of local pointer arguments
5581 // for the block. \p First is the position of the first size argument.
5582 auto CreateArrayForSizeVar = [=](unsigned First)
5583 -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
5584 llvm::APInt ArraySize(32, NumArgs - First);
5585 QualType SizeArrayTy = getContext().getConstantArrayType(
5586 getContext().getSizeType(), ArraySize, nullptr,
5587 ArraySizeModifier::Normal,
5588 /*IndexTypeQuals=*/0);
5589 auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
5590 llvm::Value *TmpPtr = Tmp.getPointer();
5591 llvm::Value *TmpSize = EmitLifetimeStart(
5592 CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
5593 llvm::Value *ElemPtr;
5594 // Each of the following arguments specifies the size of the corresponding
5595 // argument passed to the enqueued block.
5596 auto *Zero = llvm::ConstantInt::get(IntTy, 0);
5597 for (unsigned I = First; I < NumArgs; ++I) {
5598 auto *Index = llvm::ConstantInt::get(IntTy, I - First);
5599 auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr,
5600 {Zero, Index});
5601 if (I == First)
5602 ElemPtr = GEP;
5603 auto *V =
5604 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
5605 Builder.CreateAlignedStore(
5606 V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
5607 }
5608 return std::tie(ElemPtr, TmpSize, TmpPtr);
5609 };
5610
5611 // Could have events and/or varargs.
5612 if (E->getArg(3)->getType()->isBlockPointerType()) {
5613 // No events passed, but has variadic arguments.
5614 Name = "__enqueue_kernel_varargs";
5615 auto Info =
5616 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
5617 llvm::Value *Kernel =
5618 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5619 auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5620 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
5621 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
5622
5623 // Create a vector of the arguments, as well as a constant value to
5624 // express to the runtime the number of variadic arguments.
5625 llvm::Value *const Args[] = {Queue, Flags,
5626 Range, Kernel,
5627 Block, ConstantInt::get(IntTy, NumArgs - 4),
5628 ElemPtr};
5629 llvm::Type *const ArgTys[] = {
5630 QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
5631 GenericVoidPtrTy, IntTy, ElemPtr->getType()};
5632
5633 llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
5634 auto Call = RValue::get(
5635 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
5636 if (TmpSize)
5637 EmitLifetimeEnd(TmpSize, TmpPtr);
5638 return Call;
5639 }
5640 // Any calls now have event arguments passed.
5641 if (NumArgs >= 7) {
5642 llvm::PointerType *PtrTy = llvm::PointerType::get(
5643 CGM.getLLVMContext(),
5644 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
5645
5646 llvm::Value *NumEvents =
5647 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
5648
5649 // Since SemaOpenCLBuiltinEnqueueKernel allows the fifth and sixth arguments
5650 // to be null pointer constants (including a literal `0`), detect that case
5651 // and emit a null pointer directly.
5652 llvm::Value *EventWaitList = nullptr;
5653 if (E->getArg(4)->isNullPointerConstant(
5654 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
5655 EventWaitList = llvm::ConstantPointerNull::get(PtrTy);
5656 } else {
5657 EventWaitList = E->getArg(4)->getType()->isArrayType()
5658 ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
5659 : EmitScalarExpr(E->getArg(4));
5660 // Convert to generic address space.
5661 EventWaitList = Builder.CreatePointerCast(EventWaitList, PtrTy);
5662 }
5663 llvm::Value *EventRet = nullptr;
5664 if (E->getArg(5)->isNullPointerConstant(
5665 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
5666 EventRet = llvm::ConstantPointerNull::get(PtrTy);
5667 } else {
5668 EventRet =
5669 Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), PtrTy);
5670 }
5671
5672 auto Info =
5673 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
5674 llvm::Value *Kernel =
5675 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5676 llvm::Value *Block =
5677 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5678
5679 std::vector<llvm::Type *> ArgTys = {
5680 QueueTy, Int32Ty, RangeTy, Int32Ty,
5681 PtrTy, PtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
5682
5683 std::vector<llvm::Value *> Args = {Queue, Flags, Range,
5684 NumEvents, EventWaitList, EventRet,
5685 Kernel, Block};
5686
5687 if (NumArgs == 7) {
5688 // Has events but no variadics.
5689 Name = "__enqueue_kernel_basic_events";
5690 llvm::FunctionType *FTy = llvm::FunctionType::get(
5691 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5692 return RValue::get(
5693 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5694 llvm::ArrayRef<llvm::Value *>(Args)));
5695 }
5696 // Has event info and variadics
5697 // Pass the number of variadics to the runtime function too.
5698 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
5699 ArgTys.push_back(Int32Ty);
5700 Name = "__enqueue_kernel_events_varargs";
5701
5702 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
5703 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
5704 Args.push_back(ElemPtr);
5705 ArgTys.push_back(ElemPtr->getType());
5706
5707 llvm::FunctionType *FTy = llvm::FunctionType::get(
5708 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5709 auto Call =
5710 RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5711 llvm::ArrayRef<llvm::Value *>(Args)));
5712 if (TmpSize)
5713 EmitLifetimeEnd(TmpSize, TmpPtr);
5714 return Call;
5715 }
5716 [[fallthrough]];
5717 }
5718 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
5719 // parameter.
5720 case Builtin::BIget_kernel_work_group_size: {
5721 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5722 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5723 auto Info =
5724 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
5725 Value *Kernel =
5726 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5727 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5728 return RValue::get(EmitRuntimeCall(
5729 CGM.CreateRuntimeFunction(
5730 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
5731 false),
5732 "__get_kernel_work_group_size_impl"),
5733 {Kernel, Arg}));
5734 }
5735 case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
5736 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5737 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5738 auto Info =
5739 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
5740 Value *Kernel =
5741 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5742 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5743 return RValue::get(EmitRuntimeCall(
5744 CGM.CreateRuntimeFunction(
5745 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
5746 false),
5747 "__get_kernel_preferred_work_group_size_multiple_impl"),
5748 {Kernel, Arg}));
5749 }
5750 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
5751 case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
5752 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5753 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5754 LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
5755 llvm::Value *NDRange = NDRangeL.getAddress(*this).getPointer();
5756 auto Info =
5757 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
5758 Value *Kernel =
5759 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5760 Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5761 const char *Name =
5762 BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
5763 ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
5764 : "__get_kernel_sub_group_count_for_ndrange_impl";
5765 return RValue::get(EmitRuntimeCall(
5766 CGM.CreateRuntimeFunction(
5767 llvm::FunctionType::get(
5768 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
5769 false),
5770 Name),
5771 {NDRange, Kernel, Block}));
5772 }
5773
5774 case Builtin::BI__builtin_store_half:
5775 case Builtin::BI__builtin_store_halff: {
5776 Value *Val = EmitScalarExpr(E->getArg(0));
5777 Address Address = EmitPointerWithAlignment(E->getArg(1));
5778 Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
5779 Builder.CreateStore(HalfVal, Address);
5780 return RValue::get(nullptr);
5781 }
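// For illustration, __builtin_store_halff(f, p) is emitted roughly as
//   %h = fptrunc float %f to half
//   store half %h, ptr %p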
5782 case Builtin::BI__builtin_load_half: {
5783 Address Address = EmitPointerWithAlignment(E->getArg(0));
5784 Value *HalfVal = Builder.CreateLoad(Address);
5785 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
5786 }
5787 case Builtin::BI__builtin_load_halff: {
5788 Address Address = EmitPointerWithAlignment(E->getArg(0));
5789 Value *HalfVal = Builder.CreateLoad(Address);
5790 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
5791 }
5792 case Builtin::BI__builtin_printf:
5793 case Builtin::BIprintf:
5794 if (getTarget().getTriple().isNVPTX() ||
5795 getTarget().getTriple().isAMDGCN()) {
5796 if (getLangOpts().OpenMPIsTargetDevice)
5797 return EmitOpenMPDevicePrintfCallExpr(E);
5798 if (getTarget().getTriple().isNVPTX())
5799 return EmitNVPTXDevicePrintfCallExpr(E);
5800 if (getTarget().getTriple().isAMDGCN() && getLangOpts().HIP)
5801 return EmitAMDGPUDevicePrintfCallExpr(E);
5802 }
5803
5804 break;
5805 case Builtin::BI__builtin_canonicalize:
5806 case Builtin::BI__builtin_canonicalizef:
5807 case Builtin::BI__builtin_canonicalizef16:
5808 case Builtin::BI__builtin_canonicalizel:
5809 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
5810
5811 case Builtin::BI__builtin_thread_pointer: {
5812 if (!getContext().getTargetInfo().isTLSSupported())
5813 CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
5814 // Fall through - it's already mapped to the intrinsic by ClangBuiltin.
5815 break;
5816 }
5817 case Builtin::BI__builtin_os_log_format:
5818 return emitBuiltinOSLogFormat(*E);
5819
5820 case Builtin::BI__xray_customevent: {
5821 if (!ShouldXRayInstrumentFunction())
5822 return RValue::getIgnored();
5823
5824 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
5825 XRayInstrKind::Custom))
5826 return RValue::getIgnored();
5827
5828 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
5829 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
5830 return RValue::getIgnored();
5831
5832 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
5833 auto FTy = F->getFunctionType();
5834 auto Arg0 = E->getArg(0);
5835 auto Arg0Val = EmitScalarExpr(Arg0);
5836 auto Arg0Ty = Arg0->getType();
5837 auto PTy0 = FTy->getParamType(0);
5838 if (PTy0 != Arg0Val->getType()) {
5839 if (Arg0Ty->isArrayType())
5840 Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
5841 else
5842 Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
5843 }
5844 auto Arg1 = EmitScalarExpr(E->getArg(1));
5845 auto PTy1 = FTy->getParamType(1);
5846 if (PTy1 != Arg1->getType())
5847 Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
5848 return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
5849 }
5850
5851 case Builtin::BI__xray_typedevent: {
5852 // TODO: There should be a way to always emit events even if the current
5853 // function is not instrumented. Losing events in a stream can cripple
5854 // a trace.
5855 if (!ShouldXRayInstrumentFunction())
5856 return RValue::getIgnored();
5857
5858 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
5859 XRayInstrKind::Typed))
5860 return RValue::getIgnored();
5861
5862 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
5863 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
5864 return RValue::getIgnored();
5865
5866 Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
5867 auto FTy = F->getFunctionType();
5868 auto Arg0 = EmitScalarExpr(E->getArg(0));
5869 auto PTy0 = FTy->getParamType(0);
5870 if (PTy0 != Arg0->getType())
5871 Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
5872 auto Arg1 = E->getArg(1);
5873 auto Arg1Val = EmitScalarExpr(Arg1);
5874 auto Arg1Ty = Arg1->getType();
5875 auto PTy1 = FTy->getParamType(1);
5876 if (PTy1 != Arg1Val->getType()) {
5877 if (Arg1Ty->isArrayType())
5878 Arg1Val = EmitArrayToPointerDecay(Arg1).getPointer();
5879 else
5880 Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
5881 }
5882 auto Arg2 = EmitScalarExpr(E->getArg(2));
5883 auto PTy2 = FTy->getParamType(2);
5884 if (PTy2 != Arg2->getType())
5885 Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
5886 return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
5887 }
5888
5889 case Builtin::BI__builtin_ms_va_start:
5890 case Builtin::BI__builtin_ms_va_end:
5891 return RValue::get(
5892 EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
5893 BuiltinID == Builtin::BI__builtin_ms_va_start));
5894
5895 case Builtin::BI__builtin_ms_va_copy: {
5896 // Lower this manually. We can't reliably determine whether or not any
5897 // given va_copy() is for a Win64 va_list from the calling convention
5898 // alone, because it's legal to do this from a System V ABI function.
5899 // With opaque pointer types, we won't have enough information in LLVM
5900 // IR to determine this from the argument types, either. Best to do it
5901 // now, while we have enough information.
5902 Address DestAddr = EmitMSVAListRef(E->getArg(0));
5903 Address SrcAddr = EmitMSVAListRef(E->getArg(1));
5904
5905 DestAddr = DestAddr.withElementType(Int8PtrTy);
5906 SrcAddr = SrcAddr.withElementType(Int8PtrTy);
5907
5908 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
5909 return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
5910 }
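// Effect sketch (annotation, not part of the original source): on Win64 a
// va_list is a single char*, so the copy above is equivalent to
//   *(char **)dest = *(char **)src;
// rather than the aggregate copy a System V x86-64 va_list would require.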
5911
5912 case Builtin::BI__builtin_get_device_side_mangled_name: {
5913 auto Name = CGM.getCUDARuntime().getDeviceSideName(
5914 cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
5915 auto Str = CGM.GetAddrOfConstantCString(Name, "");
5916 llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0),
5917 llvm::ConstantInt::get(SizeTy, 0)};
5918 auto *Ptr = llvm::ConstantExpr::getGetElementPtr(Str.getElementType(),
5919 Str.getPointer(), Zeros);
5920 return RValue::get(Ptr);
5921 }
5922 }
5923
5924 // If this is an alias for a lib function (e.g. __builtin_sin), emit
5925 // the call using the normal call path, but using the unmangled
5926 // version of the function name.
5927 if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
5928 return emitLibraryCall(*this, FD, E,
5929 CGM.getBuiltinLibFunction(FD, BuiltinID));
5930
5931 // If this is a predefined lib function (e.g. malloc), emit the call
5932 // using exactly the normal call path.
5933 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
5934 return emitLibraryCall(*this, FD, E,
5935 cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
5936
5937 // Check that a call to a target specific builtin has the correct target
5938 // features.
5939 // This is done down here to avoid checking builtins that are not target
5940 // specific; however, if generic builtins start to require generic target
5941 // features, this check can move up to the beginning of the function.
5942 checkTargetFeatures(E, FD);
5943
5944 if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
5945 LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
5946
5947 // See if we have a target specific intrinsic.
5948 StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);
5949 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
5950 StringRef Prefix =
5951 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
5952 if (!Prefix.empty()) {
5953 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name);
5954 // NOTE: we don't need to perform a compatibility flag check here since the
5955 // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters
5956 // the MS builtins via ALL_MS_LANGUAGES, so they are filtered out earlier.
5957 if (IntrinsicID == Intrinsic::not_intrinsic)
5958 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
5959 }
5960
5961 if (IntrinsicID != Intrinsic::not_intrinsic) {
5962 SmallVector<Value *, 16> Args;
5963
5964 // Find out if any arguments are required to be integer constant
5965 // expressions.
5966 unsigned ICEArguments = 0;
5967 ASTContext::GetBuiltinTypeError Error;
5968 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5969 assert(Error == ASTContext::GE_None && "Should not codegen an error");
5970
5971 Function *F = CGM.getIntrinsic(IntrinsicID);
5972 llvm::FunctionType *FTy = F->getFunctionType();
5973
5974 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
5975 Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
5976 // If the intrinsic arg type is different from the builtin arg type
5977 // we need to do a bit cast.
5978 llvm::Type *PTy = FTy->getParamType(i);
5979 if (PTy != ArgValue->getType()) {
5980 // XXX - vector of pointers?
5981 if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
5982 if (PtrTy->getAddressSpace() !=
5983 ArgValue->getType()->getPointerAddressSpace()) {
5984 ArgValue = Builder.CreateAddrSpaceCast(
5985 ArgValue, llvm::PointerType::get(getLLVMContext(),
5986 PtrTy->getAddressSpace()));
5987 }
5988 }
5989
5990 // Cast a vector type (e.g., v256i32) to x86_amx; this only happens
5991 // in AMX intrinsics.
5992 if (PTy->isX86_AMXTy())
5993 ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
5994 {ArgValue->getType()}, {ArgValue});
5995 else
5996 ArgValue = Builder.CreateBitCast(ArgValue, PTy);
5997 }
5998
5999 Args.push_back(ArgValue);
6000 }
6001
6002 Value *V = Builder.CreateCall(F, Args);
6003 QualType BuiltinRetType = E->getType();
6004
6005 llvm::Type *RetTy = VoidTy;
6006 if (!BuiltinRetType->isVoidType())
6007 RetTy = ConvertType(BuiltinRetType);
6008
6009 if (RetTy != V->getType()) {
6010 // XXX - vector of pointers?
6011 if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
6012 if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
6013 V = Builder.CreateAddrSpaceCast(
6014 V, llvm::PointerType::get(getLLVMContext(),
6015 PtrTy->getAddressSpace()));
6016 }
6017 }
6018
6019 // Cast x86_amx to a vector type (e.g., v256i32); this only happens
6020 // in AMX intrinsics.
6021 if (V->getType()->isX86_AMXTy())
6022 V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy},
6023 {V});
6024 else
6025 V = Builder.CreateBitCast(V, RetTy);
6026 }
6027
6028 if (RetTy->isVoidTy())
6029 return RValue::get(nullptr);
6030
6031 return RValue::get(V);
6032 }
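// Example of the AMX special case above (annotation, not part of the original
// source): an argument of type <256 x i32> bound to an x86_amx parameter is
// wrapped in llvm.x86.cast.vector.to.tile, and an x86_amx result is unwrapped
// with llvm.x86.cast.tile.to.vector, because the builtin signatures use plain
// vector types while the intrinsics operate on tiles.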
6033
6034 // Some target-specific builtins can have aggregate return values, e.g.
6035 // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
6036 // ReturnValue to be non-null, so that the target-specific emission code can
6037 // always just emit into it.
6038 TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());
6039 if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
6040 Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
6041 ReturnValue = ReturnValueSlot(DestPtr, false);
6042 }
6043
6044 // Now see if we can emit a target-specific builtin.
6045 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
6046 switch (EvalKind) {
6047 case TEK_Scalar:
6048 if (V->getType()->isVoidTy())
6049 return RValue::get(nullptr);
6050 return RValue::get(V);
6051 case TEK_Aggregate:
6052 return RValue::getAggregate(ReturnValue.getValue(),
6053 ReturnValue.isVolatile());
6054 case TEK_Complex:
6055 llvm_unreachable("No current target builtin returns complex");
6056 }
6057 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6058 }
6059
6060 // EmitHLSLBuiltinExpr will check getLangOpts().HLSL
6061 if (Value *V = EmitHLSLBuiltinExpr(BuiltinID, E))
6062 return RValue::get(V);
6063
6064 if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
6065 return EmitHipStdParUnsupportedBuiltin(this, FD);
6066
6067 ErrorUnsupported(E, "builtin function");
6068
6069 // Unknown builtin, for now just dump it out and return undef.
6070 return GetUndefRValue(E->getType());
6071}
6072
6073static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
6074 unsigned BuiltinID, const CallExpr *E,
6075 ReturnValueSlot ReturnValue,
6076 llvm::Triple::ArchType Arch) {
6077 // When compiling in HipStdPar mode we have to be conservative in rejecting
6078 // target specific features in the FE, and defer the possible error to the
6079 // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
6080 // referenced by an accelerator executable function, we emit an error.
6081 // Returning nullptr here leads to the builtin being handled in
6082 // EmitHipStdParUnsupportedBuiltin.
6083 if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice &&
6084 Arch != CGF->getTarget().getTriple().getArch())
6085 return nullptr;
6086
6087 switch (Arch) {
6088 case llvm::Triple::arm:
6089 case llvm::Triple::armeb:
6090 case llvm::Triple::thumb:
6091 case llvm::Triple::thumbeb:
6092 return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
6093 case llvm::Triple::aarch64:
6094 case llvm::Triple::aarch64_32:
6095 case llvm::Triple::aarch64_be:
6096 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
6097 case llvm::Triple::bpfeb:
6098 case llvm::Triple::bpfel:
6099 return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
6100 case llvm::Triple::x86:
6101 case llvm::Triple::x86_64:
6102 return CGF->EmitX86BuiltinExpr(BuiltinID, E);
6103 case llvm::Triple::ppc:
6104 case llvm::Triple::ppcle:
6105 case llvm::Triple::ppc64:
6106 case llvm::Triple::ppc64le:
6107 return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
6108 case llvm::Triple::r600:
6109 case llvm::Triple::amdgcn:
6110 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6111 case llvm::Triple::systemz:
6112 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
6113 case llvm::Triple::nvptx:
6114 case llvm::Triple::nvptx64:
6115 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
6116 case llvm::Triple::wasm32:
6117 case llvm::Triple::wasm64:
6118 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
6119 case llvm::Triple::hexagon:
6120 return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
6121 case llvm::Triple::riscv32:
6122 case llvm::Triple::riscv64:
6123 return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
6124 default:
6125 return nullptr;
6126 }
6127}
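// Dispatch note (annotation, not part of the original source): for an
// architecture not listed above, the default case returns nullptr, and
// EmitBuiltinExpr then reports the call through ErrorUnsupported as an
// unsupported "builtin function".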
6128
6129Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
6130 const CallExpr *E,
6131 ReturnValueSlot ReturnValue) {
6132 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
6133 assert(getContext().getAuxTargetInfo() && "Missing aux target info");
6134 return EmitTargetArchBuiltinExpr(
6135 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
6136 ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
6137 }
6138
6139 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
6140 getTarget().getTriple().getArch());
6141}
6142
6143static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
6144 NeonTypeFlags TypeFlags,
6145 bool HasLegalHalfType = true,
6146 bool V1Ty = false,
6147 bool AllowBFloatArgsAndRet = true) {
6148 int IsQuad = TypeFlags.isQuad();
6149 switch (TypeFlags.getEltType()) {
6150 case NeonTypeFlags::Int8:
6151 case NeonTypeFlags::Poly8:
6152 return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
6153 case NeonTypeFlags::Int16:
6154 case NeonTypeFlags::Poly16:
6155 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6156 case NeonTypeFlags::BFloat16:
6157 if (AllowBFloatArgsAndRet)
6158 return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
6159 else
6160 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6161 case NeonTypeFlags::Float16:
6162 if (HasLegalHalfType)
6163 return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
6164 else
6165 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6166 case NeonTypeFlags::Int32:
6167 return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
6168 case NeonTypeFlags::Int64:
6169 case NeonTypeFlags::Poly64:
6170 return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
6171 case NeonTypeFlags::Poly128:
6172 // FIXME: i128 and f128 don't get full support in Clang and LLVM;
6173 // a lot of the i128 and f128 API is missing,
6174 // so we use v16i8 to represent poly128 and get it pattern matched.
6175 return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
6176 case NeonTypeFlags::Float32:
6177 return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
6178 case NeonTypeFlags::Float64:
6179 return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
6180 }
6181 llvm_unreachable("Unknown vector element type!");
6182}
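// Worked example (annotation, not part of the original source): for
// NeonTypeFlags::Int32, the quad flag doubles the lane count of the 64-bit
// vector, so a D-register type is <2 x i32> and a Q-register type is <4 x i32>.
static_assert((2 << 0) == 2 && (2 << 1) == 4,
              "quad NEON types have twice the lanes of their 64-bit forms");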
6183
6184static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
6185 NeonTypeFlags IntTypeFlags) {
6186 int IsQuad = IntTypeFlags.isQuad();
6187 switch (IntTypeFlags.getEltType()) {
6188 case NeonTypeFlags::Int16:
6189 return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
6190 case NeonTypeFlags::Int32:
6191 return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
6192 case NeonTypeFlags::Int64:
6193 return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
6194 default:
6195 llvm_unreachable("Type can't be converted to floating-point!");
6196 }
6197}
6198
6199Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
6200 const ElementCount &Count) {
6201 Value *SV = llvm::ConstantVector::getSplat(Count, C);
6202 return Builder.CreateShuffleVector(V, V, SV, "lane");
6203}
6204
6205Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
6206 ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
6207 return EmitNeonSplat(V, C, EC);
6208}
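// Worked example (annotation, not part of the original source): with C equal
// to i32 1 and a four-lane input, ConstantVector::getSplat builds the shuffle
// mask <1, 1, 1, 1>, so the shufflevector above broadcasts lane 1 of V into
// every lane of the result.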
6209
6210Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value *> &Ops,
6211 const char *name,
6212 unsigned shift, bool rightshift) {
6213 unsigned j = 0;
6214 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
6215 ai != ae; ++ai, ++j) {
6216 if (F->isConstrainedFPIntrinsic())
6217 if (ai->getType()->isMetadataTy())
6218 continue;
6219 if (shift > 0 && shift == j)
6220 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
6221 else
6222 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
6223 }
6224
6225 if (F->isConstrainedFPIntrinsic())
6226 return Builder.CreateConstrainedFPCall(F, Ops, name);
6227 else
6228 return Builder.CreateCall(F, Ops, name);
6229}
6230
6231Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
6232 bool neg) {
6233 int SV = cast<ConstantInt>(V)->getSExtValue();
6234 return ConstantInt::get(Ty, neg ? -SV : SV);
6235}
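// Note (annotation, not part of the original source): ConstantInt::get with a
// vector type returns a splat constant, so the scalar immediate above becomes
// a per-lane shift amount; the neg flag lets callers such as EmitNeonCall
// encode right shifts as negative shift amounts.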
6236
6237// Right-shift a vector by a constant.
6238Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
6239 llvm::Type *Ty, bool usgn,
6240 const char *name) {
6241 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6242
6243 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
6244 int EltSize = VTy->getScalarSizeInBits();
6245
6246 Vec = Builder.CreateBitCast(Vec, Ty);
6247
6248 // lshr/ashr are undefined when the shift amount is equal to the vector
6249 // element size.
6250 if (ShiftAmt == EltSize) {
6251 if (usgn) {
6252 // Right-shifting an unsigned value by its size yields 0.
6253 return llvm::ConstantAggregateZero::get(VTy);
6254 } else {
6255 // Right-shifting a signed value by its size is equivalent
6256 // to a shift of size-1.
6257 --ShiftAmt;
6258 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
6259 }
6260 }
6261
6262 Shift = EmitNeonShiftVector(Shift, Ty, false);
6263 if (usgn)
6264 return Builder.CreateLShr(Vec, Shift, name);
6265 else
6266 return Builder.CreateAShr(Vec, Shift, name);
6267}
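// Worked example (annotation, not part of the original source): shifting a
// v4i32 vector right by 32 is undefined for lshr/ashr, so the unsigned form
// folds to the zero vector and the signed form is clamped to an ashr by 31,
// which still yields all sign bits in every lane.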
6268
6269enum {
6270 AddRetType = (1 << 0),
6271 Add1ArgType = (1 << 1),
6272 Add2ArgTypes = (1 << 2),
6273
6274 VectorizeRetType = (1 << 3),
6275 VectorizeArgTypes = (1 << 4),
6276
6277 InventFloatType = (1 << 5),
6278 UnsignedAlts = (1 << 6),
6279
6280 Use64BitVectors = (1 << 7),
6281 Use128BitVectors = (1 << 8),
6282
6283 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
6284 VectorRet = AddRetType | VectorizeRetType,
6285 VectorRetGetArgs01 =
6286 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
6287 FpCmpzModifiers =
6288 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
6289};
6290
6291namespace {
6292struct ARMVectorIntrinsicInfo {
6293 const char *NameHint;
6294 unsigned BuiltinID;
6295 unsigned LLVMIntrinsic;
6296 unsigned AltLLVMIntrinsic;
6297 unsigned TypeModifier;
6298
6299 bool operator<(unsigned RHSBuiltinID) const {
6300 return BuiltinID < RHSBuiltinID;
6301 }
6302 bool operator<(const ARMVectorIntrinsicInfo &TE) const {
6303 return BuiltinID < TE.BuiltinID;
6304 }
6305};
6306} // end anonymous namespace
6307
6308#define NEONMAP0(NameBase) \
6309 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
6310
6311#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
6312 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, \
6313 Intrinsic::LLVMIntrinsic, 0, TypeModifier }
6314
6315#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
6316 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, \
6317 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
6318 TypeModifier }
6319
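// Expansion sketch (annotation, not part of the original source): an entry
// such as NEONMAP1(vabs_v, arm_neon_vabs, 0) expands to
//   { "vabs_v", NEON::BI__builtin_neon_vabs_v, Intrinsic::arm_neon_vabs, 0, 0 }
// i.e. NameHint, BuiltinID, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier,
// which is how the maps below associate each NEON builtin with its LLVM
// intrinsic and type-modifier flags.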
6320static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[] = {
6321 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
6322 NEONMAP0(splat_lane_v),
6323 NEONMAP0(splat_laneq_v),
6324 NEONMAP0(splatq_lane_v),
6325 NEONMAP0(splatq_laneq_v),
6326 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6327 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6328 NEONMAP1(vabs_v, arm_neon_vabs, 0),
6329 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
6330 NEONMAP0(vadd_v),
6331 NEONMAP0(vaddhn_v),
6332 NEONMAP0(vaddq_v),
6333 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
6334 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
6335 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
6336 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
6337 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
6338 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
6339 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
6340 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
6341 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
6342 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
6343 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
6344 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6345 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6346 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6347 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6348 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6349 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6350 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
6351 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6352 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6353 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
6354 NEONMAP1(vcage_v, arm_neon_vacge, 0),
6355 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
6356 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
6357 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
6358 NEONMAP1(vcale_v, arm_neon_vacge, 0),
6359 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
6360 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
6361 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
6362 NEONMAP0(vceqz_v),
6363 NEONMAP0(vceqzq_v),
6364 NEONMAP0(vcgez_v),
6365 NEONMAP0(vcgezq_v),
6366 NEONMAP0(vcgtz_v),
6367 NEONMAP0(vcgtzq_v),
6368 NEONMAP0(vclez_v),
6369 NEONMAP0(vclezq_v),
6370 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
6371 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
6372 NEONMAP0(vcltz_v),
6373 NEONMAP0(vcltzq_v),
6374 NEONMAP1(vclz_v, ctlz, Add1ArgType),
6375 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6376 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
6377 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
6378 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
6379 NEONMAP0(vcvt_f16_s16),
6380 NEONMAP0(vcvt_f16_u16),
6381 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
6382 NEONMAP0(vcvt_f32_v),
6383 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6384 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6385 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6386 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6387 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6388 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6389 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6390 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6391 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6392 NEONMAP0(vcvt_s16_f16),
6393 NEONMAP0(vcvt_s32_v),
6394 NEONMAP0(vcvt_s64_v),
6395 NEONMAP0(vcvt_u16_f16),
6396 NEONMAP0(vcvt_u32_v),
6397 NEONMAP0(vcvt_u64_v),
6398 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
6399 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
6400 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
6401 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
6402 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
6403 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
6404 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
6405 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
6406 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
6407 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
6408 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
6409 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
6410 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
6411 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
6412 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
6413 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
6414 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
6415 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
6416 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
6417 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
6418 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
6419 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
6420 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
6421 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
6422 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
6423 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
6424 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
6425 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
6426 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
6427 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
6428 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
6429 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
6430 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
6431 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
6432 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
6433 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
6434 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
6435 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
6436 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
6437 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
6438 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
6439 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
6440 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
6441 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
6442 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
6443 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
6444 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
6445 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
6446 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
6447 NEONMAP0(vcvtq_f16_s16),
6448 NEONMAP0(vcvtq_f16_u16),
6449 NEONMAP0(vcvtq_f32_v),
6450 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6451 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6452 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6453 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6454 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6455 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6456 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6457 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6458 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6459 NEONMAP0(vcvtq_s16_f16),
6460 NEONMAP0(vcvtq_s32_v),
6461 NEONMAP0(vcvtq_s64_v),
6462 NEONMAP0(vcvtq_u16_f16),
6463 NEONMAP0(vcvtq_u32_v),
6464 NEONMAP0(vcvtq_u64_v),
6465 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
6466 NEONMAP1(vdot_u32, arm_neon_udot, 0),
6467 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
6468 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
6469 NEONMAP0(vext_v),
6470 NEONMAP0(vextq_v),
6471 NEONMAP0(vfma_v),
6472 NEONMAP0(vfmaq_v),
6473 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
6474 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
6475 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
6476 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
6477 NEONMAP0(vld1_dup_v),
6478 NEONMAP1(vld1_v, arm_neon_vld1, 0),
6479 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
6480 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
6481 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
6482 NEONMAP0(vld1q_dup_v),
6483 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
6484 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
6485 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
6486 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
6487 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
6488 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
6489 NEONMAP1(vld2_v, arm_neon_vld2, 0),
6490 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
6491 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
6492 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
6493 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
6494 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
6495 NEONMAP1(vld3_v, arm_neon_vld3, 0),
6496 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
6497 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
6498 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
6499 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
6500 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
6501 NEONMAP1(vld4_v, arm_neon_vld4, 0),
6502 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
6503 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
6504 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
6505 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
6506 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
6507 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
6508 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
6509 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
6510 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
6511 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
6512 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
6513 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
6514 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
6515 NEONMAP0(vmovl_v),
6516 NEONMAP0(vmovn_v),
6517 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
6518 NEONMAP0(vmull_v),
6519 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
6520 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
6521 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
6522 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
6523 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
6524 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
6525 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
6526 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
6527 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
6528 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
6529 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
6530 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
6531 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
6532 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
6533 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
6534 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
6535 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
6536 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
6537 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
6538 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
6539 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
6540 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
6541 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
6542 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
6543 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
6544 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
6545 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
6546 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
6547 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
6548 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
6549 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
6550 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
6551 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
6552 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
6553 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
6554 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
6555 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
6556 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
6557 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
6558 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
6559 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
6560 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
6561 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
6562 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
6563 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
6564 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
6565 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
6566 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
6567 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
6568 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
6569 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
6570 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
6571 NEONMAP0(vrndi_v),
6572 NEONMAP0(vrndiq_v),
6573 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
6574 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
6575 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
6576 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
6577 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
6578 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
6579 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
6580 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
6581 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
6582 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
6583 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
6584 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
6585 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
6586 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
6587 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
6588 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
6589 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
6590 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
6591 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
6592 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
6593 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
6594 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
6595 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
6596 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
6597 NEONMAP0(vshl_n_v),
6598 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
6599 NEONMAP0(vshll_n_v),
6600 NEONMAP0(vshlq_n_v),
6601 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
6602 NEONMAP0(vshr_n_v),
6603 NEONMAP0(vshrn_n_v),
6604 NEONMAP0(vshrq_n_v),
6605 NEONMAP1(vst1_v, arm_neon_vst1, 0),
6606 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
6607 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
6608 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
6609 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
6610 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
6611 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
6612 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
6613 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
6614 NEONMAP1(vst2_v, arm_neon_vst2, 0),
6615 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
6616 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
6617 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
6618 NEONMAP1(vst3_v, arm_neon_vst3, 0),
6619 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
6620 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
6621 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
6622 NEONMAP1(vst4_v, arm_neon_vst4, 0),
6623 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
6624 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
6625 NEONMAP0(vsubhn_v),
6626 NEONMAP0(vtrn_v),
6627 NEONMAP0(vtrnq_v),
6628 NEONMAP0(vtst_v),
6629 NEONMAP0(vtstq_v),
6630 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
6631 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
6632 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
6633 NEONMAP0(vuzp_v),
6634 NEONMAP0(vuzpq_v),
6635 NEONMAP0(vzip_v),
6636 NEONMAP0(vzipq_v)
6637};
6638
6639static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
6640 NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0),
6641 NEONMAP0(splat_lane_v),
6642 NEONMAP0(splat_laneq_v),
6643 NEONMAP0(splatq_lane_v),
6644 NEONMAP0(splatq_laneq_v),
6645 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
6646 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
6647 NEONMAP0(vadd_v),
6648 NEONMAP0(vaddhn_v),
6649 NEONMAP0(vaddq_p128),
6650 NEONMAP0(vaddq_v),
6651 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
6652 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
6653 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
6654 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
6655 NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6656 NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6657 NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6658 NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6659 NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6660 NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6661 NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6662 NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6663 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
6664 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
6665 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
6666 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
6667 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
6668 NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
6669 NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
6670 NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
6671 NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
6672 NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
6673 NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
6674 NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
6675 NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
6676 NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
6677 NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
6678 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
6679 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
6680 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
6681 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
6682 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
6683 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
6684 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
6685 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
6686 NEONMAP0(vceqz_v),
6687 NEONMAP0(vceqzq_v),
6688 NEONMAP0(vcgez_v),
6689 NEONMAP0(vcgezq_v),
6690 NEONMAP0(vcgtz_v),
6691 NEONMAP0(vcgtzq_v),
6692 NEONMAP0(vclez_v),
6693 NEONMAP0(vclezq_v),
6694 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
6695 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
6696 NEONMAP0(vcltz_v),
6697 NEONMAP0(vcltzq_v),
6698 NEONMAP1(vclz_v, ctlz, Add1ArgType),
6699 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6700 NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
6701 NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
6702 NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
6703 NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
6704 NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
6705 NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
6706 NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
6707 NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
6708 NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
6709 NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
6710 NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
6711 NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
6712 NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
6713 NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
6714 NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
6715 NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
6716 NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
6717 NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
6718 NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
6719 NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
6720 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
6721 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
6722 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
6723 NEONMAP0(vcvt_f16_s16),
6724 NEONMAP0(vcvt_f16_u16),
6725 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
6726 NEONMAP0(vcvt_f32_v),
6727 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
6728 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
6729 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6730 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6731 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
6732 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
6733 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
6734 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
6735 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
6736 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
6737 NEONMAP0(vcvtq_f16_s16),
6738 NEONMAP0(vcvtq_f16_u16),
6739 NEONMAP0(vcvtq_f32_v),
6740 NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0),
6741 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
6742 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
6743 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6744 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6745 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
6746 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
6747 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
6748 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
6749 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
6750 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
6751 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
6752 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
6753 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
6754 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
6755 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
6756 NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6757 NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6758 NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6759 NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6760 NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6761 NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6762 NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6763 NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6764 NEONMAP0(vext_v),
6765 NEONMAP0(vextq_v),
6766 NEONMAP0(vfma_v),
6767 NEONMAP0(vfmaq_v),
6768 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
6769 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
6770 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
6771 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
6772 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
6773 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
6774 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
6775 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
6776 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
6777 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
6778 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
6779 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
6780 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
6781 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
6782 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
6783 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
6784 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
6785 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
6786 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
6787 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
6788 NEONMAP0(vmovl_v),
6789 NEONMAP0(vmovn_v),
6790 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
6791 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
6792 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
6793 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
6794 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
6795 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
6796 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
6797 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
6798 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
6799 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
6800 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
6801 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
6802 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
6803 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
6804 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
6805 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
6806 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
6807 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
6808 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
6809 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
6810 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
6811 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
6812 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
6813 NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
6814 NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
6815 NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
6816 NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
6817 NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
6818 NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
6819 NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
6820 NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
6821 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
6822 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
6823 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
6824 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
6825 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
6826 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
6827 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
6828 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
6829 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
6830 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
6831 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
6832 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
6833 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
6834 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
6835 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
6836 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
6837 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
6838 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
6839 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
6840 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
6841 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
6842 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
6843 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
6844 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
6845 NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
6846 NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
6847 NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
6848 NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
6849 NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
6850 NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
6851 NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
6852 NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
6853 NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
6854 NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
6855 NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
6856 NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
6857 NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
6858 NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
6859 NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
6860 NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
6861 NEONMAP0(vrndi_v),
6862 NEONMAP0(vrndiq_v),
6863 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
6864 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
6865 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
6866 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
6867 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
6868 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
6869 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
6870 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
6871 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
6872 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
6873 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
6874 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
6875 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
6876 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
6877 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
6878 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
6879 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
6880 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
6881 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
6882 NEONMAP0(vshl_n_v),
6883 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
6884 NEONMAP0(vshll_n_v),
6885 NEONMAP0(vshlq_n_v),
6886 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
6887 NEONMAP0(vshr_n_v),
6888 NEONMAP0(vshrn_n_v),
6889 NEONMAP0(vshrq_n_v),
6890 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
6891 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
6892 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
6893 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
6894 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
6895 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
6896 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
6897 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
6898 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
6899 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
6900 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
6901 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
6902 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
6903 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
6904 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
6905 NEONMAP0(vsubhn_v),
6906 NEONMAP0(vtst_v),
6907 NEONMAP0(vtstq_v),
6908 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
6909 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
6910 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
6911 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
6912};
6913
6914static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
6915 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
6916 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
6917 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
6918 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
6919 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
6920 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
6921 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
6922 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
6923 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
6924 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6925 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
6926 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
6927 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
6928 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
6929 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6930 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6931 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
6932 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
6933 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
6934 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
6935 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
6936 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
6937 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
6938 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
6939 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
6940 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
6941 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
6942 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
6943 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
6944 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
6945 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
6946 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
6947 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
6948 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
6949 NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
6950 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
6951 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
6952 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
6953 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
6954 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
6955 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
6956 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
6957 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
6958 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
6959 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
6960 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
6961 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
6962 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
6963 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
6964 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
6965 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
6966 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
6967 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
6968 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
6969 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
6970 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
6971 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
6972 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
6973 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
6974 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
6975 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
6976 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
6977 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
6978 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
6979 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
6980 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
6981 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
6982 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
6983 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
6984 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
6985 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
6986 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
6987 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
6988 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
6989 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
6990 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
6991 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
6992 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6993 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6994 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
6995 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
6996 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
6997 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
6998 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
6999 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7000 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7001 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7002 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7003 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
7004 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7005 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
7006 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7007 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7008 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
7009 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
7010 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7011 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7012 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
7013 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
7014 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
7015 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
7016 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
7017 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
7018 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
7019 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
7020 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7021 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7022 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7023 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7024 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
7025 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7026 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7027 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7028 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
7029 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7030 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
7031 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
7032 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7033 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
7034 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7035 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
7036 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
7037 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7038 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7039 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
7040 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
7041 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7042 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7043 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
7044 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
7045 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
7046 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
7047 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7048 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7049 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7050 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7051 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
7052 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7053 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7054 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7055 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7056 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7057 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7058 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
7059 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
7060 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7061 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7062 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7063 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7064 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
7065 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
7066 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
7067 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
7068 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7069 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7070 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
7071 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
7072 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
7073 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7074 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7075 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7076 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7077 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
7078 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7079 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7080 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7081 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7082 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
7083 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
7084 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7085 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7086 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
7087 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
7088 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
7089 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
7090 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
7091 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
7092 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
7093 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
7094 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
7095 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
7096 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
7097 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
7098 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
7099 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
7100 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
7101 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
7102 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
7103 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
7104 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
7105 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
7106 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7107 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
7108 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7109 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
7110 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
7111 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
7112 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7113 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
7114 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7115 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
7116 // FP16 scalar intrinsics go here.
7117 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
7118 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7119 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7120 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7121 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7122 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7123 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7124 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7125 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7126 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7127 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7128 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7129 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7130 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7131 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7132 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7133 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7134 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7135 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7136 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7137 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7138 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7139 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7140 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7141 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7142 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7143 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7144 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7145 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7146 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
7147 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
7148 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
7149 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
7150 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
7151};
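// As a rough illustration of the table above: each NEONMAP1 row ties a scalar
// NEON builtin to an overloaded LLVM intrinsic plus type-modifier flags. A row
// such as vqaddd_s64 with Add1ArgType resolves to llvm.aarch64.neon.sqadd.i64,
// while rows marked Vectorize1ArgType | Use64BitVectors promote the scalar to
// a 64-bit vector before the call; EmitCommonNeonSISDBuiltinExpr below does
// the corresponding insert/extract of lane 0.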
7152
7153// Some intrinsics are equivalent for codegen.
7154static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
7155 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
7156 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
7157 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
7158 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
7159 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
7160 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
7161 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
7162 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
7163 { NEON::BI__builtin_neon_vbsl_f16, NEON::BI__builtin_neon_vbsl_v, },
7164 { NEON::BI__builtin_neon_vbslq_f16, NEON::BI__builtin_neon_vbslq_v, },
7165 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
7166 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
7167 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
7168 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
7169 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
7170 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
7171 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
7172 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
7173 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
7174 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
7175 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
7176 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
7177 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
7178 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
7179 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
7180 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
7181 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
7182 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
7183 { NEON::BI__builtin_neon_vext_f16, NEON::BI__builtin_neon_vext_v, },
7184 { NEON::BI__builtin_neon_vextq_f16, NEON::BI__builtin_neon_vextq_v, },
7185 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
7186 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
7187 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
7188 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
7189 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
7190 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
7191 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
7192 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
7193 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
7194 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
7195 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
7196 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
7197 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
7198 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
7199 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
7200 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
7201 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
7202 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
7203 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
7204 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
7205 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
7206 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
7207 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
7208 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
7209 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
7210 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
7211 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
7212 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
7213 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
7214 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
7215 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
7216 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
7217 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
7218 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
7219 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
7220 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
7221 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
7222 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
7223 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
7224 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
7225 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
7226 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
7227 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
7228 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
7229 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
7230 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
7231 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
7232 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
7233 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
7234 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
7235 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
7236 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
7237 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
7238 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
7239 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
7240 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
7241 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
7242 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
7243 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
7244 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
7245 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
7246 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
7247 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
7248 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
7249 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
7250 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
7251 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
7252 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
7253 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
7254 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
7255 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
7256 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
7257 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
7258 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
7259 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
7260 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
7261 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
7262 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
7263 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
7264 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
7265 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
7266 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
7267 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
7268 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
7269 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
7270 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
7271 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
7272 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
7273 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
7274 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
7275 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
7276 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
7277 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
7278 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
7279 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
7280 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
7281 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
7282 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
7283 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
7284 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
7285 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
7286 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
7287 { NEON::BI__builtin_neon_vtrn_f16, NEON::BI__builtin_neon_vtrn_v, },
7288 { NEON::BI__builtin_neon_vtrnq_f16, NEON::BI__builtin_neon_vtrnq_v, },
7289 { NEON::BI__builtin_neon_vuzp_f16, NEON::BI__builtin_neon_vuzp_v, },
7290 { NEON::BI__builtin_neon_vuzpq_f16, NEON::BI__builtin_neon_vuzpq_v, },
7291 { NEON::BI__builtin_neon_vzip_f16, NEON::BI__builtin_neon_vzip_v, },
7292 { NEON::BI__builtin_neon_vzipq_f16, NEON::BI__builtin_neon_vzipq_v, },
7293 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
7294 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
7295 // arbitrary one to be handled as the canonical variant.
7296 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7297 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7298 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7299 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7300 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7301 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7302 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7303 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7304 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7305 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7306 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7307 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7308};
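// For example, a call to vmaxq_f16 is code-generated exactly like vmaxq_v: the
// pair above only remaps the builtin ID before the common NEON lowering runs,
// so the f16 and bf16 forms need no dedicated code paths.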
7309
7310#undef NEONMAP0
7311#undef NEONMAP1
7312#undef NEONMAP2
7313
7314#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7315 { \
7316 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7317 TypeModifier \
7318 }
7319
7320#define SVEMAP2(NameBase, TypeModifier) \
7321 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
7322static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
7323#define GET_SVE_LLVM_INTRINSIC_MAP
7324#include "clang/Basic/arm_sve_builtin_cg.inc"
7325#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
7326#undef GET_SVE_LLVM_INTRINSIC_MAP
7327};
7328
7329#undef SVEMAP1
7330#undef SVEMAP2
7331
7332#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7333 { \
7334 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7335 TypeModifier \
7336 }
7337
7338#define SMEMAP2(NameBase, TypeModifier) \
7339 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
7340static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
7341#define GET_SME_LLVM_INTRINSIC_MAP
7342#include "clang/Basic/arm_sme_builtin_cg.inc"
7343#undef GET_SME_LLVM_INTRINSIC_MAP
7344};
7345
7346#undef SMEMAP1
7347#undef SMEMAP2
7348
7349static bool NEONSIMDIntrinsicsProvenSorted = false;
7350
7351static bool AArch64SIMDIntrinsicsProvenSorted = false;
7352static bool AArch64SISDIntrinsicsProvenSorted = false;
7353static bool AArch64SVEIntrinsicsProvenSorted = false;
7354static bool AArch64SMEIntrinsicsProvenSorted = false;
7355
7356static const ARMVectorIntrinsicInfo *
7357findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
7358 unsigned BuiltinID, bool &MapProvenSorted) {
7359
7360#ifndef NDEBUG
7361 if (!MapProvenSorted) {
7362 assert(llvm::is_sorted(IntrinsicMap));
7363 MapProvenSorted = true;
7364 }
7365#endif
7366
7367 const ARMVectorIntrinsicInfo *Builtin =
7368 llvm::lower_bound(IntrinsicMap, BuiltinID);
7369
7370 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
7371 return Builtin;
7372
7373 return nullptr;
7374}
7375
7376Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
7377 unsigned Modifier,
7378 llvm::Type *ArgType,
7379 const CallExpr *E) {
7380 int VectorSize = 0;
7381 if (Modifier & Use64BitVectors)
7382 VectorSize = 64;
7383 else if (Modifier & Use128BitVectors)
7384 VectorSize = 128;
7385
7386 // Return type.
7387 SmallVector<llvm::Type *, 3> Tys;
7388 if (Modifier & AddRetType) {
7389 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
7390 if (Modifier & VectorizeRetType)
7391 Ty = llvm::FixedVectorType::get(
7392 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
7393
7394 Tys.push_back(Ty);
7395 }
7396
7397 // Arguments.
7398 if (Modifier & VectorizeArgTypes) {
7399 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
7400 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
7401 }
7402
7403 if (Modifier & (Add1ArgType | Add2ArgTypes))
7404 Tys.push_back(ArgType);
7405
7406 if (Modifier & Add2ArgTypes)
7407 Tys.push_back(ArgType);
7408
7409 if (Modifier & InventFloatType)
7410 Tys.push_back(FloatTy);
7411
7412 return CGM.getIntrinsic(IntrinsicID, Tys);
7413}
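// Worked example (a sketch): for vqmovnd_s64 the map specifies
// AddRetType | Add1ArgType, so Tys becomes { i32, i64 } and the lookup yields
// llvm.aarch64.neon.scalar.sqxtn.i32.i64. For vqaddh_s16, marked
// Vectorize1ArgType | Use64BitVectors, the i16 argument type is widened to
// <4 x i16>, yielding llvm.aarch64.neon.sqadd.v4i16.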
7414
7415static Value *EmitCommonNeonSISDBuiltinExpr(
7416 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
7417 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
7418 unsigned BuiltinID = SISDInfo.BuiltinID;
7419 unsigned int Int = SISDInfo.LLVMIntrinsic;
7420 unsigned Modifier = SISDInfo.TypeModifier;
7421 const char *s = SISDInfo.NameHint;
7422
7423 switch (BuiltinID) {
7424 case NEON::BI__builtin_neon_vcled_s64:
7425 case NEON::BI__builtin_neon_vcled_u64:
7426 case NEON::BI__builtin_neon_vcles_f32:
7427 case NEON::BI__builtin_neon_vcled_f64:
7428 case NEON::BI__builtin_neon_vcltd_s64:
7429 case NEON::BI__builtin_neon_vcltd_u64:
7430 case NEON::BI__builtin_neon_vclts_f32:
7431 case NEON::BI__builtin_neon_vcltd_f64:
7432 case NEON::BI__builtin_neon_vcales_f32:
7433 case NEON::BI__builtin_neon_vcaled_f64:
7434 case NEON::BI__builtin_neon_vcalts_f32:
7435 case NEON::BI__builtin_neon_vcaltd_f64:
7436 // Only one direction of the comparisons actually exists; cmle is really a
7437 // cmge with swapped operands. The table gives us the right intrinsic, but
7438 // we still need to do the swap here.
7439 std::swap(Ops[0], Ops[1]);
7440 break;
7441 }
7442
7443 assert(Int && "Generic code assumes a valid intrinsic");
7444
7445 // Determine the type(s) of this overloaded AArch64 intrinsic.
7446 const Expr *Arg = E->getArg(0);
7447 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
7448 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
7449
7450 int j = 0;
7451 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
7452 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
7453 ai != ae; ++ai, ++j) {
7454 llvm::Type *ArgTy = ai->getType();
7455 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
7456 ArgTy->getPrimitiveSizeInBits())
7457 continue;
7458
7459 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
7460 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
7461 // it before inserting.
7462 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
7463 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
7464 Ops[j] =
7465 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
7466 }
7467
7468 Value *Result = CGF.EmitNeonCall(F, Ops, s);
7469 llvm::Type *ResultType = CGF.ConvertType(E->getType());
7470 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
7471 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
7472 return CGF.Builder.CreateExtractElement(Result, C0);
7473
7474 return CGF.Builder.CreateBitCast(Result, ResultType, s);
7475}
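// Rough shape of the IR emitted for a widened scalar op such as
// vqaddh_s16(a, b):
//   %va = insertelement <4 x i16> poison, i16 %a, i64 0
//   %vb = insertelement <4 x i16> poison, i16 %b, i64 0
//   %v  = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %va, <4 x i16> %vb)
//   %r  = extractelement <4 x i16> %v, i64 0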
7476
7477Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
7478 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
7479 const char *NameHint, unsigned Modifier, const CallExpr *E,
7480 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
7481 llvm::Triple::ArchType Arch) {
7482 // Get the last argument, which specifies the vector type.
7483 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
7484 std::optional<llvm::APSInt> NeonTypeConst =
7485 Arg->getIntegerConstantExpr(getContext());
7486 if (!NeonTypeConst)
7487 return nullptr;
7488
7489 // Determine the type of this overloaded NEON intrinsic.
7490 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
7491 bool Usgn = Type.isUnsigned();
7492 bool Quad = Type.isQuad();
7493 const bool HasLegalHalfType = getTarget().hasLegalHalfType();
7494 const bool AllowBFloatArgsAndRet =
7495 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
7496
7497 llvm::FixedVectorType *VTy =
7498 GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet);
7499 llvm::Type *Ty = VTy;
7500 if (!Ty)
7501 return nullptr;
7502
7503 auto getAlignmentValue32 = [&](Address addr) -> Value* {
7504 return Builder.getInt32(addr.getAlignment().getQuantity());
7505 };
7506
7507 unsigned Int = LLVMIntrinsic;
7508 if ((Modifier & UnsignedAlts) && !Usgn)
7509 Int = AltLLVMIntrinsic;
7510
7511 switch (BuiltinID) {
7512 default: break;
7513 case NEON::BI__builtin_neon_splat_lane_v:
7514 case NEON::BI__builtin_neon_splat_laneq_v:
7515 case NEON::BI__builtin_neon_splatq_lane_v:
7516 case NEON::BI__builtin_neon_splatq_laneq_v: {
7517 auto NumElements = VTy->getElementCount();
7518 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
7519 NumElements = NumElements * 2;
7520 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
7521 NumElements = NumElements.divideCoefficientBy(2);
7522
7523 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7524 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
7525 }
7526 case NEON::BI__builtin_neon_vpadd_v:
7527 case NEON::BI__builtin_neon_vpaddq_v:
7528 // We don't allow fp/int overloading of intrinsics.
7529 if (VTy->getElementType()->isFloatingPointTy() &&
7530 Int == Intrinsic::aarch64_neon_addp)
7531 Int = Intrinsic::aarch64_neon_faddp;
7532 break;
7533 case NEON::BI__builtin_neon_vabs_v:
7534 case NEON::BI__builtin_neon_vabsq_v:
7535 if (VTy->getElementType()->isFloatingPointTy())
7536 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
7537 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
7538 case NEON::BI__builtin_neon_vadd_v:
7539 case NEON::BI__builtin_neon_vaddq_v: {
7540 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
7541 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7542 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7543 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
7544 return Builder.CreateBitCast(Ops[0], Ty);
7545 }
7546 case NEON::BI__builtin_neon_vaddhn_v: {
7547 llvm::FixedVectorType *SrcTy =
7548 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7549
7550 // %sum = add <4 x i32> %lhs, %rhs
7551 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7552 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
7553 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
7554
7555 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
7556 Constant *ShiftAmt =
7557 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
7558 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
7559
7560 // %res = trunc <4 x i32> %high to <4 x i16>
7561 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
7562 }
7563 case NEON::BI__builtin_neon_vcale_v:
7564 case NEON::BI__builtin_neon_vcaleq_v:
7565 case NEON::BI__builtin_neon_vcalt_v:
7566 case NEON::BI__builtin_neon_vcaltq_v:
7567 std::swap(Ops[0], Ops[1]);
7568 [[fallthrough]];
7569 case NEON::BI__builtin_neon_vcage_v:
7570 case NEON::BI__builtin_neon_vcageq_v:
7571 case NEON::BI__builtin_neon_vcagt_v:
7572 case NEON::BI__builtin_neon_vcagtq_v: {
7573 llvm::Type *Ty;
7574 switch (VTy->getScalarSizeInBits()) {
7575 default: llvm_unreachable("unexpected type");
7576 case 32:
7577 Ty = FloatTy;
7578 break;
7579 case 64:
7580 Ty = DoubleTy;
7581 break;
7582 case 16:
7583 Ty = HalfTy;
7584 break;
7585 }
7586 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
7587 llvm::Type *Tys[] = { VTy, VecFlt };
7588 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7589 return EmitNeonCall(F, Ops, NameHint);
7590 }
7591 case NEON::BI__builtin_neon_vceqz_v:
7592 case NEON::BI__builtin_neon_vceqzq_v:
7593 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
7594 ICmpInst::ICMP_EQ, "vceqz");
7595 case NEON::BI__builtin_neon_vcgez_v:
7596 case NEON::BI__builtin_neon_vcgezq_v:
7597 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
7598 ICmpInst::ICMP_SGE, "vcgez");
7599 case NEON::BI__builtin_neon_vclez_v:
7600 case NEON::BI__builtin_neon_vclezq_v:
7601 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
7602 ICmpInst::ICMP_SLE, "vclez");
7603 case NEON::BI__builtin_neon_vcgtz_v:
7604 case NEON::BI__builtin_neon_vcgtzq_v:
7605 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
7606 ICmpInst::ICMP_SGT, "vcgtz");
7607 case NEON::BI__builtin_neon_vcltz_v:
7608 case NEON::BI__builtin_neon_vcltzq_v:
7609 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
7610 ICmpInst::ICMP_SLT, "vcltz");
7611 case NEON::BI__builtin_neon_vclz_v:
7612 case NEON::BI__builtin_neon_vclzq_v:
7613 // We generate a target-independent intrinsic, which needs a second argument
7614 // saying whether or not clz of zero is undefined; on ARM it is not.
7615 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
7616 break;
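 // For example, vclz_s32 on AArch32 becomes, roughly,
 //   call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
 // because CLZ of zero is well defined on ARM.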
7617 case NEON::BI__builtin_neon_vcvt_f32_v:
7618 case NEON::BI__builtin_neon_vcvtq_f32_v:
7619 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7620 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
7621 HasLegalHalfType);
7622 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7623 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7624 case NEON::BI__builtin_neon_vcvt_f16_s16:
7625 case NEON::BI__builtin_neon_vcvt_f16_u16:
7626 case NEON::BI__builtin_neon_vcvtq_f16_s16:
7627 case NEON::BI__builtin_neon_vcvtq_f16_u16:
7628 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7629 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
7630 HasLegalHalfType);
7631 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7632 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7633 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
7634 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
7635 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
7636 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
7637 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
7638 Function *F = CGM.getIntrinsic(Int, Tys);
7639 return EmitNeonCall(F, Ops, "vcvt_n");
7640 }
7641 case NEON::BI__builtin_neon_vcvt_n_f32_v:
7642 case NEON::BI__builtin_neon_vcvt_n_f64_v:
7643 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
7644 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
7645 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
7646 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
7647 Function *F = CGM.getIntrinsic(Int, Tys);
7648 return EmitNeonCall(F, Ops, "vcvt_n");
7649 }
7650 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
7651 case NEON::BI__builtin_neon_vcvt_n_s32_v:
7652 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
7653 case NEON::BI__builtin_neon_vcvt_n_u32_v:
7654 case NEON::BI__builtin_neon_vcvt_n_s64_v:
7655 case NEON::BI__builtin_neon_vcvt_n_u64_v:
7656 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
7657 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
7658 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
7659 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
7660 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
7661 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
7662 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7663 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7664 return EmitNeonCall(F, Ops, "vcvt_n");
7665 }
7666 case NEON::BI__builtin_neon_vcvt_s32_v:
7667 case NEON::BI__builtin_neon_vcvt_u32_v:
7668 case NEON::BI__builtin_neon_vcvt_s64_v:
7669 case NEON::BI__builtin_neon_vcvt_u64_v:
7670 case NEON::BI__builtin_neon_vcvt_s16_f16:
7671 case NEON::BI__builtin_neon_vcvt_u16_f16:
7672 case NEON::BI__builtin_neon_vcvtq_s32_v:
7673 case NEON::BI__builtin_neon_vcvtq_u32_v:
7674 case NEON::BI__builtin_neon_vcvtq_s64_v:
7675 case NEON::BI__builtin_neon_vcvtq_u64_v:
7676 case NEON::BI__builtin_neon_vcvtq_s16_f16:
7677 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
7678 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
7679 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
7680 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
7681 }
7682 case NEON::BI__builtin_neon_vcvta_s16_f16:
7683 case NEON::BI__builtin_neon_vcvta_s32_v:
7684 case NEON::BI__builtin_neon_vcvta_s64_v:
7685 case NEON::BI__builtin_neon_vcvta_u16_f16:
7686 case NEON::BI__builtin_neon_vcvta_u32_v:
7687 case NEON::BI__builtin_neon_vcvta_u64_v:
7688 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
7689 case NEON::BI__builtin_neon_vcvtaq_s32_v:
7690 case NEON::BI__builtin_neon_vcvtaq_s64_v:
7691 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
7692 case NEON::BI__builtin_neon_vcvtaq_u32_v:
7693 case NEON::BI__builtin_neon_vcvtaq_u64_v:
7694 case NEON::BI__builtin_neon_vcvtn_s16_f16:
7695 case NEON::BI__builtin_neon_vcvtn_s32_v:
7696 case NEON::BI__builtin_neon_vcvtn_s64_v:
7697 case NEON::BI__builtin_neon_vcvtn_u16_f16:
7698 case NEON::BI__builtin_neon_vcvtn_u32_v:
7699 case NEON::BI__builtin_neon_vcvtn_u64_v:
7700 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
7701 case NEON::BI__builtin_neon_vcvtnq_s32_v:
7702 case NEON::BI__builtin_neon_vcvtnq_s64_v:
7703 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
7704 case NEON::BI__builtin_neon_vcvtnq_u32_v:
7705 case NEON::BI__builtin_neon_vcvtnq_u64_v:
7706 case NEON::BI__builtin_neon_vcvtp_s16_f16:
7707 case NEON::BI__builtin_neon_vcvtp_s32_v:
7708 case NEON::BI__builtin_neon_vcvtp_s64_v:
7709 case NEON::BI__builtin_neon_vcvtp_u16_f16:
7710 case NEON::BI__builtin_neon_vcvtp_u32_v:
7711 case NEON::BI__builtin_neon_vcvtp_u64_v:
7712 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
7713 case NEON::BI__builtin_neon_vcvtpq_s32_v:
7714 case NEON::BI__builtin_neon_vcvtpq_s64_v:
7715 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
7716 case NEON::BI__builtin_neon_vcvtpq_u32_v:
7717 case NEON::BI__builtin_neon_vcvtpq_u64_v:
7718 case NEON::BI__builtin_neon_vcvtm_s16_f16:
7719 case NEON::BI__builtin_neon_vcvtm_s32_v:
7720 case NEON::BI__builtin_neon_vcvtm_s64_v:
7721 case NEON::BI__builtin_neon_vcvtm_u16_f16:
7722 case NEON::BI__builtin_neon_vcvtm_u32_v:
7723 case NEON::BI__builtin_neon_vcvtm_u64_v:
7724 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
7725 case NEON::BI__builtin_neon_vcvtmq_s32_v:
7726 case NEON::BI__builtin_neon_vcvtmq_s64_v:
7727 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
7728 case NEON::BI__builtin_neon_vcvtmq_u32_v:
7729 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
7730 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7731 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
7732 }
7733 case NEON::BI__builtin_neon_vcvtx_f32_v: {
7734 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
7735 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
7736
7737 }
7738 case NEON::BI__builtin_neon_vext_v:
7739 case NEON::BI__builtin_neon_vextq_v: {
7740 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
7741 SmallVector<int, 16> Indices;
7742 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7743 Indices.push_back(i+CV);
7744
7745 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7746 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7747 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
7748 }
7749 case NEON::BI__builtin_neon_vfma_v:
7750 case NEON::BI__builtin_neon_vfmaq_v: {
7751 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7752 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7753 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7754
7755 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
7756 return emitCallMaybeConstrainedFPBuiltin(
7757 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
7758 {Ops[1], Ops[2], Ops[0]});
7759 }
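 // Illustration: vfmaq_f32(a, b, c) computes a + b*c, so the accumulator is
 // rotated to the last position and the call is roughly
 //   call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %c, <4 x float> %a)
 // (or the constrained-FP variant when strict FP is enabled).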
7760 case NEON::BI__builtin_neon_vld1_v:
7761 case NEON::BI__builtin_neon_vld1q_v: {
7762 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7763 Ops.push_back(getAlignmentValue32(PtrOp0));
7764 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
7765 }
7766 case NEON::BI__builtin_neon_vld1_x2_v:
7767 case NEON::BI__builtin_neon_vld1q_x2_v:
7768 case NEON::BI__builtin_neon_vld1_x3_v:
7769 case NEON::BI__builtin_neon_vld1q_x3_v:
7770 case NEON::BI__builtin_neon_vld1_x4_v:
7771 case NEON::BI__builtin_neon_vld1q_x4_v: {
7772 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7773 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7774 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
7775 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7776 }
7777 case NEON::BI__builtin_neon_vld2_v:
7778 case NEON::BI__builtin_neon_vld2q_v:
7779 case NEON::BI__builtin_neon_vld3_v:
7780 case NEON::BI__builtin_neon_vld3q_v:
7781 case NEON::BI__builtin_neon_vld4_v:
7782 case NEON::BI__builtin_neon_vld4q_v:
7783 case NEON::BI__builtin_neon_vld2_dup_v:
7784 case NEON::BI__builtin_neon_vld2q_dup_v:
7785 case NEON::BI__builtin_neon_vld3_dup_v:
7786 case NEON::BI__builtin_neon_vld3q_dup_v:
7787 case NEON::BI__builtin_neon_vld4_dup_v:
7788 case NEON::BI__builtin_neon_vld4q_dup_v: {
7789 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7790 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7791 Value *Align = getAlignmentValue32(PtrOp1);
7792 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
7793 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7794 }
7795 case NEON::BI__builtin_neon_vld1_dup_v:
7796 case NEON::BI__builtin_neon_vld1q_dup_v: {
7797 Value *V = PoisonValue::get(Ty);
7798 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
7799 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
7800 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
7801 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
7802 return EmitNeonSplat(Ops[0], CI);
7803 }
7804 case NEON::BI__builtin_neon_vld2_lane_v:
7805 case NEON::BI__builtin_neon_vld2q_lane_v:
7806 case NEON::BI__builtin_neon_vld3_lane_v:
7807 case NEON::BI__builtin_neon_vld3q_lane_v:
7808 case NEON::BI__builtin_neon_vld4_lane_v:
7809 case NEON::BI__builtin_neon_vld4q_lane_v: {
7810 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7811 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7812 for (unsigned I = 2; I < Ops.size() - 1; ++I)
7813 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
7814 Ops.push_back(getAlignmentValue32(PtrOp1));
7815 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
7816 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7817 }
7818 case NEON::BI__builtin_neon_vmovl_v: {
7819 llvm::FixedVectorType *DTy =
7820 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
7821 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
7822 if (Usgn)
7823 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
7824 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
7825 }
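 // Illustration: vmovl_s16 is a plain widening, roughly
 //   %r = sext <4 x i16> %a to <4 x i32>
 // and the unsigned variants use zext instead.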
7826 case NEON::BI__builtin_neon_vmovn_v: {
7827 llvm::FixedVectorType *QTy =
7828 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7829 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
7830 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
7831 }
7832 case NEON::BI__builtin_neon_vmull_v:
7833 // FIXME: the integer vmull operations could be emitted in terms of pure
7834 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
7835 // hoisting the exts outside loops. Until global ISel comes along that can
7836 // see through such movement, this leads to bad CodeGen. So we need an
7837 // intrinsic for now.
7838 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
7839 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
7840 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
7841 case NEON::BI__builtin_neon_vpadal_v:
7842 case NEON::BI__builtin_neon_vpadalq_v: {
7843 // The source operand type has twice as many elements of half the size.
7844 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
7845 llvm::Type *EltTy =
7846 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
7847 auto *NarrowTy =
7848 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
7849 llvm::Type *Tys[2] = { Ty, NarrowTy };
7850 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
7851 }
7852 case NEON::BI__builtin_neon_vpaddl_v:
7853 case NEON::BI__builtin_neon_vpaddlq_v: {
7854 // The source operand type has twice as many elements of half the size.
7855 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
7856 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
7857 auto *NarrowTy =
7858 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
7859 llvm::Type *Tys[2] = { Ty, NarrowTy };
7860 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
7861 }
7862 case NEON::BI__builtin_neon_vqdmlal_v:
7863 case NEON::BI__builtin_neon_vqdmlsl_v: {
7864 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
7865 Ops[1] =
7866 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
7867 Ops.resize(2);
7868 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
7869 }
7870 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
7871 case NEON::BI__builtin_neon_vqdmulh_lane_v:
7872 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
7873 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
7874 auto *RTy = cast<llvm::FixedVectorType>(Ty);
7875 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
7876 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
7877 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
7878 RTy->getNumElements() * 2);
7879 llvm::Type *Tys[2] = {
7880 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7881 /*isQuad*/ false))};
7882 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
7883 }
7884 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
7885 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
7886 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
7887 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
7888 llvm::Type *Tys[2] = {
7889 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7890 /*isQuad*/ true))};
7891 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
7892 }
7893 case NEON::BI__builtin_neon_vqshl_n_v:
7894 case NEON::BI__builtin_neon_vqshlq_n_v:
7895 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
7896 1, false);
7897 case NEON::BI__builtin_neon_vqshlu_n_v:
7898 case NEON::BI__builtin_neon_vqshluq_n_v:
7899 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
7900 1, false);
7901 case NEON::BI__builtin_neon_vrecpe_v:
7902 case NEON::BI__builtin_neon_vrecpeq_v:
7903 case NEON::BI__builtin_neon_vrsqrte_v:
7904 case NEON::BI__builtin_neon_vrsqrteq_v:
7905 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
7906 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
7907 case NEON::BI__builtin_neon_vrndi_v:
7908 case NEON::BI__builtin_neon_vrndiq_v:
7909 Int = Builder.getIsFPConstrained()
7910 ? Intrinsic::experimental_constrained_nearbyint
7911 : Intrinsic::nearbyint;
7912 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
7913 case NEON::BI__builtin_neon_vrshr_n_v:
7914 case NEON::BI__builtin_neon_vrshrq_n_v:
7915 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
7916 1, true);
7917 case NEON::BI__builtin_neon_vsha512hq_u64:
7918 case NEON::BI__builtin_neon_vsha512h2q_u64:
7919 case NEON::BI__builtin_neon_vsha512su0q_u64:
7920 case NEON::BI__builtin_neon_vsha512su1q_u64: {
7921 Function *F = CGM.getIntrinsic(Int);
7922 return EmitNeonCall(F, Ops, "");
7923 }
7924 case NEON::BI__builtin_neon_vshl_n_v:
7925 case NEON::BI__builtin_neon_vshlq_n_v:
7926 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
7927 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
7928 "vshl_n");
7929 case NEON::BI__builtin_neon_vshll_n_v: {
7930 llvm::FixedVectorType *SrcTy =
7931 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
7932 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7933 if (Usgn)
7934 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
7935 else
7936 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
7937 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
7938 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
7939 }
7940 case NEON::BI__builtin_neon_vshrn_n_v: {
7941 llvm::FixedVectorType *SrcTy =
7942 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7943 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7944 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
7945 if (Usgn)
7946 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
7947 else
7948 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
7949 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
7950 }
7951 case NEON::BI__builtin_neon_vshr_n_v:
7952 case NEON::BI__builtin_neon_vshrq_n_v:
7953 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
7954 case NEON::BI__builtin_neon_vst1_v:
7955 case NEON::BI__builtin_neon_vst1q_v:
7956 case NEON::BI__builtin_neon_vst2_v:
7957 case NEON::BI__builtin_neon_vst2q_v:
7958 case NEON::BI__builtin_neon_vst3_v:
7959 case NEON::BI__builtin_neon_vst3q_v:
7960 case NEON::BI__builtin_neon_vst4_v:
7961 case NEON::BI__builtin_neon_vst4q_v:
7962 case NEON::BI__builtin_neon_vst2_lane_v:
7963 case NEON::BI__builtin_neon_vst2q_lane_v:
7964 case NEON::BI__builtin_neon_vst3_lane_v:
7965 case NEON::BI__builtin_neon_vst3q_lane_v:
7966 case NEON::BI__builtin_neon_vst4_lane_v:
7967 case NEON::BI__builtin_neon_vst4q_lane_v: {
7968 llvm::Type *Tys[] = {Int8PtrTy, Ty};
7969 Ops.push_back(getAlignmentValue32(PtrOp0));
7970 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
7971 }
7972 case NEON::BI__builtin_neon_vsm3partw1q_u32:
7973 case NEON::BI__builtin_neon_vsm3partw2q_u32:
7974 case NEON::BI__builtin_neon_vsm3ss1q_u32:
7975 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
7976 case NEON::BI__builtin_neon_vsm4eq_u32: {
7977 Function *F = CGM.getIntrinsic(Int);
7978 return EmitNeonCall(F, Ops, "");
7979 }
7980 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
7981 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
7982 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
7983 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
7984 Function *F = CGM.getIntrinsic(Int);
7985 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7986 return EmitNeonCall(F, Ops, "");
7987 }
7988 case NEON::BI__builtin_neon_vst1_x2_v:
7989 case NEON::BI__builtin_neon_vst1q_x2_v:
7990 case NEON::BI__builtin_neon_vst1_x3_v:
7991 case NEON::BI__builtin_neon_vst1q_x3_v:
7992 case NEON::BI__builtin_neon_vst1_x4_v:
7993 case NEON::BI__builtin_neon_vst1q_x4_v: {
7994 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
7995 // in AArch64 it comes last. We may want to stick to one or the other.
7996 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
7997 Arch == llvm::Triple::aarch64_32) {
7998 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7999 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
8000 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8001 }
8002 llvm::Type *Tys[2] = {UnqualPtrTy, VTy};
8003 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8004 }
8005 case NEON::BI__builtin_neon_vsubhn_v: {
8006 llvm::FixedVectorType *SrcTy =
8007 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8008
8009 // %diff = sub <4 x i32> %lhs, %rhs
8010 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8011 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
8012 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
8013
8014 // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
8015 Constant *ShiftAmt =
8016 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
8017 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
8018
8019 // %res = trunc <4 x i32> %high to <4 x i16>
8020 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
8021 }
8022 case NEON::BI__builtin_neon_vtrn_v:
8023 case NEON::BI__builtin_neon_vtrnq_v: {
8024 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8025 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8026 Value *SV = nullptr;
8027
8028 for (unsigned vi = 0; vi != 2; ++vi) {
8029 SmallVector<int, 16> Indices;
8030 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8031 Indices.push_back(i+vi);
8032 Indices.push_back(i+e+vi);
8033 }
8034 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8035 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
8036 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8037 }
8038 return SV;
8039 }
8040 case NEON::BI__builtin_neon_vtst_v:
8041 case NEON::BI__builtin_neon_vtstq_v: {
8042 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8043 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8044 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
8045 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
8046 ConstantAggregateZero::get(Ty));
8047 return Builder.CreateSExt(Ops[0], Ty, "vtst");
8048 }
8049 case NEON::BI__builtin_neon_vuzp_v:
8050 case NEON::BI__builtin_neon_vuzpq_v: {
8051 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8052 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8053 Value *SV = nullptr;
8054
8055 for (unsigned vi = 0; vi != 2; ++vi) {
8056 SmallVector<int, 16> Indices;
8057 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8058 Indices.push_back(2*i+vi);
8059
8060 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8061 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
8062 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8063 }
8064 return SV;
8065 }
8066 case NEON::BI__builtin_neon_vxarq_u64: {
8067 Function *F = CGM.getIntrinsic(Int);
8068 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
8069 return EmitNeonCall(F, Ops, "");
8070 }
8071 case NEON::BI__builtin_neon_vzip_v:
8072 case NEON::BI__builtin_neon_vzipq_v: {
8073 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8074 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8075 Value *SV = nullptr;
8076
8077 for (unsigned vi = 0; vi != 2; ++vi) {
8078 SmallVector<int, 16> Indices;
8079 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8080 Indices.push_back((i + vi*e) >> 1);
8081 Indices.push_back(((i + vi*e) >> 1)+e);
8082 }
8083 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8084 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
8085 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8086 }
8087 return SV;
8088 }
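 // Illustration: for a 4-element vector the two shuffles above use the masks
 // <0, 4, 1, 5> and <2, 6, 3, 7>, interleaving the low and then the high
 // halves of the pair; each half-result is stored through the result pointer
 // in Ops[0].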
8089 case NEON::BI__builtin_neon_vdot_s32:
8090 case NEON::BI__builtin_neon_vdot_u32:
8091 case NEON::BI__builtin_neon_vdotq_s32:
8092 case NEON::BI__builtin_neon_vdotq_u32: {
8093 auto *InputTy =
8094 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8095 llvm::Type *Tys[2] = { Ty, InputTy };
8096 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
8097 }
8098 case NEON::BI__builtin_neon_vfmlal_low_f16:
8099 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
8100 auto *InputTy =
8101 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8102 llvm::Type *Tys[2] = { Ty, InputTy };
8103 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
8104 }
8105 case NEON::BI__builtin_neon_vfmlsl_low_f16:
8106 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
8107 auto *InputTy =
8108 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8109 llvm::Type *Tys[2] = { Ty, InputTy };
8110 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
8111 }
8112 case NEON::BI__builtin_neon_vfmlal_high_f16:
8113 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
8114 auto *InputTy =
8115 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8116 llvm::Type *Tys[2] = { Ty, InputTy };
8117 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
8118 }
8119 case NEON::BI__builtin_neon_vfmlsl_high_f16:
8120 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
8121 auto *InputTy =
8122 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8123 llvm::Type *Tys[2] = { Ty, InputTy };
8124 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
8125 }
8126 case NEON::BI__builtin_neon_vmmlaq_s32:
8127 case NEON::BI__builtin_neon_vmmlaq_u32: {
8128 auto *InputTy =
8129 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8130 llvm::Type *Tys[2] = { Ty, InputTy };
8131 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
8132 }
8133 case NEON::BI__builtin_neon_vusmmlaq_s32: {
8134 auto *InputTy =
8135 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8136 llvm::Type *Tys[2] = { Ty, InputTy };
8137 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
8138 }
8139 case NEON::BI__builtin_neon_vusdot_s32:
8140 case NEON::BI__builtin_neon_vusdotq_s32: {
8141 auto *InputTy =
8142 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8143 llvm::Type *Tys[2] = { Ty, InputTy };
8144 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
8145 }
8146 case NEON::BI__builtin_neon_vbfdot_f32:
8147 case NEON::BI__builtin_neon_vbfdotq_f32: {
8148 llvm::Type *InputTy =
8149 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
8150 llvm::Type *Tys[2] = { Ty, InputTy };
8151 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
8152 }
8153 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
8154 llvm::Type *Tys[1] = { Ty };
8155 Function *F = CGM.getIntrinsic(Int, Tys);
8156 return EmitNeonCall(F, Ops, "vcvtfp2bf");
8157 }
8158
8159 }
8160
8161 assert(Int && "Expected valid intrinsic number");
8162
8163 // Determine the type(s) of this overloaded AArch64 intrinsic.
8164 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
8165
8166 Value *Result = EmitNeonCall(F, Ops, NameHint);
8167 llvm::Type *ResultType = ConvertType(E->getType());
8168 // Cast the AArch64 intrinsic's one-element vector result back to the
8169 // scalar type expected by the builtin.
8170 return Builder.CreateBitCast(Result, ResultType, NameHint);
8171}
8172
8173Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
8174 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
8175 const CmpInst::Predicate Ip, const Twine &Name) {
8176 llvm::Type *OTy = Op->getType();
8177
8178 // FIXME: this is utterly horrific. We should not be looking at previous
8179 // codegen context to find out what needs doing. Unfortunately TableGen
8180 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
8181 // (etc).
8182 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
8183 OTy = BI->getOperand(0)->getType();
8184
8185 Op = Builder.CreateBitCast(Op, OTy);
8186 if (OTy->getScalarType()->isFloatingPointTy()) {
8187 if (Fp == CmpInst::FCMP_OEQ)
8188 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
8189 else
8190 Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));
8191 } else {
8192 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
8193 }
8194 return Builder.CreateSExt(Op, Ty, Name);
8195}
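// Illustration: vceqz_f32(a) comes out roughly as
//   %cmp = fcmp oeq <2 x float> %a, zeroinitializer
//   %res = sext <2 x i1> %cmp to <2 x i32>
// while the integer forms use icmp with the matching predicate.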
8196
8197static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
8198 Value *ExtOp, Value *IndexOp,
8199 llvm::Type *ResTy, unsigned IntID,
8200 const char *Name) {
8201 SmallVector<Value *, 2> TblOps;
8202 if (ExtOp)
8203 TblOps.push_back(ExtOp);
8204
8205 // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
8206 SmallVector<int, 16> Indices;
8207 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
8208 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
8209 Indices.push_back(2*i);
8210 Indices.push_back(2*i+1);
8211 }
8212
8213 int PairPos = 0, End = Ops.size() - 1;
8214 while (PairPos < End) {
8215 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8216 Ops[PairPos+1], Indices,
8217 Name));
8218 PairPos += 2;
8219 }
8220
8221 // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
8222 // of the final 128-bit lookup table with zero.
8223 if (PairPos == End) {
8224 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
8225 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8226 ZeroTbl, Indices, Name));
8227 }
8228
8229 Function *TblF;
8230 TblOps.push_back(IndexOp);
8231 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
8232
8233 return CGF.EmitNeonCall(TblF, TblOps, Name);
8234}
8235
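// Maps the ARM hint builtins (nop/yield/wfe/wfi/sev/sevl) to the immediate
// expected by llvm.arm.hint; for example, __builtin_arm_wfi() and __wfi()
// both become a call to llvm.arm.hint with the constant 3. Returns null for
// anything else so the caller falls through to the normal handling.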
8236Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
8237 unsigned Value;
8238 switch (BuiltinID) {
8239 default:
8240 return nullptr;
8241 case clang::ARM::BI__builtin_arm_nop:
8242 Value = 0;
8243 break;
8244 case clang::ARM::BI__builtin_arm_yield:
8245 case clang::ARM::BI__yield:
8246 Value = 1;
8247 break;
8248 case clang::ARM::BI__builtin_arm_wfe:
8249 case clang::ARM::BI__wfe:
8250 Value = 2;
8251 break;
8252 case clang::ARM::BI__builtin_arm_wfi:
8253 case clang::ARM::BI__wfi:
8254 Value = 3;
8255 break;
8256 case clang::ARM::BI__builtin_arm_sev:
8257 case clang::ARM::BI__sev:
8258 Value = 4;
8259 break;
8260 case clang::ARM::BI__builtin_arm_sevl:
8261 case clang::ARM::BI__sevl:
8262 Value = 5;
8263 break;
8264 }
8265
8266 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
8267 llvm::ConstantInt::get(Int32Ty, Value));
8268}
8269
8270enum SpecialRegisterAccessKind {
8271 NormalRead,
8272 VolatileRead,
8273 Write,
8274};
8275
8276// Generates the IR for __builtin_read_exec_*.
8277// Lowers the builtin to amdgcn_ballot intrinsic.
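// For the "exec_hi" form the 64-bit ballot result is shifted right by 32 and
// truncated to i32, returning the upper half of the EXEC mask.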
8278static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
8279 llvm::Type *RegisterType,
8280 llvm::Type *ValueType, bool isExecHi) {
8281 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8282 CodeGen::CodeGenModule &CGM = CGF.CGM;
8283
8284 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
8285 llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
8286
8287 if (isExecHi) {
8288 Value *Rt2 = Builder.CreateLShr(Call, 32);
8289 Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
8290 return Rt2;
8291 }
8292
8293 return Call;
8294}
8295
8296// Generates the IR for the read/write special register builtin.
8297// ValueType is the type of the value that is to be written or read;
8298// RegisterType is the type of the register being written to or read from.
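// For example, a 32-bit read of a 64-bit-only register is emitted as an i64
// llvm.read_register followed by a trunc to i32, and the corresponding write
// zero-extends the i32 value to i64 before calling llvm.write_register.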
8299static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
8300 const CallExpr *E,
8301 llvm::Type *RegisterType,
8302 llvm::Type *ValueType,
8303 SpecialRegisterAccessKind AccessKind,
8304 StringRef SysReg = "") {
8305 // The read/write register intrinsics only support 32-, 64- and 128-bit operations.
8306 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
8307 RegisterType->isIntegerTy(128)) &&
8308 "Unsupported size for register.");
8309
8310 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8311 CodeGen::CodeGenModule &CGM = CGF.CGM;
8312 LLVMContext &Context = CGM.getLLVMContext();
8313
8314 if (SysReg.empty()) {
8315 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
8316 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
8317 }
8318
8319 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
8320 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
8321 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
8322
8323 llvm::Type *Types[] = { RegisterType };
8324
8325 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
8326 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
8327 && "Can't fit 64-bit value in 32-bit register");
8328
8329 if (AccessKind != Write) {
8330 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
8331 llvm::Function *F = CGM.getIntrinsic(
8332 AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register
8333 : llvm::Intrinsic::read_register,
8334 Types);
8335 llvm::Value *Call = Builder.CreateCall(F, Metadata);
8336
8337 if (MixedTypes)
8338 // Read into 64 bit register and then truncate result to 32 bit.
8339 return Builder.CreateTrunc(Call, ValueType);
8340
8341 if (ValueType->isPointerTy())
8342 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
8343 return Builder.CreateIntToPtr(Call, ValueType);
8344
8345 return Call;
8346 }
8347
8348 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
8349 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
8350 if (MixedTypes) {
8351 // Extend 32 bit write value to 64 bit to pass to write.
8352 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
8353 return Builder.CreateCall(F, { Metadata, ArgValue });
8354 }
8355
8356 if (ValueType->isPointerTy()) {
8357 // Have VoidPtrTy ArgValue but want to return an i32/i64.
8358 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
8359 return Builder.CreateCall(F, { Metadata, ArgValue });
8360 }
8361
8362 return Builder.CreateCall(F, { Metadata, ArgValue });
8363}
8364
8365/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
8366/// argument that specifies the vector type.
8367static bool HasExtraNeonArgument(unsigned BuiltinID) {
8368 switch (BuiltinID) {
8369 default: break;
8370 case NEON::BI__builtin_neon_vget_lane_i8:
8371 case NEON::BI__builtin_neon_vget_lane_i16:
8372 case NEON::BI__builtin_neon_vget_lane_bf16:
8373 case NEON::BI__builtin_neon_vget_lane_i32:
8374 case NEON::BI__builtin_neon_vget_lane_i64:
8375 case NEON::BI__builtin_neon_vget_lane_f32:
8376 case NEON::BI__builtin_neon_vgetq_lane_i8:
8377 case NEON::BI__builtin_neon_vgetq_lane_i16:
8378 case NEON::BI__builtin_neon_vgetq_lane_bf16:
8379 case NEON::BI__builtin_neon_vgetq_lane_i32:
8380 case NEON::BI__builtin_neon_vgetq_lane_i64:
8381 case NEON::BI__builtin_neon_vgetq_lane_f32:
8382 case NEON::BI__builtin_neon_vduph_lane_bf16:
8383 case NEON::BI__builtin_neon_vduph_laneq_bf16:
8384 case NEON::BI__builtin_neon_vset_lane_i8:
8385 case NEON::BI__builtin_neon_vset_lane_i16:
8386 case NEON::BI__builtin_neon_vset_lane_bf16:
8387 case NEON::BI__builtin_neon_vset_lane_i32:
8388 case NEON::BI__builtin_neon_vset_lane_i64:
8389 case NEON::BI__builtin_neon_vset_lane_f32:
8390 case NEON::BI__builtin_neon_vsetq_lane_i8:
8391 case NEON::BI__builtin_neon_vsetq_lane_i16:
8392 case NEON::BI__builtin_neon_vsetq_lane_bf16:
8393 case NEON::BI__builtin_neon_vsetq_lane_i32:
8394 case NEON::BI__builtin_neon_vsetq_lane_i64:
8395 case NEON::BI__builtin_neon_vsetq_lane_f32:
8396 case NEON::BI__builtin_neon_vsha1h_u32:
8397 case NEON::BI__builtin_neon_vsha1cq_u32:
8398 case NEON::BI__builtin_neon_vsha1pq_u32:
8399 case NEON::BI__builtin_neon_vsha1mq_u32:
8400 case NEON::BI__builtin_neon_vcvth_bf16_f32:
8401 case clang::ARM::BI_MoveToCoprocessor:
8402 case clang::ARM::BI_MoveToCoprocessor2:
8403 return false;
8404 }
8405 return true;
8406}
8407
8408Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
8409 const CallExpr *E,
8410 ReturnValueSlot ReturnValue,
8411 llvm::Triple::ArchType Arch) {
8412 if (auto Hint = GetValueForARMHint(BuiltinID))
8413 return Hint;
8414
8415 if (BuiltinID == clang::ARM::BI__emit) {
8416 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
8417 llvm::FunctionType *FTy =
8418 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
8419
8420 Expr::EvalResult Result;
8421 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
8422 llvm_unreachable("Sema will ensure that the parameter is constant");
8423
8424 llvm::APSInt Value = Result.Val.getInt();
8425 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
8426
8427 llvm::InlineAsm *Emit =
8428 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
8429 /*hasSideEffects=*/true)
8430 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
8431 /*hasSideEffects=*/true);
8432
8433 return Builder.CreateCall(Emit);
8434 }
8435
8436 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
8437 Value *Option = EmitScalarExpr(E->getArg(0));
8438 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
8439 }
8440
8441 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
8442 Value *Address = EmitScalarExpr(E->getArg(0));
8443 Value *RW = EmitScalarExpr(E->getArg(1));
8444 Value *IsData = EmitScalarExpr(E->getArg(2));
8445
8446 // Locality is not supported on the ARM target.
8447 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
8448
8449 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
8450 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
8451 }
8452
8453 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
8454 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8455 return Builder.CreateCall(
8456 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
8457 }
8458
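  // For example, __builtin_arm_clz64 below is emitted as llvm.ctlz.i64 with
  // the "zero is poison" flag set to false, and the i64 result is then
  // truncated to i32.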
8459 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
8460 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
8461 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8462 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
8463 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
8464 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
8465 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
8466 return Res;
8467 }
8468
8469
8470 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
8471 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8472 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
8473 }
8474 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
8475 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8476 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
8477 "cls");
8478 }
8479
8480 if (BuiltinID == clang::ARM::BI__clear_cache) {
8481 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
8482 const FunctionDecl *FD = E->getDirectCallee();
8483 Value *Ops[2];
8484 for (unsigned i = 0; i < 2; i++)
8485 Ops[i] = EmitScalarExpr(E->getArg(i));
8486 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
8487 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
8488 StringRef Name = FD->getName();
8489 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
8490 }
8491
8492 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
8493 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
8494 Function *F;
8495
8496 switch (BuiltinID) {
8497 default: llvm_unreachable("unexpected builtin");
8498 case clang::ARM::BI__builtin_arm_mcrr:
8499 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
8500 break;
8501 case clang::ARM::BI__builtin_arm_mcrr2:
8502 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
8503 break;
8504 }
8505
8506 // The MCRR{2} instruction has 5 operands, but
8507 // the intrinsic has only 4 because Rt and Rt2
8508 // are represented as a single unsigned 64-bit
8509 // integer in the intrinsic definition, while
8510 // internally they are represented as two
8511 // 32-bit integers.
8512
8513 Value *Coproc = EmitScalarExpr(E->getArg(0));
8514 Value *Opc1 = EmitScalarExpr(E->getArg(1));
8515 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
8516 Value *CRm = EmitScalarExpr(E->getArg(3));
8517
8518 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
8519 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
8520 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
8521 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
8522
8523 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
8524 }
8525
8526 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
8527 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
8528 Function *F;
8529
8530 switch (BuiltinID) {
8531 default: llvm_unreachable("unexpected builtin");
8532 case clang::ARM::BI__builtin_arm_mrrc:
8533 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
8534 break;
8535 case clang::ARM::BI__builtin_arm_mrrc2:
8536 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
8537 break;
8538 }
8539
8540 Value *Coproc = EmitScalarExpr(E->getArg(0));
8541 Value *Opc1 = EmitScalarExpr(E->getArg(1));
8542 Value *CRm = EmitScalarExpr(E->getArg(2));
8543 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
8544
8545 // Returns an unsigned 64 bit integer, represented
8546 // as two 32 bit integers.
8547
8548 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
8549 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
8550 Rt = Builder.CreateZExt(Rt, Int64Ty);
8551 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
8552
8553 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
8554 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
8555 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
8556
8557 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
8558 }
8559
8560 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
8561 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
8562 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
8563 getContext().getTypeSize(E->getType()) == 64) ||
8564 BuiltinID == clang::ARM::BI__ldrexd) {
8565 Function *F;
8566
8567 switch (BuiltinID) {
8568 default: llvm_unreachable("unexpected builtin");
8569 case clang::ARM::BI__builtin_arm_ldaex:
8570 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
8571 break;
8572 case clang::ARM::BI__builtin_arm_ldrexd:
8573 case clang::ARM::BI__builtin_arm_ldrex:
8574 case clang::ARM::BI__ldrexd:
8575 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
8576 break;
8577 }
8578
8579 Value *LdPtr = EmitScalarExpr(E->getArg(0));
8580 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
8581
8582 Value *Val0 = Builder.CreateExtractValue(Val, 1);
8583 Value *Val1 = Builder.CreateExtractValue(Val, 0);
8584 Val0 = Builder.CreateZExt(Val0, Int64Ty);
8585 Val1 = Builder.CreateZExt(Val1, Int64Ty);
8586
8587 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
8588 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
8589 Val = Builder.CreateOr(Val, Val1);
8590 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
8591 }
8592
8593 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
8594 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
8595 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
8596
8597 QualType Ty = E->getType();
8598 llvm::Type *RealResTy = ConvertType(Ty);
8599 llvm::Type *IntTy =
8600 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
8601
8602 Function *F = CGM.getIntrinsic(
8603 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
8604 : Intrinsic::arm_ldrex,
8605 UnqualPtrTy);
8606 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
8607 Val->addParamAttr(
8608 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
8609
8610 if (RealResTy->isPointerTy())
8611 return Builder.CreateIntToPtr(Val, RealResTy);
8612 else {
8613 llvm::Type *IntResTy = llvm::IntegerType::get(
8614 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
8615 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
8616 RealResTy);
8617 }
8618 }
8619
8620 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
8621 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
8622 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
8623 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
8624 Function *F = CGM.getIntrinsic(
8625 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
8626 : Intrinsic::arm_strexd);
8627 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
8628
8629 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
8630 Value *Val = EmitScalarExpr(E->getArg(0));
8631 Builder.CreateStore(Val, Tmp);
8632
8633 Address LdPtr = Tmp.withElementType(STy);
8634 Val = Builder.CreateLoad(LdPtr);
8635
8636 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
8637 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
8638 Value *StPtr = EmitScalarExpr(E->getArg(1));
8639 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
8640 }
8641
8642 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
8643 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
8644 Value *StoreVal = EmitScalarExpr(E->getArg(0));
8645 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
8646
8647 QualType Ty = E->getArg(0)->getType();
8648 llvm::Type *StoreTy =
8649 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
8650
8651 if (StoreVal->getType()->isPointerTy())
8652 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
8653 else {
8654 llvm::Type *IntTy = llvm::IntegerType::get(
8655 getLLVMContext(),
8656 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
8657 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
8658 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
8659 }
8660
8661 Function *F = CGM.getIntrinsic(
8662 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
8663 : Intrinsic::arm_strex,
8664 StoreAddr->getType());
8665
8666 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
8667 CI->addParamAttr(
8668 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
8669 return CI;
8670 }
8671
8672 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
8673 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
8674 return Builder.CreateCall(F);
8675 }
8676
8677 // CRC32
8678 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
8679 switch (BuiltinID) {
8680 case clang::ARM::BI__builtin_arm_crc32b:
8681 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
8682 case clang::ARM::BI__builtin_arm_crc32cb:
8683 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
8684 case clang::ARM::BI__builtin_arm_crc32h:
8685 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
8686 case clang::ARM::BI__builtin_arm_crc32ch:
8687 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
8688 case clang::ARM::BI__builtin_arm_crc32w:
8689 case clang::ARM::BI__builtin_arm_crc32d:
8690 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
8691 case clang::ARM::BI__builtin_arm_crc32cw:
8692 case clang::ARM::BI__builtin_arm_crc32cd:
8693 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
8694 }
8695
8696 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
8697 Value *Arg0 = EmitScalarExpr(E->getArg(0));
8698 Value *Arg1 = EmitScalarExpr(E->getArg(1));
8699
8700 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
8701 // intrinsics, hence we need different codegen for these cases.
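  // For example, __builtin_arm_crc32d(a, b) becomes
  // llvm.arm.crc32w(llvm.arm.crc32w(a, lo32(b)), hi32(b)), where lo32/hi32 are
  // the low and high 32-bit halves of the 64-bit operand.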
8702 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
8703 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
8704 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
8705 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
8706 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
8707 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
8708
8709 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
8710 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
8711 return Builder.CreateCall(F, {Res, Arg1b});
8712 } else {
8713 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
8714
8715 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
8716 return Builder.CreateCall(F, {Arg0, Arg1});
8717 }
8718 }
8719
8720 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
8721 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8722 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
8723 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
8724 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
8725 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
8726
8727 SpecialRegisterAccessKind AccessKind = Write;
8728 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
8729 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8730 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
8731 AccessKind = VolatileRead;
8732
8733 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
8734 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
8735
8736 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8737 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
8738
8739 llvm::Type *ValueType;
8740 llvm::Type *RegisterType;
8741 if (IsPointerBuiltin) {
8742 ValueType = VoidPtrTy;
8743 RegisterType = Int32Ty;
8744 } else if (Is64Bit) {
8745 ValueType = RegisterType = Int64Ty;
8746 } else {
8747 ValueType = RegisterType = Int32Ty;
8748 }
8749
8750 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
8751 AccessKind);
8752 }
8753
8754 if (BuiltinID == ARM::BI__builtin_sponentry) {
8755 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
8756 return Builder.CreateCall(F);
8757 }
8758
8759 // Handle MSVC intrinsics before argument evaluation to prevent double
8760 // evaluation.
8761 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
8762 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
8763
8764 // Deal with MVE builtins
8765 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
8766 return Result;
8767 // Handle CDE builtins
8768 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
8769 return Result;
8770
8771 // Some intrinsics are equivalent; if they are, use the base intrinsic ID.
8772 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
8773 return P.first == BuiltinID;
8774 });
8775 if (It != end(NEONEquivalentIntrinsicMap))
8776 BuiltinID = It->second;
8777
8778 // Find out if any arguments are required to be integer constant
8779 // expressions.
8780 unsigned ICEArguments = 0;
8781 ASTContext::GetBuiltinTypeError Error;
8782 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
8783 assert(Error == ASTContext::GE_None && "Should not codegen an error");
8784
8785 auto getAlignmentValue32 = [&](Address addr) -> Value* {
8786 return Builder.getInt32(addr.getAlignment().getQuantity());
8787 };
8788
8789 Address PtrOp0 = Address::invalid();
8790 Address PtrOp1 = Address::invalid();
8791 SmallVector<Value*, 4> Ops;
8792 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
8793 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
8794 for (unsigned i = 0, e = NumArgs; i != e; i++) {
8795 if (i == 0) {
8796 switch (BuiltinID) {
8797 case NEON::BI__builtin_neon_vld1_v:
8798 case NEON::BI__builtin_neon_vld1q_v:
8799 case NEON::BI__builtin_neon_vld1q_lane_v:
8800 case NEON::BI__builtin_neon_vld1_lane_v:
8801 case NEON::BI__builtin_neon_vld1_dup_v:
8802 case NEON::BI__builtin_neon_vld1q_dup_v:
8803 case NEON::BI__builtin_neon_vst1_v:
8804 case NEON::BI__builtin_neon_vst1q_v:
8805 case NEON::BI__builtin_neon_vst1q_lane_v:
8806 case NEON::BI__builtin_neon_vst1_lane_v:
8807 case NEON::BI__builtin_neon_vst2_v:
8808 case NEON::BI__builtin_neon_vst2q_v:
8809 case NEON::BI__builtin_neon_vst2_lane_v:
8810 case NEON::BI__builtin_neon_vst2q_lane_v:
8811 case NEON::BI__builtin_neon_vst3_v:
8812 case NEON::BI__builtin_neon_vst3q_v:
8813 case NEON::BI__builtin_neon_vst3_lane_v:
8814 case NEON::BI__builtin_neon_vst3q_lane_v:
8815 case NEON::BI__builtin_neon_vst4_v:
8816 case NEON::BI__builtin_neon_vst4q_v:
8817 case NEON::BI__builtin_neon_vst4_lane_v:
8818 case NEON::BI__builtin_neon_vst4q_lane_v:
8819 // Get the alignment for the argument in addition to the value;
8820 // we'll use it later.
8821 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
8822 Ops.push_back(PtrOp0.getPointer());
8823 continue;
8824 }
8825 }
8826 if (i == 1) {
8827 switch (BuiltinID) {
8828 case NEON::BI__builtin_neon_vld2_v:
8829 case NEON::BI__builtin_neon_vld2q_v:
8830 case NEON::BI__builtin_neon_vld3_v:
8831 case NEON::BI__builtin_neon_vld3q_v:
8832 case NEON::BI__builtin_neon_vld4_v:
8833 case NEON::BI__builtin_neon_vld4q_v:
8834 case NEON::BI__builtin_neon_vld2_lane_v:
8835 case NEON::BI__builtin_neon_vld2q_lane_v:
8836 case NEON::BI__builtin_neon_vld3_lane_v:
8837 case NEON::BI__builtin_neon_vld3q_lane_v:
8838 case NEON::BI__builtin_neon_vld4_lane_v:
8839 case NEON::BI__builtin_neon_vld4q_lane_v:
8840 case NEON::BI__builtin_neon_vld2_dup_v:
8841 case NEON::BI__builtin_neon_vld2q_dup_v:
8842 case NEON::BI__builtin_neon_vld3_dup_v:
8843 case NEON::BI__builtin_neon_vld3q_dup_v:
8844 case NEON::BI__builtin_neon_vld4_dup_v:
8845 case NEON::BI__builtin_neon_vld4q_dup_v:
8846 // Get the alignment for the argument in addition to the value;
8847 // we'll use it later.
8848 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
8849 Ops.push_back(PtrOp1.getPointer());
8850 continue;
8851 }
8852 }
8853
8854 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
8855 }
8856
8857 switch (BuiltinID) {
8858 default: break;
8859
8860 case NEON::BI__builtin_neon_vget_lane_i8:
8861 case NEON::BI__builtin_neon_vget_lane_i16:
8862 case NEON::BI__builtin_neon_vget_lane_i32:
8863 case NEON::BI__builtin_neon_vget_lane_i64:
8864 case NEON::BI__builtin_neon_vget_lane_bf16:
8865 case NEON::BI__builtin_neon_vget_lane_f32:
8866 case NEON::BI__builtin_neon_vgetq_lane_i8:
8867 case NEON::BI__builtin_neon_vgetq_lane_i16:
8868 case NEON::BI__builtin_neon_vgetq_lane_i32:
8869 case NEON::BI__builtin_neon_vgetq_lane_i64:
8870 case NEON::BI__builtin_neon_vgetq_lane_bf16:
8871 case NEON::BI__builtin_neon_vgetq_lane_f32:
8872 case NEON::BI__builtin_neon_vduph_lane_bf16:
8873 case NEON::BI__builtin_neon_vduph_laneq_bf16:
8874 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
8875
8876 case NEON::BI__builtin_neon_vrndns_f32: {
8877 Value *Arg = EmitScalarExpr(E->getArg(0));
8878 llvm::Type *Tys[] = {Arg->getType()};
8879 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
8880 return Builder.CreateCall(F, {Arg}, "vrndn"); }
8881
8882 case NEON::BI__builtin_neon_vset_lane_i8:
8883 case NEON::BI__builtin_neon_vset_lane_i16:
8884 case NEON::BI__builtin_neon_vset_lane_i32:
8885 case NEON::BI__builtin_neon_vset_lane_i64:
8886 case NEON::BI__builtin_neon_vset_lane_bf16:
8887 case NEON::BI__builtin_neon_vset_lane_f32:
8888 case NEON::BI__builtin_neon_vsetq_lane_i8:
8889 case NEON::BI__builtin_neon_vsetq_lane_i16:
8890 case NEON::BI__builtin_neon_vsetq_lane_i32:
8891 case NEON::BI__builtin_neon_vsetq_lane_i64:
8892 case NEON::BI__builtin_neon_vsetq_lane_bf16:
8893 case NEON::BI__builtin_neon_vsetq_lane_f32:
8894 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
8895
8896 case NEON::BI__builtin_neon_vsha1h_u32:
8897 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
8898 "vsha1h");
8899 case NEON::BI__builtin_neon_vsha1cq_u32:
8900 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
8901 "vsha1h");
8902 case NEON::BI__builtin_neon_vsha1pq_u32:
8903 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
8904 "vsha1h");
8905 case NEON::BI__builtin_neon_vsha1mq_u32:
8906 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
8907 "vsha1h");
8908
8909 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
8910 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
8911 "vcvtbfp2bf");
8912 }
8913
8914 // The ARM _MoveToCoprocessor builtins put the input register value as
8915 // the first argument, but the LLVM intrinsic expects it as the third one.
8916 case clang::ARM::BI_MoveToCoprocessor:
8917 case clang::ARM::BI_MoveToCoprocessor2: {
8918 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
8919 ? Intrinsic::arm_mcr
8920 : Intrinsic::arm_mcr2);
8921 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
8922 Ops[3], Ops[4], Ops[5]});
8923 }
8924 }
8925
8926 // Get the last argument, which specifies the vector type.
8927 assert(HasExtraArg);
8928 const Expr *Arg = E->getArg(E->getNumArgs()-1);
8929 std::optional<llvm::APSInt> Result =
8930 Arg->getIntegerConstantExpr(getContext());
8931 if (!Result)
8932 return nullptr;
8933
8934 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
8935 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
8936 // Determine the overloaded type of this builtin.
8937 llvm::Type *Ty;
8938 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
8939 Ty = FloatTy;
8940 else
8941 Ty = DoubleTy;
8942
8943 // Determine whether this is an unsigned conversion or not.
8944 bool usgn = Result->getZExtValue() == 1;
8945 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
8946
8947 // Call the appropriate intrinsic.
8948 Function *F = CGM.getIntrinsic(Int, Ty);
8949 return Builder.CreateCall(F, Ops, "vcvtr");
8950 }
8951
8952 // Determine the type of this overloaded NEON intrinsic.
8953 NeonTypeFlags Type = Result->getZExtValue();
8954 bool usgn = Type.isUnsigned();
8955 bool rightShift = false;
8956
8957 llvm::FixedVectorType *VTy =
8958 GetNeonType(this, Type, getTarget().hasLegalHalfType(), false,
8959 getTarget().hasBFloat16Type());
8960 llvm::Type *Ty = VTy;
8961 if (!Ty)
8962 return nullptr;
8963
8964 // Many NEON builtins have identical semantics and uses in ARM and
8965 // AArch64. Emit these in a single function.
8966 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
8967 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
8968 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
8969 if (Builtin)
8970 return EmitCommonNeonBuiltinExpr(
8971 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
8972 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
8973
8974 unsigned Int;
8975 switch (BuiltinID) {
8976 default: return nullptr;
8977 case NEON::BI__builtin_neon_vld1q_lane_v:
8978 // Handle 64-bit integer elements as a special case. Use shuffles of
8979 // one-element vectors to avoid poor code for i64 in the backend.
8980 if (VTy->getElementType()->isIntegerTy(64)) {
8981 // Extract the other lane.
8982 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8983 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
8984 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
8985 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
8986 // Load the value as a one-element vector.
8987 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
8988 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8989 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
8990 Value *Align = getAlignmentValue32(PtrOp0);
8991 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
8992 // Combine them.
8993 int Indices[] = {1 - Lane, Lane};
8994 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
8995 }
8996 [[fallthrough]];
8997 case NEON::BI__builtin_neon_vld1_lane_v: {
8998 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8999 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
9000 Value *Ld = Builder.CreateLoad(PtrOp0);
9001 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
9002 }
9003 case NEON::BI__builtin_neon_vqrshrn_n_v:
9004 Int =
9005 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
9006 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
9007 1, true);
9008 case NEON::BI__builtin_neon_vqrshrun_n_v:
9009 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
9010 Ops, "vqrshrun_n", 1, true);
9011 case NEON::BI__builtin_neon_vqshrn_n_v:
9012 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
9013 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
9014 1, true);
9015 case NEON::BI__builtin_neon_vqshrun_n_v:
9016 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
9017 Ops, "vqshrun_n", 1, true);
9018 case NEON::BI__builtin_neon_vrecpe_v:
9019 case NEON::BI__builtin_neon_vrecpeq_v:
9020 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
9021 Ops, "vrecpe");
9022 case NEON::BI__builtin_neon_vrshrn_n_v:
9023 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
9024 Ops, "vrshrn_n", 1, true);
9025 case NEON::BI__builtin_neon_vrsra_n_v:
9026 case NEON::BI__builtin_neon_vrsraq_n_v:
9027 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9028 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9029 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
9030 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
9031 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
9032 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
9033 case NEON::BI__builtin_neon_vsri_n_v:
9034 case NEON::BI__builtin_neon_vsriq_n_v:
9035 rightShift = true;
9036 [[fallthrough]];
9037 case NEON::BI__builtin_neon_vsli_n_v:
9038 case NEON::BI__builtin_neon_vsliq_n_v:
9039 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
9040 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
9041 Ops, "vsli_n");
9042 case NEON::BI__builtin_neon_vsra_n_v:
9043 case NEON::BI__builtin_neon_vsraq_n_v:
9044 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9045 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
9046 return Builder.CreateAdd(Ops[0], Ops[1]);
9047 case NEON::BI__builtin_neon_vst1q_lane_v:
9048 // Handle 64-bit integer elements as a special case. Use a shuffle to get
9049 // a one-element vector and avoid poor code for i64 in the backend.
9050 if (VTy->getElementType()->isIntegerTy(64)) {
9051 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9052 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
9053 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9054 Ops[2] = getAlignmentValue32(PtrOp0);
9055 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
9056 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
9057 Tys), Ops);
9058 }
9059 [[fallthrough]];
9060 case NEON::BI__builtin_neon_vst1_lane_v: {
9061 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9062 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
9063 return Builder.CreateStore(Ops[1],
9064 PtrOp0.withElementType(Ops[1]->getType()));
9065 }
9066 case NEON::BI__builtin_neon_vtbl1_v:
9067 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
9068 Ops, "vtbl1");
9069 case NEON::BI__builtin_neon_vtbl2_v:
9070 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
9071 Ops, "vtbl2");
9072 case NEON::BI__builtin_neon_vtbl3_v:
9073 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
9074 Ops, "vtbl3");
9075 case NEON::BI__builtin_neon_vtbl4_v:
9076 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
9077 Ops, "vtbl4");
9078 case NEON::BI__builtin_neon_vtbx1_v:
9079 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
9080 Ops, "vtbx1");
9081 case NEON::BI__builtin_neon_vtbx2_v:
9082 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
9083 Ops, "vtbx2");
9084 case NEON::BI__builtin_neon_vtbx3_v:
9085 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
9086 Ops, "vtbx3");
9087 case NEON::BI__builtin_neon_vtbx4_v:
9088 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
9089 Ops, "vtbx4");
9090 }
9091}
9092
9093template<typename Integer>
9094static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
9095 return E->getIntegerConstantExpr(Context)->getExtValue();
9096}
9097
9098static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
9099 llvm::Type *T, bool Unsigned) {
9100 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
9101 // which finds it convenient to specify signed/unsigned as a boolean flag.
9102 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
9103}
9104
9105static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
9106 uint32_t Shift, bool Unsigned) {
9107 // MVE helper function for integer shift right. This must handle signed vs
9108 // unsigned, and also deal specially with the case where the shift count is
9109 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
9110 // undefined behavior, but in MVE it's legal, so we must convert it to code
9111 // that is not undefined in IR.
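  // For example, a right shift by 32 on 32-bit lanes is emitted as a zero
  // vector in the unsigned case and as an arithmetic shift by 31 in the
  // signed case, both of which match the MVE semantics.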
9112 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
9113 ->getElementType()
9114 ->getPrimitiveSizeInBits();
9115 if (Shift == LaneBits) {
9116 // An unsigned shift of the full lane size always generates zero, so we can
9117 // simply emit a zero vector. A signed shift of the full lane size does the
9118 // same thing as shifting by one bit fewer.
9119 if (Unsigned)
9120 return llvm::Constant::getNullValue(V->getType());
9121 else
9122 --Shift;
9123 }
9124 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
9125}
9126
9127static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
9128 // MVE-specific helper function for a vector splat, which infers the element
9129 // count of the output vector by knowing that MVE vectors are all 128 bits
9130 // wide.
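  // For example, splatting an i16 scalar yields an 8-element vector and an
  // i32 scalar yields a 4-element vector.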
9131 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
9132 return Builder.CreateVectorSplat(Elements, V);
9133}
9134
9135static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
9136 CodeGenFunction *CGF,
9137 llvm::Value *V,
9138 llvm::Type *DestType) {
9139 // Convert one MVE vector type into another by reinterpreting its in-register
9140 // format.
9141 //
9142 // Little-endian, this is identical to a bitcast (which reinterprets the
9143 // memory format). But big-endian, they're not necessarily the same, because
9144 // the register and memory formats map to each other differently depending on
9145 // the lane size.
9146 //
9147 // We generate a bitcast whenever we can (if we're little-endian, or if the
9148 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
9149 // that performs the different kind of reinterpretation.
9150 if (CGF->getTarget().isBigEndian() &&
9151 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
9152 return Builder.CreateCall(
9153 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
9154 {DestType, V->getType()}),
9155 V);
9156 } else {
9157 return Builder.CreateBitCast(V, DestType);
9158 }
9159}
9160
9161static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
9162 // Make a shufflevector that extracts every other element of a vector (evens
9163 // or odds, as desired).
9164 SmallVector<int, 16> Indices;
9165 unsigned InputElements =
9166 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
9167 for (unsigned i = 0; i < InputElements; i += 2)
9168 Indices.push_back(i + Odd);
9169 return Builder.CreateShuffleVector(V, Indices);
9170}
9171
9172static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
9173 llvm::Value *V1) {
9174 // Make a shufflevector that interleaves two vectors element by element.
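  // For example, zipping two 4-element vectors uses the shuffle mask
  // <0, 4, 1, 5, 2, 6, 3, 7>.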
9175 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
9176 SmallVector<int, 16> Indices;
9177 unsigned InputElements =
9178 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
9179 for (unsigned i = 0; i < InputElements; i++) {
9180 Indices.push_back(i);
9181 Indices.push_back(i + InputElements);
9182 }
9183 return Builder.CreateShuffleVector(V0, V1, Indices);
9184}
9185
9186template<unsigned HighBit, unsigned OtherBits>
9187static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
9188 // MVE-specific helper function to make a vector splat of a constant such as
9189 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
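  // For example, with 32-bit lanes <HighBit=1, OtherBits=0> splats 0x80000000
  // (INT_MIN) and <HighBit=0, OtherBits=1> splats 0x7fffffff (INT_MAX).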
9190 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
9191 unsigned LaneBits = T->getPrimitiveSizeInBits();
9192 uint32_t Value = HighBit << (LaneBits - 1);
9193 if (OtherBits)
9194 Value |= (1UL << (LaneBits - 1)) - 1;
9195 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
9196 return ARMMVEVectorSplat(Builder, Lane);
9197}
9198
9199static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
9200 llvm::Value *V,
9201 unsigned ReverseWidth) {
9202 // MVE-specific helper function which reverses the elements of a
9203 // vector within every (ReverseWidth)-bit collection of lanes.
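  // For example, with 8-bit lanes and ReverseWidth == 32 the mask is 3, so the
  // shuffle indices are <3,2,1,0, 7,6,5,4, ...>: a byte reverse within each
  // 32-bit word.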
9204 SmallVector<int, 16> Indices;
9205 unsigned LaneSize = V->getType()->getScalarSizeInBits();
9206 unsigned Elements = 128 / LaneSize;
9207 unsigned Mask = ReverseWidth / LaneSize - 1;
9208 for (unsigned i = 0; i < Elements; i++)
9209 Indices.push_back(i ^ Mask);
9210 return Builder.CreateShuffleVector(V, Indices);
9211}
9212
9213Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
9214 const CallExpr *E,
9215 ReturnValueSlot ReturnValue,
9216 llvm::Triple::ArchType Arch) {
9217 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
9218 Intrinsic::ID IRIntr;
9219 unsigned NumVectors;
9220
9221 // Code autogenerated by Tablegen will handle all the simple builtins.
9222 switch (BuiltinID) {
9223 #include "clang/Basic/arm_mve_builtin_cg.inc"
9224
9225 // If we didn't match an MVE builtin id at all, go back to the
9226 // main EmitARMBuiltinExpr.
9227 default:
9228 return nullptr;
9229 }
9230
9231 // Anything that breaks from that switch is an MVE builtin that
9232 // needs handwritten code to generate.
9233
9234 switch (CustomCodeGenType) {
9235
9236 case CustomCodeGen::VLD24: {
9237 llvm::SmallVector<Value *, 4> Ops;
9238 llvm::SmallVector<llvm::Type *, 4> Tys;
9239
9240 auto MvecCType = E->getType();
9241 auto MvecLType = ConvertType(MvecCType);
9242 assert(MvecLType->isStructTy() &&
9243 "Return type for vld[24]q should be a struct");
9244 assert(MvecLType->getStructNumElements() == 1 &&
9245 "Return-type struct for vld[24]q should have one element");
9246 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9247 assert(MvecLTypeInner->isArrayTy() &&
9248 "Return-type struct for vld[24]q should contain an array");
9249 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9250 "Array member of return-type struct vld[24]q has wrong length");
9251 auto VecLType = MvecLTypeInner->getArrayElementType();
9252
9253 Tys.push_back(VecLType);
9254
9255 auto Addr = E->getArg(0);
9256 Ops.push_back(EmitScalarExpr(Addr));
9257 Tys.push_back(ConvertType(Addr->getType()));
9258
9259 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9260 Value *LoadResult = Builder.CreateCall(F, Ops);
9261 Value *MvecOut = PoisonValue::get(MvecLType);
9262 for (unsigned i = 0; i < NumVectors; ++i) {
9263 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
9264 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
9265 }
9266
9267 if (ReturnValue.isNull())
9268 return MvecOut;
9269 else
9270 return Builder.CreateStore(MvecOut, ReturnValue.getValue());
9271 }
9272
9273 case CustomCodeGen::VST24: {
9274 llvm::SmallVector<Value *, 4> Ops;
9275 llvm::SmallVector<llvm::Type *, 4> Tys;
9276
9277 auto Addr = E->getArg(0);
9278 Ops.push_back(EmitScalarExpr(Addr));
9279 Tys.push_back(ConvertType(Addr->getType()));
9280
9281 auto MvecCType = E->getArg(1)->getType();
9282 auto MvecLType = ConvertType(MvecCType);
9283 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
9284 assert(MvecLType->getStructNumElements() == 1 &&
9285 "Data-type struct for vst2q should have one element");
9286 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9287 assert(MvecLTypeInner->isArrayTy() &&
9288 "Data-type struct for vst2q should contain an array");
9289 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9290 "Array member of data-type struct for vst2q has wrong length");
9291 auto VecLType = MvecLTypeInner->getArrayElementType();
9292
9293 Tys.push_back(VecLType);
9294
9295 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
9296 EmitAggExpr(E->getArg(1), MvecSlot);
9297 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
9298 for (unsigned i = 0; i < NumVectors; i++)
9299 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
9300
9301 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9302 Value *ToReturn = nullptr;
9303 for (unsigned i = 0; i < NumVectors; i++) {
9304 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
9305 ToReturn = Builder.CreateCall(F, Ops);
9306 Ops.pop_back();
9307 }
9308 return ToReturn;
9309 }
9310 }
9311 llvm_unreachable("unknown custom codegen type.");
9312}
9313
9314Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
9315 const CallExpr *E,
9316 ReturnValueSlot ReturnValue,
9317 llvm::Triple::ArchType Arch) {
9318 switch (BuiltinID) {
9319 default:
9320 return nullptr;
9321#include "clang/Basic/arm_cde_builtin_cg.inc"
9322 }
9323}
9324
9325static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
9326 const CallExpr *E,
9327 SmallVectorImpl<Value *> &Ops,
9328 llvm::Triple::ArchType Arch) {
9329 unsigned int Int = 0;
9330 const char *s = nullptr;
9331
9332 switch (BuiltinID) {
9333 default:
9334 return nullptr;
9335 case NEON::BI__builtin_neon_vtbl1_v:
9336 case NEON::BI__builtin_neon_vqtbl1_v:
9337 case NEON::BI__builtin_neon_vqtbl1q_v:
9338 case NEON::BI__builtin_neon_vtbl2_v:
9339 case NEON::BI__builtin_neon_vqtbl2_v:
9340 case NEON::BI__builtin_neon_vqtbl2q_v:
9341 case NEON::BI__builtin_neon_vtbl3_v:
9342 case NEON::BI__builtin_neon_vqtbl3_v:
9343 case NEON::BI__builtin_neon_vqtbl3q_v:
9344 case NEON::BI__builtin_neon_vtbl4_v:
9345 case NEON::BI__builtin_neon_vqtbl4_v:
9346 case NEON::BI__builtin_neon_vqtbl4q_v:
9347 break;
9348 case NEON::BI__builtin_neon_vtbx1_v:
9349 case NEON::BI__builtin_neon_vqtbx1_v:
9350 case NEON::BI__builtin_neon_vqtbx1q_v:
9351 case NEON::BI__builtin_neon_vtbx2_v:
9352 case NEON::BI__builtin_neon_vqtbx2_v:
9353 case NEON::BI__builtin_neon_vqtbx2q_v:
9354 case NEON::BI__builtin_neon_vtbx3_v:
9355 case NEON::BI__builtin_neon_vqtbx3_v:
9356 case NEON::BI__builtin_neon_vqtbx3q_v:
9357 case NEON::BI__builtin_neon_vtbx4_v:
9358 case NEON::BI__builtin_neon_vqtbx4_v:
9359 case NEON::BI__builtin_neon_vqtbx4q_v:
9360 break;
9361 }
9362
9363 assert(E->getNumArgs() >= 3);
9364
9365 // Get the last argument, which specifies the vector type.
9366 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
9367 std::optional<llvm::APSInt> Result =
9368 Arg->getIntegerConstantExpr(CGF.getContext());
9369 if (!Result)
9370 return nullptr;
9371
9372 // Determine the type of this overloaded NEON intrinsic.
9373 NeonTypeFlags Type = Result->getZExtValue();
9374 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
9375 if (!Ty)
9376 return nullptr;
9377
9378 CodeGen::CGBuilderTy &Builder = CGF.Builder;
9379
9380 // AArch64 scalar builtins are not overloaded; they do not have an extra
9381 // argument that specifies the vector type, so we need to handle each case.
9382 switch (BuiltinID) {
9383 case NEON::BI__builtin_neon_vtbl1_v: {
9384 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
9385 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9386 }
9387 case NEON::BI__builtin_neon_vtbl2_v: {
9388 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
9389 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9390 }
9391 case NEON::BI__builtin_neon_vtbl3_v: {
9392 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
9393 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9394 }
9395 case NEON::BI__builtin_neon_vtbl4_v: {
9396 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
9397 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9398 }
9399 case NEON::BI__builtin_neon_vtbx1_v: {
9400 Value *TblRes =
9401 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
9402 Intrinsic::aarch64_neon_tbl1, "vtbl1");
9403
9404 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
9405 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
9406 CmpRes = Builder.CreateSExt(CmpRes, Ty);
9407
9408 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9409 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9410 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
9411 }
9412 case NEON::BI__builtin_neon_vtbx2_v: {
9413 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
9414 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
9415 }
9416 case NEON::BI__builtin_neon_vtbx3_v: {
9417 Value *TblRes =
9418 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
9419 Intrinsic::aarch64_neon_tbl2, "vtbl2");
9420
9421 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
9422 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
9423 TwentyFourV);
9424 CmpRes = Builder.CreateSExt(CmpRes, Ty);
9425
9426 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9427 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9428 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
9429 }
9430 case NEON::BI__builtin_neon_vtbx4_v: {
9431 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
9432 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
9433 }
9434 case NEON::BI__builtin_neon_vqtbl1_v:
9435 case NEON::BI__builtin_neon_vqtbl1q_v:
9436 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
9437 case NEON::BI__builtin_neon_vqtbl2_v:
9438 case NEON::BI__builtin_neon_vqtbl2q_v: {
9439 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
9440 case NEON::BI__builtin_neon_vqtbl3_v:
9441 case NEON::BI__builtin_neon_vqtbl3q_v:
9442 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
9443 case NEON::BI__builtin_neon_vqtbl4_v:
9444 case NEON::BI__builtin_neon_vqtbl4q_v:
9445 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
9446 case NEON::BI__builtin_neon_vqtbx1_v:
9447 case NEON::BI__builtin_neon_vqtbx1q_v:
9448 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
9449 case NEON::BI__builtin_neon_vqtbx2_v:
9450 case NEON::BI__builtin_neon_vqtbx2q_v:
9451 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
9452 case NEON::BI__builtin_neon_vqtbx3_v:
9453 case NEON::BI__builtin_neon_vqtbx3q_v:
9454 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
9455 case NEON::BI__builtin_neon_vqtbx4_v:
9456 case NEON::BI__builtin_neon_vqtbx4q_v:
9457 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
9458 }
9459 }
9460
9461 if (!Int)
9462 return nullptr;
9463
9464 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
9465 return CGF.EmitNeonCall(F, Ops, s);
9466}
9467
9468Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
9469 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
9470 Op = Builder.CreateBitCast(Op, Int16Ty);
9471 Value *V = PoisonValue::get(VTy);
9472 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
9473 Op = Builder.CreateInsertElement(V, Op, CI);
9474 return Op;
9475}
9476
9477/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
9478/// access builtin. Only required if it can't be inferred from the base pointer
9479/// operand.
9480llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
9481 switch (TypeFlags.getMemEltType()) {
9482 case SVETypeFlags::MemEltTyDefault:
9483 return getEltType(TypeFlags);
9484 case SVETypeFlags::MemEltTyInt8:
9485 return Builder.getInt8Ty();
9486 case SVETypeFlags::MemEltTyInt16:
9487 return Builder.getInt16Ty();
9488 case SVETypeFlags::MemEltTyInt32:
9489 return Builder.getInt32Ty();
9490 case SVETypeFlags::MemEltTyInt64:
9491 return Builder.getInt64Ty();
9492 }
9493 llvm_unreachable("Unknown MemEltType");
9494}
9495
9496llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
9497 switch (TypeFlags.getEltType()) {
9498 default:
9499 llvm_unreachable("Invalid SVETypeFlag!");
9500
9501 case SVETypeFlags::EltTyInt8:
9502 return Builder.getInt8Ty();
9503 case SVETypeFlags::EltTyInt16:
9504 return Builder.getInt16Ty();
9505 case SVETypeFlags::EltTyInt32:
9506 return Builder.getInt32Ty();
9507 case SVETypeFlags::EltTyInt64:
9508 return Builder.getInt64Ty();
9509 case SVETypeFlags::EltTyInt128:
9510 return Builder.getInt128Ty();
9511
9512 case SVETypeFlags::EltTyFloat16:
9513 return Builder.getHalfTy();
9514 case SVETypeFlags::EltTyFloat32:
9515 return Builder.getFloatTy();
9516 case SVETypeFlags::EltTyFloat64:
9517 return Builder.getDoubleTy();
9518
9519 case SVETypeFlags::EltTyBFloat16:
9520 return Builder.getBFloatTy();
9521
9522 case SVETypeFlags::EltTyBool8:
9523 case SVETypeFlags::EltTyBool16:
9524 case SVETypeFlags::EltTyBool32:
9525 case SVETypeFlags::EltTyBool64:
9526 return Builder.getInt1Ty();
9527 }
9528}
9529
9530// Return the llvm predicate vector type corresponding to the specified element
9531// TypeFlags.
9532llvm::ScalableVectorType *
9533CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
9534 switch (TypeFlags.getEltType()) {
9535 default: llvm_unreachable("Unhandled SVETypeFlag!");
9536
9537 case SVETypeFlags::EltTyInt8:
9538 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9539 case SVETypeFlags::EltTyInt16:
9540 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9541 case SVETypeFlags::EltTyInt32:
9542 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9543 case SVETypeFlags::EltTyInt64:
9544 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9545
9546 case SVETypeFlags::EltTyBFloat16:
9547 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9548 case SVETypeFlags::EltTyFloat16:
9549 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9550 case SVETypeFlags::EltTyFloat32:
9551 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9552 case SVETypeFlags::EltTyFloat64:
9553 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9554
9555 case SVETypeFlags::EltTyBool8:
9556 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9557 case SVETypeFlags::EltTyBool16:
9558 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9559 case SVETypeFlags::EltTyBool32:
9560 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9561 case SVETypeFlags::EltTyBool64:
9562 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9563 }
9564}
9565
9566// Return the llvm vector type corresponding to the specified element TypeFlags.
9567llvm::ScalableVectorType *
9568CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
9569 switch (TypeFlags.getEltType()) {
9570 default:
9571 llvm_unreachable("Invalid SVETypeFlag!");
9572
9573 case SVETypeFlags::EltTyInt8:
9574 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
9575 case SVETypeFlags::EltTyInt16:
9576 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
9577 case SVETypeFlags::EltTyInt32:
9578 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
9579 case SVETypeFlags::EltTyInt64:
9580 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
9581
9582 case SVETypeFlags::EltTyFloat16:
9583 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
9584 case SVETypeFlags::EltTyBFloat16:
9585 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
9586 case SVETypeFlags::EltTyFloat32:
9587 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
9588 case SVETypeFlags::EltTyFloat64:
9589 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
9590
9591 case SVETypeFlags::EltTyBool8:
9592 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9593 case SVETypeFlags::EltTyBool16:
9594 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9595 case SVETypeFlags::EltTyBool32:
9596 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9597 case SVETypeFlags::EltTyBool64:
9598 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9599 }
9600}
9601
9602llvm::Value *
9603CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
9604 Function *Ptrue =
9605 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
9606 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
9607}
9608
9609constexpr unsigned SVEBitsPerBlock = 128;
9610
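// For example, for an i16 element type the helper below returns a
// <vscale x 8 x i16>, since a 128-bit SVE block holds eight 16-bit lanes.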
9611static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
9612 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
9613 return llvm::ScalableVectorType::get(EltTy, NumElts);
9614}
9615
9616// Reinterpret the input predicate so that it can be used to correctly isolate
9617// the elements of the specified datatype.
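// For example, an svbool_t predicate (<vscale x 16 x i1>) that guards 64-bit
// data is converted via llvm.aarch64.sve.convert.from.svbool into the
// <vscale x 2 x i1> type expected by the intrinsic.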
9618llvm::Value *CodeGenFunction::EmitSVEPredicateCast(llvm::Value *Pred,
9619 llvm::ScalableVectorType *VTy) {
9620
9621 if (isa<TargetExtType>(Pred->getType()) &&
9622 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
9623 return Pred;
9624
9625 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
9626 if (Pred->getType() == RTy)
9627 return Pred;
9628
9629 unsigned IntID;
9630 llvm::Type *IntrinsicTy;
9631 switch (VTy->getMinNumElements()) {
9632 default:
9633 llvm_unreachable("unsupported element count!");
9634 case 1:
9635 case 2:
9636 case 4:
9637 case 8:
9638 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
9639 IntrinsicTy = RTy;
9640 break;
9641 case 16:
9642 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
9643 IntrinsicTy = Pred->getType();
9644 break;
9645 }
9646
9647 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
9648 Value *C = Builder.CreateCall(F, Pred);
9649 assert(C->getType() == RTy && "Unexpected return type!");
9650 return C;
9651}
9652
9653Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
9654 SmallVectorImpl<Value *> &Ops,
9655 unsigned IntID) {
9656 auto *ResultTy = getSVEType(TypeFlags);
9657 auto *OverloadedTy =
9658 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
9659
9660 Function *F = nullptr;
9661 if (Ops[1]->getType()->isVectorTy())
9662 // This is the "vector base, scalar offset" case. In order to uniquely
9663 // map this built-in to an LLVM IR intrinsic, we need both the return type
9664 // and the type of the vector base.
9665 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
9666 else
9667 // This is the "scalar base, vector offset" case. The type of the offset
9668 // is encoded in the name of the intrinsic. We only need to specify the
9669 // return type in order to uniquely map this built-in to an LLVM IR
9670 // intrinsic.
9671 F = CGM.getIntrinsic(IntID, OverloadedTy);
9672
9673 // At the ACLE level there's only one predicate type, svbool_t, which is
9674 // mapped to <n x 16 x i1>. However, this might be incompatible with the
9675 // actual type being loaded. For example, when loading doubles (i64) the
9676 // predicate should be <n x 2 x i1> instead. At the IR level the type of
9677 // the predicate and the data being loaded must match. Cast to the type
9678 // expected by the intrinsic. The intrinsic itself should be defined in
9679 // a way that enforces relations between parameter types.
9680 Ops[0] = EmitSVEPredicateCast(
9681 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
9682
9683 // Pass 0 when the offset is missing. This can only be applied when using
9684 // the "vector base" addressing mode for which ACLE allows no offset. The
9685 // corresponding LLVM IR always requires an offset.
9686 if (Ops.size() == 2) {
9687 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
9688 Ops.push_back(ConstantInt::get(Int64Ty, 0));
9689 }
9690
9691 // For "vector base, scalar index" scale the index so that it becomes a
9692 // scalar offset.
9693 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
9694 unsigned BytesPerElt =
9695 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
9696 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
9697 }
9698
9699 Value *Call = Builder.CreateCall(F, Ops);
9700
9701 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
9702 // other cases it's folded into a nop.
9703 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
9704 : Builder.CreateSExt(Call, ResultTy);
9705}
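// Illustrative walk-through of the code above: a gather that loads 8-bit
// memory elements into 64-bit result lanes uses OverloadedTy =
// <vscale x 2 x i8>, narrows the svbool_t predicate to <vscale x 2 x i1>,
// appends a zero offset for the "vector base" form, scales a scalar index by
// the memory element size, and finally zero- or sign-extends the
// <vscale x 2 x i8> call result to the <vscale x 2 x i64> return type.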
9706
9707 Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
9708 SmallVectorImpl<Value *> &Ops,
9709 unsigned IntID) {
9710 auto *SrcDataTy = getSVEType(TypeFlags);
9711 auto *OverloadedTy =
9712 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
9713
9714 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
9715 // it's the first argument. Move it accordingly.
9716 Ops.insert(Ops.begin(), Ops.pop_back_val());
9717
9718 Function *F = nullptr;
9719 if (Ops[2]->getType()->isVectorTy())
9720 // This is the "vector base, scalar offset" case. In order to uniquely
9721 // map this built-in to an LLVM IR intrinsic, we need both the return type
9722 // and the type of the vector base.
9723 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
9724 else
9725 // This is the "scalar base, vector offset" case. The type of the offset
9726 // is encoded in the name of the intrinsic. We only need to specify the
9727 // return type in order to uniquely map this built-in to an LLVM IR
9728 // intrinsic.
9729 F = CGM.getIntrinsic(IntID, OverloadedTy);
9730
9731 // Pass 0 when the offset is missing. This can only be applied when using
9732 // the "vector base" addressing mode for which ACLE allows no offset. The
9733 // corresponding LLVM IR always requires an offset.
9734 if (Ops.size() == 3) {
9735 assert(Ops[2]->getType()->isVectorTy() && "Scalar base requires an offset");
9736 Ops.push_back(ConstantInt::get(Int64Ty, 0));
9737 }
9738
9739 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
9740 // folded into a nop.
9741 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
9742
9743 // At the ACLE level there's only one predicate type, svbool_t, which is
9744 // mapped to <n x 16 x i1>. However, this might be incompatible with the
9745 // actual type being stored. For example, when storing doubles (i64) the
9746 // predicate should be <n x 2 x i1> instead. At the IR level the type of
9747 // the predicate and the data being stored must match. Cast to the type
9748 // expected by the intrinsic. The intrinsic itself should be defined in
9749 // a way that enforces relations between parameter types.
9750 Ops[1] = EmitSVEPredicateCast(
9751 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
9752
9753 // For "vector base, scalar index" scale the index so that it becomes a
9754 // scalar offset.
9755 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
9756 unsigned BytesPerElt =
9757 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
9758 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
9759 }
9760
9761 return Builder.CreateCall(F, Ops);
9762}
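// Illustrative walk-through (the mirror image of the gather case): a scatter
// that stores 64-bit lanes as 8-bit memory elements truncates the data to
// <vscale x 2 x i8>, narrows the predicate to <vscale x 2 x i1>, and, for the
// "vector base, scalar index" form, shifts the index left by log2 of the
// memory element size so it becomes a byte offset.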
9763
9764 Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
9765 SmallVectorImpl<Value *> &Ops,
9766 unsigned IntID) {
9767 // The gather prefetches are overloaded on the vector input - this can either
9768 // be the vector of base addresses or vector of offsets.
9769 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
9770 if (!OverloadedTy)
9771 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
9772
9773 // Cast the predicate from svbool_t to the right number of elements.
9774 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
9775
9776 // vector + imm addressing modes
9777 if (Ops[1]->getType()->isVectorTy()) {
9778 if (Ops.size() == 3) {
9779 // Pass 0 for 'vector+imm' when the index is omitted.
9780 Ops.push_back(ConstantInt::get(Int64Ty, 0));
9781
9782 // The sv_prfop is the last operand in the builtin and IR intrinsic.
9783 std::swap(Ops[2], Ops[3]);
9784 } else {
9785 // Index needs to be passed as scaled offset.
9786 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
9787 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
9788 if (BytesPerElt > 1)
9789 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
9790 }
9791 }
9792
9793 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
9794 return Builder.CreateCall(F, Ops);
9795}
9796
9797 Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
9798 SmallVectorImpl<Value *> &Ops,
9799 unsigned IntID) {
9800 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
9801
9802 unsigned N;
9803 switch (IntID) {
9804 case Intrinsic::aarch64_sve_ld2_sret:
9805 case Intrinsic::aarch64_sve_ld1_pn_x2:
9806 case Intrinsic::aarch64_sve_ldnt1_pn_x2:
9807 case Intrinsic::aarch64_sve_ld2q_sret:
9808 N = 2;
9809 break;
9810 case Intrinsic::aarch64_sve_ld3_sret:
9811 case Intrinsic::aarch64_sve_ld3q_sret:
9812 N = 3;
9813 break;
9814 case Intrinsic::aarch64_sve_ld4_sret:
9815 case Intrinsic::aarch64_sve_ld1_pn_x4:
9816 case Intrinsic::aarch64_sve_ldnt1_pn_x4:
9817 case Intrinsic::aarch64_sve_ld4q_sret:
9818 N = 4;
9819 break;
9820 default:
9821 llvm_unreachable("unknown intrinsic!");
9822 }
9823 auto RetTy = llvm::VectorType::get(VTy->getElementType(),
9824 VTy->getElementCount() * N);
9825
9826 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
9827 Value *BasePtr = Ops[1];
9828
9829 // Does the load have an offset?
9830 if (Ops.size() > 2)
9831 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
9832
9833 Function *F = CGM.getIntrinsic(IntID, {VTy});
9834 Value *Call = Builder.CreateCall(F, {Predicate, BasePtr});
9835 unsigned MinElts = VTy->getMinNumElements();
9836 Value *Ret = llvm::PoisonValue::get(RetTy);
9837 for (unsigned I = 0; I < N; I++) {
9838 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
9839 Value *SRet = Builder.CreateExtractValue(Call, I);
9840 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
9841 }
9842 return Ret;
9843}
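// Illustrative example: an ld2 of 32-bit elements yields two
// <vscale x 4 x i32> parts from the sret intrinsic; the loop above packs them
// into one <vscale x 8 x i32> with llvm.vector.insert at element offsets 0 and
// 4, which is the wide representation used here for ACLE tuple types such as
// svint32x2_t.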
9844
9845 Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
9846 SmallVectorImpl<Value *> &Ops,
9847 unsigned IntID) {
9848 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
9849
9850 unsigned N;
9851 switch (IntID) {
9852 case Intrinsic::aarch64_sve_st2:
9853 case Intrinsic::aarch64_sve_st1_pn_x2:
9854 case Intrinsic::aarch64_sve_stnt1_pn_x2:
9855 case Intrinsic::aarch64_sve_st2q:
9856 N = 2;
9857 break;
9858 case Intrinsic::aarch64_sve_st3:
9859 case Intrinsic::aarch64_sve_st3q:
9860 N = 3;
9861 break;
9862 case Intrinsic::aarch64_sve_st4:
9863 case Intrinsic::aarch64_sve_st1_pn_x4:
9864 case Intrinsic::aarch64_sve_stnt1_pn_x4:
9865 case Intrinsic::aarch64_sve_st4q:
9866 N = 4;
9867 break;
9868 default:
9869 llvm_unreachable("unknown intrinsic!");
9870 }
9871
9872 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
9873 Value *BasePtr = Ops[1];
9874
9875 // Does the store have an offset?
9876 if (Ops.size() > (2 + N))
9877 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
9878
9879 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
9880 // need to break up the tuple vector.
9881 SmallVector<llvm::Value*, 5> Operands;
9882 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
9883 Operands.push_back(Ops[I]);
9884 Operands.append({Predicate, BasePtr});
9885 Function *F = CGM.getIntrinsic(IntID, { VTy });
9886
9887 return Builder.CreateCall(F, Operands);
9888}
9889
9890// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
9891// svpmullt_pair intrinsics, with the exception that their results are bitcast
9892// to a wider type.
9893 Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
9894 SmallVectorImpl<Value *> &Ops,
9895 unsigned BuiltinID) {
9896 // Splat scalar operand to vector (intrinsics with _n infix)
9897 if (TypeFlags.hasSplatOperand()) {
9898 unsigned OpNo = TypeFlags.getSplatOperand();
9899 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
9900 }
9901
9902 // The pair-wise function has a narrower overloaded type.
9903 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
9904 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
9905
9906 // Now bitcast to the wider result type.
9907 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
9908 return EmitSVEReinterpret(Call, Ty);
9909}
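// Illustrative example: a widening polynomial multiply calls the *_pair
// intrinsic on the narrower operand type (e.g. <vscale x 4 x i32>) and then
// reinterprets (bitcasts) the result as the wider ACLE return type
// (e.g. <vscale x 2 x i64>), as described in the comment above.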
9910
9911 Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
9912 ArrayRef<Value *> Ops, unsigned BuiltinID) {
9913 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
9914 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
9915 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
9916}
9917
9918 Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
9919 SmallVectorImpl<Value *> &Ops,
9920 unsigned BuiltinID) {
9921 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
9922 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
9923 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
9924
9925 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
9926 Value *BasePtr = Ops[1];
9927
9928 // Apply the index operand, if present, to the base pointer.
9929 if (Ops.size() > 3)
9930 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
9931
9932 Value *PrfOp = Ops.back();
9933
9934 Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
9935 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
9936}
9937
9938 Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
9939 llvm::Type *ReturnTy,
9940 SmallVectorImpl<Value *> &Ops,
9941 unsigned IntrinsicID,
9942 bool IsZExtReturn) {
9943 QualType LangPTy = E->getArg(1)->getType();
9944 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
9945 LangPTy->castAs<PointerType>()->getPointeeType());
9946
9947 // The vector type that is returned may be different from the
9948 // eventual type loaded from memory.
9949 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
9950 llvm::ScalableVectorType *MemoryTy = nullptr;
9951 llvm::ScalableVectorType *PredTy = nullptr;
9952 bool IsQuadLoad = false;
9953 switch (IntrinsicID) {
9954 case Intrinsic::aarch64_sve_ld1uwq:
9955 case Intrinsic::aarch64_sve_ld1udq:
9956 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
9957 PredTy = llvm::ScalableVectorType::get(
9958 llvm::Type::getInt1Ty(getLLVMContext()), 1);
9959 IsQuadLoad = true;
9960 break;
9961 default:
9962 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
9963 PredTy = MemoryTy;
9964 break;
9965 }
9966
9967 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
9968 Value *BasePtr = Ops[1];
9969
9970 // Does the load have an offset?
9971 if (Ops.size() > 2)
9972 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
9973
9974 Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
9975 auto *Load =
9976 cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
9977 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
9978 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
9979
9980 if (IsQuadLoad)
9981 return Load;
9982
9983 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
9984 : Builder.CreateSExt(Load, VectorTy);
9985}
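// Illustrative example: an extending load of signed bytes into 32-bit lanes
// uses MemoryTy = <vscale x 4 x i8> and a <vscale x 4 x i1> predicate,
// attaches TBAA metadata for the pointee type, and then sign-extends (or
// zero-extends, per IsZExtReturn) the loaded value to the <vscale x 4 x i32>
// return type. The quad-word loads (ld1uwq/ld1udq) skip the extension and use
// the one-element memory/predicate types set up above.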
9986
9987 Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
9988 SmallVectorImpl<Value *> &Ops,
9989 unsigned IntrinsicID) {
9990 QualType LangPTy = E->getArg(1)->getType();
9991 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
9992 LangPTy->castAs<PointerType>()->getPointeeType());
9993
9994 // The vector type that is stored may be different from the
9995 // eventual type stored to memory.
9996 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
9997 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
9998
9999 auto PredTy = MemoryTy;
10000 auto AddrMemoryTy = MemoryTy;
10001 bool IsQuadStore = false;
10002
10003 switch (IntrinsicID) {
10004 case Intrinsic::aarch64_sve_st1wq:
10005 case Intrinsic::aarch64_sve_st1dq:
10006 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10007 PredTy =
10008 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
10009 IsQuadStore = true;
10010 break;
10011 default:
10012 break;
10013 }
10014 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10015 Value *BasePtr = Ops[1];
10016
10017 // Does the store have an offset?
10018 if (Ops.size() == 4)
10019 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
10020
10021 // Last value is always the data
10022 Value *Val =
10023 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
10024
10025 Function *F =
10026 CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
10027 auto *Store =
10028 cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
10029 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10030 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
10031 return Store;
10032}
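// Illustrative example: a truncating store of 32-bit lanes as bytes truncates
// the <vscale x 4 x i32> data to MemoryTy = <vscale x 4 x i8> before the call,
// while the quad-word stores (st1wq/st1dq) pass the value through unchanged
// and predicate on the single-element <vscale x 1 x i1> type set up above.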
10033
10034 Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
10035 SmallVectorImpl<Value *> &Ops,
10036 unsigned IntID) {
10037 Ops[2] = EmitSVEPredicateCast(
10038 Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
10039
10040 SmallVector<Value *> NewOps;
10041 NewOps.push_back(Ops[2]);
10042
10043 llvm::Value *BasePtr = Ops[3];
10044
10045 // If the intrinsic contains the vnum parameter, multiply it with the vector
10046 // size in bytes.
10047 if (Ops.size() == 5) {
10048 Function *StreamingVectorLength =
10049 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
10050 llvm::Value *StreamingVectorLengthCall =
10051 Builder.CreateCall(StreamingVectorLength);
10052 llvm::Value *Mulvl =
10053 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
10054 // The type of the ptr parameter is void *, so use Int8Ty here.
10055 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
10056 }
10057 NewOps.push_back(BasePtr);
10058 NewOps.push_back(Ops[0]);
10059 NewOps.push_back(Ops[1]);
10060 Function *F = CGM.getIntrinsic(IntID);
10061 return Builder.CreateCall(F, NewOps);
10062}
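// Illustrative sketch of the _vnum handling above: the base pointer is
// advanced by vnum multiplied by the streaming vector length in bytes, i.e.
// roughly
//   %svlb = call i64 @llvm.aarch64.sme.cntsb()
//   %mulvl = mul i64 %svlb, %vnum
//   %ptr = getelementptr i8, ptr %base, i64 %mulvl
// before the reordered operands {predicate, pointer, tile/slice arguments}
// are passed to the SME load/store intrinsic.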
10063
10064 Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
10065 SmallVectorImpl<Value *> &Ops,
10066 unsigned IntID) {
10067 auto *VecTy = getSVEType(TypeFlags);
10068 Function *F = CGM.getIntrinsic(IntID, VecTy);
10069 if (TypeFlags.isReadZA())
10070 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
10071 else if (TypeFlags.isWriteZA())
10072 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
10073 return Builder.CreateCall(F, Ops);
10074}
10075
10076 Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
10077 SmallVectorImpl<Value *> &Ops,
10078 unsigned IntID) {
10079 // The svzero_za() intrinsic zeros the entire ZA array and has no parameters.
10080 if (Ops.size() == 0)
10081 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
10082 Function *F = CGM.getIntrinsic(IntID, {});
10083 return Builder.CreateCall(F, Ops);
10084}
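// Illustrative example: svzero_za() takes no arguments, so the code above
// synthesizes a full tile mask of 255 (0b11111111), zeroing every ZA tile,
// whereas svzero_mask_za(mask) passes the user-supplied mask through as-is.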
10085
10086 Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
10087 SmallVectorImpl<Value *> &Ops,
10088 unsigned IntID) {
10089 if (Ops.size() == 2)
10090 Ops.push_back(Builder.getInt32(0));
10091 else
10092 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
10093 Function *F = CGM.getIntrinsic(IntID, {});
10094 return Builder.CreateCall(F, Ops);
10095}
10096
10097 // Splat the scalar operand across a scalable vector of the given type. The
10098 // splat is emitted with IRBuilder::CreateVectorSplat; the backend selects an SVE dup for it.
10099Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
10100 return Builder.CreateVectorSplat(
10101 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
10102}
10103
10104 Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
10105 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
10106}
10107
10108Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
10109 // FIXME: For big endian this needs an additional REV, or needs a separate
10110 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
10111 // instruction is defined as 'bitwise' equivalent from memory point of
10112 // view (when storing/reloading), whereas the svreinterpret builtin
10113 // implements bitwise equivalent cast from register point of view.
10114 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
10115 return Builder.CreateBitCast(Val, Ty);
10116}
10117
10118 static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10119 SmallVectorImpl<Value *> &Ops) {
10120 auto *SplatZero = Constant::getNullValue(Ty);
10121 Ops.insert(Ops.begin(), SplatZero);
10122}
10123
10124 static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10125 SmallVectorImpl<Value *> &Ops) {
10126 auto *SplatUndef = UndefValue::get(Ty);
10127 Ops.insert(Ops.begin(), SplatUndef);
10128}
10129
10130 SmallVector<llvm::Type *, 2>
10131 CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
10132 llvm::Type *ResultType,
10133 ArrayRef<Value *> Ops) {
10134 if (TypeFlags.isOverloadNone())
10135 return {};
10136
10137 llvm::Type *DefaultType = getSVEType(TypeFlags);
10138
10139 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
10140 return {DefaultType, Ops[1]->getType()};
10141
10142 if (TypeFlags.isOverloadWhileRW())
10143 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
10144
10145 if (TypeFlags.isOverloadCvt())
10146 return {Ops[0]->getType(), Ops.back()->getType()};
10147
10148 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
10149 ResultType->isVectorTy())
10150 return {ResultType, Ops[1]->getType()};
10151
10152 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
10153 return {DefaultType};
10154}
10155
10156 Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
10157 llvm::Type *Ty,
10158 ArrayRef<Value *> Ops) {
10159 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
10160 "Expects TypeFlags.isTupleSet() or TypeFlags.isTupleGet()");
10161
10162 unsigned I = cast<ConstantInt>(Ops[1])->getSExtValue();
10163 auto *SingleVecTy = dyn_cast<llvm::ScalableVectorType>(
10164 TypeFlags.isTupleSet() ? Ops[2]->getType() : Ty);
10165 Value *Idx = ConstantInt::get(CGM.Int64Ty,
10166 I * SingleVecTy->getMinNumElements());
10167
10168 if (TypeFlags.isTupleSet())
10169 return Builder.CreateInsertVector(Ty, Ops[0], Ops[2], Idx);
10170 return Builder.CreateExtractVector(Ty, Ops[0], Idx);
10171}
10172
10173 Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
10174 llvm::Type *Ty,
10175 ArrayRef<Value *> Ops) {
10176 assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
10177
10178 auto *SrcTy = dyn_cast<llvm::ScalableVectorType>(Ops[0]->getType());
10179 unsigned MinElts = SrcTy->getMinNumElements();
10180 Value *Call = llvm::PoisonValue::get(Ty);
10181 for (unsigned I = 0; I < Ops.size(); I++) {
10182 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10183 Call = Builder.CreateInsertVector(Ty, Call, Ops[I], Idx);
10184 }
10185
10186 return Call;
10187}
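// Illustrative example: svcreate2 on two <vscale x 4 x i32> inputs emits
// llvm.vector.insert twice, at element offsets 0 and 4, building the single
// wide <vscale x 8 x i32> value that represents the svint32x2_t tuple.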
10188
10189 Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) {
10190 // Multi-vector results should be broken up into a single (wide) result
10191 // vector.
10192 auto *StructTy = dyn_cast<StructType>(Call->getType());
10193 if (!StructTy)
10194 return Call;
10195
10196 auto *VTy = dyn_cast<ScalableVectorType>(StructTy->getTypeAtIndex(0U));
10197 if (!VTy)
10198 return Call;
10199 unsigned N = StructTy->getNumElements();
10200
10201 // We may need to emit a cast to a svbool_t
10202 bool IsPredTy = VTy->getElementType()->isIntegerTy(1);
10203 unsigned MinElts = IsPredTy ? 16 : VTy->getMinNumElements();
10204
10205 ScalableVectorType *WideVTy =
10206 ScalableVectorType::get(VTy->getElementType(), MinElts * N);
10207 Value *Ret = llvm::PoisonValue::get(WideVTy);
10208 for (unsigned I = 0; I < N; ++I) {
10209 Value *SRet = Builder.CreateExtractValue(Call, I);
10210 assert(SRet->getType() == VTy && "Unexpected type for result value");
10211 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10212
10213 if (IsPredTy)
10214 SRet = EmitSVEPredicateCast(
10215 SRet, ScalableVectorType::get(Builder.getInt1Ty(), 16));
10216
10217 Ret = Builder.CreateInsertVector(WideVTy, Ret, SRet, Idx);
10218 }
10219 Call = Ret;
10220
10221 return Call;
10222}
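// Illustrative example: an intrinsic returning the struct
// { <vscale x 4 x i32>, <vscale x 4 x i32> } is flattened here into one
// <vscale x 8 x i32>; when the parts are predicates, each part is first
// widened back to the 16-element svbool_t layout before being inserted.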
10223
10224 void CodeGenFunction::GetAArch64SVEProcessedOperands(
10225 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
10226 SVETypeFlags TypeFlags) {
10227 // Find out if any arguments are required to be integer constant expressions.
10228 unsigned ICEArguments = 0;
10229 ASTContext::GetBuiltinTypeError Error;
10230 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
10231 assert(Error == ASTContext::GE_None && "Should not codegen an error");
10232
10233 // Tuple set/get only requires one insert/extract vector, which is
10234 // created by EmitSVETupleSetOrGet.
10235 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
10236
10237 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
10238 bool IsICE = ICEArguments & (1 << i);
10239 Value *Arg = EmitScalarExpr(E->getArg(i));
10240
10241 if (IsICE) {
10242 // If this is required to be a constant, constant fold it so that we know
10243 // that the generated intrinsic gets a ConstantInt.
10244 std::optional<llvm::APSInt> Result =
10245 E->getArg(i)->getIntegerConstantExpr(getContext());
10246 assert(Result && "Expected argument to be a constant");
10247
10248 // Immediates for SVE llvm intrinsics are always 32bit. We can safely
10249 // truncate because the immediate has been range checked and no valid
10250 // immediate requires more than a handful of bits.
10251 *Result = Result->extOrTrunc(32);
10252 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
10253 continue;
10254 }
10255
10256 if (IsTupleGetOrSet || !isa<ScalableVectorType>(Arg->getType())) {
10257 Ops.push_back(Arg);
10258 continue;
10259 }
10260
10261 auto *VTy = cast<ScalableVectorType>(Arg->getType());
10262 unsigned MinElts = VTy->getMinNumElements();
10263 bool IsPred = VTy->getElementType()->isIntegerTy(1);
10264 unsigned N = (MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 16 : 128);
10265
10266 if (N == 1) {
10267 Ops.push_back(Arg);
10268 continue;
10269 }
10270
10271 for (unsigned I = 0; I < N; ++I) {
10272 Value *Idx = ConstantInt::get(CGM.Int64Ty, (I * MinElts) / N);
10273 auto *NewVTy =
10274 ScalableVectorType::get(VTy->getElementType(), MinElts / N);
10275 Ops.push_back(Builder.CreateExtractVector(NewVTy, Arg, Idx));
10276 }
10277 }
10278}
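// Illustrative example: a tuple argument that arrives as a single wide
// <vscale x 8 x i32> (N = 2 for 32-bit elements) is split above into two
// <vscale x 4 x i32> pieces with llvm.vector.extract, so the selected LLVM
// intrinsic receives N separate legal vectors rather than the wide form.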
10279
10280 Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
10281 const CallExpr *E) {
10282 llvm::Type *Ty = ConvertType(E->getType());
10283 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
10284 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
10285 Value *Val = EmitScalarExpr(E->getArg(0));
10286 return EmitSVEReinterpret(Val, Ty);
10287 }
10288
10289 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
10290 AArch64SVEIntrinsicsProvenSorted);
10291
10292 llvm::SmallVector<Value *, 4> Ops;
10293 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10294 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10295
10296 if (TypeFlags.isLoad())
10297 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
10298 TypeFlags.isZExtReturn());
10299 else if (TypeFlags.isStore())
10300 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
10301 else if (TypeFlags.isGatherLoad())
10302 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10303 else if (TypeFlags.isScatterStore())
10304 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10305 else if (TypeFlags.isPrefetch())
10306 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10307 else if (TypeFlags.isGatherPrefetch())
10308 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10309 else if (TypeFlags.isStructLoad())
10310 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10311 else if (TypeFlags.isStructStore())
10312 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10313 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
10314 return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
10315 else if (TypeFlags.isTupleCreate())
10316 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
10317 else if (TypeFlags.isUndef())
10318 return UndefValue::get(Ty);
10319 else if (Builtin->LLVMIntrinsic != 0) {
10320 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
10321 InsertExplicitZeroOperand(Builder, Ty, Ops);
10322
10323 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
10324 InsertExplicitUndefOperand(Builder, Ty, Ops);
10325
10326 // Some ACLE builtins leave out the argument to specify the predicate
10327 // pattern, which is expected to be expanded to an SV_ALL pattern.
10328 if (TypeFlags.isAppendSVALL())
10329 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
10330 if (TypeFlags.isInsertOp1SVALL())
10331 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
10332
10333 // Predicates must match the main datatype.
10334 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10335 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10336 if (PredTy->getElementType()->isIntegerTy(1))
10337 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10338
10339 // Splat scalar operand to vector (intrinsics with _n infix)
10340 if (TypeFlags.hasSplatOperand()) {
10341 unsigned OpNo = TypeFlags.getSplatOperand();
10342 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10343 }
10344
10345 if (TypeFlags.isReverseCompare())
10346 std::swap(Ops[1], Ops[2]);
10347 else if (TypeFlags.isReverseUSDOT())
10348 std::swap(Ops[1], Ops[2]);
10349 else if (TypeFlags.isReverseMergeAnyBinOp() &&
10350 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10351 std::swap(Ops[1], Ops[2]);
10352 else if (TypeFlags.isReverseMergeAnyAccOp() &&
10353 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10354 std::swap(Ops[1], Ops[3]);
10355
10356 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
10357 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
10358 llvm::Type *OpndTy = Ops[1]->getType();
10359 auto *SplatZero = Constant::getNullValue(OpndTy);
10360 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
10361 }
10362
10363 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
10364 getSVEOverloadTypes(TypeFlags, Ty, Ops));
10365 Value *Call = Builder.CreateCall(F, Ops);
10366
10367 // Predicate results must be converted to svbool_t.
10368 if (auto PredTy = dyn_cast<llvm::VectorType>(Call->getType()))
10369 if (PredTy->getScalarType()->isIntegerTy(1))
10370 Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
10371
10372 return FormSVEBuiltinResult(Call);
10373 }
10374
10375 switch (BuiltinID) {
10376 default:
10377 return nullptr;
10378
10379 case SVE::BI__builtin_sve_svreinterpret_b: {
10380 auto SVCountTy =
10381 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10382 Function *CastFromSVCountF =
10383 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10384 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
10385 }
10386 case SVE::BI__builtin_sve_svreinterpret_c: {
10387 auto SVCountTy =
10388 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10389 Function *CastToSVCountF =
10390 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10391 return Builder.CreateCall(CastToSVCountF, Ops[0]);
10392 }
10393
10394 case SVE::BI__builtin_sve_svpsel_lane_b8:
10395 case SVE::BI__builtin_sve_svpsel_lane_b16:
10396 case SVE::BI__builtin_sve_svpsel_lane_b32:
10397 case SVE::BI__builtin_sve_svpsel_lane_b64:
10398 case SVE::BI__builtin_sve_svpsel_lane_c8:
10399 case SVE::BI__builtin_sve_svpsel_lane_c16:
10400 case SVE::BI__builtin_sve_svpsel_lane_c32:
10401 case SVE::BI__builtin_sve_svpsel_lane_c64: {
10402 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
10403 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
10404 "aarch64.svcount")) &&
10405 "Unexpected TargetExtType");
10406 auto SVCountTy =
10407 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10408 Function *CastFromSVCountF =
10409 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10410 Function *CastToSVCountF =
10411 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10412
10413 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
10414 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
10415 llvm::Value *Ops0 =
10416 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
10417 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
10418 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
10419 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
10420 }
10421 case SVE::BI__builtin_sve_svmov_b_z: {
10422 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
10423 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10424 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10425 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
10426 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
10427 }
10428
10429 case SVE::BI__builtin_sve_svnot_b_z: {
10430 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
10431 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10432 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10433 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
10434 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
10435 }
10436
10437 case SVE::BI__builtin_sve_svmovlb_u16:
10438 case SVE::BI__builtin_sve_svmovlb_u32:
10439 case SVE::BI__builtin_sve_svmovlb_u64:
10440 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
10441
10442 case SVE::BI__builtin_sve_svmovlb_s16:
10443 case SVE::BI__builtin_sve_svmovlb_s32:
10444 case SVE::BI__builtin_sve_svmovlb_s64:
10445 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
10446
10447 case SVE::BI__builtin_sve_svmovlt_u16:
10448 case SVE::BI__builtin_sve_svmovlt_u32:
10449 case SVE::BI__builtin_sve_svmovlt_u64:
10450 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
10451
10452 case SVE::BI__builtin_sve_svmovlt_s16:
10453 case SVE::BI__builtin_sve_svmovlt_s32:
10454 case SVE::BI__builtin_sve_svmovlt_s64:
10455 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
10456
10457 case SVE::BI__builtin_sve_svpmullt_u16:
10458 case SVE::BI__builtin_sve_svpmullt_u64:
10459 case SVE::BI__builtin_sve_svpmullt_n_u16:
10460 case SVE::BI__builtin_sve_svpmullt_n_u64:
10461 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
10462
10463 case SVE::BI__builtin_sve_svpmullb_u16:
10464 case SVE::BI__builtin_sve_svpmullb_u64:
10465 case SVE::BI__builtin_sve_svpmullb_n_u16:
10466 case SVE::BI__builtin_sve_svpmullb_n_u64:
10467 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
10468
10469 case SVE::BI__builtin_sve_svdup_n_b8:
10470 case SVE::BI__builtin_sve_svdup_n_b16:
10471 case SVE::BI__builtin_sve_svdup_n_b32:
10472 case SVE::BI__builtin_sve_svdup_n_b64: {
10473 Value *CmpNE =
10474 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
10475 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
10476 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
10477 return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
10478 }
10479
10480 case SVE::BI__builtin_sve_svdupq_n_b8:
10481 case SVE::BI__builtin_sve_svdupq_n_b16:
10482 case SVE::BI__builtin_sve_svdupq_n_b32:
10483 case SVE::BI__builtin_sve_svdupq_n_b64:
10484 case SVE::BI__builtin_sve_svdupq_n_u8:
10485 case SVE::BI__builtin_sve_svdupq_n_s8:
10486 case SVE::BI__builtin_sve_svdupq_n_u64:
10487 case SVE::BI__builtin_sve_svdupq_n_f64:
10488 case SVE::BI__builtin_sve_svdupq_n_s64:
10489 case SVE::BI__builtin_sve_svdupq_n_u16:
10490 case SVE::BI__builtin_sve_svdupq_n_f16:
10491 case SVE::BI__builtin_sve_svdupq_n_bf16:
10492 case SVE::BI__builtin_sve_svdupq_n_s16:
10493 case SVE::BI__builtin_sve_svdupq_n_u32:
10494 case SVE::BI__builtin_sve_svdupq_n_f32:
10495 case SVE::BI__builtin_sve_svdupq_n_s32: {
10496 // These builtins are implemented by building a fixed-length vector of the operands,
10497 // inserting it into a scalable vector, and replicating it with the dupq_lane intrinsic.
10498 unsigned NumOpnds = Ops.size();
10499
10500 bool IsBoolTy =
10501 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
10502
10503 // For svdupq_n_b* the element type is an integer of width 128/numelts,
10504 // so that the compare can use the width that is natural for the expected
10505 // number of predicate lanes.
10506 llvm::Type *EltTy = Ops[0]->getType();
10507 if (IsBoolTy)
10508 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
10509
10510 SmallVector<llvm::Value *, 16> VecOps;
10511 for (unsigned I = 0; I < NumOpnds; ++I)
10512 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
10513 Value *Vec = BuildVector(VecOps);
10514
10515 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
10516 Value *InsertSubVec = Builder.CreateInsertVector(
10517 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0));
10518
10519 Function *F =
10520 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
10521 Value *DupQLane =
10522 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
10523
10524 if (!IsBoolTy)
10525 return DupQLane;
10526
10527 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10528 Value *Pred = EmitSVEAllTruePred(TypeFlags);
10529
10530 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
10531 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
10532 : Intrinsic::aarch64_sve_cmpne_wide,
10533 OverloadedTy);
10534 Value *Call = Builder.CreateCall(
10535 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
10536 return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
10537 }
10538
10539 case SVE::BI__builtin_sve_svpfalse_b:
10540 return ConstantInt::getFalse(Ty);
10541
10542 case SVE::BI__builtin_sve_svpfalse_c: {
10543 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
10544 Function *CastToSVCountF =
10545 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
10546 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
10547 }
10548
10549 case SVE::BI__builtin_sve_svlen_bf16:
10550 case SVE::BI__builtin_sve_svlen_f16:
10551 case SVE::BI__builtin_sve_svlen_f32:
10552 case SVE::BI__builtin_sve_svlen_f64:
10553 case SVE::BI__builtin_sve_svlen_s8:
10554 case SVE::BI__builtin_sve_svlen_s16:
10555 case SVE::BI__builtin_sve_svlen_s32:
10556 case SVE::BI__builtin_sve_svlen_s64:
10557 case SVE::BI__builtin_sve_svlen_u8:
10558 case SVE::BI__builtin_sve_svlen_u16:
10559 case SVE::BI__builtin_sve_svlen_u32:
10560 case SVE::BI__builtin_sve_svlen_u64: {
10561 SVETypeFlags TF(Builtin->TypeModifier);
10562 auto VTy = cast<llvm::VectorType>(getSVEType(TF));
10563 auto *NumEls =
10564 llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());
10565
10566 Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
10567 return Builder.CreateMul(NumEls, Builder.CreateCall(F));
10568 }
10569
10570 case SVE::BI__builtin_sve_svtbl2_u8:
10571 case SVE::BI__builtin_sve_svtbl2_s8:
10572 case SVE::BI__builtin_sve_svtbl2_u16:
10573 case SVE::BI__builtin_sve_svtbl2_s16:
10574 case SVE::BI__builtin_sve_svtbl2_u32:
10575 case SVE::BI__builtin_sve_svtbl2_s32:
10576 case SVE::BI__builtin_sve_svtbl2_u64:
10577 case SVE::BI__builtin_sve_svtbl2_s64:
10578 case SVE::BI__builtin_sve_svtbl2_f16:
10579 case SVE::BI__builtin_sve_svtbl2_bf16:
10580 case SVE::BI__builtin_sve_svtbl2_f32:
10581 case SVE::BI__builtin_sve_svtbl2_f64: {
10582 SVETypeFlags TF(Builtin->TypeModifier);
10583 auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
10584 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
10585 return Builder.CreateCall(F, Ops);
10586 }
10587
10588 case SVE::BI__builtin_sve_svset_neonq_s8:
10589 case SVE::BI__builtin_sve_svset_neonq_s16:
10590 case SVE::BI__builtin_sve_svset_neonq_s32:
10591 case SVE::BI__builtin_sve_svset_neonq_s64:
10592 case SVE::BI__builtin_sve_svset_neonq_u8:
10593 case SVE::BI__builtin_sve_svset_neonq_u16:
10594 case SVE::BI__builtin_sve_svset_neonq_u32:
10595 case SVE::BI__builtin_sve_svset_neonq_u64:
10596 case SVE::BI__builtin_sve_svset_neonq_f16:
10597 case SVE::BI__builtin_sve_svset_neonq_f32:
10598 case SVE::BI__builtin_sve_svset_neonq_f64:
10599 case SVE::BI__builtin_sve_svset_neonq_bf16: {
10600 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
10601 }
10602
10603 case SVE::BI__builtin_sve_svget_neonq_s8:
10604 case SVE::BI__builtin_sve_svget_neonq_s16:
10605 case SVE::BI__builtin_sve_svget_neonq_s32:
10606 case SVE::BI__builtin_sve_svget_neonq_s64:
10607 case SVE::BI__builtin_sve_svget_neonq_u8:
10608 case SVE::BI__builtin_sve_svget_neonq_u16:
10609 case SVE::BI__builtin_sve_svget_neonq_u32:
10610 case SVE::BI__builtin_sve_svget_neonq_u64:
10611 case SVE::BI__builtin_sve_svget_neonq_f16:
10612 case SVE::BI__builtin_sve_svget_neonq_f32:
10613 case SVE::BI__builtin_sve_svget_neonq_f64:
10614 case SVE::BI__builtin_sve_svget_neonq_bf16: {
10615 return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
10616 }
10617
10618 case SVE::BI__builtin_sve_svdup_neonq_s8:
10619 case SVE::BI__builtin_sve_svdup_neonq_s16:
10620 case SVE::BI__builtin_sve_svdup_neonq_s32:
10621 case SVE::BI__builtin_sve_svdup_neonq_s64:
10622 case SVE::BI__builtin_sve_svdup_neonq_u8:
10623 case SVE::BI__builtin_sve_svdup_neonq_u16:
10624 case SVE::BI__builtin_sve_svdup_neonq_u32:
10625 case SVE::BI__builtin_sve_svdup_neonq_u64:
10626 case SVE::BI__builtin_sve_svdup_neonq_f16:
10627 case SVE::BI__builtin_sve_svdup_neonq_f32:
10628 case SVE::BI__builtin_sve_svdup_neonq_f64:
10629 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
10630 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
10631 Builder.getInt64(0));
10632 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
10633 {Insert, Builder.getInt64(0)});
10634 }
10635 }
10636
10637 /// Should not happen
10638 return nullptr;
10639}
10640
10641 static void swapCommutativeSMEOperands(unsigned BuiltinID,
10642 SmallVectorImpl<Value *> &Ops) {
10643 unsigned MultiVec;
10644 switch (BuiltinID) {
10645 default:
10646 return;
10647 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
10648 MultiVec = 1;
10649 break;
10650 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
10651 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
10652 MultiVec = 2;
10653 break;
10654 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
10655 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
10656 MultiVec = 4;
10657 break;
10658 }
10659
10660 if (MultiVec > 0)
10661 for (unsigned I = 0; I < MultiVec; ++I)
10662 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
10663}
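// Illustrative example: for an x2 builtin the operand list is laid out as
// {za slice, zn0, zn1, zm0, zm1, ...}; the swaps above exchange zn0<->zm0 and
// zn1<->zm1 so that the signed-by-unsigned (svsu*) builtins can reuse the
// unsigned-by-signed LLVM intrinsics, which expect the operands in the
// opposite order.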
10664
10665 Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
10666 const CallExpr *E) {
10667 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
10668 AArch64SMEIntrinsicsProvenSorted);
10669
10670 llvm::SmallVector<Value *, 4> Ops;
10671 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10672 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10673
10674 if (TypeFlags.isLoad() || TypeFlags.isStore())
10675 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10676 else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
10677 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10678 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
10679 BuiltinID == SME::BI__builtin_sme_svzero_za)
10680 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10681 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
10682 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
10683 BuiltinID == SME::BI__builtin_sme_svldr_za ||
10684 BuiltinID == SME::BI__builtin_sme_svstr_za)
10685 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10686
10687 // Handle builtins which require their multi-vector operands to be swapped
10688 swapCommutativeSMEOperands(BuiltinID, Ops);
10689
10690 // Should not happen!
10691 if (Builtin->LLVMIntrinsic == 0)
10692 return nullptr;
10693
10694 // Predicates must match the main datatype.
10695 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10696 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10697 if (PredTy->getElementType()->isIntegerTy(1))
10698 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10699
10700 Function *F =
10701 TypeFlags.isOverloadNone()
10702 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
10703 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
10704 Value *Call = Builder.CreateCall(F, Ops);
10705
10706 return FormSVEBuiltinResult(Call);
10707}
10708
10709 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
10710 const CallExpr *E,
10711 llvm::Triple::ArchType Arch) {
10712 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
10713 BuiltinID <= clang::AArch64::LastSVEBuiltin)
10714 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
10715
10716 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
10717 BuiltinID <= clang::AArch64::LastSMEBuiltin)
10718 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
10719
10720 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
10721 return EmitAArch64CpuSupports(E);
10722
10723 unsigned HintID = static_cast<unsigned>(-1);
10724 switch (BuiltinID) {
10725 default: break;
10726 case clang::AArch64::BI__builtin_arm_nop:
10727 HintID = 0;
10728 break;
10729 case clang::AArch64::BI__builtin_arm_yield:
10730 case clang::AArch64::BI__yield:
10731 HintID = 1;
10732 break;
10733 case clang::AArch64::BI__builtin_arm_wfe:
10734 case clang::AArch64::BI__wfe:
10735 HintID = 2;
10736 break;
10737 case clang::AArch64::BI__builtin_arm_wfi:
10738 case clang::AArch64::BI__wfi:
10739 HintID = 3;
10740 break;
10741 case clang::AArch64::BI__builtin_arm_sev:
10742 case clang::AArch64::BI__sev:
10743 HintID = 4;
10744 break;
10745 case clang::AArch64::BI__builtin_arm_sevl:
10746 case clang::AArch64::BI__sevl:
10747 HintID = 5;
10748 break;
10749 }
10750
10751 if (HintID != static_cast<unsigned>(-1)) {
10752 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
10753 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
10754 }
10755
10756 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
10757 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
10758 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10759 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
10760 }
10761
10762 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
10763 // Create call to __arm_sme_state and store the results to the two pointers.
10764 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
10765 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
10766 false),
10767 "__arm_sme_state"));
10768 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
10769 "aarch64_pstate_sm_compatible");
10770 CI->setAttributes(Attrs);
10771 CI->setCallingConv(
10772 llvm::CallingConv::
10773 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
10774 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
10775 EmitPointerWithAlignment(E->getArg(0)));
10776 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
10777 EmitPointerWithAlignment(E->getArg(1)));
10778 }
10779
10780 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
10781 assert((getContext().getTypeSize(E->getType()) == 32) &&
10782 "rbit of unusual size!");
10783 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10784 return Builder.CreateCall(
10785 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
10786 }
10787 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
10788 assert((getContext().getTypeSize(E->getType()) == 64) &&
10789 "rbit of unusual size!");
10790 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10791 return Builder.CreateCall(
10792 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
10793 }
10794
10795 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
10796 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
10797 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10798 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
10799 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
10800 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
10801 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
10802 return Res;
10803 }
10804
10805 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
10806 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10807 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
10808 "cls");
10809 }
10810 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
10811 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10812 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
10813 "cls");
10814 }
10815
10816 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
10817 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
10818 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10819 llvm::Type *Ty = Arg->getType();
10820 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
10821 Arg, "frint32z");
10822 }
10823
10824 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
10825 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
10826 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10827 llvm::Type *Ty = Arg->getType();
10828 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
10829 Arg, "frint64z");
10830 }
10831
10832 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
10833 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
10834 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10835 llvm::Type *Ty = Arg->getType();
10836 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
10837 Arg, "frint32x");
10838 }
10839
10840 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
10841 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
10842 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10843 llvm::Type *Ty = Arg->getType();
10844 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
10845 Arg, "frint64x");
10846 }
10847
10848 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
10849 assert((getContext().getTypeSize(E->getType()) == 32) &&
10850 "__jcvt of unusual size!");
10851 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10852 return Builder.CreateCall(
10853 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
10854 }
10855
10856 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
10857 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
10858 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
10859 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
10860 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
10861 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
10862
10863 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
10864 // Load from the address via an LLVM intrinsic, receiving a
10865 // tuple of 8 i64 words, and store each one to ValPtr.
10866 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
10867 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
10868 llvm::Value *ToRet;
10869 for (size_t i = 0; i < 8; i++) {
10870 llvm::Value *ValOffsetPtr =
10871 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
10872 Address Addr =
10873 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
10874 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
10875 }
10876 return ToRet;
10877 } else {
10878 // Load 8 i64 words from ValPtr, and store them to the address
10879 // via an LLVM intrinsic.
10880 SmallVector<llvm::Value *, 9> Args;
10881 Args.push_back(MemAddr);
10882 for (size_t i = 0; i < 8; i++) {
10883 llvm::Value *ValOffsetPtr =
10884 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
10885 Address Addr =
10886 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
10887 Args.push_back(Builder.CreateLoad(Addr));
10888 }
10889
10890 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
10891 ? Intrinsic::aarch64_st64b
10892 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
10893 ? Intrinsic::aarch64_st64bv
10894 : Intrinsic::aarch64_st64bv0);
10895 Function *F = CGM.getIntrinsic(Intr);
10896 return Builder.CreateCall(F, Args);
10897 }
10898 }
10899
10900 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
10901 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
10902
10903 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
10904 ? Intrinsic::aarch64_rndr
10905 : Intrinsic::aarch64_rndrrs);
10906 Function *F = CGM.getIntrinsic(Intr);
10907 llvm::Value *Val = Builder.CreateCall(F);
10908 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
10909 Value *Status = Builder.CreateExtractValue(Val, 1);
10910
10911 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
10912 Builder.CreateStore(RandomValue, MemAddress);
10913 Status = Builder.CreateZExt(Status, Int32Ty);
10914 return Status;
10915 }
10916
10917 if (BuiltinID == clang::AArch64::BI__clear_cache) {
10918 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
10919 const FunctionDecl *FD = E->getDirectCallee();
10920 Value *Ops[2];
10921 for (unsigned i = 0; i < 2; i++)
10922 Ops[i] = EmitScalarExpr(E->getArg(i));
10923 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
10924 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
10925 StringRef Name = FD->getName();
10926 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
10927 }
10928
10929 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
10930 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
10931 getContext().getTypeSize(E->getType()) == 128) {
10932 Function *F =
10933 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
10934 ? Intrinsic::aarch64_ldaxp
10935 : Intrinsic::aarch64_ldxp);
10936
10937 Value *LdPtr = EmitScalarExpr(E->getArg(0));
10938 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
10939
10940 Value *Val0 = Builder.CreateExtractValue(Val, 1);
10941 Value *Val1 = Builder.CreateExtractValue(Val, 0);
10942 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
10943 Val0 = Builder.CreateZExt(Val0, Int128Ty);
10944 Val1 = Builder.CreateZExt(Val1, Int128Ty);
10945
10946 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
10947 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
10948 Val = Builder.CreateOr(Val, Val1);
10949 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
10950 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
10951 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
10952 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
10953
10954 QualType Ty = E->getType();
10955 llvm::Type *RealResTy = ConvertType(Ty);
10956 llvm::Type *IntTy =
10957 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
10958
10959 Function *F =
10960 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
10961 ? Intrinsic::aarch64_ldaxr
10962 : Intrinsic::aarch64_ldxr,
10963 UnqualPtrTy);
10964 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
10965 Val->addParamAttr(
10966 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
10967
10968 if (RealResTy->isPointerTy())
10969 return Builder.CreateIntToPtr(Val, RealResTy);
10970
10971 llvm::Type *IntResTy = llvm::IntegerType::get(
10972 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
10973 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
10974 RealResTy);
10975 }
10976
10977 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
10978 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
10979 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
10980 Function *F =
10981 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
10982 ? Intrinsic::aarch64_stlxp
10983 : Intrinsic::aarch64_stxp);
10984 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
10985
10986 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
10987 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
10988
10989 Tmp = Tmp.withElementType(STy);
10990 llvm::Value *Val = Builder.CreateLoad(Tmp);
10991
10992 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
10993 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
10994 Value *StPtr = EmitScalarExpr(E->getArg(1));
10995 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
10996 }
10997
10998 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
10999 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
11000 Value *StoreVal = EmitScalarExpr(E->getArg(0));
11001 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
11002
11003 QualType Ty = E->getArg(0)->getType();
11004 llvm::Type *StoreTy =
11005 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11006
11007 if (StoreVal->getType()->isPointerTy())
11008 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
11009 else {
11010 llvm::Type *IntTy = llvm::IntegerType::get(
11011 getLLVMContext(),
11012 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
11013 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
11014 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
11015 }
11016
11017 Function *F =
11018 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11019 ? Intrinsic::aarch64_stlxr
11020 : Intrinsic::aarch64_stxr,
11021 StoreAddr->getType());
11022 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
11023 CI->addParamAttr(
11024 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
11025 return CI;
11026 }
11027
11028 if (BuiltinID == clang::AArch64::BI__getReg) {
11029 Expr::EvalResult Result;
11030 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11031 llvm_unreachable("Sema will ensure that the parameter is constant");
11032
11033 llvm::APSInt Value = Result.Val.getInt();
11034 LLVMContext &Context = CGM.getLLVMContext();
11035 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
11036
11037 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
11038 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11039 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11040
11041 llvm::Function *F =
11042 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11043 return Builder.CreateCall(F, Metadata);
11044 }
11045
11046 if (BuiltinID == clang::AArch64::BI__break) {
11047 Expr::EvalResult Result;
11048 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11049 llvm_unreachable("Sema will ensure that the parameter is constant");
11050
11051 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break);
11052 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
11053 }
11054
11055 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
11056 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
11057 return Builder.CreateCall(F);
11058 }
11059
11060 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
11061 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
11062 llvm::SyncScope::SingleThread);
11063
11064 // CRC32
11065 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
11066 switch (BuiltinID) {
11067 case clang::AArch64::BI__builtin_arm_crc32b:
11068 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
11069 case clang::AArch64::BI__builtin_arm_crc32cb:
11070 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
11071 case clang::AArch64::BI__builtin_arm_crc32h:
11072 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
11073 case clang::AArch64::BI__builtin_arm_crc32ch:
11074 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
11075 case clang::AArch64::BI__builtin_arm_crc32w:
11076 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
11077 case clang::AArch64::BI__builtin_arm_crc32cw:
11078 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
11079 case clang::AArch64::BI__builtin_arm_crc32d:
11080 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
11081 case clang::AArch64::BI__builtin_arm_crc32cd:
11082 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
11083 }
11084
11085 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
11086 Value *Arg0 = EmitScalarExpr(E->getArg(0));
11087 Value *Arg1 = EmitScalarExpr(E->getArg(1));
11088 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
11089
11090 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
11091 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
11092
11093 return Builder.CreateCall(F, {Arg0, Arg1});
11094 }
11095
11096 // Memory Operations (MOPS)
11097 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
11098 Value *Dst = EmitScalarExpr(E->getArg(0));
11099 Value *Val = EmitScalarExpr(E->getArg(1));
11100 Value *Size = EmitScalarExpr(E->getArg(2));
11101 Dst = Builder.CreatePointerCast(Dst, Int8PtrTy);
11102 Val = Builder.CreateTrunc(Val, Int8Ty);
11103 Size = Builder.CreateIntCast(Size, Int64Ty, false);
11104 return Builder.CreateCall(
11105 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
11106 }
11107
11108 // Memory Tagging Extensions (MTE) Intrinsics
11109 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
11110 switch (BuiltinID) {
11111 case clang::AArch64::BI__builtin_arm_irg:
11112 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
11113 case clang::AArch64::BI__builtin_arm_addg:
11114 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
11115 case clang::AArch64::BI__builtin_arm_gmi:
11116 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
11117 case clang::AArch64::BI__builtin_arm_ldg:
11118 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
11119 case clang::AArch64::BI__builtin_arm_stg:
11120 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
11121 case clang::AArch64::BI__builtin_arm_subp:
11122 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
11123 }
11124
11125 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
11126 llvm::Type *T = ConvertType(E->getType());
11127
11128 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
11129 Value *Pointer = EmitScalarExpr(E->getArg(0));
11130 Value *Mask = EmitScalarExpr(E->getArg(1));
11131
11132 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11133 Mask = Builder.CreateZExt(Mask, Int64Ty);
11134 Value *RV = Builder.CreateCall(
11135 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, Mask});
11136 return Builder.CreatePointerCast(RV, T);
11137 }
11138 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
11139 Value *Pointer = EmitScalarExpr(E->getArg(0));
11140 Value *TagOffset = EmitScalarExpr(E->getArg(1));
11141
11142 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11143 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
11144 Value *RV = Builder.CreateCall(
11145 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, TagOffset});
11146 return Builder.CreatePointerCast(RV, T);
11147 }
11148 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
11149 Value *Pointer = EmitScalarExpr(E->getArg(0));
11150 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
11151
11152 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
11153 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11154 return Builder.CreateCall(
11155 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
11156 }
11157 // Although it is possible to supply a different return
11158 // address (first arg) to this intrinsic, for now we set the
11159 // return address to be the same as the input address.
11160 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
11161 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11162 TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
11163 Value *RV = Builder.CreateCall(
11164 CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
11165 return Builder.CreatePointerCast(RV, T);
11166 }
11167 // Although it is possible to supply a different tag (to set)
11168 // to this intrinsic (as first arg), for now we supply the tag
11169 // that is in the input address argument (the common use case).
11170 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
11171 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11172 TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
11173 return Builder.CreateCall(
11174 CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
11175 }
11176 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
11177 Value *PointerA = EmitScalarExpr(E->getArg(0));
11178 Value *PointerB = EmitScalarExpr(E->getArg(1));
11179 PointerA = Builder.CreatePointerCast(PointerA, Int8PtrTy);
11180 PointerB = Builder.CreatePointerCast(PointerB, Int8PtrTy);
11181 return Builder.CreateCall(
11182 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
11183 }
11184 }
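// Usage sketch (illustrative): __builtin_arm_irg(ptr, mask) becomes a call to
// @llvm.aarch64.irg with the mask zero-extended to i64, and the returned
// pointer is cast back to the expression's type; the ldg/stg forms above pass
// the input address for both operands, as the comments note.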
11185
11186 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11187 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11188 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11189 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11190 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
11191 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
11192 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
11193 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
11194
11195 SpecialRegisterAccessKind AccessKind = Write;
11196 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11197 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11198 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11199 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
11200 AccessKind = VolatileRead;
11201
11202 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11203 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
11204
11205 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11206 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
11207
11208 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11209 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
11210
11211 llvm::Type *ValueType;
11212 llvm::Type *RegisterType = Int64Ty;
11213 if (Is32Bit) {
11214 ValueType = Int32Ty;
11215 } else if (Is128Bit) {
11216 llvm::Type *Int128Ty =
11217 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
11218 ValueType = Int128Ty;
11219 RegisterType = Int128Ty;
11220 } else if (IsPointerBuiltin) {
11221 ValueType = VoidPtrTy;
11222 } else {
11223 ValueType = Int64Ty;
11224 }
11225
11226 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
11227 AccessKind);
11228 }
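// Width selection above: __builtin_arm_rsr/wsr read or write a 32-bit value
// through a 64-bit register, the 64-bit variants use i64, the 128-bit
// variants use i128 for both the value and the register, and the 'p'
// variants transfer a pointer-sized value.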
11229
11230 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
11231 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
11232 LLVMContext &Context = CGM.getLLVMContext();
11233
11234 unsigned SysReg =
11235 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
11236
11237 std::string SysRegStr;
11238 llvm::raw_string_ostream(SysRegStr) <<
11239 ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
11240 ((SysReg >> 11) & 7) << ":" <<
11241 ((SysReg >> 7) & 15) << ":" <<
11242 ((SysReg >> 3) & 15) << ":" <<
11243 ( SysReg & 7);
11244
11245 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
11246 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11247 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11248
11249 llvm::Type *RegisterType = Int64Ty;
11250 llvm::Type *Types[] = { RegisterType };
11251
11252 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
11253 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
11254
11255 return Builder.CreateCall(F, Metadata);
11256 }
11257
11258 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
11259 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
11260
11261 return Builder.CreateCall(F, { Metadata, ArgValue });
11262 }
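// The immediate is decoded into the "o0:op1:CRn:CRm:op2" register-name string
// used by read_register/write_register: o0 is 2 plus bit 14 of the immediate,
// followed by the 3-bit op1, 4-bit CRn, 4-bit CRm and 3-bit op2 fields; e.g.
// an immediate of 0 yields the string "2:0:0:0:0".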
11263
11264 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
11265 llvm::Function *F =
11266 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
11267 return Builder.CreateCall(F);
11268 }
11269
11270 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
11271 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
11272 return Builder.CreateCall(F);
11273 }
11274
11275 if (BuiltinID == clang::AArch64::BI__mulh ||
11276 BuiltinID == clang::AArch64::BI__umulh) {
11277 llvm::Type *ResType = ConvertType(E->getType());
11278 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11279
11280 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
11281 Value *LHS =
11282 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
11283 Value *RHS =
11284 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
11285
11286 Value *MulResult, *HigherBits;
11287 if (IsSigned) {
11288 MulResult = Builder.CreateNSWMul(LHS, RHS);
11289 HigherBits = Builder.CreateAShr(MulResult, 64);
11290 } else {
11291 MulResult = Builder.CreateNUWMul(LHS, RHS);
11292 HigherBits = Builder.CreateLShr(MulResult, 64);
11293 }
11294 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
11295
11296 return HigherBits;
11297 }
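// Rough C equivalent of the IR built above (a sketch, not the MSVC definition):
//   int64_t mulh(int64_t a, int64_t b) {
//     return (int64_t)(((__int128)a * (__int128)b) >> 64);
//   }
// __umulh is the same with unsigned operands, so the widening uses zero
// extension and the high bits are taken with a logical shift.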
11298
11299 if (BuiltinID == AArch64::BI__writex18byte ||
11300 BuiltinID == AArch64::BI__writex18word ||
11301 BuiltinID == AArch64::BI__writex18dword ||
11302 BuiltinID == AArch64::BI__writex18qword) {
11303 // Read x18 as i8*
11304 LLVMContext &Context = CGM.getLLVMContext();
11305 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
11306 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11307 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11308 llvm::Function *F =
11309 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11310 llvm::Value *X18 = Builder.CreateCall(F, Metadata);
11311 X18 = Builder.CreateIntToPtr(X18, Int8PtrTy);
11312
11313 // Store val at x18 + offset
11314 Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
11315 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11316 Value *Val = EmitScalarExpr(E->getArg(1));
11317 StoreInst *Store = Builder.CreateAlignedStore(Val, Ptr, CharUnits::One());
11318 return Store;
11319 }
11320
11321 if (BuiltinID == AArch64::BI__readx18byte ||
11322 BuiltinID == AArch64::BI__readx18word ||
11323 BuiltinID == AArch64::BI__readx18dword ||
11324 BuiltinID == AArch64::BI__readx18qword) {
11325 llvm::Type *IntTy = ConvertType(E->getType());
11326
11327 // Read x18 as i8*
11328 LLVMContext &Context = CGM.getLLVMContext();
11329 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
11330 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11331 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11332 llvm::Function *F =
11333 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11334 llvm::Value *X18 = Builder.CreateCall(F, Metadata);
11335 X18 = Builder.CreateIntToPtr(X18, Int8PtrTy);
11336
11337 // Load x18 + offset
11338 Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
11339 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11340 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
11341 return Load;
11342 }
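// Both the __readx18* and __writex18* helpers follow the same pattern: read
// x18 via @llvm.read_register, treat it as a byte pointer, add the immediate
// offset, then load or store the requested width with byte alignment.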
11343
11344 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
11345 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
11346 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
11347 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
11348 Value *Arg = EmitScalarExpr(E->getArg(0));
11349 llvm::Type *RetTy = ConvertType(E->getType());
11350 return Builder.CreateBitCast(Arg, RetTy);
11351 }
11352
11353 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11354 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11355 BuiltinID == AArch64::BI_CountLeadingZeros ||
11356 BuiltinID == AArch64::BI_CountLeadingZeros64) {
11357 Value *Arg = EmitScalarExpr(E->getArg(0));
11358 llvm::Type *ArgType = Arg->getType();
11359
11360 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11361 BuiltinID == AArch64::BI_CountLeadingOnes64)
11362 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
11363
11364 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
11365 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
11366
11367 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11368 BuiltinID == AArch64::BI_CountLeadingZeros64)
11369 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11370 return Result;
11371 }
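// _CountLeadingOnes is lowered as ctlz(~x): inverting the argument turns
// leading one bits into leading zeros, e.g. for 32-bit x = 0xF0000000,
// ~x = 0x0FFFFFFF and ctlz gives 4, the count of leading ones in x.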
11372
11373 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
11374 BuiltinID == AArch64::BI_CountLeadingSigns64) {
11375 Value *Arg = EmitScalarExpr(E->getArg(0));
11376
11377 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
11378 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
11379 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
11380
11381 Value *Result = Builder.CreateCall(F, Arg, "cls");
11382 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
11383 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11384 return Result;
11385 }
11386
11387 if (BuiltinID == AArch64::BI_CountOneBits ||
11388 BuiltinID == AArch64::BI_CountOneBits64) {
11389 Value *ArgValue = EmitScalarExpr(E->getArg(0));
11390 llvm::Type *ArgType = ArgValue->getType();
11391 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
11392
11393 Value *Result = Builder.CreateCall(F, ArgValue);
11394 if (BuiltinID == AArch64::BI_CountOneBits64)
11395 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11396 return Result;
11397 }
11398
11399 if (BuiltinID == AArch64::BI__prefetch) {
11400 Value *Address = EmitScalarExpr(E->getArg(0));
11401 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
11402 Value *Locality = ConstantInt::get(Int32Ty, 3);
11403 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
11404 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
11405 return Builder.CreateCall(F, {Address, RW, Locality, Data});
11406 }
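// The constants follow the llvm.prefetch convention: rw = 0 requests a read
// prefetch, locality = 3 is maximal temporal locality, and the trailing 1
// selects the data cache rather than the instruction cache.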
11407
11408 // Handle MSVC intrinsics before argument evaluation to prevent double
11409 // evaluation.
11410 if (std::optional<MSVCIntrin> MsvcIntId =
11411 translateAarch64ToMsvcIntrin(BuiltinID))
11412 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
11413
11414 // Some intrinsics are equivalent - if they are, use the base intrinsic ID.
11415 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
11416 return P.first == BuiltinID;
11417 });
11418 if (It != end(NEONEquivalentIntrinsicMap))
11419 BuiltinID = It->second;
11420
11421 // Find out if any arguments are required to be integer constant
11422 // expressions.
11423 unsigned ICEArguments = 0;
11424 ASTContext::GetBuiltinTypeError Error;
11425 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
11426 assert(Error == ASTContext::GE_None && "Should not codegen an error");
11427
11428 llvm::SmallVector<Value*, 4> Ops;
11429 Address PtrOp0 = Address::invalid();
11430 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
11431 if (i == 0) {
11432 switch (BuiltinID) {
11433 case NEON::BI__builtin_neon_vld1_v:
11434 case NEON::BI__builtin_neon_vld1q_v:
11435 case NEON::BI__builtin_neon_vld1_dup_v:
11436 case NEON::BI__builtin_neon_vld1q_dup_v:
11437 case NEON::BI__builtin_neon_vld1_lane_v:
11438 case NEON::BI__builtin_neon_vld1q_lane_v:
11439 case NEON::BI__builtin_neon_vst1_v:
11440 case NEON::BI__builtin_neon_vst1q_v:
11441 case NEON::BI__builtin_neon_vst1_lane_v:
11442 case NEON::BI__builtin_neon_vst1q_lane_v:
11443 case NEON::BI__builtin_neon_vldap1_lane_s64:
11444 case NEON::BI__builtin_neon_vldap1q_lane_s64:
11445 case NEON::BI__builtin_neon_vstl1_lane_s64:
11446 case NEON::BI__builtin_neon_vstl1q_lane_s64:
11447 // Get the alignment for the argument in addition to the value;
11448 // we'll use it later.
11449 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
11450 Ops.push_back(PtrOp0.getPointer());
11451 continue;
11452 }
11453 }
11454 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
11455 }
11456
11457 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
11458 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
11459 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
11460
11461 if (Builtin) {
11462 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
11463 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
11464 assert(Result && "SISD intrinsic should have been handled");
11465 return Result;
11466 }
11467
11468 const Expr *Arg = E->getArg(E->getNumArgs()-1);
11469 NeonTypeFlags Type(0);
11470 if (std::optional<llvm::APSInt> Result =
11471 Arg->getIntegerConstantExpr(getContext()))
11472 // Determine the type of this overloaded NEON intrinsic.
11473 Type = NeonTypeFlags(Result->getZExtValue());
11474
11475 bool usgn = Type.isUnsigned();
11476 bool quad = Type.isQuad();
11477
11478 // Handle non-overloaded intrinsics first.
11479 switch (BuiltinID) {
11480 default: break;
11481 case NEON::BI__builtin_neon_vabsh_f16:
11482 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11483 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
11484 case NEON::BI__builtin_neon_vaddq_p128: {
11485 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
11486 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11487 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
11488 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
11489 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
11490 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
11491 return Builder.CreateBitCast(Ops[0], Int128Ty);
11492 }
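// Addition of 128-bit polynomials over GF(2) is carry-less, i.e. a plain XOR,
// which is why vaddq_p128 is emitted as an xor of the bitcast operands and
// then bitcast to i128 for the return value.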
11493 case NEON::BI__builtin_neon_vldrq_p128: {
11494 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
11495 Value *Ptr = EmitScalarExpr(E->getArg(0));
11496 return Builder.CreateAlignedLoad(Int128Ty, Ptr,
11497 CharUnits::fromQuantity(16));
11498 }
11499 case NEON::BI__builtin_neon_vstrq_p128: {
11500 Value *Ptr = Ops[0];
11501 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
11502 }
11503 case NEON::BI__builtin_neon_vcvts_f32_u32:
11504 case NEON::BI__builtin_neon_vcvtd_f64_u64:
11505 usgn = true;
11506 [[fallthrough]];
11507 case NEON::BI__builtin_neon_vcvts_f32_s32:
11508 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
11509 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11510 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
11511 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
11512 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
11513 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
11514 if (usgn)
11515 return Builder.CreateUIToFP(Ops[0], FTy);
11516 return Builder.CreateSIToFP(Ops[0], FTy);
11517 }
11518 case NEON::BI__builtin_neon_vcvth_f16_u16:
11519 case NEON::BI__builtin_neon_vcvth_f16_u32:
11520 case NEON::BI__builtin_neon_vcvth_f16_u64:
11521 usgn = true;
11522 [[fallthrough]];
11523 case NEON::BI__builtin_neon_vcvth_f16_s16:
11524 case NEON::BI__builtin_neon_vcvth_f16_s32:
11525 case NEON::BI__builtin_neon_vcvth_f16_s64: {
11526 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11527 llvm::Type *FTy = HalfTy;
11528 llvm::Type *InTy;
11529 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
11530 InTy = Int64Ty;
11531 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
11532 InTy = Int32Ty;
11533 else
11534 InTy = Int16Ty;
11535 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
11536 if (usgn)
11537 return Builder.CreateUIToFP(Ops[0], FTy);
11538 return Builder.CreateSIToFP(Ops[0], FTy);
11539 }
11540 case NEON::BI__builtin_neon_vcvtah_u16_f16:
11541 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
11542 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
11543 case NEON::BI__builtin_neon_vcvtph_u16_f16:
11544 case NEON::BI__builtin_neon_vcvth_u16_f16:
11545 case NEON::BI__builtin_neon_vcvtah_s16_f16:
11546 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
11547 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
11548 case NEON::BI__builtin_neon_vcvtph_s16_f16:
11549 case NEON::BI__builtin_neon_vcvth_s16_f16: {
11550 unsigned Int;
11551 llvm::Type* InTy = Int32Ty;
11552 llvm::Type* FTy = HalfTy;
11553 llvm::Type *Tys[2] = {InTy, FTy};
11554 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11555 switch (BuiltinID) {
11556 default: llvm_unreachable("missing builtin ID in switch!");
11557 case NEON::BI__builtin_neon_vcvtah_u16_f16:
11558 Int = Intrinsic::aarch64_neon_fcvtau; break;
11559 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
11560 Int = Intrinsic::aarch64_neon_fcvtmu; break;
11561 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
11562 Int = Intrinsic::aarch64_neon_fcvtnu; break;
11563 case NEON::BI__builtin_neon_vcvtph_u16_f16:
11564 Int = Intrinsic::aarch64_neon_fcvtpu; break;
11565 case NEON::BI__builtin_neon_vcvth_u16_f16:
11566 Int = Intrinsic::aarch64_neon_fcvtzu; break;
11567 case NEON::BI__builtin_neon_vcvtah_s16_f16:
11568 Int = Intrinsic::aarch64_neon_fcvtas; break;
11569 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
11570 Int = Intrinsic::aarch64_neon_fcvtms; break;
11571 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
11572 Int = Intrinsic::aarch64_neon_fcvtns; break;
11573 case NEON::BI__builtin_neon_vcvtph_s16_f16:
11574 Int = Intrinsic::aarch64_neon_fcvtps; break;
11575 case NEON::BI__builtin_neon_vcvth_s16_f16:
11576 Int = Intrinsic::aarch64_neon_fcvtzs; break;
11577 }
11578 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
11579 return Builder.CreateTrunc(Ops[0], Int16Ty);
11580 }
11581 case NEON::BI__builtin_neon_vcaleh_f16:
11582 case NEON::BI__builtin_neon_vcalth_f16:
11583 case NEON::BI__builtin_neon_vcageh_f16:
11584 case NEON::BI__builtin_neon_vcagth_f16: {
11585 unsigned Int;
11586 llvm::Type* InTy = Int32Ty;
11587 llvm::Type* FTy = HalfTy;
11588 llvm::Type *Tys[2] = {InTy, FTy};
11589 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11590 switch (BuiltinID) {
11591 default: llvm_unreachable("missing builtin ID in switch!");
11592 case NEON::BI__builtin_neon_vcageh_f16:
11593 Int = Intrinsic::aarch64_neon_facge; break;
11594 case NEON::BI__builtin_neon_vcagth_f16:
11595 Int = Intrinsic::aarch64_neon_facgt; break;
11596 case NEON::BI__builtin_neon_vcaleh_f16:
11597 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
11598 case NEON::BI__builtin_neon_vcalth_f16:
11599 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
11600 }
11601 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
11602 return Builder.CreateTrunc(Ops[0], Int16Ty);
11603 }
11604 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
11605 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
11606 unsigned Int;
11607 llvm::Type* InTy = Int32Ty;
11608 llvm::Type* FTy = HalfTy;
11609 llvm::Type *Tys[2] = {InTy, FTy};
11610 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11611 switch (BuiltinID) {
11612 default: llvm_unreachable("missing builtin ID in switch!");
11613 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
11614 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
11615 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
11616 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
11617 }
11618 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
11619 return Builder.CreateTrunc(Ops[0], Int16Ty);
11620 }
11621 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
11622 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
11623 unsigned Int;
11624 llvm::Type* FTy = HalfTy;
11625 llvm::Type* InTy = Int32Ty;
11626 llvm::Type *Tys[2] = {FTy, InTy};
11627 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11628 switch (BuiltinID) {
11629 default: llvm_unreachable("missing builtin ID in switch!");
11630 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
11631 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
11632 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
11633 break;
11634 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
11635 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
11636 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
11637 break;
11638 }
11639 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
11640 }
11641 case NEON::BI__builtin_neon_vpaddd_s64: {
11642 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
11643 Value *Vec = EmitScalarExpr(E->getArg(0));
11644 // The vector is v2i64, so make sure it's bitcast to that.
11645 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
11646 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11647 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11648 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11649 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11650 // Pairwise addition of a v2i64 into a scalar i64.
11651 return Builder.CreateAdd(Op0, Op1, "vpaddd");
11652 }
11653 case NEON::BI__builtin_neon_vpaddd_f64: {
11654 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
11655 Value *Vec = EmitScalarExpr(E->getArg(0));
11656 // The vector is v2f64, so make sure it's bitcast to that.
11657 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
11658 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11659 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11660 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11661 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11662 // Pairwise addition of a v2f64 into a scalar f64.
11663 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
11664 }
11665 case NEON::BI__builtin_neon_vpadds_f32: {
11666 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
11667 Value *Vec = EmitScalarExpr(E->getArg(0));
11668 // The vector is v2f32, so make sure it's bitcast to that.
11669 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
11670 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11671 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11672 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11673 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11674 // Pairwise addition of a v2f32 into a scalar f32.
11675 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
11676 }
11677 case NEON::BI__builtin_neon_vceqzd_s64:
11678 case NEON::BI__builtin_neon_vceqzd_f64:
11679 case NEON::BI__builtin_neon_vceqzs_f32:
11680 case NEON::BI__builtin_neon_vceqzh_f16:
11681 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11682 return EmitAArch64CompareBuiltinExpr(
11683 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11684 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
11685 case NEON::BI__builtin_neon_vcgezd_s64:
11686 case NEON::BI__builtin_neon_vcgezd_f64:
11687 case NEON::BI__builtin_neon_vcgezs_f32:
11688 case NEON::BI__builtin_neon_vcgezh_f16:
11689 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11690 return EmitAArch64CompareBuiltinExpr(
11691 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11692 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
11693 case NEON::BI__builtin_neon_vclezd_s64:
11694 case NEON::BI__builtin_neon_vclezd_f64:
11695 case NEON::BI__builtin_neon_vclezs_f32:
11696 case NEON::BI__builtin_neon_vclezh_f16:
11697 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11698 return EmitAArch64CompareBuiltinExpr(
11699 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11700 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
11701 case NEON::BI__builtin_neon_vcgtzd_s64:
11702 case NEON::BI__builtin_neon_vcgtzd_f64:
11703 case NEON::BI__builtin_neon_vcgtzs_f32:
11704 case NEON::BI__builtin_neon_vcgtzh_f16:
11705 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11706 return EmitAArch64CompareBuiltinExpr(
11707 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11708 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
11709 case NEON::BI__builtin_neon_vcltzd_s64:
11710 case NEON::BI__builtin_neon_vcltzd_f64:
11711 case NEON::BI__builtin_neon_vcltzs_f32:
11712 case NEON::BI__builtin_neon_vcltzh_f16:
11713 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11714 return EmitAArch64CompareBuiltinExpr(
11715 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11716 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
11717
11718 case NEON::BI__builtin_neon_vceqzd_u64: {
11719 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11720 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
11721 Ops[0] =
11722 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
11723 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
11724 }
11725 case NEON::BI__builtin_neon_vceqd_f64:
11726 case NEON::BI__builtin_neon_vcled_f64:
11727 case NEON::BI__builtin_neon_vcltd_f64:
11728 case NEON::BI__builtin_neon_vcged_f64:
11729 case NEON::BI__builtin_neon_vcgtd_f64: {
11730 llvm::CmpInst::Predicate P;
11731 switch (BuiltinID) {
11732 default: llvm_unreachable("missing builtin ID in switch!");
11733 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
11734 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
11735 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
11736 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
11737 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
11738 }
11739 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11740 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
11741 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
11742 if (P == llvm::FCmpInst::FCMP_OEQ)
11743 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11744 else
11745 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11746 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
11747 }
11748 case NEON::BI__builtin_neon_vceqs_f32:
11749 case NEON::BI__builtin_neon_vcles_f32:
11750 case NEON::BI__builtin_neon_vclts_f32:
11751 case NEON::BI__builtin_neon_vcges_f32:
11752 case NEON::BI__builtin_neon_vcgts_f32: {
11753 llvm::CmpInst::Predicate P;
11754 switch (BuiltinID) {
11755 default: llvm_unreachable("missing builtin ID in switch!");
11756 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
11757 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
11758 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
11759 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
11760 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
11761 }
11762 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11763 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
11764 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
11765 if (P == llvm::FCmpInst::FCMP_OEQ)
11766 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11767 else
11768 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11769 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
11770 }
11771 case NEON::BI__builtin_neon_vceqh_f16:
11772 case NEON::BI__builtin_neon_vcleh_f16:
11773 case NEON::BI__builtin_neon_vclth_f16:
11774 case NEON::BI__builtin_neon_vcgeh_f16:
11775 case NEON::BI__builtin_neon_vcgth_f16: {
11776 llvm::CmpInst::Predicate P;
11777 switch (BuiltinID) {
11778 default: llvm_unreachable("missing builtin ID in switch!");
11779 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
11780 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
11781 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
11782 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
11783 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
11784 }
11785 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11786 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
11787 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
11788 if (P == llvm::FCmpInst::FCMP_OEQ)
11789 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11790 else
11791 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11792 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
11793 }
11794 case NEON::BI__builtin_neon_vceqd_s64:
11795 case NEON::BI__builtin_neon_vceqd_u64:
11796 case NEON::BI__builtin_neon_vcgtd_s64:
11797 case NEON::BI__builtin_neon_vcgtd_u64:
11798 case NEON::BI__builtin_neon_vcltd_s64:
11799 case NEON::BI__builtin_neon_vcltd_u64:
11800 case NEON::BI__builtin_neon_vcged_u64:
11801 case NEON::BI__builtin_neon_vcged_s64:
11802 case NEON::BI__builtin_neon_vcled_u64:
11803 case NEON::BI__builtin_neon_vcled_s64: {
11804 llvm::CmpInst::Predicate P;
11805 switch (BuiltinID) {
11806 default: llvm_unreachable("missing builtin ID in switch!");
11807 case NEON::BI__builtin_neon_vceqd_s64:
11808 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
11809 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
11810 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
11811 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
11812 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
11813 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
11814 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
11815 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
11816 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
11817 }
11818 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11819 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
11820 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
11821 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
11822 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
11823 }
11824 case NEON::BI__builtin_neon_vtstd_s64:
11825 case NEON::BI__builtin_neon_vtstd_u64: {
11826 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11827 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
11828 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
11829 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
11830 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
11831 llvm::Constant::getNullValue(Int64Ty));
11832 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
11833 }
11834 case NEON::BI__builtin_neon_vset_lane_i8:
11835 case NEON::BI__builtin_neon_vset_lane_i16:
11836 case NEON::BI__builtin_neon_vset_lane_i32:
11837 case NEON::BI__builtin_neon_vset_lane_i64:
11838 case NEON::BI__builtin_neon_vset_lane_bf16:
11839 case NEON::BI__builtin_neon_vset_lane_f32:
11840 case NEON::BI__builtin_neon_vsetq_lane_i8:
11841 case NEON::BI__builtin_neon_vsetq_lane_i16:
11842 case NEON::BI__builtin_neon_vsetq_lane_i32:
11843 case NEON::BI__builtin_neon_vsetq_lane_i64:
11844 case NEON::BI__builtin_neon_vsetq_lane_bf16:
11845 case NEON::BI__builtin_neon_vsetq_lane_f32:
11846 Ops.push_back(EmitScalarExpr(E->getArg(2)));
11847 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
11848 case NEON::BI__builtin_neon_vset_lane_f64:
11849 // The vector type needs a cast for the v1f64 variant.
11850 Ops[1] =
11851 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
11852 Ops.push_back(EmitScalarExpr(E->getArg(2)));
11853 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
11854 case NEON::BI__builtin_neon_vsetq_lane_f64:
11855 // The vector type needs a cast for the v2f64 variant.
11856 Ops[1] =
11857 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
11858 Ops.push_back(EmitScalarExpr(E->getArg(2)));
11859 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
11860
11861 case NEON::BI__builtin_neon_vget_lane_i8:
11862 case NEON::BI__builtin_neon_vdupb_lane_i8:
11863 Ops[0] =
11864 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
11865 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11866 "vget_lane");
11867 case NEON::BI__builtin_neon_vgetq_lane_i8:
11868 case NEON::BI__builtin_neon_vdupb_laneq_i8:
11869 Ops[0] =
11870 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
11871 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11872 "vgetq_lane");
11873 case NEON::BI__builtin_neon_vget_lane_i16:
11874 case NEON::BI__builtin_neon_vduph_lane_i16:
11875 Ops[0] =
11876 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
11877 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11878 "vget_lane");
11879 case NEON::BI__builtin_neon_vgetq_lane_i16:
11880 case NEON::BI__builtin_neon_vduph_laneq_i16:
11881 Ops[0] =
11882 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
11883 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11884 "vgetq_lane");
11885 case NEON::BI__builtin_neon_vget_lane_i32:
11886 case NEON::BI__builtin_neon_vdups_lane_i32:
11887 Ops[0] =
11888 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
11889 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11890 "vget_lane");
11891 case NEON::BI__builtin_neon_vdups_lane_f32:
11892 Ops[0] =
11893 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
11894 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11895 "vdups_lane");
11896 case NEON::BI__builtin_neon_vgetq_lane_i32:
11897 case NEON::BI__builtin_neon_vdups_laneq_i32:
11898 Ops[0] =
11899 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
11900 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11901 "vgetq_lane");
11902 case NEON::BI__builtin_neon_vget_lane_i64:
11903 case NEON::BI__builtin_neon_vdupd_lane_i64:
11904 Ops[0] =
11905 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
11906 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11907 "vget_lane");
11908 case NEON::BI__builtin_neon_vdupd_lane_f64:
11909 Ops[0] =
11910 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
11911 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11912 "vdupd_lane");
11913 case NEON::BI__builtin_neon_vgetq_lane_i64:
11914 case NEON::BI__builtin_neon_vdupd_laneq_i64:
11915 Ops[0] =
11916 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
11917 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11918 "vgetq_lane");
11919 case NEON::BI__builtin_neon_vget_lane_f32:
11920 Ops[0] =
11921 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
11922 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11923 "vget_lane");
11924 case NEON::BI__builtin_neon_vget_lane_f64:
11925 Ops[0] =
11926 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
11927 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11928 "vget_lane");
11929 case NEON::BI__builtin_neon_vgetq_lane_f32:
11930 case NEON::BI__builtin_neon_vdups_laneq_f32:
11931 Ops[0] =
11932 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
11933 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11934 "vgetq_lane");
11935 case NEON::BI__builtin_neon_vgetq_lane_f64:
11936 case NEON::BI__builtin_neon_vdupd_laneq_f64:
11937 Ops[0] =
11938 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
11939 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11940 "vgetq_lane");
11941 case NEON::BI__builtin_neon_vaddh_f16:
11942 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11943 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
11944 case NEON::BI__builtin_neon_vsubh_f16:
11945 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11946 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
11947 case NEON::BI__builtin_neon_vmulh_f16:
11948 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11949 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
11950 case NEON::BI__builtin_neon_vdivh_f16:
11951 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11952 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
11953 case NEON::BI__builtin_neon_vfmah_f16:
11954 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
11955 return emitCallMaybeConstrainedFPBuiltin(
11956 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
11957 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
11958 case NEON::BI__builtin_neon_vfmsh_f16: {
11959 Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
11960
11961 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
11962 return emitCallMaybeConstrainedFPBuiltin(
11963 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
11964 {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
11965 }
11966 case NEON::BI__builtin_neon_vaddd_s64:
11967 case NEON::BI__builtin_neon_vaddd_u64:
11968 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
11969 case NEON::BI__builtin_neon_vsubd_s64:
11970 case NEON::BI__builtin_neon_vsubd_u64:
11971 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
11972 case NEON::BI__builtin_neon_vqdmlalh_s16:
11973 case NEON::BI__builtin_neon_vqdmlslh_s16: {
11974 SmallVector<Value *, 2> ProductOps;
11975 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
11976 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
11977 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
11978 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
11979 ProductOps, "vqdmlXl");
11980 Constant *CI = ConstantInt::get(SizeTy, 0);
11981 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
11982
11983 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
11984 ? Intrinsic::aarch64_neon_sqadd
11985 : Intrinsic::aarch64_neon_sqsub;
11986 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
11987 }
11988 case NEON::BI__builtin_neon_vqshlud_n_s64: {
11989 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11990 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
11991 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
11992 Ops, "vqshlu_n");
11993 }
11994 case NEON::BI__builtin_neon_vqshld_n_u64:
11995 case NEON::BI__builtin_neon_vqshld_n_s64: {
11996 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
11997 ? Intrinsic::aarch64_neon_uqshl
11998 : Intrinsic::aarch64_neon_sqshl;
11999 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12000 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12001 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
12002 }
12003 case NEON::BI__builtin_neon_vrshrd_n_u64:
12004 case NEON::BI__builtin_neon_vrshrd_n_s64: {
12005 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
12006 ? Intrinsic::aarch64_neon_urshl
12007 : Intrinsic::aarch64_neon_srshl;
12008 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12009 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
12010 Ops[1] = ConstantInt::get(Int64Ty, -SV);
12011 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
12012 }
12013 case NEON::BI__builtin_neon_vrsrad_n_u64:
12014 case NEON::BI__builtin_neon_vrsrad_n_s64: {
12015 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
12016 ? Intrinsic::aarch64_neon_urshl
12017 : Intrinsic::aarch64_neon_srshl;
12018 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12019 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
12020 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
12021 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
12022 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
12023 }
12024 case NEON::BI__builtin_neon_vshld_n_s64:
12025 case NEON::BI__builtin_neon_vshld_n_u64: {
12026 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12027 return Builder.CreateShl(
12028 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
12029 }
12030 case NEON::BI__builtin_neon_vshrd_n_s64: {
12031 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12032 return Builder.CreateAShr(
12033 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12034 Amt->getZExtValue())),
12035 "shrd_n");
12036 }
12037 case NEON::BI__builtin_neon_vshrd_n_u64: {
12038 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12039 uint64_t ShiftAmt = Amt->getZExtValue();
12040 // Right-shifting an unsigned value by its size yields 0.
12041 if (ShiftAmt == 64)
12042 return ConstantInt::get(Int64Ty, 0);
12043 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
12044 "shrd_n");
12045 }
12046 case NEON::BI__builtin_neon_vsrad_n_s64: {
12047 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12048 Ops[1] = Builder.CreateAShr(
12049 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12050 Amt->getZExtValue())),
12051 "shrd_n");
12052 return Builder.CreateAdd(Ops[0], Ops[1]);
12053 }
12054 case NEON::BI__builtin_neon_vsrad_n_u64: {
12055 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12056 uint64_t ShiftAmt = Amt->getZExtValue();
12057 // Right-shifting an unsigned value by its size yields 0.
12058 // As Op + 0 = Op, return Ops[0] directly.
12059 if (ShiftAmt == 64)
12060 return Ops[0];
12061 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
12062 "shrd_n");
12063 return Builder.CreateAdd(Ops[0], Ops[1]);
12064 }
12065 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
12066 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
12067 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
12068 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
12069 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12070 "lane");
12071 SmallVector<Value *, 2> ProductOps;
12072 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12073 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
12074 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12075 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12076 ProductOps, "vqdmlXl");
12077 Constant *CI = ConstantInt::get(SizeTy, 0);
12078 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12079 Ops.pop_back();
12080
12081 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
12082 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
12083 ? Intrinsic::aarch64_neon_sqadd
12084 : Intrinsic::aarch64_neon_sqsub;
12085 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
12086 }
12087 case NEON::BI__builtin_neon_vqdmlals_s32:
12088 case NEON::BI__builtin_neon_vqdmlsls_s32: {
12089 SmallVector<Value *, 2> ProductOps;
12090 ProductOps.push_back(Ops[1]);
12091 ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
12092 Ops[1] =
12093 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12094 ProductOps, "vqdmlXl");
12095
12096 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
12097 ? Intrinsic::aarch64_neon_sqadd
12098 : Intrinsic::aarch64_neon_sqsub;
12099 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
12100 }
12101 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
12102 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
12103 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
12104 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
12105 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12106 "lane");
12107 SmallVector<Value *, 2> ProductOps;
12108 ProductOps.push_back(Ops[1]);
12109 ProductOps.push_back(Ops[2]);
12110 Ops[1] =
12111 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12112 ProductOps, "vqdmlXl");
12113 Ops.pop_back();
12114
12115 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
12116 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
12117 ? Intrinsic::aarch64_neon_sqadd
12118 : Intrinsic::aarch64_neon_sqsub;
12119 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
12120 }
12121 case NEON::BI__builtin_neon_vget_lane_bf16:
12122 case NEON::BI__builtin_neon_vduph_lane_bf16:
12123 case NEON::BI__builtin_neon_vduph_lane_f16: {
12124 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12125 "vget_lane");
12126 }
12127 case NEON::BI__builtin_neon_vgetq_lane_bf16:
12128 case NEON::BI__builtin_neon_vduph_laneq_bf16:
12129 case NEON::BI__builtin_neon_vduph_laneq_f16: {
12130 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12131 "vgetq_lane");
12132 }
12133
12134 case clang::AArch64::BI_InterlockedAdd:
12135 case clang::AArch64::BI_InterlockedAdd64: {
12136 Address DestAddr = CheckAtomicAlignment(*this, E);
12137 Value *Val = EmitScalarExpr(E->getArg(1));
12138 AtomicRMWInst *RMWI =
12139 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val,
12140 llvm::AtomicOrdering::SequentiallyConsistent);
12141 return Builder.CreateAdd(RMWI, Val);
12142 }
12143 }
12144
12145 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
12146 llvm::Type *Ty = VTy;
12147 if (!Ty)
12148 return nullptr;
12149
12150 // Not all intrinsics handled by the common case work for AArch64 yet, so only
12151 // defer to common code if it's been added to our special map.
12152 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
12153 AArch64SIMDIntrinsicsProvenSorted);
12154
12155 if (Builtin)
12156 return EmitCommonNeonBuiltinExpr(
12157 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
12158 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
12159 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
12160
12161 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
12162 return V;
12163
12164 unsigned Int;
12165 switch (BuiltinID) {
12166 default: return nullptr;
12167 case NEON::BI__builtin_neon_vbsl_v:
12168 case NEON::BI__builtin_neon_vbslq_v: {
12169 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
12170 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
12171 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
12172 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
12173
12174 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
12175 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
12176 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
12177 return Builder.CreateBitCast(Ops[0], Ty);
12178 }
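// vbsl is a bitwise select: each result bit comes from the second operand
// where the first operand's bit is 1 and from the third operand where it is
// 0, i.e. (a & b) | (~a & c) on the integer-typed bitcasts built above.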
12179 case NEON::BI__builtin_neon_vfma_lane_v:
12180 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
12181 // The ARM builtins (and instructions) have the addend as the first
12182 // operand, but the 'fma' intrinsics have it last. Swap it around here.
12183 Value *Addend = Ops[0];
12184 Value *Multiplicand = Ops[1];
12185 Value *LaneSource = Ops[2];
12186 Ops[0] = Multiplicand;
12187 Ops[1] = LaneSource;
12188 Ops[2] = Addend;
12189
12190 // Now adjust things to handle the lane access.
12191 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
12192 ? llvm::FixedVectorType::get(VTy->getElementType(),
12193 VTy->getNumElements() / 2)
12194 : VTy;
12195 llvm::Constant *cst = cast<Constant>(Ops[3]);
12196 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
12197 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
12198 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
12199
12200 Ops.pop_back();
12201 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
12202 : Intrinsic::fma;
12203 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
12204 }
12205 case NEON::BI__builtin_neon_vfma_laneq_v: {
12206 auto *VTy = cast<llvm::FixedVectorType>(Ty);
12207 // v1f64 fma should be mapped to Neon scalar f64 fma
12208 if (VTy && VTy->getElementType() == DoubleTy) {
12209 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12210 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
12211 llvm::FixedVectorType *VTy =
12212 llvm::FixedVectorType::get(DoubleTy, 2);
12213 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
12214 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12215 Value *Result;
12216 Result = emitCallMaybeConstrainedFPBuiltin(
12217 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
12218 DoubleTy, {Ops[1], Ops[2], Ops[0]});
12219 return Builder.CreateBitCast(Result, Ty);
12220 }
12221 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12222 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12223
12224 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
12225 VTy->getNumElements() * 2);
12226 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
12227 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
12228 cast<ConstantInt>(Ops[3]));
12229 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
12230
12231 return emitCallMaybeConstrainedFPBuiltin(
12232 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12233 {Ops[2], Ops[1], Ops[0]});
12234 }
12235 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
12236 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12237 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12238
12239 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12240 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
12241 return emitCallMaybeConstrainedFPBuiltin(
12242 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12243 {Ops[2], Ops[1], Ops[0]});
12244 }
12245 case NEON::BI__builtin_neon_vfmah_lane_f16:
12246 case NEON::BI__builtin_neon_vfmas_lane_f32:
12247 case NEON::BI__builtin_neon_vfmah_laneq_f16:
12248 case NEON::BI__builtin_neon_vfmas_laneq_f32:
12249 case NEON::BI__builtin_neon_vfmad_lane_f64:
12250 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
12251 Ops.push_back(EmitScalarExpr(E->getArg(3)));
12252 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
12253 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12254 return emitCallMaybeConstrainedFPBuiltin(
12255 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12256 {Ops[1], Ops[2], Ops[0]});
12257 }
12258 case NEON::BI__builtin_neon_vmull_v:
12259 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12260 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
12261 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
12262 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
12263 case NEON::BI__builtin_neon_vmax_v:
12264 case NEON::BI__builtin_neon_vmaxq_v:
12265 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12266 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
12267 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
12268 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
12269 case NEON::BI__builtin_neon_vmaxh_f16: {
12270 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12271 Int = Intrinsic::aarch64_neon_fmax;
12272 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
12273 }
12274 case NEON::BI__builtin_neon_vmin_v:
12275 case NEON::BI__builtin_neon_vminq_v:
12276 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12277 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
12278 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
12279 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
12280 case NEON::BI__builtin_neon_vminh_f16: {
12281 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12282 Int = Intrinsic::aarch64_neon_fmin;
12283 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
12284 }
12285 case NEON::BI__builtin_neon_vabd_v:
12286 case NEON::BI__builtin_neon_vabdq_v:
12287 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12288 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
12289 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
12290 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
12291 case NEON::BI__builtin_neon_vpadal_v:
12292 case NEON::BI__builtin_neon_vpadalq_v: {
12293 unsigned ArgElts = VTy->getNumElements();
12294 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
12295 unsigned BitWidth = EltTy->getBitWidth();
12296 auto *ArgTy = llvm::FixedVectorType::get(
12297 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
12298 llvm::Type* Tys[2] = { VTy, ArgTy };
12299 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
12300 SmallVector<llvm::Value*, 1> TmpOps;
12301 TmpOps.push_back(Ops[1]);
12302 Function *F = CGM.getIntrinsic(Int, Tys);
12303 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
12304 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
12305 return Builder.CreateAdd(tmp, addend);
12306 }
12307 case NEON::BI__builtin_neon_vpmin_v:
12308 case NEON::BI__builtin_neon_vpminq_v:
12309 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12310 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
12311 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
12312 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
12313 case NEON::BI__builtin_neon_vpmax_v:
12314 case NEON::BI__builtin_neon_vpmaxq_v:
12315 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12316 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
12317 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
12318 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
12319 case NEON::BI__builtin_neon_vminnm_v:
12320 case NEON::BI__builtin_neon_vminnmq_v:
12321 Int = Intrinsic::aarch64_neon_fminnm;
12322 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
12323 case NEON::BI__builtin_neon_vminnmh_f16:
12324 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12325 Int = Intrinsic::aarch64_neon_fminnm;
12326 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
12327 case NEON::BI__builtin_neon_vmaxnm_v:
12328 case NEON::BI__builtin_neon_vmaxnmq_v:
12329 Int = Intrinsic::aarch64_neon_fmaxnm;
12330 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
12331 case NEON::BI__builtin_neon_vmaxnmh_f16:
12332 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12333 Int = Intrinsic::aarch64_neon_fmaxnm;
12334 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
12335 case NEON::BI__builtin_neon_vrecpss_f32: {
12336 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12337 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
12338 Ops, "vrecps");
12339 }
12340 case NEON::BI__builtin_neon_vrecpsd_f64:
12341 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12342 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
12343 Ops, "vrecps");
12344 case NEON::BI__builtin_neon_vrecpsh_f16:
12345 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12346 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
12347 Ops, "vrecps");
12348 case NEON::BI__builtin_neon_vqshrun_n_v:
12349 Int = Intrinsic::aarch64_neon_sqshrun;
12350 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
12351 case NEON::BI__builtin_neon_vqrshrun_n_v:
12352 Int = Intrinsic::aarch64_neon_sqrshrun;
12353 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
12354 case NEON::BI__builtin_neon_vqshrn_n_v:
12355 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
12356 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
12357 case NEON::BI__builtin_neon_vrshrn_n_v:
12358 Int = Intrinsic::aarch64_neon_rshrn;
12359 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
12360 case NEON::BI__builtin_neon_vqrshrn_n_v:
12361 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
12362 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
12363 case NEON::BI__builtin_neon_vrndah_f16: {
12364 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12365 Int = Builder.getIsFPConstrained()
12366 ? Intrinsic::experimental_constrained_round
12367 : Intrinsic::round;
12368 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
12369 }
12370 case NEON::BI__builtin_neon_vrnda_v:
12371 case NEON::BI__builtin_neon_vrndaq_v: {
12372 Int = Builder.getIsFPConstrained()
12373 ? Intrinsic::experimental_constrained_round
12374 : Intrinsic::round;
12375 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
12376 }
12377 case NEON::BI__builtin_neon_vrndih_f16: {
12378 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12379 Int = Builder.getIsFPConstrained()
12380 ? Intrinsic::experimental_constrained_nearbyint
12381 : Intrinsic::nearbyint;
12382 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
12383 }
12384 case NEON::BI__builtin_neon_vrndmh_f16: {
12385 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12386 Int = Builder.getIsFPConstrained()
12387 ? Intrinsic::experimental_constrained_floor
12388 : Intrinsic::floor;
12389 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
12390 }
12391 case NEON::BI__builtin_neon_vrndm_v:
12392 case NEON::BI__builtin_neon_vrndmq_v: {
12393 Int = Builder.getIsFPConstrained()
12394 ? Intrinsic::experimental_constrained_floor
12395 : Intrinsic::floor;
12396 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
12397 }
12398 case NEON::BI__builtin_neon_vrndnh_f16: {
12399 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12400 Int = Builder.getIsFPConstrained()
12401 ? Intrinsic::experimental_constrained_roundeven
12402 : Intrinsic::roundeven;
12403 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
12404 }
12405 case NEON::BI__builtin_neon_vrndn_v:
12406 case NEON::BI__builtin_neon_vrndnq_v: {
12407 Int = Builder.getIsFPConstrained()
12408 ? Intrinsic::experimental_constrained_roundeven
12409 : Intrinsic::roundeven;
12410 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
12411 }
12412 case NEON::BI__builtin_neon_vrndns_f32: {
12413 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12414 Int = Builder.getIsFPConstrained()
12415 ? Intrinsic::experimental_constrained_roundeven
12416 : Intrinsic::roundeven;
12417 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
12418 }
12419 case NEON::BI__builtin_neon_vrndph_f16: {
12420 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12421 Int = Builder.getIsFPConstrained()
12422 ? Intrinsic::experimental_constrained_ceil
12423 : Intrinsic::ceil;
12424 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
12425 }
12426 case NEON::BI__builtin_neon_vrndp_v:
12427 case NEON::BI__builtin_neon_vrndpq_v: {
12428 Int = Builder.getIsFPConstrained()
12429 ? Intrinsic::experimental_constrained_ceil
12430 : Intrinsic::ceil;
12431 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
12432 }
12433 case NEON::BI__builtin_neon_vrndxh_f16: {
12434 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12435 Int = Builder.getIsFPConstrained()
12436 ? Intrinsic::experimental_constrained_rint
12437 : Intrinsic::rint;
12438 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
12439 }
12440 case NEON::BI__builtin_neon_vrndx_v:
12441 case NEON::BI__builtin_neon_vrndxq_v: {
12442 Int = Builder.getIsFPConstrained()
12443 ? Intrinsic::experimental_constrained_rint
12444 : Intrinsic::rint;
12445 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
12446 }
12447 case NEON::BI__builtin_neon_vrndh_f16: {
12448 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12449 Int = Builder.getIsFPConstrained()
12450 ? Intrinsic::experimental_constrained_trunc
12451 : Intrinsic::trunc;
12452 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
12453 }
12454 case NEON::BI__builtin_neon_vrnd32x_f32:
12455 case NEON::BI__builtin_neon_vrnd32xq_f32:
12456 case NEON::BI__builtin_neon_vrnd32x_f64:
12457 case NEON::BI__builtin_neon_vrnd32xq_f64: {
12458 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12459 Int = Intrinsic::aarch64_neon_frint32x;
12460 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
12461 }
12462 case NEON::BI__builtin_neon_vrnd32z_f32:
12463 case NEON::BI__builtin_neon_vrnd32zq_f32:
12464 case NEON::BI__builtin_neon_vrnd32z_f64:
12465 case NEON::BI__builtin_neon_vrnd32zq_f64: {
12466 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12467 Int = Intrinsic::aarch64_neon_frint32z;
12468 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
12469 }
12470 case NEON::BI__builtin_neon_vrnd64x_f32:
12471 case NEON::BI__builtin_neon_vrnd64xq_f32:
12472 case NEON::BI__builtin_neon_vrnd64x_f64:
12473 case NEON::BI__builtin_neon_vrnd64xq_f64: {
12474 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12475 Int = Intrinsic::aarch64_neon_frint64x;
12476 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
12477 }
12478 case NEON::BI__builtin_neon_vrnd64z_f32:
12479 case NEON::BI__builtin_neon_vrnd64zq_f32:
12480 case NEON::BI__builtin_neon_vrnd64z_f64:
12481 case NEON::BI__builtin_neon_vrnd64zq_f64: {
12482 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12483 Int = Intrinsic::aarch64_neon_frint64z;
12484 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
12485 }
12486 case NEON::BI__builtin_neon_vrnd_v:
12487 case NEON::BI__builtin_neon_vrndq_v: {
12488 Int = Builder.getIsFPConstrained()
12489 ? Intrinsic::experimental_constrained_trunc
12490 : Intrinsic::trunc;
12491 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
12492 }
12493 case NEON::BI__builtin_neon_vcvt_f64_v:
12494 case NEON::BI__builtin_neon_vcvtq_f64_v:
12495 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12496 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
12497 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
12498 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
12499 case NEON::BI__builtin_neon_vcvt_f64_f32: {
12500 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
12501 "unexpected vcvt_f64_f32 builtin");
12502 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
12503 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
12504
12505 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
12506 }
12507 case NEON::BI__builtin_neon_vcvt_f32_f64: {
12508 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
12509 "unexpected vcvt_f32_f64 builtin");
12510 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
12511 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
12512
12513 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
12514 }
12515 case NEON::BI__builtin_neon_vcvt_s32_v:
12516 case NEON::BI__builtin_neon_vcvt_u32_v:
12517 case NEON::BI__builtin_neon_vcvt_s64_v:
12518 case NEON::BI__builtin_neon_vcvt_u64_v:
12519 case NEON::BI__builtin_neon_vcvt_s16_f16:
12520 case NEON::BI__builtin_neon_vcvt_u16_f16:
12521 case NEON::BI__builtin_neon_vcvtq_s32_v:
12522 case NEON::BI__builtin_neon_vcvtq_u32_v:
12523 case NEON::BI__builtin_neon_vcvtq_s64_v:
12524 case NEON::BI__builtin_neon_vcvtq_u64_v:
12525 case NEON::BI__builtin_neon_vcvtq_s16_f16:
12526 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
12527 Int =
12528 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
12529 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
12530 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
12531 }
12532 case NEON::BI__builtin_neon_vcvta_s16_f16:
12533 case NEON::BI__builtin_neon_vcvta_u16_f16:
12534 case NEON::BI__builtin_neon_vcvta_s32_v:
12535 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
12536 case NEON::BI__builtin_neon_vcvtaq_s32_v:
12537 case NEON::BI__builtin_neon_vcvta_u32_v:
12538 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
12539 case NEON::BI__builtin_neon_vcvtaq_u32_v:
12540 case NEON::BI__builtin_neon_vcvta_s64_v:
12541 case NEON::BI__builtin_neon_vcvtaq_s64_v:
12542 case NEON::BI__builtin_neon_vcvta_u64_v:
12543 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
12544 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
12545 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12546 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
12547 }
12548 case NEON::BI__builtin_neon_vcvtm_s16_f16:
12549 case NEON::BI__builtin_neon_vcvtm_s32_v:
12550 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
12551 case NEON::BI__builtin_neon_vcvtmq_s32_v:
12552 case NEON::BI__builtin_neon_vcvtm_u16_f16:
12553 case NEON::BI__builtin_neon_vcvtm_u32_v:
12554 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
12555 case NEON::BI__builtin_neon_vcvtmq_u32_v:
12556 case NEON::BI__builtin_neon_vcvtm_s64_v:
12557 case NEON::BI__builtin_neon_vcvtmq_s64_v:
12558 case NEON::BI__builtin_neon_vcvtm_u64_v:
12559 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
12560 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
12561 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12562 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
12563 }
12564 case NEON::BI__builtin_neon_vcvtn_s16_f16:
12565 case NEON::BI__builtin_neon_vcvtn_s32_v:
12566 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
12567 case NEON::BI__builtin_neon_vcvtnq_s32_v:
12568 case NEON::BI__builtin_neon_vcvtn_u16_f16:
12569 case NEON::BI__builtin_neon_vcvtn_u32_v:
12570 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
12571 case NEON::BI__builtin_neon_vcvtnq_u32_v:
12572 case NEON::BI__builtin_neon_vcvtn_s64_v:
12573 case NEON::BI__builtin_neon_vcvtnq_s64_v:
12574 case NEON::BI__builtin_neon_vcvtn_u64_v:
12575 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
12576 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
12577 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12578 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
12579 }
12580 case NEON::BI__builtin_neon_vcvtp_s16_f16:
12581 case NEON::BI__builtin_neon_vcvtp_s32_v:
12582 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
12583 case NEON::BI__builtin_neon_vcvtpq_s32_v:
12584 case NEON::BI__builtin_neon_vcvtp_u16_f16:
12585 case NEON::BI__builtin_neon_vcvtp_u32_v:
12586 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
12587 case NEON::BI__builtin_neon_vcvtpq_u32_v:
12588 case NEON::BI__builtin_neon_vcvtp_s64_v:
12589 case NEON::BI__builtin_neon_vcvtpq_s64_v:
12590 case NEON::BI__builtin_neon_vcvtp_u64_v:
12591 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
12592 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
12593 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12594 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
12595 }
12596 case NEON::BI__builtin_neon_vmulx_v:
12597 case NEON::BI__builtin_neon_vmulxq_v: {
12598 Int = Intrinsic::aarch64_neon_fmulx;
12599 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
12600 }
12601 case NEON::BI__builtin_neon_vmulxh_lane_f16:
12602 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
12603 // vmulx_lane should be mapped to Neon scalar mulx after
12604 // extracting the scalar element
12605 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12606 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
12607 Ops.pop_back();
12608 Int = Intrinsic::aarch64_neon_fmulx;
12609 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
12610 }
12611 case NEON::BI__builtin_neon_vmul_lane_v:
12612 case NEON::BI__builtin_neon_vmul_laneq_v: {
12613 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
12614 bool Quad = false;
12615 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
12616 Quad = true;
12617 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12618    llvm::FixedVectorType *VTy =
12619        GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
12620 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
12621 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
12622 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
12623 return Builder.CreateBitCast(Result, Ty);
12624 }
12625 case NEON::BI__builtin_neon_vnegd_s64:
12626 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
12627 case NEON::BI__builtin_neon_vnegh_f16:
12628 return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
12629 case NEON::BI__builtin_neon_vpmaxnm_v:
12630 case NEON::BI__builtin_neon_vpmaxnmq_v: {
12631 Int = Intrinsic::aarch64_neon_fmaxnmp;
12632 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
12633 }
12634 case NEON::BI__builtin_neon_vpminnm_v:
12635 case NEON::BI__builtin_neon_vpminnmq_v: {
12636 Int = Intrinsic::aarch64_neon_fminnmp;
12637 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
12638 }
12639 case NEON::BI__builtin_neon_vsqrth_f16: {
12640 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12641 Int = Builder.getIsFPConstrained()
12642 ? Intrinsic::experimental_constrained_sqrt
12643 : Intrinsic::sqrt;
12644 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
12645 }
12646 case NEON::BI__builtin_neon_vsqrt_v:
12647 case NEON::BI__builtin_neon_vsqrtq_v: {
12648 Int = Builder.getIsFPConstrained()
12649 ? Intrinsic::experimental_constrained_sqrt
12650 : Intrinsic::sqrt;
12651 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12652 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
12653 }
12654 case NEON::BI__builtin_neon_vrbit_v:
12655 case NEON::BI__builtin_neon_vrbitq_v: {
12656 Int = Intrinsic::bitreverse;
12657 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
12658 }
12659 case NEON::BI__builtin_neon_vaddv_u8:
12660 // FIXME: These are handled by the AArch64 scalar code.
12661 usgn = true;
12662 [[fallthrough]];
12663 case NEON::BI__builtin_neon_vaddv_s8: {
12664 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12665 Ty = Int32Ty;
12666 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12667 llvm::Type *Tys[2] = { Ty, VTy };
12668 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12669 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12670 return Builder.CreateTrunc(Ops[0], Int8Ty);
12671 }
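  // The add-across-vector intrinsics above (and the min/max reductions that
  // follow) return an i32, so the result is truncated back to the original
  // element width (i8 here, i16 for the 16-bit variants).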
12672 case NEON::BI__builtin_neon_vaddv_u16:
12673 usgn = true;
12674 [[fallthrough]];
12675 case NEON::BI__builtin_neon_vaddv_s16: {
12676 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12677 Ty = Int32Ty;
12678 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12679 llvm::Type *Tys[2] = { Ty, VTy };
12680 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12681 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12682 return Builder.CreateTrunc(Ops[0], Int16Ty);
12683 }
12684 case NEON::BI__builtin_neon_vaddvq_u8:
12685 usgn = true;
12686 [[fallthrough]];
12687 case NEON::BI__builtin_neon_vaddvq_s8: {
12688 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12689 Ty = Int32Ty;
12690 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12691 llvm::Type *Tys[2] = { Ty, VTy };
12692 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12693 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12694 return Builder.CreateTrunc(Ops[0], Int8Ty);
12695 }
12696 case NEON::BI__builtin_neon_vaddvq_u16:
12697 usgn = true;
12698 [[fallthrough]];
12699 case NEON::BI__builtin_neon_vaddvq_s16: {
12700 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12701 Ty = Int32Ty;
12702 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12703 llvm::Type *Tys[2] = { Ty, VTy };
12704 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12705 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12706 return Builder.CreateTrunc(Ops[0], Int16Ty);
12707 }
12708 case NEON::BI__builtin_neon_vmaxv_u8: {
12709 Int = Intrinsic::aarch64_neon_umaxv;
12710 Ty = Int32Ty;
12711 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12712 llvm::Type *Tys[2] = { Ty, VTy };
12713 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12714 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12715 return Builder.CreateTrunc(Ops[0], Int8Ty);
12716 }
12717 case NEON::BI__builtin_neon_vmaxv_u16: {
12718 Int = Intrinsic::aarch64_neon_umaxv;
12719 Ty = Int32Ty;
12720 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12721 llvm::Type *Tys[2] = { Ty, VTy };
12722 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12723 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12724 return Builder.CreateTrunc(Ops[0], Int16Ty);
12725 }
12726 case NEON::BI__builtin_neon_vmaxvq_u8: {
12727 Int = Intrinsic::aarch64_neon_umaxv;
12728 Ty = Int32Ty;
12729 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12730 llvm::Type *Tys[2] = { Ty, VTy };
12731 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12732 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12733 return Builder.CreateTrunc(Ops[0], Int8Ty);
12734 }
12735 case NEON::BI__builtin_neon_vmaxvq_u16: {
12736 Int = Intrinsic::aarch64_neon_umaxv;
12737 Ty = Int32Ty;
12738 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12739 llvm::Type *Tys[2] = { Ty, VTy };
12740 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12741 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12742 return Builder.CreateTrunc(Ops[0], Int16Ty);
12743 }
12744 case NEON::BI__builtin_neon_vmaxv_s8: {
12745 Int = Intrinsic::aarch64_neon_smaxv;
12746 Ty = Int32Ty;
12747 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12748 llvm::Type *Tys[2] = { Ty, VTy };
12749 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12750 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12751 return Builder.CreateTrunc(Ops[0], Int8Ty);
12752 }
12753 case NEON::BI__builtin_neon_vmaxv_s16: {
12754 Int = Intrinsic::aarch64_neon_smaxv;
12755 Ty = Int32Ty;
12756 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12757 llvm::Type *Tys[2] = { Ty, VTy };
12758 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12759 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12760 return Builder.CreateTrunc(Ops[0], Int16Ty);
12761 }
12762 case NEON::BI__builtin_neon_vmaxvq_s8: {
12763 Int = Intrinsic::aarch64_neon_smaxv;
12764 Ty = Int32Ty;
12765 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12766 llvm::Type *Tys[2] = { Ty, VTy };
12767 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12768 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12769 return Builder.CreateTrunc(Ops[0], Int8Ty);
12770 }
12771 case NEON::BI__builtin_neon_vmaxvq_s16: {
12772 Int = Intrinsic::aarch64_neon_smaxv;
12773 Ty = Int32Ty;
12774 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12775 llvm::Type *Tys[2] = { Ty, VTy };
12776 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12777 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12778 return Builder.CreateTrunc(Ops[0], Int16Ty);
12779 }
12780 case NEON::BI__builtin_neon_vmaxv_f16: {
12781 Int = Intrinsic::aarch64_neon_fmaxv;
12782 Ty = HalfTy;
12783 VTy = llvm::FixedVectorType::get(HalfTy, 4);
12784 llvm::Type *Tys[2] = { Ty, VTy };
12785 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12786 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12787 return Builder.CreateTrunc(Ops[0], HalfTy);
12788 }
12789 case NEON::BI__builtin_neon_vmaxvq_f16: {
12790 Int = Intrinsic::aarch64_neon_fmaxv;
12791 Ty = HalfTy;
12792 VTy = llvm::FixedVectorType::get(HalfTy, 8);
12793 llvm::Type *Tys[2] = { Ty, VTy };
12794 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12795 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12796 return Builder.CreateTrunc(Ops[0], HalfTy);
12797 }
12798 case NEON::BI__builtin_neon_vminv_u8: {
12799 Int = Intrinsic::aarch64_neon_uminv;
12800 Ty = Int32Ty;
12801 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12802 llvm::Type *Tys[2] = { Ty, VTy };
12803 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12804 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12805 return Builder.CreateTrunc(Ops[0], Int8Ty);
12806 }
12807 case NEON::BI__builtin_neon_vminv_u16: {
12808 Int = Intrinsic::aarch64_neon_uminv;
12809 Ty = Int32Ty;
12810 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12811 llvm::Type *Tys[2] = { Ty, VTy };
12812 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12813 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12814 return Builder.CreateTrunc(Ops[0], Int16Ty);
12815 }
12816 case NEON::BI__builtin_neon_vminvq_u8: {
12817 Int = Intrinsic::aarch64_neon_uminv;
12818 Ty = Int32Ty;
12819 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12820 llvm::Type *Tys[2] = { Ty, VTy };
12821 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12822 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12823 return Builder.CreateTrunc(Ops[0], Int8Ty);
12824 }
12825 case NEON::BI__builtin_neon_vminvq_u16: {
12826 Int = Intrinsic::aarch64_neon_uminv;
12827 Ty = Int32Ty;
12828 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12829 llvm::Type *Tys[2] = { Ty, VTy };
12830 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12831 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12832 return Builder.CreateTrunc(Ops[0], Int16Ty);
12833 }
12834 case NEON::BI__builtin_neon_vminv_s8: {
12835 Int = Intrinsic::aarch64_neon_sminv;
12836 Ty = Int32Ty;
12837 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12838 llvm::Type *Tys[2] = { Ty, VTy };
12839 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12840 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12841 return Builder.CreateTrunc(Ops[0], Int8Ty);
12842 }
12843 case NEON::BI__builtin_neon_vminv_s16: {
12844 Int = Intrinsic::aarch64_neon_sminv;
12845 Ty = Int32Ty;
12846 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12847 llvm::Type *Tys[2] = { Ty, VTy };
12848 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12849 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12850 return Builder.CreateTrunc(Ops[0], Int16Ty);
12851 }
12852 case NEON::BI__builtin_neon_vminvq_s8: {
12853 Int = Intrinsic::aarch64_neon_sminv;
12854 Ty = Int32Ty;
12855 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12856 llvm::Type *Tys[2] = { Ty, VTy };
12857 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12858 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12859 return Builder.CreateTrunc(Ops[0], Int8Ty);
12860 }
12861 case NEON::BI__builtin_neon_vminvq_s16: {
12862 Int = Intrinsic::aarch64_neon_sminv;
12863 Ty = Int32Ty;
12864 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12865 llvm::Type *Tys[2] = { Ty, VTy };
12866 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12867 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12868 return Builder.CreateTrunc(Ops[0], Int16Ty);
12869 }
12870 case NEON::BI__builtin_neon_vminv_f16: {
12871 Int = Intrinsic::aarch64_neon_fminv;
12872 Ty = HalfTy;
12873 VTy = llvm::FixedVectorType::get(HalfTy, 4);
12874 llvm::Type *Tys[2] = { Ty, VTy };
12875 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12876 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12877 return Builder.CreateTrunc(Ops[0], HalfTy);
12878 }
12879 case NEON::BI__builtin_neon_vminvq_f16: {
12880 Int = Intrinsic::aarch64_neon_fminv;
12881 Ty = HalfTy;
12882 VTy = llvm::FixedVectorType::get(HalfTy, 8);
12883 llvm::Type *Tys[2] = { Ty, VTy };
12884 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12885 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12886 return Builder.CreateTrunc(Ops[0], HalfTy);
12887 }
12888 case NEON::BI__builtin_neon_vmaxnmv_f16: {
12889 Int = Intrinsic::aarch64_neon_fmaxnmv;
12890 Ty = HalfTy;
12891 VTy = llvm::FixedVectorType::get(HalfTy, 4);
12892 llvm::Type *Tys[2] = { Ty, VTy };
12893 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12894 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
12895 return Builder.CreateTrunc(Ops[0], HalfTy);
12896 }
12897 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
12898 Int = Intrinsic::aarch64_neon_fmaxnmv;
12899 Ty = HalfTy;
12900 VTy = llvm::FixedVectorType::get(HalfTy, 8);
12901 llvm::Type *Tys[2] = { Ty, VTy };
12902 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12903 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
12904 return Builder.CreateTrunc(Ops[0], HalfTy);
12905 }
12906 case NEON::BI__builtin_neon_vminnmv_f16: {
12907 Int = Intrinsic::aarch64_neon_fminnmv;
12908 Ty = HalfTy;
12909 VTy = llvm::FixedVectorType::get(HalfTy, 4);
12910 llvm::Type *Tys[2] = { Ty, VTy };
12911 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12912 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
12913 return Builder.CreateTrunc(Ops[0], HalfTy);
12914 }
12915 case NEON::BI__builtin_neon_vminnmvq_f16: {
12916 Int = Intrinsic::aarch64_neon_fminnmv;
12917 Ty = HalfTy;
12918 VTy = llvm::FixedVectorType::get(HalfTy, 8);
12919 llvm::Type *Tys[2] = { Ty, VTy };
12920 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12921 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
12922 return Builder.CreateTrunc(Ops[0], HalfTy);
12923 }
12924 case NEON::BI__builtin_neon_vmul_n_f64: {
12925 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12926 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
12927 return Builder.CreateFMul(Ops[0], RHS);
12928 }
12929 case NEON::BI__builtin_neon_vaddlv_u8: {
12930 Int = Intrinsic::aarch64_neon_uaddlv;
12931 Ty = Int32Ty;
12932 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12933 llvm::Type *Tys[2] = { Ty, VTy };
12934 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12935 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12936 return Builder.CreateTrunc(Ops[0], Int16Ty);
12937 }
12938 case NEON::BI__builtin_neon_vaddlv_u16: {
12939 Int = Intrinsic::aarch64_neon_uaddlv;
12940 Ty = Int32Ty;
12941 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12942 llvm::Type *Tys[2] = { Ty, VTy };
12943 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12944 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12945 }
12946 case NEON::BI__builtin_neon_vaddlvq_u8: {
12947 Int = Intrinsic::aarch64_neon_uaddlv;
12948 Ty = Int32Ty;
12949 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12950 llvm::Type *Tys[2] = { Ty, VTy };
12951 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12952 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12953 return Builder.CreateTrunc(Ops[0], Int16Ty);
12954 }
12955 case NEON::BI__builtin_neon_vaddlvq_u16: {
12956 Int = Intrinsic::aarch64_neon_uaddlv;
12957 Ty = Int32Ty;
12958 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12959 llvm::Type *Tys[2] = { Ty, VTy };
12960 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12961 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12962 }
12963 case NEON::BI__builtin_neon_vaddlv_s8: {
12964 Int = Intrinsic::aarch64_neon_saddlv;
12965 Ty = Int32Ty;
12966 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12967 llvm::Type *Tys[2] = { Ty, VTy };
12968 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12969 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12970 return Builder.CreateTrunc(Ops[0], Int16Ty);
12971 }
12972 case NEON::BI__builtin_neon_vaddlv_s16: {
12973 Int = Intrinsic::aarch64_neon_saddlv;
12974 Ty = Int32Ty;
12975 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12976 llvm::Type *Tys[2] = { Ty, VTy };
12977 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12978 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12979 }
12980 case NEON::BI__builtin_neon_vaddlvq_s8: {
12981 Int = Intrinsic::aarch64_neon_saddlv;
12982 Ty = Int32Ty;
12983 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12984 llvm::Type *Tys[2] = { Ty, VTy };
12985 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12986 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12987 return Builder.CreateTrunc(Ops[0], Int16Ty);
12988 }
12989 case NEON::BI__builtin_neon_vaddlvq_s16: {
12990 Int = Intrinsic::aarch64_neon_saddlv;
12991 Ty = Int32Ty;
12992 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12993 llvm::Type *Tys[2] = { Ty, VTy };
12994 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12995 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12996 }
12997 case NEON::BI__builtin_neon_vsri_n_v:
12998 case NEON::BI__builtin_neon_vsriq_n_v: {
12999 Int = Intrinsic::aarch64_neon_vsri;
13000 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13001 return EmitNeonCall(Intrin, Ops, "vsri_n");
13002 }
13003 case NEON::BI__builtin_neon_vsli_n_v:
13004 case NEON::BI__builtin_neon_vsliq_n_v: {
13005 Int = Intrinsic::aarch64_neon_vsli;
13006 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13007 return EmitNeonCall(Intrin, Ops, "vsli_n");
13008 }
13009 case NEON::BI__builtin_neon_vsra_n_v:
13010 case NEON::BI__builtin_neon_vsraq_n_v:
13011 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13012 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
13013 return Builder.CreateAdd(Ops[0], Ops[1]);
13014 case NEON::BI__builtin_neon_vrsra_n_v:
13015 case NEON::BI__builtin_neon_vrsraq_n_v: {
13016    Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
13017    SmallVector<llvm::Value*,2> TmpOps;
13018 TmpOps.push_back(Ops[1]);
13019 TmpOps.push_back(Ops[2]);
13020 Function* F = CGM.getIntrinsic(Int, Ty);
13021 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
13022 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
13023 return Builder.CreateAdd(Ops[0], tmp);
13024 }
13025 case NEON::BI__builtin_neon_vld1_v:
13026 case NEON::BI__builtin_neon_vld1q_v: {
13027 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
13028 }
13029 case NEON::BI__builtin_neon_vst1_v:
13030 case NEON::BI__builtin_neon_vst1q_v:
13031 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
13032 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13033 case NEON::BI__builtin_neon_vld1_lane_v:
13034 case NEON::BI__builtin_neon_vld1q_lane_v: {
13035 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13036 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13037 PtrOp0.getAlignment());
13038 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
13039 }
13040 case NEON::BI__builtin_neon_vldap1_lane_s64:
13041 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
13042 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13043 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
13044 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
13045 LI->setAtomic(llvm::AtomicOrdering::Acquire);
13046 Ops[0] = LI;
13047 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
13048 }
13049 case NEON::BI__builtin_neon_vld1_dup_v:
13050 case NEON::BI__builtin_neon_vld1q_dup_v: {
13051 Value *V = PoisonValue::get(Ty);
13052 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13053 PtrOp0.getAlignment());
13054 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
13055 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
13056 return EmitNeonSplat(Ops[0], CI);
13057 }
13058 case NEON::BI__builtin_neon_vst1_lane_v:
13059 case NEON::BI__builtin_neon_vst1q_lane_v:
13060 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13061 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13062 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13063 case NEON::BI__builtin_neon_vstl1_lane_s64:
13064 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
13065 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13066 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13067 llvm::StoreInst *SI =
13068 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13069 SI->setAtomic(llvm::AtomicOrdering::Release);
13070 return SI;
13071 }
13072 case NEON::BI__builtin_neon_vld2_v:
13073 case NEON::BI__builtin_neon_vld2q_v: {
13074 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13075 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
13076 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13077 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13078 }
13079 case NEON::BI__builtin_neon_vld3_v:
13080 case NEON::BI__builtin_neon_vld3q_v: {
13081 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13082 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
13083 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13084 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13085 }
13086 case NEON::BI__builtin_neon_vld4_v:
13087 case NEON::BI__builtin_neon_vld4q_v: {
13088 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13089 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
13090 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13091 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13092 }
13093 case NEON::BI__builtin_neon_vld2_dup_v:
13094 case NEON::BI__builtin_neon_vld2q_dup_v: {
13095 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13096 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
13097 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13098 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13099 }
13100 case NEON::BI__builtin_neon_vld3_dup_v:
13101 case NEON::BI__builtin_neon_vld3q_dup_v: {
13102 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13103 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
13104 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13105 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13106 }
13107 case NEON::BI__builtin_neon_vld4_dup_v:
13108 case NEON::BI__builtin_neon_vld4q_dup_v: {
13109 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13110 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
13111 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13112 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13113 }
13114 case NEON::BI__builtin_neon_vld2_lane_v:
13115 case NEON::BI__builtin_neon_vld2q_lane_v: {
13116 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13117 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
13118 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13119 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13120 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13121 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13122 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
13123 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13124 }
13125 case NEON::BI__builtin_neon_vld3_lane_v:
13126 case NEON::BI__builtin_neon_vld3q_lane_v: {
13127 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13128 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
13129 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13130 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13131 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13132 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13133 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13134 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
13135 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13136 }
13137 case NEON::BI__builtin_neon_vld4_lane_v:
13138 case NEON::BI__builtin_neon_vld4q_lane_v: {
13139 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13140 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
13141 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13142 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13143 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13144 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13145 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
13146 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
13147 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
13148 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13149 }
13150 case NEON::BI__builtin_neon_vst2_v:
13151 case NEON::BI__builtin_neon_vst2q_v: {
13152 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13153 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
13154 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
13155 Ops, "");
13156 }
13157 case NEON::BI__builtin_neon_vst2_lane_v:
13158 case NEON::BI__builtin_neon_vst2q_lane_v: {
13159 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13160 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
13161 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13162 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
13163 Ops, "");
13164 }
13165 case NEON::BI__builtin_neon_vst3_v:
13166 case NEON::BI__builtin_neon_vst3q_v: {
13167 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13168 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13169 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
13170 Ops, "");
13171 }
13172 case NEON::BI__builtin_neon_vst3_lane_v:
13173 case NEON::BI__builtin_neon_vst3q_lane_v: {
13174 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13175 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13176 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13177 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
13178 Ops, "");
13179 }
13180 case NEON::BI__builtin_neon_vst4_v:
13181 case NEON::BI__builtin_neon_vst4q_v: {
13182 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13183 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13184 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
13185 Ops, "");
13186 }
13187 case NEON::BI__builtin_neon_vst4_lane_v:
13188 case NEON::BI__builtin_neon_vst4q_lane_v: {
13189 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13190 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13191 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
13192 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
13193 Ops, "");
13194 }
13195 case NEON::BI__builtin_neon_vtrn_v:
13196 case NEON::BI__builtin_neon_vtrnq_v: {
13197 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13198 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13199 Value *SV = nullptr;
13200
13201 for (unsigned vi = 0; vi != 2; ++vi) {
13202 SmallVector<int, 16> Indices;
13203 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13204 Indices.push_back(i+vi);
13205 Indices.push_back(i+e+vi);
13206 }
13207 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13208 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
13209 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13210 }
13211 return SV;
13212 }
13213 case NEON::BI__builtin_neon_vuzp_v:
13214 case NEON::BI__builtin_neon_vuzpq_v: {
13215 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13216 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13217 Value *SV = nullptr;
13218
13219 for (unsigned vi = 0; vi != 2; ++vi) {
13220 SmallVector<int, 16> Indices;
13221 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
13222 Indices.push_back(2*i+vi);
13223
13224 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13225 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
13226 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13227 }
13228 return SV;
13229 }
13230 case NEON::BI__builtin_neon_vzip_v:
13231 case NEON::BI__builtin_neon_vzipq_v: {
13232 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13233 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13234 Value *SV = nullptr;
13235
13236 for (unsigned vi = 0; vi != 2; ++vi) {
13237 SmallVector<int, 16> Indices;
13238 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13239 Indices.push_back((i + vi*e) >> 1);
13240 Indices.push_back(((i + vi*e) >> 1)+e);
13241 }
13242 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13243 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
13244 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13245 }
13246 return SV;
13247 }
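  // For illustration, with 4-element vectors the two vzip stores above use
  // shuffle indices {0,4,1,5} and {2,6,3,7}, interleaving the low and then the
  // high halves of the two inputs; vuzp and vtrn build their masks analogously.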
13248 case NEON::BI__builtin_neon_vqtbl1q_v: {
13249 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
13250 Ops, "vtbl1");
13251 }
13252 case NEON::BI__builtin_neon_vqtbl2q_v: {
13253 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
13254 Ops, "vtbl2");
13255 }
13256 case NEON::BI__builtin_neon_vqtbl3q_v: {
13257 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
13258 Ops, "vtbl3");
13259 }
13260 case NEON::BI__builtin_neon_vqtbl4q_v: {
13261 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
13262 Ops, "vtbl4");
13263 }
13264 case NEON::BI__builtin_neon_vqtbx1q_v: {
13265 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
13266 Ops, "vtbx1");
13267 }
13268 case NEON::BI__builtin_neon_vqtbx2q_v: {
13269 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
13270 Ops, "vtbx2");
13271 }
13272 case NEON::BI__builtin_neon_vqtbx3q_v: {
13273 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
13274 Ops, "vtbx3");
13275 }
13276 case NEON::BI__builtin_neon_vqtbx4q_v: {
13277 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
13278 Ops, "vtbx4");
13279 }
13280 case NEON::BI__builtin_neon_vsqadd_v:
13281 case NEON::BI__builtin_neon_vsqaddq_v: {
13282 Int = Intrinsic::aarch64_neon_usqadd;
13283 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
13284 }
13285 case NEON::BI__builtin_neon_vuqadd_v:
13286 case NEON::BI__builtin_neon_vuqaddq_v: {
13287 Int = Intrinsic::aarch64_neon_suqadd;
13288 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
13289 }
13290 }
13291}
13292
13293Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
13294 const CallExpr *E) {
13295 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
13296 BuiltinID == BPF::BI__builtin_btf_type_id ||
13297 BuiltinID == BPF::BI__builtin_preserve_type_info ||
13298 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
13299 "unexpected BPF builtin");
13300
13301 // A sequence number, injected into IR builtin functions, to
13302  // prevent CSE, since the only difference between otherwise identical
13303  // calls may be just the debuginfo metadata.
13304 static uint32_t BuiltinSeqNum;
13305
13306 switch (BuiltinID) {
13307 default:
13308 llvm_unreachable("Unexpected BPF builtin");
13309 case BPF::BI__builtin_preserve_field_info: {
13310 const Expr *Arg = E->getArg(0);
13311 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
13312
13313 if (!getDebugInfo()) {
13314 CGM.Error(E->getExprLoc(),
13315 "using __builtin_preserve_field_info() without -g");
13316 return IsBitField ? EmitLValue(Arg).getBitFieldPointer()
13317 : EmitLValue(Arg).getPointer(*this);
13318 }
13319
13320 // Enable underlying preserve_*_access_index() generation.
13321 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
13322 IsInPreservedAIRegion = true;
13323 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getBitFieldPointer()
13324 : EmitLValue(Arg).getPointer(*this);
13325 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
13326
13327 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13328 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
13329
13330    // Build the IR for the preserve_field_info intrinsic.
13331 llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration(
13332 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
13333 {FieldAddr->getType()});
13334 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
13335 }
13336 case BPF::BI__builtin_btf_type_id:
13337 case BPF::BI__builtin_preserve_type_info: {
13338 if (!getDebugInfo()) {
13339 CGM.Error(E->getExprLoc(), "using builtin function without -g");
13340 return nullptr;
13341 }
13342
13343 const Expr *Arg0 = E->getArg(0);
13344 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
13345 Arg0->getType(), Arg0->getExprLoc());
13346
13347 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13348 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
13349 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
13350
13351 llvm::Function *FnDecl;
13352 if (BuiltinID == BPF::BI__builtin_btf_type_id)
13353 FnDecl = llvm::Intrinsic::getDeclaration(
13354 &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
13355 else
13356 FnDecl = llvm::Intrinsic::getDeclaration(
13357 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
13358 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
13359 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
13360 return Fn;
13361 }
13362 case BPF::BI__builtin_preserve_enum_value: {
13363 if (!getDebugInfo()) {
13364 CGM.Error(E->getExprLoc(), "using builtin function without -g");
13365 return nullptr;
13366 }
13367
13368 const Expr *Arg0 = E->getArg(0);
13369 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
13370 Arg0->getType(), Arg0->getExprLoc());
13371
13372 // Find enumerator
13373 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
13374 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
13375 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
13376 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
13377
13378 auto InitVal = Enumerator->getInitVal();
13379 std::string InitValStr;
13380 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
13381 InitValStr = std::to_string(InitVal.getSExtValue());
13382 else
13383 InitValStr = std::to_string(InitVal.getZExtValue());
13384 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
13385 Value *EnumStrVal = Builder.CreateGlobalStringPtr(EnumStr);
13386
13387 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13388 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
13389 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
13390
13391 llvm::Function *IntrinsicFn = llvm::Intrinsic::getDeclaration(
13392 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
13393 CallInst *Fn =
13394 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
13395 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
13396 return Fn;
13397 }
13398 }
13399}
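// The BPF builtins above underpin libbpf's CO-RE (compile once, run
// everywhere) macros: the llvm.bpf.* intrinsics emitted here are later turned
// into field and type relocations by the BPF backend, which is why each path
// requires debug info (-g).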
13400
13401llvm::Value *CodeGenFunction::
13402BuildVector(ArrayRef<llvm::Value*> Ops) {
13403 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
13404 "Not a power-of-two sized vector!");
13405 bool AllConstants = true;
13406 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
13407 AllConstants &= isa<Constant>(Ops[i]);
13408
13409 // If this is a constant vector, create a ConstantVector.
13410  if (AllConstants) {
13411    SmallVector<llvm::Constant*, 16> CstOps;
13412 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
13413 CstOps.push_back(cast<Constant>(Ops[i]));
13414 return llvm::ConstantVector::get(CstOps);
13415 }
13416
13417 // Otherwise, insertelement the values to build the vector.
13418 Value *Result = llvm::PoisonValue::get(
13419 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
13420
13421 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
13422 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
13423
13424 return Result;
13425}
13426
13427// Convert the mask from an integer type to a vector of i1.
13428static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
13429 unsigned NumElts) {
13430
13431 auto *MaskTy = llvm::FixedVectorType::get(
13432 CGF.Builder.getInt1Ty(),
13433 cast<IntegerType>(Mask->getType())->getBitWidth());
13434 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
13435
13436  // If we have fewer than 8 elements, then the starting mask was an i8 and
13437  // we need to extract down to the right number of elements.
13438 if (NumElts < 8) {
13439 int Indices[4];
13440 for (unsigned i = 0; i != NumElts; ++i)
13441 Indices[i] = i;
13442 MaskVec = CGF.Builder.CreateShuffleVector(
13443 MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
13444 }
13445 return MaskVec;
13446}
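// For illustration: getMaskVecValue(CGF, i8 0b00001101, 4) bitcasts the mask
// to <8 x i1> (bit i becomes element i) and then keeps the low four lanes,
// yielding <i1 1, i1 0, i1 1, i1 1>.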
13447
13448static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13449 Align Alignment) {
13450 Value *Ptr = Ops[0];
13451
13452 Value *MaskVec = getMaskVecValue(
13453 CGF, Ops[2],
13454 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
13455
13456 return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
13457}
13458
13459static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13460 Align Alignment) {
13461 llvm::Type *Ty = Ops[1]->getType();
13462 Value *Ptr = Ops[0];
13463
13464 Value *MaskVec = getMaskVecValue(
13465 CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
13466
13467 return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
13468}
13469
13470static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
13471 ArrayRef<Value *> Ops) {
13472 auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
13473 Value *Ptr = Ops[0];
13474
13475 Value *MaskVec = getMaskVecValue(
13476 CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
13477
13478 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
13479 ResultTy);
13480 return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
13481}
13482
13483static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
13484                                    ArrayRef<Value *> Ops,
13485 bool IsCompress) {
13486 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
13487
13488 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
13489
13490 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
13491 : Intrinsic::x86_avx512_mask_expand;
13492 llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
13493 return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
13494}
13495
13496static Value *EmitX86CompressStore(CodeGenFunction &CGF,
13497 ArrayRef<Value *> Ops) {
13498 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
13499 Value *Ptr = Ops[0];
13500
13501 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
13502
13503 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
13504 ResultTy);
13505 return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
13506}
13507
13508static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
13509                               ArrayRef<Value *> Ops,
13510 bool InvertLHS = false) {
13511 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
13512 Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
13513 Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
13514
13515 if (InvertLHS)
13516 LHS = CGF.Builder.CreateNot(LHS);
13517
13518 return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
13519 Ops[0]->getType());
13520}
13521
13522static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
13523 Value *Amt, bool IsRight) {
13524 llvm::Type *Ty = Op0->getType();
13525
13526  // The amount may be a scalar immediate, in which case create a splat vector.
13527  // Funnel shift amounts are treated as modulo and the types are all power-of-2,
13528  // so we only care about the lowest log2 bits anyway.
13529 if (Amt->getType() != Ty) {
13530 unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
13531 Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
13532 Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
13533 }
13534
13535 unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
13536 Function *F = CGF.CGM.getIntrinsic(IID, Ty);
13537 return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
13538}
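// For example, with <4 x i32> operands and an amount of 5, the emitted
// llvm.fshl computes (a << 5) | (b >> 27) per element, while llvm.fshr shifts
// the concatenation right instead; a scalar amount is splatted first, so only
// amt % 32 matters.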
13539
13540static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13541 bool IsSigned) {
13542 Value *Op0 = Ops[0];
13543 Value *Op1 = Ops[1];
13544 llvm::Type *Ty = Op0->getType();
13545 uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
13546
13547 CmpInst::Predicate Pred;
13548 switch (Imm) {
13549 case 0x0:
13550 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
13551 break;
13552 case 0x1:
13553 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
13554 break;
13555 case 0x2:
13556 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
13557 break;
13558 case 0x3:
13559 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
13560 break;
13561 case 0x4:
13562 Pred = ICmpInst::ICMP_EQ;
13563 break;
13564 case 0x5:
13565 Pred = ICmpInst::ICMP_NE;
13566 break;
13567 case 0x6:
13568 return llvm::Constant::getNullValue(Ty); // FALSE
13569 case 0x7:
13570 return llvm::Constant::getAllOnesValue(Ty); // TRUE
13571 default:
13572 llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
13573 }
13574
13575 Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
13576 Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
13577 return Res;
13578}
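// For example, in EmitX86vpcom above an immediate of 0x2 selects a (signed or
// unsigned) greater-than compare; the i1 results are sign-extended so matching
// lanes become all-ones and the rest become zero.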
13579
13580static Value *EmitX86Select(CodeGenFunction &CGF,
13581 Value *Mask, Value *Op0, Value *Op1) {
13582
13583  // If the mask is all ones, just return the first argument.
13584 if (const auto *C = dyn_cast<Constant>(Mask))
13585 if (C->isAllOnesValue())
13586 return Op0;
13587
13588 Mask = getMaskVecValue(
13589 CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
13590
13591 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
13592}
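// Roughly, EmitX86Select lowers a masked AVX-512 result to
//   select <N x i1> mask, <result>, <passthru>
// with an all-ones mask short-circuited to the unmasked result.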
13593
13594static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
13595 Value *Mask, Value *Op0, Value *Op1) {
13596  // If the mask is all ones, just return the first argument.
13597 if (const auto *C = dyn_cast<Constant>(Mask))
13598 if (C->isAllOnesValue())
13599 return Op0;
13600
13601 auto *MaskTy = llvm::FixedVectorType::get(
13602 CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
13603 Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
13604 Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
13605 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
13606}
13607
13608static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
13609 unsigned NumElts, Value *MaskIn) {
13610 if (MaskIn) {
13611 const auto *C = dyn_cast<Constant>(MaskIn);
13612 if (!C || !C->isAllOnesValue())
13613 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
13614 }
13615
13616 if (NumElts < 8) {
13617 int Indices[8];
13618 for (unsigned i = 0; i != NumElts; ++i)
13619 Indices[i] = i;
13620 for (unsigned i = NumElts; i != 8; ++i)
13621 Indices[i] = i % NumElts + NumElts;
13622 Cmp = CGF.Builder.CreateShuffleVector(
13623 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
13624 }
13625
13626 return CGF.Builder.CreateBitCast(Cmp,
13627 IntegerType::get(CGF.getLLVMContext(),
13628 std::max(NumElts, 8U)));
13629}
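// For illustration: in EmitX86MaskedCompareResult above, a 4-lane compare is
// widened to 8 lanes by pulling the extra lanes from a zero vector before the
// bitcast, so the returned mask is always at least an i8 (the narrowest x86
// mask width).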
13630
13631static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
13632 bool Signed, ArrayRef<Value *> Ops) {
13633 assert((Ops.size() == 2 || Ops.size() == 4) &&
13634 "Unexpected number of arguments");
13635 unsigned NumElts =
13636 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
13637 Value *Cmp;
13638
13639 if (CC == 3) {
13640 Cmp = Constant::getNullValue(
13641 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
13642 } else if (CC == 7) {
13643 Cmp = Constant::getAllOnesValue(
13644 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
13645 } else {
13646 ICmpInst::Predicate Pred;
13647 switch (CC) {
13648 default: llvm_unreachable("Unknown condition code");
13649 case 0: Pred = ICmpInst::ICMP_EQ; break;
13650 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
13651 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
13652 case 4: Pred = ICmpInst::ICMP_NE; break;
13653 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
13654 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
13655 }
13656 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
13657 }
13658
13659 Value *MaskIn = nullptr;
13660 if (Ops.size() == 4)
13661 MaskIn = Ops[3];
13662
13663 return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
13664}
13665
13666static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
13667 Value *Zero = Constant::getNullValue(In->getType());
13668 return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
13669}
13670
13671static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
13672 ArrayRef<Value *> Ops, bool IsSigned) {
13673 unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
13674 llvm::Type *Ty = Ops[1]->getType();
13675
13676 Value *Res;
13677 if (Rnd != 4) {
13678 Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
13679 : Intrinsic::x86_avx512_uitofp_round;
13680 Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
13681 Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
13682 } else {
13683 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13684 Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
13685 : CGF.Builder.CreateUIToFP(Ops[0], Ty);
13686 }
13687
13688 return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
13689}
13690
13691// Lowers X86 FMA intrinsics to IR.
13692static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
13693 ArrayRef<Value *> Ops, unsigned BuiltinID,
13694 bool IsAddSub) {
13695
13696 bool Subtract = false;
13697 Intrinsic::ID IID = Intrinsic::not_intrinsic;
13698 switch (BuiltinID) {
13699 default: break;
13700 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
13701 Subtract = true;
13702 [[fallthrough]];
13703 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
13704 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
13705 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
13706 IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
13707 break;
13708 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
13709 Subtract = true;
13710 [[fallthrough]];
13711 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
13712 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
13713 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
13714 IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
13715 break;
13716 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
13717 Subtract = true;
13718 [[fallthrough]];
13719 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
13720 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
13721 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
13722 IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
13723 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
13724 Subtract = true;
13725 [[fallthrough]];
13726 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
13727 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
13728 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
13729 IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
13730 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
13731 Subtract = true;
13732 [[fallthrough]];
13733 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
13734 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
13735 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
13736 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
13737 break;
13738 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
13739 Subtract = true;
13740 [[fallthrough]];
13741 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
13742 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
13743 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
13744 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
13745 break;
13746 }
13747
13748 Value *A = Ops[0];
13749 Value *B = Ops[1];
13750 Value *C = Ops[2];
13751
13752 if (Subtract)
13753 C = CGF.Builder.CreateFNeg(C);
13754
13755 Value *Res;
13756
13757  // The generic FMA path below only handles _MM_FROUND_CUR_DIRECTION/4 (no rounding).
13758 if (IID != Intrinsic::not_intrinsic &&
13759 (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
13760 IsAddSub)) {
13761 Function *Intr = CGF.CGM.getIntrinsic(IID);
13762 Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
13763 } else {
13764 llvm::Type *Ty = A->getType();
13765 Function *FMA;
13766 if (CGF.Builder.getIsFPConstrained()) {
13767 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13768 FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
13769 Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
13770 } else {
13771 FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
13772 Res = CGF.Builder.CreateCall(FMA, {A, B, C});
13773 }
13774 }
13775
13776 // Handle any required masking.
13777 Value *MaskFalseVal = nullptr;
13778 switch (BuiltinID) {
13779 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
13780 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
13781 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
13782 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
13783 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
13784 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
13785 MaskFalseVal = Ops[0];
13786 break;
13787 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
13788 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
13789 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
13790 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
13791 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
13792 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
13793 MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
13794 break;
13795 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
13796 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
13797 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
13798 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
13799 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
13800 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
13801 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
13802 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
13803 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
13804 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
13805 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
13806 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
13807 MaskFalseVal = Ops[2];
13808 break;
13809 }
13810
13811 if (MaskFalseVal)
13812 return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
13813
13814 return Res;
13815}
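// Illustrative sketch, not part of this file: how the generic llvm.fma path in
// EmitX86FMAExpr above is reached from user code. Assumes the usual intrinsic
// header mapping of _mm512_fmadd_pd onto __builtin_ia32_vfmaddpd512_mask with
// an all-ones mask and _MM_FROUND_CUR_DIRECTION (4); needs <immintrin.h> and
// -mavx512f.
__m512d fma_pd_sketch(__m512d A, __m512d B, __m512d C) {
  // Rounding immediate is 4 and IsAddSub is false, so the lowering emits
  // @llvm.fma.v8f64(A, B, C); the all-ones mask makes EmitX86Select a no-op.
  return _mm512_fmadd_pd(A, B, C);
}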
13816
13817static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
13818 MutableArrayRef<Value *> Ops, Value *Upper,
13819 bool ZeroMask = false, unsigned PTIdx = 0,
13820 bool NegAcc = false) {
13821 unsigned Rnd = 4;
13822 if (Ops.size() > 4)
13823 Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
13824
13825 if (NegAcc)
13826 Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
13827
13828 Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
13829 Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
13830 Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
13831 Value *Res;
13832 if (Rnd != 4) {
13833 Intrinsic::ID IID;
13834
13835 switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
13836 case 16:
13837 IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
13838 break;
13839 case 32:
13840 IID = Intrinsic::x86_avx512_vfmadd_f32;
13841 break;
13842 case 64:
13843 IID = Intrinsic::x86_avx512_vfmadd_f64;
13844 break;
13845 default:
13846 llvm_unreachable("Unexpected size");
13847 }
13848 Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
13849 {Ops[0], Ops[1], Ops[2], Ops[4]});
13850 } else if (CGF.Builder.getIsFPConstrained()) {
13851 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13852 Function *FMA = CGF.CGM.getIntrinsic(
13853 Intrinsic::experimental_constrained_fma, Ops[0]->getType());
13854 Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
13855 } else {
13856 Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
13857 Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
13858 }
13859 // If we have more than 3 arguments, we need to do masking.
13860 if (Ops.size() > 3) {
13861 Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
13862 : Ops[PTIdx];
13863
13864    // If we negated the accumulator and it's the PassThru value, we need to
13865 // bypass the negate. Conveniently Upper should be the same thing in this
13866 // case.
13867 if (NegAcc && PTIdx == 2)
13868 PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
13869
13870 Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
13871 }
13872 return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
13873}
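// Illustrative sketch, not part of this file: the scalar path above, shown for
// _mm_fmadd_ss, which (assuming the usual intrinsic header mapping onto
// __builtin_ia32_vfmaddss3) reaches EmitScalarFMAExpr with Upper == Ops[0].
// Needs <immintrin.h> and -mfma.
__m128 fma_ss_sketch(__m128 A, __m128 B, __m128 C) {
  // Lane 0 of each operand is extracted, @llvm.fma.f32 is emitted, and the
  // result is inserted back into lane 0 of A; lanes 1-3 of A pass through.
  return _mm_fmadd_ss(A, B, C);
}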
13874
13875static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
13876 ArrayRef<Value *> Ops) {
13877 llvm::Type *Ty = Ops[0]->getType();
13878 // Arguments have a vXi32 type so cast to vXi64.
13879 Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
13880 Ty->getPrimitiveSizeInBits() / 64);
13881 Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
13882 Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
13883
13884 if (IsSigned) {
13885 // Shift left then arithmetic shift right.
13886 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
13887 LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
13888 LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
13889 RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
13890 RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
13891 } else {
13892 // Clear the upper bits.
13893 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
13894 LHS = CGF.Builder.CreateAnd(LHS, Mask);
13895 RHS = CGF.Builder.CreateAnd(RHS, Mask);
13896 }
13897
13898 return CGF.Builder.CreateMul(LHS, RHS);
13899}
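// Illustrative sketch, not part of this file: _mm_mul_epu32 (assuming the
// usual mapping onto __builtin_ia32_pmuludq128, which is routed to
// EmitX86Muldq later in this file) shows the unsigned path. Needs
// <immintrin.h> and SSE2.
__m128i muludq_sketch(__m128i A, __m128i B) {
  // Both operands are bitcast to <2 x i64>, the upper 32 bits of each element
  // are cleared with the 0xffffffff mask (IsSigned == false), and a plain mul
  // is emitted.
  return _mm_mul_epu32(A, B);
}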
13900
13901// Emit a masked pternlog intrinsic. This only exists because the header has to
13902// use a macro and we aren't able to pass the input argument to a pternlog
13903// builtin and a select builtin without evaluating it twice.
13904static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
13905 ArrayRef<Value *> Ops) {
13906 llvm::Type *Ty = Ops[0]->getType();
13907
13908 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
13909 unsigned EltWidth = Ty->getScalarSizeInBits();
13910 Intrinsic::ID IID;
13911 if (VecWidth == 128 && EltWidth == 32)
13912 IID = Intrinsic::x86_avx512_pternlog_d_128;
13913 else if (VecWidth == 256 && EltWidth == 32)
13914 IID = Intrinsic::x86_avx512_pternlog_d_256;
13915 else if (VecWidth == 512 && EltWidth == 32)
13916 IID = Intrinsic::x86_avx512_pternlog_d_512;
13917 else if (VecWidth == 128 && EltWidth == 64)
13918 IID = Intrinsic::x86_avx512_pternlog_q_128;
13919 else if (VecWidth == 256 && EltWidth == 64)
13920 IID = Intrinsic::x86_avx512_pternlog_q_256;
13921 else if (VecWidth == 512 && EltWidth == 64)
13922 IID = Intrinsic::x86_avx512_pternlog_q_512;
13923 else
13924 llvm_unreachable("Unexpected intrinsic");
13925
13926 Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
13927 Ops.drop_back());
13928 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
13929 return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
13930}
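// Illustrative sketch, not part of this file: a pternlog builtin as written by
// a user, assuming the usual mapping of _mm512_ternarylogic_epi32 onto
// __builtin_ia32_pternlogd512_mask with an all-ones mask. Needs <immintrin.h>
// and -mavx512f.
__m512i ternlog_sketch(__m512i A, __m512i B, __m512i C) {
  // Immediate 0xCA is the truth table for bitwise "A ? B : C". EmitX86Ternlog
  // calls @llvm.x86.avx512.pternlog.d.512 on (A, B, C, 0xCA); the all-ones
  // mask lets the trailing EmitX86Select fold away.
  return _mm512_ternarylogic_epi32(A, B, C, 0xCA);
}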
13931
13932static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
13933 llvm::Type *DstTy) {
13934 unsigned NumberOfElements =
13935 cast<llvm::FixedVectorType>(DstTy)->getNumElements();
13936 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
13937 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
13938}
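// Illustrative sketch, not part of this file: EmitX86SExtMask as used by the
// cvtmask2* builtins, e.g. _mm512_movm_epi32 (assuming the usual mapping onto
// __builtin_ia32_cvtmask2d512). Needs <immintrin.h> and -mavx512dq.
__m512i movm_sketch(__mmask16 K) {
  // The i16 mask becomes a <16 x i1> vector that is sign-extended to
  // <16 x i32>, producing all-ones or all-zero lanes.
  return _mm512_movm_epi32(K);
}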
13939
13940Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
13941 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
13942 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
13943 return EmitX86CpuIs(CPUStr);
13944}
13945
13946// Convert F16 halves to floats.
13947static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
13948                                       ArrayRef<Value *> Ops,
13949 llvm::Type *DstTy) {
13950 assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
13951 "Unknown cvtph2ps intrinsic");
13952
13953 // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
13954 if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
13955 Function *F =
13956 CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
13957 return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
13958 }
13959
13960 unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
13961 Value *Src = Ops[0];
13962
13963 // Extract the subvector.
13964 if (NumDstElts !=
13965 cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
13966 assert(NumDstElts == 4 && "Unexpected vector size");
13967 Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
13968 }
13969
13970 // Bitcast from vXi16 to vXf16.
13971 auto *HalfTy = llvm::FixedVectorType::get(
13972 llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
13973 Src = CGF.Builder.CreateBitCast(Src, HalfTy);
13974
13975 // Perform the fp-extension.
13976 Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
13977
13978 if (Ops.size() >= 3)
13979 Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
13980 return Res;
13981}
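// Illustrative sketch, not part of this file: the no-SAE path above, shown for
// _mm_cvtph_ps (assuming the usual mapping onto __builtin_ia32_vcvtph2ps).
// Needs <immintrin.h> and -mf16c.
__m128 cvtph_ps_sketch(__m128i V) {
  // The low four i16 elements are extracted, bitcast to <4 x half>, and
  // widened with an fpext to <4 x float>; no mask operands are present.
  return _mm_cvtph_ps(V);
}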
13982
13983Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
13984
13985 llvm::Type *Int32Ty = Builder.getInt32Ty();
13986
13987 // Matching the struct layout from the compiler-rt/libgcc structure that is
13988 // filled in:
13989 // unsigned int __cpu_vendor;
13990 // unsigned int __cpu_type;
13991 // unsigned int __cpu_subtype;
13992 // unsigned int __cpu_features[1];
13993 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
13994 llvm::ArrayType::get(Int32Ty, 1));
13995
13996 // Grab the global __cpu_model.
13997 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
13998 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
13999
14000 // Calculate the index needed to access the correct field based on the
14001 // range. Also adjust the expected value.
14002 unsigned Index;
14003 unsigned Value;
14004 std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
14005#define X86_VENDOR(ENUM, STRING) \
14006 .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
14007#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \
14008 .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14009#define X86_CPU_TYPE(ENUM, STR) \
14010 .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14011#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \
14012 .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14013#define X86_CPU_SUBTYPE(ENUM, STR) \
14014 .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14015#include "llvm/TargetParser/X86TargetParser.def"
14016 .Default({0, 0});
14017 assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
14018
14019 // Grab the appropriate field from __cpu_model.
14020 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
14021 ConstantInt::get(Int32Ty, Index)};
14022 llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs);
14023 CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
14024                                       CharUnits::fromQuantity(4));
14025
14026 // Check the value of the field against the requested value.
14027 return Builder.CreateICmpEQ(CpuValue,
14028 llvm::ConstantInt::get(Int32Ty, Value));
14029}
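// Illustrative sketch, not part of this file: what __builtin_cpu_is lowers to.
// For a vendor string the Index computed above is 0, so the emitted IR loads
// __cpu_model.__cpu_vendor and compares it with the matching llvm::X86 vendor
// enumerator.
int is_intel_sketch(void) {
  return __builtin_cpu_is("intel");
}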
14030
14031Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
14032 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
14033 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
14034 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
14035 return Builder.getFalse();
14036 return EmitX86CpuSupports(FeatureStr);
14037}
14038
14039Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
14040 return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
14041}
14042
14043llvm::Value *
14044CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) {
14045 Value *Result = Builder.getTrue();
14046 if (FeatureMask[0] != 0) {
14047 // Matching the struct layout from the compiler-rt/libgcc structure that is
14048 // filled in:
14049 // unsigned int __cpu_vendor;
14050 // unsigned int __cpu_type;
14051 // unsigned int __cpu_subtype;
14052 // unsigned int __cpu_features[1];
14053 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14054 llvm::ArrayType::get(Int32Ty, 1));
14055
14056 // Grab the global __cpu_model.
14057 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14058 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14059
14060 // Grab the first (0th) element from the field __cpu_features off of the
14061 // global in the struct STy.
14062 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
14063 Builder.getInt32(0)};
14064 Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
14065 Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
14066                                                CharUnits::fromQuantity(4));
14067
14068 // Check the value of the bit corresponding to the feature requested.
14069 Value *Mask = Builder.getInt32(FeatureMask[0]);
14070 Value *Bitset = Builder.CreateAnd(Features, Mask);
14071 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14072 Result = Builder.CreateAnd(Result, Cmp);
14073 }
14074
14075 llvm::Type *ATy = llvm::ArrayType::get(Int32Ty, 3);
14076 llvm::Constant *CpuFeatures2 =
14077 CGM.CreateRuntimeVariable(ATy, "__cpu_features2");
14078 cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
14079 for (int i = 1; i != 4; ++i) {
14080 const uint32_t M = FeatureMask[i];
14081 if (!M)
14082 continue;
14083 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)};
14084 Value *Features = Builder.CreateAlignedLoad(
14085 Int32Ty, Builder.CreateGEP(ATy, CpuFeatures2, Idxs),
14086        CharUnits::fromQuantity(4));
14087 // Check the value of the bit corresponding to the feature requested.
14088 Value *Mask = Builder.getInt32(M);
14089 Value *Bitset = Builder.CreateAnd(Features, Mask);
14090 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14091 Result = Builder.CreateAnd(Result, Cmp);
14092 }
14093
14094 return Result;
14095}
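// Illustrative sketch, not part of this file: for a low-numbered feature such
// as "avx2" only FeatureMask[0] is set, so the code above tests one bit of
// __cpu_model.__cpu_features[0]; higher-numbered features are tested against
// the __cpu_features2 array instead.
int has_avx2_sketch(void) {
  return __builtin_cpu_supports("avx2");
}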
14096
14097Value *CodeGenFunction::EmitAArch64CpuInit() {
14098 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
14099 llvm::FunctionCallee Func =
14100 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
14101 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14102 cast<llvm::GlobalValue>(Func.getCallee())
14103 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14104 return Builder.CreateCall(Func);
14105}
14106
14107Value *CodeGenFunction::EmitX86CpuInit() {
14108 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
14109 /*Variadic*/ false);
14110 llvm::FunctionCallee Func =
14111 CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
14112 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14113 cast<llvm::GlobalValue>(Func.getCallee())
14114 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14115 return Builder.CreateCall(Func);
14116}
14117
14118Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
14119 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
14120 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
14121  llvm::SmallVector<StringRef, 8> Features;
14122 ArgStr.split(Features, "+");
14123 for (auto &Feature : Features) {
14124 Feature = Feature.trim();
14125 if (!llvm::AArch64::parseArchExtension(Feature))
14126 return Builder.getFalse();
14127 if (Feature != "default")
14128 Features.push_back(Feature);
14129 }
14130 return EmitAArch64CpuSupports(Features);
14131}
14132
14133llvm::Value *
14134CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
14135 uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
14136 Value *Result = Builder.getTrue();
14137 if (FeaturesMask != 0) {
14138 // Get features from structure in runtime library
14139 // struct {
14140 // unsigned long long features;
14141 // } __aarch64_cpu_features;
14142 llvm::Type *STy = llvm::StructType::get(Int64Ty);
14143 llvm::Constant *AArch64CPUFeatures =
14144 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
14145 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
14146 llvm::Value *CpuFeatures = Builder.CreateGEP(
14147 STy, AArch64CPUFeatures,
14148 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
14149 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
14150                                              CharUnits::fromQuantity(8));
14151 Value *Mask = Builder.getInt64(FeaturesMask);
14152 Value *Bitset = Builder.CreateAnd(Features, Mask);
14153 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14154 Result = Builder.CreateAnd(Result, Cmp);
14155 }
14156 return Result;
14157}
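// Illustrative sketch, not part of this file: on AArch64,
// __builtin_cpu_supports (here with "sve2", assumed to be a recognized FMV
// extension) loads __aarch64_cpu_features.features, ANDs it with the mask
// from llvm::AArch64::getCpuSupportsMask, and compares the result for
// equality with that mask.
int has_sve2_sketch(void) {
  return __builtin_cpu_supports("sve2");
}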
14158
14159Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
14160 const CallExpr *E) {
14161 if (BuiltinID == Builtin::BI__builtin_cpu_is)
14162 return EmitX86CpuIs(E);
14163 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
14164 return EmitX86CpuSupports(E);
14165 if (BuiltinID == Builtin::BI__builtin_cpu_init)
14166 return EmitX86CpuInit();
14167
14168 // Handle MSVC intrinsics before argument evaluation to prevent double
14169 // evaluation.
14170 if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
14171 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
14172
14173  SmallVector<Value*, 4> Ops;
14174 bool IsMaskFCmp = false;
14175 bool IsConjFMA = false;
14176
14177 // Find out if any arguments are required to be integer constant expressions.
14178 unsigned ICEArguments = 0;
14179  ASTContext::GetBuiltinTypeError Error;
14180 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
14181 assert(Error == ASTContext::GE_None && "Should not codegen an error");
14182
14183 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
14184 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
14185 }
14186
14187 // These exist so that the builtin that takes an immediate can be bounds
14188 // checked by clang to avoid passing bad immediates to the backend. Since
14189 // AVX has a larger immediate than SSE we would need separate builtins to
14190 // do the different bounds checking. Rather than create a clang specific
14191 // SSE only builtin, this implements eight separate builtins to match gcc
14192 // implementation.
14193 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
14194 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
14195 llvm::Function *F = CGM.getIntrinsic(ID);
14196 return Builder.CreateCall(F, Ops);
14197 };
14198
14199 // For the vector forms of FP comparisons, translate the builtins directly to
14200 // IR.
14201 // TODO: The builtins could be removed if the SSE header files used vector
14202 // extension comparisons directly (vector ordered/unordered may need
14203 // additional support via __builtin_isnan()).
14204 auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
14205 bool IsSignaling) {
14206 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
14207 Value *Cmp;
14208 if (IsSignaling)
14209 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
14210 else
14211 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
14212 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
14213 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
14214 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
14215 return Builder.CreateBitCast(Sext, FPVecTy);
14216 };
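  // Illustrative note, not part of this file: a packed comparison such as
  // _mm_cmpeq_ps (assuming the usual mapping onto __builtin_ia32_cmpeqps,
  // handled further down this switch) goes through
  // getVectorFCmpIR(FCMP_OEQ, /*IsSignaling*/false): an fcmp oeq on
  // <4 x float>, a sext of the <4 x i1> result to <4 x i32>, and a bitcast
  // back to <4 x float>, yielding all-ones or all-zero lanes.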
14217
14218 switch (BuiltinID) {
14219 default: return nullptr;
14220 case X86::BI_mm_prefetch: {
14221 Value *Address = Ops[0];
14222 ConstantInt *C = cast<ConstantInt>(Ops[1]);
14223 Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
14224 Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
14225 Value *Data = ConstantInt::get(Int32Ty, 1);
14226 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
14227 return Builder.CreateCall(F, {Address, RW, Locality, Data});
14228 }
14229 case X86::BI_mm_clflush: {
14230 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
14231 Ops[0]);
14232 }
14233 case X86::BI_mm_lfence: {
14234 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
14235 }
14236 case X86::BI_mm_mfence: {
14237 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
14238 }
14239 case X86::BI_mm_sfence: {
14240 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
14241 }
14242 case X86::BI_mm_pause: {
14243 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
14244 }
14245 case X86::BI__rdtsc: {
14246 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
14247 }
14248 case X86::BI__builtin_ia32_rdtscp: {
14249 Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
14250 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
14251 Ops[0]);
14252 return Builder.CreateExtractValue(Call, 0);
14253 }
14254 case X86::BI__builtin_ia32_lzcnt_u16:
14255 case X86::BI__builtin_ia32_lzcnt_u32:
14256 case X86::BI__builtin_ia32_lzcnt_u64: {
14257 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
14258 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
14259 }
14260 case X86::BI__builtin_ia32_tzcnt_u16:
14261 case X86::BI__builtin_ia32_tzcnt_u32:
14262 case X86::BI__builtin_ia32_tzcnt_u64: {
14263 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
14264 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
14265 }
14266 case X86::BI__builtin_ia32_undef128:
14267 case X86::BI__builtin_ia32_undef256:
14268 case X86::BI__builtin_ia32_undef512:
14269 // The x86 definition of "undef" is not the same as the LLVM definition
14270 // (PR32176). We leave optimizing away an unnecessary zero constant to the
14271 // IR optimizer and backend.
14272 // TODO: If we had a "freeze" IR instruction to generate a fixed undef
14273 // value, we should use that here instead of a zero.
14274 return llvm::Constant::getNullValue(ConvertType(E->getType()));
14275 case X86::BI__builtin_ia32_vec_init_v8qi:
14276 case X86::BI__builtin_ia32_vec_init_v4hi:
14277 case X86::BI__builtin_ia32_vec_init_v2si:
14278 return Builder.CreateBitCast(BuildVector(Ops),
14279 llvm::Type::getX86_MMXTy(getLLVMContext()));
14280 case X86::BI__builtin_ia32_vec_ext_v2si:
14281 case X86::BI__builtin_ia32_vec_ext_v16qi:
14282 case X86::BI__builtin_ia32_vec_ext_v8hi:
14283 case X86::BI__builtin_ia32_vec_ext_v4si:
14284 case X86::BI__builtin_ia32_vec_ext_v4sf:
14285 case X86::BI__builtin_ia32_vec_ext_v2di:
14286 case X86::BI__builtin_ia32_vec_ext_v32qi:
14287 case X86::BI__builtin_ia32_vec_ext_v16hi:
14288 case X86::BI__builtin_ia32_vec_ext_v8si:
14289 case X86::BI__builtin_ia32_vec_ext_v4di: {
14290 unsigned NumElts =
14291 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14292 uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
14293 Index &= NumElts - 1;
14294 // These builtins exist so we can ensure the index is an ICE and in range.
14295 // Otherwise we could just do this in the header file.
14296 return Builder.CreateExtractElement(Ops[0], Index);
14297 }
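  // Illustrative note, not part of this file: _mm_extract_epi32(V, 1)
  // (assuming the usual mapping onto __builtin_ia32_vec_ext_v4si) lands in the
  // case above and becomes a single "extractelement <4 x i32> %V, i64 1"; the
  // index has already been masked with NumElts - 1.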
14298 case X86::BI__builtin_ia32_vec_set_v16qi:
14299 case X86::BI__builtin_ia32_vec_set_v8hi:
14300 case X86::BI__builtin_ia32_vec_set_v4si:
14301 case X86::BI__builtin_ia32_vec_set_v2di:
14302 case X86::BI__builtin_ia32_vec_set_v32qi:
14303 case X86::BI__builtin_ia32_vec_set_v16hi:
14304 case X86::BI__builtin_ia32_vec_set_v8si:
14305 case X86::BI__builtin_ia32_vec_set_v4di: {
14306 unsigned NumElts =
14307 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14308 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
14309 Index &= NumElts - 1;
14310 // These builtins exist so we can ensure the index is an ICE and in range.
14311 // Otherwise we could just do this in the header file.
14312 return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
14313 }
14314 case X86::BI_mm_setcsr:
14315 case X86::BI__builtin_ia32_ldmxcsr: {
14316 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
14317 Builder.CreateStore(Ops[0], Tmp);
14318 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
14319 Tmp.getPointer());
14320 }
14321 case X86::BI_mm_getcsr:
14322 case X86::BI__builtin_ia32_stmxcsr: {
14323 Address Tmp = CreateMemTemp(E->getType());
14324 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
14325 Tmp.getPointer());
14326 return Builder.CreateLoad(Tmp, "stmxcsr");
14327 }
14328 case X86::BI__builtin_ia32_xsave:
14329 case X86::BI__builtin_ia32_xsave64:
14330 case X86::BI__builtin_ia32_xrstor:
14331 case X86::BI__builtin_ia32_xrstor64:
14332 case X86::BI__builtin_ia32_xsaveopt:
14333 case X86::BI__builtin_ia32_xsaveopt64:
14334 case X86::BI__builtin_ia32_xrstors:
14335 case X86::BI__builtin_ia32_xrstors64:
14336 case X86::BI__builtin_ia32_xsavec:
14337 case X86::BI__builtin_ia32_xsavec64:
14338 case X86::BI__builtin_ia32_xsaves:
14339 case X86::BI__builtin_ia32_xsaves64:
14340 case X86::BI__builtin_ia32_xsetbv:
14341 case X86::BI_xsetbv: {
14342 Intrinsic::ID ID;
14343#define INTRINSIC_X86_XSAVE_ID(NAME) \
14344 case X86::BI__builtin_ia32_##NAME: \
14345 ID = Intrinsic::x86_##NAME; \
14346 break
14347 switch (BuiltinID) {
14348 default: llvm_unreachable("Unsupported intrinsic!");
14349    INTRINSIC_X86_XSAVE_ID(xsave);
14350 INTRINSIC_X86_XSAVE_ID(xsave64);
14351 INTRINSIC_X86_XSAVE_ID(xrstor);
14352 INTRINSIC_X86_XSAVE_ID(xrstor64);
14353 INTRINSIC_X86_XSAVE_ID(xsaveopt);
14354 INTRINSIC_X86_XSAVE_ID(xsaveopt64);
14355 INTRINSIC_X86_XSAVE_ID(xrstors);
14356 INTRINSIC_X86_XSAVE_ID(xrstors64);
14357 INTRINSIC_X86_XSAVE_ID(xsavec);
14358 INTRINSIC_X86_XSAVE_ID(xsavec64);
14359 INTRINSIC_X86_XSAVE_ID(xsaves);
14360 INTRINSIC_X86_XSAVE_ID(xsaves64);
14361 INTRINSIC_X86_XSAVE_ID(xsetbv);
14362 case X86::BI_xsetbv:
14363 ID = Intrinsic::x86_xsetbv;
14364 break;
14365 }
14366#undef INTRINSIC_X86_XSAVE_ID
14367 Value *Mhi = Builder.CreateTrunc(
14368 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
14369 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
14370 Ops[1] = Mhi;
14371 Ops.push_back(Mlo);
14372 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
14373 }
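  // Illustrative note, not part of this file: _xsave(Buf, Mask) (assuming the
  // usual mapping onto __builtin_ia32_xsave) reaches the case above, where the
  // 64-bit Mask is split into its high and low 32-bit halves and the call
  // becomes @llvm.x86.xsave(Buf, hi32(Mask), lo32(Mask)), matching the EDX:EAX
  // operand convention of the instruction.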
14374 case X86::BI__builtin_ia32_xgetbv:
14375 case X86::BI_xgetbv:
14376 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
14377 case X86::BI__builtin_ia32_storedqudi128_mask:
14378 case X86::BI__builtin_ia32_storedqusi128_mask:
14379 case X86::BI__builtin_ia32_storedquhi128_mask:
14380 case X86::BI__builtin_ia32_storedquqi128_mask:
14381 case X86::BI__builtin_ia32_storeupd128_mask:
14382 case X86::BI__builtin_ia32_storeups128_mask:
14383 case X86::BI__builtin_ia32_storedqudi256_mask:
14384 case X86::BI__builtin_ia32_storedqusi256_mask:
14385 case X86::BI__builtin_ia32_storedquhi256_mask:
14386 case X86::BI__builtin_ia32_storedquqi256_mask:
14387 case X86::BI__builtin_ia32_storeupd256_mask:
14388 case X86::BI__builtin_ia32_storeups256_mask:
14389 case X86::BI__builtin_ia32_storedqudi512_mask:
14390 case X86::BI__builtin_ia32_storedqusi512_mask:
14391 case X86::BI__builtin_ia32_storedquhi512_mask:
14392 case X86::BI__builtin_ia32_storedquqi512_mask:
14393 case X86::BI__builtin_ia32_storeupd512_mask:
14394 case X86::BI__builtin_ia32_storeups512_mask:
14395 return EmitX86MaskedStore(*this, Ops, Align(1));
14396
14397 case X86::BI__builtin_ia32_storesh128_mask:
14398 case X86::BI__builtin_ia32_storess128_mask:
14399 case X86::BI__builtin_ia32_storesd128_mask:
14400 return EmitX86MaskedStore(*this, Ops, Align(1));
14401
14402 case X86::BI__builtin_ia32_vpopcntb_128:
14403 case X86::BI__builtin_ia32_vpopcntd_128:
14404 case X86::BI__builtin_ia32_vpopcntq_128:
14405 case X86::BI__builtin_ia32_vpopcntw_128:
14406 case X86::BI__builtin_ia32_vpopcntb_256:
14407 case X86::BI__builtin_ia32_vpopcntd_256:
14408 case X86::BI__builtin_ia32_vpopcntq_256:
14409 case X86::BI__builtin_ia32_vpopcntw_256:
14410 case X86::BI__builtin_ia32_vpopcntb_512:
14411 case X86::BI__builtin_ia32_vpopcntd_512:
14412 case X86::BI__builtin_ia32_vpopcntq_512:
14413 case X86::BI__builtin_ia32_vpopcntw_512: {
14414 llvm::Type *ResultType = ConvertType(E->getType());
14415 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
14416 return Builder.CreateCall(F, Ops);
14417 }
14418 case X86::BI__builtin_ia32_cvtmask2b128:
14419 case X86::BI__builtin_ia32_cvtmask2b256:
14420 case X86::BI__builtin_ia32_cvtmask2b512:
14421 case X86::BI__builtin_ia32_cvtmask2w128:
14422 case X86::BI__builtin_ia32_cvtmask2w256:
14423 case X86::BI__builtin_ia32_cvtmask2w512:
14424 case X86::BI__builtin_ia32_cvtmask2d128:
14425 case X86::BI__builtin_ia32_cvtmask2d256:
14426 case X86::BI__builtin_ia32_cvtmask2d512:
14427 case X86::BI__builtin_ia32_cvtmask2q128:
14428 case X86::BI__builtin_ia32_cvtmask2q256:
14429 case X86::BI__builtin_ia32_cvtmask2q512:
14430 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
14431
14432 case X86::BI__builtin_ia32_cvtb2mask128:
14433 case X86::BI__builtin_ia32_cvtb2mask256:
14434 case X86::BI__builtin_ia32_cvtb2mask512:
14435 case X86::BI__builtin_ia32_cvtw2mask128:
14436 case X86::BI__builtin_ia32_cvtw2mask256:
14437 case X86::BI__builtin_ia32_cvtw2mask512:
14438 case X86::BI__builtin_ia32_cvtd2mask128:
14439 case X86::BI__builtin_ia32_cvtd2mask256:
14440 case X86::BI__builtin_ia32_cvtd2mask512:
14441 case X86::BI__builtin_ia32_cvtq2mask128:
14442 case X86::BI__builtin_ia32_cvtq2mask256:
14443 case X86::BI__builtin_ia32_cvtq2mask512:
14444 return EmitX86ConvertToMask(*this, Ops[0]);
14445
14446 case X86::BI__builtin_ia32_cvtdq2ps512_mask:
14447 case X86::BI__builtin_ia32_cvtqq2ps512_mask:
14448 case X86::BI__builtin_ia32_cvtqq2pd512_mask:
14449 case X86::BI__builtin_ia32_vcvtw2ph512_mask:
14450 case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
14451 case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
14452 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
14453 case X86::BI__builtin_ia32_cvtudq2ps512_mask:
14454 case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
14455 case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
14456 case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
14457 case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
14458 case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
14459 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
14460
14461 case X86::BI__builtin_ia32_vfmaddss3:
14462 case X86::BI__builtin_ia32_vfmaddsd3:
14463 case X86::BI__builtin_ia32_vfmaddsh3_mask:
14464 case X86::BI__builtin_ia32_vfmaddss3_mask:
14465 case X86::BI__builtin_ia32_vfmaddsd3_mask:
14466 return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
14467 case X86::BI__builtin_ia32_vfmaddss:
14468 case X86::BI__builtin_ia32_vfmaddsd:
14469 return EmitScalarFMAExpr(*this, E, Ops,
14470 Constant::getNullValue(Ops[0]->getType()));
14471 case X86::BI__builtin_ia32_vfmaddsh3_maskz:
14472 case X86::BI__builtin_ia32_vfmaddss3_maskz:
14473 case X86::BI__builtin_ia32_vfmaddsd3_maskz:
14474 return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
14475 case X86::BI__builtin_ia32_vfmaddsh3_mask3:
14476 case X86::BI__builtin_ia32_vfmaddss3_mask3:
14477 case X86::BI__builtin_ia32_vfmaddsd3_mask3:
14478 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
14479 case X86::BI__builtin_ia32_vfmsubsh3_mask3:
14480 case X86::BI__builtin_ia32_vfmsubss3_mask3:
14481 case X86::BI__builtin_ia32_vfmsubsd3_mask3:
14482 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
14483 /*NegAcc*/ true);
14484 case X86::BI__builtin_ia32_vfmaddph:
14485 case X86::BI__builtin_ia32_vfmaddps:
14486 case X86::BI__builtin_ia32_vfmaddpd:
14487 case X86::BI__builtin_ia32_vfmaddph256:
14488 case X86::BI__builtin_ia32_vfmaddps256:
14489 case X86::BI__builtin_ia32_vfmaddpd256:
14490 case X86::BI__builtin_ia32_vfmaddph512_mask:
14491 case X86::BI__builtin_ia32_vfmaddph512_maskz:
14492 case X86::BI__builtin_ia32_vfmaddph512_mask3:
14493 case X86::BI__builtin_ia32_vfmaddps512_mask:
14494 case X86::BI__builtin_ia32_vfmaddps512_maskz:
14495 case X86::BI__builtin_ia32_vfmaddps512_mask3:
14496 case X86::BI__builtin_ia32_vfmsubps512_mask3:
14497 case X86::BI__builtin_ia32_vfmaddpd512_mask:
14498 case X86::BI__builtin_ia32_vfmaddpd512_maskz:
14499 case X86::BI__builtin_ia32_vfmaddpd512_mask3:
14500 case X86::BI__builtin_ia32_vfmsubpd512_mask3:
14501 case X86::BI__builtin_ia32_vfmsubph512_mask3:
14502 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
14503 case X86::BI__builtin_ia32_vfmaddsubph512_mask:
14504 case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14505 case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14506 case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14507 case X86::BI__builtin_ia32_vfmaddsubps512_mask:
14508 case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14509 case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14510 case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14511 case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14512 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14513 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14514 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14515 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
14516
14517 case X86::BI__builtin_ia32_movdqa32store128_mask:
14518 case X86::BI__builtin_ia32_movdqa64store128_mask:
14519 case X86::BI__builtin_ia32_storeaps128_mask:
14520 case X86::BI__builtin_ia32_storeapd128_mask:
14521 case X86::BI__builtin_ia32_movdqa32store256_mask:
14522 case X86::BI__builtin_ia32_movdqa64store256_mask:
14523 case X86::BI__builtin_ia32_storeaps256_mask:
14524 case X86::BI__builtin_ia32_storeapd256_mask:
14525 case X86::BI__builtin_ia32_movdqa32store512_mask:
14526 case X86::BI__builtin_ia32_movdqa64store512_mask:
14527 case X86::BI__builtin_ia32_storeaps512_mask:
14528 case X86::BI__builtin_ia32_storeapd512_mask:
14529 return EmitX86MaskedStore(
14530 *this, Ops,
14531 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
14532
14533 case X86::BI__builtin_ia32_loadups128_mask:
14534 case X86::BI__builtin_ia32_loadups256_mask:
14535 case X86::BI__builtin_ia32_loadups512_mask:
14536 case X86::BI__builtin_ia32_loadupd128_mask:
14537 case X86::BI__builtin_ia32_loadupd256_mask:
14538 case X86::BI__builtin_ia32_loadupd512_mask:
14539 case X86::BI__builtin_ia32_loaddquqi128_mask:
14540 case X86::BI__builtin_ia32_loaddquqi256_mask:
14541 case X86::BI__builtin_ia32_loaddquqi512_mask:
14542 case X86::BI__builtin_ia32_loaddquhi128_mask:
14543 case X86::BI__builtin_ia32_loaddquhi256_mask:
14544 case X86::BI__builtin_ia32_loaddquhi512_mask:
14545 case X86::BI__builtin_ia32_loaddqusi128_mask:
14546 case X86::BI__builtin_ia32_loaddqusi256_mask:
14547 case X86::BI__builtin_ia32_loaddqusi512_mask:
14548 case X86::BI__builtin_ia32_loaddqudi128_mask:
14549 case X86::BI__builtin_ia32_loaddqudi256_mask:
14550 case X86::BI__builtin_ia32_loaddqudi512_mask:
14551 return EmitX86MaskedLoad(*this, Ops, Align(1));
14552
14553 case X86::BI__builtin_ia32_loadsh128_mask:
14554 case X86::BI__builtin_ia32_loadss128_mask:
14555 case X86::BI__builtin_ia32_loadsd128_mask:
14556 return EmitX86MaskedLoad(*this, Ops, Align(1));
14557
14558 case X86::BI__builtin_ia32_loadaps128_mask:
14559 case X86::BI__builtin_ia32_loadaps256_mask:
14560 case X86::BI__builtin_ia32_loadaps512_mask:
14561 case X86::BI__builtin_ia32_loadapd128_mask:
14562 case X86::BI__builtin_ia32_loadapd256_mask:
14563 case X86::BI__builtin_ia32_loadapd512_mask:
14564 case X86::BI__builtin_ia32_movdqa32load128_mask:
14565 case X86::BI__builtin_ia32_movdqa32load256_mask:
14566 case X86::BI__builtin_ia32_movdqa32load512_mask:
14567 case X86::BI__builtin_ia32_movdqa64load128_mask:
14568 case X86::BI__builtin_ia32_movdqa64load256_mask:
14569 case X86::BI__builtin_ia32_movdqa64load512_mask:
14570 return EmitX86MaskedLoad(
14571 *this, Ops,
14572 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
14573
14574 case X86::BI__builtin_ia32_expandloaddf128_mask:
14575 case X86::BI__builtin_ia32_expandloaddf256_mask:
14576 case X86::BI__builtin_ia32_expandloaddf512_mask:
14577 case X86::BI__builtin_ia32_expandloadsf128_mask:
14578 case X86::BI__builtin_ia32_expandloadsf256_mask:
14579 case X86::BI__builtin_ia32_expandloadsf512_mask:
14580 case X86::BI__builtin_ia32_expandloaddi128_mask:
14581 case X86::BI__builtin_ia32_expandloaddi256_mask:
14582 case X86::BI__builtin_ia32_expandloaddi512_mask:
14583 case X86::BI__builtin_ia32_expandloadsi128_mask:
14584 case X86::BI__builtin_ia32_expandloadsi256_mask:
14585 case X86::BI__builtin_ia32_expandloadsi512_mask:
14586 case X86::BI__builtin_ia32_expandloadhi128_mask:
14587 case X86::BI__builtin_ia32_expandloadhi256_mask:
14588 case X86::BI__builtin_ia32_expandloadhi512_mask:
14589 case X86::BI__builtin_ia32_expandloadqi128_mask:
14590 case X86::BI__builtin_ia32_expandloadqi256_mask:
14591 case X86::BI__builtin_ia32_expandloadqi512_mask:
14592 return EmitX86ExpandLoad(*this, Ops);
14593
14594 case X86::BI__builtin_ia32_compressstoredf128_mask:
14595 case X86::BI__builtin_ia32_compressstoredf256_mask:
14596 case X86::BI__builtin_ia32_compressstoredf512_mask:
14597 case X86::BI__builtin_ia32_compressstoresf128_mask:
14598 case X86::BI__builtin_ia32_compressstoresf256_mask:
14599 case X86::BI__builtin_ia32_compressstoresf512_mask:
14600 case X86::BI__builtin_ia32_compressstoredi128_mask:
14601 case X86::BI__builtin_ia32_compressstoredi256_mask:
14602 case X86::BI__builtin_ia32_compressstoredi512_mask:
14603 case X86::BI__builtin_ia32_compressstoresi128_mask:
14604 case X86::BI__builtin_ia32_compressstoresi256_mask:
14605 case X86::BI__builtin_ia32_compressstoresi512_mask:
14606 case X86::BI__builtin_ia32_compressstorehi128_mask:
14607 case X86::BI__builtin_ia32_compressstorehi256_mask:
14608 case X86::BI__builtin_ia32_compressstorehi512_mask:
14609 case X86::BI__builtin_ia32_compressstoreqi128_mask:
14610 case X86::BI__builtin_ia32_compressstoreqi256_mask:
14611 case X86::BI__builtin_ia32_compressstoreqi512_mask:
14612 return EmitX86CompressStore(*this, Ops);
14613
14614 case X86::BI__builtin_ia32_expanddf128_mask:
14615 case X86::BI__builtin_ia32_expanddf256_mask:
14616 case X86::BI__builtin_ia32_expanddf512_mask:
14617 case X86::BI__builtin_ia32_expandsf128_mask:
14618 case X86::BI__builtin_ia32_expandsf256_mask:
14619 case X86::BI__builtin_ia32_expandsf512_mask:
14620 case X86::BI__builtin_ia32_expanddi128_mask:
14621 case X86::BI__builtin_ia32_expanddi256_mask:
14622 case X86::BI__builtin_ia32_expanddi512_mask:
14623 case X86::BI__builtin_ia32_expandsi128_mask:
14624 case X86::BI__builtin_ia32_expandsi256_mask:
14625 case X86::BI__builtin_ia32_expandsi512_mask:
14626 case X86::BI__builtin_ia32_expandhi128_mask:
14627 case X86::BI__builtin_ia32_expandhi256_mask:
14628 case X86::BI__builtin_ia32_expandhi512_mask:
14629 case X86::BI__builtin_ia32_expandqi128_mask:
14630 case X86::BI__builtin_ia32_expandqi256_mask:
14631 case X86::BI__builtin_ia32_expandqi512_mask:
14632 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
14633
14634 case X86::BI__builtin_ia32_compressdf128_mask:
14635 case X86::BI__builtin_ia32_compressdf256_mask:
14636 case X86::BI__builtin_ia32_compressdf512_mask:
14637 case X86::BI__builtin_ia32_compresssf128_mask:
14638 case X86::BI__builtin_ia32_compresssf256_mask:
14639 case X86::BI__builtin_ia32_compresssf512_mask:
14640 case X86::BI__builtin_ia32_compressdi128_mask:
14641 case X86::BI__builtin_ia32_compressdi256_mask:
14642 case X86::BI__builtin_ia32_compressdi512_mask:
14643 case X86::BI__builtin_ia32_compresssi128_mask:
14644 case X86::BI__builtin_ia32_compresssi256_mask:
14645 case X86::BI__builtin_ia32_compresssi512_mask:
14646 case X86::BI__builtin_ia32_compresshi128_mask:
14647 case X86::BI__builtin_ia32_compresshi256_mask:
14648 case X86::BI__builtin_ia32_compresshi512_mask:
14649 case X86::BI__builtin_ia32_compressqi128_mask:
14650 case X86::BI__builtin_ia32_compressqi256_mask:
14651 case X86::BI__builtin_ia32_compressqi512_mask:
14652 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
14653
14654 case X86::BI__builtin_ia32_gather3div2df:
14655 case X86::BI__builtin_ia32_gather3div2di:
14656 case X86::BI__builtin_ia32_gather3div4df:
14657 case X86::BI__builtin_ia32_gather3div4di:
14658 case X86::BI__builtin_ia32_gather3div4sf:
14659 case X86::BI__builtin_ia32_gather3div4si:
14660 case X86::BI__builtin_ia32_gather3div8sf:
14661 case X86::BI__builtin_ia32_gather3div8si:
14662 case X86::BI__builtin_ia32_gather3siv2df:
14663 case X86::BI__builtin_ia32_gather3siv2di:
14664 case X86::BI__builtin_ia32_gather3siv4df:
14665 case X86::BI__builtin_ia32_gather3siv4di:
14666 case X86::BI__builtin_ia32_gather3siv4sf:
14667 case X86::BI__builtin_ia32_gather3siv4si:
14668 case X86::BI__builtin_ia32_gather3siv8sf:
14669 case X86::BI__builtin_ia32_gather3siv8si:
14670 case X86::BI__builtin_ia32_gathersiv8df:
14671 case X86::BI__builtin_ia32_gathersiv16sf:
14672 case X86::BI__builtin_ia32_gatherdiv8df:
14673 case X86::BI__builtin_ia32_gatherdiv16sf:
14674 case X86::BI__builtin_ia32_gathersiv8di:
14675 case X86::BI__builtin_ia32_gathersiv16si:
14676 case X86::BI__builtin_ia32_gatherdiv8di:
14677 case X86::BI__builtin_ia32_gatherdiv16si: {
14678 Intrinsic::ID IID;
14679 switch (BuiltinID) {
14680 default: llvm_unreachable("Unexpected builtin");
14681 case X86::BI__builtin_ia32_gather3div2df:
14682 IID = Intrinsic::x86_avx512_mask_gather3div2_df;
14683 break;
14684 case X86::BI__builtin_ia32_gather3div2di:
14685 IID = Intrinsic::x86_avx512_mask_gather3div2_di;
14686 break;
14687 case X86::BI__builtin_ia32_gather3div4df:
14688 IID = Intrinsic::x86_avx512_mask_gather3div4_df;
14689 break;
14690 case X86::BI__builtin_ia32_gather3div4di:
14691 IID = Intrinsic::x86_avx512_mask_gather3div4_di;
14692 break;
14693 case X86::BI__builtin_ia32_gather3div4sf:
14694 IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
14695 break;
14696 case X86::BI__builtin_ia32_gather3div4si:
14697 IID = Intrinsic::x86_avx512_mask_gather3div4_si;
14698 break;
14699 case X86::BI__builtin_ia32_gather3div8sf:
14700 IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
14701 break;
14702 case X86::BI__builtin_ia32_gather3div8si:
14703 IID = Intrinsic::x86_avx512_mask_gather3div8_si;
14704 break;
14705 case X86::BI__builtin_ia32_gather3siv2df:
14706 IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
14707 break;
14708 case X86::BI__builtin_ia32_gather3siv2di:
14709 IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
14710 break;
14711 case X86::BI__builtin_ia32_gather3siv4df:
14712 IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
14713 break;
14714 case X86::BI__builtin_ia32_gather3siv4di:
14715 IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
14716 break;
14717 case X86::BI__builtin_ia32_gather3siv4sf:
14718 IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
14719 break;
14720 case X86::BI__builtin_ia32_gather3siv4si:
14721 IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
14722 break;
14723 case X86::BI__builtin_ia32_gather3siv8sf:
14724 IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
14725 break;
14726 case X86::BI__builtin_ia32_gather3siv8si:
14727 IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
14728 break;
14729 case X86::BI__builtin_ia32_gathersiv8df:
14730 IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
14731 break;
14732 case X86::BI__builtin_ia32_gathersiv16sf:
14733 IID = Intrinsic::x86_avx512_mask_gather_dps_512;
14734 break;
14735 case X86::BI__builtin_ia32_gatherdiv8df:
14736 IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
14737 break;
14738 case X86::BI__builtin_ia32_gatherdiv16sf:
14739 IID = Intrinsic::x86_avx512_mask_gather_qps_512;
14740 break;
14741 case X86::BI__builtin_ia32_gathersiv8di:
14742 IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
14743 break;
14744 case X86::BI__builtin_ia32_gathersiv16si:
14745 IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
14746 break;
14747 case X86::BI__builtin_ia32_gatherdiv8di:
14748 IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
14749 break;
14750 case X86::BI__builtin_ia32_gatherdiv16si:
14751 IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
14752 break;
14753 }
14754
14755 unsigned MinElts = std::min(
14756 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
14757 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
14758 Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
14759 Function *Intr = CGM.getIntrinsic(IID);
14760 return Builder.CreateCall(Intr, Ops);
14761 }
14762
14763 case X86::BI__builtin_ia32_scattersiv8df:
14764 case X86::BI__builtin_ia32_scattersiv16sf:
14765 case X86::BI__builtin_ia32_scatterdiv8df:
14766 case X86::BI__builtin_ia32_scatterdiv16sf:
14767 case X86::BI__builtin_ia32_scattersiv8di:
14768 case X86::BI__builtin_ia32_scattersiv16si:
14769 case X86::BI__builtin_ia32_scatterdiv8di:
14770 case X86::BI__builtin_ia32_scatterdiv16si:
14771 case X86::BI__builtin_ia32_scatterdiv2df:
14772 case X86::BI__builtin_ia32_scatterdiv2di:
14773 case X86::BI__builtin_ia32_scatterdiv4df:
14774 case X86::BI__builtin_ia32_scatterdiv4di:
14775 case X86::BI__builtin_ia32_scatterdiv4sf:
14776 case X86::BI__builtin_ia32_scatterdiv4si:
14777 case X86::BI__builtin_ia32_scatterdiv8sf:
14778 case X86::BI__builtin_ia32_scatterdiv8si:
14779 case X86::BI__builtin_ia32_scattersiv2df:
14780 case X86::BI__builtin_ia32_scattersiv2di:
14781 case X86::BI__builtin_ia32_scattersiv4df:
14782 case X86::BI__builtin_ia32_scattersiv4di:
14783 case X86::BI__builtin_ia32_scattersiv4sf:
14784 case X86::BI__builtin_ia32_scattersiv4si:
14785 case X86::BI__builtin_ia32_scattersiv8sf:
14786 case X86::BI__builtin_ia32_scattersiv8si: {
14787 Intrinsic::ID IID;
14788 switch (BuiltinID) {
14789 default: llvm_unreachable("Unexpected builtin");
14790 case X86::BI__builtin_ia32_scattersiv8df:
14791 IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
14792 break;
14793 case X86::BI__builtin_ia32_scattersiv16sf:
14794 IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
14795 break;
14796 case X86::BI__builtin_ia32_scatterdiv8df:
14797 IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
14798 break;
14799 case X86::BI__builtin_ia32_scatterdiv16sf:
14800 IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
14801 break;
14802 case X86::BI__builtin_ia32_scattersiv8di:
14803 IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
14804 break;
14805 case X86::BI__builtin_ia32_scattersiv16si:
14806 IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
14807 break;
14808 case X86::BI__builtin_ia32_scatterdiv8di:
14809 IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
14810 break;
14811 case X86::BI__builtin_ia32_scatterdiv16si:
14812 IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
14813 break;
14814 case X86::BI__builtin_ia32_scatterdiv2df:
14815 IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
14816 break;
14817 case X86::BI__builtin_ia32_scatterdiv2di:
14818 IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
14819 break;
14820 case X86::BI__builtin_ia32_scatterdiv4df:
14821 IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
14822 break;
14823 case X86::BI__builtin_ia32_scatterdiv4di:
14824 IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
14825 break;
14826 case X86::BI__builtin_ia32_scatterdiv4sf:
14827 IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
14828 break;
14829 case X86::BI__builtin_ia32_scatterdiv4si:
14830 IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
14831 break;
14832 case X86::BI__builtin_ia32_scatterdiv8sf:
14833 IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
14834 break;
14835 case X86::BI__builtin_ia32_scatterdiv8si:
14836 IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
14837 break;
14838 case X86::BI__builtin_ia32_scattersiv2df:
14839 IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
14840 break;
14841 case X86::BI__builtin_ia32_scattersiv2di:
14842 IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
14843 break;
14844 case X86::BI__builtin_ia32_scattersiv4df:
14845 IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
14846 break;
14847 case X86::BI__builtin_ia32_scattersiv4di:
14848 IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
14849 break;
14850 case X86::BI__builtin_ia32_scattersiv4sf:
14851 IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
14852 break;
14853 case X86::BI__builtin_ia32_scattersiv4si:
14854 IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
14855 break;
14856 case X86::BI__builtin_ia32_scattersiv8sf:
14857 IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
14858 break;
14859 case X86::BI__builtin_ia32_scattersiv8si:
14860 IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
14861 break;
14862 }
14863
14864 unsigned MinElts = std::min(
14865 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
14866 cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
14867 Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
14868 Function *Intr = CGM.getIntrinsic(IID);
14869 return Builder.CreateCall(Intr, Ops);
14870 }
14871
14872 case X86::BI__builtin_ia32_vextractf128_pd256:
14873 case X86::BI__builtin_ia32_vextractf128_ps256:
14874 case X86::BI__builtin_ia32_vextractf128_si256:
14875 case X86::BI__builtin_ia32_extract128i256:
14876 case X86::BI__builtin_ia32_extractf64x4_mask:
14877 case X86::BI__builtin_ia32_extractf32x4_mask:
14878 case X86::BI__builtin_ia32_extracti64x4_mask:
14879 case X86::BI__builtin_ia32_extracti32x4_mask:
14880 case X86::BI__builtin_ia32_extractf32x8_mask:
14881 case X86::BI__builtin_ia32_extracti32x8_mask:
14882 case X86::BI__builtin_ia32_extractf32x4_256_mask:
14883 case X86::BI__builtin_ia32_extracti32x4_256_mask:
14884 case X86::BI__builtin_ia32_extractf64x2_256_mask:
14885 case X86::BI__builtin_ia32_extracti64x2_256_mask:
14886 case X86::BI__builtin_ia32_extractf64x2_512_mask:
14887 case X86::BI__builtin_ia32_extracti64x2_512_mask: {
14888 auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
14889 unsigned NumElts = DstTy->getNumElements();
14890 unsigned SrcNumElts =
14891 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14892 unsigned SubVectors = SrcNumElts / NumElts;
14893 unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
14894 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
14895 Index &= SubVectors - 1; // Remove any extra bits.
14896 Index *= NumElts;
14897
14898 int Indices[16];
14899 for (unsigned i = 0; i != NumElts; ++i)
14900 Indices[i] = i + Index;
14901
14902 Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
14903 "extract");
14904
14905 if (Ops.size() == 4)
14906 Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
14907
14908 return Res;
14909 }
14910 case X86::BI__builtin_ia32_vinsertf128_pd256:
14911 case X86::BI__builtin_ia32_vinsertf128_ps256:
14912 case X86::BI__builtin_ia32_vinsertf128_si256:
14913 case X86::BI__builtin_ia32_insert128i256:
14914 case X86::BI__builtin_ia32_insertf64x4:
14915 case X86::BI__builtin_ia32_insertf32x4:
14916 case X86::BI__builtin_ia32_inserti64x4:
14917 case X86::BI__builtin_ia32_inserti32x4:
14918 case X86::BI__builtin_ia32_insertf32x8:
14919 case X86::BI__builtin_ia32_inserti32x8:
14920 case X86::BI__builtin_ia32_insertf32x4_256:
14921 case X86::BI__builtin_ia32_inserti32x4_256:
14922 case X86::BI__builtin_ia32_insertf64x2_256:
14923 case X86::BI__builtin_ia32_inserti64x2_256:
14924 case X86::BI__builtin_ia32_insertf64x2_512:
14925 case X86::BI__builtin_ia32_inserti64x2_512: {
14926 unsigned DstNumElts =
14927 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14928 unsigned SrcNumElts =
14929 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
14930 unsigned SubVectors = DstNumElts / SrcNumElts;
14931 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
14932 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
14933 Index &= SubVectors - 1; // Remove any extra bits.
14934 Index *= SrcNumElts;
14935
14936 int Indices[16];
14937 for (unsigned i = 0; i != DstNumElts; ++i)
14938 Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
14939
14940 Value *Op1 = Builder.CreateShuffleVector(
14941 Ops[1], ArrayRef(Indices, DstNumElts), "widen");
14942
14943 for (unsigned i = 0; i != DstNumElts; ++i) {
14944 if (i >= Index && i < (Index + SrcNumElts))
14945 Indices[i] = (i - Index) + DstNumElts;
14946 else
14947 Indices[i] = i;
14948 }
14949
14950 return Builder.CreateShuffleVector(Ops[0], Op1,
14951 ArrayRef(Indices, DstNumElts), "insert");
14952 }
14953 case X86::BI__builtin_ia32_pmovqd512_mask:
14954 case X86::BI__builtin_ia32_pmovwb512_mask: {
14955 Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
14956 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
14957 }
14958 case X86::BI__builtin_ia32_pmovdb512_mask:
14959 case X86::BI__builtin_ia32_pmovdw512_mask:
14960 case X86::BI__builtin_ia32_pmovqw512_mask: {
14961 if (const auto *C = dyn_cast<Constant>(Ops[2]))
14962 if (C->isAllOnesValue())
14963 return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
14964
14965 Intrinsic::ID IID;
14966 switch (BuiltinID) {
14967 default: llvm_unreachable("Unsupported intrinsic!");
14968 case X86::BI__builtin_ia32_pmovdb512_mask:
14969 IID = Intrinsic::x86_avx512_mask_pmov_db_512;
14970 break;
14971 case X86::BI__builtin_ia32_pmovdw512_mask:
14972 IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
14973 break;
14974 case X86::BI__builtin_ia32_pmovqw512_mask:
14975 IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
14976 break;
14977 }
14978
14979 Function *Intr = CGM.getIntrinsic(IID);
14980 return Builder.CreateCall(Intr, Ops);
14981 }
14982 case X86::BI__builtin_ia32_pblendw128:
14983 case X86::BI__builtin_ia32_blendpd:
14984 case X86::BI__builtin_ia32_blendps:
14985 case X86::BI__builtin_ia32_blendpd256:
14986 case X86::BI__builtin_ia32_blendps256:
14987 case X86::BI__builtin_ia32_pblendw256:
14988 case X86::BI__builtin_ia32_pblendd128:
14989 case X86::BI__builtin_ia32_pblendd256: {
14990 unsigned NumElts =
14991 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14992 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
14993
14994 int Indices[16];
14995 // If there are more than 8 elements, the immediate is used twice so make
14996 // sure we handle that.
14997 for (unsigned i = 0; i != NumElts; ++i)
14998 Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
14999
15000 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15001 ArrayRef(Indices, NumElts), "blend");
15002 }
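  // Illustrative note, not part of this file: _mm_blend_epi32(A, B, 5)
  // (assuming the usual mapping onto __builtin_ia32_pblendd128) produces the
  // indices {4, 1, 6, 3}, i.e. a shufflevector selecting B[0], A[1], B[2],
  // A[3]; a set immediate bit picks the element from the second source.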
15003 case X86::BI__builtin_ia32_pshuflw:
15004 case X86::BI__builtin_ia32_pshuflw256:
15005 case X86::BI__builtin_ia32_pshuflw512: {
15006 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15007 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15008 unsigned NumElts = Ty->getNumElements();
15009
15010 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15011 Imm = (Imm & 0xff) * 0x01010101;
15012
15013 int Indices[32];
15014 for (unsigned l = 0; l != NumElts; l += 8) {
15015 for (unsigned i = 0; i != 4; ++i) {
15016 Indices[l + i] = l + (Imm & 3);
15017 Imm >>= 2;
15018 }
15019 for (unsigned i = 4; i != 8; ++i)
15020 Indices[l + i] = l + i;
15021 }
15022
15023 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15024 "pshuflw");
15025 }
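  // Illustrative note, not part of this file: _mm_shufflelo_epi16(V, 0x1B)
  // (assuming the usual mapping onto __builtin_ia32_pshuflw) produces the
  // indices {3, 2, 1, 0, 4, 5, 6, 7}: the low four words are reversed per the
  // immediate while the high four pass through unchanged.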
15026 case X86::BI__builtin_ia32_pshufhw:
15027 case X86::BI__builtin_ia32_pshufhw256:
15028 case X86::BI__builtin_ia32_pshufhw512: {
15029 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15030 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15031 unsigned NumElts = Ty->getNumElements();
15032
15033 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15034 Imm = (Imm & 0xff) * 0x01010101;
15035
15036 int Indices[32];
15037 for (unsigned l = 0; l != NumElts; l += 8) {
15038 for (unsigned i = 0; i != 4; ++i)
15039 Indices[l + i] = l + i;
15040 for (unsigned i = 4; i != 8; ++i) {
15041 Indices[l + i] = l + 4 + (Imm & 3);
15042 Imm >>= 2;
15043 }
15044 }
15045
15046 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15047 "pshufhw");
15048 }
15049 case X86::BI__builtin_ia32_pshufd:
15050 case X86::BI__builtin_ia32_pshufd256:
15051 case X86::BI__builtin_ia32_pshufd512:
15052 case X86::BI__builtin_ia32_vpermilpd:
15053 case X86::BI__builtin_ia32_vpermilps:
15054 case X86::BI__builtin_ia32_vpermilpd256:
15055 case X86::BI__builtin_ia32_vpermilps256:
15056 case X86::BI__builtin_ia32_vpermilpd512:
15057 case X86::BI__builtin_ia32_vpermilps512: {
15058 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15059 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15060 unsigned NumElts = Ty->getNumElements();
15061 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15062 unsigned NumLaneElts = NumElts / NumLanes;
15063
15064 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15065 Imm = (Imm & 0xff) * 0x01010101;
15066
15067 int Indices[16];
15068 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15069 for (unsigned i = 0; i != NumLaneElts; ++i) {
15070 Indices[i + l] = (Imm % NumLaneElts) + l;
15071 Imm /= NumLaneElts;
15072 }
15073 }
15074
15075 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15076 "permil");
15077 }
15078 case X86::BI__builtin_ia32_shufpd:
15079 case X86::BI__builtin_ia32_shufpd256:
15080 case X86::BI__builtin_ia32_shufpd512:
15081 case X86::BI__builtin_ia32_shufps:
15082 case X86::BI__builtin_ia32_shufps256:
15083 case X86::BI__builtin_ia32_shufps512: {
15084 uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15085 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15086 unsigned NumElts = Ty->getNumElements();
15087 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15088 unsigned NumLaneElts = NumElts / NumLanes;
15089
15090 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15091 Imm = (Imm & 0xff) * 0x01010101;
15092
15093 int Indices[16];
15094 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15095 for (unsigned i = 0; i != NumLaneElts; ++i) {
15096 unsigned Index = Imm % NumLaneElts;
15097 Imm /= NumLaneElts;
15098 if (i >= (NumLaneElts / 2))
15099 Index += NumElts;
15100 Indices[l + i] = l + Index;
15101 }
15102 }
15103
15104 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15105 ArrayRef(Indices, NumElts), "shufp");
15106 }
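// --- Illustrative sketch (not part of CGBuiltin.cpp) ------------------------
// The two-source shufps/shufpd mask above as a standalone function
// (hypothetical helper name). Indices >= NumElts refer to the second vector,
// matching shufflevector's convention of concatenated operands; for 4-element
// shufps with Imm = 0x44 the result is {0,1,4,5}.
#include <cstdint>
#include <vector>

static std::vector<int> shufpIndices(uint32_t Imm, unsigned NumElts,
                                     unsigned NumLaneElts) {
  Imm = (Imm & 0xff) * 0x01010101;
  std::vector<int> Indices(NumElts);
  for (unsigned l = 0; l != NumElts; l += NumLaneElts)
    for (unsigned i = 0; i != NumLaneElts; ++i) {
      unsigned Index = Imm % NumLaneElts;
      Imm /= NumLaneElts;
      if (i >= NumLaneElts / 2)
        Index += NumElts;             // upper half of each lane: second source
      Indices[l + i] = l + Index;
    }
  return Indices;
}
// -----------------------------------------------------------------------------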
15107 case X86::BI__builtin_ia32_permdi256:
15108 case X86::BI__builtin_ia32_permdf256:
15109 case X86::BI__builtin_ia32_permdi512:
15110 case X86::BI__builtin_ia32_permdf512: {
15111 unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15112 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15113 unsigned NumElts = Ty->getNumElements();
15114
15115 // These intrinsics operate on 256-bit lanes of four 64-bit elements.
15116 int Indices[8];
15117 for (unsigned l = 0; l != NumElts; l += 4)
15118 for (unsigned i = 0; i != 4; ++i)
15119 Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
15120
15121 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15122 "perm");
15123 }
15124 case X86::BI__builtin_ia32_palignr128:
15125 case X86::BI__builtin_ia32_palignr256:
15126 case X86::BI__builtin_ia32_palignr512: {
15127 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
15128
15129 unsigned NumElts =
15130 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15131 assert(NumElts % 16 == 0);
15132
15133 // If palignr is shifting the pair of vectors more than the size of two
15134 // lanes, emit zero.
15135 if (ShiftVal >= 32)
15136 return llvm::Constant::getNullValue(ConvertType(E->getType()));
15137
15138 // If palignr is shifting the pair of input vectors more than one lane,
15139 // but less than two lanes, convert to shifting in zeroes.
15140 if (ShiftVal > 16) {
15141 ShiftVal -= 16;
15142 Ops[1] = Ops[0];
15143 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
15144 }
15145
15146 int Indices[64];
15147 // 256-bit and 512-bit palignr operate on 128-bit lanes, so handle each lane.
15148 for (unsigned l = 0; l != NumElts; l += 16) {
15149 for (unsigned i = 0; i != 16; ++i) {
15150 unsigned Idx = ShiftVal + i;
15151 if (Idx >= 16)
15152 Idx += NumElts - 16; // End of lane, switch operand.
15153 Indices[l + i] = Idx + l;
15154 }
15155 }
15156
15157 return Builder.CreateShuffleVector(Ops[1], Ops[0],
15158 ArrayRef(Indices, NumElts), "palignr");
15159 }
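// --- Illustrative sketch (not part of CGBuiltin.cpp) ------------------------
// The per-lane palignr index computation above, after the >= 32 (all zero) and
// > 16 (shift in zeroes) cases have been handled, with a hypothetical helper
// name. For ShiftVal = 4 and a single 16-byte lane it produces
// {4,...,15,16,17,18,19}, i.e. the byte concatenation shifted right by 4.
#include <vector>

static std::vector<int> palignrIndices(unsigned ShiftVal, unsigned NumElts) {
  std::vector<int> Indices(NumElts);
  for (unsigned l = 0; l != NumElts; l += 16)
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (Idx >= 16)
        Idx += NumElts - 16;          // crossed the lane: take from the other operand
      Indices[l + i] = Idx + l;
    }
  return Indices;
}
// -----------------------------------------------------------------------------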
15160 case X86::BI__builtin_ia32_alignd128:
15161 case X86::BI__builtin_ia32_alignd256:
15162 case X86::BI__builtin_ia32_alignd512:
15163 case X86::BI__builtin_ia32_alignq128:
15164 case X86::BI__builtin_ia32_alignq256:
15165 case X86::BI__builtin_ia32_alignq512: {
15166 unsigned NumElts =
15167 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15168 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
15169
15170 // Mask the shift amount to width of a vector.
15171 ShiftVal &= NumElts - 1;
15172
15173 int Indices[16];
15174 for (unsigned i = 0; i != NumElts; ++i)
15175 Indices[i] = i + ShiftVal;
15176
15177 return Builder.CreateShuffleVector(Ops[1], Ops[0],
15178 ArrayRef(Indices, NumElts), "valign");
15179 }
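// --- Illustrative sketch (not part of CGBuiltin.cpp) ------------------------
// valignd/valignq above is a plain element rotate of the two-operand
// concatenation, so the mask is simply i + ShiftVal for every element
// (hypothetical helper name). For NumElts = 4 and ShiftVal = 1 the indices are
// {1,2,3,4}; index 4 selects from the second shuffle operand.
#include <vector>

static std::vector<int> valignIndices(unsigned ShiftVal, unsigned NumElts) {
  ShiftVal &= NumElts - 1;            // the shift amount wraps at the vector width
  std::vector<int> Indices(NumElts);
  for (unsigned i = 0; i != NumElts; ++i)
    Indices[i] = i + ShiftVal;
  return Indices;
}
// -----------------------------------------------------------------------------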
15180 case X86::BI__builtin_ia32_shuf_f32x4_256:
15181 case X86::BI__builtin_ia32_shuf_f64x2_256:
15182 case X86::BI__builtin_ia32_shuf_i32x4_256:
15183 case X86::BI__builtin_ia32_shuf_i64x2_256:
15184 case X86::BI__builtin_ia32_shuf_f32x4:
15185 case X86::BI__builtin_ia32_shuf_f64x2:
15186 case X86::BI__builtin_ia32_shuf_i32x4:
15187 case X86::BI__builtin_ia32_shuf_i64x2: {
15188 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15189 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15190 unsigned NumElts = Ty->getNumElements();
15191 unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
15192 unsigned NumLaneElts = NumElts / NumLanes;
15193
15194 int Indices[16];
15195 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15196 unsigned Index = (Imm % NumLanes) * NumLaneElts;
15197 Imm /= NumLanes; // Discard the bits we just used.
15198 if (l >= (NumElts / 2))
15199 Index += NumElts; // Switch to other source.
15200 for (unsigned i = 0; i != NumLaneElts; ++i) {
15201 Indices[l + i] = Index + i;
15202 }
15203 }
15204
15205 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15206 ArrayRef(Indices, NumElts), "shuf");
15207 }
15208
15209 case X86::BI__builtin_ia32_vperm2f128_pd256:
15210 case X86::BI__builtin_ia32_vperm2f128_ps256:
15211 case X86::BI__builtin_ia32_vperm2f128_si256:
15212 case X86::BI__builtin_ia32_permti256: {
15213 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15214 unsigned NumElts =
15215 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15216
15217 // This takes a very simple approach since there are two lanes and a
15218 // shuffle can have 2 inputs. So we reserve the first input for the first
15219 // lane and the second input for the second lane. This may result in
15220 // duplicate sources, but this can be dealt with in the backend.
15221
15222 Value *OutOps[2];
15223 int Indices[8];
15224 for (unsigned l = 0; l != 2; ++l) {
15225 // Determine the source for this lane.
15226 if (Imm & (1 << ((l * 4) + 3)))
15227 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
15228 else if (Imm & (1 << ((l * 4) + 1)))
15229 OutOps[l] = Ops[1];
15230 else
15231 OutOps[l] = Ops[0];
15232
15233 for (unsigned i = 0; i != NumElts/2; ++i) {
15234 // Start with ith element of the source for this lane.
15235 unsigned Idx = (l * NumElts) + i;
15236 // If bit 0 of the immediate half is set, switch to the high half of
15237 // the source.
15238 if (Imm & (1 << (l * 4)))
15239 Idx += NumElts/2;
15240 Indices[(l * (NumElts/2)) + i] = Idx;
15241 }
15242 }
15243
15244 return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
15245 ArrayRef(Indices, NumElts), "vperm");
15246 }
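// --- Illustrative sketch (not part of CGBuiltin.cpp) ------------------------
// How the vperm2f128/vperm2i128 immediate is decoded above, one 4-bit field per
// destination lane (hypothetical type and helper names): bit 3 zeroes the lane,
// bit 1 selects the second source, and bit 0 selects the high 128-bit half of
// whichever source was chosen.
struct LaneSelect {
  bool Zero;        // lane is forced to zero
  bool UseSecond;   // take the lane from the second source operand
  bool HighHalf;    // take the high 128-bit half of that source
};

static LaneSelect decodeVPerm2Lane(unsigned Imm, unsigned Lane /* 0 or 1 */) {
  unsigned Field = (Imm >> (Lane * 4)) & 0xf;
  return {(Field & 8) != 0, (Field & 2) != 0, (Field & 1) != 0};
}
// -----------------------------------------------------------------------------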
15247
15248 case X86::BI__builtin_ia32_pslldqi128_byteshift:
15249 case X86::BI__builtin_ia32_pslldqi256_byteshift:
15250 case X86::BI__builtin_ia32_pslldqi512_byteshift: {
15251 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15252 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
15253 // Builtin type is vXi64 so multiply by 8 to get bytes.
15254 unsigned NumElts = ResultType->getNumElements() * 8;
15255
15256 // If pslldq is shifting the vector more than 15 bytes, emit zero.
15257 if (ShiftVal >= 16)
15258 return llvm::Constant::getNullValue(ResultType);
15259
15260 int Indices[64];
15261 // 256/512-bit pslldq operates on 128-bit lanes, so handle each lane separately.
15262 for (unsigned l = 0; l != NumElts; l += 16) {
15263 for (unsigned i = 0; i != 16; ++i) {
15264 unsigned Idx = NumElts + i - ShiftVal;
15265 if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
15266 Indices[l + i] = Idx + l;
15267 }
15268 }
15269
15270 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
15271 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
15272 Value *Zero = llvm::Constant::getNullValue(VecTy);
15273 Value *SV = Builder.CreateShuffleVector(
15274 Zero, Cast, ArrayRef(Indices, NumElts), "pslldq");
15275 return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
15276 }
15277 case X86::BI__builtin_ia32_psrldqi128_byteshift:
15278 case X86::BI__builtin_ia32_psrldqi256_byteshift:
15279 case X86::BI__builtin_ia32_psrldqi512_byteshift: {
15280 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15281 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
15282 // Builtin type is vXi64 so multiply by 8 to get bytes.
15283 unsigned NumElts = ResultType->getNumElements() * 8;
15284
15285 // If psrldq is shifting the vector more than 15 bytes, emit zero.
15286 if (ShiftVal >= 16)
15287 return llvm::Constant::getNullValue(ResultType);
15288
15289 int Indices[64];
15290 // 256/512-bit psrldq operates on 128-bit lanes, so handle each lane separately.
15291 for (unsigned l = 0; l != NumElts; l += 16) {
15292 for (unsigned i = 0; i != 16; ++i) {
15293 unsigned Idx = i + ShiftVal;
15294 if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
15295 Indices[l + i] = Idx + l;
15296 }
15297 }
15298
15299 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
15300 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
15301 Value *Zero = llvm::Constant::getNullValue(VecTy);
15302 Value *SV = Builder.CreateShuffleVector(
15303 Cast, Zero, ArrayRef(Indices, NumElts), "psrldq");
15304 return Builder.CreateBitCast(SV, ResultType, "cast");
15305 }
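// --- Illustrative sketch (not part of CGBuiltin.cpp) ------------------------
// The psrldq mask computation above, with a hypothetical helper name. Shuffling
// against a zero vector lets out-of-range bytes come from the all-zero operand:
// for ShiftVal = 3 and one 16-byte lane the indices are {3,...,15,16,17,18},
// where 16-18 select zero bytes. pslldq builds the mirror-image mask.
#include <vector>

static std::vector<int> psrldqIndices(unsigned ShiftVal, unsigned NumElts) {
  std::vector<int> Indices(NumElts);
  for (unsigned l = 0; l != NumElts; l += 16)
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = i + ShiftVal;
      if (Idx >= 16)
        Idx += NumElts - 16;          // past the lane: index into the zero vector
      Indices[l + i] = Idx + l;
    }
  return Indices;
}
// -----------------------------------------------------------------------------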
15306 case X86::BI__builtin_ia32_kshiftliqi:
15307 case X86::BI__builtin_ia32_kshiftlihi:
15308 case X86::BI__builtin_ia32_kshiftlisi:
15309 case X86::BI__builtin_ia32_kshiftlidi: {
15310 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15311 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15312
15313 if (ShiftVal >= NumElts)
15314 return llvm::Constant::getNullValue(Ops[0]->getType());
15315
15316 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
15317
15318 int Indices[64];
15319 for (unsigned i = 0; i != NumElts; ++i)
15320 Indices[i] = NumElts + i - ShiftVal;
15321
15322 Value *Zero = llvm::Constant::getNullValue(In->getType());
15323 Value *SV = Builder.CreateShuffleVector(
15324 Zero, In, ArrayRef(Indices, NumElts), "kshiftl");
15325 return Builder.CreateBitCast(SV, Ops[0]->getType());
15326 }
15327 case X86::BI__builtin_ia32_kshiftriqi:
15328 case X86::BI__builtin_ia32_kshiftrihi:
15329 case X86::BI__builtin_ia32_kshiftrisi:
15330 case X86::BI__builtin_ia32_kshiftridi: {
15331 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15332 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15333
15334 if (ShiftVal >= NumElts)
15335 return llvm::Constant::getNullValue(Ops[0]->getType());
15336
15337 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
15338
15339 int Indices[64];
15340 for (unsigned i = 0; i != NumElts; ++i)
15341 Indices[i] = i + ShiftVal;
15342
15343 Value *Zero = llvm::Constant::getNullValue(In->getType());
15344 Value *SV = Builder.CreateShuffleVector(
15345 In, Zero, ArrayRef(Indices, NumElts), "kshiftr");
15346 return Builder.CreateBitCast(SV, Ops[0]->getType());
15347 }
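// --- Illustrative sketch (not part of CGBuiltin.cpp) ------------------------
// Semantically, kshiftli/kshiftri on a 16-bit mask register behave like
// ordinary integer shifts once the amount has been checked against the mask
// width; the vXi1 shuffles above implement exactly that bit-by-bit
// (hypothetical helper names).
#include <cstdint>

static uint16_t kshiftl16(uint16_t Mask, unsigned Shift) {
  return Shift >= 16 ? 0 : (uint16_t)(Mask << Shift);
}

static uint16_t kshiftr16(uint16_t Mask, unsigned Shift) {
  return Shift >= 16 ? 0 : (uint16_t)(Mask >> Shift);
}
// -----------------------------------------------------------------------------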
15348 case X86::BI__builtin_ia32_movnti:
15349 case X86::BI__builtin_ia32_movnti64:
15350 case X86::BI__builtin_ia32_movntsd:
15351 case X86::BI__builtin_ia32_movntss: {
15352 llvm::MDNode *Node = llvm::MDNode::get(
15353 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
15354
15355 Value *Ptr = Ops[0];
15356 Value *Src = Ops[1];
15357
15358 // Extract the 0th element of the source vector.
15359 if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
15360 BuiltinID == X86::BI__builtin_ia32_movntss)
15361 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
15362
15363 // Unaligned nontemporal store of the scalar value.
15364 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr);
15365 SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);
15366 SI->setAlignment(llvm::Align(1));
15367 return SI;
15368 }
15369 // Rotate is a special case of funnel shift - the first two args are the same.
15370 case X86::BI__builtin_ia32_vprotb:
15371 case X86::BI__builtin_ia32_vprotw:
15372 case X86::BI__builtin_ia32_vprotd:
15373 case X86::BI__builtin_ia32_vprotq:
15374 case X86::BI__builtin_ia32_vprotbi:
15375 case X86::BI__builtin_ia32_vprotwi:
15376 case X86::BI__builtin_ia32_vprotdi:
15377 case X86::BI__builtin_ia32_vprotqi:
15378 case X86::BI__builtin_ia32_prold128:
15379 case X86::BI__builtin_ia32_prold256:
15380 case X86::BI__builtin_ia32_prold512:
15381 case X86::BI__builtin_ia32_prolq128:
15382 case X86::BI__builtin_ia32_prolq256:
15383 case X86::BI__builtin_ia32_prolq512:
15384 case X86::BI__builtin_ia32_prolvd128:
15385 case X86::BI__builtin_ia32_prolvd256:
15386 case X86::BI__builtin_ia32_prolvd512:
15387 case X86::BI__builtin_ia32_prolvq128:
15388 case X86::BI__builtin_ia32_prolvq256:
15389 case X86::BI__builtin_ia32_prolvq512:
15390 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
15391 case X86::BI__builtin_ia32_prord128:
15392 case X86::BI__builtin_ia32_prord256:
15393 case X86::BI__builtin_ia32_prord512:
15394 case X86::BI__builtin_ia32_prorq128:
15395 case X86::BI__builtin_ia32_prorq256:
15396 case X86::BI__builtin_ia32_prorq512:
15397 case X86::BI__builtin_ia32_prorvd128:
15398 case X86::BI__builtin_ia32_prorvd256:
15399 case X86::BI__builtin_ia32_prorvd512:
15400 case X86::BI__builtin_ia32_prorvq128:
15401 case X86::BI__builtin_ia32_prorvq256:
15402 case X86::BI__builtin_ia32_prorvq512:
15403 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
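// --- Illustrative sketch (not part of CGBuiltin.cpp) ------------------------
// Passing the same value as both funnel-shift inputs yields a rotate, which is
// why the rotate builtins above can reuse EmitX86FunnelShift. For a 32-bit
// scalar the identity looks like this (hypothetical helper name):
#include <cstdint>

static uint32_t rotl32(uint32_t X, unsigned N) {
  N &= 31;                            // fshl takes the shift amount modulo the width
  return N == 0 ? X : (X << N) | (X >> (32 - N));
}
// -----------------------------------------------------------------------------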
15404 case X86::BI__builtin_ia32_selectb_128:
15405 case X86::BI__builtin_ia32_selectb_256:
15406 case X86::BI__builtin_ia32_selectb_512:
15407 case X86::BI__builtin_ia32_selectw_128:
15408 case X86::BI__builtin_ia32_selectw_256:
15409 case X86::BI__builtin_ia32_selectw_512:
15410 case X86::BI__builtin_ia32_selectd_128:
15411 case X86::BI__builtin_ia32_selectd_256:
15412 case X86::BI__builtin_ia32_selectd_512:
15413 case X86::BI__builtin_ia32_selectq_128:
15414 case X86::BI__builtin_ia32_selectq_256:
15415 case X86::BI__builtin_ia32_selectq_512:
15416 case X86::BI__builtin_ia32_selectph_128:
15417 case X86::BI__builtin_ia32_selectph_256:
15418 case X86::BI__builtin_ia32_selectph_512:
15419 case X86::BI__builtin_ia32_selectpbf_128:
15420 case X86::BI__builtin_ia32_selectpbf_256:
15421 case X86::BI__builtin_ia32_selectpbf_512:
15422 case X86::BI__builtin_ia32_selectps_128:
15423 case X86::BI__builtin_ia32_selectps_256:
15424 case X86::BI__builtin_ia32_selectps_512:
15425 case X86::BI__builtin_ia32_selectpd_128:
15426 case X86::BI__builtin_ia32_selectpd_256:
15427 case X86::BI__builtin_ia32_selectpd_512:
15428 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
15429 case X86::BI__builtin_ia32_selectsh_128:
15430 case X86::BI__builtin_ia32_selectsbf_128:
15431 case X86::BI__builtin_ia32_selectss_128:
15432 case X86::BI__builtin_ia32_selectsd_128: {
15433 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
15434 Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
15435 A = EmitX86ScalarSelect(*this, Ops[0], A, B);
15436 return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
15437 }
15438 case X86::BI__builtin_ia32_cmpb128_mask:
15439 case X86::BI__builtin_ia32_cmpb256_mask:
15440 case X86::BI__builtin_ia32_cmpb512_mask:
15441 case X86::BI__builtin_ia32_cmpw128_mask:
15442 case X86::BI__builtin_ia32_cmpw256_mask:
15443 case X86::BI__builtin_ia32_cmpw512_mask:
15444 case X86::BI__builtin_ia32_cmpd128_mask:
15445 case X86::BI__builtin_ia32_cmpd256_mask:
15446 case X86::BI__builtin_ia32_cmpd512_mask:
15447 case X86::BI__builtin_ia32_cmpq128_mask:
15448 case X86::BI__builtin_ia32_cmpq256_mask:
15449 case X86::BI__builtin_ia32_cmpq512_mask: {
15450 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
15451 return EmitX86MaskedCompare(*this, CC, true, Ops);
15452 }
15453 case X86::BI__builtin_ia32_ucmpb128_mask:
15454 case X86::BI__builtin_ia32_ucmpb256_mask:
15455 case X86::BI__builtin_ia32_ucmpb512_mask:
15456 case X86::BI__builtin_ia32_ucmpw128_mask:
15457 case X86::BI__builtin_ia32_ucmpw256_mask:
15458 case X86::BI__builtin_ia32_ucmpw512_mask:
15459 case X86::BI__builtin_ia32_ucmpd128_mask:
15460 case X86::BI__builtin_ia32_ucmpd256_mask:
15461 case X86::BI__builtin_ia32_ucmpd512_mask:
15462 case X86::BI__builtin_ia32_ucmpq128_mask:
15463 case X86::BI__builtin_ia32_ucmpq256_mask:
15464 case X86::BI__builtin_ia32_ucmpq512_mask: {
15465 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
15466 return EmitX86MaskedCompare(*this, CC, false, Ops);
15467 }
15468 case X86::BI__builtin_ia32_vpcomb:
15469 case X86::BI__builtin_ia32_vpcomw:
15470 case X86::BI__builtin_ia32_vpcomd:
15471 case X86::BI__builtin_ia32_vpcomq:
15472 return EmitX86vpcom(*this, Ops, true);
15473 case X86::BI__builtin_ia32_vpcomub:
15474 case X86::BI__builtin_ia32_vpcomuw:
15475 case X86::BI__builtin_ia32_vpcomud:
15476 case X86::BI__builtin_ia32_vpcomuq:
15477 return EmitX86vpcom(*this, Ops, false);
15478
15479 case X86::BI__builtin_ia32_kortestcqi:
15480 case X86::BI__builtin_ia32_kortestchi:
15481 case X86::BI__builtin_ia32_kortestcsi:
15482 case X86::BI__builtin_ia32_kortestcdi: {
15483 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
15484 Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
15485 Value *Cmp = Builder.CreateICmpEQ(Or, C);
15486 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
15487 }
15488 case X86::BI__builtin_ia32_kortestzqi:
15489 case X86::BI__builtin_ia32_kortestzhi:
15490 case X86::BI__builtin_ia32_kortestzsi:
15491 case X86::BI__builtin_ia32_kortestzdi: {
15492 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
15493 Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
15494 Value *Cmp = Builder.CreateICmpEQ(Or, C);
15495 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
15496 }
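// --- Illustrative sketch (not part of CGBuiltin.cpp) ------------------------
// kortestc/kortestz above OR the two mask registers and test the result for
// all-ones or all-zeroes respectively, returning 0 or 1 (hypothetical helper
// names, 16-bit masks assumed).
#include <cstdint>

static int kortestc16(uint16_t A, uint16_t B) {
  return (uint16_t)(A | B) == 0xffff;
}

static int kortestz16(uint16_t A, uint16_t B) {
  return (uint16_t)(A | B) == 0;
}
// -----------------------------------------------------------------------------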
15497
15498 case X86::BI__builtin_ia32_ktestcqi:
15499 case X86::BI__builtin_ia32_ktestzqi:
15500 case X86::BI__builtin_ia32_ktestchi:
15501 case X86::BI__builtin_ia32_ktestzhi:
15502 case X86::BI__builtin_ia32_ktestcsi:
15503 case X86::BI__builtin_ia32_ktestzsi:
15504 case X86::BI__builtin_ia32_ktestcdi:
15505 case X86::BI__builtin_ia32_ktestzdi: {
15506 Intrinsic::ID IID;
15507 switch (BuiltinID) {
15508 default: llvm_unreachable("Unsupported intrinsic!");
15509 case X86::BI__builtin_ia32_ktestcqi:
15510 IID = Intrinsic::x86_avx512_ktestc_b;
15511 break;
15512 case X86::BI__builtin_ia32_ktestzqi:
15513 IID = Intrinsic::x86_avx512_ktestz_b;
15514 break;
15515 case X86::BI__builtin_ia32_ktestchi:
15516 IID = Intrinsic::x86_avx512_ktestc_w;
15517 break;
15518 case X86::BI__builtin_ia32_ktestzhi:
15519 IID = Intrinsic::x86_avx512_ktestz_w;
15520 break;
15521 case X86::BI__builtin_ia32_ktestcsi:
15522 IID = Intrinsic::x86_avx512_ktestc_d;
15523 break;
15524 case X86::BI__builtin_ia32_ktestzsi:
15525 IID = Intrinsic::x86_avx512_ktestz_d;
15526 break;
15527 case X86::BI__builtin_ia32_ktestcdi:
15528 IID = Intrinsic::x86_avx512_ktestc_q;
15529 break;
15530 case X86::BI__builtin_ia32_ktestzdi:
15531 IID = Intrinsic::x86_avx512_ktestz_q;
15532 break;
15533 }
15534
15535 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15536 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15537 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15538 Function *Intr = CGM.getIntrinsic(IID);
15539 return Builder.CreateCall(Intr, {LHS, RHS});
15540 }
15541
15542 case X86::BI__builtin_ia32_kaddqi:
15543 case X86::BI__builtin_ia32_kaddhi:
15544 case X86::BI__builtin_ia32_kaddsi:
15545 case X86::BI__builtin_ia32_kadddi: {
15546 Intrinsic::ID IID;
15547 switch (BuiltinID) {
15548 default: llvm_unreachable("Unsupported intrinsic!");
15549 case X86::BI__builtin_ia32_kaddqi:
15550 IID = Intrinsic::x86_avx512_kadd_b;
15551 break;
15552 case X86::BI__builtin_ia32_kaddhi:
15553 IID = Intrinsic::x86_avx512_kadd_w;
15554 break;
15555 case X86::BI__builtin_ia32_kaddsi:
15556 IID = Intrinsic::x86_avx512_kadd_d;
15557 break;
15558 case X86::BI__builtin_ia32_kadddi:
15559 IID = Intrinsic::x86_avx512_kadd_q;
15560 break;
15561 }
15562
15563 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15564 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15565 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15566 Function *Intr = CGM.getIntrinsic(IID);
15567 Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
15568 return Builder.CreateBitCast(Res, Ops[0]->getType());
15569 }
15570 case X86::BI__builtin_ia32_kandqi:
15571 case X86::BI__builtin_ia32_kandhi:
15572 case X86::BI__builtin_ia32_kandsi:
15573 case X86::BI__builtin_ia32_kanddi:
15574 return EmitX86MaskLogic(*this, Instruction::And, Ops);
15575 case X86::BI__builtin_ia32_kandnqi:
15576 case X86::BI__builtin_ia32_kandnhi:
15577 case X86::BI__builtin_ia32_kandnsi:
15578 case X86::BI__builtin_ia32_kandndi:
15579 return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
15580 case X86::BI__builtin_ia32_korqi:
15581 case X86::BI__builtin_ia32_korhi:
15582 case X86::BI__builtin_ia32_korsi:
15583 case X86::BI__builtin_ia32_kordi:
15584 return EmitX86MaskLogic(*this, Instruction::Or, Ops);
15585 case X86::BI__builtin_ia32_kxnorqi:
15586 case X86::BI__builtin_ia32_kxnorhi:
15587 case X86::BI__builtin_ia32_kxnorsi:
15588 case X86::BI__builtin_ia32_kxnordi:
15589 return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
15590 case X86::BI__builtin_ia32_kxorqi:
15591 case X86::BI__builtin_ia32_kxorhi:
15592 case X86::BI__builtin_ia32_kxorsi:
15593 case X86::BI__builtin_ia32_kxordi:
15594 return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
15595 case X86::BI__builtin_ia32_knotqi:
15596 case X86::BI__builtin_ia32_knothi:
15597 case X86::BI__builtin_ia32_knotsi:
15598 case X86::BI__builtin_ia32_knotdi: {
15599 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15600 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
15601 return Builder.CreateBitCast(Builder.CreateNot(Res),
15602 Ops[0]->getType());
15603 }
15604 case X86::BI__builtin_ia32_kmovb:
15605 case X86::BI__builtin_ia32_kmovw:
15606 case X86::BI__builtin_ia32_kmovd:
15607 case X86::BI__builtin_ia32_kmovq: {
15608 // Bitcast to vXi1 type and then back to integer. This gets the mask
15609 // register type into the IR, but might be optimized out depending on
15610 // what's around it.
15611 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15612 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
15613 return Builder.CreateBitCast(Res, Ops[0]->getType());
15614 }
15615
15616 case X86::BI__builtin_ia32_kunpckdi:
15617 case X86::BI__builtin_ia32_kunpcksi:
15618 case X86::BI__builtin_ia32_kunpckhi: {
15619 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15620 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15621 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15622 int Indices[64];
15623 for (unsigned i = 0; i != NumElts; ++i)
15624 Indices[i] = i;
15625
15626 // First extract half of each vector. This gives better codegen than
15627 // doing it in a single shuffle.
15628 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
15629 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
15630 // Concat the vectors.
15631 // NOTE: Operands are swapped to match the intrinsic definition.
15632 Value *Res =
15633 Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
15634 return Builder.CreateBitCast(Res, Ops[0]->getType());
15635 }
15636
15637 case X86::BI__builtin_ia32_vplzcntd_128:
15638 case X86::BI__builtin_ia32_vplzcntd_256:
15639 case X86::BI__builtin_ia32_vplzcntd_512:
15640 case X86::BI__builtin_ia32_vplzcntq_128:
15641 case X86::BI__builtin_ia32_vplzcntq_256:
15642 case X86::BI__builtin_ia32_vplzcntq_512: {
15643 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
15644 return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
15645 }
15646 case X86::BI__builtin_ia32_sqrtss:
15647 case X86::BI__builtin_ia32_sqrtsd: {
15648 Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
15649 Function *F;
15650 if (Builder.getIsFPConstrained()) {
15651 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15652 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15653 A->getType());
15654 A = Builder.CreateConstrainedFPCall(F, {A});
15655 } else {
15656 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
15657 A = Builder.CreateCall(F, {A});
15658 }
15659 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
15660 }
15661 case X86::BI__builtin_ia32_sqrtsh_round_mask:
15662 case X86::BI__builtin_ia32_sqrtsd_round_mask:
15663 case X86::BI__builtin_ia32_sqrtss_round_mask: {
15664 unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
15665 // Lower to a plain sqrt only if the rounding mode is 4 (AKA CUR_DIRECTION);
15666 // otherwise keep the target-specific intrinsic.
15667 if (CC != 4) {
15668 Intrinsic::ID IID;
15669
15670 switch (BuiltinID) {
15671 default:
15672 llvm_unreachable("Unsupported intrinsic!");
15673 case X86::BI__builtin_ia32_sqrtsh_round_mask:
15674 IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
15675 break;
15676 case X86::BI__builtin_ia32_sqrtsd_round_mask:
15677 IID = Intrinsic::x86_avx512_mask_sqrt_sd;
15678 break;
15679 case X86::BI__builtin_ia32_sqrtss_round_mask:
15680 IID = Intrinsic::x86_avx512_mask_sqrt_ss;
15681 break;
15682 }
15683 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
15684 }
15685 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
15686 Function *F;
15687 if (Builder.getIsFPConstrained()) {
15688 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15689 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15690 A->getType());
15691 A = Builder.CreateConstrainedFPCall(F, A);
15692 } else {
15693 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
15694 A = Builder.CreateCall(F, A);
15695 }
15696 Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
15697 A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
15698 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
15699 }
15700 case X86::BI__builtin_ia32_sqrtpd256:
15701 case X86::BI__builtin_ia32_sqrtpd:
15702 case X86::BI__builtin_ia32_sqrtps256:
15703 case X86::BI__builtin_ia32_sqrtps:
15704 case X86::BI__builtin_ia32_sqrtph256:
15705 case X86::BI__builtin_ia32_sqrtph:
15706 case X86::BI__builtin_ia32_sqrtph512:
15707 case X86::BI__builtin_ia32_sqrtps512:
15708 case X86::BI__builtin_ia32_sqrtpd512: {
15709 if (Ops.size() == 2) {
15710 unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15711 // Lower to a plain sqrt only if the rounding mode is 4 (AKA CUR_DIRECTION);
15712 // otherwise keep the target-specific intrinsic.
15713 if (CC != 4) {
15714 Intrinsic::ID IID;
15715
15716 switch (BuiltinID) {
15717 default:
15718 llvm_unreachable("Unsupported intrinsic!");
15719 case X86::BI__builtin_ia32_sqrtph512:
15720 IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
15721 break;
15722 case X86::BI__builtin_ia32_sqrtps512:
15723 IID = Intrinsic::x86_avx512_sqrt_ps_512;
15724 break;
15725 case X86::BI__builtin_ia32_sqrtpd512:
15726 IID = Intrinsic::x86_avx512_sqrt_pd_512;
15727 break;
15728 }
15729 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
15730 }
15731 }
15732 if (Builder.getIsFPConstrained()) {
15733 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15734 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15735 Ops[0]->getType());
15736 return Builder.CreateConstrainedFPCall(F, Ops[0]);
15737 } else {
15738 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
15739 return Builder.CreateCall(F, Ops[0]);
15740 }
15741 }
15742
15743 case X86::BI__builtin_ia32_pmuludq128:
15744 case X86::BI__builtin_ia32_pmuludq256:
15745 case X86::BI__builtin_ia32_pmuludq512:
15746 return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
15747
15748 case X86::BI__builtin_ia32_pmuldq128:
15749 case X86::BI__builtin_ia32_pmuldq256:
15750 case X86::BI__builtin_ia32_pmuldq512:
15751 return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
15752
15753 case X86::BI__builtin_ia32_pternlogd512_mask:
15754 case X86::BI__builtin_ia32_pternlogq512_mask:
15755 case X86::BI__builtin_ia32_pternlogd128_mask:
15756 case X86::BI__builtin_ia32_pternlogd256_mask:
15757 case X86::BI__builtin_ia32_pternlogq128_mask:
15758 case X86::BI__builtin_ia32_pternlogq256_mask:
15759 return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
15760
15761 case X86::BI__builtin_ia32_pternlogd512_maskz:
15762 case X86::BI__builtin_ia32_pternlogq512_maskz:
15763 case X86::BI__builtin_ia32_pternlogd128_maskz:
15764 case X86::BI__builtin_ia32_pternlogd256_maskz:
15765 case X86::BI__builtin_ia32_pternlogq128_maskz:
15766 case X86::BI__builtin_ia32_pternlogq256_maskz:
15767 return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
15768
15769 case X86::BI__builtin_ia32_vpshldd128:
15770 case X86::BI__builtin_ia32_vpshldd256:
15771 case X86::BI__builtin_ia32_vpshldd512:
15772 case X86::BI__builtin_ia32_vpshldq128:
15773 case X86::BI__builtin_ia32_vpshldq256:
15774 case X86::BI__builtin_ia32_vpshldq512:
15775 case X86::BI__builtin_ia32_vpshldw128:
15776 case X86::BI__builtin_ia32_vpshldw256:
15777 case X86::BI__builtin_ia32_vpshldw512:
15778 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
15779
15780 case X86::BI__builtin_ia32_vpshrdd128:
15781 case X86::BI__builtin_ia32_vpshrdd256:
15782 case X86::BI__builtin_ia32_vpshrdd512:
15783 case X86::BI__builtin_ia32_vpshrdq128:
15784 case X86::BI__builtin_ia32_vpshrdq256:
15785 case X86::BI__builtin_ia32_vpshrdq512:
15786 case X86::BI__builtin_ia32_vpshrdw128:
15787 case X86::BI__builtin_ia32_vpshrdw256:
15788 case X86::BI__builtin_ia32_vpshrdw512:
15789 // Ops 0 and 1 are swapped.
15790 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
15791
15792 case X86::BI__builtin_ia32_vpshldvd128:
15793 case X86::BI__builtin_ia32_vpshldvd256:
15794 case X86::BI__builtin_ia32_vpshldvd512:
15795 case X86::BI__builtin_ia32_vpshldvq128:
15796 case X86::BI__builtin_ia32_vpshldvq256:
15797 case X86::BI__builtin_ia32_vpshldvq512:
15798 case X86::BI__builtin_ia32_vpshldvw128:
15799 case X86::BI__builtin_ia32_vpshldvw256:
15800 case X86::BI__builtin_ia32_vpshldvw512:
15801 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
15802
15803 case X86::BI__builtin_ia32_vpshrdvd128:
15804 case X86::BI__builtin_ia32_vpshrdvd256:
15805 case X86::BI__builtin_ia32_vpshrdvd512:
15806 case X86::BI__builtin_ia32_vpshrdvq128:
15807 case X86::BI__builtin_ia32_vpshrdvq256:
15808 case X86::BI__builtin_ia32_vpshrdvq512:
15809 case X86::BI__builtin_ia32_vpshrdvw128:
15810 case X86::BI__builtin_ia32_vpshrdvw256:
15811 case X86::BI__builtin_ia32_vpshrdvw512:
15812 // Ops 0 and 1 are swapped.
15813 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
15814
15815 // Reductions
15816 case X86::BI__builtin_ia32_reduce_fadd_pd512:
15817 case X86::BI__builtin_ia32_reduce_fadd_ps512:
15818 case X86::BI__builtin_ia32_reduce_fadd_ph512:
15819 case X86::BI__builtin_ia32_reduce_fadd_ph256:
15820 case X86::BI__builtin_ia32_reduce_fadd_ph128: {
15821 Function *F =
15822 CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
15823 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
15824 Builder.getFastMathFlags().setAllowReassoc();
15825 return Builder.CreateCall(F, {Ops[0], Ops[1]});
15826 }
15827 case X86::BI__builtin_ia32_reduce_fmul_pd512:
15828 case X86::BI__builtin_ia32_reduce_fmul_ps512:
15829 case X86::BI__builtin_ia32_reduce_fmul_ph512:
15830 case X86::BI__builtin_ia32_reduce_fmul_ph256:
15831 case X86::BI__builtin_ia32_reduce_fmul_ph128: {
15832 Function *F =
15833 CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
15834 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
15835 Builder.getFastMathFlags().setAllowReassoc();
15836 return Builder.CreateCall(F, {Ops[0], Ops[1]});
15837 }
15838 case X86::BI__builtin_ia32_reduce_fmax_pd512:
15839 case X86::BI__builtin_ia32_reduce_fmax_ps512:
15840 case X86::BI__builtin_ia32_reduce_fmax_ph512:
15841 case X86::BI__builtin_ia32_reduce_fmax_ph256:
15842 case X86::BI__builtin_ia32_reduce_fmax_ph128: {
15843 Function *F =
15844 CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
15845 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
15846 Builder.getFastMathFlags().setNoNaNs();
15847 return Builder.CreateCall(F, {Ops[0]});
15848 }
15849 case X86::BI__builtin_ia32_reduce_fmin_pd512:
15850 case X86::BI__builtin_ia32_reduce_fmin_ps512:
15851 case X86::BI__builtin_ia32_reduce_fmin_ph512:
15852 case X86::BI__builtin_ia32_reduce_fmin_ph256:
15853 case X86::BI__builtin_ia32_reduce_fmin_ph128: {
15854 Function *F =
15855 CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
15856 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
15857 Builder.getFastMathFlags().setNoNaNs();
15858 return Builder.CreateCall(F, {Ops[0]});
15859 }
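// --- Illustrative sketch (not part of CGBuiltin.cpp) ------------------------
// The reduce_fadd/reduce_fmul builtins above lower to llvm.vector.reduce.fadd/
// fmul calls with the reassoc fast-math flag set, i.e. the reduction is seeded
// with the accumulator operand and may be reassociated. A scalar restatement,
// assuming a plain array stands in for the vector (hypothetical helper name):
#include <cstddef>

static float reduceFAddReassoc(float Acc, const float *V, size_t N) {
  float Sum = Acc;
  for (size_t I = 0; I != N; ++I)
    Sum += V[I];                      // the vectorized form may reorder these adds
  return Sum;
}
// -----------------------------------------------------------------------------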
15860
15861 // 3DNow!
15862 case X86::BI__builtin_ia32_pswapdsf:
15863 case X86::BI__builtin_ia32_pswapdsi: {
15864 llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
15865 Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
15866 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
15867 return Builder.CreateCall(F, Ops, "pswapd");
15868 }
15869 case X86::BI__builtin_ia32_rdrand16_step:
15870 case X86::BI__builtin_ia32_rdrand32_step:
15871 case X86::BI__builtin_ia32_rdrand64_step:
15872 case X86::BI__builtin_ia32_rdseed16_step:
15873 case X86::BI__builtin_ia32_rdseed32_step:
15874 case X86::BI__builtin_ia32_rdseed64_step: {
15875 Intrinsic::ID ID;
15876 switch (BuiltinID) {
15877 default: llvm_unreachable("Unsupported intrinsic!");
15878 case X86::BI__builtin_ia32_rdrand16_step:
15879 ID = Intrinsic::x86_rdrand_16;
15880 break;
15881 case X86::BI__builtin_ia32_rdrand32_step:
15882 ID = Intrinsic::x86_rdrand_32;
15883 break;
15884 case X86::BI__builtin_ia32_rdrand64_step:
15885 ID = Intrinsic::x86_rdrand_64;
15886 break;
15887 case X86::BI__builtin_ia32_rdseed16_step:
15888 ID = Intrinsic::x86_rdseed_16;
15889 break;
15890 case X86::BI__builtin_ia32_rdseed32_step:
15891 ID = Intrinsic::x86_rdseed_32;
15892 break;
15893 case X86::BI__builtin_ia32_rdseed64_step:
15894 ID = Intrinsic::x86_rdseed_64;
15895 break;
15896 }
15897
15898 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
15899 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
15900 Ops[0]);
15901 return Builder.CreateExtractValue(Call, 1);
15902 }
15903 case X86::BI__builtin_ia32_addcarryx_u32:
15904 case X86::BI__builtin_ia32_addcarryx_u64:
15905 case X86::BI__builtin_ia32_subborrow_u32:
15906 case X86::BI__builtin_ia32_subborrow_u64: {
15907 Intrinsic::ID IID;
15908 switch (BuiltinID) {
15909 default: llvm_unreachable("Unsupported intrinsic!");
15910 case X86::BI__builtin_ia32_addcarryx_u32:
15911 IID = Intrinsic::x86_addcarry_32;
15912 break;
15913 case X86::BI__builtin_ia32_addcarryx_u64:
15914 IID = Intrinsic::x86_addcarry_64;
15915 break;
15916 case X86::BI__builtin_ia32_subborrow_u32:
15917 IID = Intrinsic::x86_subborrow_32;
15918 break;
15919 case X86::BI__builtin_ia32_subborrow_u64:
15920 IID = Intrinsic::x86_subborrow_64;
15921 break;
15922 }
15923
15924 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
15925 { Ops[0], Ops[1], Ops[2] });
15926 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
15927 Ops[3]);
15928 return Builder.CreateExtractValue(Call, 0);
15929 }
15930
15931 case X86::BI__builtin_ia32_fpclassps128_mask:
15932 case X86::BI__builtin_ia32_fpclassps256_mask:
15933 case X86::BI__builtin_ia32_fpclassps512_mask:
15934 case X86::BI__builtin_ia32_fpclassph128_mask:
15935 case X86::BI__builtin_ia32_fpclassph256_mask:
15936 case X86::BI__builtin_ia32_fpclassph512_mask:
15937 case X86::BI__builtin_ia32_fpclasspd128_mask:
15938 case X86::BI__builtin_ia32_fpclasspd256_mask:
15939 case X86::BI__builtin_ia32_fpclasspd512_mask: {
15940 unsigned NumElts =
15941 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15942 Value *MaskIn = Ops[2];
15943 Ops.erase(&Ops[2]);
15944
15945 Intrinsic::ID ID;
15946 switch (BuiltinID) {
15947 default: llvm_unreachable("Unsupported intrinsic!");
15948 case X86::BI__builtin_ia32_fpclassph128_mask:
15949 ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
15950 break;
15951 case X86::BI__builtin_ia32_fpclassph256_mask:
15952 ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
15953 break;
15954 case X86::BI__builtin_ia32_fpclassph512_mask:
15955 ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
15956 break;
15957 case X86::BI__builtin_ia32_fpclassps128_mask:
15958 ID = Intrinsic::x86_avx512_fpclass_ps_128;
15959 break;
15960 case X86::BI__builtin_ia32_fpclassps256_mask:
15961 ID = Intrinsic::x86_avx512_fpclass_ps_256;
15962 break;
15963 case X86::BI__builtin_ia32_fpclassps512_mask:
15964 ID = Intrinsic::x86_avx512_fpclass_ps_512;
15965 break;
15966 case X86::BI__builtin_ia32_fpclasspd128_mask:
15967 ID = Intrinsic::x86_avx512_fpclass_pd_128;
15968 break;
15969 case X86::BI__builtin_ia32_fpclasspd256_mask:
15970 ID = Intrinsic::x86_avx512_fpclass_pd_256;
15971 break;
15972 case X86::BI__builtin_ia32_fpclasspd512_mask:
15973 ID = Intrinsic::x86_avx512_fpclass_pd_512;
15974 break;
15975 }
15976
15977 Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
15978 return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
15979 }
15980
15981 case X86::BI__builtin_ia32_vp2intersect_q_512:
15982 case X86::BI__builtin_ia32_vp2intersect_q_256:
15983 case X86::BI__builtin_ia32_vp2intersect_q_128:
15984 case X86::BI__builtin_ia32_vp2intersect_d_512:
15985 case X86::BI__builtin_ia32_vp2intersect_d_256:
15986 case X86::BI__builtin_ia32_vp2intersect_d_128: {
15987 unsigned NumElts =
15988 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15989 Intrinsic::ID ID;
15990
15991 switch (BuiltinID) {
15992 default: llvm_unreachable("Unsupported intrinsic!");
15993 case X86::BI__builtin_ia32_vp2intersect_q_512:
15994 ID = Intrinsic::x86_avx512_vp2intersect_q_512;
15995 break;
15996 case X86::BI__builtin_ia32_vp2intersect_q_256:
15997 ID = Intrinsic::x86_avx512_vp2intersect_q_256;
15998 break;
15999 case X86::BI__builtin_ia32_vp2intersect_q_128:
16000 ID = Intrinsic::x86_avx512_vp2intersect_q_128;
16001 break;
16002 case X86::BI__builtin_ia32_vp2intersect_d_512:
16003 ID = Intrinsic::x86_avx512_vp2intersect_d_512;
16004 break;
16005 case X86::BI__builtin_ia32_vp2intersect_d_256:
16006 ID = Intrinsic::x86_avx512_vp2intersect_d_256;
16007 break;
16008 case X86::BI__builtin_ia32_vp2intersect_d_128:
16009 ID = Intrinsic::x86_avx512_vp2intersect_d_128;
16010 break;
16011 }
16012
16013 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
16014 Value *Result = Builder.CreateExtractValue(Call, 0);
16015 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
16016 Builder.CreateDefaultAlignedStore(Result, Ops[2]);
16017
16018 Result = Builder.CreateExtractValue(Call, 1);
16019 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
16020 return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
16021 }
16022
16023 case X86::BI__builtin_ia32_vpmultishiftqb128:
16024 case X86::BI__builtin_ia32_vpmultishiftqb256:
16025 case X86::BI__builtin_ia32_vpmultishiftqb512: {
16026 Intrinsic::ID ID;
16027 switch (BuiltinID) {
16028 default: llvm_unreachable("Unsupported intrinsic!");
16029 case X86::BI__builtin_ia32_vpmultishiftqb128:
16030 ID = Intrinsic::x86_avx512_pmultishift_qb_128;
16031 break;
16032 case X86::BI__builtin_ia32_vpmultishiftqb256:
16033 ID = Intrinsic::x86_avx512_pmultishift_qb_256;
16034 break;
16035 case X86::BI__builtin_ia32_vpmultishiftqb512:
16036 ID = Intrinsic::x86_avx512_pmultishift_qb_512;
16037 break;
16038 }
16039
16040 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16041 }
16042
16043 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16044 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16045 case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
16046 unsigned NumElts =
16047 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16048 Value *MaskIn = Ops[2];
16049 Ops.erase(&Ops[2]);
16050
16051 Intrinsic::ID ID;
16052 switch (BuiltinID) {
16053 default: llvm_unreachable("Unsupported intrinsic!");
16054 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16055 ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
16056 break;
16057 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16058 ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
16059 break;
16060 case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
16061 ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
16062 break;
16063 }
16064
16065 Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16066 return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
16067 }
16068
16069 // packed comparison intrinsics
16070 case X86::BI__builtin_ia32_cmpeqps:
16071 case X86::BI__builtin_ia32_cmpeqpd:
16072 return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
16073 case X86::BI__builtin_ia32_cmpltps:
16074 case X86::BI__builtin_ia32_cmpltpd:
16075 return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
16076 case X86::BI__builtin_ia32_cmpleps:
16077 case X86::BI__builtin_ia32_cmplepd:
16078 return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
16079 case X86::BI__builtin_ia32_cmpunordps:
16080 case X86::BI__builtin_ia32_cmpunordpd:
16081 return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
16082 case X86::BI__builtin_ia32_cmpneqps:
16083 case X86::BI__builtin_ia32_cmpneqpd:
16084 return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
16085 case X86::BI__builtin_ia32_cmpnltps:
16086 case X86::BI__builtin_ia32_cmpnltpd:
16087 return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
16088 case X86::BI__builtin_ia32_cmpnleps:
16089 case X86::BI__builtin_ia32_cmpnlepd:
16090 return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
16091 case X86::BI__builtin_ia32_cmpordps:
16092 case X86::BI__builtin_ia32_cmpordpd:
16093 return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
16094 case X86::BI__builtin_ia32_cmpph128_mask:
16095 case X86::BI__builtin_ia32_cmpph256_mask:
16096 case X86::BI__builtin_ia32_cmpph512_mask:
16097 case X86::BI__builtin_ia32_cmpps128_mask:
16098 case X86::BI__builtin_ia32_cmpps256_mask:
16099 case X86::BI__builtin_ia32_cmpps512_mask:
16100 case X86::BI__builtin_ia32_cmppd128_mask:
16101 case X86::BI__builtin_ia32_cmppd256_mask:
16102 case X86::BI__builtin_ia32_cmppd512_mask:
16103 IsMaskFCmp = true;
16104 [[fallthrough]];
16105 case X86::BI__builtin_ia32_cmpps:
16106 case X86::BI__builtin_ia32_cmpps256:
16107 case X86::BI__builtin_ia32_cmppd:
16108 case X86::BI__builtin_ia32_cmppd256: {
16109 // Lower vector comparisons to fcmp instructions, while
16110 // ignoring the requested signalling behaviour and
16111 // the requested rounding mode.
16112 // This is only possible if fp-model is not strict and FENV_ACCESS is off.
16113
16114 // The third argument is the comparison condition, an integer in the
16115 // range [0, 31].
16116 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
16117
16118 // Lowering to IR fcmp instruction.
16119 // Ignoring requested signaling behaviour,
16120 // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
16121 FCmpInst::Predicate Pred;
16122 bool IsSignaling;
16123 // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
16124 // behavior is inverted. We'll handle that after the switch.
16125 switch (CC & 0xf) {
16126 case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break;
16127 case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break;
16128 case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break;
16129 case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break;
16130 case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break;
16131 case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break;
16132 case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break;
16133 case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break;
16134 case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break;
16135 case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break;
16136 case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break;
16137 case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
16138 case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break;
16139 case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break;
16140 case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break;
16141 case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break;
16142 default: llvm_unreachable("Unhandled CC");
16143 }
16144
16145 // Invert the signalling behavior for 16-31.
16146 if (CC & 0x10)
16147 IsSignaling = !IsSignaling;
16148
16149 // If the predicate is true or false and we're using constrained intrinsics,
16150 // we don't have a compare intrinsic we can use. Just use the legacy X86
16151 // specific intrinsic.
16152 // If the intrinsic is mask enabled and we're using constrained intrinsics,
16153 // use the legacy X86 specific intrinsic.
16154 if (Builder.getIsFPConstrained() &&
16155 (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
16156 IsMaskFCmp)) {
16157
16158 Intrinsic::ID IID;
16159 switch (BuiltinID) {
16160 default: llvm_unreachable("Unexpected builtin");
16161 case X86::BI__builtin_ia32_cmpps:
16162 IID = Intrinsic::x86_sse_cmp_ps;
16163 break;
16164 case X86::BI__builtin_ia32_cmpps256:
16165 IID = Intrinsic::x86_avx_cmp_ps_256;
16166 break;
16167 case X86::BI__builtin_ia32_cmppd:
16168 IID = Intrinsic::x86_sse2_cmp_pd;
16169 break;
16170 case X86::BI__builtin_ia32_cmppd256:
16171 IID = Intrinsic::x86_avx_cmp_pd_256;
16172 break;
16173 case X86::BI__builtin_ia32_cmpph128_mask:
16174 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;
16175 break;
16176 case X86::BI__builtin_ia32_cmpph256_mask:
16177 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;
16178 break;
16179 case X86::BI__builtin_ia32_cmpph512_mask:
16180 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;
16181 break;
16182 case X86::BI__builtin_ia32_cmpps512_mask:
16183 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
16184 break;
16185 case X86::BI__builtin_ia32_cmppd512_mask:
16186 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
16187 break;
16188 case X86::BI__builtin_ia32_cmpps128_mask:
16189 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
16190 break;
16191 case X86::BI__builtin_ia32_cmpps256_mask:
16192 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
16193 break;
16194 case X86::BI__builtin_ia32_cmppd128_mask:
16195 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
16196 break;
16197 case X86::BI__builtin_ia32_cmppd256_mask:
16198 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
16199 break;
16200 }
16201
16202 Function *Intr = CGM.getIntrinsic(IID);
16203 if (IsMaskFCmp) {
16204 unsigned NumElts =
16205 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16206 Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
16207 Value *Cmp = Builder.CreateCall(Intr, Ops);
16208 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
16209 }
16210
16211 return Builder.CreateCall(Intr, Ops);
16212 }
16213
16214 // Builtins without the _mask suffix return a vector of integers
16215 // of the same width as the input vectors
16216 if (IsMaskFCmp) {
16217 // We ignore SAE if strict FP is disabled. We only keep precise
16218 // exception behavior under strict FP.
16219 // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
16220 // object will be required.
16221 unsigned NumElts =
16222 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16223 Value *Cmp;
16224 if (IsSignaling)
16225 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
16226 else
16227 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
16228 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
16229 }
16230
16231 return getVectorFCmpIR(Pred, IsSignaling);
16232 }
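// --- Illustrative sketch (not part of CGBuiltin.cpp) ------------------------
// How the 5-bit comparison immediate used above decomposes (hypothetical type
// and helper names): the low four bits select one of the sixteen base
// predicates and bit 4 only flips the signalling behaviour, so e.g. _CMP_GT_OS
// (0x0e) and _CMP_GT_OQ (0x1e) both end up as FCMP_OGT.
#include <cstdint>

struct CmpSelect {
  unsigned BasePredicate;   // index into the 16-entry switch above
  bool InvertSignaling;     // CC & 0x10 toggles quiet vs. signalling
};

static CmpSelect decodeCmpImmediate(uint32_t CC) {
  CC &= 0x1f;
  return {CC & 0xf, (CC & 0x10) != 0};
}
// -----------------------------------------------------------------------------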
16233
16234 // SSE scalar comparison intrinsics
16235 case X86::BI__builtin_ia32_cmpeqss:
16236 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
16237 case X86::BI__builtin_ia32_cmpltss:
16238 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
16239 case X86::BI__builtin_ia32_cmpless:
16240 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
16241 case X86::BI__builtin_ia32_cmpunordss:
16242 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
16243 case X86::BI__builtin_ia32_cmpneqss:
16244 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
16245 case X86::BI__builtin_ia32_cmpnltss:
16246 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
16247 case X86::BI__builtin_ia32_cmpnless:
16248 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
16249 case X86::BI__builtin_ia32_cmpordss:
16250 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
16251 case X86::BI__builtin_ia32_cmpeqsd:
16252 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
16253 case X86::BI__builtin_ia32_cmpltsd:
16254 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
16255 case X86::BI__builtin_ia32_cmplesd:
16256 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
16257 case X86::BI__builtin_ia32_cmpunordsd:
16258 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
16259 case X86::BI__builtin_ia32_cmpneqsd:
16260 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
16261 case X86::BI__builtin_ia32_cmpnltsd:
16262 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
16263 case X86::BI__builtin_ia32_cmpnlesd:
16264 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
16265 case X86::BI__builtin_ia32_cmpordsd:
16266 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
16267
16268 // f16c half2float intrinsics
16269 case X86::BI__builtin_ia32_vcvtph2ps:
16270 case X86::BI__builtin_ia32_vcvtph2ps256:
16271 case X86::BI__builtin_ia32_vcvtph2ps_mask:
16272 case X86::BI__builtin_ia32_vcvtph2ps256_mask:
16273 case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
16274 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16275 return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
16276 }
16277
16278 // AVX512 bf16 intrinsics
16279 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
16280 Ops[2] = getMaskVecValue(
16281 *this, Ops[2],
16282 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
16283 Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
16284 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16285 }
16286 case X86::BI__builtin_ia32_cvtsbf162ss_32:
16287 return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());
16288
16289 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
16290 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
16291 Intrinsic::ID IID;
16292 switch (BuiltinID) {
16293 default: llvm_unreachable("Unsupported intrinsic!");
16294 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
16295 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
16296 break;
16297 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
16298 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
16299 break;
16300 }
16301 Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
16302 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
16303 }
16304
16305 case X86::BI__cpuid:
16306 case X86::BI__cpuidex: {
16307 Value *FuncId = EmitScalarExpr(E->getArg(1));
16308 Value *SubFuncId = BuiltinID == X86::BI__cpuidex
16309 ? EmitScalarExpr(E->getArg(2))
16310 : llvm::ConstantInt::get(Int32Ty, 0);
16311
16312 llvm::StructType *CpuidRetTy =
16313 llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
16314 llvm::FunctionType *FTy =
16315 llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);
16316
16317 StringRef Asm, Constraints;
16318 if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
16319 Asm = "cpuid";
16320 Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
16321 } else {
16322 // x86-64 uses %rbx as the base register, so preserve it.
16323 Asm = "xchgq %rbx, ${1:q}\n"
16324 "cpuid\n"
16325 "xchgq %rbx, ${1:q}";
16326 Constraints = "={ax},=r,={cx},={dx},0,2";
16327 }
16328
16329 llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
16330 /*hasSideEffects=*/false);
16331 Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
16332 Value *BasePtr = EmitScalarExpr(E->getArg(0));
16333 Value *Store = nullptr;
16334 for (unsigned i = 0; i < 4; i++) {
16335 Value *Extracted = Builder.CreateExtractValue(IACall, i);
16336 Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
16337 Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
16338 }
16339
16340 // Return the last store instruction to signal that we have emitted
16341 // the intrinsic.
16342 return Store;
16343 }
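// --- Illustrative sketch (not part of CGBuiltin.cpp) ------------------------
// Typical use of the __cpuid/__cpuidex builtins lowered above, assuming an
// MSVC-compatible environment where <intrin.h> declares them. The four stores
// emitted above fill Info[0..3] with EAX, EBX, ECX and EDX in that order.
#include <intrin.h>

static int maxBasicCpuidLeaf() {
  int Info[4];
  __cpuidex(Info, 0, 0);   // leaf 0, subleaf 0
  return Info[0];          // EAX: highest supported basic leaf
}
// -----------------------------------------------------------------------------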
16344
16345 case X86::BI__emul:
16346 case X86::BI__emulu: {
16347 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
16348 bool isSigned = (BuiltinID == X86::BI__emul);
16349 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
16350 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
16351 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
16352 }
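// --- Illustrative sketch (not part of CGBuiltin.cpp) ------------------------
// __emul/__emulu widen both 32-bit operands to 64 bits before multiplying, so
// the full product is kept (hypothetical helper names).
#include <cstdint>

static int64_t emulSketch(int32_t A, int32_t B) {     // signed, like __emul
  return (int64_t)A * (int64_t)B;
}

static uint64_t emuluSketch(uint32_t A, uint32_t B) { // unsigned, like __emulu
  return (uint64_t)A * (uint64_t)B;
}
// -----------------------------------------------------------------------------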
16353 case X86::BI__mulh:
16354 case X86::BI__umulh:
16355 case X86::BI_mul128:
16356 case X86::BI_umul128: {
16357 llvm::Type *ResType = ConvertType(E->getType());
16358 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
16359
16360 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
16361 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
16362 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
16363
16364 Value *MulResult, *HigherBits;
16365 if (IsSigned) {
16366 MulResult = Builder.CreateNSWMul(LHS, RHS);
16367 HigherBits = Builder.CreateAShr(MulResult, 64);
16368 } else {
16369 MulResult = Builder.CreateNUWMul(LHS, RHS);
16370 HigherBits = Builder.CreateLShr(MulResult, 64);
16371 }
16372 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
16373
16374 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
16375 return HigherBits;
16376
16377 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
16378 Builder.CreateStore(HigherBits, HighBitsAddress);
16379 return Builder.CreateIntCast(MulResult, ResType, IsSigned);
16380 }
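// --- Illustrative sketch (not part of CGBuiltin.cpp) ------------------------
// The same widen-then-shift pattern the code above emits in IR, written with
// the Clang/GCC __int128 extension (hypothetical helper names). __umulh keeps
// only the high 64 bits of the product; _umul128 also returns the low half and
// stores the high half through an out-parameter.
#include <cstdint>

static uint64_t umulhSketch(uint64_t A, uint64_t B) {
  return (uint64_t)(((unsigned __int128)A * B) >> 64);
}

static uint64_t umul128Sketch(uint64_t A, uint64_t B, uint64_t *HighProduct) {
  unsigned __int128 Product = (unsigned __int128)A * B;
  *HighProduct = (uint64_t)(Product >> 64);
  return (uint64_t)Product;             // low 64 bits
}
// -----------------------------------------------------------------------------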
16381
16382 case X86::BI__faststorefence: {
16383 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
16384 llvm::SyncScope::System);
16385 }
16386 case X86::BI__shiftleft128:
16387 case X86::BI__shiftright128: {
16388 llvm::Function *F = CGM.getIntrinsic(
16389 BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
16390 Int64Ty);
16391 // Flip low/high ops and zero-extend amount to matching type.
16392 // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
16393 // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
16394 std::swap(Ops[0], Ops[1]);
16395 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
16396 return Builder.CreateCall(F, Ops);
16397 }
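// --- Illustrative sketch (not part of CGBuiltin.cpp) ------------------------
// The fshl/fshr mapping above expressed with the __int128 extension
// (hypothetical helper names): __shiftleft128 returns the high 64 bits of
// (High:Low) << Shift and __shiftright128 returns the low 64 bits of
// (High:Low) >> Shift, with the shift amount taken modulo 64 as the
// funnel-shift intrinsics do.
#include <cstdint>

static uint64_t shiftleft128Sketch(uint64_t Low, uint64_t High, unsigned char Shift) {
  unsigned __int128 Pair = ((unsigned __int128)High << 64) | Low;
  return (uint64_t)((Pair << (Shift & 63)) >> 64);
}

static uint64_t shiftright128Sketch(uint64_t Low, uint64_t High, unsigned char Shift) {
  unsigned __int128 Pair = ((unsigned __int128)High << 64) | Low;
  return (uint64_t)(Pair >> (Shift & 63));
}
// -----------------------------------------------------------------------------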
16398 case X86::BI_ReadWriteBarrier:
16399 case X86::BI_ReadBarrier:
16400 case X86::BI_WriteBarrier: {
16401 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
16402 llvm::SyncScope::SingleThread);
16403 }
16404
16405 case X86::BI_AddressOfReturnAddress: {
16406 Function *F =
16407 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
16408 return Builder.CreateCall(F);
16409 }
16410 case X86::BI__stosb: {
16411 // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
16412 // instruction, but it will create a memset that won't be optimized away.
16413 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
16414 }
16415 case X86::BI__ud2:
16416 // llvm.trap makes a ud2a instruction on x86.
16417 return EmitTrapCall(Intrinsic::trap);
16418 case X86::BI__int2c: {
16419 // This syscall signals a driver assertion failure in x86 NT kernels.
16420 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
16421 llvm::InlineAsm *IA =
16422 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
16423 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
16424 getLLVMContext(), llvm::AttributeList::FunctionIndex,
16425 llvm::Attribute::NoReturn);
16426 llvm::CallInst *CI = Builder.CreateCall(IA);
16427 CI->setAttributes(NoReturnAttr);
16428 return CI;
16429 }
16430 case X86::BI__readfsbyte:
16431 case X86::BI__readfsword:
16432 case X86::BI__readfsdword:
16433 case X86::BI__readfsqword: {
16434 llvm::Type *IntTy = ConvertType(E->getType());
16435 Value *Ptr = Builder.CreateIntToPtr(
16436 Ops[0], llvm::PointerType::get(getLLVMContext(), 257));
16437 LoadInst *Load = Builder.CreateAlignedLoad(
16438 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
16439 Load->setVolatile(true);
16440 return Load;
16441 }
16442 case X86::BI__readgsbyte:
16443 case X86::BI__readgsword:
16444 case X86::BI__readgsdword:
16445 case X86::BI__readgsqword: {
16446 llvm::Type *IntTy = ConvertType(E->getType());
16447 Value *Ptr = Builder.CreateIntToPtr(
16448 Ops[0], llvm::PointerType::get(getLLVMContext(), 256));
16449 LoadInst *Load = Builder.CreateAlignedLoad(
16450 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
16451 Load->setVolatile(true);
16452 return Load;
16453 }
16454 case X86::BI__builtin_ia32_encodekey128_u32: {
16455 Intrinsic::ID IID = Intrinsic::x86_encodekey128;
16456
16457 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
16458
16459 for (int i = 0; i < 3; ++i) {
16460 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16461 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
16462 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
16463 }
16464
16465 return Builder.CreateExtractValue(Call, 0);
16466 }
16467 case X86::BI__builtin_ia32_encodekey256_u32: {
16468 Intrinsic::ID IID = Intrinsic::x86_encodekey256;
16469
16470 Value *Call =
16471 Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
16472
16473 for (int i = 0; i < 4; ++i) {
16474 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16475 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
16476 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
16477 }
16478
16479 return Builder.CreateExtractValue(Call, 0);
16480 }
16481 case X86::BI__builtin_ia32_aesenc128kl_u8:
16482 case X86::BI__builtin_ia32_aesdec128kl_u8:
16483 case X86::BI__builtin_ia32_aesenc256kl_u8:
16484 case X86::BI__builtin_ia32_aesdec256kl_u8: {
16485 Intrinsic::ID IID;
16486 StringRef BlockName;
16487 switch (BuiltinID) {
16488 default:
16489 llvm_unreachable("Unexpected builtin");
16490 case X86::BI__builtin_ia32_aesenc128kl_u8:
16491 IID = Intrinsic::x86_aesenc128kl;
16492 BlockName = "aesenc128kl";
16493 break;
16494 case X86::BI__builtin_ia32_aesdec128kl_u8:
16495 IID = Intrinsic::x86_aesdec128kl;
16496 BlockName = "aesdec128kl";
16497 break;
16498 case X86::BI__builtin_ia32_aesenc256kl_u8:
16499 IID = Intrinsic::x86_aesenc256kl;
16500 BlockName = "aesenc256kl";
16501 break;
16502 case X86::BI__builtin_ia32_aesdec256kl_u8:
16503 IID = Intrinsic::x86_aesdec256kl;
16504 BlockName = "aesdec256kl";
16505 break;
16506 }
16507
16508 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
16509
16510 BasicBlock *NoError =
16511 createBasicBlock(BlockName + "_no_error", this->CurFn);
16512 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
16513 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
16514
16515 Value *Ret = Builder.CreateExtractValue(Call, 0);
16516 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
16517 Value *Out = Builder.CreateExtractValue(Call, 1);
16518 Builder.CreateCondBr(Succ, NoError, Error);
16519
16520 Builder.SetInsertPoint(NoError);
16521 Builder.CreateDefaultAlignedStore(Out, Ops[0]);
16522 Builder.CreateBr(End);
16523
16524 Builder.SetInsertPoint(Error);
16525 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
16526 Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
16527 Builder.CreateBr(End);
16528
16529 Builder.SetInsertPoint(End);
16530 return Builder.CreateExtractValue(Call, 0);
16531 }
16532 case X86::BI__builtin_ia32_aesencwide128kl_u8:
16533 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
16534 case X86::BI__builtin_ia32_aesencwide256kl_u8:
16535 case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
16536 Intrinsic::ID IID;
16537 StringRef BlockName;
16538 switch (BuiltinID) {
16539 case X86::BI__builtin_ia32_aesencwide128kl_u8:
16540 IID = Intrinsic::x86_aesencwide128kl;
16541 BlockName = "aesencwide128kl";
16542 break;
16543 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
16544 IID = Intrinsic::x86_aesdecwide128kl;
16545 BlockName = "aesdecwide128kl";
16546 break;
16547 case X86::BI__builtin_ia32_aesencwide256kl_u8:
16548 IID = Intrinsic::x86_aesencwide256kl;
16549 BlockName = "aesencwide256kl";
16550 break;
16551 case X86::BI__builtin_ia32_aesdecwide256kl_u8:
16552 IID = Intrinsic::x86_aesdecwide256kl;
16553 BlockName = "aesdecwide256kl";
16554 break;
16555 }
16556
16557 llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
16558 Value *InOps[9];
16559 InOps[0] = Ops[2];
16560 for (int i = 0; i != 8; ++i) {
16561 Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
16562 InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
16563 }
16564
16565 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
16566
16567 BasicBlock *NoError =
16568 createBasicBlock(BlockName + "_no_error", this->CurFn);
16569 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
16570 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
16571
16572 Value *Ret = Builder.CreateExtractValue(Call, 0);
16573 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
16574 Builder.CreateCondBr(Succ, NoError, Error);
16575
16576 Builder.SetInsertPoint(NoError);
16577 for (int i = 0; i != 8; ++i) {
16578 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16579 Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
16580 Builder.CreateAlignedStore(Extract, Ptr, Align(16));
16581 }
16582 Builder.CreateBr(End);
16583
16584 Builder.SetInsertPoint(Error);
16585 for (int i = 0; i != 8; ++i) {
16586 Value *Out = Builder.CreateExtractValue(Call, i + 1);
16587 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
16588 Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
16589 Builder.CreateAlignedStore(Zero, Ptr, Align(16));
16590 }
16591 Builder.CreateBr(End);
16592
16593 Builder.SetInsertPoint(End);
16594 return Builder.CreateExtractValue(Call, 0);
16595 }
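// Note: on success the eight processed blocks are stored to Ops[0]; on
// failure zeroed blocks are stored instead. Either way the status value
// (element 0 of the intrinsic result) is returned to the caller.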
16596 case X86::BI__builtin_ia32_vfcmaddcph512_mask:
16597 IsConjFMA = true;
16598 [[fallthrough]];
16599 case X86::BI__builtin_ia32_vfmaddcph512_mask: {
16600 Intrinsic::ID IID = IsConjFMA
16601 ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
16602 : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
16603 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16604 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
16605 }
16606 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
16607 IsConjFMA = true;
16608 [[fallthrough]];
16609 case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
16610 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
16611 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
16612 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16613 Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
16614 return EmitX86Select(*this, And, Call, Ops[0]);
16615 }
16616 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
16617 IsConjFMA = true;
16618 [[fallthrough]];
16619 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
16620 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
16621 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
16622 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16623 static constexpr int Mask[] = {0, 5, 6, 7};
16624 return Builder.CreateShuffleVector(Call, Ops[2], Mask);
16625 }
16626 case X86::BI__builtin_ia32_prefetchi:
16627 return Builder.CreateCall(
16628 CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
16629 {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
16630 llvm::ConstantInt::get(Int32Ty, 0)});
16631 }
16632}
16633
16634Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
16635 const CallExpr *E) {
16636 // Do not emit the builtin arguments directly as arguments of a function call,
16637 // because the evaluation order of function arguments is not specified in C++.
16638 // This is important when testing to ensure the arguments are emitted in the
16639 // same order every time. E.g.:
16640 // Instead of:
16641 // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
16642 // EmitScalarExpr(E->getArg(1)), "swdiv");
16643 // Use:
16644 // Value *Op0 = EmitScalarExpr(E->getArg(0));
16645 // Value *Op1 = EmitScalarExpr(E->getArg(1));
16646 // return Builder.CreateFDiv(Op0, Op1, "swdiv")
16647
16648 Intrinsic::ID ID = Intrinsic::not_intrinsic;
16649
16650#include "llvm/TargetParser/PPCTargetParser.def"
16651 auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,
16652 unsigned CompOp,
16653 unsigned OpValue) -> Value * {
16654 if (SupportMethod == AIX_BUILTIN_PPC_FALSE)
16655 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
16656
16657 if (SupportMethod == AIX_BUILTIN_PPC_TRUE)
16658 return llvm::ConstantInt::getTrue(ConvertType(E->getType()));
16659
16660 assert(SupportMethod <= USE_SYS_CONF && "Invalid value for SupportMethod.");
16661 assert((CompOp == COMP_EQ) && "Only equal comparisons are supported.");
16662
16663 llvm::Type *STy = llvm::StructType::get(PPC_SYSTEMCONFIG_TYPE);
16664 llvm::Constant *SysConf =
16665 CGM.CreateRuntimeVariable(STy, "_system_configuration");
16666
16667 // Grab the appropriate field from _system_configuration.
16668 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
16669 ConstantInt::get(Int32Ty, FieldIdx)};
16670
16671 llvm::Value *FieldValue = Builder.CreateGEP(STy, SysConf, Idxs);
16672 FieldValue = Builder.CreateAlignedLoad(Int32Ty, FieldValue,
16673 CharUnits::fromQuantity(4));
16674 assert(FieldValue->getType()->isIntegerTy(32) &&
16675 "Only 32-bit integers are supported in GenAIXPPCBuiltinCpuExpr().");
16676 return Builder.CreateICmp(ICmpInst::ICMP_EQ, FieldValue,
16677 ConstantInt::get(Int32Ty, OpValue));
16678 };
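// Illustrative sketch: for the USE_SYS_CONF path the lambda emits roughly
//   %fld = getelementptr %struct, ptr @_system_configuration, i32 0, i32 <FieldIdx>
//   %val = load i32, ptr %fld, align 4
//   %res = icmp eq i32 %val, <OpValue>
// with the field index and compare value taken from PPCTargetParser.def.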
16679
16680 switch (BuiltinID) {
16681 default: return nullptr;
16682
16683 case Builtin::BI__builtin_cpu_is: {
16684 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
16685 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
16686 llvm::Triple Triple = getTarget().getTriple();
16687
16688 if (Triple.isOSAIX()) {
16689 unsigned IsCpuSupport, FieldIdx, CompareOp, CpuIdValue;
16690 typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUType;
16691 std::tie(IsCpuSupport, FieldIdx, CompareOp, CpuIdValue) =
16692 static_cast<CPUType>(StringSwitch<CPUType>(CPUStr)
16693#define PPC_AIX_CPU(NAME, SUPPORT_MAGIC, INDEX, COMPARE_OP, VALUE) \
16694 .Case(NAME, {SUPPORT_MAGIC, INDEX, COMPARE_OP, VALUE})
16695#include "llvm/TargetParser/PPCTargetParser.def"
16696 );
16697 return GenAIXPPCBuiltinCpuExpr(IsCpuSupport, FieldIdx, CompareOp,
16698 CpuIdValue);
16699 }
16700
16701 assert(Triple.isOSLinux() &&
16702 "__builtin_cpu_is() is only supported for AIX and Linux.");
16703 unsigned NumCPUID = StringSwitch<unsigned>(CPUStr)
16704#define PPC_LNX_CPU(Name, NumericID) .Case(Name, NumericID)
16705#include "llvm/TargetParser/PPCTargetParser.def"
16706 .Default(-1U);
16707 assert(NumCPUID < -1U && "Invalid CPU name. Missed by SemaChecking?");
16708 Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID);
16709 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
16710 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is");
16711 return Builder.CreateICmpEQ(TheCall,
16712 llvm::ConstantInt::get(Int32Ty, NumCPUID));
16713 }
16714 case Builtin::BI__builtin_cpu_supports: {
16715 unsigned FeatureWord;
16716 unsigned BitMask;
16717 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
16718 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
16719 std::tie(FeatureWord, BitMask) =
16720 StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
16721#define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \
16722 .Case(Name, {FA_WORD, Bitmask})
16723#include "llvm/TargetParser/PPCTargetParser.def"
16724 .Default({0, 0});
16725 if (!BitMask)
16726 return Builder.getFalse();
16727 Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord);
16728 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
16729 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports");
16730 Value *Mask =
16731 Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask));
16732 return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty));
16733#undef PPC_FAWORD_HWCAP
16734#undef PPC_FAWORD_HWCAP2
16735#undef PPC_FAWORD_CPUID
16736 }
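// Illustrative sketch: on Linux, __builtin_cpu_supports("vsx") is expected to
// expand to roughly
//   %w = call i32 @llvm.ppc.fixed.addr.ld(i32 <FeatureWord>)
//   %m = and i32 %w, <BitMask>
//   %r = icmp ne i32 %m, 0
// with the feature word and bit mask coming from PPCTargetParser.def.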
16737
16738 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
16739 // call __builtin_readcyclecounter.
16740 case PPC::BI__builtin_ppc_get_timebase:
16741 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
16742
16743 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
16744 case PPC::BI__builtin_altivec_lvx:
16745 case PPC::BI__builtin_altivec_lvxl:
16746 case PPC::BI__builtin_altivec_lvebx:
16747 case PPC::BI__builtin_altivec_lvehx:
16748 case PPC::BI__builtin_altivec_lvewx:
16749 case PPC::BI__builtin_altivec_lvsl:
16750 case PPC::BI__builtin_altivec_lvsr:
16751 case PPC::BI__builtin_vsx_lxvd2x:
16752 case PPC::BI__builtin_vsx_lxvw4x:
16753 case PPC::BI__builtin_vsx_lxvd2x_be:
16754 case PPC::BI__builtin_vsx_lxvw4x_be:
16755 case PPC::BI__builtin_vsx_lxvl:
16756 case PPC::BI__builtin_vsx_lxvll:
16757 {
16758 SmallVector<Value *, 2> Ops;
16759 Ops.push_back(EmitScalarExpr(E->getArg(0)));
16760 Ops.push_back(EmitScalarExpr(E->getArg(1)));
16761 if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
16762 BuiltinID == PPC::BI__builtin_vsx_lxvll)) {
16763 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
16764 Ops.pop_back();
16765 }
16766
16767 switch (BuiltinID) {
16768 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
16769 case PPC::BI__builtin_altivec_lvx:
16770 ID = Intrinsic::ppc_altivec_lvx;
16771 break;
16772 case PPC::BI__builtin_altivec_lvxl:
16773 ID = Intrinsic::ppc_altivec_lvxl;
16774 break;
16775 case PPC::BI__builtin_altivec_lvebx:
16776 ID = Intrinsic::ppc_altivec_lvebx;
16777 break;
16778 case PPC::BI__builtin_altivec_lvehx:
16779 ID = Intrinsic::ppc_altivec_lvehx;
16780 break;
16781 case PPC::BI__builtin_altivec_lvewx:
16782 ID = Intrinsic::ppc_altivec_lvewx;
16783 break;
16784 case PPC::BI__builtin_altivec_lvsl:
16785 ID = Intrinsic::ppc_altivec_lvsl;
16786 break;
16787 case PPC::BI__builtin_altivec_lvsr:
16788 ID = Intrinsic::ppc_altivec_lvsr;
16789 break;
16790 case PPC::BI__builtin_vsx_lxvd2x:
16791 ID = Intrinsic::ppc_vsx_lxvd2x;
16792 break;
16793 case PPC::BI__builtin_vsx_lxvw4x:
16794 ID = Intrinsic::ppc_vsx_lxvw4x;
16795 break;
16796 case PPC::BI__builtin_vsx_lxvd2x_be:
16797 ID = Intrinsic::ppc_vsx_lxvd2x_be;
16798 break;
16799 case PPC::BI__builtin_vsx_lxvw4x_be:
16800 ID = Intrinsic::ppc_vsx_lxvw4x_be;
16801 break;
16802 case PPC::BI__builtin_vsx_lxvl:
16803 ID = Intrinsic::ppc_vsx_lxvl;
16804 break;
16805 case PPC::BI__builtin_vsx_lxvll:
16806 ID = Intrinsic::ppc_vsx_lxvll;
16807 break;
16808 }
16809 llvm::Function *F = CGM.getIntrinsic(ID);
16810 return Builder.CreateCall(F, Ops, "");
16811 }
16812
16813 // vec_st, vec_xst_be
16814 case PPC::BI__builtin_altivec_stvx:
16815 case PPC::BI__builtin_altivec_stvxl:
16816 case PPC::BI__builtin_altivec_stvebx:
16817 case PPC::BI__builtin_altivec_stvehx:
16818 case PPC::BI__builtin_altivec_stvewx:
16819 case PPC::BI__builtin_vsx_stxvd2x:
16820 case PPC::BI__builtin_vsx_stxvw4x:
16821 case PPC::BI__builtin_vsx_stxvd2x_be:
16822 case PPC::BI__builtin_vsx_stxvw4x_be:
16823 case PPC::BI__builtin_vsx_stxvl:
16824 case PPC::BI__builtin_vsx_stxvll:
16825 {
16826 SmallVector<Value *, 3> Ops;
16827 Ops.push_back(EmitScalarExpr(E->getArg(0)));
16828 Ops.push_back(EmitScalarExpr(E->getArg(1)));
16829 Ops.push_back(EmitScalarExpr(E->getArg(2)));
16830 if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
16831 BuiltinID == PPC::BI__builtin_vsx_stxvll)) {
16832 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
16833 Ops.pop_back();
16834 }
16835
16836 switch (BuiltinID) {
16837 default: llvm_unreachable("Unsupported st intrinsic!");
16838 case PPC::BI__builtin_altivec_stvx:
16839 ID = Intrinsic::ppc_altivec_stvx;
16840 break;
16841 case PPC::BI__builtin_altivec_stvxl:
16842 ID = Intrinsic::ppc_altivec_stvxl;
16843 break;
16844 case PPC::BI__builtin_altivec_stvebx:
16845 ID = Intrinsic::ppc_altivec_stvebx;
16846 break;
16847 case PPC::BI__builtin_altivec_stvehx:
16848 ID = Intrinsic::ppc_altivec_stvehx;
16849 break;
16850 case PPC::BI__builtin_altivec_stvewx:
16851 ID = Intrinsic::ppc_altivec_stvewx;
16852 break;
16853 case PPC::BI__builtin_vsx_stxvd2x:
16854 ID = Intrinsic::ppc_vsx_stxvd2x;
16855 break;
16856 case PPC::BI__builtin_vsx_stxvw4x:
16857 ID = Intrinsic::ppc_vsx_stxvw4x;
16858 break;
16859 case PPC::BI__builtin_vsx_stxvd2x_be:
16860 ID = Intrinsic::ppc_vsx_stxvd2x_be;
16861 break;
16862 case PPC::BI__builtin_vsx_stxvw4x_be:
16863 ID = Intrinsic::ppc_vsx_stxvw4x_be;
16864 break;
16865 case PPC::BI__builtin_vsx_stxvl:
16866 ID = Intrinsic::ppc_vsx_stxvl;
16867 break;
16868 case PPC::BI__builtin_vsx_stxvll:
16869 ID = Intrinsic::ppc_vsx_stxvll;
16870 break;
16871 }
16872 llvm::Function *F = CGM.getIntrinsic(ID);
16873 return Builder.CreateCall(F, Ops, "");
16874 }
16875 case PPC::BI__builtin_vsx_ldrmb: {
16876 // Essentially boils down to performing an unaligned VMX load sequence so
16877 // as to avoid crossing a page boundary and then shuffling the elements
16878 // into the right side of the vector register.
16879 Value *Op0 = EmitScalarExpr(E->getArg(0));
16880 Value *Op1 = EmitScalarExpr(E->getArg(1));
16881 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
16882 llvm::Type *ResTy = ConvertType(E->getType());
16883 bool IsLE = getTarget().isLittleEndian();
16884
16885 // If the user wants the entire vector, just load the entire vector.
16886 if (NumBytes == 16) {
16887 Value *LD =
16888 Builder.CreateLoad(Address(Op0, ResTy, CharUnits::fromQuantity(1)));
16889 if (!IsLE)
16890 return LD;
16891
16892 // Reverse the bytes on LE.
16893 SmallVector<int, 16> RevMask;
16894 for (int Idx = 0; Idx < 16; Idx++)
16895 RevMask.push_back(15 - Idx);
16896 return Builder.CreateShuffleVector(LD, LD, RevMask);
16897 }
16898
16899 llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
16900 llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
16901 : Intrinsic::ppc_altivec_lvsl);
16902 llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
16903 Value *HiMem = Builder.CreateGEP(
16904 Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1));
16905 Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo");
16906 Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
16907 Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1");
16908
16909 Op0 = IsLE ? HiLd : LoLd;
16910 Op1 = IsLE ? LoLd : HiLd;
16911 Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1");
16912 Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());
16913
16914 if (IsLE) {
16915 SmallVector<int, 16> Consts;
16916 for (int Idx = 0; Idx < 16; Idx++) {
16917 int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
16918 : 16 - (NumBytes - Idx);
16919 Consts.push_back(Val);
16920 }
16921 return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
16922 Zero, Consts);
16923 }
16924 SmallVector<Constant *, 16> Consts;
16925 for (int Idx = 0; Idx < 16; Idx++)
16926 Consts.push_back(Builder.getInt8(NumBytes + Idx));
16927 Value *Mask2 = ConstantVector::get(Consts);
16928 return Builder.CreateBitCast(
16929 Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
16930 }
16931 case PPC::BI__builtin_vsx_strmb: {
16932 Value *Op0 = EmitScalarExpr(E->getArg(0));
16933 Value *Op1 = EmitScalarExpr(E->getArg(1));
16934 Value *Op2 = EmitScalarExpr(E->getArg(2));
16935 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
16936 bool IsLE = getTarget().isLittleEndian();
16937 auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
16938 // If storing the whole vector, simply store it on BE; on LE, reverse the
16939 // bytes and then store.
16940 if (Width == 16) {
16941 Value *StVec = Op2;
16942 if (IsLE) {
16943 SmallVector<int, 16> RevMask;
16944 for (int Idx = 0; Idx < 16; Idx++)
16945 RevMask.push_back(15 - Idx);
16946 StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask);
16947 }
16948 return Builder.CreateStore(
16949 StVec, Address(Op0, Op2->getType(), CharUnits::fromQuantity(1)));
16950 }
16951 auto *ConvTy = Int64Ty;
16952 unsigned NumElts = 0;
16953 switch (Width) {
16954 default:
16955 llvm_unreachable("width for stores must be a power of 2");
16956 case 8:
16957 ConvTy = Int64Ty;
16958 NumElts = 2;
16959 break;
16960 case 4:
16961 ConvTy = Int32Ty;
16962 NumElts = 4;
16963 break;
16964 case 2:
16965 ConvTy = Int16Ty;
16966 NumElts = 8;
16967 break;
16968 case 1:
16969 ConvTy = Int8Ty;
16970 NumElts = 16;
16971 break;
16972 }
16973 Value *Vec = Builder.CreateBitCast(
16974 Op2, llvm::FixedVectorType::get(ConvTy, NumElts));
16975 Value *Ptr =
16976 Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset));
16977 Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
16978 if (IsLE && Width > 1) {
16979 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
16980 Elt = Builder.CreateCall(F, Elt);
16981 }
16982 return Builder.CreateStore(
16983 Elt, Address(Ptr, ConvTy, CharUnits::fromQuantity(1)));
16984 };
16985 unsigned Stored = 0;
16986 unsigned RemainingBytes = NumBytes;
16987 Value *Result;
16988 if (NumBytes == 16)
16989 return StoreSubVec(16, 0, 0);
16990 if (NumBytes >= 8) {
16991 Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
16992 RemainingBytes -= 8;
16993 Stored += 8;
16994 }
16995 if (RemainingBytes >= 4) {
16996 Result = StoreSubVec(4, NumBytes - Stored - 4,
16997 IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
16998 RemainingBytes -= 4;
16999 Stored += 4;
17000 }
17001 if (RemainingBytes >= 2) {
17002 Result = StoreSubVec(2, NumBytes - Stored - 2,
17003 IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
17004 RemainingBytes -= 2;
17005 Stored += 2;
17006 }
17007 if (RemainingBytes)
17008 Result =
17009 StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
17010 return Result;
17011 }
17012 // Square root
17013 case PPC::BI__builtin_vsx_xvsqrtsp:
17014 case PPC::BI__builtin_vsx_xvsqrtdp: {
17015 llvm::Type *ResultType = ConvertType(E->getType());
17016 Value *X = EmitScalarExpr(E->getArg(0));
17017 if (Builder.getIsFPConstrained()) {
17018 llvm::Function *F = CGM.getIntrinsic(
17019 Intrinsic::experimental_constrained_sqrt, ResultType);
17020 return Builder.CreateConstrainedFPCall(F, X);
17021 } else {
17022 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
17023 return Builder.CreateCall(F, X);
17024 }
17025 }
17026 // Count leading zeros
17027 case PPC::BI__builtin_altivec_vclzb:
17028 case PPC::BI__builtin_altivec_vclzh:
17029 case PPC::BI__builtin_altivec_vclzw:
17030 case PPC::BI__builtin_altivec_vclzd: {
17031 llvm::Type *ResultType = ConvertType(E->getType());
17032 Value *X = EmitScalarExpr(E->getArg(0));
17033 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
17034 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
17035 return Builder.CreateCall(F, {X, Undef});
17036 }
17037 case PPC::BI__builtin_altivec_vctzb:
17038 case PPC::BI__builtin_altivec_vctzh:
17039 case PPC::BI__builtin_altivec_vctzw:
17040 case PPC::BI__builtin_altivec_vctzd: {
17041 llvm::Type *ResultType = ConvertType(E->getType());
17042 Value *X = EmitScalarExpr(E->getArg(0));
17043 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
17044 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
17045 return Builder.CreateCall(F, {X, Undef});
17046 }
17047 case PPC::BI__builtin_altivec_vinsd:
17048 case PPC::BI__builtin_altivec_vinsw:
17049 case PPC::BI__builtin_altivec_vinsd_elt:
17050 case PPC::BI__builtin_altivec_vinsw_elt: {
17051 llvm::Type *ResultType = ConvertType(E->getType());
17052 Value *Op0 = EmitScalarExpr(E->getArg(0));
17053 Value *Op1 = EmitScalarExpr(E->getArg(1));
17054 Value *Op2 = EmitScalarExpr(E->getArg(2));
17055
17056 bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
17057 BuiltinID == PPC::BI__builtin_altivec_vinsd);
17058
17059 bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
17060 BuiltinID == PPC::BI__builtin_altivec_vinsw_elt);
17061
17062 // The third argument must be a compile time constant.
17063 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17064 assert(ArgCI &&
17065 "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
17066
17067 // The valid range for the third argument depends on the input type and the
17068 // builtin called.
17069 int ValidMaxValue = 0;
17070 if (IsUnaligned)
17071 ValidMaxValue = (Is32bit) ? 12 : 8;
17072 else
17073 ValidMaxValue = (Is32bit) ? 3 : 1;
17074
17075 // Get value of third argument.
17076 int64_t ConstArg = ArgCI->getSExtValue();
17077
17078 // Compose range checking error message.
17079 std::string RangeErrMsg = IsUnaligned ? "byte" : "element";
17080 RangeErrMsg += " number " + llvm::to_string(ConstArg);
17081 RangeErrMsg += " is outside of the valid range [0, ";
17082 RangeErrMsg += llvm::to_string(ValidMaxValue) + "]";
17083
17084 // Issue error if third argument is not within the valid range.
17085 if (ConstArg < 0 || ConstArg > ValidMaxValue)
17086 CGM.Error(E->getExprLoc(), RangeErrMsg);
17087
17088 // Input to vec_replace_elt is an element index, convert to byte index.
17089 if (!IsUnaligned) {
17090 ConstArg *= Is32bit ? 4 : 8;
17091 // Fix the constant according to endianness.
17092 if (getTarget().isLittleEndian())
17093 ConstArg = (Is32bit ? 12 : 8) - ConstArg;
17094 }
17095
17096 ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;
17097 Op2 = ConstantInt::getSigned(Int32Ty, ConstArg);
17098 // Casting input to vector int as per intrinsic definition.
17099 Op0 =
17100 Is32bit
17101 ? Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4))
17102 : Builder.CreateBitCast(Op0,
17103 llvm::FixedVectorType::get(Int64Ty, 2));
17104 return Builder.CreateBitCast(
17105 Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);
17106 }
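// Worked example: vec_replace_elt on 32-bit elements with element index 1
// becomes byte index 1 * 4 = 4, which on a little-endian target is flipped
// to 12 - 4 = 8 before being passed to ppc_altivec_vinsw.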
17107 case PPC::BI__builtin_altivec_vpopcntb:
17108 case PPC::BI__builtin_altivec_vpopcnth:
17109 case PPC::BI__builtin_altivec_vpopcntw:
17110 case PPC::BI__builtin_altivec_vpopcntd: {
17111 llvm::Type *ResultType = ConvertType(E->getType());
17112 Value *X = EmitScalarExpr(E->getArg(0));
17113 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
17114 return Builder.CreateCall(F, X);
17115 }
17116 case PPC::BI__builtin_altivec_vadduqm:
17117 case PPC::BI__builtin_altivec_vsubuqm: {
17118 Value *Op0 = EmitScalarExpr(E->getArg(0));
17119 Value *Op1 = EmitScalarExpr(E->getArg(1));
17120 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
17121 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));
17122 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));
17123 if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
17124 return Builder.CreateAdd(Op0, Op1, "vadduqm");
17125 else
17126 return Builder.CreateSub(Op0, Op1, "vsubuqm");
17127 }
17128 case PPC::BI__builtin_altivec_vaddcuq_c:
17129 case PPC::BI__builtin_altivec_vsubcuq_c: {
17130 SmallVector<Value *, 2> Ops;
17131 Value *Op0 = EmitScalarExpr(E->getArg(0));
17132 Value *Op1 = EmitScalarExpr(E->getArg(1));
17133 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
17134 llvm::IntegerType::get(getLLVMContext(), 128), 1);
17135 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
17136 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
17137 ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c)
17138 ? Intrinsic::ppc_altivec_vaddcuq
17139 : Intrinsic::ppc_altivec_vsubcuq;
17140 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
17141 }
17142 case PPC::BI__builtin_altivec_vaddeuqm_c:
17143 case PPC::BI__builtin_altivec_vaddecuq_c:
17144 case PPC::BI__builtin_altivec_vsubeuqm_c:
17145 case PPC::BI__builtin_altivec_vsubecuq_c: {
17146 SmallVector<Value *, 3> Ops;
17147 Value *Op0 = EmitScalarExpr(E->getArg(0));
17148 Value *Op1 = EmitScalarExpr(E->getArg(1));
17149 Value *Op2 = EmitScalarExpr(E->getArg(2));
17150 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
17151 llvm::IntegerType::get(getLLVMContext(), 128), 1);
17152 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
17153 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
17154 Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty));
17155 switch (BuiltinID) {
17156 default:
17157 llvm_unreachable("Unsupported intrinsic!");
17158 case PPC::BI__builtin_altivec_vaddeuqm_c:
17159 ID = Intrinsic::ppc_altivec_vaddeuqm;
17160 break;
17161 case PPC::BI__builtin_altivec_vaddecuq_c:
17162 ID = Intrinsic::ppc_altivec_vaddecuq;
17163 break;
17164 case PPC::BI__builtin_altivec_vsubeuqm_c:
17165 ID = Intrinsic::ppc_altivec_vsubeuqm;
17166 break;
17167 case PPC::BI__builtin_altivec_vsubecuq_c:
17168 ID = Intrinsic::ppc_altivec_vsubecuq;
17169 break;
17170 }
17171 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
17172 }
17173 case PPC::BI__builtin_ppc_rldimi:
17174 case PPC::BI__builtin_ppc_rlwimi: {
17175 Value *Op0 = EmitScalarExpr(E->getArg(0));
17176 Value *Op1 = EmitScalarExpr(E->getArg(1));
17177 Value *Op2 = EmitScalarExpr(E->getArg(2));
17178 Value *Op3 = EmitScalarExpr(E->getArg(3));
17179 return Builder.CreateCall(
17180 CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
17181 ? Intrinsic::ppc_rldimi
17182 : Intrinsic::ppc_rlwimi),
17183 {Op0, Op1, Op2, Op3});
17184 }
17185 case PPC::BI__builtin_ppc_rlwnm: {
17186 Value *Op0 = EmitScalarExpr(E->getArg(0));
17187 Value *Op1 = EmitScalarExpr(E->getArg(1));
17188 Value *Op2 = EmitScalarExpr(E->getArg(2));
17189 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm),
17190 {Op0, Op1, Op2});
17191 }
17192 case PPC::BI__builtin_ppc_poppar4:
17193 case PPC::BI__builtin_ppc_poppar8: {
17194 Value *Op0 = EmitScalarExpr(E->getArg(0));
17195 llvm::Type *ArgType = Op0->getType();
17196 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
17197 Value *Tmp = Builder.CreateCall(F, Op0);
17198
17199 llvm::Type *ResultType = ConvertType(E->getType());
17200 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
17201 if (Result->getType() != ResultType)
17202 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
17203 "cast");
17204 return Result;
17205 }
17206 case PPC::BI__builtin_ppc_cmpb: {
17207 Value *Op0 = EmitScalarExpr(E->getArg(0));
17208 Value *Op1 = EmitScalarExpr(E->getArg(1));
17209 if (getTarget().getTriple().isPPC64()) {
17210 Function *F =
17211 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
17212 return Builder.CreateCall(F, {Op0, Op1}, "cmpb");
17213 }
17214 // For 32 bit, emit the code as below:
17215 // %conv = trunc i64 %a to i32
17216 // %conv1 = trunc i64 %b to i32
17217 // %shr = lshr i64 %a, 32
17218 // %conv2 = trunc i64 %shr to i32
17219 // %shr3 = lshr i64 %b, 32
17220 // %conv4 = trunc i64 %shr3 to i32
17221 // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
17222 // %conv5 = zext i32 %0 to i64
17223 // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
17224 // %conv614 = zext i32 %1 to i64
17225 // %shl = shl nuw i64 %conv614, 32
17226 // %or = or i64 %shl, %conv5
17227 // ret i64 %or
17228 Function *F =
17229 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
17230 Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty);
17231 Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty);
17232 Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
17233 Value *ArgOneHi =
17234 Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty);
17235 Value *ArgTwoHi =
17236 Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty);
17237 Value *ResLo = Builder.CreateZExt(
17238 Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
17239 Value *ResHiShift = Builder.CreateZExt(
17240 Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);
17241 Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);
17242 return Builder.CreateOr(ResLo, ResHi);
17243 }
17244 // Copy sign
17245 case PPC::BI__builtin_vsx_xvcpsgnsp:
17246 case PPC::BI__builtin_vsx_xvcpsgndp: {
17247 llvm::Type *ResultType = ConvertType(E->getType());
17248 Value *X = EmitScalarExpr(E->getArg(0));
17249 Value *Y = EmitScalarExpr(E->getArg(1));
17250 ID = Intrinsic::copysign;
17251 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
17252 return Builder.CreateCall(F, {X, Y});
17253 }
17254 // Rounding/truncation
17255 case PPC::BI__builtin_vsx_xvrspip:
17256 case PPC::BI__builtin_vsx_xvrdpip:
17257 case PPC::BI__builtin_vsx_xvrdpim:
17258 case PPC::BI__builtin_vsx_xvrspim:
17259 case PPC::BI__builtin_vsx_xvrdpi:
17260 case PPC::BI__builtin_vsx_xvrspi:
17261 case PPC::BI__builtin_vsx_xvrdpic:
17262 case PPC::BI__builtin_vsx_xvrspic:
17263 case PPC::BI__builtin_vsx_xvrdpiz:
17264 case PPC::BI__builtin_vsx_xvrspiz: {
17265 llvm::Type *ResultType = ConvertType(E->getType());
17266 Value *X = EmitScalarExpr(E->getArg(0));
17267 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
17268 BuiltinID == PPC::BI__builtin_vsx_xvrspim)
17269 ID = Builder.getIsFPConstrained()
17270 ? Intrinsic::experimental_constrained_floor
17271 : Intrinsic::floor;
17272 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
17273 BuiltinID == PPC::BI__builtin_vsx_xvrspi)
17274 ID = Builder.getIsFPConstrained()
17275 ? Intrinsic::experimental_constrained_round
17276 : Intrinsic::round;
17277 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
17278 BuiltinID == PPC::BI__builtin_vsx_xvrspic)
17279 ID = Builder.getIsFPConstrained()
17280 ? Intrinsic::experimental_constrained_rint
17281 : Intrinsic::rint;
17282 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
17283 BuiltinID == PPC::BI__builtin_vsx_xvrspip)
17284 ID = Builder.getIsFPConstrained()
17285 ? Intrinsic::experimental_constrained_ceil
17286 : Intrinsic::ceil;
17287 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
17288 BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
17289 ID = Builder.getIsFPConstrained()
17290 ? Intrinsic::experimental_constrained_trunc
17291 : Intrinsic::trunc;
17292 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
17293 return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X)
17294 : Builder.CreateCall(F, X);
17295 }
17296
17297 // Absolute value
17298 case PPC::BI__builtin_vsx_xvabsdp:
17299 case PPC::BI__builtin_vsx_xvabssp: {
17300 llvm::Type *ResultType = ConvertType(E->getType());
17301 Value *X = EmitScalarExpr(E->getArg(0));
17302 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
17303 return Builder.CreateCall(F, X);
17304 }
17305
17306 // Fastmath by default
17307 case PPC::BI__builtin_ppc_recipdivf:
17308 case PPC::BI__builtin_ppc_recipdivd:
17309 case PPC::BI__builtin_ppc_rsqrtf:
17310 case PPC::BI__builtin_ppc_rsqrtd: {
17311 FastMathFlags FMF = Builder.getFastMathFlags();
17312 Builder.getFastMathFlags().setFast();
17313 llvm::Type *ResultType = ConvertType(E->getType());
17314 Value *X = EmitScalarExpr(E->getArg(0));
17315
17316 if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
17317 BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
17318 Value *Y = EmitScalarExpr(E->getArg(1));
17319 Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv");
17320 Builder.getFastMathFlags() &= (FMF);
17321 return FDiv;
17322 }
17323 auto *One = ConstantFP::get(ResultType, 1.0);
17324 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
17325 Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
17326 Builder.getFastMathFlags() &= (FMF);
17327 return FDiv;
17328 }
17329 case PPC::BI__builtin_ppc_alignx: {
17330 Value *Op0 = EmitScalarExpr(E->getArg(0));
17331 Value *Op1 = EmitScalarExpr(E->getArg(1));
17332 ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);
17333 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
17334 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
17335 llvm::Value::MaximumAlignment);
17336
17337 emitAlignmentAssumption(Op1, E->getArg(1),
17338 /*The expr loc is sufficient.*/ SourceLocation(),
17339 AlignmentCI, nullptr);
17340 return Op1;
17341 }
17342 case PPC::BI__builtin_ppc_rdlam: {
17343 Value *Op0 = EmitScalarExpr(E->getArg(0));
17344 Value *Op1 = EmitScalarExpr(E->getArg(1));
17345 Value *Op2 = EmitScalarExpr(E->getArg(2));
17346 llvm::Type *Ty = Op0->getType();
17347 Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false);
17348 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
17349 Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt});
17350 return Builder.CreateAnd(Rotate, Op2);
17351 }
17352 case PPC::BI__builtin_ppc_load2r: {
17353 Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
17354 Value *Op0 = EmitScalarExpr(E->getArg(0));
17355 Value *LoadIntrinsic = Builder.CreateCall(F, {Op0});
17356 return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
17357 }
17358 // FMA variations
17359 case PPC::BI__builtin_ppc_fnmsub:
17360 case PPC::BI__builtin_ppc_fnmsubs:
17361 case PPC::BI__builtin_vsx_xvmaddadp:
17362 case PPC::BI__builtin_vsx_xvmaddasp:
17363 case PPC::BI__builtin_vsx_xvnmaddadp:
17364 case PPC::BI__builtin_vsx_xvnmaddasp:
17365 case PPC::BI__builtin_vsx_xvmsubadp:
17366 case PPC::BI__builtin_vsx_xvmsubasp:
17367 case PPC::BI__builtin_vsx_xvnmsubadp:
17368 case PPC::BI__builtin_vsx_xvnmsubasp: {
17369 llvm::Type *ResultType = ConvertType(E->getType());
17370 Value *X = EmitScalarExpr(E->getArg(0));
17371 Value *Y = EmitScalarExpr(E->getArg(1));
17372 Value *Z = EmitScalarExpr(E->getArg(2));
17373 llvm::Function *F;
17374 if (Builder.getIsFPConstrained())
17375 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
17376 else
17377 F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
17378 switch (BuiltinID) {
17379 case PPC::BI__builtin_vsx_xvmaddadp:
17380 case PPC::BI__builtin_vsx_xvmaddasp:
17381 if (Builder.getIsFPConstrained())
17382 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
17383 else
17384 return Builder.CreateCall(F, {X, Y, Z});
17385 case PPC::BI__builtin_vsx_xvnmaddadp:
17386 case PPC::BI__builtin_vsx_xvnmaddasp:
17387 if (Builder.getIsFPConstrained())
17388 return Builder.CreateFNeg(
17389 Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
17390 else
17391 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
17392 case PPC::BI__builtin_vsx_xvmsubadp:
17393 case PPC::BI__builtin_vsx_xvmsubasp:
17394 if (Builder.getIsFPConstrained())
17395 return Builder.CreateConstrainedFPCall(
17396 F, {X, Y, Builder.CreateFNeg(Z, "neg")});
17397 else
17398 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
17399 case PPC::BI__builtin_ppc_fnmsub:
17400 case PPC::BI__builtin_ppc_fnmsubs:
17401 case PPC::BI__builtin_vsx_xvnmsubadp:
17402 case PPC::BI__builtin_vsx_xvnmsubasp:
17403 if (Builder.getIsFPConstrained())
17404 return Builder.CreateFNeg(
17405 Builder.CreateConstrainedFPCall(
17406 F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
17407 "neg");
17408 else
17409 return Builder.CreateCall(
17410 CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});
17411 }
17412 llvm_unreachable("Unknown FMA operation");
17413 return nullptr; // Suppress no-return warning
17414 }
17415
17416 case PPC::BI__builtin_vsx_insertword: {
17417 Value *Op0 = EmitScalarExpr(E->getArg(0));
17418 Value *Op1 = EmitScalarExpr(E->getArg(1));
17419 Value *Op2 = EmitScalarExpr(E->getArg(2));
17420 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
17421
17422 // Third argument is a compile time constant int. It must be clamped
17423 // to the range [0, 12].
17424 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17425 assert(ArgCI &&
17426 "Third arg to xxinsertw intrinsic must be constant integer");
17427 const int64_t MaxIndex = 12;
17428 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
17429
17430 // The builtin semantics don't exactly match the xxinsertw instruction's
17431 // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
17432 // word from the first argument, and inserts it in the second argument. The
17433 // instruction extracts the word from its second input register and inserts
17434 // it into its first input register, so swap the first and second arguments.
17435 std::swap(Op0, Op1);
17436
17437 // Need to cast the second argument from a vector of unsigned int to a
17438 // vector of long long.
17439 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
17440
17441 if (getTarget().isLittleEndian()) {
17442 // Reverse the double words in the vector we will extract from.
17443 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17444 Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0});
17445
17446 // Reverse the index.
17447 Index = MaxIndex - Index;
17448 }
17449
17450 // Intrinsic expects the first arg to be a vector of int.
17451 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
17452 Op2 = ConstantInt::getSigned(Int32Ty, Index);
17453 return Builder.CreateCall(F, {Op0, Op1, Op2});
17454 }
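// Worked example: with a clamped index of 4 on a little-endian target, the
// doublewords of the source vector are swapped and the immediate becomes
// 12 - 4 = 8, so the same builtin index selects the intended word.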
17455
17456 case PPC::BI__builtin_vsx_extractuword: {
17457 Value *Op0 = EmitScalarExpr(E->getArg(0));
17458 Value *Op1 = EmitScalarExpr(E->getArg(1));
17459 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
17460
17461 // Intrinsic expects the first argument to be a vector of doublewords.
17462 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17463
17464 // The second argument is a compile time constant int that needs to
17465 // be clamped to the range [0, 12].
17466 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1);
17467 assert(ArgCI &&
17468 "Second Arg to xxextractuw intrinsic must be a constant integer!");
17469 const int64_t MaxIndex = 12;
17470 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
17471
17472 if (getTarget().isLittleEndian()) {
17473 // Reverse the index.
17474 Index = MaxIndex - Index;
17475 Op1 = ConstantInt::getSigned(Int32Ty, Index);
17476
17477 // Emit the call, then reverse the double words of the results vector.
17478 Value *Call = Builder.CreateCall(F, {Op0, Op1});
17479
17480 Value *ShuffleCall =
17481 Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
17482 return ShuffleCall;
17483 } else {
17484 Op1 = ConstantInt::getSigned(Int32Ty, Index);
17485 return Builder.CreateCall(F, {Op0, Op1});
17486 }
17487 }
17488
17489 case PPC::BI__builtin_vsx_xxpermdi: {
17490 Value *Op0 = EmitScalarExpr(E->getArg(0));
17491 Value *Op1 = EmitScalarExpr(E->getArg(1));
17492 Value *Op2 = EmitScalarExpr(E->getArg(2));
17493 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17494 assert(ArgCI && "Third arg must be constant integer!");
17495
17496 unsigned Index = ArgCI->getZExtValue();
17497 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17498 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
17499
17500 // Account for endianness by treating this as just a shuffle. So we use the
17501 // same indices for both LE and BE in order to produce expected results in
17502 // both cases.
17503 int ElemIdx0 = (Index & 2) >> 1;
17504 int ElemIdx1 = 2 + (Index & 1);
17505
17506 int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
17507 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
17508 QualType BIRetType = E->getType();
17509 auto RetTy = ConvertType(BIRetType);
17510 return Builder.CreateBitCast(ShuffleCall, RetTy);
17511 }
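// Worked example: for Index = 3 the mask is ElemIdx0 = (3 & 2) >> 1 = 1 and
// ElemIdx1 = 2 + (3 & 1) = 3, i.e. {1, 3}: the second doubleword of Op0
// followed by the second doubleword of Op1.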
17512
17513 case PPC::BI__builtin_vsx_xxsldwi: {
17514 Value *Op0 = EmitScalarExpr(E->getArg(0));
17515 Value *Op1 = EmitScalarExpr(E->getArg(1));
17516 Value *Op2 = EmitScalarExpr(E->getArg(2));
17517 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17518 assert(ArgCI && "Third argument must be a compile time constant");
17519 unsigned Index = ArgCI->getZExtValue() & 0x3;
17520 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
17521 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4));
17522
17523 // Create a shuffle mask
17524 int ElemIdx0;
17525 int ElemIdx1;
17526 int ElemIdx2;
17527 int ElemIdx3;
17528 if (getTarget().isLittleEndian()) {
17529 // Little endian element N comes from element 8+N-Index of the
17530 // concatenated wide vector (of course, using modulo arithmetic on
17531 // the total number of elements).
17532 ElemIdx0 = (8 - Index) % 8;
17533 ElemIdx1 = (9 - Index) % 8;
17534 ElemIdx2 = (10 - Index) % 8;
17535 ElemIdx3 = (11 - Index) % 8;
17536 } else {
17537 // Big endian ElemIdx<N> = Index + N
17538 ElemIdx0 = Index;
17539 ElemIdx1 = Index + 1;
17540 ElemIdx2 = Index + 2;
17541 ElemIdx3 = Index + 3;
17542 }
17543
17544 int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
17545 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
17546 QualType BIRetType = E->getType();
17547 auto RetTy = ConvertType(BIRetType);
17548 return Builder.CreateBitCast(ShuffleCall, RetTy);
17549 }
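// Worked example: for Index = 1 the mask is {7, 0, 1, 2} on little endian and
// {1, 2, 3, 4} on big endian (mask indices 0-3 select from Op0, 4-7 from Op1).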
17550
17551 case PPC::BI__builtin_pack_vector_int128: {
17552 Value *Op0 = EmitScalarExpr(E->getArg(0));
17553 Value *Op1 = EmitScalarExpr(E->getArg(1));
17554 bool isLittleEndian = getTarget().isLittleEndian();
17555 Value *PoisonValue =
17556 llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));
17557 Value *Res = Builder.CreateInsertElement(
17558 PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
17559 Res = Builder.CreateInsertElement(Res, Op1,
17560 (uint64_t)(isLittleEndian ? 0 : 1));
17561 return Builder.CreateBitCast(Res, ConvertType(E->getType()));
17562 }
17563
17564 case PPC::BI__builtin_unpack_vector_int128: {
17565 Value *Op0 = EmitScalarExpr(E->getArg(0));
17566 Value *Op1 = EmitScalarExpr(E->getArg(1));
17567 ConstantInt *Index = cast<ConstantInt>(Op1);
17568 Value *Unpacked = Builder.CreateBitCast(
17569 Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
17570
17571 if (getTarget().isLittleEndian())
17572 Index =
17573 ConstantInt::get(Index->getIntegerType(), 1 - Index->getZExtValue());
17574
17575 return Builder.CreateExtractElement(Unpacked, Index);
17576 }
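// Worked example: requesting element 0 on a little-endian target extracts
// element 1 of the bitcast two-element vector (and vice versa), so the
// builtin returns the same logical half on both endiannesses.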
17577
17578 case PPC::BI__builtin_ppc_sthcx: {
17579 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
17580 Value *Op0 = EmitScalarExpr(E->getArg(0));
17581 Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty);
17582 return Builder.CreateCall(F, {Op0, Op1});
17583 }
17584
17585 // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
17586 // Some of the MMA instructions accumulate their result into an existing
17587 // accumulator whereas the others generate a new accumulator. So we need
17588 // custom code generation to expand such a builtin call into a load of the
17589 // accumulator (if the corresponding instruction accumulates its result),
17590 // followed by the call to the intrinsic and a store of the result.
17591#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \
17592 case PPC::BI__builtin_##Name:
17593#include "clang/Basic/BuiltinsPPC.def"
17594 {
17595 SmallVector<Value *, 4> Ops;
17596 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
17597 if (E->getArg(i)->getType()->isArrayType())
17598 Ops.push_back(EmitArrayToPointerDecay(E->getArg(i)).getPointer());
17599 else
17600 Ops.push_back(EmitScalarExpr(E->getArg(i)));
17601 // The first argument of these builtins is a pointer used to store their
17602 // result. However, the llvm intrinsics return their result in multiple
17603 // return values. So, here we emit code extracting these values from the
17604 // intrinsic results and storing them using that pointer.
17605 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
17606 BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
17607 BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
17608 unsigned NumVecs = 2;
17609 auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
17610 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
17611 NumVecs = 4;
17612 Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
17613 }
17614 llvm::Function *F = CGM.getIntrinsic(Intrinsic);
17615 Address Addr = EmitPointerWithAlignment(E->getArg(1));
17616 Value *Vec = Builder.CreateLoad(Addr);
17617 Value *Call = Builder.CreateCall(F, {Vec});
17618 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);
17619 Value *Ptr = Ops[0];
17620 for (unsigned i=0; i<NumVecs; i++) {
17621 Value *Vec = Builder.CreateExtractValue(Call, i);
17622 llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i);
17623 Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index);
17624 Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));
17625 }
17626 return Call;
17627 }
17628 if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
17629 BuiltinID == PPC::BI__builtin_mma_build_acc) {
17630 // Reverse the order of the operands for LE, so the
17631 // same builtin call can be used on both LE and BE
17632 // without the need for the programmer to swap operands.
17633 // The operands are reversed starting from the second argument;
17634 // the first operand is the pointer to the pair/accumulator
17635 // that is being built.
17636 if (getTarget().isLittleEndian())
17637 std::reverse(Ops.begin() + 1, Ops.end());
17638 }
17639 bool Accumulate;
17640 switch (BuiltinID) {
17641 #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \
17642 case PPC::BI__builtin_##Name: \
17643 ID = Intrinsic::ppc_##Intr; \
17644 Accumulate = Acc; \
17645 break;
17646 #include "clang/Basic/BuiltinsPPC.def"
17647 }
17648 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
17649 BuiltinID == PPC::BI__builtin_vsx_stxvp ||
17650 BuiltinID == PPC::BI__builtin_mma_lxvp ||
17651 BuiltinID == PPC::BI__builtin_mma_stxvp) {
17652 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
17653 BuiltinID == PPC::BI__builtin_mma_lxvp) {
17654 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
17655 } else {
17656 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
17657 }
17658 Ops.pop_back();
17659 llvm::Function *F = CGM.getIntrinsic(ID);
17660 return Builder.CreateCall(F, Ops, "");
17661 }
17662 SmallVector<Value*, 4> CallOps;
17663 if (Accumulate) {
17664 Address Addr = EmitPointerWithAlignment(E->getArg(0));
17665 Value *Acc = Builder.CreateLoad(Addr);
17666 CallOps.push_back(Acc);
17667 }
17668 for (unsigned i=1; i<Ops.size(); i++)
17669 CallOps.push_back(Ops[i]);
17670 llvm::Function *F = CGM.getIntrinsic(ID);
17671 Value *Call = Builder.CreateCall(F, CallOps);
17672 return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign(64));
17673 }
17674
17675 case PPC::BI__builtin_ppc_compare_and_swap:
17676 case PPC::BI__builtin_ppc_compare_and_swaplp: {
17677 Address Addr = EmitPointerWithAlignment(E->getArg(0));
17678 Address OldValAddr = EmitPointerWithAlignment(E->getArg(1));
17679 Value *OldVal = Builder.CreateLoad(OldValAddr);
17680 QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();
17681 LValue LV = MakeAddrLValue(Addr, AtomicTy);
17682 Value *Op2 = EmitScalarExpr(E->getArg(2));
17683 auto Pair = EmitAtomicCompareExchange(
17684 LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(),
17685 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
17686 // Unlike C11's atomic_compare_exchange, according to
17687 // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
17688 // > In either case, the contents of the memory location specified by addr
17689 // > are copied into the memory location specified by old_val_addr.
17690 // But it does not specify whether the store to OldValAddr is atomic or
17691 // which ordering to use. Following XL's codegen, treat it as a normal
17692 // store.
17693 Value *LoadedVal = Pair.first.getScalarVal();
17694 Builder.CreateStore(LoadedVal, OldValAddr);
17695 return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());
17696 }
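// Illustrative summary: this emits a monotonic compare-exchange, copies the
// loaded value back to *old_val_addr with a plain (non-atomic) store, and
// returns the success flag zero-extended to int.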
17697 case PPC::BI__builtin_ppc_fetch_and_add:
17698 case PPC::BI__builtin_ppc_fetch_and_addlp: {
17699 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
17700 llvm::AtomicOrdering::Monotonic);
17701 }
17702 case PPC::BI__builtin_ppc_fetch_and_and:
17703 case PPC::BI__builtin_ppc_fetch_and_andlp: {
17704 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
17705 llvm::AtomicOrdering::Monotonic);
17706 }
17707
17708 case PPC::BI__builtin_ppc_fetch_and_or:
17709 case PPC::BI__builtin_ppc_fetch_and_orlp: {
17710 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
17711 llvm::AtomicOrdering::Monotonic);
17712 }
17713 case PPC::BI__builtin_ppc_fetch_and_swap:
17714 case PPC::BI__builtin_ppc_fetch_and_swaplp: {
17715 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
17716 llvm::AtomicOrdering::Monotonic);
17717 }
17718 case PPC::BI__builtin_ppc_ldarx:
17719 case PPC::BI__builtin_ppc_lwarx:
17720 case PPC::BI__builtin_ppc_lharx:
17721 case PPC::BI__builtin_ppc_lbarx:
17722 return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E);
17723 case PPC::BI__builtin_ppc_mfspr: {
17724 Value *Op0 = EmitScalarExpr(E->getArg(0));
17725 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
17726 ? Int32Ty
17727 : Int64Ty;
17728 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
17729 return Builder.CreateCall(F, {Op0});
17730 }
17731 case PPC::BI__builtin_ppc_mtspr: {
17732 Value *Op0 = EmitScalarExpr(E->getArg(0));
17733 Value *Op1 = EmitScalarExpr(E->getArg(1));
17734 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
17735 ? Int32Ty
17736 : Int64Ty;
17737 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
17738 return Builder.CreateCall(F, {Op0, Op1});
17739 }
17740 case PPC::BI__builtin_ppc_popcntb: {
17741 Value *ArgValue = EmitScalarExpr(E->getArg(0));
17742 llvm::Type *ArgType = ArgValue->getType();
17743 Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
17744 return Builder.CreateCall(F, {ArgValue}, "popcntb");
17745 }
17746 case PPC::BI__builtin_ppc_mtfsf: {
17747 // The builtin takes a uint32 that needs to be cast to an
17748 // f64 to be passed to the intrinsic.
17749 Value *Op0 = EmitScalarExpr(E->getArg(0));
17750 Value *Op1 = EmitScalarExpr(E->getArg(1));
17751 Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy);
17752 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
17753 return Builder.CreateCall(F, {Op0, Cast}, "");
17754 }
17755
17756 case PPC::BI__builtin_ppc_swdiv_nochk:
17757 case PPC::BI__builtin_ppc_swdivs_nochk: {
17758 Value *Op0 = EmitScalarExpr(E->getArg(0));
17759 Value *Op1 = EmitScalarExpr(E->getArg(1));
17760 FastMathFlags FMF = Builder.getFastMathFlags();
17761 Builder.getFastMathFlags().setFast();
17762 Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk");
17763 Builder.getFastMathFlags() &= (FMF);
17764 return FDiv;
17765 }
17766 case PPC::BI__builtin_ppc_fric:
17767 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17768 *this, E, Intrinsic::rint,
17769 Intrinsic::experimental_constrained_rint))
17770 .getScalarVal();
17771 case PPC::BI__builtin_ppc_frim:
17772 case PPC::BI__builtin_ppc_frims:
17773 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17774 *this, E, Intrinsic::floor,
17775 Intrinsic::experimental_constrained_floor))
17776 .getScalarVal();
17777 case PPC::BI__builtin_ppc_frin:
17778 case PPC::BI__builtin_ppc_frins:
17779 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17780 *this, E, Intrinsic::round,
17781 Intrinsic::experimental_constrained_round))
17782 .getScalarVal();
17783 case PPC::BI__builtin_ppc_frip:
17784 case PPC::BI__builtin_ppc_frips:
17785 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17786 *this, E, Intrinsic::ceil,
17787 Intrinsic::experimental_constrained_ceil))
17788 .getScalarVal();
17789 case PPC::BI__builtin_ppc_friz:
17790 case PPC::BI__builtin_ppc_frizs:
17791 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17792 *this, E, Intrinsic::trunc,
17793 Intrinsic::experimental_constrained_trunc))
17794 .getScalarVal();
17795 case PPC::BI__builtin_ppc_fsqrt:
17796 case PPC::BI__builtin_ppc_fsqrts:
17797 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17798 *this, E, Intrinsic::sqrt,
17799 Intrinsic::experimental_constrained_sqrt))
17800 .getScalarVal();
17801 case PPC::BI__builtin_ppc_test_data_class: {
17802 Value *Op0 = EmitScalarExpr(E->getArg(0));
17803 Value *Op1 = EmitScalarExpr(E->getArg(1));
17804 return Builder.CreateCall(
17805 CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()),
17806 {Op0, Op1}, "test_data_class");
17807 }
17808 case PPC::BI__builtin_ppc_maxfe: {
17809 Value *Op0 = EmitScalarExpr(E->getArg(0));
17810 Value *Op1 = EmitScalarExpr(E->getArg(1));
17811 Value *Op2 = EmitScalarExpr(E->getArg(2));
17812 Value *Op3 = EmitScalarExpr(E->getArg(3));
17813 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe),
17814 {Op0, Op1, Op2, Op3});
17815 }
17816 case PPC::BI__builtin_ppc_maxfl: {
17817 Value *Op0 = EmitScalarExpr(E->getArg(0));
17818 Value *Op1 = EmitScalarExpr(E->getArg(1));
17819 Value *Op2 = EmitScalarExpr(E->getArg(2));
17820 Value *Op3 = EmitScalarExpr(E->getArg(3));
17821 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl),
17822 {Op0, Op1, Op2, Op3});
17823 }
17824 case PPC::BI__builtin_ppc_maxfs: {
17825 Value *Op0 = EmitScalarExpr(E->getArg(0));
17826 Value *Op1 = EmitScalarExpr(E->getArg(1));
17827 Value *Op2 = EmitScalarExpr(E->getArg(2));
17828 Value *Op3 = EmitScalarExpr(E->getArg(3));
17829 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs),
17830 {Op0, Op1, Op2, Op3});
17831 }
17832 case PPC::BI__builtin_ppc_minfe: {
17833 Value *Op0 = EmitScalarExpr(E->getArg(0));
17834 Value *Op1 = EmitScalarExpr(E->getArg(1));
17835 Value *Op2 = EmitScalarExpr(E->getArg(2));
17836 Value *Op3 = EmitScalarExpr(E->getArg(3));
17837 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe),
17838 {Op0, Op1, Op2, Op3});
17839 }
17840 case PPC::BI__builtin_ppc_minfl: {
17841 Value *Op0 = EmitScalarExpr(E->getArg(0));
17842 Value *Op1 = EmitScalarExpr(E->getArg(1));
17843 Value *Op2 = EmitScalarExpr(E->getArg(2));
17844 Value *Op3 = EmitScalarExpr(E->getArg(3));
17845 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl),
17846 {Op0, Op1, Op2, Op3});
17847 }
17848 case PPC::BI__builtin_ppc_minfs: {
17849 Value *Op0 = EmitScalarExpr(E->getArg(0));
17850 Value *Op1 = EmitScalarExpr(E->getArg(1));
17851 Value *Op2 = EmitScalarExpr(E->getArg(2));
17852 Value *Op3 = EmitScalarExpr(E->getArg(3));
17853 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs),
17854 {Op0, Op1, Op2, Op3});
17855 }
17856 case PPC::BI__builtin_ppc_swdiv:
17857 case PPC::BI__builtin_ppc_swdivs: {
17858 Value *Op0 = EmitScalarExpr(E->getArg(0));
17859 Value *Op1 = EmitScalarExpr(E->getArg(1));
17860 return Builder.CreateFDiv(Op0, Op1, "swdiv");
17861 }
17862 case PPC::BI__builtin_ppc_set_fpscr_rn:
17863 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd),
17864 {EmitScalarExpr(E->getArg(0))});
17865 case PPC::BI__builtin_ppc_mffs:
17866 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm));
17867 }
17868}
17869
17870namespace {
17871// If \p E is not a null pointer, insert an address space cast to match the return
17872// type of \p E if necessary.
17873Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
17874 const CallExpr *E = nullptr) {
17875 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
17876 auto *Call = CGF.Builder.CreateCall(F);
17877 Call->addRetAttr(
17878 Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
17879 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
17880 if (!E)
17881 return Call;
17882 QualType BuiltinRetType = E->getType();
17883 auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
17884 if (RetTy == Call->getType())
17885 return Call;
17886 return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
17887}
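// Note: the dereferenceable(64) and align(4) return attributes set above let
// later passes assume the dispatch pointer covers the 64-byte HSA kernel
// dispatch packet that the helpers below index into.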
17888
17889Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
17890 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
17891 auto *Call = CGF.Builder.CreateCall(F);
17892 Call->addRetAttr(
17893 Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
17894 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
17895 return Call;
17896}
17897
17898// \p Index is 0, 1, or 2 for the x, y, and z dimensions, respectively.
17899/// Emit code based on Code Object ABI version.
17900/// COV_4 : Emit code to use dispatch ptr
17901/// COV_5+ : Emit code to use implicitarg ptr
17902/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
17903/// and use its value to select the COV_4 or COV_5+ approach. It is used for
17904/// compiling device libraries in an ABI-agnostic way.
17905///
17906/// Note: "__oclc_ABI_version" is supposed to be emitted and initialized by
17907/// clang during compilation of user code.
17908Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
17909 llvm::LoadInst *LD;
17910
17911 auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
17912
17913 if (Cov == CodeObjectVersionKind::COV_None) {
17914 StringRef Name = "__oclc_ABI_version";
17915 auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
17916 if (!ABIVersionC)
17917 ABIVersionC = new llvm::GlobalVariable(
17918 CGF.CGM.getModule(), CGF.Int32Ty, false,
17919 llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
17920 llvm::GlobalVariable::NotThreadLocal,
17921 CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
17922
17923 // This load will be eliminated by IPSCCP because it is constant
17924 // weak_odr without externally_initialized. Either changing it to weak or
17925 // adding externally_initialized will keep the load.
17926 Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
17927 CGF.CGM.getIntAlign());
17928
17929 Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
17930 ABIVersion,
17931 llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
17932
17933 // Indexing the implicit kernarg segment.
17934 Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
17935 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
17936
17937 // Indexing the HSA kernel_dispatch_packet struct.
17938 Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(
17939 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
17940
17941 auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);
17942 LD = CGF.Builder.CreateLoad(
17943 Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
17944 } else {
17945 Value *GEP = nullptr;
17946 if (Cov >= CodeObjectVersionKind::COV_5) {
17947 // Indexing the implicit kernarg segment.
17948 GEP = CGF.Builder.CreateConstGEP1_32(
17949 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
17950 } else {
17951 // Indexing the HSA kernel_dispatch_packet struct.
17952 GEP = CGF.Builder.CreateConstGEP1_32(
17953 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
17954 }
17955 LD = CGF.Builder.CreateLoad(
17956 Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
17957 }
17958
17959 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
17960 llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
17961 APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
17962 LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
17963 LD->setMetadata(llvm::LLVMContext::MD_noundef,
17964 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
17965 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
17966 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
17967 return LD;
17968}
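// Worked example: for Index == 1 (the y dimension) the 16-bit workgroup size
// is loaded from implicitarg_ptr + 14 on COV_5+ and from dispatch_ptr + 6 on
// COV_4, and the range metadata bounds the value to [1, max OpenCL workgroup size].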
17969
17970// \p Index is 0, 1, or 2 for the x, y, and z dimensions, respectively.
17971Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
17972 const unsigned XOffset = 12;
17973 auto *DP = EmitAMDGPUDispatchPtr(CGF);
17974 // Indexing the HSA kernel_dispatch_packet struct.
17975 auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
17976 auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
17977 auto *LD = CGF.Builder.CreateLoad(
17978 Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));
17979 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
17980 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
17981 return LD;
17982}
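// With XOffset == 12 and a stride of 4 bytes, the x, y and z grid sizes are
// read as invariant 32-bit loads from dispatch-packet offsets 12, 16 and 20.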
17983} // namespace
17984
17985// For processing memory ordering and memory scope arguments of various
17986// amdgcn builtins.
17987 // \p Order takes a C++11-compatible memory-ordering specifier and converts
17988 // it into LLVM's memory ordering specifier using the atomic C ABI, and writes
17989 // it to \p AO. \p Scope takes a const char * and converts it into an
17990 // AMDGCN-specific SyncScopeID and writes it to \p SSID.
17991void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
17992 llvm::AtomicOrdering &AO,
17993 llvm::SyncScope::ID &SSID) {
17994 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
17995
17996 // Map C11/C++11 memory ordering to LLVM memory ordering
17997 assert(llvm::isValidAtomicOrderingCABI(ord));
17998 switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
17999 case llvm::AtomicOrderingCABI::acquire:
18000 case llvm::AtomicOrderingCABI::consume:
18001 AO = llvm::AtomicOrdering::Acquire;
18002 break;
18003 case llvm::AtomicOrderingCABI::release:
18004 AO = llvm::AtomicOrdering::Release;
18005 break;
18006 case llvm::AtomicOrderingCABI::acq_rel:
18007 AO = llvm::AtomicOrdering::AcquireRelease;
18008 break;
18009 case llvm::AtomicOrderingCABI::seq_cst:
18010 AO = llvm::AtomicOrdering::SequentiallyConsistent;
18011 break;
18012 case llvm::AtomicOrderingCABI::relaxed:
18013 AO = llvm::AtomicOrdering::Monotonic;
18014 break;
18015 }
18016
18017 StringRef scp;
18018 llvm::getConstantStringInfo(Scope, scp);
18019 SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
18020}
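// For example, __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup") passes
// through this mapping and becomes a seq_cst fence with syncscope("workgroup");
// an empty scope string should map to LLVM's default (system) sync scope.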
18021
18022llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
18023 unsigned Idx,
18024 const CallExpr *E) {
18025 llvm::Value *Arg = nullptr;
18026 if ((ICEArguments & (1 << Idx)) == 0) {
18027 Arg = EmitScalarExpr(E->getArg(Idx));
18028 } else {
18029 // If this is required to be a constant, constant fold it so that we
18030 // know that the generated intrinsic gets a ConstantInt.
18031 std::optional<llvm::APSInt> Result =
18032 E->getArg(Idx)->getIntegerConstantExpr(getContext());
18033 assert(Result && "Expected argument to be a constant");
18034 Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
18035 }
18036 return Arg;
18037}
18038
18039Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
18040 const CallExpr *E) {
18041 if (!getLangOpts().HLSL)
18042 return nullptr;
18043
18044 switch (BuiltinID) {
18045 case Builtin::BI__builtin_hlsl_elementwise_any: {
18046 Value *Op0 = EmitScalarExpr(E->getArg(0));
18047 return Builder.CreateIntrinsic(
18048 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
18049 Intrinsic::dx_any, ArrayRef<Value *>{Op0}, nullptr, "dx.any");
18050 }
18051 case Builtin::BI__builtin_hlsl_elementwise_clamp: {
18052 Value *OpX = EmitScalarExpr(E->getArg(0));
18053 Value *OpMin = EmitScalarExpr(E->getArg(1));
18054 Value *OpMax = EmitScalarExpr(E->getArg(2));
18055
18056 QualType Ty = E->getArg(0)->getType();
18057 bool IsUnsigned = false;
18058 if (auto *VecTy = Ty->getAs<VectorType>())
18059 Ty = VecTy->getElementType();
18060 IsUnsigned = Ty->isUnsignedIntegerType();
18061 return Builder.CreateIntrinsic(
18062 /*ReturnType=*/OpX->getType(),
18063 IsUnsigned ? Intrinsic::dx_uclamp : Intrinsic::dx_clamp,
18064 ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "dx.clamp");
18065 }
18066 case Builtin::BI__builtin_hlsl_dot: {
18067 Value *Op0 = EmitScalarExpr(E->getArg(0));
18068 Value *Op1 = EmitScalarExpr(E->getArg(1));
18069 llvm::Type *T0 = Op0->getType();
18070 llvm::Type *T1 = Op1->getType();
18071 if (!T0->isVectorTy() && !T1->isVectorTy()) {
18072 if (T0->isFloatingPointTy())
18073 return Builder.CreateFMul(Op0, Op1, "dx.dot");
18074
18075 if (T0->isIntegerTy())
18076 return Builder.CreateMul(Op0, Op1, "dx.dot");
18077
18078 // Bools should have been promoted
18079 llvm_unreachable(
18080 "Scalar dot product is only supported on ints and floats.");
18081 }
18082 // A VectorSplat should have happened
18083 assert(T0->isVectorTy() && T1->isVectorTy() &&
18084 "Dot product of vector and scalar is not supported.");
18085
18086 // A vector sext or sitofp should have happened
18087 assert(T0->getScalarType() == T1->getScalarType() &&
18088 "Dot product of vectors need the same element types.");
18089
18090 [[maybe_unused]] auto *VecTy0 =
18091 E->getArg(0)->getType()->getAs<VectorType>();
18092 [[maybe_unused]] auto *VecTy1 =
18093 E->getArg(1)->getType()->getAs<VectorType>();
18094 // An HLSLVectorTruncation should have happened
18095 assert(VecTy0->getNumElements() == VecTy1->getNumElements() &&
18096 "Dot product requires vectors to be of the same size.");
18097
18098 return Builder.CreateIntrinsic(
18099 /*ReturnType=*/T0->getScalarType(), Intrinsic::dx_dot,
18100 ArrayRef<Value *>{Op0, Op1}, nullptr, "dx.dot");
18101 } break;
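// In short: dot() on two scalars degenerates to a plain mul/fmul, while the
// vector forms lower to the dx.dot intrinsic mangled on the element type; the
// asserts document that Sema has already splatted, converted and truncated the
// operands to matching vector shapes.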
18102 case Builtin::BI__builtin_hlsl_lerp: {
18103 Value *X = EmitScalarExpr(E->getArg(0));
18104 Value *Y = EmitScalarExpr(E->getArg(1));
18105 Value *S = EmitScalarExpr(E->getArg(2));
18106 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18107 llvm_unreachable("lerp operand must have a float representation");
18108 return Builder.CreateIntrinsic(
18109 /*ReturnType=*/X->getType(), Intrinsic::dx_lerp,
18110 ArrayRef<Value *>{X, Y, S}, nullptr, "dx.lerp");
18111 }
18112 case Builtin::BI__builtin_hlsl_elementwise_frac: {
18113 Value *Op0 = EmitScalarExpr(E->getArg(0));
18114 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18115 llvm_unreachable("frac operand must have a float representation");
18116 return Builder.CreateIntrinsic(
18117 /*ReturnType=*/Op0->getType(), Intrinsic::dx_frac,
18118 ArrayRef<Value *>{Op0}, nullptr, "dx.frac");
18119 }
18120 case Builtin::BI__builtin_hlsl_elementwise_isinf: {
18121 Value *Op0 = EmitScalarExpr(E->getArg(0));
18122 llvm::Type *Xty = Op0->getType();
18123 llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext());
18124 if (Xty->isVectorTy()) {
18125 auto *XVecTy = E->getArg(0)->getType()->getAs<VectorType>();
18126 retType = llvm::VectorType::get(
18127 retType, ElementCount::getFixed(XVecTy->getNumElements()));
18128 }
18129 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18130 llvm_unreachable("isinf operand must have a float representation");
18131 return Builder.CreateIntrinsic(retType, Intrinsic::dx_isinf,
18132 ArrayRef<Value *>{Op0}, nullptr, "dx.isinf");
18133 }
18134 case Builtin::BI__builtin_hlsl_mad: {
18135 Value *M = EmitScalarExpr(E->getArg(0));
18136 Value *A = EmitScalarExpr(E->getArg(1));
18137 Value *B = EmitScalarExpr(E->getArg(2));
18138 if (E->getArg(0)->getType()->hasFloatingRepresentation()) {
18139 return Builder.CreateIntrinsic(
18140 /*ReturnType*/ M->getType(), Intrinsic::fmuladd,
18141 ArrayRef<Value *>{M, A, B}, nullptr, "dx.fmad");
18142 }
18143 if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) {
18144 return Builder.CreateIntrinsic(
18145 /*ReturnType*/ M->getType(), Intrinsic::dx_imad,
18146 ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad");
18147 }
18148 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
18149 return Builder.CreateIntrinsic(
18150 /*ReturnType=*/M->getType(), Intrinsic::dx_umad,
18151 ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");
18152 }
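// mad(m, a, b) therefore lowers to llvm.fmuladd for floating-point element
// types and to dx.imad / dx.umad for signed / unsigned integer element types.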
18153 case Builtin::BI__builtin_hlsl_elementwise_rcp: {
18154 Value *Op0 = EmitScalarExpr(E->getArg(0));
18155 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18156 llvm_unreachable("rcp operand must have a float representation");
18157 return Builder.CreateIntrinsic(
18158 /*ReturnType=*/Op0->getType(), Intrinsic::dx_rcp,
18159 ArrayRef<Value *>{Op0}, nullptr, "dx.rcp");
18160 }
18161 case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
18162 Value *Op0 = EmitScalarExpr(E->getArg(0));
18163 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18164 llvm_unreachable("rsqrt operand must have a float representation");
18165 return Builder.CreateIntrinsic(
18166 /*ReturnType=*/Op0->getType(), Intrinsic::dx_rsqrt,
18167 ArrayRef<Value *>{Op0}, nullptr, "dx.rsqrt");
18168 }
18169 }
18170 return nullptr;
18171}
18172
18173Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
18174 const CallExpr *E) {
18175 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
18176 llvm::SyncScope::ID SSID;
18177 switch (BuiltinID) {
18178 case AMDGPU::BI__builtin_amdgcn_div_scale:
18179 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
18180 // Translate from the intrinsic's struct return to the builtin's out
18181 // argument.
18182
18183 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
18184
18185 llvm::Value *X = EmitScalarExpr(E->getArg(0));
18186 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
18187 llvm::Value *Z = EmitScalarExpr(E->getArg(2));
18188
18189 llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
18190 X->getType());
18191
18192 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
18193
18194 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
18195 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
18196
18197 llvm::Type *RealFlagType = FlagOutPtr.getElementType();
18198
18199 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
18200 Builder.CreateStore(FlagExt, FlagOutPtr);
18201 return Result;
18202 }
18203 case AMDGPU::BI__builtin_amdgcn_div_fmas:
18204 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
18205 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18206 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18207 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18208 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
18209
18210 llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
18211 Src0->getType());
18212 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
18213 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
18214 }
18215
18216 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
18217 return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
18218 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
18219 return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_mov_dpp8);
18220 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
18221 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
18222 llvm::SmallVector<llvm::Value *, 6> Args;
18223 // Find out if any arguments are required to be integer constant
18224 // expressions.
18225 unsigned ICEArguments = 0;
18226 ASTContext::GetBuiltinTypeError Error;
18227 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
18228 assert(Error == ASTContext::GE_None && "Should not codegen an error");
18229 for (unsigned I = 0; I != E->getNumArgs(); ++I) {
18230 Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, I, E));
18231 }
18232 assert(Args.size() == 5 || Args.size() == 6);
18233 if (Args.size() == 5)
18234 Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType()));
18235 Function *F =
18236 CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
18237 return Builder.CreateCall(F, Args);
18238 }
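// __builtin_amdgcn_mov_dpp (5 operands) is folded into the update_dpp path by
// prepending a poison "old" value, so both builtins share one lowering above.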
18239 case AMDGPU::BI__builtin_amdgcn_div_fixup:
18240 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
18241 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
18242 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
18243 case AMDGPU::BI__builtin_amdgcn_trig_preop:
18244 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
18245 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
18246 case AMDGPU::BI__builtin_amdgcn_rcp:
18247 case AMDGPU::BI__builtin_amdgcn_rcpf:
18248 case AMDGPU::BI__builtin_amdgcn_rcph:
18249 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
18250 case AMDGPU::BI__builtin_amdgcn_sqrt:
18251 case AMDGPU::BI__builtin_amdgcn_sqrtf:
18252 case AMDGPU::BI__builtin_amdgcn_sqrth:
18253 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sqrt);
18254 case AMDGPU::BI__builtin_amdgcn_rsq:
18255 case AMDGPU::BI__builtin_amdgcn_rsqf:
18256 case AMDGPU::BI__builtin_amdgcn_rsqh:
18257 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
18258 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
18259 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
18260 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
18261 case AMDGPU::BI__builtin_amdgcn_sinf:
18262 case AMDGPU::BI__builtin_amdgcn_sinh:
18263 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
18264 case AMDGPU::BI__builtin_amdgcn_cosf:
18265 case AMDGPU::BI__builtin_amdgcn_cosh:
18266 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
18267 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
18268 return EmitAMDGPUDispatchPtr(*this, E);
18269 case AMDGPU::BI__builtin_amdgcn_logf:
18270 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log);
18271 case AMDGPU::BI__builtin_amdgcn_exp2f:
18272 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_exp2);
18273 case AMDGPU::BI__builtin_amdgcn_log_clampf:
18274 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
18275 case AMDGPU::BI__builtin_amdgcn_ldexp:
18276 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
18277 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18278 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18279 llvm::Function *F =
18280 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
18281 return Builder.CreateCall(F, {Src0, Src1});
18282 }
18283 case AMDGPU::BI__builtin_amdgcn_ldexph: {
18284 // The raw instruction has a different behavior for out of bounds exponent
18285 // values (implicit truncation instead of saturating to short_min/short_max).
18286 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18287 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18288 llvm::Function *F =
18289 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
18290 return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
18291 }
18292 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
18293 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
18294 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
18295 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
18296 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
18297 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
18298 Value *Src0 = EmitScalarExpr(E->getArg(0));
18299 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
18300 { Builder.getInt32Ty(), Src0->getType() });
18301 return Builder.CreateCall(F, Src0);
18302 }
18303 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
18304 Value *Src0 = EmitScalarExpr(E->getArg(0));
18305 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
18306 { Builder.getInt16Ty(), Src0->getType() });
18307 return Builder.CreateCall(F, Src0);
18308 }
18309 case AMDGPU::BI__builtin_amdgcn_fract:
18310 case AMDGPU::BI__builtin_amdgcn_fractf:
18311 case AMDGPU::BI__builtin_amdgcn_fracth:
18312 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
18313 case AMDGPU::BI__builtin_amdgcn_lerp:
18314 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
18315 case AMDGPU::BI__builtin_amdgcn_ubfe:
18316 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe);
18317 case AMDGPU::BI__builtin_amdgcn_sbfe:
18318 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe);
18319 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
18320 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
18321 llvm::Type *ResultType = ConvertType(E->getType());
18322 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
18323 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
18324 return Builder.CreateCall(F, { Src });
18325 }
18326 case AMDGPU::BI__builtin_amdgcn_uicmp:
18327 case AMDGPU::BI__builtin_amdgcn_uicmpl:
18328 case AMDGPU::BI__builtin_amdgcn_sicmp:
18329 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
18330 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18331 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18332 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18333
18334 // FIXME-GFX10: How should 32 bit mask be handled?
18335 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
18336 { Builder.getInt64Ty(), Src0->getType() });
18337 return Builder.CreateCall(F, { Src0, Src1, Src2 });
18338 }
18339 case AMDGPU::BI__builtin_amdgcn_fcmp:
18340 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
18341 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18342 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18343 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18344
18345 // FIXME-GFX10: How should 32 bit mask be handled?
18346 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
18347 { Builder.getInt64Ty(), Src0->getType() });
18348 return Builder.CreateCall(F, { Src0, Src1, Src2 });
18349 }
18350 case AMDGPU::BI__builtin_amdgcn_class:
18351 case AMDGPU::BI__builtin_amdgcn_classf:
18352 case AMDGPU::BI__builtin_amdgcn_classh:
18353 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
18354 case AMDGPU::BI__builtin_amdgcn_fmed3f:
18355 case AMDGPU::BI__builtin_amdgcn_fmed3h:
18356 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
18357 case AMDGPU::BI__builtin_amdgcn_ds_append:
18358 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
18359 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
18360 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
18361 Value *Src0 = EmitScalarExpr(E->getArg(0));
18362 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
18363 return Builder.CreateCall(F, { Src0, Builder.getFalse() });
18364 }
18365 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
18366 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
18367 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
18368 Intrinsic::ID Intrin;
18369 switch (BuiltinID) {
18370 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
18371 Intrin = Intrinsic::amdgcn_ds_fadd;
18372 break;
18373 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
18374 Intrin = Intrinsic::amdgcn_ds_fmin;
18375 break;
18376 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
18377 Intrin = Intrinsic::amdgcn_ds_fmax;
18378 break;
18379 }
18380 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18381 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18382 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18383 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
18384 llvm::Value *Src4 = EmitScalarExpr(E->getArg(4));
18385 llvm::Function *F = CGM.getIntrinsic(Intrin, { Src1->getType() });
18386 llvm::FunctionType *FTy = F->getFunctionType();
18387 llvm::Type *PTy = FTy->getParamType(0);
18388 Src0 = Builder.CreatePointerBitCastOrAddrSpaceCast(Src0, PTy);
18389 return Builder.CreateCall(F, { Src0, Src1, Src2, Src3, Src4 });
18390 }
18391 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
18392 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
18393 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
18394 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
18395 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
18396 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
18397 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
18398 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
18399 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
18400 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: {
18401 Intrinsic::ID IID;
18402 llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
18403 switch (BuiltinID) {
18404 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
18405 ArgTy = llvm::Type::getFloatTy(getLLVMContext());
18406 IID = Intrinsic::amdgcn_global_atomic_fadd;
18407 break;
18408 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
18409 ArgTy = llvm::FixedVectorType::get(
18410 llvm::Type::getHalfTy(getLLVMContext()), 2);
18411 IID = Intrinsic::amdgcn_global_atomic_fadd;
18412 break;
18413 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
18414 IID = Intrinsic::amdgcn_global_atomic_fadd;
18415 break;
18416 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
18417 IID = Intrinsic::amdgcn_global_atomic_fmin;
18418 break;
18419 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
18420 IID = Intrinsic::amdgcn_global_atomic_fmax;
18421 break;
18422 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
18423 IID = Intrinsic::amdgcn_flat_atomic_fadd;
18424 break;
18425 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
18426 IID = Intrinsic::amdgcn_flat_atomic_fmin;
18427 break;
18428 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
18429 IID = Intrinsic::amdgcn_flat_atomic_fmax;
18430 break;
18431 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
18432 ArgTy = llvm::Type::getFloatTy(getLLVMContext());
18433 IID = Intrinsic::amdgcn_flat_atomic_fadd;
18434 break;
18435 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
18436 ArgTy = llvm::FixedVectorType::get(
18437 llvm::Type::getHalfTy(getLLVMContext()), 2);
18438 IID = Intrinsic::amdgcn_flat_atomic_fadd;
18439 break;
18440 }
18441 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18442 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
18443 llvm::Function *F =
18444 CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
18445 return Builder.CreateCall(F, {Addr, Val});
18446 }
18447 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
18448 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
18449 Intrinsic::ID IID;
18450 switch (BuiltinID) {
18451 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
18452 IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16;
18453 break;
18454 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
18455 IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16;
18456 break;
18457 }
18458 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18459 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
18460 llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()});
18461 return Builder.CreateCall(F, {Addr, Val});
18462 }
18463 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
18464 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
18465 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: {
18466 Intrinsic::ID IID;
18467 llvm::Type *ArgTy;
18468 switch (BuiltinID) {
18469 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
18470 ArgTy = llvm::Type::getFloatTy(getLLVMContext());
18471 IID = Intrinsic::amdgcn_ds_fadd;
18472 break;
18473 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
18474 ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
18475 IID = Intrinsic::amdgcn_ds_fadd;
18476 break;
18477 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
18478 ArgTy = llvm::FixedVectorType::get(
18479 llvm::Type::getHalfTy(getLLVMContext()), 2);
18480 IID = Intrinsic::amdgcn_ds_fadd;
18481 break;
18482 }
18483 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18484 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
18485 llvm::Constant *ZeroI32 = llvm::ConstantInt::getIntegerValue(
18486 llvm::Type::getInt32Ty(getLLVMContext()), APInt(32, 0, true));
18487 llvm::Constant *ZeroI1 = llvm::ConstantInt::getIntegerValue(
18488 llvm::Type::getInt1Ty(getLLVMContext()), APInt(1, 0));
18489 llvm::Function *F = CGM.getIntrinsic(IID, {ArgTy});
18490 return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
18491 }
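// The extra zero/false operands passed above fill the remaining immediate
// parameters of the legacy llvm.amdgcn.ds.fadd signature (presumably the
// ordering, scope and volatile flags), leaving them at their defaults.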
18492 case AMDGPU::BI__builtin_amdgcn_global_load_tr_i32:
18493 case AMDGPU::BI__builtin_amdgcn_global_load_tr_v2i32:
18494 case AMDGPU::BI__builtin_amdgcn_global_load_tr_v4f16:
18495 case AMDGPU::BI__builtin_amdgcn_global_load_tr_v4i16:
18496 case AMDGPU::BI__builtin_amdgcn_global_load_tr_v8f16:
18497 case AMDGPU::BI__builtin_amdgcn_global_load_tr_v8i16: {
18498
18499 llvm::Type *ArgTy;
18500 switch (BuiltinID) {
18501 case AMDGPU::BI__builtin_amdgcn_global_load_tr_i32:
18502 ArgTy = llvm::Type::getInt32Ty(getLLVMContext());
18503 break;
18504 case AMDGPU::BI__builtin_amdgcn_global_load_tr_v2i32:
18505 ArgTy = llvm::FixedVectorType::get(
18506 llvm::Type::getInt32Ty(getLLVMContext()), 2);
18507 break;
18508 case AMDGPU::BI__builtin_amdgcn_global_load_tr_v4f16:
18509 ArgTy = llvm::FixedVectorType::get(
18510 llvm::Type::getHalfTy(getLLVMContext()), 4);
18511 break;
18512 case AMDGPU::BI__builtin_amdgcn_global_load_tr_v4i16:
18513 ArgTy = llvm::FixedVectorType::get(
18514 llvm::Type::getInt16Ty(getLLVMContext()), 4);
18515 break;
18516 case AMDGPU::BI__builtin_amdgcn_global_load_tr_v8f16:
18517 ArgTy = llvm::FixedVectorType::get(
18518 llvm::Type::getHalfTy(getLLVMContext()), 8);
18519 break;
18520 case AMDGPU::BI__builtin_amdgcn_global_load_tr_v8i16:
18521 ArgTy = llvm::FixedVectorType::get(
18522 llvm::Type::getInt16Ty(getLLVMContext()), 8);
18523 break;
18524 }
18525
18526 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18527 llvm::Function *F =
18528 CGM.getIntrinsic(Intrinsic::amdgcn_global_load_tr, {ArgTy});
18529 return Builder.CreateCall(F, {Addr});
18530 }
18531 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
18532 Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
18533 {llvm::Type::getInt64Ty(getLLVMContext())});
18534 return Builder.CreateCall(F);
18535 }
18536 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
18537 Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv,
18538 {llvm::Type::getInt64Ty(getLLVMContext())});
18539 llvm::Value *Env = EmitScalarExpr(E->getArg(0));
18540 return Builder.CreateCall(F, {Env});
18541 }
18542 case AMDGPU::BI__builtin_amdgcn_read_exec:
18543 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
18544 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
18545 return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
18546 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
18547 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
18548 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
18549 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
18550 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
18551 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
18552 llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
18553 llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
18554 llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
18555 llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
18556 llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
18557 llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
18558
18559 // The builtins take these arguments as vec4 where the last element is
18560 // ignored. The intrinsic takes them as vec3.
18561 RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
18562 ArrayRef<int>{0, 1, 2});
18563 RayDir =
18564 Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});
18565 RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
18566 ArrayRef<int>{0, 1, 2});
18567
18568 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
18569 {NodePtr->getType(), RayDir->getType()});
18570 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
18571 RayInverseDir, TextureDescr});
18572 }
18573
18574 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
18575 SmallVector<Value *, 4> Args;
18576 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
18577 Args.push_back(EmitScalarExpr(E->getArg(i)));
18578
18579 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);
18580 Value *Call = Builder.CreateCall(F, Args);
18581 Value *Rtn = Builder.CreateExtractValue(Call, 0);
18582 Value *A = Builder.CreateExtractValue(Call, 1);
18583 llvm::Type *RetTy = ConvertType(E->getType());
18584 Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
18585 (uint64_t)0);
18586 return Builder.CreateInsertElement(I0, A, 1);
18587 }
18588
18589 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
18590 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
18591 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
18592 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
18593 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
18594 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
18595 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
18596 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
18597 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
18598 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
18599 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
18600 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
18601 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
18602 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
18603 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
18604 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
18605 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
18606 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
18607 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
18608 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
18609 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
18610 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
18611 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
18612 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
18613 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
18614 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
18615 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
18616 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
18617 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
18618 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
18619 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
18620 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
18621 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
18622 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
18623 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
18624 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
18625 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
18626 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
18627 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
18628 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
18629 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
18630 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
18631 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
18632 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
18633 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
18634 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
18635 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
18636 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
18637 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
18638 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
18639 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
18640 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
18641 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
18642 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
18643 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
18644 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
18645 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
18646 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
18647 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
18648 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: {
18649
18650 // These operations perform a matrix multiplication and accumulation of
18651 // the form:
18652 // D = A * B + C
18653 // We need to specify one type for matrices AB and one for matrices CD.
18654 // Sparse matrix operations can have different types for A and B as well as
18655 // an additional type for sparsity index.
18656 // Destination type should be put before types used for source operands.
18657 SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
18658 // On GFX12, the intrinsics with 16-bit accumulator use a packed layout.
18659 // There is no need for the variable opsel argument, so always set it to
18660 // "false".
18661 bool AppendFalseForOpselArg = false;
18662 unsigned BuiltinWMMAOp;
18663
18664 switch (BuiltinID) {
18665 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
18666 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
18667 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
18668 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
18669 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18670 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
18671 break;
18672 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
18673 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
18674 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
18675 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
18676 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18677 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
18678 break;
18679 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
18680 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
18681 AppendFalseForOpselArg = true;
18682 LLVM_FALLTHROUGH;
18683 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
18684 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
18685 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18686 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
18687 break;
18688 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
18689 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
18690 AppendFalseForOpselArg = true;
18691 LLVM_FALLTHROUGH;
18692 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
18693 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
18694 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18695 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
18696 break;
18697 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
18698 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
18699 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18700 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
18701 break;
18702 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
18703 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
18704 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18705 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
18706 break;
18707 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
18708 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
18709 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
18710 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
18711 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
18712 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
18713 break;
18714 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
18715 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
18716 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
18717 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
18718 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
18719 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
18720 break;
18721 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
18722 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
18723 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18724 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
18725 break;
18726 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
18727 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
18728 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18729 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
18730 break;
18731 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
18732 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
18733 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18734 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
18735 break;
18736 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
18737 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
18738 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18739 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
18740 break;
18741 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
18742 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
18743 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
18744 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
18745 break;
18746 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
18747 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
18748 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18749 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
18750 break;
18751 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
18752 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
18753 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18754 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
18755 break;
18756 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
18757 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
18758 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18759 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
18760 break;
18761 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
18762 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
18763 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18764 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
18765 break;
18766 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
18767 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
18768 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
18769 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
18770 break;
18771 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
18772 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
18773 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
18774 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
18775 break;
18776 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
18777 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
18778 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
18779 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
18780 break;
18781 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
18782 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
18783 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18784 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
18785 break;
18786 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
18787 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
18788 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18789 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
18790 break;
18791 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
18792 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
18793 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18794 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
18795 break;
18796 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
18797 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
18798 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18799 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
18800 break;
18801 }
18802
18803 SmallVector<Value *, 6> Args;
18804 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
18805 Args.push_back(EmitScalarExpr(E->getArg(i)));
18806 if (AppendFalseForOpselArg)
18807 Args.push_back(Builder.getFalse());
18808
18809 SmallVector<llvm::Type *, 6> ArgTypes;
18810 for (auto ArgIdx : ArgsForMatchingMatrixTypes)
18811 ArgTypes.push_back(Args[ArgIdx]->getType());
18812
18813 Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
18814 return Builder.CreateCall(F, Args);
18815 }
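// The WMMA/SWMMAC intrinsics are overloaded on their matrix operand types:
// ArgsForMatchingMatrixTypes records which call operands (destination first,
// then sources) supply the types used to mangle BuiltinWMMAOp.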
18816
18817 // amdgcn workitem
18818 case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
18819 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
18820 case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
18821 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
18822 case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
18823 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
18824
18825 // amdgcn workgroup size
18826 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
18827 return EmitAMDGPUWorkGroupSize(*this, 0);
18828 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
18829 return EmitAMDGPUWorkGroupSize(*this, 1);
18830 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
18831 return EmitAMDGPUWorkGroupSize(*this, 2);
18832
18833 // amdgcn grid size
18834 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
18835 return EmitAMDGPUGridSize(*this, 0);
18836 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
18837 return EmitAMDGPUGridSize(*this, 1);
18838 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
18839 return EmitAMDGPUGridSize(*this, 2);
18840
18841 // r600 intrinsics
18842 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
18843 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
18844 return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
18845 case AMDGPU::BI__builtin_r600_read_tidig_x:
18846 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
18847 case AMDGPU::BI__builtin_r600_read_tidig_y:
18848 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
18849 case AMDGPU::BI__builtin_r600_read_tidig_z:
18850 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
18851 case AMDGPU::BI__builtin_amdgcn_alignbit: {
18852 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18853 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18854 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18855 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
18856 return Builder.CreateCall(F, { Src0, Src1, Src2 });
18857 }
18858 case AMDGPU::BI__builtin_amdgcn_fence: {
18859 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
18860 EmitScalarExpr(E->getArg(1)), AO, SSID);
18861 return Builder.CreateFence(AO, SSID);
18862 }
18863 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
18864 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
18865 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
18866 case AMDGPU::BI__builtin_amdgcn_atomic_dec64: {
18867 llvm::AtomicRMWInst::BinOp BinOp;
18868 switch (BuiltinID) {
18869 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
18870 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
18871 BinOp = llvm::AtomicRMWInst::UIncWrap;
18872 break;
18873 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
18874 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
18875 BinOp = llvm::AtomicRMWInst::UDecWrap;
18876 break;
18877 }
18878
18879 Address Ptr = CheckAtomicAlignment(*this, E);
18880 Value *Val = EmitScalarExpr(E->getArg(1));
18881
18882 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
18883 EmitScalarExpr(E->getArg(3)), AO, SSID);
18884
18885 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
18886 bool Volatile =
18887 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
18888
18889 llvm::AtomicRMWInst *RMW =
18890 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
18891 if (Volatile)
18892 RMW->setVolatile(true);
18893 return RMW;
18894 }
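// These builtins map directly to atomicrmw uinc_wrap / udec_wrap, with the
// ordering and sync scope taken from arguments 2 and 3 and volatility
// propagated from the pointee type of the pointer argument.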
18895 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
18896 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
18897 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
18898 llvm::Type *ResultType = ConvertType(E->getType());
18899 // s_sendmsg_rtn is mangled using return type only.
18900 Function *F =
18901 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
18902 return Builder.CreateCall(F, {Arg});
18903 }
18904 default:
18905 return nullptr;
18906 }
18907}
18908
18909/// Handle a SystemZ function in which the final argument is a pointer
18910/// to an int that receives the post-instruction CC value. At the LLVM level
18911/// this is represented as a function that returns a {result, cc} pair.
18912static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
18913 unsigned IntrinsicID,
18914 const CallExpr *E) {
18915 unsigned NumArgs = E->getNumArgs() - 1;
18916 SmallVector<Value *, 8> Args(NumArgs);
18917 for (unsigned I = 0; I < NumArgs; ++I)
18918 Args[I] = CGF.EmitScalarExpr(E->getArg(I));
18919 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
18920 Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
18921 Value *Call = CGF.Builder.CreateCall(F, Args);
18922 Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
18923 CGF.Builder.CreateStore(CC, CCPtr);
18924 return CGF.Builder.CreateExtractValue(Call, 0);
18925}
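// In other words, the {result, cc} aggregate is split with two extractvalues:
// element 1 (the condition code) is stored through the trailing pointer
// argument and element 0 is returned as the builtin's result.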
18926
18927Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
18928 const CallExpr *E) {
18929 switch (BuiltinID) {
18930 case SystemZ::BI__builtin_tbegin: {
18931 Value *TDB = EmitScalarExpr(E->getArg(0));
18932 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
18933 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
18934 return Builder.CreateCall(F, {TDB, Control});
18935 }
18936 case SystemZ::BI__builtin_tbegin_nofloat: {
18937 Value *TDB = EmitScalarExpr(E->getArg(0));
18938 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
18939 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
18940 return Builder.CreateCall(F, {TDB, Control});
18941 }
18942 case SystemZ::BI__builtin_tbeginc: {
18943 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
18944 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
18945 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
18946 return Builder.CreateCall(F, {TDB, Control});
18947 }
18948 case SystemZ::BI__builtin_tabort: {
18949 Value *Data = EmitScalarExpr(E->getArg(0));
18950 Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
18951 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
18952 }
18953 case SystemZ::BI__builtin_non_tx_store: {
18954 Value *Address = EmitScalarExpr(E->getArg(0));
18955 Value *Data = EmitScalarExpr(E->getArg(1));
18956 Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
18957 return Builder.CreateCall(F, {Data, Address});
18958 }
18959
18960 // Vector builtins. Note that most vector builtins are mapped automatically
18961 // to target-specific LLVM intrinsics. The ones handled specially here can
18962 // be represented via standard LLVM IR, which is preferable to enable common
18963 // LLVM optimizations.
18964
18965 case SystemZ::BI__builtin_s390_vpopctb:
18966 case SystemZ::BI__builtin_s390_vpopcth:
18967 case SystemZ::BI__builtin_s390_vpopctf:
18968 case SystemZ::BI__builtin_s390_vpopctg: {
18969 llvm::Type *ResultType = ConvertType(E->getType());
18970 Value *X = EmitScalarExpr(E->getArg(0));
18971 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
18972 return Builder.CreateCall(F, X);
18973 }
18974
18975 case SystemZ::BI__builtin_s390_vclzb:
18976 case SystemZ::BI__builtin_s390_vclzh:
18977 case SystemZ::BI__builtin_s390_vclzf:
18978 case SystemZ::BI__builtin_s390_vclzg: {
18979 llvm::Type *ResultType = ConvertType(E->getType());
18980 Value *X = EmitScalarExpr(E->getArg(0));
18981 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
18982 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
18983 return Builder.CreateCall(F, {X, Undef});
18984 }
18985
18986 case SystemZ::BI__builtin_s390_vctzb:
18987 case SystemZ::BI__builtin_s390_vctzh:
18988 case SystemZ::BI__builtin_s390_vctzf:
18989 case SystemZ::BI__builtin_s390_vctzg: {
18990 llvm::Type *ResultType = ConvertType(E->getType());
18991 Value *X = EmitScalarExpr(E->getArg(0));
18992 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
18993 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
18994 return Builder.CreateCall(F, {X, Undef});
18995 }
18996
18997 case SystemZ::BI__builtin_s390_verllb:
18998 case SystemZ::BI__builtin_s390_verllh:
18999 case SystemZ::BI__builtin_s390_verllf:
19000 case SystemZ::BI__builtin_s390_verllg: {
19001 llvm::Type *ResultType = ConvertType(E->getType());
19002 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
19003 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
19004 // Splat scalar rotate amount to vector type.
19005 unsigned NumElts = cast<llvm::FixedVectorType>(ResultType)->getNumElements();
19006 Amt = Builder.CreateIntCast(Amt, ResultType->getScalarType(), false);
19007 Amt = Builder.CreateVectorSplat(NumElts, Amt);
19008 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
19009 return Builder.CreateCall(F, { Src, Src, Amt });
19010 }
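// An element rotate is expressed as llvm.fshl with both value operands equal;
// for verll* the single scalar rotate amount is cast to the element type and
// splatted across the vector first.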
19011
19012 case SystemZ::BI__builtin_s390_verllvb:
19013 case SystemZ::BI__builtin_s390_verllvh:
19014 case SystemZ::BI__builtin_s390_verllvf:
19015 case SystemZ::BI__builtin_s390_verllvg: {
19016 llvm::Type *ResultType = ConvertType(E->getType());
19017 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
19018 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
19019 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
19020 return Builder.CreateCall(F, { Src, Src, Amt });
19021 }
19022
19023 case SystemZ::BI__builtin_s390_vfsqsb:
19024 case SystemZ::BI__builtin_s390_vfsqdb: {
19025 llvm::Type *ResultType = ConvertType(E->getType());
19026 Value *X = EmitScalarExpr(E->getArg(0));
19027 if (Builder.getIsFPConstrained()) {
19028 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);
19029 return Builder.CreateConstrainedFPCall(F, { X });
19030 } else {
19031 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
19032 return Builder.CreateCall(F, X);
19033 }
19034 }
19035 case SystemZ::BI__builtin_s390_vfmasb:
19036 case SystemZ::BI__builtin_s390_vfmadb: {
19037 llvm::Type *ResultType = ConvertType(E->getType());
19038 Value *X = EmitScalarExpr(E->getArg(0));
19039 Value *Y = EmitScalarExpr(E->getArg(1));
19040 Value *Z = EmitScalarExpr(E->getArg(2));
19041 if (Builder.getIsFPConstrained()) {
19042 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19043 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
19044 } else {
19045 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19046 return Builder.CreateCall(F, {X, Y, Z});
19047 }
19048 }
19049 case SystemZ::BI__builtin_s390_vfmssb:
19050 case SystemZ::BI__builtin_s390_vfmsdb: {
19051 llvm::Type *ResultType = ConvertType(E->getType());
19052 Value *X = EmitScalarExpr(E->getArg(0));
19053 Value *Y = EmitScalarExpr(E->getArg(1));
19054 Value *Z = EmitScalarExpr(E->getArg(2));
19055 if (Builder.getIsFPConstrained()) {
19056 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19057 return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
19058 } else {
19059 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19060 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
19061 }
19062 }
19063 case SystemZ::BI__builtin_s390_vfnmasb:
19064 case SystemZ::BI__builtin_s390_vfnmadb: {
19065 llvm::Type *ResultType = ConvertType(E->getType());
19066 Value *X = EmitScalarExpr(E->getArg(0));
19067 Value *Y = EmitScalarExpr(E->getArg(1));
19068 Value *Z = EmitScalarExpr(E->getArg(2));
19069 if (Builder.getIsFPConstrained()) {
19070 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19071 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
19072 } else {
19073 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19074 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
19075 }
19076 }
19077 case SystemZ::BI__builtin_s390_vfnmssb:
19078 case SystemZ::BI__builtin_s390_vfnmsdb: {
19079 llvm::Type *ResultType = ConvertType(E->getType());
19080 Value *X = EmitScalarExpr(E->getArg(0));
19081 Value *Y = EmitScalarExpr(E->getArg(1));
19082 Value *Z = EmitScalarExpr(E->getArg(2));
19083 if (Builder.getIsFPConstrained()) {
19084 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19085 Value *NegZ = Builder.CreateFNeg(Z, "sub");
19086 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
19087 } else {
19088 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19089 Value *NegZ = Builder.CreateFNeg(Z, "neg");
19090 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
19091 }
19092 }
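  // For illustration only, the four multiply-and-add builtins above map onto a
  // single llvm.fma call plus negations (shown for the "db" <2 x double> forms):
  //   vfmadb(x, y, z)  ->  fma(x, y,  z)
  //   vfmsdb(x, y, z)  ->  fma(x, y, -z)
  //   vfnmadb(x, y, z) -> -fma(x, y,  z)
  //   vfnmsdb(x, y, z) -> -fma(x, y, -z)
  // with llvm.experimental.constrained.fma substituted under strict FP.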
19093 case SystemZ::BI__builtin_s390_vflpsb:
19094 case SystemZ::BI__builtin_s390_vflpdb: {
19095 llvm::Type *ResultType = ConvertType(E->getType());
19096 Value *X = EmitScalarExpr(E->getArg(0));
19097 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
19098 return Builder.CreateCall(F, X);
19099 }
19100 case SystemZ::BI__builtin_s390_vflnsb:
19101 case SystemZ::BI__builtin_s390_vflndb: {
19102 llvm::Type *ResultType = ConvertType(E->getType());
19103 Value *X = EmitScalarExpr(E->getArg(0));
19104 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
19105 return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");
19106 }
19107 case SystemZ::BI__builtin_s390_vfisb:
19108 case SystemZ::BI__builtin_s390_vfidb: {
19109 llvm::Type *ResultType = ConvertType(E->getType());
19110 Value *X = EmitScalarExpr(E->getArg(0));
19111 // Constant-fold the M4 and M5 mask arguments.
19112 llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext());
19113 llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext());
19114 // Check whether this instance can be represented via an LLVM standard
19115 // intrinsic. We only support some combinations of M4 and M5.
19116 Intrinsic::ID ID = Intrinsic::not_intrinsic;
19117 Intrinsic::ID CI;
19118 switch (M4.getZExtValue()) {
19119 default: break;
19120 case 0: // IEEE-inexact exception allowed
19121 switch (M5.getZExtValue()) {
19122 default: break;
19123 case 0: ID = Intrinsic::rint;
19124 CI = Intrinsic::experimental_constrained_rint; break;
19125 }
19126 break;
19127 case 4: // IEEE-inexact exception suppressed
19128 switch (M5.getZExtValue()) {
19129 default: break;
19130 case 0: ID = Intrinsic::nearbyint;
19131 CI = Intrinsic::experimental_constrained_nearbyint; break;
19132 case 1: ID = Intrinsic::round;
19133 CI = Intrinsic::experimental_constrained_round; break;
19134 case 5: ID = Intrinsic::trunc;
19135 CI = Intrinsic::experimental_constrained_trunc; break;
19136 case 6: ID = Intrinsic::ceil;
19137 CI = Intrinsic::experimental_constrained_ceil; break;
19138 case 7: ID = Intrinsic::floor;
19139 CI = Intrinsic::experimental_constrained_floor; break;
19140 }
19141 break;
19142 }
19143 if (ID != Intrinsic::not_intrinsic) {
19144 if (Builder.getIsFPConstrained()) {
19145 Function *F = CGM.getIntrinsic(CI, ResultType);
19146 return Builder.CreateConstrainedFPCall(F, X);
19147 } else {
19148 Function *F = CGM.getIntrinsic(ID, ResultType);
19149 return Builder.CreateCall(F, X);
19150 }
19151 }
19152 switch (BuiltinID) { // FIXME: constrained version?
19153 case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
19154 case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
19155 default: llvm_unreachable("Unknown BuiltinID");
19156 }
19157 Function *F = CGM.getIntrinsic(ID);
19158 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
19159 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
19160 return Builder.CreateCall(F, {X, M4Value, M5Value});
19161 }
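  // For illustration only, the (M4, M5) pairs accepted above select standard
  // rounding intrinsics; any other combination falls through to the target
  // intrinsic with the raw mask operands:
  //   (0, 0) -> llvm.rint         (4, 0) -> llvm.nearbyint
  //   (4, 1) -> llvm.round        (4, 5) -> llvm.trunc
  //   (4, 6) -> llvm.ceil         (4, 7) -> llvm.floor
  //   otherwise -> llvm.s390.vfisb / llvm.s390.vfidb called as {X, M4, M5}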
19162 case SystemZ::BI__builtin_s390_vfmaxsb:
19163 case SystemZ::BI__builtin_s390_vfmaxdb: {
19164 llvm::Type *ResultType = ConvertType(E->getType());
19165 Value *X = EmitScalarExpr(E->getArg(0));
19166 Value *Y = EmitScalarExpr(E->getArg(1));
19167 // Constant-fold the M4 mask argument.
19168 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
19169 // Check whether this instance can be represented via an LLVM standard
19170 // intrinsic. We only support some values of M4.
19171 Intrinsic::ID ID = Intrinsic::not_intrinsic;
19172 Intrinsic::ID CI;
19173 switch (M4.getZExtValue()) {
19174 default: break;
19175 case 4: ID = Intrinsic::maxnum;
19176 CI = Intrinsic::experimental_constrained_maxnum; break;
19177 }
19178 if (ID != Intrinsic::not_intrinsic) {
19179 if (Builder.getIsFPConstrained()) {
19180 Function *F = CGM.getIntrinsic(CI, ResultType);
19181 return Builder.CreateConstrainedFPCall(F, {X, Y});
19182 } else {
19183 Function *F = CGM.getIntrinsic(ID, ResultType);
19184 return Builder.CreateCall(F, {X, Y});
19185 }
19186 }
19187 switch (BuiltinID) {
19188 case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
19189 case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
19190 default: llvm_unreachable("Unknown BuiltinID");
19191 }
19192 Function *F = CGM.getIntrinsic(ID);
19193 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
19194 return Builder.CreateCall(F, {X, Y, M4Value});
19195 }
19196 case SystemZ::BI__builtin_s390_vfminsb:
19197 case SystemZ::BI__builtin_s390_vfmindb: {
19198 llvm::Type *ResultType = ConvertType(E->getType());
19199 Value *X = EmitScalarExpr(E->getArg(0));
19200 Value *Y = EmitScalarExpr(E->getArg(1));
19201 // Constant-fold the M4 mask argument.
19202 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
19203 // Check whether this instance can be represented via an LLVM standard
19204 // intrinsic. We only support some values of M4.
19205 Intrinsic::ID ID = Intrinsic::not_intrinsic;
19206 Intrinsic::ID CI;
19207 switch (M4.getZExtValue()) {
19208 default: break;
19209 case 4: ID = Intrinsic::minnum;
19210 CI = Intrinsic::experimental_constrained_minnum; break;
19211 }
19212 if (ID != Intrinsic::not_intrinsic) {
19213 if (Builder.getIsFPConstrained()) {
19214 Function *F = CGM.getIntrinsic(CI, ResultType);
19215 return Builder.CreateConstrainedFPCall(F, {X, Y});
19216 } else {
19217 Function *F = CGM.getIntrinsic(ID, ResultType);
19218 return Builder.CreateCall(F, {X, Y});
19219 }
19220 }
19221 switch (BuiltinID) {
19222 case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
19223 case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
19224 default: llvm_unreachable("Unknown BuiltinID");
19225 }
19226 Function *F = CGM.getIntrinsic(ID);
19227 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
19228 return Builder.CreateCall(F, {X, Y, M4Value});
19229 }
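  // For illustration only: for both vfmax* and vfmin* above, M4 == 4 requests
  // IEEE maxNum/minNum semantics and is emitted as llvm.maxnum / llvm.minnum
  // (or their constrained variants under strict FP); any other M4 value is
  // passed through unchanged to the llvm.s390.vfmax* / llvm.s390.vfmin*
  // target intrinsics.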
19230
19231 case SystemZ::BI__builtin_s390_vlbrh:
19232 case SystemZ::BI__builtin_s390_vlbrf:
19233 case SystemZ::BI__builtin_s390_vlbrg: {
19234 llvm::Type *ResultType = ConvertType(E->getType());
19235 Value *X = EmitScalarExpr(E->getArg(0));
19236 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
19237 return Builder.CreateCall(F, X);
19238 }
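  // For illustration only: __builtin_s390_vlbrf on a <4 x i32> operand is an
  // element-wise byte reversal,
  //   %res = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %x)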
19239
19240 // Vector intrinsics that output the post-instruction CC value.
19241
19242#define INTRINSIC_WITH_CC(NAME) \
19243 case SystemZ::BI__builtin_##NAME: \
19244 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
19245
19246 INTRINSIC_WITH_CC(s390_vpkshs);
19247 INTRINSIC_WITH_CC(s390_vpksfs);
19248 INTRINSIC_WITH_CC(s390_vpksgs);
19249
19250 INTRINSIC_WITH_CC(s390_vpklshs);
19251 INTRINSIC_WITH_CC(s390_vpklsfs);
19252 INTRINSIC_WITH_CC(s390_vpklsgs);
19253
19254 INTRINSIC_WITH_CC(s390_vceqbs);
19255 INTRINSIC_WITH_CC(s390_vceqhs);
19256 INTRINSIC_WITH_CC(s390_vceqfs);
19257 INTRINSIC_WITH_CC(s390_vceqgs);
19258
19259 INTRINSIC_WITH_CC(s390_vchbs);
19260 INTRINSIC_WITH_CC(s390_vchhs);
19261 INTRINSIC_WITH_CC(s390_vchfs);
19262 INTRINSIC_WITH_CC(s390_vchgs);
19263
19264 INTRINSIC_WITH_CC(s390_vchlbs);
19265 INTRINSIC_WITH_CC(s390_vchlhs);
19266 INTRINSIC_WITH_CC(s390_vchlfs);
19267 INTRINSIC_WITH_CC(s390_vchlgs);
19268
19269 INTRINSIC_WITH_CC(s390_vfaebs);
19270 INTRINSIC_WITH_CC(s390_vfaehs);
19271 INTRINSIC_WITH_CC(s390_vfaefs);
19272
19273 INTRINSIC_WITH_CC(s390_vfaezbs);
19274 INTRINSIC_WITH_CC(s390_vfaezhs);
19275 INTRINSIC_WITH_CC(s390_vfaezfs);
19276
19277 INTRINSIC_WITH_CC(s390_vfeebs);
19278 INTRINSIC_WITH_CC(s390_vfeehs);
19279 INTRINSIC_WITH_CC(s390_vfeefs);
19280
19281 INTRINSIC_WITH_CC(s390_vfeezbs);
19282 INTRINSIC_WITH_CC(s390_vfeezhs);
19283 INTRINSIC_WITH_CC(s390_vfeezfs);
19284
19285 INTRINSIC_WITH_CC(s390_vfenebs);
19286 INTRINSIC_WITH_CC(s390_vfenehs);
19287 INTRINSIC_WITH_CC(s390_vfenefs);
19288
19289 INTRINSIC_WITH_CC(s390_vfenezbs);
19290 INTRINSIC_WITH_CC(s390_vfenezhs);
19291 INTRINSIC_WITH_CC(s390_vfenezfs);
19292
19293 INTRINSIC_WITH_CC(s390_vistrbs);
19294 INTRINSIC_WITH_CC(s390_vistrhs);
19295 INTRINSIC_WITH_CC(s390_vistrfs);
19296
19297 INTRINSIC_WITH_CC(s390_vstrcbs);
19298 INTRINSIC_WITH_CC(s390_vstrchs);
19299 INTRINSIC_WITH_CC(s390_vstrcfs);
19300
19301 INTRINSIC_WITH_CC(s390_vstrczbs);
19302 INTRINSIC_WITH_CC(s390_vstrczhs);
19303 INTRINSIC_WITH_CC(s390_vstrczfs);
19304
19305 INTRINSIC_WITH_CC(s390_vfcesbs);
19306 INTRINSIC_WITH_CC(s390_vfcedbs);
19307 INTRINSIC_WITH_CC(s390_vfchsbs);
19308 INTRINSIC_WITH_CC(s390_vfchdbs);
19309 INTRINSIC_WITH_CC(s390_vfchesbs);
19310 INTRINSIC_WITH_CC(s390_vfchedbs);
19311
19312 INTRINSIC_WITH_CC(s390_vftcisb);
19313 INTRINSIC_WITH_CC(s390_vftcidb);
19314
19315 INTRINSIC_WITH_CC(s390_vstrsb);
19316 INTRINSIC_WITH_CC(s390_vstrsh);
19317 INTRINSIC_WITH_CC(s390_vstrsf);
19318
19319 INTRINSIC_WITH_CC(s390_vstrszb);
19320 INTRINSIC_WITH_CC(s390_vstrszh);
19321 INTRINSIC_WITH_CC(s390_vstrszf);
19322
19323#undef INTRINSIC_WITH_CC
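  // For illustration only, each INTRINSIC_WITH_CC(NAME) use above expands to
  //   case SystemZ::BI__builtin_NAME:
  //     return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E);
  // so, roughly, __builtin_s390_vpkshs is emitted as the llvm.s390.vpkshs
  // intrinsic, with the post-instruction CC value (see the note above) stored
  // through the builtin's trailing int* argument.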
19324
19325 default:
19326 return nullptr;
19327 }
19328}
19329
19330namespace {
19331// Helper classes for mapping MMA builtins to particular LLVM intrinsic variant.
19332struct NVPTXMmaLdstInfo {
19333 unsigned NumResults; // Number of elements to load/store
19334 // Intrinsic IDs for row/col variants. 0 if particular layout is unsupported.
19335 unsigned IID_col;
19336 unsigned IID_row;
19337};
19338
19339#define MMA_INTR(geom_op_type, layout) \
19340 Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
19341#define MMA_LDST(n, geom_op_type) \
19342 { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
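// For illustration only, MMA_LDST(8, m16n16k16_load_a_f16) expands to
//   { 8, Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride,
//        Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride }
// i.e. {NumResults, IID_col, IID_row} using the ..._stride intrinsic variants.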
19343
19344static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
19345 switch (BuiltinID) {
19346 // FP MMA loads
19347 case NVPTX::BI__hmma_m16n16k16_ld_a:
19348 return MMA_LDST(8, m16n16k16_load_a_f16);
19349 case NVPTX::BI__hmma_m16n16k16_ld_b:
19350 return MMA_LDST(8, m16n16k16_load_b_f16);
19351 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
19352 return MMA_LDST(4, m16n16k16_load_c_f16);
19353 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
19354 return MMA_LDST(8, m16n16k16_load_c_f32);
19355 case NVPTX::BI__hmma_m32n8k16_ld_a:
19356 return MMA_LDST(8, m32n8k16_load_a_f16);
19357 case NVPTX::BI__hmma_m32n8k16_ld_b:
19358 return MMA_LDST(8, m32n8k16_load_b_f16);
19359 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
19360 return MMA_LDST(4, m32n8k16_load_c_f16);
19361 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
19362 return MMA_LDST(8, m32n8k16_load_c_f32);
19363 case NVPTX::BI__hmma_m8n32k16_ld_a:
19364 return MMA_LDST(8, m8n32k16_load_a_f16);
19365 case NVPTX::BI__hmma_m8n32k16_ld_b:
19366 return MMA_LDST(8, m8n32k16_load_b_f16);
19367 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
19368 return MMA_LDST(4, m8n32k16_load_c_f16);
19369 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
19370 return MMA_LDST(8, m8n32k16_load_c_f32);
19371
19372 // Integer MMA loads
19373 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
19374 return MMA_LDST(2, m16n16k16_load_a_s8);
19375 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
19376 return MMA_LDST(2, m16n16k16_load_a_u8);
19377 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
19378 return MMA_LDST(2, m16n16k16_load_b_s8);
19379 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
19380 return MMA_LDST(2, m16n16k16_load_b_u8);
19381 case NVPTX::BI__imma_m16n16k16_ld_c:
19382 return MMA_LDST(8, m16n16k16_load_c_s32);
19383 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
19384 return MMA_LDST(4, m32n8k16_load_a_s8);
19385 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
19386 return MMA_LDST(4, m32n8k16_load_a_u8);
19387 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
19388 return MMA_LDST(1, m32n8k16_load_b_s8);
19389 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
19390 return MMA_LDST(1, m32n8k16_load_b_u8);
19391 case NVPTX::BI__imma_m32n8k16_ld_c:
19392 return MMA_LDST(8, m32n8k16_load_c_s32);
19393 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
19394 return MMA_LDST(1, m8n32k16_load_a_s8);
19395 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
19396 return MMA_LDST(1, m8n32k16_load_a_u8);
19397 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
19398 return MMA_LDST(4, m8n32k16_load_b_s8);
19399 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
19400 return MMA_LDST(4, m8n32k16_load_b_u8);
19401 case NVPTX::BI__imma_m8n32k16_ld_c:
19402 return MMA_LDST(8, m8n32k16_load_c_s32);
19403
19404 // Sub-integer MMA loads.
19405 // Only row/col layout is supported by A/B fragments.
19406 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
19407 return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
19408 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
19409 return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
19410 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
19411 return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
19412 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
19413 return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
19414 case NVPTX::BI__imma_m8n8k32_ld_c:
19415 return MMA_LDST(2, m8n8k32_load_c_s32);
19416 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
19417 return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
19418 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
19419 return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
19420 case NVPTX::BI__bmma_m8n8k128_ld_c:
19421 return MMA_LDST(2, m8n8k128_load_c_s32);
19422
19423 // Double MMA loads
19424 case NVPTX::BI__dmma_m8n8k4_ld_a:
19425 return MMA_LDST(1, m8n8k4_load_a_f64);
19426 case NVPTX::BI__dmma_m8n8k4_ld_b:
19427 return MMA_LDST(1, m8n8k4_load_b_f64);
19428 case NVPTX::BI__dmma_m8n8k4_ld_c:
19429 return MMA_LDST(2, m8n8k4_load_c_f64);
19430
19431 // Alternate float MMA loads
19432 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
19433 return MMA_LDST(4, m16n16k16_load_a_bf16);
19434 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
19435 return MMA_LDST(4, m16n16k16_load_b_bf16);
19436 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
19437 return MMA_LDST(2, m8n32k16_load_a_bf16);
19438 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
19439 return MMA_LDST(8, m8n32k16_load_b_bf16);
19440 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
19441 return MMA_LDST(8, m32n8k16_load_a_bf16);
19442 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
19443 return MMA_LDST(2, m32n8k16_load_b_bf16);
19444 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
19445 return MMA_LDST(4, m16n16k8_load_a_tf32);
19446 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
19447 return MMA_LDST(4, m16n16k8_load_b_tf32);
19448 case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
19449 return MMA_LDST(8, m16n16k8_load_c_f32);
19450
19451 // NOTE: We need to follow the inconsistent naming scheme used by NVCC. Unlike
19452 // PTX and LLVM IR where stores always use fragment D, NVCC builtins always
19453 // use fragment C for both loads and stores.
19454 // FP MMA stores.
19455 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
19456 return MMA_LDST(4, m16n16k16_store_d_f16);
19457 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
19458 return MMA_LDST(8, m16n16k16_store_d_f32);
19459 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
19460 return MMA_LDST(4, m32n8k16_store_d_f16);
19461 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
19462 return MMA_LDST(8, m32n8k16_store_d_f32);
19463 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
19464 return MMA_LDST(4, m8n32k16_store_d_f16);
19465 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
19466 return MMA_LDST(8, m8n32k16_store_d_f32);
19467
19468 // Integer and sub-integer MMA stores.
19469 // Another naming quirk. Unlike other MMA builtins that use PTX types in the
19470 // name, integer loads/stores use LLVM's i32.
19471 case NVPTX::BI__imma_m16n16k16_st_c_i32:
19472 return MMA_LDST(8, m16n16k16_store_d_s32);
19473 case NVPTX::BI__imma_m32n8k16_st_c_i32:
19474 return MMA_LDST(8, m32n8k16_store_d_s32);
19475 case NVPTX::BI__imma_m8n32k16_st_c_i32:
19476 return MMA_LDST(8, m8n32k16_store_d_s32);
19477 case NVPTX::BI__imma_m8n8k32_st_c_i32:
19478 return MMA_LDST(2, m8n8k32_store_d_s32);
19479 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
19480 return MMA_LDST(2, m8n8k128_store_d_s32);
19481
19482 // Double MMA store
19483 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
19484 return MMA_LDST(2, m8n8k4_store_d_f64);
19485
19486 // Alternate float MMA store
19487 case NVPTX::BI__mma_m16n16k8_st_c_f32:
19488 return MMA_LDST(8, m16n16k8_store_d_f32);
19489
19490 default:
19491 llvm_unreachable("Unknown MMA builtin");
19492 }
19493}
19494#undef MMA_LDST
19495#undef MMA_INTR
19496
19497
19498struct NVPTXMmaInfo {
19499 unsigned NumEltsA;
19500 unsigned NumEltsB;
19501 unsigned NumEltsC;
19502 unsigned NumEltsD;
19503
19504 // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
19505 // over 'col' for layout. The index of non-satf variants is expected to match
19506 // the undocumented layout constants used by CUDA's mma.hpp.
19507 std::array<unsigned, 8> Variants;
19508
19509 unsigned getMMAIntrinsic(int Layout, bool Satf) {
19510 unsigned Index = Layout + 4 * Satf;
19511 if (Index >= Variants.size())
19512 return 0;
19513 return Variants[Index];
19514 }
19515};
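// For illustration only: Layout follows the ordering used by the MMA_VARIANTS
// macros below (0 = row_row, 1 = row_col, 2 = col_row, 3 = col_col) and Satf
// adds 4, so getMMAIntrinsic(/*Layout=*/1, /*Satf=*/true) selects index 5, the
// row_col ..._satfinite variant, or returns 0 if that slot is unsupported.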
19516
19517 // Returns an intrinsic that matches Layout and Satf for valid combinations of
19518 // Layout and Satf, 0 otherwise.
19519static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
19520 // clang-format off
19521#define MMA_VARIANTS(geom, type) \
19522 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
19523 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
19524 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
19525 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
19526#define MMA_SATF_VARIANTS(geom, type) \
19527 MMA_VARIANTS(geom, type), \
19528 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
19529 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
19530 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
19531 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
19532// Sub-integer MMA only supports row.col layout.
19533#define MMA_VARIANTS_I4(geom, type) \
19534 0, \
19535 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
19536 0, \
19537 0, \
19538 0, \
19539 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
19540 0, \
19541 0
19542// b1 MMA does not support .satfinite.
19543#define MMA_VARIANTS_B1_XOR(geom, type) \
19544 0, \
19545 Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \
19546 0, \
19547 0, \
19548 0, \
19549 0, \
19550 0, \
19551 0
19552#define MMA_VARIANTS_B1_AND(geom, type) \
19553 0, \
19554 Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \
19555 0, \
19556 0, \
19557 0, \
19558 0, \
19559 0, \
19560 0
19561 // clang-format on
19562 switch (BuiltinID) {
19563 // FP MMA
19564 // Note that 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while
19565 // NumEltsN of return value are ordered as A,B,C,D.
19566 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
19567 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};
19568 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
19569 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};
19570 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
19571 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};
19572 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
19573 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};
19574 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
19575 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};
19576 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
19577 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};
19578 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
19579 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};
19580 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
19581 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};
19582 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
19583 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};
19584 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
19585 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};
19586 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
19587 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};
19588 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
19589 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};
19590
19591 // Integer MMA
19592 case NVPTX::BI__imma_m16n16k16_mma_s8:
19593 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};
19594 case NVPTX::BI__imma_m16n16k16_mma_u8:
19595 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};
19596 case NVPTX::BI__imma_m32n8k16_mma_s8:
19597 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};
19598 case NVPTX::BI__imma_m32n8k16_mma_u8:
19599 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};
19600 case NVPTX::BI__imma_m8n32k16_mma_s8:
19601 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};
19602 case NVPTX::BI__imma_m8n32k16_mma_u8:
19603 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}};
19604
19605 // Sub-integer MMA
19606 case NVPTX::BI__imma_m8n8k32_mma_s4:
19607 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};
19608 case NVPTX::BI__imma_m8n8k32_mma_u4:
19609 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};
19610 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
19611 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};
19612 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
19613 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};
19614
19615 // Double MMA
19616 case NVPTX::BI__dmma_m8n8k4_mma_f64:
19617 return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};
19618
19619 // Alternate FP MMA
19620 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
19621 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};
19622 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
19623 return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};
19624 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
19625 return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};
19626 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
19627 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};
19628 default:
19629 llvm_unreachable("Unexpected builtin ID.");
19630 }
19631#undef MMA_VARIANTS
19632#undef MMA_SATF_VARIANTS
19633#undef MMA_VARIANTS_I4
19634#undef MMA_VARIANTS_B1_AND
19635#undef MMA_VARIANTS_B1_XOR
19636}
19637
19638static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
19639 const CallExpr *E) {
19640 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
19641 QualType ArgType = E->getArg(0)->getType();
19642 clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
19643 llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
19644 return CGF.Builder.CreateCall(
19645 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
19646 {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
19647}
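// For illustration only (exact mangling approximate): __nvvm_ldg_f4 comes out
// roughly as
//   %v = call <4 x float> @llvm.nvvm.ldg.global.f.v4f32.p0(ptr %p, i32 16)
// where the i32 operand is the natural alignment of the pointee type, per the
// PTX interoperability note repeated at the __nvvm_ldg_* cases below.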
19648
19649static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
19650 const CallExpr *E) {
19651 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
19652 llvm::Type *ElemTy =
19653 CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
19654 return CGF.Builder.CreateCall(
19655 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
19656 {Ptr, CGF.EmitScalarExpr(E->getArg(1))});
19657}
19658
19659static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS,
19660 CodeGenFunction &CGF, const CallExpr *E,
19661 int SrcSize) {
19662 return E->getNumArgs() == 3
19663 ? CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicIDS),
19664 {CGF.EmitScalarExpr(E->getArg(0)),
19665 CGF.EmitScalarExpr(E->getArg(1)),
19666 CGF.EmitScalarExpr(E->getArg(2))})
19667 : CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicID),
19668 {CGF.EmitScalarExpr(E->getArg(0)),
19669 CGF.EmitScalarExpr(E->getArg(1))});
19670}
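// For illustration only: the two-argument form
// __nvvm_cp_async_ca_shared_global_16(dst, src) emits
// llvm.nvvm.cp.async.ca.shared.global.16, while the three-argument form with a
// trailing source-size argument selects the corresponding ..._s intrinsic and
// passes all three operands through.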
19671
19672static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
19673 const CallExpr *E, CodeGenFunction &CGF) {
19674 auto &C = CGF.CGM.getContext();
19675 if (!(C.getLangOpts().NativeHalfType ||
19676 !C.getTargetInfo().useFP16ConversionIntrinsics())) {
19677 CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() +
19678 " requires native half type support.");
19679 return nullptr;
19680 }
19681
19682 if (IntrinsicID == Intrinsic::nvvm_ldg_global_f ||
19683 IntrinsicID == Intrinsic::nvvm_ldu_global_f)
19684 return MakeLdgLdu(IntrinsicID, CGF, E);
19685
19686 SmallVector<Value *, 16> Args;
19687 auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
19688 auto *FTy = F->getFunctionType();
19689 unsigned ICEArguments = 0;
19690 ASTContext::GetBuiltinTypeError Error;
19691 C.GetBuiltinType(BuiltinID, Error, &ICEArguments);
19692 assert(Error == ASTContext::GE_None && "Should not codegen an error");
19693 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
19694 assert((ICEArguments & (1 << i)) == 0);
19695 auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i));
19696 auto *PTy = FTy->getParamType(i);
19697 if (PTy != ArgValue->getType())
19698 ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy);
19699 Args.push_back(ArgValue);
19700 }
19701
19702 return CGF.Builder.CreateCall(F, Args);
19703}
19704} // namespace
19705
19706 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
19707 const CallExpr *E) {
19708 switch (BuiltinID) {
19709 case NVPTX::BI__nvvm_atom_add_gen_i:
19710 case NVPTX::BI__nvvm_atom_add_gen_l:
19711 case NVPTX::BI__nvvm_atom_add_gen_ll:
19712 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
19713
19714 case NVPTX::BI__nvvm_atom_sub_gen_i:
19715 case NVPTX::BI__nvvm_atom_sub_gen_l:
19716 case NVPTX::BI__nvvm_atom_sub_gen_ll:
19717 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
19718
19719 case NVPTX::BI__nvvm_atom_and_gen_i:
19720 case NVPTX::BI__nvvm_atom_and_gen_l:
19721 case NVPTX::BI__nvvm_atom_and_gen_ll:
19722 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
19723
19724 case NVPTX::BI__nvvm_atom_or_gen_i:
19725 case NVPTX::BI__nvvm_atom_or_gen_l:
19726 case NVPTX::BI__nvvm_atom_or_gen_ll:
19727 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
19728
19729 case NVPTX::BI__nvvm_atom_xor_gen_i:
19730 case NVPTX::BI__nvvm_atom_xor_gen_l:
19731 case NVPTX::BI__nvvm_atom_xor_gen_ll:
19732 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
19733
19734 case NVPTX::BI__nvvm_atom_xchg_gen_i:
19735 case NVPTX::BI__nvvm_atom_xchg_gen_l:
19736 case NVPTX::BI__nvvm_atom_xchg_gen_ll:
19737 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
19738
19739 case NVPTX::BI__nvvm_atom_max_gen_i:
19740 case NVPTX::BI__nvvm_atom_max_gen_l:
19741 case NVPTX::BI__nvvm_atom_max_gen_ll:
19742 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
19743
19744 case NVPTX::BI__nvvm_atom_max_gen_ui:
19745 case NVPTX::BI__nvvm_atom_max_gen_ul:
19746 case NVPTX::BI__nvvm_atom_max_gen_ull:
19747 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
19748
19749 case NVPTX::BI__nvvm_atom_min_gen_i:
19750 case NVPTX::BI__nvvm_atom_min_gen_l:
19751 case NVPTX::BI__nvvm_atom_min_gen_ll:
19752 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
19753
19754 case NVPTX::BI__nvvm_atom_min_gen_ui:
19755 case NVPTX::BI__nvvm_atom_min_gen_ul:
19756 case NVPTX::BI__nvvm_atom_min_gen_ull:
19757 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
19758
19759 case NVPTX::BI__nvvm_atom_cas_gen_i:
19760 case NVPTX::BI__nvvm_atom_cas_gen_l:
19761 case NVPTX::BI__nvvm_atom_cas_gen_ll:
19762 // __nvvm_atom_cas_gen_* should return the old value rather than the
19763 // success flag.
19764 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
19765
19766 case NVPTX::BI__nvvm_atom_add_gen_f:
19767 case NVPTX::BI__nvvm_atom_add_gen_d: {
19768 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
19769 Value *Val = EmitScalarExpr(E->getArg(1));
19770
19771 return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,
19772 AtomicOrdering::SequentiallyConsistent);
19773 }
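  // For illustration only: __nvvm_atom_add_gen_d(p, v) becomes a plain
  //   %old = atomicrmw fadd ptr %p, double %v seq_cst
  // rather than a target-specific intrinsic.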
19774
19775 case NVPTX::BI__nvvm_atom_inc_gen_ui: {
19776 Value *Ptr = EmitScalarExpr(E->getArg(0));
19777 Value *Val = EmitScalarExpr(E->getArg(1));
19778 Function *FnALI32 =
19779 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
19780 return Builder.CreateCall(FnALI32, {Ptr, Val});
19781 }
19782
19783 case NVPTX::BI__nvvm_atom_dec_gen_ui: {
19784 Value *Ptr = EmitScalarExpr(E->getArg(0));
19785 Value *Val = EmitScalarExpr(E->getArg(1));
19786 Function *FnALD32 =
19787 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
19788 return Builder.CreateCall(FnALD32, {Ptr, Val});
19789 }
19790
19791 case NVPTX::BI__nvvm_ldg_c:
19792 case NVPTX::BI__nvvm_ldg_sc:
19793 case NVPTX::BI__nvvm_ldg_c2:
19794 case NVPTX::BI__nvvm_ldg_sc2:
19795 case NVPTX::BI__nvvm_ldg_c4:
19796 case NVPTX::BI__nvvm_ldg_sc4:
19797 case NVPTX::BI__nvvm_ldg_s:
19798 case NVPTX::BI__nvvm_ldg_s2:
19799 case NVPTX::BI__nvvm_ldg_s4:
19800 case NVPTX::BI__nvvm_ldg_i:
19801 case NVPTX::BI__nvvm_ldg_i2:
19802 case NVPTX::BI__nvvm_ldg_i4:
19803 case NVPTX::BI__nvvm_ldg_l:
19804 case NVPTX::BI__nvvm_ldg_l2:
19805 case NVPTX::BI__nvvm_ldg_ll:
19806 case NVPTX::BI__nvvm_ldg_ll2:
19807 case NVPTX::BI__nvvm_ldg_uc:
19808 case NVPTX::BI__nvvm_ldg_uc2:
19809 case NVPTX::BI__nvvm_ldg_uc4:
19810 case NVPTX::BI__nvvm_ldg_us:
19811 case NVPTX::BI__nvvm_ldg_us2:
19812 case NVPTX::BI__nvvm_ldg_us4:
19813 case NVPTX::BI__nvvm_ldg_ui:
19814 case NVPTX::BI__nvvm_ldg_ui2:
19815 case NVPTX::BI__nvvm_ldg_ui4:
19816 case NVPTX::BI__nvvm_ldg_ul:
19817 case NVPTX::BI__nvvm_ldg_ul2:
19818 case NVPTX::BI__nvvm_ldg_ull:
19819 case NVPTX::BI__nvvm_ldg_ull2:
19820 // PTX Interoperability section 2.2: "For a vector with an even number of
19821 // elements, its alignment is set to number of elements times the alignment
19822 // of its member: n*alignof(t)."
19823 return MakeLdgLdu(Intrinsic::nvvm_ldg_global_i, *this, E);
19824 case NVPTX::BI__nvvm_ldg_f:
19825 case NVPTX::BI__nvvm_ldg_f2:
19826 case NVPTX::BI__nvvm_ldg_f4:
19827 case NVPTX::BI__nvvm_ldg_d:
19828 case NVPTX::BI__nvvm_ldg_d2:
19829 return MakeLdgLdu(Intrinsic::nvvm_ldg_global_f, *this, E);
19830
19831 case NVPTX::BI__nvvm_ldu_c:
19832 case NVPTX::BI__nvvm_ldu_sc:
19833 case NVPTX::BI__nvvm_ldu_c2:
19834 case NVPTX::BI__nvvm_ldu_sc2:
19835 case NVPTX::BI__nvvm_ldu_c4:
19836 case NVPTX::BI__nvvm_ldu_sc4:
19837 case NVPTX::BI__nvvm_ldu_s:
19838 case NVPTX::BI__nvvm_ldu_s2:
19839 case NVPTX::BI__nvvm_ldu_s4:
19840 case NVPTX::BI__nvvm_ldu_i:
19841 case NVPTX::BI__nvvm_ldu_i2:
19842 case NVPTX::BI__nvvm_ldu_i4:
19843 case NVPTX::BI__nvvm_ldu_l:
19844 case NVPTX::BI__nvvm_ldu_l2:
19845 case NVPTX::BI__nvvm_ldu_ll:
19846 case NVPTX::BI__nvvm_ldu_ll2:
19847 case NVPTX::BI__nvvm_ldu_uc:
19848 case NVPTX::BI__nvvm_ldu_uc2:
19849 case NVPTX::BI__nvvm_ldu_uc4:
19850 case NVPTX::BI__nvvm_ldu_us:
19851 case NVPTX::BI__nvvm_ldu_us2:
19852 case NVPTX::BI__nvvm_ldu_us4:
19853 case NVPTX::BI__nvvm_ldu_ui:
19854 case NVPTX::BI__nvvm_ldu_ui2:
19855 case NVPTX::BI__nvvm_ldu_ui4:
19856 case NVPTX::BI__nvvm_ldu_ul:
19857 case NVPTX::BI__nvvm_ldu_ul2:
19858 case NVPTX::BI__nvvm_ldu_ull:
19859 case NVPTX::BI__nvvm_ldu_ull2:
19860 return MakeLdgLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
19861 case NVPTX::BI__nvvm_ldu_f:
19862 case NVPTX::BI__nvvm_ldu_f2:
19863 case NVPTX::BI__nvvm_ldu_f4:
19864 case NVPTX::BI__nvvm_ldu_d:
19865 case NVPTX::BI__nvvm_ldu_d2:
19866 return MakeLdgLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
19867
19868 case NVPTX::BI__nvvm_atom_cta_add_gen_i:
19869 case NVPTX::BI__nvvm_atom_cta_add_gen_l:
19870 case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
19871 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E);
19872 case NVPTX::BI__nvvm_atom_sys_add_gen_i:
19873 case NVPTX::BI__nvvm_atom_sys_add_gen_l:
19874 case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
19875 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E);
19876 case NVPTX::BI__nvvm_atom_cta_add_gen_f:
19877 case NVPTX::BI__nvvm_atom_cta_add_gen_d:
19878 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E);
19879 case NVPTX::BI__nvvm_atom_sys_add_gen_f:
19880 case NVPTX::BI__nvvm_atom_sys_add_gen_d:
19881 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E);
19882 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
19883 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
19884 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
19885 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E);
19886 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
19887 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
19888 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
19889 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E);
19890 case NVPTX::BI__nvvm_atom_cta_max_gen_i:
19891 case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
19892 case NVPTX::BI__nvvm_atom_cta_max_gen_l:
19893 case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
19894 case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
19895 case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
19896 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E);
19897 case NVPTX::BI__nvvm_atom_sys_max_gen_i:
19898 case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
19899 case NVPTX::BI__nvvm_atom_sys_max_gen_l:
19900 case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
19901 case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
19902 case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
19903 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E);
19904 case NVPTX::BI__nvvm_atom_cta_min_gen_i:
19905 case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
19906 case NVPTX::BI__nvvm_atom_cta_min_gen_l:
19907 case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
19908 case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
19909 case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
19910 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E);
19911 case NVPTX::BI__nvvm_atom_sys_min_gen_i:
19912 case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
19913 case NVPTX::BI__nvvm_atom_sys_min_gen_l:
19914 case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
19915 case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
19916 case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
19917 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E);
19918 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
19919 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E);
19920 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
19921 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E);
19922 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
19923 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E);
19924 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
19925 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E);
19926 case NVPTX::BI__nvvm_atom_cta_and_gen_i:
19927 case NVPTX::BI__nvvm_atom_cta_and_gen_l:
19928 case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
19929 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E);
19930 case NVPTX::BI__nvvm_atom_sys_and_gen_i:
19931 case NVPTX::BI__nvvm_atom_sys_and_gen_l:
19932 case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
19933 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E);
19934 case NVPTX::BI__nvvm_atom_cta_or_gen_i:
19935 case NVPTX::BI__nvvm_atom_cta_or_gen_l:
19936 case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
19937 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E);
19938 case NVPTX::BI__nvvm_atom_sys_or_gen_i:
19939 case NVPTX::BI__nvvm_atom_sys_or_gen_l:
19940 case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
19941 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E);
19942 case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
19943 case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
19944 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
19945 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E);
19946 case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
19947 case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
19948 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
19949 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E);
19950 case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
19951 case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
19952 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
19953 Value *Ptr = EmitScalarExpr(E->getArg(0));
19954 llvm::Type *ElemTy =
19955 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
19956 return Builder.CreateCall(
19957 CGM.getIntrinsic(
19958 Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
19959 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
19960 }
19961 case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
19962 case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
19963 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
19964 Value *Ptr = EmitScalarExpr(E->getArg(0));
19965 llvm::Type *ElemTy =
19966 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
19967 return Builder.CreateCall(
19968 CGM.getIntrinsic(
19969 Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
19970 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
19971 }
19972 case NVPTX::BI__nvvm_match_all_sync_i32p:
19973 case NVPTX::BI__nvvm_match_all_sync_i64p: {
19974 Value *Mask = EmitScalarExpr(E->getArg(0));
19975 Value *Val = EmitScalarExpr(E->getArg(1));
19976 Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
19977 Value *ResultPair = Builder.CreateCall(
19978 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
19979 ? Intrinsic::nvvm_match_all_sync_i32p
19980 : Intrinsic::nvvm_match_all_sync_i64p),
19981 {Mask, Val});
19982 Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
19983 PredOutPtr.getElementType());
19984 Builder.CreateStore(Pred, PredOutPtr);
19985 return Builder.CreateExtractValue(ResultPair, 0);
19986 }
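  // For illustration only: llvm.nvvm.match.all.sync.i32p returns an {i32, i1}
  // pair; the i1 "all matched" flag is widened and stored through the third
  // builtin argument, and the i32 match mask becomes the builtin's result.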
19987
19988 // FP MMA loads
19989 case NVPTX::BI__hmma_m16n16k16_ld_a:
19990 case NVPTX::BI__hmma_m16n16k16_ld_b:
19991 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
19992 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
19993 case NVPTX::BI__hmma_m32n8k16_ld_a:
19994 case NVPTX::BI__hmma_m32n8k16_ld_b:
19995 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
19996 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
19997 case NVPTX::BI__hmma_m8n32k16_ld_a:
19998 case NVPTX::BI__hmma_m8n32k16_ld_b:
19999 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
20000 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
20001 // Integer MMA loads.
20002 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
20003 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
20004 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
20005 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
20006 case NVPTX::BI__imma_m16n16k16_ld_c:
20007 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
20008 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
20009 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
20010 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
20011 case NVPTX::BI__imma_m32n8k16_ld_c:
20012 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
20013 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
20014 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
20015 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
20016 case NVPTX::BI__imma_m8n32k16_ld_c:
20017 // Sub-integer MMA loads.
20018 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
20019 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
20020 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
20021 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
20022 case NVPTX::BI__imma_m8n8k32_ld_c:
20023 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
20024 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
20025 case NVPTX::BI__bmma_m8n8k128_ld_c:
20026 // Double MMA loads.
20027 case NVPTX::BI__dmma_m8n8k4_ld_a:
20028 case NVPTX::BI__dmma_m8n8k4_ld_b:
20029 case NVPTX::BI__dmma_m8n8k4_ld_c:
20030 // Alternate float MMA loads.
20031 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
20032 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
20033 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
20034 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
20035 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
20036 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
20037 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
20038 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
20039 case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
20040 Address Dst = EmitPointerWithAlignment(E->getArg(0));
20041 Value *Src = EmitScalarExpr(E->getArg(1));
20042 Value *Ldm = EmitScalarExpr(E->getArg(2));
20043 std::optional<llvm::APSInt> isColMajorArg =
20044 E->getArg(3)->getIntegerConstantExpr(getContext());
20045 if (!isColMajorArg)
20046 return nullptr;
20047 bool isColMajor = isColMajorArg->getSExtValue();
20048 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
20049 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
20050 if (IID == 0)
20051 return nullptr;
20052
20053 Value *Result =
20054 Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
20055
20056 // Save returned values.
20057 assert(II.NumResults);
20058 if (II.NumResults == 1) {
20061 } else {
20062 for (unsigned i = 0; i < II.NumResults; ++i) {
20064 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
20065 Dst.getElementType()),
20067 llvm::ConstantInt::get(IntTy, i)),
20069 }
20070 }
20071 return Result;
20072 }
20073
20074 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
20075 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
20076 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
20077 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
20078 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
20079 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
20080 case NVPTX::BI__imma_m16n16k16_st_c_i32:
20081 case NVPTX::BI__imma_m32n8k16_st_c_i32:
20082 case NVPTX::BI__imma_m8n32k16_st_c_i32:
20083 case NVPTX::BI__imma_m8n8k32_st_c_i32:
20084 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
20085 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
20086 case NVPTX::BI__mma_m16n16k8_st_c_f32: {
20087 Value *Dst = EmitScalarExpr(E->getArg(0));
20088 Address Src = EmitPointerWithAlignment(E->getArg(1));
20089 Value *Ldm = EmitScalarExpr(E->getArg(2));
20090 std::optional<llvm::APSInt> isColMajorArg =
20091 E->getArg(3)->getIntegerConstantExpr(getContext());
20092 if (!isColMajorArg)
20093 return nullptr;
20094 bool isColMajor = isColMajorArg->getSExtValue();
20095 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
20096 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
20097 if (IID == 0)
20098 return nullptr;
20099 Function *Intrinsic =
20100 CGM.getIntrinsic(IID, Dst->getType());
20101 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
20102 SmallVector<Value *, 10> Values = {Dst};
20103 for (unsigned i = 0; i < II.NumResults; ++i) {
20105 Src.getElementType(),
20107 llvm::ConstantInt::get(IntTy, i)),
20109 Values.push_back(Builder.CreateBitCast(V, ParamType));
20110 }
20111 Values.push_back(Ldm);
20112 Value *Result = Builder.CreateCall(Intrinsic, Values);
20113 return Result;
20114 }
20115
20116 // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
20117 // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
20118 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
20119 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
20120 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
20121 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
20122 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
20123 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
20124 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
20125 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
20126 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
20127 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
20128 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
20129 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
20130 case NVPTX::BI__imma_m16n16k16_mma_s8:
20131 case NVPTX::BI__imma_m16n16k16_mma_u8:
20132 case NVPTX::BI__imma_m32n8k16_mma_s8:
20133 case NVPTX::BI__imma_m32n8k16_mma_u8:
20134 case NVPTX::BI__imma_m8n32k16_mma_s8:
20135 case NVPTX::BI__imma_m8n32k16_mma_u8:
20136 case NVPTX::BI__imma_m8n8k32_mma_s4:
20137 case NVPTX::BI__imma_m8n8k32_mma_u4:
20138 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
20139 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
20140 case NVPTX::BI__dmma_m8n8k4_mma_f64:
20141 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
20142 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
20143 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
20144 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
20145 Address Dst = EmitPointerWithAlignment(E->getArg(0));
20146 Address SrcA = EmitPointerWithAlignment(E->getArg(1));
20147 Address SrcB = EmitPointerWithAlignment(E->getArg(2));
20148 Address SrcC = EmitPointerWithAlignment(E->getArg(3));
20149 std::optional<llvm::APSInt> LayoutArg =
20150 E->getArg(4)->getIntegerConstantExpr(getContext());
20151 if (!LayoutArg)
20152 return nullptr;
20153 int Layout = LayoutArg->getSExtValue();
20154 if (Layout < 0 || Layout > 3)
20155 return nullptr;
20156 llvm::APSInt SatfArg;
20157 if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
20158 BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
20159 SatfArg = 0; // .b1 does not have satf argument.
20160 else if (std::optional<llvm::APSInt> OptSatfArg =
20161 E->getArg(5)->getIntegerConstantExpr(getContext()))
20162 SatfArg = *OptSatfArg;
20163 else
20164 return nullptr;
20165 bool Satf = SatfArg.getSExtValue();
20166 NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
20167 unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
20168 if (IID == 0) // Unsupported combination of Layout/Satf.
20169 return nullptr;
20170
20172 Function *Intrinsic = CGM.getIntrinsic(IID);
20173 llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
20174 // Load A
20175 for (unsigned i = 0; i < MI.NumEltsA; ++i) {
20177 SrcA.getElementType(),
20179 llvm::ConstantInt::get(IntTy, i)),
20181 Values.push_back(Builder.CreateBitCast(V, AType));
20182 }
20183 // Load B
20184 llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
20185 for (unsigned i = 0; i < MI.NumEltsB; ++i) {
20187 SrcB.getElementType(),
20189 llvm::ConstantInt::get(IntTy, i)),
20191 Values.push_back(Builder.CreateBitCast(V, BType));
20192 }
20193 // Load C
20194 llvm::Type *CType =
20195 Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
20196 for (unsigned i = 0; i < MI.NumEltsC; ++i) {
20198 SrcC.getElementType(),
20200 llvm::ConstantInt::get(IntTy, i)),
20202 Values.push_back(Builder.CreateBitCast(V, CType));
20203 }
20204 Value *Result = Builder.CreateCall(Intrinsic, Values);
20205 llvm::Type *DType = Dst.getElementType();
20206 for (unsigned i = 0; i < MI.NumEltsD; ++i)
20208 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
20210 llvm::ConstantInt::get(IntTy, i)),
20212 return Result;
20213 }
20214 // The following builtins require half type support
20215 case NVPTX::BI__nvvm_ex2_approx_f16:
20216 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this);
20217 case NVPTX::BI__nvvm_ex2_approx_f16x2:
20218 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this);
20219 case NVPTX::BI__nvvm_ff2f16x2_rn:
20220 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this);
20221 case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
20222 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this);
20223 case NVPTX::BI__nvvm_ff2f16x2_rz:
20224 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this);
20225 case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
20226 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this);
20227 case NVPTX::BI__nvvm_fma_rn_f16:
20228 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this);
20229 case NVPTX::BI__nvvm_fma_rn_f16x2:
20230 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this);
20231 case NVPTX::BI__nvvm_fma_rn_ftz_f16:
20232 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this);
20233 case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
20234 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this);
20235 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
20236 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
20237 *this);
20238 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
20239 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
20240 *this);
20241 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
20242 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,
20243 *this);
20244 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
20245 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,
20246 *this);
20247 case NVPTX::BI__nvvm_fma_rn_relu_f16:
20248 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this);
20249 case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
20250 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this);
20251 case NVPTX::BI__nvvm_fma_rn_sat_f16:
20252 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this);
20253 case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
20254 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this);
20255 case NVPTX::BI__nvvm_fmax_f16:
20256 return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this);
20257 case NVPTX::BI__nvvm_fmax_f16x2:
20258 return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this);
20259 case NVPTX::BI__nvvm_fmax_ftz_f16:
20260 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this);
20261 case NVPTX::BI__nvvm_fmax_ftz_f16x2:
20262 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this);
20263 case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
20264 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this);
20265 case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
20266 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
20267 *this);
20268 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
20269 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
20270 E, *this);
20271 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
20272 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
20273 BuiltinID, E, *this);
20274 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
20275 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
20276 *this);
20277 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
20278 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
20279 E, *this);
20280 case NVPTX::BI__nvvm_fmax_nan_f16:
20281 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this);
20282 case NVPTX::BI__nvvm_fmax_nan_f16x2:
20283 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this);
20284 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
20285 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
20286 *this);
20287 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
20288 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
20289 E, *this);
20290 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
20291 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
20292 *this);
20293 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
20294 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
20295 *this);
20296 case NVPTX::BI__nvvm_fmin_f16:
20297 return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this);
20298 case NVPTX::BI__nvvm_fmin_f16x2:
20299 return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this);
20300 case NVPTX::BI__nvvm_fmin_ftz_f16:
20301 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this);
20302 case NVPTX::BI__nvvm_fmin_ftz_f16x2:
20303 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this);
20304 case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
20305 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this);
20306 case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
20307 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
20308 *this);
20309 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
20310 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
20311 E, *this);
20312 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
20313 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
20314 BuiltinID, E, *this);
20315 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
20316 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
20317 *this);
20318 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
20319 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
20320 E, *this);
20321 case NVPTX::BI__nvvm_fmin_nan_f16:
20322 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this);
20323 case NVPTX::BI__nvvm_fmin_nan_f16x2:
20324 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this);
20325 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
20326 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
20327 *this);
20328 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
20329 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
20330 E, *this);
20331 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
20332 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
20333 *this);
20334 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
20335 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
20336 *this);
20337 case NVPTX::BI__nvvm_ldg_h:
20338 return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
20339 case NVPTX::BI__nvvm_ldg_h2:
20340 return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
20341 case NVPTX::BI__nvvm_ldu_h:
20342 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
20343 case NVPTX::BI__nvvm_ldu_h2: {
20344 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
20345 }
20346 case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
20347 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
20348 Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,
20349 4);
20350 case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
20351 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
20352 Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E,
20353 8);
20354 case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
20355 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
20356 Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E,
20357 16);
20358 case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
20359 return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
20360 Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,
20361 16);
20362 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
20363 return Builder.CreateCall(
20364 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
20365 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
20366 return Builder.CreateCall(
20367 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
20368 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
20369 return Builder.CreateCall(
20370 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
20371 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
20372 return Builder.CreateCall(
20373 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
20374 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
20375 return Builder.CreateCall(
20376 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
20377 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
20378 return Builder.CreateCall(
20379 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
20380 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
20381 return Builder.CreateCall(
20382 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
20383 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
20384 return Builder.CreateCall(
20385 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
20386 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
20387 return Builder.CreateCall(
20388 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
20389 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
20390 return Builder.CreateCall(
20391 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
20392 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
20393 return Builder.CreateCall(
20394 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
20395 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
20396 return Builder.CreateCall(
20397 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
20398 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
20399 return Builder.CreateCall(
20400 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
20401 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
20402 return Builder.CreateCall(
20403 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
20404 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
20405 return Builder.CreateCall(
20406 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
20407 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
20408 return Builder.CreateCall(
20409 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
20410 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
20411 return Builder.CreateCall(
20412 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
20413 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
20414 return Builder.CreateCall(
20415 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
20416 case NVPTX::BI__nvvm_is_explicit_cluster:
20417 return Builder.CreateCall(
20418 CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
20419 case NVPTX::BI__nvvm_isspacep_shared_cluster:
20420 return Builder.CreateCall(
20421 CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
20422 EmitScalarExpr(E->getArg(0)));
20423 case NVPTX::BI__nvvm_mapa:
20424 return Builder.CreateCall(
20425 CGM.getIntrinsic(Intrinsic::nvvm_mapa),
20426 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
20427 case NVPTX::BI__nvvm_mapa_shared_cluster:
20428 return Builder.CreateCall(
20429 CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
20430 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
20431 case NVPTX::BI__nvvm_getctarank:
20432 return Builder.CreateCall(
20433 CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
20434 EmitScalarExpr(E->getArg(0)));
20435 case NVPTX::BI__nvvm_getctarank_shared_cluster:
20436 return Builder.CreateCall(
20437 CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
20438 EmitScalarExpr(E->getArg(0)));
20439 case NVPTX::BI__nvvm_barrier_cluster_arrive:
20440 return Builder.CreateCall(
20441 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));
20442 case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
20443 return Builder.CreateCall(
20444 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
20445 case NVPTX::BI__nvvm_barrier_cluster_wait:
20446 return Builder.CreateCall(
20447 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
20448 case NVPTX::BI__nvvm_fence_sc_cluster:
20449 return Builder.CreateCall(
20450 CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
20451 default:
20452 return nullptr;
20453 }
20454}
20455
20456namespace {
20457struct BuiltinAlignArgs {
20458 llvm::Value *Src = nullptr;
20459 llvm::Type *SrcType = nullptr;
20460 llvm::Value *Alignment = nullptr;
20461 llvm::Value *Mask = nullptr;
20462 llvm::IntegerType *IntType = nullptr;
20463
20464 BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
20465 QualType AstType = E->getArg(0)->getType();
20466 if (AstType->isArrayType())
20467 Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).getPointer();
20468 else
20469 Src = CGF.EmitScalarExpr(E->getArg(0));
20470 SrcType = Src->getType();
20471 if (SrcType->isPointerTy()) {
20472 IntType = IntegerType::get(
20473 CGF.getLLVMContext(),
20474 CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
20475 } else {
20476 assert(SrcType->isIntegerTy());
20477 IntType = cast<llvm::IntegerType>(SrcType);
20478 }
20479 Alignment = CGF.EmitScalarExpr(E->getArg(1));
20480 Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
20481 auto *One = llvm::ConstantInt::get(IntType, 1);
20482 Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
20483 }
20484};
20485} // namespace
20486
20487/// Generate (x & (y-1)) == 0.
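/// For illustration (a sketch; the names and the 64-bit index type are assumed):
/// __builtin_is_aligned(p, 16) on a pointer lowers roughly to
///   %src_addr   = ptrtoint ptr %p to i64
///   %set_bits   = and i64 %src_addr, 15
///   %is_aligned = icmp eq i64 %set_bits, 0
/// Integer arguments skip the ptrtoint and are masked directly.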
20488 RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
20489 BuiltinAlignArgs Args(E, *this);
20490 llvm::Value *SrcAddress = Args.Src;
20491 if (Args.SrcType->isPointerTy())
20492 SrcAddress =
20493 Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
20494 return RValue::get(Builder.CreateICmpEQ(
20495 Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
20496 llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
20497}
20498
20499/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
20500/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
20501/// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
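/// For illustration (a sketch; assumes a 64-bit index type and a constant
/// alignment): __builtin_align_up(p, 64) on a pointer lowers roughly to
///   %over_boundary  = getelementptr inbounds i8, ptr %p, i64 63
///   %aligned_result = call ptr @llvm.ptrmask.p0.i64(ptr %over_boundary, i64 -64)
/// while the integer form of align_down is a single `and` with the inverted mask.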
20502RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
20503 BuiltinAlignArgs Args(E, *this);
20504 llvm::Value *SrcForMask = Args.Src;
20505 if (AlignUp) {
20506 // When aligning up we have to first add the mask to ensure we go over the
20507 // next alignment value and then align down to the next valid multiple.
20508 // By adding the mask, we ensure that align_up on an already aligned
20509 // value will not change the value.
20510 if (Args.Src->getType()->isPointerTy()) {
20511 if (getLangOpts().isSignedOverflowDefined())
20512 SrcForMask =
20513 Builder.CreateGEP(Int8Ty, SrcForMask, Args.Mask, "over_boundary");
20514 else
20515 SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask,
20516 /*SignedIndices=*/true,
20517 /*isSubtraction=*/false,
20518 E->getExprLoc(), "over_boundary");
20519 } else {
20520 SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
20521 }
20522 }
20523 // Invert the mask to only clear the lower bits.
20524 llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
20525 llvm::Value *Result = nullptr;
20526 if (Args.Src->getType()->isPointerTy()) {
20527 Result = Builder.CreateIntrinsic(
20528 Intrinsic::ptrmask, {Args.SrcType, Args.IntType},
20529 {SrcForMask, InvertedMask}, nullptr, "aligned_result");
20530 } else {
20531 Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
20532 }
20533 assert(Result->getType() == Args.SrcType);
20534 return RValue::get(Result);
20535}
20536
20537 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
20538 const CallExpr *E) {
20539 switch (BuiltinID) {
20540 case WebAssembly::BI__builtin_wasm_memory_size: {
20541 llvm::Type *ResultType = ConvertType(E->getType());
20542 Value *I = EmitScalarExpr(E->getArg(0));
20543 Function *Callee =
20544 CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
20545 return Builder.CreateCall(Callee, I);
20546 }
20547 case WebAssembly::BI__builtin_wasm_memory_grow: {
20548 llvm::Type *ResultType = ConvertType(E->getType());
20549 Value *Args[] = {EmitScalarExpr(E->getArg(0)),
20550 EmitScalarExpr(E->getArg(1))};
20551 Function *Callee =
20552 CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
20553 return Builder.CreateCall(Callee, Args);
20554 }
20555 case WebAssembly::BI__builtin_wasm_tls_size: {
20556 llvm::Type *ResultType = ConvertType(E->getType());
20557 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
20558 return Builder.CreateCall(Callee);
20559 }
20560 case WebAssembly::BI__builtin_wasm_tls_align: {
20561 llvm::Type *ResultType = ConvertType(E->getType());
20562 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
20563 return Builder.CreateCall(Callee);
20564 }
20565 case WebAssembly::BI__builtin_wasm_tls_base: {
20566 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
20567 return Builder.CreateCall(Callee);
20568 }
20569 case WebAssembly::BI__builtin_wasm_throw: {
20570 Value *Tag = EmitScalarExpr(E->getArg(0));
20571 Value *Obj = EmitScalarExpr(E->getArg(1));
20572 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
20573 return Builder.CreateCall(Callee, {Tag, Obj});
20574 }
20575 case WebAssembly::BI__builtin_wasm_rethrow: {
20576 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
20577 return Builder.CreateCall(Callee);
20578 }
20579 case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
20580 Value *Addr = EmitScalarExpr(E->getArg(0));
20581 Value *Expected = EmitScalarExpr(E->getArg(1));
20582 Value *Timeout = EmitScalarExpr(E->getArg(2));
20583 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
20584 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
20585 }
20586 case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
20587 Value *Addr = EmitScalarExpr(E->getArg(0));
20588 Value *Expected = EmitScalarExpr(E->getArg(1));
20589 Value *Timeout = EmitScalarExpr(E->getArg(2));
20590 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
20591 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
20592 }
20593 case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
20594 Value *Addr = EmitScalarExpr(E->getArg(0));
20595 Value *Count = EmitScalarExpr(E->getArg(1));
20596 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
20597 return Builder.CreateCall(Callee, {Addr, Count});
20598 }
20599 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
20600 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
20601 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
20602 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
20603 Value *Src = EmitScalarExpr(E->getArg(0));
20604 llvm::Type *ResT = ConvertType(E->getType());
20605 Function *Callee =
20606 CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
20607 return Builder.CreateCall(Callee, {Src});
20608 }
20609 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
20610 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
20611 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
20612 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
20613 Value *Src = EmitScalarExpr(E->getArg(0));
20614 llvm::Type *ResT = ConvertType(E->getType());
20615 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
20616 {ResT, Src->getType()});
20617 return Builder.CreateCall(Callee, {Src});
20618 }
20619 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
20620 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
20621 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
20622 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
20623 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
20624 Value *Src = EmitScalarExpr(E->getArg(0));
20625 llvm::Type *ResT = ConvertType(E->getType());
20626 Function *Callee =
20627 CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
20628 return Builder.CreateCall(Callee, {Src});
20629 }
20630 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
20631 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
20632 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
20633 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
20634 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
20635 Value *Src = EmitScalarExpr(E->getArg(0));
20636 llvm::Type *ResT = ConvertType(E->getType());
20637 Function *Callee =
20638 CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
20639 return Builder.CreateCall(Callee, {Src});
20640 }
20641 case WebAssembly::BI__builtin_wasm_min_f32:
20642 case WebAssembly::BI__builtin_wasm_min_f64:
20643 case WebAssembly::BI__builtin_wasm_min_f32x4:
20644 case WebAssembly::BI__builtin_wasm_min_f64x2: {
20645 Value *LHS = EmitScalarExpr(E->getArg(0));
20646 Value *RHS = EmitScalarExpr(E->getArg(1));
20647 Function *Callee =
20648 CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
20649 return Builder.CreateCall(Callee, {LHS, RHS});
20650 }
20651 case WebAssembly::BI__builtin_wasm_max_f32:
20652 case WebAssembly::BI__builtin_wasm_max_f64:
20653 case WebAssembly::BI__builtin_wasm_max_f32x4:
20654 case WebAssembly::BI__builtin_wasm_max_f64x2: {
20655 Value *LHS = EmitScalarExpr(E->getArg(0));
20656 Value *RHS = EmitScalarExpr(E->getArg(1));
20657 Function *Callee =
20658 CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
20659 return Builder.CreateCall(Callee, {LHS, RHS});
20660 }
20661 case WebAssembly::BI__builtin_wasm_pmin_f32x4:
20662 case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
20663 Value *LHS = EmitScalarExpr(E->getArg(0));
20664 Value *RHS = EmitScalarExpr(E->getArg(1));
20665 Function *Callee =
20666 CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
20667 return Builder.CreateCall(Callee, {LHS, RHS});
20668 }
20669 case WebAssembly::BI__builtin_wasm_pmax_f32x4:
20670 case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
20671 Value *LHS = EmitScalarExpr(E->getArg(0));
20672 Value *RHS = EmitScalarExpr(E->getArg(1));
20673 Function *Callee =
20674 CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
20675 return Builder.CreateCall(Callee, {LHS, RHS});
20676 }
20677 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
20678 case WebAssembly::BI__builtin_wasm_floor_f32x4:
20679 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
20680 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
20681 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
20682 case WebAssembly::BI__builtin_wasm_floor_f64x2:
20683 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
20684 case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
20685 unsigned IntNo;
20686 switch (BuiltinID) {
20687 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
20688 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
20689 IntNo = Intrinsic::ceil;
20690 break;
20691 case WebAssembly::BI__builtin_wasm_floor_f32x4:
20692 case WebAssembly::BI__builtin_wasm_floor_f64x2:
20693 IntNo = Intrinsic::floor;
20694 break;
20695 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
20696 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
20697 IntNo = Intrinsic::trunc;
20698 break;
20699 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
20700 case WebAssembly::BI__builtin_wasm_nearest_f64x2:
20701 IntNo = Intrinsic::nearbyint;
20702 break;
20703 default:
20704 llvm_unreachable("unexpected builtin ID");
20705 }
20706 Value *Value = EmitScalarExpr(E->getArg(0));
20707 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
20708 return Builder.CreateCall(Callee, Value);
20709 }
20710 case WebAssembly::BI__builtin_wasm_ref_null_extern: {
20711 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_extern);
20712 return Builder.CreateCall(Callee);
20713 }
20714 case WebAssembly::BI__builtin_wasm_ref_null_func: {
20715 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_func);
20716 return Builder.CreateCall(Callee);
20717 }
20718 case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {
20719 Value *Src = EmitScalarExpr(E->getArg(0));
20720 Value *Indices = EmitScalarExpr(E->getArg(1));
20721 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
20722 return Builder.CreateCall(Callee, {Src, Indices});
20723 }
20724 case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
20725 case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
20726 case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
20727 case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
20728 case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
20729 case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
20730 case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
20731 case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8: {
20732 unsigned IntNo;
20733 switch (BuiltinID) {
20734 case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
20735 case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
20736 IntNo = Intrinsic::sadd_sat;
20737 break;
20738 case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
20739 case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
20740 IntNo = Intrinsic::uadd_sat;
20741 break;
20742 case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
20743 case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
20744 IntNo = Intrinsic::wasm_sub_sat_signed;
20745 break;
20746 case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
20747 case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8:
20748 IntNo = Intrinsic::wasm_sub_sat_unsigned;
20749 break;
20750 default:
20751 llvm_unreachable("unexpected builtin ID");
20752 }
20753 Value *LHS = EmitScalarExpr(E->getArg(0));
20754 Value *RHS = EmitScalarExpr(E->getArg(1));
20755 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
20756 return Builder.CreateCall(Callee, {LHS, RHS});
20757 }
20758 case WebAssembly::BI__builtin_wasm_abs_i8x16:
20759 case WebAssembly::BI__builtin_wasm_abs_i16x8:
20760 case WebAssembly::BI__builtin_wasm_abs_i32x4:
20761 case WebAssembly::BI__builtin_wasm_abs_i64x2: {
20762 Value *Vec = EmitScalarExpr(E->getArg(0));
20763 Value *Neg = Builder.CreateNeg(Vec, "neg");
20764 Constant *Zero = llvm::Constant::getNullValue(Vec->getType());
20765 Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");
20766 return Builder.CreateSelect(ICmp, Neg, Vec, "abs");
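// In other words, |v| is emitted as select(v < 0, -v, v) rather than as a
// target intrinsic, so that instruction selection can fold the pattern back
// into the single i8x16.abs / i16x8.abs / i32x4.abs / i64x2.abs instruction.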
20767 }
20768 case WebAssembly::BI__builtin_wasm_min_s_i8x16:
20769 case WebAssembly::BI__builtin_wasm_min_u_i8x16:
20770 case WebAssembly::BI__builtin_wasm_max_s_i8x16:
20771 case WebAssembly::BI__builtin_wasm_max_u_i8x16:
20772 case WebAssembly::BI__builtin_wasm_min_s_i16x8:
20773 case WebAssembly::BI__builtin_wasm_min_u_i16x8:
20774 case WebAssembly::BI__builtin_wasm_max_s_i16x8:
20775 case WebAssembly::BI__builtin_wasm_max_u_i16x8:
20776 case WebAssembly::BI__builtin_wasm_min_s_i32x4:
20777 case WebAssembly::BI__builtin_wasm_min_u_i32x4:
20778 case WebAssembly::BI__builtin_wasm_max_s_i32x4:
20779 case WebAssembly::BI__builtin_wasm_max_u_i32x4: {
20780 Value *LHS = EmitScalarExpr(E->getArg(0));
20781 Value *RHS = EmitScalarExpr(E->getArg(1));
20782 Value *ICmp;
20783 switch (BuiltinID) {
20784 case WebAssembly::BI__builtin_wasm_min_s_i8x16:
20785 case WebAssembly::BI__builtin_wasm_min_s_i16x8:
20786 case WebAssembly::BI__builtin_wasm_min_s_i32x4:
20787 ICmp = Builder.CreateICmpSLT(LHS, RHS);
20788 break;
20789 case WebAssembly::BI__builtin_wasm_min_u_i8x16:
20790 case WebAssembly::BI__builtin_wasm_min_u_i16x8:
20791 case WebAssembly::BI__builtin_wasm_min_u_i32x4:
20792 ICmp = Builder.CreateICmpULT(LHS, RHS);
20793 break;
20794 case WebAssembly::BI__builtin_wasm_max_s_i8x16:
20795 case WebAssembly::BI__builtin_wasm_max_s_i16x8:
20796 case WebAssembly::BI__builtin_wasm_max_s_i32x4:
20797 ICmp = Builder.CreateICmpSGT(LHS, RHS);
20798 break;
20799 case WebAssembly::BI__builtin_wasm_max_u_i8x16:
20800 case WebAssembly::BI__builtin_wasm_max_u_i16x8:
20801 case WebAssembly::BI__builtin_wasm_max_u_i32x4:
20802 ICmp = Builder.CreateICmpUGT(LHS, RHS);
20803 break;
20804 default:
20805 llvm_unreachable("unexpected builtin ID");
20806 }
20807 return Builder.CreateSelect(ICmp, LHS, RHS);
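// Likewise, the integer min/max builtins are emitted as icmp + select rather
// than as target intrinsics; the backend matches this pattern to the
// corresponding i8x16/i16x8/i32x4 min_s/min_u/max_s/max_u instructions.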
20808 }
20809 case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
20810 case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
20811 Value *LHS = EmitScalarExpr(E->getArg(0));
20812 Value *RHS = EmitScalarExpr(E->getArg(1));
20813 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
20814 ConvertType(E->getType()));
20815 return Builder.CreateCall(Callee, {LHS, RHS});
20816 }
20817 case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {
20818 Value *LHS = EmitScalarExpr(E->getArg(0));
20819 Value *RHS = EmitScalarExpr(E->getArg(1));
20820 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);
20821 return Builder.CreateCall(Callee, {LHS, RHS});
20822 }
20823 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
20824 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
20825 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
20826 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
20827 Value *Vec = EmitScalarExpr(E->getArg(0));
20828 unsigned IntNo;
20829 switch (BuiltinID) {
20830 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
20831 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
20832 IntNo = Intrinsic::wasm_extadd_pairwise_signed;
20833 break;
20834 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
20835 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
20836 IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
20837 break;
20838 default:
20839 llvm_unreachable("unexpected builtin ID");
20840 }
20841
20842 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
20843 return Builder.CreateCall(Callee, Vec);
20844 }
20845 case WebAssembly::BI__builtin_wasm_bitselect: {
20846 Value *V1 = EmitScalarExpr(E->getArg(0));
20847 Value *V2 = EmitScalarExpr(E->getArg(1));
20848 Value *C = EmitScalarExpr(E->getArg(2));
20849 Function *Callee =
20850 CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
20851 return Builder.CreateCall(Callee, {V1, V2, C});
20852 }
20853 case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
20854 Value *LHS = EmitScalarExpr(E->getArg(0));
20855 Value *RHS = EmitScalarExpr(E->getArg(1));
20856 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
20857 return Builder.CreateCall(Callee, {LHS, RHS});
20858 }
20859 case WebAssembly::BI__builtin_wasm_popcnt_i8x16: {
20860 Value *Vec = EmitScalarExpr(E->getArg(0));
20861 Function *Callee =
20862 CGM.getIntrinsic(Intrinsic::ctpop, ConvertType(E->getType()));
20863 return Builder.CreateCall(Callee, {Vec});
20864 }
20865 case WebAssembly::BI__builtin_wasm_any_true_v128:
20866 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
20867 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
20868 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
20869 case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
20870 unsigned IntNo;
20871 switch (BuiltinID) {
20872 case WebAssembly::BI__builtin_wasm_any_true_v128:
20873 IntNo = Intrinsic::wasm_anytrue;
20874 break;
20875 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
20876 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
20877 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
20878 case WebAssembly::BI__builtin_wasm_all_true_i64x2:
20879 IntNo = Intrinsic::wasm_alltrue;
20880 break;
20881 default:
20882 llvm_unreachable("unexpected builtin ID");
20883 }
20884 Value *Vec = EmitScalarExpr(E->getArg(0));
20885 Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
20886 return Builder.CreateCall(Callee, {Vec});
20887 }
20888 case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
20889 case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
20890 case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
20891 case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
20892 Value *Vec = EmitScalarExpr(E->getArg(0));
20893 Function *Callee =
20894 CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
20895 return Builder.CreateCall(Callee, {Vec});
20896 }
20897 case WebAssembly::BI__builtin_wasm_abs_f32x4:
20898 case WebAssembly::BI__builtin_wasm_abs_f64x2: {
20899 Value *Vec = EmitScalarExpr(E->getArg(0));
20900 Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
20901 return Builder.CreateCall(Callee, {Vec});
20902 }
20903 case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
20904 case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
20905 Value *Vec = EmitScalarExpr(E->getArg(0));
20906 Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
20907 return Builder.CreateCall(Callee, {Vec});
20908 }
20909 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
20910 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
20911 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
20912 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
20913 Value *Low = EmitScalarExpr(E->getArg(0));
20914 Value *High = EmitScalarExpr(E->getArg(1));
20915 unsigned IntNo;
20916 switch (BuiltinID) {
20917 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
20918 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
20919 IntNo = Intrinsic::wasm_narrow_signed;
20920 break;
20921 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
20922 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
20923 IntNo = Intrinsic::wasm_narrow_unsigned;
20924 break;
20925 default:
20926 llvm_unreachable("unexpected builtin ID");
20927 }
20928 Function *Callee =
20929 CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
20930 return Builder.CreateCall(Callee, {Low, High});
20931 }
20932 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
20933 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: {
20934 Value *Vec = EmitScalarExpr(E->getArg(0));
20935 unsigned IntNo;
20936 switch (BuiltinID) {
20937 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
20938 IntNo = Intrinsic::fptosi_sat;
20939 break;
20940 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4:
20941 IntNo = Intrinsic::fptoui_sat;
20942 break;
20943 default:
20944 llvm_unreachable("unexpected builtin ID");
20945 }
20946 llvm::Type *SrcT = Vec->getType();
20947 llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty());
20948 Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});
20949 Value *Trunc = Builder.CreateCall(Callee, Vec);
20950 Value *Splat = Constant::getNullValue(TruncT);
20951 return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3});
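// Sketch for the f64x2 input (illustrative value names):
//   %trunc = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %vec)
//   %res   = shufflevector <2 x i32> %trunc, <2 x i32> zeroinitializer,
//            <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// so lanes 2 and 3 of the i32x4 result are zero, as the "_zero" forms require.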
20952 }
20953 case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
20954 Value *Ops[18];
20955 size_t OpIdx = 0;
20956 Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
20957 Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
20958 while (OpIdx < 18) {
20959 std::optional<llvm::APSInt> LaneConst =
20960 E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
20961 assert(LaneConst && "Constant arg isn't actually constant?");
20962 Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
20963 }
20964 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
20965 return Builder.CreateCall(Callee, Ops);
20966 }
20967 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
20968 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
20969 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
20970 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: {
20971 Value *A = EmitScalarExpr(E->getArg(0));
20972 Value *B = EmitScalarExpr(E->getArg(1));
20973 Value *C = EmitScalarExpr(E->getArg(2));
20974 unsigned IntNo;
20975 switch (BuiltinID) {
20976 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
20977 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
20978 IntNo = Intrinsic::wasm_relaxed_madd;
20979 break;
20980 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
20981 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:
20982 IntNo = Intrinsic::wasm_relaxed_nmadd;
20983 break;
20984 default:
20985 llvm_unreachable("unexpected builtin ID");
20986 }
20987 Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
20988 return Builder.CreateCall(Callee, {A, B, C});
20989 }
20990 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16:
20991 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8:
20992 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4:
20993 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: {
20994 Value *A = EmitScalarExpr(E->getArg(0));
20995 Value *B = EmitScalarExpr(E->getArg(1));
20996 Value *C = EmitScalarExpr(E->getArg(2));
20997 Function *Callee =
20998 CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType());
20999 return Builder.CreateCall(Callee, {A, B, C});
21000 }
21001 case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
21002 Value *Src = EmitScalarExpr(E->getArg(0));
21003 Value *Indices = EmitScalarExpr(E->getArg(1));
21004 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);
21005 return Builder.CreateCall(Callee, {Src, Indices});
21006 }
21007 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
21008 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
21009 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
21010 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
21011 Value *LHS = EmitScalarExpr(E->getArg(0));
21012 Value *RHS = EmitScalarExpr(E->getArg(1));
21013 unsigned IntNo;
21014 switch (BuiltinID) {
21015 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
21016 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
21017 IntNo = Intrinsic::wasm_relaxed_min;
21018 break;
21019 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
21020 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
21021 IntNo = Intrinsic::wasm_relaxed_max;
21022 break;
21023 default:
21024 llvm_unreachable("unexpected builtin ID");
21025 }
21026 Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType());
21027 return Builder.CreateCall(Callee, {LHS, RHS});
21028 }
21029 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
21030 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
21031 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
21032 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: {
21033 Value *Vec = EmitScalarExpr(E->getArg(0));
21034 unsigned IntNo;
21035 switch (BuiltinID) {
21036 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
21037 IntNo = Intrinsic::wasm_relaxed_trunc_signed;
21038 break;
21039 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
21040 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
21041 break;
21042 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
21043 IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero;
21044 break;
21045 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2:
21046 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero;
21047 break;
21048 default:
21049 llvm_unreachable("unexpected builtin ID");
21050 }
21051 Function *Callee = CGM.getIntrinsic(IntNo);
21052 return Builder.CreateCall(Callee, {Vec});
21053 }
21054 case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: {
21055 Value *LHS = EmitScalarExpr(E->getArg(0));
21056 Value *RHS = EmitScalarExpr(E->getArg(1));
21057 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed);
21058 return Builder.CreateCall(Callee, {LHS, RHS});
21059 }
21060 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: {
21061 Value *LHS = EmitScalarExpr(E->getArg(0));
21062 Value *RHS = EmitScalarExpr(E->getArg(1));
21063 Function *Callee =
21064 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);
21065 return Builder.CreateCall(Callee, {LHS, RHS});
21066 }
21067 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: {
21068 Value *LHS = EmitScalarExpr(E->getArg(0));
21069 Value *RHS = EmitScalarExpr(E->getArg(1));
21070 Value *Acc = EmitScalarExpr(E->getArg(2));
21071 Function *Callee =
21072 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
21073 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
21074 }
21075 case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: {
21076 Value *LHS = EmitScalarExpr(E->getArg(0));
21077 Value *RHS = EmitScalarExpr(E->getArg(1));
21078 Value *Acc = EmitScalarExpr(E->getArg(2));
21079 Function *Callee =
21080 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32);
21081 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
21082 }
21083 case WebAssembly::BI__builtin_wasm_table_get: {
21084 assert(E->getArg(0)->getType()->isArrayType());
21085 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
21086 Value *Index = EmitScalarExpr(E->getArg(1));
21087 Function *Callee;
21088 if (E->getType().isWebAssemblyExternrefType())
21089 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref);
21090 else if (E->getType().isWebAssemblyFuncrefType())
21091 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref);
21092 else
21093 llvm_unreachable(
21094 "Unexpected reference type for __builtin_wasm_table_get");
21095 return Builder.CreateCall(Callee, {Table, Index});
21096 }
21097 case WebAssembly::BI__builtin_wasm_table_set: {
21098 assert(E->getArg(0)->getType()->isArrayType());
21099 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
21100 Value *Index = EmitScalarExpr(E->getArg(1));
21101 Value *Val = EmitScalarExpr(E->getArg(2));
21102 Function *Callee;
21103 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
21104 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_externref);
21105 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
21106 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_funcref);
21107 else
21108 llvm_unreachable(
21109 "Unexpected reference type for __builtin_wasm_table_set");
21110 return Builder.CreateCall(Callee, {Table, Index, Val});
21111 }
21112 case WebAssembly::BI__builtin_wasm_table_size: {
21113 assert(E->getArg(0)->getType()->isArrayType());
21114 Value *Value = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
21115 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size);
21116 return Builder.CreateCall(Callee, Value);
21117 }
21118 case WebAssembly::BI__builtin_wasm_table_grow: {
21119 assert(E->getArg(0)->getType()->isArrayType());
21120 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
21121 Value *Val = EmitScalarExpr(E->getArg(1));
21122 Value *NElems = EmitScalarExpr(E->getArg(2));
21123
21124 Function *Callee;
21125 if (E->getArg(1)->getType().isWebAssemblyExternrefType())
21126 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_externref);
21127 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
21128 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
21129 else
21130 llvm_unreachable(
21131 "Unexpected reference type for __builtin_wasm_table_grow");
21132
21133 return Builder.CreateCall(Callee, {Table, Val, NElems});
21134 }
21135 case WebAssembly::BI__builtin_wasm_table_fill: {
21136 assert(E->getArg(0)->getType()->isArrayType());
21137 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
21138 Value *Index = EmitScalarExpr(E->getArg(1));
21139 Value *Val = EmitScalarExpr(E->getArg(2));
21140 Value *NElems = EmitScalarExpr(E->getArg(3));
21141
21142 Function *Callee;
21143 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
21144 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_externref);
21145 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
21146 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
21147 else
21148 llvm_unreachable(
21149 "Unexpected reference type for __builtin_wasm_table_fill");
21150
21151 return Builder.CreateCall(Callee, {Table, Index, Val, NElems});
21152 }
21153 case WebAssembly::BI__builtin_wasm_table_copy: {
21154 assert(E->getArg(0)->getType()->isArrayType());
21155 Value *TableX = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
21156 Value *TableY = EmitArrayToPointerDecay(E->getArg(1)).getPointer();
21157 Value *DstIdx = EmitScalarExpr(E->getArg(2));
21158 Value *SrcIdx = EmitScalarExpr(E->getArg(3));
21159 Value *NElems = EmitScalarExpr(E->getArg(4));
21160
21161 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_copy);
21162
21163 return Builder.CreateCall(Callee, {TableX, TableY, SrcIdx, DstIdx, NElems});
21164 }
21165 default:
21166 return nullptr;
21167 }
21168}
21169
21170static std::pair<Intrinsic::ID, unsigned>
21171 getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) {
21172 struct Info {
21173 unsigned BuiltinID;
21174 Intrinsic::ID IntrinsicID;
21175 unsigned VecLen;
21176 };
21177 static Info Infos[] = {
21178#define CUSTOM_BUILTIN_MAPPING(x,s) \
21179 { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
21180 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
21181 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
21182 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
21183 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
21184 CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
21185 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
21186 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
21187 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
21188 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
21189 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
21190 CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
21191 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
21192 CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
21193 CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
21194 CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
21195 CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
21196 CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
21197 CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
21198 CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
21199 CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
21200 CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
21201 CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
21202 // Legacy builtins that take a vector in place of a vector predicate.
21203 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
21204 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
21205 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
21206 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
21207 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
21208 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
21209 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
21210 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
21211#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
21212#undef CUSTOM_BUILTIN_MAPPING
21213 };
21214
21215 auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
21216 static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
21217 (void)SortOnce;
21218
21219 const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo);
21220 if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
21221 return {Intrinsic::not_intrinsic, 0};
21222
21223 return {F->IntrinsicID, F->VecLen};
21224}
21225
21226 Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
21227 const CallExpr *E) {
21228 Intrinsic::ID ID;
21229 unsigned VecLen;
21230 std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID);
21231
21232 auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
21233 // The base pointer is passed by address, so it needs to be loaded.
21234 Address A = EmitPointerWithAlignment(E->getArg(0));
21235 Address BP = Address(A.getPointer(), Int8PtrTy, A.getAlignment());
21236 llvm::Value *Base = Builder.CreateLoad(BP);
21237 // The treatment of both loads and stores is the same: the arguments for
21238 // the builtin are the same as the arguments for the intrinsic.
21239 // Load:
21240 // builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start)
21241 // builtin(Base, Mod, Start) -> intr(Base, Mod, Start)
21242 // Store:
21243 // builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
21244 // builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start)
21245 llvm::SmallVector<llvm::Value*,5> Ops = { Base };
21246 for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
21247 Ops.push_back(EmitScalarExpr(E->getArg(i)));
21248
21249 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
21250 // The load intrinsics generate two results (Value, NewBase), stores
21251 // generate one (NewBase). The new base address needs to be stored.
21252 llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1)
21253 : Result;
21254 llvm::Value *LV = EmitScalarExpr(E->getArg(0));
21255 Address Dest = EmitPointerWithAlignment(E->getArg(0));
21256 llvm::Value *RetVal =
21257 Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
21258 if (IsLoad)
21259 RetVal = Builder.CreateExtractValue(Result, 0);
21260 return RetVal;
21261 };
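// For example (illustrative argument names), a circular load such as
//   __builtin_HEXAGON_L2_loadri_pci(&Base, 4, Mod, Start)
// becomes a call to llvm.hexagon.L2.loadri.pci returning { value, new base };
// the new base is written back through the first argument and the loaded
// value is returned, while the store forms return only the new base.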
21262
21263 // Handle the conversion of bit-reverse load intrinsics to bit code.
21264 // The intrinsic call after this function only reads from memory and the
21265 // write to memory is handled by the store instruction.
21266 auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
21267 // The intrinsic generates one result, which is the new value for the base
21268 // pointer. It needs to be returned. The result of the load instruction is
21269 // passed to the intrinsic by address, so the value needs to be stored.
21270 llvm::Value *BaseAddress = EmitScalarExpr(E->getArg(0));
21271
21272 // Expressions like &(*pt++) will be incremented per evaluation.
21273 // EmitPointerWithAlignment and EmitScalarExpr evaluate the expression
21274 // per call.
21275 Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
21276 DestAddr = Address(DestAddr.getPointer(), Int8Ty, DestAddr.getAlignment());
21277 llvm::Value *DestAddress = DestAddr.getPointer();
21278
21279 // Operands are Base, Dest, Modifier.
21280 // The intrinsic format in LLVM IR is defined as
21281 // { ValueType, i8* } (i8*, i32).
21282 llvm::Value *Result = Builder.CreateCall(
21283 CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))});
21284
21285 // The value needs to be stored as the variable is passed by reference.
21286 llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
21287
21288 // The value to store needs to be truncated to fit the destination type.
21289 // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
21290 // to be handled with stores of the respective destination type.
21291 DestVal = Builder.CreateTrunc(DestVal, DestTy);
21292
21293 Builder.CreateAlignedStore(DestVal, DestAddress, DestAddr.getAlignment());
21294 // The updated value of the base pointer is returned.
21295 return Builder.CreateExtractValue(Result, 1);
21296 };
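// For example (illustrative names), __builtin_brev_ldh(Base, &Dst, Mod) calls
// llvm.hexagon.L2.loadrh.pbr, truncates the loaded value to i16 and stores it
// through &Dst, and returns the updated base pointer from the second result.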
21297
21298 auto V2Q = [this, VecLen] (llvm::Value *Vec) {
21299 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
21300 : Intrinsic::hexagon_V6_vandvrt;
21301 return Builder.CreateCall(CGM.getIntrinsic(ID),
21302 {Vec, Builder.getInt32(-1)});
21303 };
21304 auto Q2V = [this, VecLen] (llvm::Value *Pred) {
21305 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
21306 : Intrinsic::hexagon_V6_vandqrt;
21307 return Builder.CreateCall(CGM.getIntrinsic(ID),
21308 {Pred, Builder.getInt32(-1)});
21309 };
21310
21311 switch (BuiltinID) {
21312 // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
21313 // and the corresponding C/C++ builtins use loads/stores to update
21314 // the predicate.
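// For example (illustrative names), __builtin_HEXAGON_V6_vaddcarry(A, B, &Q)
// loads the predicate through &Q, converts it with V2Q, passes it to the
// intrinsic, stores the carry-out back through &Q via Q2V, and returns the
// vector sum from the first element of the result tuple.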
21315 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
21316 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
21317 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
21318 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
21319 // Get the type from the 0-th argument.
21320 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
21321 Address PredAddr =
21322 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
21323 llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
21324 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
21325 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});
21326
21327 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
21328 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(),
21329 PredAddr.getAlignment());
21330 return Builder.CreateExtractValue(Result, 0);
21331 }
21332 // These are identical to the builtins above, except they don't consume
21333 // input carry, only generate carry-out. Since they still produce two
21334 // outputs, generate the store of the predicate, but no load.
21335 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo:
21336 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B:
21337 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo:
21338 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: {
21339 // Get the type from the 0-th argument.
21340 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
21341 Address PredAddr =
21342 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
21343 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
21344 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
21345
21346 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
21347 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(),
21348 PredAddr.getAlignment());
21349 return Builder.CreateExtractValue(Result, 0);
21350 }
21351
21352 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:
21353 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:
21354 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq:
21355 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq:
21356 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B:
21357 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B:
21358 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B:
21359 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: {
21360 SmallVector<llvm::Value*,4> Ops;
21361 const Expr *PredOp = E->getArg(0);
21362 // There will be an implicit cast to a boolean vector. Strip it.
21363 if (auto *Cast = dyn_cast<ImplicitCastExpr>(PredOp)) {
21364 if (Cast->getCastKind() == CK_BitCast)
21365 PredOp = Cast->getSubExpr();
21366 Ops.push_back(V2Q(EmitScalarExpr(PredOp)));
21367 }
21368 for (int i = 1, e = E->getNumArgs(); i != e; ++i)
21369 Ops.push_back(EmitScalarExpr(E->getArg(i)));
21370 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
21371 }
21372
21373 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
21374 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
21375 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
21376 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
21377 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
21378 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
21379 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
21380 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
21381 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
21382 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
21383 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
21384 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
21385 return MakeCircOp(ID, /*IsLoad=*/true);
21386 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
21387 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
21388 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
21389 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
21390 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
21391 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
21392 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
21393 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
21394 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
21395 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
21396 return MakeCircOp(ID, /*IsLoad=*/false);
21397 case Hexagon::BI__builtin_brev_ldub:
21398 return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
21399 case Hexagon::BI__builtin_brev_ldb:
21400 return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
21401 case Hexagon::BI__builtin_brev_lduh:
21402 return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
21403 case Hexagon::BI__builtin_brev_ldh:
21404 return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
21405 case Hexagon::BI__builtin_brev_ldw:
21406 return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
21407 case Hexagon::BI__builtin_brev_ldd:
21408 return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
21409 } // switch
21410
21411 return nullptr;
21412}
21413
21414 Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
21415 const CallExpr *E,
21416 ReturnValueSlot ReturnValue) {
21417 SmallVector<Value *, 4> Ops;
21418 llvm::Type *ResultType = ConvertType(E->getType());
21419
21420 // Find out if any arguments are required to be integer constant expressions.
21421 unsigned ICEArguments = 0;
21422 ASTContext::GetBuiltinTypeError Error;
21423 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
21424 if (Error == ASTContext::GE_Missing_type) {
21425 // Vector intrinsics don't have a type string.
21426 assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin &&
21427 BuiltinID <= clang::RISCV::LastRVVBuiltin);
21428 ICEArguments = 0;
21429 if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v ||
21430 BuiltinID == RISCVVector::BI__builtin_rvv_vset_v)
21431 ICEArguments = 1 << 1;
21432 } else {
21433 assert(Error == ASTContext::GE_None && "Unexpected error");
21434 }
21435
21436 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_load)
21437 ICEArguments |= (1 << 1);
21438 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_store)
21439 ICEArguments |= (1 << 2);
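// Bit i of ICEArguments marks argument i as an integer constant expression,
// which EmitScalarOrConstFoldImmArg below folds to a ConstantInt. For
// example, for __builtin_riscv_ntl_load(ptr, domain) bit 1 is set, so the
// domain argument reaches Ops[1] as a constant.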
21440
21441 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
21442 // Handle aggregate argument, namely RVV tuple types in segment load/store
21443 if (hasAggregateEvaluationKind(E->getArg(i)->getType())) {
21444 LValue L = EmitAggExprToLValue(E->getArg(i));
21445 llvm::Value *AggValue = Builder.CreateLoad(L.getAddress(*this));
21446 Ops.push_back(AggValue);
21447 continue;
21448 }
21449 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
21450 }
21451
21452 Intrinsic::ID ID = Intrinsic::not_intrinsic;
21453 unsigned NF = 1;
21454 // The 0th bit simulates the `vta` of RVV
21455 // The 1st bit simulates the `vma` of RVV
21456 constexpr unsigned RVV_VTA = 0x1;
21457 constexpr unsigned RVV_VMA = 0x2;
21458 int PolicyAttrs = 0;
21459 bool IsMasked = false;
21460
21461 // Required for overloaded intrinsics.
21462 llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes;
21463 switch (BuiltinID) {
21464 default: llvm_unreachable("unexpected builtin ID");
21465 case RISCV::BI__builtin_riscv_orc_b_32:
21466 case RISCV::BI__builtin_riscv_orc_b_64:
21467 case RISCV::BI__builtin_riscv_clz_32:
21468 case RISCV::BI__builtin_riscv_clz_64:
21469 case RISCV::BI__builtin_riscv_ctz_32:
21470 case RISCV::BI__builtin_riscv_ctz_64:
21471 case RISCV::BI__builtin_riscv_clmul_32:
21472 case RISCV::BI__builtin_riscv_clmul_64:
21473 case RISCV::BI__builtin_riscv_clmulh_32:
21474 case RISCV::BI__builtin_riscv_clmulh_64:
21475 case RISCV::BI__builtin_riscv_clmulr_32:
21476 case RISCV::BI__builtin_riscv_clmulr_64:
21477 case RISCV::BI__builtin_riscv_xperm4_32:
21478 case RISCV::BI__builtin_riscv_xperm4_64:
21479 case RISCV::BI__builtin_riscv_xperm8_32:
21480 case RISCV::BI__builtin_riscv_xperm8_64:
21481 case RISCV::BI__builtin_riscv_brev8_32:
21482 case RISCV::BI__builtin_riscv_brev8_64:
21483 case RISCV::BI__builtin_riscv_zip_32:
21484 case RISCV::BI__builtin_riscv_unzip_32: {
21485 switch (BuiltinID) {
21486 default: llvm_unreachable("unexpected builtin ID");
21487 // Zbb
21488 case RISCV::BI__builtin_riscv_orc_b_32:
21489 case RISCV::BI__builtin_riscv_orc_b_64:
21490 ID = Intrinsic::riscv_orc_b;
21491 break;
21492 case RISCV::BI__builtin_riscv_clz_32:
21493 case RISCV::BI__builtin_riscv_clz_64: {
21494 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
21495 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
21496 if (Result->getType() != ResultType)
21497 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
21498 "cast");
21499 return Result;
21500 }
21501 case RISCV::BI__builtin_riscv_ctz_32:
21502 case RISCV::BI__builtin_riscv_ctz_64: {
21503 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
21504 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
21505 if (Result->getType() != ResultType)
21506 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
21507 "cast");
21508 return Result;
21509 }
21510
21511 // Zbc
21512 case RISCV::BI__builtin_riscv_clmul_32:
21513 case RISCV::BI__builtin_riscv_clmul_64:
21514 ID = Intrinsic::riscv_clmul;
21515 break;
21516 case RISCV::BI__builtin_riscv_clmulh_32:
21517 case RISCV::BI__builtin_riscv_clmulh_64:
21518 ID = Intrinsic::riscv_clmulh;
21519 break;
21520 case RISCV::BI__builtin_riscv_clmulr_32:
21521 case RISCV::BI__builtin_riscv_clmulr_64:
21522 ID = Intrinsic::riscv_clmulr;
21523 break;
21524
21525 // Zbkx
21526 case RISCV::BI__builtin_riscv_xperm8_32:
21527 case RISCV::BI__builtin_riscv_xperm8_64:
21528 ID = Intrinsic::riscv_xperm8;
21529 break;
21530 case RISCV::BI__builtin_riscv_xperm4_32:
21531 case RISCV::BI__builtin_riscv_xperm4_64:
21532 ID = Intrinsic::riscv_xperm4;
21533 break;
21534
21535 // Zbkb
21536 case RISCV::BI__builtin_riscv_brev8_32:
21537 case RISCV::BI__builtin_riscv_brev8_64:
21538 ID = Intrinsic::riscv_brev8;
21539 break;
21540 case RISCV::BI__builtin_riscv_zip_32:
21541 ID = Intrinsic::riscv_zip;
21542 break;
21543 case RISCV::BI__builtin_riscv_unzip_32:
21544 ID = Intrinsic::riscv_unzip;
21545 break;
21546 }
21547
21548 IntrinsicTypes = {ResultType};
21549 break;
21550 }
21551
21552 // Zk builtins
21553
21554 // Zknh
21555 case RISCV::BI__builtin_riscv_sha256sig0:
21556 ID = Intrinsic::riscv_sha256sig0;
21557 break;
21558 case RISCV::BI__builtin_riscv_sha256sig1:
21559 ID = Intrinsic::riscv_sha256sig1;
21560 break;
21561 case RISCV::BI__builtin_riscv_sha256sum0:
21562 ID = Intrinsic::riscv_sha256sum0;
21563 break;
21564 case RISCV::BI__builtin_riscv_sha256sum1:
21565 ID = Intrinsic::riscv_sha256sum1;
21566 break;
21567
21568 // Zksed
21569 case RISCV::BI__builtin_riscv_sm4ks:
21570 ID = Intrinsic::riscv_sm4ks;
21571 break;
21572 case RISCV::BI__builtin_riscv_sm4ed:
21573 ID = Intrinsic::riscv_sm4ed;
21574 break;
21575
21576 // Zksh
21577 case RISCV::BI__builtin_riscv_sm3p0:
21578 ID = Intrinsic::riscv_sm3p0;
21579 break;
21580 case RISCV::BI__builtin_riscv_sm3p1:
21581 ID = Intrinsic::riscv_sm3p1;
21582 break;
21583
21584 // Zihintntl
21585 case RISCV::BI__builtin_riscv_ntl_load: {
21586 llvm::Type *ResTy = ConvertType(E->getType());
21587 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
21588 if (Ops.size() == 2)
21589 DomainVal = cast<ConstantInt>(Ops[1])->getZExtValue();
21590
21591 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
21592 getLLVMContext(),
21593 llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
21594 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
21595 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
21596
21597 int Width;
21598 if(ResTy->isScalableTy()) {
21599 const ScalableVectorType *SVTy = cast<ScalableVectorType>(ResTy);
21600 llvm::Type *ScalarTy = ResTy->getScalarType();
21601 Width = ScalarTy->getPrimitiveSizeInBits() *
21602 SVTy->getElementCount().getKnownMinValue();
21603 } else
21604 Width = ResTy->getPrimitiveSizeInBits();
21605 LoadInst *Load = Builder.CreateLoad(
21606 Address(Ops[0], ResTy, CharUnits::fromQuantity(Width / 8)));
21607
21608 Load->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
21609 Load->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
21610 RISCVDomainNode);
21611
21612 return Load;
21613 }
21614 case RISCV::BI__builtin_riscv_ntl_store: {
21615 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
21616 if (Ops.size() == 3)
21617 DomainVal = cast<ConstantInt>(Ops[2])->getZExtValue();
21618
21619 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
21620 getLLVMContext(),
21621 llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
21622 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
21623 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
21624
21625 StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
21626 Store->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
21627 Store->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
21628 RISCVDomainNode);
21629
21630 return Store;
21631 }
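// Sketch of the resulting IR for __builtin_riscv_ntl_load(p, 3) with an i32
// pointee (the constant 3 stands for one of the __RISCV_NTLH_* domain values):
//   %v = load i32, ptr %p, align 4, !nontemporal !0, !riscv-nontemporal-domain !1
//   !0 = !{i32 1}
//   !1 = !{i32 3}
// The store builtin attaches the same two metadata nodes to a regular store.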
21632
21633 // Vector builtins are handled from here.
21634#include "clang/Basic/riscv_vector_builtin_cg.inc"
21635 // SiFive Vector builtins are handled from here.
21636#include "clang/Basic/riscv_sifive_vector_builtin_cg.inc"
21637 }
21638
21639 assert(ID != Intrinsic::not_intrinsic);
21640
21641 llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
21642 return Builder.CreateCall(F, Ops, "");
21643}
Defines the clang::ASTContext interface.
#define V(N, I)
Definition: ASTContext.h:3259
DynTypedNode Node
StringRef P
#define PPC_LNX_FEATURE(NAME, DESC, ENUMNAME, ENUMVAL, HWCAPN)
#define X86_CPU_SUBTYPE(ENUM, STR)
#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS)
#define X86_VENDOR(ENUM, STRING)
#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS)
#define X86_CPU_TYPE(ENUM, STR)
static constexpr Builtin::Info BuiltinInfo[]
Definition: Builtins.cpp:32
static void Accumulate(SMap &SM, CFGBlock *B)
Definition: CFGStmtMap.cpp:49
static Value * EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, SpecialRegisterAccessKind AccessKind, StringRef SysReg="")
Definition: CGBuiltin.cpp:8299
static llvm::Value * ARMMVEVectorReinterpret(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *DestType)
Definition: CGBuiltin.cpp:9135
static Value * MakeBinaryAtomicValue(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Utility to insert an atomic instruction based on Intrinsic::ID and the expression node.
Definition: CGBuiltin.cpp:210
static char bitActionToX86BTCode(BitTest::ActionKind A)
Definition: CGBuiltin.cpp:1182
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:6315
static Value * EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering)
Definition: CGBuiltin.cpp:389
#define INTRINSIC_X86_XSAVE_ID(NAME)
static CanQualType getOSLogArgType(ASTContext &C, int Size)
Get the argument type for arguments to os_log_helper.
Definition: CGBuiltin.cpp:2048
static Value * EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E, bool SanitizeOverflow)
Definition: CGBuiltin.cpp:2014
static llvm::VectorType * GetFloatNeonType(CodeGenFunction *CGF, NeonTypeFlags IntTypeFlags)
Definition: CGBuiltin.cpp:6184
static Value * tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID, Value *V)
Definition: CGBuiltin.cpp:2505
static llvm::Value * MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, uint32_t Shift, bool Unsigned)
Definition: CGBuiltin.cpp:9105
static bool areBOSTypesCompatible(int From, int To)
Checks if using the result of __builtin_object_size(p, From) in place of __builtin_object_size(p,...
Definition: CGBuiltin.cpp:800
static llvm::Value * SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, llvm::Type *T, bool Unsigned)
Definition: CGBuiltin.cpp:9098
static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[]
Definition: CGBuiltin.cpp:7340
static Value * EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< Value * > &Ops, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:9325
#define MMA_VARIANTS_B1_AND(geom, type)
static void swapCommutativeSMEOperands(unsigned BuiltinID, SmallVectorImpl< Value * > &Ops)
static bool AArch64SISDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7352
static Value * EmitX86CompressExpand(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsCompress)
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[]
Definition: CGBuiltin.cpp:7322
static bool HasExtraNeonArgument(unsigned BuiltinID)
Return true if BuiltinID is an overloaded Neon intrinsic with an extra argument that specifies the ve...
Definition: CGBuiltin.cpp:8367
static bool TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty, llvm::SmallPtrSetImpl< const Decl * > &Seen)
Definition: CGBuiltin.cpp:2383
static Value * EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:442
static std::pair< Intrinsic::ID, unsigned > getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID)
static Value * emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, int low, int high)
Definition: CGBuiltin.cpp:734
#define MMA_INTR(geom_op_type, layout)
static Value * EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, bool Signed, ArrayRef< Value * > Ops)
static Value * emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:492
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:6311
static bool AArch64SVEIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7353
static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, const CallExpr *E)
MSVC handles setjmp a bit differently on different platforms.
Definition: CGBuiltin.cpp:1380
static const ARMVectorIntrinsicInfo * findARMVectorIntrinsicInMap(ArrayRef< ARMVectorIntrinsicInfo > IntrinsicMap, unsigned BuiltinID, bool &MapProvenSorted)
Definition: CGBuiltin.cpp:7357
static Value * EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E, MutableArrayRef< Value * > Ops, Value *Upper, bool ZeroMask=false, unsigned PTIdx=0, bool NegAcc=false)
#define MUTATE_LDBL(func)
static Value * EmitX86ExpandLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static unsigned CountCountedByAttrs(const RecordDecl *RD)
Definition: CGBuiltin.cpp:856
static Value * emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:629
static Value * EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty)
Determine if the specified type requires laundering by checking if it is a dynamic class type or cont...
Definition: CGBuiltin.cpp:2411
static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static Value * EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:466
static struct WidthAndSignedness EncompassingIntegerType(ArrayRef< struct WidthAndSignedness > Types)
Definition: CGBuiltin.cpp:769
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context)
Definition: CGBuiltin.cpp:9094
#define MMA_VARIANTS(geom, type)
static bool AArch64SMEIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7354
static llvm::Value * VectorZip(CGBuilderTy &Builder, llvm::Value *V0, llvm::Value *V1)
Definition: CGBuiltin.cpp:9172
static Value * EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:6073
constexpr unsigned SVEBitsPerBlock
Definition: CGBuiltin.cpp:9609
static std::optional< CodeGenFunction::MSVCIntrin > translateX86ToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1765
static const std::pair< unsigned, unsigned > NEONEquivalentIntrinsicMap[]
Definition: CGBuiltin.cpp:7154
#define NEONMAP0(NameBase)
Definition: CGBuiltin.cpp:6308
static Value * EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static Value * emitBinaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:594
static Value * emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:509
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, Instruction::BinaryOps Op, bool Invert=false)
Utility to insert an atomic instruction based on an Intrinsic::ID and the expression node,...
Definition: CGBuiltin.cpp:262
static Value * EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned, ArrayRef< Value * > Ops)
static llvm::Value * ARMMVEVectorElementReverse(CGBuilderTy &Builder, llvm::Value *V, unsigned ReverseWidth)
Definition: CGBuiltin.cpp:9199
#define MMA_SATF_VARIANTS(geom, type)
static std::optional< CodeGenFunction::MSVCIntrin > translateAarch64ToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1611
static std::optional< CodeGenFunction::MSVCIntrin > translateArmToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1465
static llvm::Value * EmitBitTestIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Emit a _bittest* intrinsic.
Definition: CGBuiltin.cpp:1243
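A caller-side sketch of the _bittest* family this helper lowers, assuming an MSVC-compatible environment where <intrin.h> declares these intrinsics (editor-added; not code from this file):

  // Editor-added example; requires clang-cl or -fms-extensions with <intrin.h>.
  #include <intrin.h>
  #include <cstdio>

  int main() {
    long flags = 0;
    // Non-atomic read-modify-write of bit 3; _interlockedbittestandset is the
    // atomic variant.
    unsigned char old_bit = _bittestandset(&flags, 3);
    std::printf("old bit %d, flags now %ld\n", old_bit, flags); // old bit 0, flags now 8
    std::printf("bit 3 set: %d\n", _bittest(&flags, 3));        // 1
    return 0;
  }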
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:6320
@ UnsignedAlts
Definition: CGBuiltin.cpp:6278
@ Vectorize1ArgType
Definition: CGBuiltin.cpp:6283
@ FpCmpzModifiers
Definition: CGBuiltin.cpp:6287
@ Use64BitVectors
Definition: CGBuiltin.cpp:6280
@ VectorizeArgTypes
Definition: CGBuiltin.cpp:6275
@ VectorRetGetArgs01
Definition: CGBuiltin.cpp:6285
@ InventFloatType
Definition: CGBuiltin.cpp:6277
@ AddRetType
Definition: CGBuiltin.cpp:6270
@ Add2ArgTypes
Definition: CGBuiltin.cpp:6272
@ VectorizeRetType
Definition: CGBuiltin.cpp:6274
@ VectorRet
Definition: CGBuiltin.cpp:6284
@ Add1ArgType
Definition: CGBuiltin.cpp:6271
@ Use128BitVectors
Definition: CGBuiltin.cpp:6281
static Value * EmitSignBit(CodeGenFunction &CGF, Value *V)
Emit the computation of the sign bit for a floating point value.
Definition: CGBuiltin.cpp:675
static Value * EmitFAbs(CodeGenFunction &CGF, Value *V)
EmitFAbs - Emit a call to @llvm.fabs().
Definition: CGBuiltin.cpp:666
#define CUSTOM_BUILTIN_MAPPING(x, s)
static Value * EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF, ArrayRef< Value * > Ops, llvm::Type *DstTy)
static bool isSpecialUnsignedMultiplySignedResult(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Definition: CGBuiltin.cpp:2241
static llvm::Value * getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType)
Definition: CGBuiltin.cpp:808
static llvm::Value * emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Definition: CGBuiltin.cpp:1319
static llvm::Value * VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd)
Definition: CGBuiltin.cpp:9161
static Value * EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, llvm::Type *DstTy)
static Value * emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:546
static WidthAndSignedness getIntegerWidthAndSignedness(const clang::ASTContext &context, const clang::QualType Type)
Definition: CGBuiltin.cpp:755
static Value * EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, Value *Amt, bool IsRight)
static RValue EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Emit a checked mixed-sign multiply.
Definition: CGBuiltin.cpp:2295
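The mixed-sign specialization applies to calls such as __builtin_mul_overflow with one signed and one unsigned operand; a minimal caller-side example of that builtin (editor-added sketch, not code from this file):

  // Editor-added example of a mixed-sign checked multiply.
  #include <cstdio>

  int main() {
    long long a = -3;          // signed operand
    unsigned long long b = 7;  // unsigned operand
    unsigned long long result;
    // True when the mathematically exact product does not fit the result type;
    // here -21 is not representable as unsigned long long.
    if (__builtin_mul_overflow(a, b, &result))
      std::puts("overflow");
    else
      std::printf("%llu\n", result);
    return 0;
  }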
static llvm::ScalableVectorType * getSVEVectorForElementType(llvm::Type *EltTy)
Definition: CGBuiltin.cpp:9611
static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:2434
#define INTRINSIC_WITH_CC(NAME)
static llvm::FixedVectorType * GetNeonType(CodeGenFunction *CGF, NeonTypeFlags TypeFlags, bool HasLegalHalfType=true, bool V1Ty=false, bool AllowBFloatArgsAndRet=true)
Definition: CGBuiltin.cpp:6143
static RValue EmitBinaryAtomic(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E)
Definition: CGBuiltin.cpp:253
static llvm::Value * ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT)
Definition: CGBuiltin.cpp:9187
static Value * EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, ArrayRef< Value * > Ops, bool InvertLHS=false)
static Value * EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::Type *ResultType)
Definition: CGBuiltin.cpp:181
static Value * EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, bool isExecHi)
Definition: CGBuiltin.cpp:8278
static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, Align AlignmentInBytes)
Definition: CGBuiltin.cpp:71
static Value * EmitX86Select(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
SpecialRegisterAccessKind
Definition: CGBuiltin.cpp:8270
@ VolatileRead
Definition: CGBuiltin.cpp:8272
@ NormalRead
Definition: CGBuiltin.cpp:8271
static Value * EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering=AtomicOrdering::SequentiallyConsistent)
This function should be invoked to emit atomic cmpxchg for Microsoft's _InterlockedCompareExchange* i...
Definition: CGBuiltin.cpp:347
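A caller-side sketch of the intrinsic family this helper lowers, assuming an MSVC-compatible environment where <intrin.h> declares _InterlockedCompareExchange (editor-added; the intrinsic returns the value previously stored, so success is detected by comparing against the comparand):

  // Editor-added example; requires clang-cl or -fms-extensions with <intrin.h>.
  #include <intrin.h>
  #include <cstdio>

  int main() {
    volatile long guard = 0;
    long prev = _InterlockedCompareExchange(&guard, /*Exchange=*/1,
                                            /*Comparand=*/0);
    if (prev == 0)
      std::puts("won the race: guard is now 1");
    else
      std::puts("guard was already claimed");
    return 0;
  }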
static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:192
static Value * EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, unsigned BuiltinID, bool IsAddSub)
static Value * getMaskVecValue(CodeGenFunction &CGF, Value *Mask, unsigned NumElts)
static bool isSpecialMixedSignMultiply(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Determine if a binop is a checked mixed-sign multiply we can specialize.
Definition: CGBuiltin.cpp:2283
static Value * MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, bool ReturnBool)
Utility to insert an atomic cmpxchg instruction.
Definition: CGBuiltin.cpp:307
static Value * emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID, llvm::Intrinsic::ID ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:526
static Value * EmitToInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::IntegerType *IntType)
Emit the conversions required to turn the given value into an integer of the given size.
Definition: CGBuiltin.cpp:170
static llvm::Value * ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V)
Definition: CGBuiltin.cpp:9127
static Value * EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, unsigned NumElts, Value *MaskIn)
static Value * EmitX86CompressStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static bool NEONSIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7349
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:6639
static Value * EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:234
static llvm::Value * EmitOverflowIntrinsic(CodeGenFunction &CGF, const llvm::Intrinsic::ID IntrinsicID, llvm::Value *X, llvm::Value *Y, llvm::Value *&Carry)
Emit a call to one of the llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow intrinsics, returning the arithmetic result and passing the overflow flag back through Carry.
Definition: CGBuiltin.cpp:719
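Helpers like this back clang's carry-propagating builtins (for example __builtin_addc, whose carry-out feeds the next limb's carry-in); an editor-added sketch of chaining two 32-bit limbs:

  // Editor-added example of carry chaining with __builtin_addc.
  #include <cstdio>

  int main() {
    // Add {hi,lo} = {1, 0xFFFFFFFF} and {0, 1} as two 32-bit limbs.
    unsigned carry = 0;
    unsigned lo = __builtin_addc(0xFFFFFFFFu, 1u, 0u, &carry); // lo = 0, carry = 1
    unsigned hi = __builtin_addc(1u, 0u, carry, &carry);       // hi = 2, carry = 0
    std::printf("hi=%u lo=%u carry=%u\n", hi, lo, carry);
    return 0;
  }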
static Value * EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo, SmallVectorImpl< Value * > &Ops, const CallExpr *E)
Definition: CGBuiltin.cpp:7415
static Value * emitFPIntBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:617
static Value * emitTernaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:605
#define MMA_LDST(n, geom_op_type)
static Value * EmitX86vpcom(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsSigned)
static Value * emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID)
Definition: CGBuiltin.cpp:647
static Value * EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In)
static Value * EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:478
static Value * EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, unsigned IntrinsicID, const CallExpr *E)
Handle a SystemZ function in which the final argument is a pointer to an int that receives the post-i...
static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF, const FunctionDecl *FD)
Definition: CGBuiltin.cpp:2516
static llvm::Value * EmitX86BitTestIntrinsic(CodeGenFunction &CGF, BitTest BT, const CallExpr *E, Value *BitBase, Value *BitPos)
Definition: CGBuiltin.cpp:1192
static RValue EmitCheckedUnsignedMultiplySignedResult(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Definition: CGBuiltin.cpp:2249
static Value * emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, llvm::StringRef Name="")
Definition: CGBuiltin.cpp:584
static Value * emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID, llvm::Type *Ty, ArrayRef< Value * > Args)
Definition: CGBuiltin.cpp:565
static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, const CallExpr *E, llvm::Constant *calleeValue)
Definition: CGBuiltin.cpp:704
static Value * EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:245
static Value * EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask, ArrayRef< Value * > Ops)
static llvm::AtomicOrdering getBitTestAtomicOrdering(BitTest::InterlockingKind I)
Definition: CGBuiltin.cpp:1228
#define MMA_VARIANTS_B1_XOR(geom, type)
#define MMA_VARIANTS_I4(geom, type)
static Value * EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, bool IsSigned)
static Value * packTBLDVectorList(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Value *ExtOp, Value *IndexOp, llvm::Type *ResTy, unsigned IntID, const char *Name)
Definition: CGBuiltin.cpp:8197
static Value * EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW)
Definition: CGBuiltin.cpp:2008
static Value * EmitX86ScalarSelect(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
static Value * EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:453
static bool AArch64SIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7351
static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[]
Definition: CGBuiltin.cpp:6914
CodeGenFunction::ComplexPairTy ComplexPairTy
const Environment & Env
Definition: HTMLLogger.cpp:148
unsigned Iter
Definition: HTMLLogger.cpp:154
#define ALIAS(NAME, TOK, FLAGS)
#define X(type, name)
Definition: Value.h:142
llvm::MachO::Record Record
Definition: MachO.h:28
static std::string getName(const CallEvent &Call)
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
Enumerates target-specific builtins in their own namespaces within namespace clang.
Defines the clang::TargetOptions class.
SourceLocation Begin
__DEVICE__ float modf(float __x, float *__iptr)
__DEVICE__ double nan(const char *)
__device__ int
__device__ __2f16 float __ockl_bool s
APSInt & getInt()
Definition: APValue.h:423
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:182
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
unsigned getIntWidth(QualType T) const
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
CanQualType VoidPtrTy
Definition: ASTContext.h:1113
IdentifierTable & Idents
Definition: ASTContext.h:639
Builtin::Context & BuiltinInfo
Definition: ASTContext.h:641
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
QualType getBaseElementType(const ArrayType *VAT) const
Return the innermost element type of an array type.
TypeInfo getTypeInfo(const Type *T) const
Get the size and alignment of the specified complete type in bits.
QualType getObjCIdType() const
Represents the Objective-C id type.
Definition: ASTContext.h:2048
bool hasSameUnqualifiedType(QualType T1, QualType T2) const
Determine whether the given types are equivalent after cvr-qualifiers have been removed.
Definition: ASTContext.h:2592
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
Definition: ASTContext.h:2315
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType VoidTy
Definition: ASTContext.h:1086
QualType GetBuiltinType(unsigned ID, GetBuiltinTypeError &Error, unsigned *IntegerConstantArgs=nullptr) const
Return the type for the specified builtin.
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:752
CharUnits toCharUnitsFromBits(int64_t BitSize) const
Convert a size in bits to a size in characters.
unsigned getTargetAddressSpace(LangAS AS) const
@ GE_None
No error.
Definition: ASTContext.h:2217
@ GE_Missing_type
Missing a type.
Definition: ASTContext.h:2220
ASTRecordLayout - This class contains layout information for one RecordDecl, which is a struct/union/...
Definition: RecordLayout.h:38
CharUnits getSize() const
getSize - Get the record size in characters.
Definition: RecordLayout.h:193
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
Definition: RecordLayout.h:200
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition: Type.h:3147
QualType getElementType() const
Definition: Type.h:3159
bool isLibFunction(unsigned ID) const
Return true if this is a builtin for a libc/libm function, with a "__builtin_" prefix (e....
Definition: Builtins.h:149
llvm::StringRef getName(unsigned ID) const
Return the identifier name for the specified builtin, e.g.
Definition: Builtins.h:103
bool isConstWithoutErrnoAndExceptions(unsigned ID) const
Return true if this function has no side effects and doesn't read memory, except for possibly errno o...
Definition: Builtins.h:247
bool isConstWithoutExceptions(unsigned ID) const
Definition: Builtins.h:251
bool isConst(unsigned ID) const
Return true if this function has no side effects and doesn't read memory.
Definition: Builtins.h:122
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2819
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition: Expr.h:3010
bool hasStoredFPFeatures() const
Definition: Expr.h:2981
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Expr.cpp:1618
FunctionDecl * getDirectCallee()
If the callee is a FunctionDecl, return it. Otherwise return null.
Definition: Expr.h:2989
Expr * getCallee()
Definition: Expr.h:2969
FPOptionsOverride getFPFeatures() const
Definition: Expr.h:3101
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition: Expr.h:2997
arg_range arguments()
Definition: Expr.h:3058
QualType getCallReturnType(const ASTContext &Ctx) const
getCallReturnType - Get the return type of the call expr.
Definition: Expr.cpp:1570
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align; beware that llvm::Align assumes power-of-two 8-bit bytes.
Definition: CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:185
static CharUnits One()
One - Construct a CharUnits quantity of one.
Definition: CharUnits.h:58
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:63
XRayInstrSet XRayInstrumentationBundle
Set of XRay instrumentation kinds to emit.
An aligned address.
Definition: Address.h:29
static Address invalid()
Definition: Address.h:46
CharUnits getAlignment() const
Return the alignment of this pointer.
Definition: Address.h:78
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:62
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition: Address.h:100
Address withAlignment(CharUnits NewAlignment) const
Return address with different alignment, but same pointer and element type.
Definition: Address.h:93
llvm::Value * getPointer() const
Definition: Address.h:51
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:57
An aggregate value slot.
Definition: CGValue.h:512
Address getAddress() const
Definition: CGValue.h:650
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:863
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:880
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:97
llvm::StoreInst * CreateAlignedStore(llvm::Value *Val, llvm::Value *Addr, CharUnits Align, bool IsVolatile=false)
Definition: CGBuilder.h:104
llvm::CallInst * CreateMemMove(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:318
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition: CGBuilder.h:156
llvm::CallInst * CreateMemCpyInline(Address Dest, Address Src, uint64_t Size)
Definition: CGBuilder.h:311
llvm::AtomicRMWInst * CreateAtomicRMW(llvm::AtomicRMWInst::BinOp Op, Address Addr, llvm::Value *Val, llvm::AtomicOrdering Ordering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition: CGBuilder.h:140
llvm::CallInst * CreateMemSetInline(Address Dest, llvm::Value *Value, uint64_t Size)
Definition: CGBuilder.h:333
llvm::StoreInst * CreateDefaultAlignedStore(llvm::Value *Val, llvm::Value *Addr, bool IsVolatile=false)
Definition: CGBuilder.h:112
llvm::CallInst * CreateMemSet(Address Dest, llvm::Value *Value, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:326
llvm::AtomicCmpXchgInst * CreateAtomicCmpXchg(Address Addr, llvm::Value *Cmp, llvm::Value *New, llvm::AtomicOrdering SuccessOrdering, llvm::AtomicOrdering FailureOrdering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition: CGBuilder.h:130
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:71
Address CreateConstByteGEP(Address Addr, CharUnits Offset, const llvm::Twine &Name="")
Definition: CGBuilder.h:268
Address CreateLaunderInvariantGroup(Address Addr)
Definition: CGBuilder.h:356
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:297
llvm::LoadInst * CreateAlignedLoad(llvm::Type *Ty, llvm::Value *Addr, CharUnits Align, const llvm::Twine &Name="")
Definition: CGBuilder.h:89
Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition: CGBuilder.h:213
Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
Definition: CGBuilder.h:149
Address CreateGEP(Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:246
virtual std::string getDeviceSideName(const NamedDecl *ND)=0
Returns function or variable name on device side even if the current compilation is for host.
virtual llvm::GlobalVariable * getThrowInfo(QualType T)
Definition: CGCXXABI.h:263
All available information about a concrete callee.
Definition: CGCall.h:62
static CGCallee forDirect(llvm::Constant *functionPtr, const CGCalleeInfo &abstractInfo=CGCalleeInfo())
Definition: CGCall.h:129
llvm::DIType * getOrCreateStandaloneType(QualType Ty, SourceLocation Loc)
Emit standalone debug info for a type.
CGFunctionInfo - Class to encapsulate the information about a function definition.
virtual void EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF, Address DestPtr, Address SrcPtr, llvm::Value *Size)=0
EnqueuedBlockInfo emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E)
CallArgList - Type for representing both the value and type of arguments in a call.
Definition: CGCall.h:258
void add(RValue rvalue, QualType type)
Definition: CGCall.h:282
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Value * EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch)
llvm::Value * EmitFromMemory(llvm::Value *Value, QualType Ty)
EmitFromMemory - Change a scalar value from its memory representation to its value representation.
llvm::Value * EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
llvm::Value * EmitLifetimeStart(llvm::TypeSize Size, llvm::Value *Addr)
void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::Value *V, QualType Type, CharUnits Alignment=CharUnits::Zero(), SanitizerSet SkippedChecks=SanitizerSet(), llvm::Value *ArraySize=nullptr)
Emit a check that V is the address of storage of the appropriate size and alignment for an object of ...
std::pair< RValue, llvm::Value * > EmitAtomicCompareExchange(LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc, llvm::AtomicOrdering Success=llvm::AtomicOrdering::SequentiallyConsistent, llvm::AtomicOrdering Failure=llvm::AtomicOrdering::SequentiallyConsistent, bool IsWeak=false, AggValueSlot Slot=AggValueSlot::ignored())
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
llvm::Value * EmitSVEPredicateCast(llvm::Value *Pred, llvm::ScalableVectorType *VTy)
llvm::CallInst * EmitTrapCall(llvm::Intrinsic::ID IntrID)
Emit a call to trap or debugtrap and attach function attribute "trap-func-name" if specified.
SanitizerSet SanOpts
Sanitizers enabled for this function.
RValue EmitBuiltinIsAligned(const CallExpr *E)
Emit IR for __builtin_is_aligned.
LValue EmitAggExprToLValue(const Expr *E)
EmitAggExprToLValue - Emit the computation of the specified expression of aggregate type into a tempo...
void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc, AbstractCallee AC, unsigned ParmNum)
Create a check for a function parameter that may potentially be declared as non-null.
llvm::Value * EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void GetAArch64SVEProcessedOperands(unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, SVETypeFlags TypeFlags)
llvm::Value * EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr)
void pushLifetimeExtendedDestroy(CleanupKind kind, Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
llvm::Value * EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind)
Emits an argument for a call to a builtin.
llvm::Value * EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
CleanupKind getARCCleanupKind()
Retrieves the default cleanup kind for an ARC cleanup.
llvm::Value * EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue emitBuiltinOSLogFormat(const CallExpr &E)
Emit IR for __builtin_os_log_format.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
llvm::Value * EmitVAStartEnd(llvm::Value *ArgValue, bool IsStart)
Emits a call to an LLVM variable-argument intrinsic, either llvm.va_start or llvm.va_end.
llvm::Value * EmitSVEMaskedStore(const CallExpr *, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty)
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
llvm::Value * EmitSEHExceptionInfo()
RValue EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp)
Emit IR for __builtin_align_up/__builtin_align_down.
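Together with the EmitBuiltinIsAligned entry above, this lowers clang's alignment builtins; an editor-added usage sketch (the alignment argument must be a power of two):

  // Editor-added example of __builtin_is_aligned / __builtin_align_up / _down.
  #include <cstdio>

  int main() {
    char buffer[64];
    char *p = buffer + 5;
    char *up   = __builtin_align_up(p, 16);   // next 16-byte boundary at or above p
    char *down = __builtin_align_down(p, 16); // 16-byte boundary at or below p
    std::printf("aligned? %d\n", (int)__builtin_is_aligned(p, 16));
    std::printf("up - down = %td\n", up - down); // 16 unless p was already aligned
    return 0;
  }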
const LangOptions & getLangOpts() const
void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, llvm::AtomicOrdering &AO, llvm::SyncScope::ID &SSID)
llvm::Constant * EmitCheckTypeDescriptor(QualType T)
Emit a description of a type in a format suitable for passing to a runtime sanitizer handler.
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
void EmitTrapCheck(llvm::Value *Checked, SanitizerHandler CheckHandlerID)
Create a basic block that will call the trap intrinsic, and emit a conditional branch to it,...
void EmitUnreachable(SourceLocation Loc)
Emit a reached-unreachable diagnostic if Loc is valid and runtime checking is enabled.
llvm::Value * EmitSVETupleCreate(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
ComplexPairTy EmitComplexExpr(const Expr *E, bool IgnoreReal=false, bool IgnoreImag=false)
EmitComplexExpr - Emit the computation of the specified expression of complex type,...
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
TypeCheckKind
Situations in which we might emit a check for the suitability of a pointer or glvalue.
@ TCK_Store
Checking the destination of a store. Must be suitably sized and aligned.
@ TCK_Load
Checking the operand of a load. Must be suitably sized and aligned.
llvm::Value * EmitSMELdrStr(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, llvm::CallBase **callOrInvoke, bool IsMustTail, SourceLocation Loc)
EmitCall - Generate a call of the given function, expecting the given result type,...
llvm::Value * FormSVEBuiltinResult(llvm::Value *Call)
FormSVEBuiltinResult - Returns the struct of scalable vectors as a wider vector.
llvm::Value * EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx, const llvm::ElementCount &Count)
llvm::Type * ConvertTypeForMem(QualType T)
llvm::Value * EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID, bool IsZExtReturn)
bool AlwaysEmitXRayCustomEvents() const
AlwaysEmitXRayCustomEvents - Return true if we must unconditionally emit XRay custom event handling c...
llvm::Value * EmitSVEDupX(llvm::Value *Scalar)
@ Default
No language constraints on evaluation order.
const TargetInfo & getTarget() const
llvm::Value * vectorWrapScalar16(llvm::Value *Op)
llvm::Function * LookupNeonLLVMIntrinsic(unsigned IntrinsicID, unsigned Modifier, llvm::Type *ArgTy, const CallExpr *E)
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
llvm::Value * EmitSEHExceptionCode()
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
llvm::Value * EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
EmitTargetBuiltinExpr - Emit the given builtin call.
llvm::Value * EmitCountedByFieldExpr(const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl)
Build an expression accessing the "counted_by" field.
RValue EmitAnyExprToTemp(const Expr *E)
EmitAnyExprToTemp - Similarly to EmitAnyExpr(), however, the result will always be accessible even if...
RValue EmitCoroutineIntrinsic(const CallExpr *E, unsigned int IID)
llvm::Value * EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E)
Address EmitArrayToPointerDecay(const Expr *Array, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
void EmitCheck(ArrayRef< std::pair< llvm::Value *, SanitizerMask > > Checked, SanitizerHandler Check, ArrayRef< llvm::Constant * > StaticArgs, ArrayRef< llvm::Value * > DynamicArgs)
Create a basic block that will either trap or call a handler function in the UBSan runtime with the p...
RValue EmitBuiltinNewDeleteCall(const FunctionProtoType *Type, const CallExpr *TheCallExpr, bool IsDelete)
llvm::Value * EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty, const llvm::CmpInst::Predicate Fp, const llvm::CmpInst::Predicate Ip, const llvm::Twine &Name="")
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
llvm::ScalableVectorType * getSVEType(const SVETypeFlags &TypeFlags)
RValue emitRotate(const CallExpr *E, bool IsRotateRight)
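This emits the rotate builtins (__builtin_rotateleft32 and relatives), which clang lowers via LLVM's funnel-shift intrinsics; an editor-added caller-side example:

  // Editor-added example of the rotate builtins.
  #include <cstdint>
  #include <cstdio>

  int main() {
    std::uint32_t x = 0x80000001u;
    std::printf("%08x\n", (unsigned)__builtin_rotateleft32(x, 4));  // 00000018
    std::printf("%08x\n", (unsigned)__builtin_rotateright32(x, 4)); // 18000000
    return 0;
  }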
llvm::Constant * EmitCheckSourceLocation(SourceLocation Loc)
Emit a description of a source location in a format suitable for passing to a runtime sanitizer handl...
void ErrorUnsupported(const Stmt *S, const char *Type)
ErrorUnsupported - Print out an error that codegen doesn't support the specified stmt yet.
Address EmitVAListRef(const Expr *E)
llvm::Value * EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty, bool negateForRightShift)
llvm::Value * EmitSVEMovl(const SVETypeFlags &TypeFlags, llvm::ArrayRef< llvm::Value * > Ops, unsigned BuiltinID)
void emitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty, SourceLocation Loc, SourceLocation AssumptionLoc, llvm::Value *Alignment, llvm::Value *OffsetValue=nullptr)
const TargetCodeGenInfo & getTargetHooks() const
llvm::Value * EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::Type * getEltType(const SVETypeFlags &TypeFlags)
void EmitAggExpr(const Expr *E, AggValueSlot AS)
EmitAggExpr - Emit the computation of the specified expression of aggregate type.
bool ShouldXRayInstrumentFunction() const
ShouldXRayInstrument - Return true if the current function should be instrumented with XRay nop sleds...
const FieldDecl * FindFlexibleArrayMemberField(ASTContext &Ctx, const RecordDecl *RD, StringRef Name, uint64_t &Offset)
llvm::Value * EmitSVEPMull(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitToMemory(llvm::Value *Value, QualType Ty)
EmitToMemory - Change a scalar value from its value representation to its in-memory representation.
RValue EmitOpenMPDevicePrintfCallExpr(const CallExpr *E)
bool IsInPreservedAIRegion
True if CodeGen currently emits code inside a preserved access index region.
llvm::Value * EmitARCRetain(QualType type, llvm::Value *value)
bool AlwaysEmitXRayTypedEvents() const
AlwaysEmitXRayTypedEvents - Return true if clang must unconditionally emit XRay typed event handling ...
void SetSqrtFPAccuracy(llvm::Value *Val)
Set the minimum required accuracy of the given sqrt operation based on CodeGenOpts.
llvm::Value * EmitSVEScatterStore(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Function * generateBuiltinOSLogHelperFunction(const analyze_os_log::OSLogBufferLayout &Layout, CharUnits BufferAlignment)
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
CGCallee EmitCallee(const Expr *E)
Address CreateMemTemp(QualType T, const Twine &Name="tmp", Address *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
llvm::Value * EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, const CallExpr *E)
void checkTargetFeatures(const CallExpr *E, const FunctionDecl *TargetDecl)
llvm::Value * BuildVector(ArrayRef< llvm::Value * > Ops)
llvm::Value * EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitARMCDEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertType(QualType T)
llvm::CallBase * EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, ArrayRef< llvm::Value * > args, const Twine &name="")
llvm::Value * EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitSMEReadWrite(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T)
llvm::Value * EmitSMELd1St1(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
static Destroyer destroyARCStrongPrecise
void EmitARCIntrinsicUse(ArrayRef< llvm::Value * > values)
RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E)
llvm::Value * EmitSVEStructLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address EmitMSVAListRef(const Expr *E)
Emit a "reference" to a __builtin_ms_va_list; this is always the value of the expression,...
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
llvm::Value * EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt, llvm::Type *Ty, bool usgn, const char *name)
SmallVector< llvm::Type *, 2 > getSVEOverloadTypes(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
static bool hasAggregateEvaluationKind(QualType T)
const FieldDecl * FindCountedByField(const FieldDecl *FD)
Find the FieldDecl specified in a FAM's "counted_by" attribute.
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
llvm::Value * EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::Value * EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
llvm::Value * EmitSEHAbnormalTermination()
llvm::Value * EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E)
AggValueSlot CreateAggTemp(QualType T, const Twine &Name="tmp", Address *Alloca=nullptr)
CreateAggTemp - Create a temporary memory object for the given aggregate type.
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
llvm::Value * EmitSVEAllTruePred(const SVETypeFlags &TypeFlags)
RValue GetUndefRValue(QualType Ty)
GetUndefRValue - Get an appropriate 'undef' rvalue for the given type.
llvm::Type * SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags)
SVEBuiltinMemEltTy - Returns the memory element type for this memory access builtin.
llvm::LLVMContext & getLLVMContext()
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
llvm::Value * EmitSMEZero(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitCommonNeonBuiltinExpr(unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch)
llvm::Value * EmitNeonCall(llvm::Function *F, SmallVectorImpl< llvm::Value * > &O, const char *name, unsigned shift=0, bool rightshift=false)
llvm::Value * EmitAnnotationCall(llvm::Function *AnnotationFn, llvm::Value *AnnotatedVal, StringRef AnnotationStr, SourceLocation Location, const AnnotateAttr *Attr)
Emit an annotation call (intrinsic).
llvm::ScalableVectorType * getSVEPredType(const SVETypeFlags &TypeFlags)
llvm::Value * EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
llvm::Value * EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
This class organizes the cross-function state that is used while generating LLVM code.
llvm::Module & getModule() const
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false, bool AssumeConvergent=false)
Create or return a runtime function declaration with the specified type and name.
llvm::Constant * getBuiltinLibFunction(const FunctionDecl *FD, unsigned BuiltinID)
Given a builtin id for a function like "__builtin_fabsf", return a Function* for "fabsf".
Definition: CGBuiltin.cpp:96
DiagnosticsEngine & getDiags() const
void ErrorUnsupported(const Stmt *S, const char *Type)
Print out an error that codegen doesn't support the specified stmt yet.
CGCUDARuntime & getCUDARuntime()
Return a reference to the configured CUDA runtime.
CGOpenCLRuntime & getOpenCLRuntime()
Return a reference to the configured OpenCL runtime.
const TargetInfo & getTarget() const
const llvm::DataLayout & getDataLayout() const
void Error(SourceLocation loc, StringRef error)
Emit a general error that something can't be done.
CGCXXABI & getCXXABI() const
llvm::Constant * GetFunctionStart(const ValueDecl *Decl)
const llvm::Triple & getTriple() const
void DecorateInstructionWithTBAA(llvm::Instruction *Inst, TBAAAccessInfo TBAAInfo)
DecorateInstructionWithTBAA - Decorate the instruction with a TBAA tag.
llvm::Constant * CreateRuntimeVariable(llvm::Type *Ty, StringRef Name)
Create a new runtime global variable with the specified type and name.
TBAAAccessInfo getTBAAAccessInfo(QualType AccessType)
getTBAAAccessInfo - Get TBAA information that describes an access to an object of the given type.
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
CharUnits getNaturalPointeeTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
llvm::LLVMContext & getLLVMContext()
CGObjCRuntime & getObjCRuntime()
Return a reference to the configured Objective-C runtime.
void SetLLVMFunctionAttributes(GlobalDecl GD, const CGFunctionInfo &Info, llvm::Function *F, bool IsThunk)
Set the LLVM function attributes (sext, zext, etc).
void SetLLVMFunctionAttributesForDefinition(const Decl *D, llvm::Function *F)
Set the LLVM function attributes which only apply to a function definition.
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys=std::nullopt)
ConstantAddress GetAddrOfConstantCString(const std::string &Str, const char *GlobalName=nullptr)
Returns a pointer to a character array containing the literal and a terminating '\0' character.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for the given function info.
Definition: CGCall.cpp:1625
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition: CGCall.cpp:674
const CGFunctionInfo & arrangeBuiltinFunctionCall(QualType resultType, const CallArgList &args)
Definition: CGCall.cpp:662
llvm::Constant * emitAbstract(const Expr *E, QualType T)
Emit the result of the given expression as an abstract constant, asserting that it succeeded.
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:141
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition: CGCall.h:352
LValue - This represents lvalue references.
Definition: CGValue.h:171
llvm::Value * getBitFieldPointer() const
Definition: CGValue.h:408
Address getAddress(CodeGenFunction &CGF) const
Definition: CGValue.h:350
void setNontemporal(bool Value)
Definition: CGValue.h:307
llvm::Value * getPointer(CodeGenFunction &CGF) const
Definition: CGValue.h:346
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition: CGValue.h:39
static RValue getIgnored()
Definition: CGValue.h:84
static RValue get(llvm::Value *V)
Definition: CGValue.h:89
static RValue getAggregate(Address addr, bool isVolatile=false)
Definition: CGValue.h:110
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:96
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition: CGValue.h:61
ReturnValueSlot - Contains the address where the return value of a function can be stored,...
Definition: CGCall.h:356
virtual llvm::Value * encodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert the address of an instruction into a return address ...
Definition: TargetInfo.h:145
virtual llvm::Value * decodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert a return address as stored by the system into the ac...
Definition: TargetInfo.h:135
const T & getABIInfo() const
Definition: TargetInfo.h:56
virtual int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const
Determines the DWARF register number for the stack pointer, for exception-handling purposes.
Definition: TargetInfo.h:117
virtual llvm::Value * testFPKind(llvm::Value *V, unsigned BuiltinID, CGBuilderTy &Builder, CodeGenModule &CGM) const
Performs a target specific test of a floating point value for things like IsNaN, Infinity,...
Definition: TargetInfo.h:154
Complex values, per C99 6.2.5p11.
Definition: Type.h:2845
Represents a concrete matrix type with constant number of rows and columns.
Definition: Type.h:3710
RecordDecl * getOuterLexicalRecordContext()
Retrieve the outermost lexically enclosing record context.
Definition: DeclBase.cpp:1947
decl_range decls() const
decls_begin/decls_end - Iterate over the declarations stored in this context.
Definition: DeclBase.h:2332
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:85
T * getAttr() const
Definition: DeclBase.h:578
FunctionDecl * getAsFunction() LLVM_READONLY
Returns the function itself, or the templated function if this is a function template.
Definition: DeclBase.cpp:227
DeclContext * getDeclContext()
Definition: DeclBase.h:453
static bool isFlexibleArrayMemberLike(ASTContext &Context, const Decl *D, QualType Ty, LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel, bool IgnoreTemplateOrMacroSubstitution)
Whether it resembles a flexible array member.
Definition: DeclBase.cpp:413
bool hasAttr() const
Definition: DeclBase.h:582
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:192
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1547
This represents one expression.
Definition: Expr.h:110
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer,...
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:3050
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition: Expr.cpp:3045
bool EvaluateAsFloat(llvm::APFloat &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsFloat - Return true if this is a constant which we can fold and convert to a floating point...
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3041
bool isPRValue() const
Definition: Expr.h:278
@ NPC_ValueDependentIsNotNull
Specifies that a value-dependent expression should be considered to never be a null pointer constant.
Definition: Expr.h:825
ExprObjectKind getObjectKind() const
getObjectKind - The object kind that this expression produces.
Definition: Expr.h:444
bool EvaluateAsRValue(EvalResult &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsRValue - Return true if this is a constant which we can fold to an rvalue using any crazy t...
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition: Expr.cpp:3542
Expr * IgnoreImpCasts() LLVM_READONLY
Skip past any implicit casts which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3025
NullPointerConstantKind isNullPointerConstant(ASTContext &Ctx, NullPointerConstantValueDependence NPC) const
isNullPointerConstant - C99 6.3.2.3p3 - Test if this reduces down to a Null pointer constant.
Definition: Expr.cpp:3904
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:277
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
isIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
QualType getType() const
Definition: Expr.h:142
bool tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx, unsigned Type) const
If the current Expr is a pointer, this will try to statically determine the number of bytes available...
const ValueDecl * getAsBuiltinConstantDeclRef(const ASTContext &Context) const
If this expression is an unambiguous reference to a single declaration, in the style of __builtin_fun...
Definition: Expr.cpp:226
Represents difference between two FPOptions values.
Definition: LangOptions.h:870
Represents a member of a struct/union/class.
Definition: Decl.h:3025
Represents a function declaration or definition.
Definition: Decl.h:1959
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2674
Represents a prototype with parameter type info, e.g.
Definition: Type.h:4199
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:56
const Decl * getDecl() const
Definition: GlobalDecl.h:103
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:5338
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:276
std::string getNameAsString() const
Get a human-readable name for the declaration, even if it is one of the special kinds of names (C++ c...
Definition: Decl.h:292
Flags to identify the types for overloaded Neon builtins.
EltType getEltType() const
PipeType - OpenCL20.
Definition: Type.h:6751
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:2898
QualType getPointeeType() const
Definition: Type.h:2908
A (possibly-)qualified type.
Definition: Type.h:737
bool isVolatileQualified() const
Determine whether this type is volatile-qualified.
Definition: Type.h:6985
bool isWebAssemblyFuncrefType() const
Returns true if it is a WebAssembly Funcref Type.
Definition: Type.cpp:2785
LangAS getAddressSpace() const
Return the address space of this type.
Definition: Type.h:7027
bool isWebAssemblyExternrefType() const
Returns true if it is a WebAssembly Externref Type.
Definition: Type.cpp:2781
The collection of all-type qualifiers we support.
Definition: Type.h:147
Represents a struct/union/class.
Definition: Decl.h:4133
Flags to identify the types for overloaded SVE builtins.
bool isZExtReturn() const
bool isReverseUSDOT() const
bool isOverloadNone() const
bool isUndef() const
MemEltType getMemEltType() const
bool isWriteZA() const
bool isGatherLoad() const
bool isOverloadCvt() const
EltType getEltType() const
bool isOverloadDefault() const
bool isPrefetch() const
bool isOverloadWhileRW() const
bool isReadZA() const
bool isTupleSet() const
bool isReverseMergeAnyAccOp() const
bool isReductionQV() const
bool isTupleGet() const
bool isInsertOp1SVALL() const
bool isAppendSVALL() const
bool isReverseMergeAnyBinOp() const
bool isStructStore() const
bool isTupleCreate() const
bool isGatherPrefetch() const
bool hasSplatOperand() const
MergeType getMergeType() const
bool isByteIndexed() const
bool isStructLoad() const
bool isOverloadWhileOrMultiVecCvt() const
unsigned getSplatOperand() const
bool isStore() const
bool isScatterStore() const
bool isReverseCompare() const
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:41
Encodes a location in the source.
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:338
bool isUnion() const
Definition: Decl.h:3755
Exposes information about the current target.
Definition: TargetInfo.h:213
TargetOptions & getTargetOpts() const
Retrieve the target options.
Definition: TargetInfo.h:304
virtual bool hasLegalHalfType() const
Determine whether _Float16 is supported on this target.
Definition: TargetInfo.h:663
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
Definition: TargetInfo.h:1220
bool isLittleEndian() const
Definition: TargetInfo.h:1615
unsigned getMaxOpenCLWorkGroupSize() const
Definition: TargetInfo.h:827
bool isBigEndian() const
Definition: TargetInfo.h:1614
virtual bool checkArithmeticFenceSupported() const
Controls if __arithmetic_fence is supported in the targeted backend.
Definition: TargetInfo.h:1621
unsigned getSuitableAlign() const
Return the alignment that is the largest alignment ever used for any scalar/SIMD data type on the tar...
Definition: TargetInfo.h:699
virtual std::string_view getClobbers() const =0
Returns a string of target-specific clobbers, in LLVM format.
llvm::CodeObjectVersionKind CodeObjectVersion
Code object version for AMDGPU.
Definition: TargetOptions.h:82
The base class of the type hierarchy.
Definition: Type.h:1606
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition: Type.cpp:1819
bool isBlockPointerType() const
Definition: Type.h:7162
bool isVoidType() const
Definition: Type.h:7443
bool isBooleanType() const
Definition: Type.h:7567
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition: Type.cpp:2083
bool isComplexType() const
isComplexType() does not include complex integers (a GCC extension).
Definition: Type.cpp:627
bool isArrayType() const
Definition: Type.h:7220
bool isPointerType() const
Definition: Type.h:7154
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:7479
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:7724
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:651
bool isIntegralOrEnumerationType() const
Determine whether this type is an integral or enumeration type.
Definition: Type.h:7554
bool hasUnsignedIntegerRepresentation() const
Determine whether this type has an unsigned integer representation of some sort, e....
Definition: Type.cpp:2173
bool isBitIntType() const
Definition: Type.h:7378
bool hasSignedIntegerRepresentation() const
Determine whether this type has a signed integer representation of some sort, e.g....
Definition: Type.cpp:2123
bool isObjCObjectPointerType() const
Definition: Type.h:7282
bool hasFloatingRepresentation() const
Determine whether this type has a floating-point representation of some sort, e.g....
Definition: Type.cpp:2195
bool isFloatingType() const
Definition: Type.cpp:2186
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition: Type.cpp:2133
const T * getAs() const
Member-template getAs<specific type>.
Definition: Type.h:7657
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition: Type.cpp:1823
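These clang::Type classification predicates decide which IR a builtin call is lowered to. A minimal sketch of using them on a QualType; the helper name classify is hypothetical and the output strings are illustrative:

#include <string>
#include "clang/AST/Type.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical helper: classify a QualType with the predicates listed above.
static void classify(clang::QualType QT) {
  if (QT->isVoidType())
    llvm::errs() << "void\n";
  else if (QT->isPointerType())
    llvm::errs() << "pointer to " << QT->getPointeeType().getAsString() << "\n";
  else if (QT->isIntegerType())
    llvm::errs() << (QT->isSignedIntegerType() ? "signed" : "unsigned")
                 << " integer\n";
  else if (QT->isFloatingType())
    llvm::errs() << "floating-point\n";
  else if (QT->isArrayType())
    llvm::errs() << "array\n";
}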
Represent the declaration of a variable (in which case it is an lvalue), a function (in which case it ...
Definition: Decl.h:706
QualType getType() const
Definition: Decl.h:717
QualType getType() const
Definition: Value.cpp:234
Represents a GCC generic vector type.
Definition: Type.h:3512
unsigned getNumElements() const
Definition: Type.h:3527
SmallVector< OSLogBufferItem, 4 > Items
Definition: OSLog.h:113
unsigned char getNumArgsByte() const
Definition: OSLog.h:148
unsigned char getSummaryByte() const
Definition: OSLog.h:139
Defines the clang::TargetInfo interface.
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
llvm::Constant * initializationPatternFor(CodeGenModule &, llvm::Type *)
Definition: PatternInit.cpp:15
TypeEvaluationKind
The kind of evaluation to perform on values of a particular type.
@ EHCleanup
Denotes a cleanup that should run when a scope is exited using exceptional control flow (a throw stat...
Definition: EHScopeStack.h:80
constexpr XRayInstrMask Typed
Definition: XRayInstr.h:42
constexpr XRayInstrMask Custom
Definition: XRayInstr.h:41
bool computeOSLogBufferLayout(clang::ASTContext &Ctx, const clang::CallExpr *E, OSLogBufferLayout &layout)
Definition: OSLog.cpp:181
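computeOSLogBufferLayout, together with the OSLogBufferLayout accessors listed earlier (Items, getSummaryByte, getNumArgsByte), is what the os_log builtin lowering consumes. A minimal sketch of driving it, assuming the ASTContext and the CallExpr for the builtin call come from the surrounding AST; the helper name osLogArgCount is illustrative:

#include "clang/AST/ASTContext.h"
#include "clang/AST/Expr.h"
#include "clang/AST/OSLog.h"

// Hypothetical helper: compute the os_log buffer layout for a call and return
// the number of arguments it encodes.
static unsigned osLogArgCount(clang::ASTContext &Ctx, const clang::CallExpr *CE) {
  clang::analyze_os_log::OSLogBufferLayout Layout;
  if (!clang::analyze_os_log::computeOSLogBufferLayout(Ctx, CE, Layout))
    return 0; // not a recognizable os_log call
  // The summary and argument-count bytes head the buffer; per-argument
  // descriptors follow in Layout.Items.
  (void)Layout.getSummaryByte();
  return Layout.getNumArgsByte();
}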
const void * Store
Store - This opaque type encapsulates an immutable mapping from locations to values.
Definition: StoreRef.h:27
llvm::APFloat APFloat
Definition: Floating.h:23
llvm::APInt APInt
Definition: Integral.h:29
bool Dup(InterpState &S, CodePtr OpPC)
Definition: Interp.h:912
bool Ret(InterpState &S, CodePtr &PC, APValue &Result)
Definition: Interp.h:217
bool Zero(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1809
bool Neg(InterpState &S, CodePtr OpPC)
Definition: Interp.h:489
bool Load(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1369
bool Cast(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1675
The JSON file list parser is used to communicate input to InstallAPI.
@ DType
'dtype' clause, an alias for 'device_type', stored separately for diagnostic purposes.
@ OK_BitField
A bitfield object is a bitfield on a C or C++ record.
Definition: Specifiers.h:151
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
@ Asm
Assembly: we accept this only so that we can preprocess it.
@ Result
The result type of a method or function.
LangAS
Defines the address space values used by the address space qualifier of QualType.
Definition: AddressSpaces.h:25
@ Success
Template argument deduction was successful.
@ Other
Other implicit parameter.
unsigned long uint64_t
long int64_t
YAML serialization mapping.
Definition: Dominators.h:30
#define true
Definition: stdbool.h:21
llvm::PointerType * ConstGlobalsPtrTy
void* in the address space for constant globals
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::Type * HalfTy
half, bfloat, float, double
llvm::IntegerType * IntTy
int
llvm::PointerType * AllocaInt8PtrTy
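The CodeGenTypeCache members above are cached llvm::Type instances. A standalone sketch of the equivalent types built directly from an LLVMContext (this mirrors, rather than reuses, the cache; the function name buildCachedTypes is hypothetical):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

// Build the LLVM types the cache entries above refer to.
static void buildCachedTypes(llvm::LLVMContext &Ctx) {
  llvm::IntegerType *Int8Ty = llvm::Type::getInt8Ty(Ctx);  // i8
  llvm::Type *HalfTy = llvm::Type::getHalfTy(Ctx);         // half
  llvm::IntegerType *IntTy = llvm::Type::getInt32Ty(Ctx);  // 'int' is i32 on most targets
  // Address space 0 stands in here; the real cache records the target's
  // alloca and constant-globals address spaces.
  llvm::PointerType *PtrTy = llvm::PointerType::get(Ctx, /*AddressSpace=*/0);
  (void)Int8Ty; (void)HalfTy; (void)IntTy; (void)PtrTy;
}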
EvalResult is a struct with detailed info about an evaluated expression.
Definition: Expr.h:642
APValue Val
Val - This is the value the expression can be folded to.
Definition: Expr.h:644
void clear(SanitizerMask K=SanitizerKind::All)
Disable the sanitizers specified in K.
Definition: Sanitizers.h:176
void set(SanitizerMask K, bool Value)
Enable or disable a certain (single) sanitizer.
Definition: Sanitizers.h:168
bool has(SanitizerMask K) const
Check if a certain (single) sanitizer is enabled.
Definition: Sanitizers.h:159
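The SanitizerSet operations above (clear, set, has) are how lowering code adjusts which sanitizer checks apply to the IR it emits. A minimal sketch using only the listed members; the helper name and the choice of SanitizerKind::Address are illustrative:

#include "clang/Basic/Sanitizers.h"

// Hypothetical helper: start from an empty set, enable one sanitizer, query it.
static bool addressOnly(clang::SanitizerSet SS) {
  SS.clear();                                   // disable everything
  SS.set(clang::SanitizerKind::Address, true);  // turn one sanitizer back on
  return SS.has(clang::SanitizerKind::Address);
}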
uint64_t Width
Definition: ASTContext.h:153
bool has(XRayInstrMask K) const
Definition: XRayInstr.h:48
#define sinh(__x)
Definition: tgmath.h:373
#define asin(__x)
Definition: tgmath.h:112
#define scalbln(__x, __y)
Definition: tgmath.h:1182
#define sqrt(__x)
Definition: tgmath.h:520
#define acos(__x)
Definition: tgmath.h:83
#define fmin(__x, __y)
Definition: tgmath.h:780
#define exp(__x)
Definition: tgmath.h:431
#define ilogb(__x)
Definition: tgmath.h:851
#define copysign(__x, __y)
Definition: tgmath.h:618
#define erf(__x)
Definition: tgmath.h:636
#define atanh(__x)
Definition: tgmath.h:228
#define remquo(__x, __y, __z)
Definition: tgmath.h:1111
#define nextafter(__x, __y)
Definition: tgmath.h:1055
#define frexp(__x, __y)
Definition: tgmath.h:816
#define asinh(__x)
Definition: tgmath.h:199
#define erfc(__x)
Definition: tgmath.h:653
#define atan2(__x, __y)
Definition: tgmath.h:566
#define nexttoward(__x, __y)
Definition: tgmath.h:1073
#define hypot(__x, __y)
Definition: tgmath.h:833
#define exp2(__x)
Definition: tgmath.h:670
#define sin(__x)
Definition: tgmath.h:286
#define cbrt(__x)
Definition: tgmath.h:584
#define log2(__x)
Definition: tgmath.h:970
#define llround(__x)
Definition: tgmath.h:919
#define cosh(__x)
Definition: tgmath.h:344
#define trunc(__x)
Definition: tgmath.h:1216
#define fmax(__x, __y)
Definition: tgmath.h:762
#define ldexp(__x, __y)
Definition: tgmath.h:868
#define acosh(__x)
Definition: tgmath.h:170
#define tgamma(__x)
Definition: tgmath.h:1199
#define scalbn(__x, __y)
Definition: tgmath.h:1165
#define round(__x)
Definition: tgmath.h:1148
#define fmod(__x, __y)
Definition: tgmath.h:798
#define llrint(__x)
Definition: tgmath.h:902
#define tan(__x)
Definition: tgmath.h:315
#define cos(__x)
Definition: tgmath.h:257
#define log10(__x)
Definition: tgmath.h:936
#define fabs(__x)
Definition: tgmath.h:549
#define pow(__x, __y)
Definition: tgmath.h:490
#define log1p(__x)
Definition: tgmath.h:953
#define rint(__x)
Definition: tgmath.h:1131
#define expm1(__x)
Definition: tgmath.h:687
#define remainder(__x, __y)
Definition: tgmath.h:1090
#define fdim(__x, __y)
Definition: tgmath.h:704
#define lgamma(__x)
Definition: tgmath.h:885
#define tanh(__x)
Definition: tgmath.h:402
#define lrint(__x)
Definition: tgmath.h:1004
#define atan(__x)
Definition: tgmath.h:141
#define floor(__x)
Definition: tgmath.h:722
#define ceil(__x)
Definition: tgmath.h:601
#define log(__x)
Definition: tgmath.h:460
#define logb(__x)
Definition: tgmath.h:987
#define nearbyint(__x)
Definition: tgmath.h:1038
#define lround(__x)
Definition: tgmath.h:1021
#define fma(__x, __y, __z)
Definition: tgmath.h:742