clang 23.0.0git
AMDGPU.cpp
Go to the documentation of this file.
1//===------- AMDCPU.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGBuiltin.h"
14#include "CodeGenFunction.h"
15#include "TargetInfo.h"
19#include "llvm/Analysis/ValueTracking.h"
20#include "llvm/CodeGen/MachineFunction.h"
21#include "llvm/IR/IntrinsicsAMDGPU.h"
22#include "llvm/IR/IntrinsicsR600.h"
23#include "llvm/IR/IntrinsicsSPIRV.h"
24#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
25#include "llvm/Support/AMDGPUAddrSpace.h"
26#include "llvm/Support/AtomicOrdering.h"
27
28using namespace clang;
29using namespace CodeGen;
30using namespace llvm;
31
32namespace {
33
34// Has second type mangled argument.
35static Value *
37 Intrinsic::ID IntrinsicID,
38 Intrinsic::ID ConstrainedIntrinsicID) {
39 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
40 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
41
42 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
43 if (CGF.Builder.getIsFPConstrained()) {
44 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
45 {Src0->getType(), Src1->getType()});
46 return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
47 }
48
49 Function *F =
50 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
51 return CGF.Builder.CreateCall(F, {Src0, Src1});
52}
53
54// If \p E is not null pointer, insert address space cast to match return
55// type of \p E if necessary.
56Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
57 const CallExpr *E = nullptr) {
58 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
59 auto *Call = CGF.Builder.CreateCall(F);
60 if (!E)
61 return Call;
62 QualType BuiltinRetType = E->getType();
63 auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
64 if (RetTy == Call->getType())
65 return Call;
66 return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
67}
68
69Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
70 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
71 auto *Call = CGF.Builder.CreateCall(F);
72 Call->addRetAttr(
73 Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
74 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
75 return Call;
76}
77
78static llvm::Intrinsic::ID getAMDGPUWorkGroupID(CodeGenFunction &CGF,
79 unsigned Index) {
80 switch (Index) {
81 case 0:
82 return llvm::Intrinsic::amdgcn_workgroup_id_x;
83 case 1:
84 return llvm::Intrinsic::amdgcn_workgroup_id_y;
85 case 2:
86 return llvm::Intrinsic::amdgcn_workgroup_id_z;
87 default:
88 llvm_unreachable("unhandled index");
89 }
90}
91
92static void setNoundefInvariantLoad(llvm::LoadInst *Ld) {
93 Ld->setMetadata(llvm::LLVMContext::MD_noundef,
94 llvm::MDNode::get(Ld->getContext(), {}));
95 Ld->setMetadata(llvm::LLVMContext::MD_invariant_load,
96 llvm::MDNode::get(Ld->getContext(), {}));
97}
98
99static void addMaxWorkGroupSizeRangeMetadata(CodeGenFunction &CGF,
100 llvm::LoadInst *GroupSize) {
101 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
102 llvm::MDNode *RNode = MDHelper.createRange(
103 APInt(16, 1), APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
104 GroupSize->setMetadata(llvm::LLVMContext::MD_range, RNode);
105 setNoundefInvariantLoad(GroupSize);
106}
107
108static Value *emitAMDGPUWorkGroupSizeV5(CodeGenFunction &CGF, unsigned Index) {
109 llvm::Value *ImplicitArgPtr = EmitAMDGPUImplicitArgPtr(CGF);
110
111 // offsetof(amdhsa_implicit_kernarg_v5, block_count[Index])
112 unsigned BlockCountOffset = 0 + Index * 4;
113 // offsetof(amdhsa_implicit_kernarg_v5, group_size[Index])
114 unsigned GroupSizeOffset = 12 + Index * 2;
115 // offsetof(amdhsa_implicit_kernarg_v5, remainder[Index])
116 unsigned RemainderOffset = 18 + Index * 2;
117
118 if (CGF.CGM.getLangOpts().OffloadUniformBlock) {
119 // Indexing the implicit kernarg segment.
120 llvm::Value *GroupSizeGEP = CGF.Builder.CreateConstInBoundsGEP1_64(
121 CGF.Int8Ty, ImplicitArgPtr, GroupSizeOffset);
122 llvm::LoadInst *GroupSize = CGF.Builder.CreateLoad(
123 Address(GroupSizeGEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
124
125 addMaxWorkGroupSizeRangeMetadata(CGF, GroupSize);
126
127 return CGF.Builder.CreateZExt(GroupSize, CGF.Int32Ty);
128 }
129
130 llvm::Value *BlockCountGEP = CGF.Builder.CreateConstGEP1_64(
131 CGF.Int8Ty, ImplicitArgPtr, BlockCountOffset);
132 llvm::LoadInst *BlockCount = CGF.Builder.CreateLoad(
133 Address(BlockCountGEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));
134 setNoundefInvariantLoad(BlockCount);
135
136 llvm::Value *WorkgroupID =
137 CGF.Builder.CreateIntrinsic(getAMDGPUWorkGroupID(CGF, Index), {});
138 llvm::Value *IsFull = CGF.Builder.CreateICmpULT(WorkgroupID, BlockCount);
139
140 llvm::Value *StructOffset = CGF.Builder.CreateSelect(
141 IsFull, ConstantInt::get(CGF.Int32Ty, GroupSizeOffset),
142 ConstantInt::get(CGF.Int32Ty, RemainderOffset));
143
144 llvm::Value *SizeGEP =
145 CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, ImplicitArgPtr, StructOffset);
146 llvm::LoadInst *Size = CGF.Builder.CreateLoad(
147 Address(SizeGEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
148 addMaxWorkGroupSizeRangeMetadata(CGF, Size);
149 setNoundefInvariantLoad(Size);
150
151 return CGF.Builder.CreateZExt(Size, CGF.Int32Ty);
152}
153
154static Value *emitAMDGPUWorkGroupSizeV4(CodeGenFunction &CGF, unsigned Index) {
155 llvm::Value *DispatchPtr = EmitAMDGPUDispatchPtr(CGF);
156
157 // Indexing the HSA kernel_dispatch_packet struct.
158 llvm::Value *GroupSizeGEP = CGF.Builder.CreateConstInBoundsGEP1_64(
159 CGF.Int8Ty, DispatchPtr, 4 + Index * 2);
160 llvm::LoadInst *GroupSizeLD = CGF.Builder.CreateLoad(
161 Address(GroupSizeGEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
162
163 addMaxWorkGroupSizeRangeMetadata(CGF, GroupSizeLD);
164
165 llvm::Value *GroupSize = CGF.Builder.CreateZExt(GroupSizeLD, CGF.Int32Ty);
166
167 if (CGF.CGM.getLangOpts().OffloadUniformBlock)
168 return GroupSize;
169
170 llvm::Value *WorkgroupID =
171 CGF.Builder.CreateIntrinsic(getAMDGPUWorkGroupID(CGF, Index), {});
172
173 llvm::Value *GridSizeGEP = CGF.Builder.CreateConstInBoundsGEP1_64(
174 CGF.Int8Ty, DispatchPtr, 12 + Index * 4);
175 llvm::LoadInst *GridSize = CGF.Builder.CreateLoad(
176 Address(GridSizeGEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));
177
178 llvm::MDBuilder MDB(CGF.getLLVMContext());
179
180 // Known non-zero.
181 GridSize->setMetadata(llvm::LLVMContext::MD_range,
182 MDB.createRange(APInt(32, 1), APInt::getZero(32)));
183 GridSize->setMetadata(llvm::LLVMContext::MD_invariant_load,
184 llvm::MDNode::get(CGF.getLLVMContext(), {}));
185
186 llvm::Value *Mul = CGF.Builder.CreateMul(WorkgroupID, GroupSize);
187 llvm::Value *Remainder = CGF.Builder.CreateSub(GridSize, Mul);
188
189 llvm::Value *IsPartial = CGF.Builder.CreateICmpULT(Remainder, GroupSize);
190
191 return CGF.Builder.CreateSelect(IsPartial, Remainder, GroupSize);
192}
193
194// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
195/// Emit code based on Code Object ABI version.
196/// COV_4 : Emit code to use dispatch ptr
197/// COV_5+ : Emit code to use implicitarg ptr
198/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
199/// and use its value for COV_4 or COV_5+ approach. It is used for
200/// compiling device libraries in an ABI-agnostic way.
201Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
202 auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
203
204 // Do not emit __oclc_ABI_version references with non-empt environment.
205 if (Cov == CodeObjectVersionKind::COV_None &&
206 CGF.getTarget().getTriple().hasEnvironment())
207 Cov = CodeObjectVersionKind::COV_6;
208
209 if (Cov == CodeObjectVersionKind::COV_None) {
210 StringRef Name = "__oclc_ABI_version";
211 auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
212 if (!ABIVersionC)
213 ABIVersionC = new llvm::GlobalVariable(
214 CGF.CGM.getModule(), CGF.Int32Ty, false,
215 llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
216 llvm::GlobalVariable::NotThreadLocal,
218
219 // This load will be eliminated by the IPSCCP because it is constant
220 // weak_odr without externally_initialized. Either changing it to weak or
221 // adding externally_initialized will keep the load.
222 Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
223 CGF.CGM.getIntAlign());
224
225 Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
226 ABIVersion,
227 llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
228
229 llvm::Value *V5Impl = emitAMDGPUWorkGroupSizeV5(CGF, Index);
230 llvm::Value *V4Impl = emitAMDGPUWorkGroupSizeV4(CGF, Index);
231 return CGF.Builder.CreateSelect(IsCOV5, V5Impl, V4Impl);
232 }
233
234 return Cov >= CodeObjectVersionKind::COV_5
235 ? emitAMDGPUWorkGroupSizeV5(CGF, Index)
236 : emitAMDGPUWorkGroupSizeV4(CGF, Index);
237}
238
239// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
240Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
241 const unsigned XOffset = 12;
242 auto *DP = EmitAMDGPUDispatchPtr(CGF);
243 // Indexing the HSA kernel_dispatch_packet struct.
244 auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
245 auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
246 auto *LD = CGF.Builder.CreateLoad(
248
249 llvm::MDBuilder MDB(CGF.getLLVMContext());
250
251 // Known non-zero.
252 LD->setMetadata(llvm::LLVMContext::MD_range,
253 MDB.createRange(APInt(32, 1), APInt::getZero(32)));
254 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
255 llvm::MDNode::get(CGF.getLLVMContext(), {}));
256 return LD;
257}
258} // namespace
259
260// Generates the IR for __builtin_read_exec_*.
261// Lowers the builtin to amdgcn_ballot intrinsic.
263 llvm::Type *RegisterType,
264 llvm::Type *ValueType, bool isExecHi) {
265 CodeGen::CGBuilderTy &Builder = CGF.Builder;
266 CodeGen::CodeGenModule &CGM = CGF.CGM;
267
268 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
269 llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
270
271 if (isExecHi) {
272 Value *Rt2 = Builder.CreateLShr(Call, 32);
273 Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
274 return Rt2;
275 }
276
277 return Call;
278}
279
281 llvm::Value *RsrcPtr) {
282 auto &B = CGF.Builder;
283 auto *VecTy = llvm::FixedVectorType::get(B.getInt32Ty(), 8);
284
285 if (RsrcPtr->getType() == VecTy)
286 return RsrcPtr;
287
288 if (RsrcPtr->getType()->isIntegerTy(32)) {
289 llvm::PointerType *VecPtrTy =
290 llvm::PointerType::get(CGF.getLLVMContext(), 8);
291 llvm::Value *Ptr = B.CreateIntToPtr(RsrcPtr, VecPtrTy, "tex.rsrc.from.int");
292 return B.CreateAlignedLoad(VecTy, Ptr, llvm::Align(32), "tex.rsrc.val");
293 }
294
295 if (RsrcPtr->getType()->isPointerTy()) {
296 auto *VecPtrTy = llvm::PointerType::get(
297 CGF.getLLVMContext(), RsrcPtr->getType()->getPointerAddressSpace());
298 llvm::Value *Typed = B.CreateBitCast(RsrcPtr, VecPtrTy, "tex.rsrc.typed");
299 return B.CreateAlignedLoad(VecTy, Typed, llvm::Align(32), "tex.rsrc.val");
300 }
301
302 const auto &DL = CGF.CGM.getDataLayout();
303 if (DL.getTypeSizeInBits(RsrcPtr->getType()) == 256)
304 return B.CreateBitCast(RsrcPtr, VecTy, "tex.rsrc.val");
305
306 llvm::report_fatal_error("Unexpected texture resource argument form");
307}
308
309llvm::CallInst *
311 const clang::CallExpr *E,
312 unsigned IntrinsicID, bool IsImageStore) {
313 auto findTextureDescIndex = [&CGF](const CallExpr *E) -> unsigned {
314 QualType TexQT = CGF.getContext().AMDGPUTextureTy;
315 for (unsigned I = 0, N = E->getNumArgs(); I < N; ++I) {
316 QualType ArgTy = E->getArg(I)->getType();
317 if (ArgTy == TexQT) {
318 return I;
319 }
320
321 if (ArgTy.getCanonicalType() == TexQT.getCanonicalType()) {
322 return I;
323 }
324 }
325
326 return ~0U;
327 };
328
330 unsigned RsrcIndex = findTextureDescIndex(E);
331
332 if (RsrcIndex == ~0U) {
333 llvm::report_fatal_error("Invalid argument count for image builtin");
334 }
335
336 for (unsigned I = 0; I < E->getNumArgs(); ++I) {
337 llvm::Value *V = CGF.EmitScalarExpr(E->getArg(I));
338 if (I == RsrcIndex)
340 Args.push_back(V);
341 }
342
343 llvm::Type *RetTy = IsImageStore ? CGF.VoidTy : CGF.ConvertType(E->getType());
344 llvm::CallInst *Call = CGF.Builder.CreateIntrinsic(RetTy, IntrinsicID, Args);
345 return Call;
346}
347
348// Emit an intrinsic that has 1 float or double operand, and 1 integer.
350 const CallExpr *E,
351 unsigned IntrinsicID) {
352 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
353 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
354
355 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
356 return CGF.Builder.CreateCall(F, {Src0, Src1});
357}
358
359static inline StringRef mapScopeToSPIRV(StringRef AMDGCNScope) {
360 if (AMDGCNScope == "agent")
361 return "device";
362 if (AMDGCNScope == "wavefront")
363 return "subgroup";
364 return AMDGCNScope;
365}
366
367static llvm::AtomicOrdering mapCABIAtomicOrdering(unsigned AO) {
368 // Map C11/C++11 memory ordering to LLVM memory ordering
369 assert(llvm::isValidAtomicOrderingCABI(AO));
370 switch (static_cast<llvm::AtomicOrderingCABI>(AO)) {
371 case llvm::AtomicOrderingCABI::acquire:
372 case llvm::AtomicOrderingCABI::consume:
373 return llvm::AtomicOrdering::Acquire;
374 case llvm::AtomicOrderingCABI::release:
375 return llvm::AtomicOrdering::Release;
376 case llvm::AtomicOrderingCABI::acq_rel:
377 return llvm::AtomicOrdering::AcquireRelease;
378 case llvm::AtomicOrderingCABI::seq_cst:
379 return llvm::AtomicOrdering::SequentiallyConsistent;
380 case llvm::AtomicOrderingCABI::relaxed:
381 return llvm::AtomicOrdering::Monotonic;
382 }
383 llvm_unreachable("Unknown AtomicOrderingCABI enum");
384}
385
386// Map a __MEMORY_SCOPE_* integer constant to the AMDGPU-specific syncscope.
387// Invalid scope values are mapped to system scope (empty string).
388static StringRef getAMDGPUSyncScopeStr(CodeGenModule &CGM, unsigned ScopeInt,
389 llvm::AtomicOrdering AO) {
390 AtomicScopeGenericModel ScopeModel;
391 if (!ScopeModel.isValid(ScopeInt))
392 return "";
393 clang::SyncScope Scope = ScopeModel.map(ScopeInt);
395 Scope, AO);
396}
397
398/// Convert a __MEMORY_SCOPE_* integer constant to a metadata node containing
399/// the target-specific sync scope string.
400static llvm::MetadataAsValue *emitScopeMD(
401 CodeGenFunction &CGF, unsigned ScopeInt,
402 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent) {
403 StringRef ScopeStr = getAMDGPUSyncScopeStr(CGF.CGM, ScopeInt, AO);
404 llvm::LLVMContext &Ctx = CGF.CGM.getLLVMContext();
405 llvm::MDNode *MD =
406 llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, ScopeStr)});
407 return llvm::MetadataAsValue::get(Ctx, MD);
408}
409
410// For processing memory ordering and memory scope arguments of various
411// amdgcn builtins.
412// \p Order takes a C++11 compatible memory-ordering specifier and converts
413// it into LLVM's memory ordering specifier using atomic C ABI, and writes
414// to \p AO. \p Scope takes a const char * and converts it into AMDGCN
415// specific SyncScopeID and writes it to \p SSID.
417 llvm::AtomicOrdering &AO,
418 llvm::SyncScope::ID &SSID) {
419 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
420
421 // Map C11/C++11 memory ordering to LLVM memory ordering
422 AO = mapCABIAtomicOrdering(ord);
423
424 // Some of the atomic builtins take the scope as a string name.
425 StringRef scp;
426 if (llvm::getConstantStringInfo(Scope, scp)) {
427 if (getTarget().getTriple().isSPIRV())
428 scp = mapScopeToSPIRV(scp);
429 SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
430 return;
431 }
432
433 // Older builtins had an enum argument for the memory scope.
434 unsigned scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
435 StringRef SSN = getAMDGPUSyncScopeStr(CGM, scope, AO);
436 SSID = getLLVMContext().getOrInsertSyncScopeID(SSN);
437}
438
440 const CallExpr *E) {
441 constexpr const char *Tag = "amdgpu-synchronize-as";
442
444 for (unsigned K = 2; K < E->getNumArgs(); ++K) {
445 llvm::Value *V = EmitScalarExpr(E->getArg(K));
446 StringRef AS;
447 if (llvm::getConstantStringInfo(V, AS)) {
448 MMRAs.push_back({Tag, AS});
449 // TODO: Delete the resulting unused constant?
450 continue;
451 }
452 CGM.Error(E->getExprLoc(),
453 "expected an address space name as a string literal");
454 }
455
456 MMRAMetadata::appendTags(*Inst, MMRAs);
457}
458
459static Value *GetAMDGPUPredicate(CodeGenFunction &CGF, Twine Name) {
460 Constant *SpecId = ConstantInt::getAllOnesValue(CGF.Int32Ty);
461
462 LLVMContext &Ctx = CGF.getLLVMContext();
463 MDNode *Predicate = MDNode::get(Ctx, MDString::get(Ctx, Name.str()));
464 std::vector<Value *> Args = {SpecId, ConstantInt::getFalse(Ctx),
465 MetadataAsValue::get(Ctx, Predicate)};
466 CallInst *Call = CGF.Builder.CreateIntrinsic(
467 Intrinsic::spv_named_boolean_spec_constant, Args);
468
469 return Call;
470}
471
472static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) {
473 switch (BuiltinID) {
474 default:
475 llvm_unreachable("Unknown BuiltinID for wave reduction");
476 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
477 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
478 return Intrinsic::amdgcn_wave_reduce_add;
479 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f32:
480 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f64:
481 return Intrinsic::amdgcn_wave_reduce_fadd;
482 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
483 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
484 return Intrinsic::amdgcn_wave_reduce_sub;
485 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f32:
486 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f64:
487 return Intrinsic::amdgcn_wave_reduce_fsub;
488 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
489 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
490 return Intrinsic::amdgcn_wave_reduce_min;
491 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f32:
492 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f64:
493 return Intrinsic::amdgcn_wave_reduce_fmin;
494 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
495 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
496 return Intrinsic::amdgcn_wave_reduce_umin;
497 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
498 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
499 return Intrinsic::amdgcn_wave_reduce_max;
500 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f32:
501 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f64:
502 return Intrinsic::amdgcn_wave_reduce_fmax;
503 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
504 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
505 return Intrinsic::amdgcn_wave_reduce_umax;
506 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
507 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
508 return Intrinsic::amdgcn_wave_reduce_and;
509 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
510 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
511 return Intrinsic::amdgcn_wave_reduce_or;
512 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
513 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64:
514 return Intrinsic::amdgcn_wave_reduce_xor;
515 }
516}
517
519 const CallExpr *E) {
520 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
521 llvm::SyncScope::ID SSID;
522 switch (BuiltinID) {
523 case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
524 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f32:
525 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f64:
526 case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
527 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f32:
528 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f64:
529 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
530 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
531 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f32:
532 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f64:
533 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
534 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
535 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f32:
536 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f64:
537 case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
538 case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
539 case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
540 case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
541 case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
542 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
543 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
544 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
545 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
546 case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
547 case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
548 case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64: {
549 Intrinsic::ID IID = getIntrinsicIDforWaveReduction(BuiltinID);
550 llvm::Value *Value = EmitScalarExpr(E->getArg(0));
551 llvm::Value *Strategy = EmitScalarExpr(E->getArg(1));
552 llvm::Function *F = CGM.getIntrinsic(IID, {Value->getType()});
553 return Builder.CreateCall(F, {Value, Strategy});
554 }
555 case AMDGPU::BI__builtin_amdgcn_div_scale:
556 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
557 // Translate from the intrinsics's struct return to the builtin's out
558 // argument.
559
560 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
561
562 llvm::Value *X = EmitScalarExpr(E->getArg(0));
563 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
564 llvm::Value *Z = EmitScalarExpr(E->getArg(2));
565
566 llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
567 X->getType());
568
569 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
570
571 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
572 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
573
574 llvm::Type *RealFlagType = FlagOutPtr.getElementType();
575
576 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
577 Builder.CreateStore(FlagExt, FlagOutPtr);
578 return Result;
579 }
580 case AMDGPU::BI__builtin_amdgcn_div_fmas:
581 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
582 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
583 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
584 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
585 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
586
587 llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
588 Src0->getType());
589 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
590 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
591 }
592
593 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
595 Intrinsic::amdgcn_ds_swizzle);
596 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
597 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
598 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
600 // Find out if any arguments are required to be integer constant
601 // expressions.
602 unsigned ICEArguments = 0;
604 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
605 assert(Error == ASTContext::GE_None && "Should not codegen an error");
606 llvm::Type *DataTy = ConvertType(E->getArg(0)->getType());
607 unsigned Size = DataTy->getPrimitiveSizeInBits();
608 llvm::Type *IntTy =
609 llvm::IntegerType::get(Builder.getContext(), std::max(Size, 32u));
610 Function *F =
611 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8
612 ? Intrinsic::amdgcn_mov_dpp8
613 : Intrinsic::amdgcn_update_dpp,
614 IntTy);
615 assert(E->getNumArgs() == 5 || E->getNumArgs() == 6 ||
616 E->getNumArgs() == 2);
617 bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;
618 if (InsertOld)
619 Args.push_back(llvm::PoisonValue::get(IntTy));
620 for (unsigned I = 0; I != E->getNumArgs(); ++I) {
621 llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E);
622 if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2u : 1u) &&
623 Size < 32) {
624 if (!DataTy->isIntegerTy())
625 V = Builder.CreateBitCast(
626 V, llvm::IntegerType::get(Builder.getContext(), Size));
627 V = Builder.CreateZExtOrBitCast(V, IntTy);
628 }
629 llvm::Type *ExpTy =
630 F->getFunctionType()->getFunctionParamType(I + InsertOld);
631 Args.push_back(Builder.CreateTruncOrBitCast(V, ExpTy));
632 }
633 Value *V = Builder.CreateCall(F, Args);
634 if (Size < 32 && !DataTy->isIntegerTy())
635 V = Builder.CreateTrunc(
636 V, llvm::IntegerType::get(Builder.getContext(), Size));
637 return Builder.CreateTruncOrBitCast(V, DataTy);
638 }
639 case AMDGPU::BI__builtin_amdgcn_permlane16:
640 case AMDGPU::BI__builtin_amdgcn_permlanex16:
642 *this, E,
643 BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
644 ? Intrinsic::amdgcn_permlane16
645 : Intrinsic::amdgcn_permlanex16);
646 case AMDGPU::BI__builtin_amdgcn_permlane64:
648 Intrinsic::amdgcn_permlane64);
649 case AMDGPU::BI__builtin_amdgcn_readlane:
651 Intrinsic::amdgcn_readlane);
652 case AMDGPU::BI__builtin_amdgcn_wave_shuffle:
654 Intrinsic::amdgcn_wave_shuffle);
655 case AMDGPU::BI__builtin_amdgcn_readfirstlane:
657 Intrinsic::amdgcn_readfirstlane);
658 case AMDGPU::BI__builtin_amdgcn_div_fixup:
659 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
660 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
662 Intrinsic::amdgcn_div_fixup);
663 case AMDGPU::BI__builtin_amdgcn_trig_preop:
664 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
665 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
666 case AMDGPU::BI__builtin_amdgcn_rcp:
667 case AMDGPU::BI__builtin_amdgcn_rcpf:
668 case AMDGPU::BI__builtin_amdgcn_rcph:
669 case AMDGPU::BI__builtin_amdgcn_rcp_bf16:
670 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rcp);
671 case AMDGPU::BI__builtin_amdgcn_sqrt:
672 case AMDGPU::BI__builtin_amdgcn_sqrtf:
673 case AMDGPU::BI__builtin_amdgcn_sqrth:
674 case AMDGPU::BI__builtin_amdgcn_sqrt_bf16:
676 Intrinsic::amdgcn_sqrt);
677 case AMDGPU::BI__builtin_amdgcn_rsq:
678 case AMDGPU::BI__builtin_amdgcn_rsqf:
679 case AMDGPU::BI__builtin_amdgcn_rsqh:
680 case AMDGPU::BI__builtin_amdgcn_rsq_bf16:
681 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rsq);
682 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
683 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
685 Intrinsic::amdgcn_rsq_clamp);
686 case AMDGPU::BI__builtin_amdgcn_sinf:
687 case AMDGPU::BI__builtin_amdgcn_sinh:
688 case AMDGPU::BI__builtin_amdgcn_sin_bf16:
689 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_sin);
690 case AMDGPU::BI__builtin_amdgcn_cosf:
691 case AMDGPU::BI__builtin_amdgcn_cosh:
692 case AMDGPU::BI__builtin_amdgcn_cos_bf16:
693 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_cos);
694 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
695 return EmitAMDGPUDispatchPtr(*this, E);
696 case AMDGPU::BI__builtin_amdgcn_logf:
697 case AMDGPU::BI__builtin_amdgcn_log_bf16:
698 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log);
699 case AMDGPU::BI__builtin_amdgcn_exp2f:
700 case AMDGPU::BI__builtin_amdgcn_exp2_bf16:
702 Intrinsic::amdgcn_exp2);
703 case AMDGPU::BI__builtin_amdgcn_log_clampf:
705 Intrinsic::amdgcn_log_clamp);
706 case AMDGPU::BI__builtin_amdgcn_ldexp:
707 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
708 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
709 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
710 llvm::Function *F =
711 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
712 return Builder.CreateCall(F, {Src0, Src1});
713 }
714 case AMDGPU::BI__builtin_amdgcn_ldexph: {
715 // The raw instruction has a different behavior for out of bounds exponent
716 // values (implicit truncation instead of saturate to short_min/short_max).
717 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
718 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
719 llvm::Function *F =
720 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
721 return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
722 }
723 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
724 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
725 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
727 Intrinsic::amdgcn_frexp_mant);
728 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
729 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
730 Value *Src0 = EmitScalarExpr(E->getArg(0));
731 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
732 { Builder.getInt32Ty(), Src0->getType() });
733 return Builder.CreateCall(F, Src0);
734 }
735 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
736 Value *Src0 = EmitScalarExpr(E->getArg(0));
737 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
738 { Builder.getInt16Ty(), Src0->getType() });
739 return Builder.CreateCall(F, Src0);
740 }
741 case AMDGPU::BI__builtin_amdgcn_fract:
742 case AMDGPU::BI__builtin_amdgcn_fractf:
743 case AMDGPU::BI__builtin_amdgcn_fracth:
745 Intrinsic::amdgcn_fract);
746 case AMDGPU::BI__builtin_amdgcn_lerp:
748 Intrinsic::amdgcn_lerp);
749 case AMDGPU::BI__builtin_amdgcn_ubfe:
751 Intrinsic::amdgcn_ubfe);
752 case AMDGPU::BI__builtin_amdgcn_sbfe:
754 Intrinsic::amdgcn_sbfe);
755 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
756 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
757 llvm::Type *ResultType = ConvertType(E->getType());
758 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
759 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
760 return Builder.CreateCall(F, {Src});
761 }
762 case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w32:
763 case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w64: {
764 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
765 Function *F =
766 CGM.getIntrinsic(Intrinsic::amdgcn_inverse_ballot, {Src->getType()});
767 return Builder.CreateCall(F, {Src});
768 }
769 case AMDGPU::BI__builtin_amdgcn_tanhf:
770 case AMDGPU::BI__builtin_amdgcn_tanhh:
771 case AMDGPU::BI__builtin_amdgcn_tanh_bf16:
773 Intrinsic::amdgcn_tanh);
774 case AMDGPU::BI__builtin_amdgcn_uicmp:
775 case AMDGPU::BI__builtin_amdgcn_uicmpl:
776 case AMDGPU::BI__builtin_amdgcn_sicmp:
777 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
778 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
779 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
780 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
781
782 // FIXME-GFX10: How should 32 bit mask be handled?
783 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
784 { Builder.getInt64Ty(), Src0->getType() });
785 return Builder.CreateCall(F, { Src0, Src1, Src2 });
786 }
787 case AMDGPU::BI__builtin_amdgcn_fcmp:
788 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
789 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
790 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
791 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
792
793 // FIXME-GFX10: How should 32 bit mask be handled?
794 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
795 { Builder.getInt64Ty(), Src0->getType() });
796 return Builder.CreateCall(F, { Src0, Src1, Src2 });
797 }
798 case AMDGPU::BI__builtin_amdgcn_class:
799 case AMDGPU::BI__builtin_amdgcn_classf:
800 case AMDGPU::BI__builtin_amdgcn_classh:
801 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
802 case AMDGPU::BI__builtin_amdgcn_fmed3f:
803 case AMDGPU::BI__builtin_amdgcn_fmed3h:
805 Intrinsic::amdgcn_fmed3);
806 case AMDGPU::BI__builtin_amdgcn_ds_append:
807 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
808 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
809 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
810 Value *Src0 = EmitScalarExpr(E->getArg(0));
811 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
812 return Builder.CreateCall(F, { Src0, Builder.getFalse() });
813 }
814 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
815 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
816 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
817 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
818 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
819 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
820 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
821 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
822 case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
823 case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
824 case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
825 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
826 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
827 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
828 case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
829 case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
830 case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
831 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
832 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
833 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
834 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
835 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
836 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
837 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
838 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
839 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16: {
840 Intrinsic::ID IID;
841 switch (BuiltinID) {
842 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
843 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
844 case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
845 IID = Intrinsic::amdgcn_global_load_tr_b64;
846 break;
847 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
848 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
849 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
850 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
851 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
852 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
853 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
854 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
855 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
856 IID = Intrinsic::amdgcn_global_load_tr_b128;
857 break;
858 case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
859 IID = Intrinsic::amdgcn_global_load_tr4_b64;
860 break;
861 case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
862 IID = Intrinsic::amdgcn_global_load_tr6_b96;
863 break;
864 case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
865 IID = Intrinsic::amdgcn_ds_load_tr4_b64;
866 break;
867 case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
868 IID = Intrinsic::amdgcn_ds_load_tr6_b96;
869 break;
870 case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
871 IID = Intrinsic::amdgcn_ds_load_tr8_b64;
872 break;
873 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
874 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
875 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
876 IID = Intrinsic::amdgcn_ds_load_tr16_b128;
877 break;
878 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
879 IID = Intrinsic::amdgcn_ds_read_tr4_b64;
880 break;
881 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
882 IID = Intrinsic::amdgcn_ds_read_tr8_b64;
883 break;
884 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
885 IID = Intrinsic::amdgcn_ds_read_tr6_b96;
886 break;
887 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16:
888 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
889 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
890 IID = Intrinsic::amdgcn_ds_read_tr16_b64;
891 break;
892 }
893 llvm::Type *LoadTy = ConvertType(E->getType());
894 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
895 llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
896 return Builder.CreateCall(F, {Addr});
897 }
898 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
899 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
900 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
901 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
902 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
903 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128: {
904
905 Intrinsic::ID IID;
906 switch (BuiltinID) {
907 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
908 IID = Intrinsic::amdgcn_global_load_monitor_b32;
909 break;
910 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
911 IID = Intrinsic::amdgcn_global_load_monitor_b64;
912 break;
913 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
914 IID = Intrinsic::amdgcn_global_load_monitor_b128;
915 break;
916 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
917 IID = Intrinsic::amdgcn_flat_load_monitor_b32;
918 break;
919 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
920 IID = Intrinsic::amdgcn_flat_load_monitor_b64;
921 break;
922 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128:
923 IID = Intrinsic::amdgcn_flat_load_monitor_b128;
924 break;
925 }
926
927 llvm::Type *LoadTy = ConvertType(E->getType());
928 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
929
930 auto *AOExpr = cast<llvm::ConstantInt>(EmitScalarExpr(E->getArg(1)));
931 auto *ScopeExpr = cast<llvm::ConstantInt>(EmitScalarExpr(E->getArg(2)));
932 llvm::AtomicOrdering AO = mapCABIAtomicOrdering(AOExpr->getZExtValue());
933
934 llvm::Value *ScopeMD = emitScopeMD(*this, ScopeExpr->getZExtValue(), AO);
935 llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
936 return Builder.CreateCall(F, {Addr, AOExpr, ScopeMD});
937 }
938 case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:
939 case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:
940 case AMDGPU::BI__builtin_amdgcn_cluster_load_b128: {
941 Intrinsic::ID IID;
942 switch (BuiltinID) {
943 case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:
944 IID = Intrinsic::amdgcn_cluster_load_b32;
945 break;
946 case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:
947 IID = Intrinsic::amdgcn_cluster_load_b64;
948 break;
949 case AMDGPU::BI__builtin_amdgcn_cluster_load_b128:
950 IID = Intrinsic::amdgcn_cluster_load_b128;
951 break;
952 }
954 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
955 Args.push_back(EmitScalarExpr(E->getArg(i)));
956 llvm::Function *F = CGM.getIntrinsic(IID, {ConvertType(E->getType())});
957 return Builder.CreateCall(F, {Args});
958 }
959 case AMDGPU::BI__builtin_amdgcn_load_to_lds: {
960 // Should this have asan instrumentation?
962 Intrinsic::amdgcn_load_to_lds);
963 }
964 case AMDGPU::BI__builtin_amdgcn_load_async_to_lds: {
965 // Should this have asan instrumentation?
967 *this, E, Intrinsic::amdgcn_load_async_to_lds);
968 }
969 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
970 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
971 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
972 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
973 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
974 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B: {
975 Intrinsic::ID IID;
976 switch (BuiltinID) {
977 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
978 IID = Intrinsic::amdgcn_cooperative_atomic_load_32x4B;
979 break;
980 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
981 IID = Intrinsic::amdgcn_cooperative_atomic_store_32x4B;
982 break;
983 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
984 IID = Intrinsic::amdgcn_cooperative_atomic_load_16x8B;
985 break;
986 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
987 IID = Intrinsic::amdgcn_cooperative_atomic_store_16x8B;
988 break;
989 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
990 IID = Intrinsic::amdgcn_cooperative_atomic_load_8x16B;
991 break;
992 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B:
993 IID = Intrinsic::amdgcn_cooperative_atomic_store_8x16B;
994 break;
995 }
996
997 LLVMContext &Ctx = CGM.getLLVMContext();
999 // last argument is a MD string
1000 const unsigned ScopeArg = E->getNumArgs() - 1;
1001 for (unsigned i = 0; i != ScopeArg; ++i)
1002 Args.push_back(EmitScalarExpr(E->getArg(i)));
1003 StringRef Arg = cast<StringLiteral>(E->getArg(ScopeArg)->IgnoreParenCasts())
1004 ->getString();
1005 llvm::MDNode *MD = llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, Arg)});
1006 Args.push_back(llvm::MetadataAsValue::get(Ctx, MD));
1007 // Intrinsic is typed based on the pointer AS. Pointer is always the first
1008 // argument.
1009 llvm::Function *F = CGM.getIntrinsic(IID, {Args[0]->getType()});
1010 return Builder.CreateCall(F, {Args});
1011 }
1012 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
1013 Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
1014 {llvm::Type::getInt64Ty(getLLVMContext())});
1015 return Builder.CreateCall(F);
1016 }
1017 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
1018 Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv,
1019 {llvm::Type::getInt64Ty(getLLVMContext())});
1020 llvm::Value *Env = EmitScalarExpr(E->getArg(0));
1021 return Builder.CreateCall(F, {Env});
1022 }
1023 case AMDGPU::BI__builtin_amdgcn_processor_is: {
1024 assert(CGM.getTriple().isSPIRV() &&
1025 "__builtin_amdgcn_processor_is should never reach CodeGen for "
1026 "concrete targets!");
1027 StringRef Proc = cast<clang::StringLiteral>(E->getArg(0))->getString();
1028 return GetAMDGPUPredicate(*this, "is." + Proc);
1029 }
1030 case AMDGPU::BI__builtin_amdgcn_is_invocable: {
1031 assert(CGM.getTriple().isSPIRV() &&
1032 "__builtin_amdgcn_is_invocable should never reach CodeGen for "
1033 "concrete targets!");
1034 auto *FD = cast<FunctionDecl>(
1035 cast<DeclRefExpr>(E->getArg(0))->getReferencedDeclOfCallee());
1036 StringRef RF =
1037 getContext().BuiltinInfo.getRequiredFeatures(FD->getBuiltinID());
1038 return GetAMDGPUPredicate(*this, "has." + RF);
1039 }
1040 case AMDGPU::BI__builtin_amdgcn_read_exec:
1041 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
1042 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
1043 return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
1044 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
1045 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
1046 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
1047 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
1048 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
1049 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
1050 llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
1051 llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
1052 llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
1053 llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
1054 llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
1055 llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
1056
1057 // The builtins take these arguments as vec4 where the last element is
1058 // ignored. The intrinsic takes them as vec3.
1059 RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
1060 {0, 1, 2});
1061 RayDir =
1062 Builder.CreateShuffleVector(RayDir, RayDir, {0, 1, 2});
1063 RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
1064 {0, 1, 2});
1065
1066 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
1067 {NodePtr->getType(), RayDir->getType()});
1068 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
1069 RayInverseDir, TextureDescr});
1070 }
1071 case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
1072 case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray: {
1073 Intrinsic::ID IID;
1074 switch (BuiltinID) {
1075 case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
1076 IID = Intrinsic::amdgcn_image_bvh8_intersect_ray;
1077 break;
1078 case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray:
1079 IID = Intrinsic::amdgcn_image_bvh_dual_intersect_ray;
1080 break;
1081 }
1082 llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
1083 llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
1084 llvm::Value *InstanceMask = EmitScalarExpr(E->getArg(2));
1085 llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(3));
1086 llvm::Value *RayDir = EmitScalarExpr(E->getArg(4));
1087 llvm::Value *Offset = EmitScalarExpr(E->getArg(5));
1088 llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(6));
1089
1090 Address RetRayOriginPtr = EmitPointerWithAlignment(E->getArg(7));
1091 Address RetRayDirPtr = EmitPointerWithAlignment(E->getArg(8));
1092
1093 llvm::Function *IntrinsicFunc = CGM.getIntrinsic(IID);
1094
1095 llvm::CallInst *CI = Builder.CreateCall(
1096 IntrinsicFunc, {NodePtr, RayExtent, InstanceMask, RayOrigin, RayDir,
1097 Offset, TextureDescr});
1098
1099 llvm::Value *RetVData = Builder.CreateExtractValue(CI, 0);
1100 llvm::Value *RetRayOrigin = Builder.CreateExtractValue(CI, 1);
1101 llvm::Value *RetRayDir = Builder.CreateExtractValue(CI, 2);
1102
1103 Builder.CreateStore(RetRayOrigin, RetRayOriginPtr);
1104 Builder.CreateStore(RetRayDir, RetRayDirPtr);
1105
1106 return RetVData;
1107 }
1108
1109 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
1110 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
1111 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
1112 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn: {
1113 Intrinsic::ID IID;
1114 switch (BuiltinID) {
1115 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
1116 IID = Intrinsic::amdgcn_ds_bvh_stack_rtn;
1117 break;
1118 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
1119 IID = Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn;
1120 break;
1121 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
1122 IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn;
1123 break;
1124 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn:
1125 IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn;
1126 break;
1127 }
1128
1130 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
1131 Args.push_back(EmitScalarExpr(E->getArg(i)));
1132
1133 Function *F = CGM.getIntrinsic(IID);
1134 Value *Call = Builder.CreateCall(F, Args);
1135 Value *Rtn = Builder.CreateExtractValue(Call, 0);
1136 Value *A = Builder.CreateExtractValue(Call, 1);
1137 llvm::Type *RetTy = ConvertType(E->getType());
1138 Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
1139 (uint64_t)0);
1140 // ds_bvh_stack_push8_pop2_rtn returns {i64, i32} but the builtin returns
1141 // <2 x i64>, zext the second value.
1142 if (A->getType()->getPrimitiveSizeInBits() <
1143 RetTy->getScalarType()->getPrimitiveSizeInBits())
1144 A = Builder.CreateZExt(A, RetTy->getScalarType());
1145
1146 return Builder.CreateInsertElement(I0, A, 1);
1147 }
1148 case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
1149 case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
1151 *this, E, Intrinsic::amdgcn_image_load_1d, false);
1152 case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
1153 case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
1155 *this, E, Intrinsic::amdgcn_image_load_1darray, false);
1156 case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
1157 case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
1158 case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
1160 *this, E, Intrinsic::amdgcn_image_load_2d, false);
1161 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
1162 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
1163 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
1165 *this, E, Intrinsic::amdgcn_image_load_2darray, false);
1166 case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
1167 case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
1169 *this, E, Intrinsic::amdgcn_image_load_3d, false);
1170 case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
1171 case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
1173 *this, E, Intrinsic::amdgcn_image_load_cube, false);
1174 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
1175 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
1177 *this, E, Intrinsic::amdgcn_image_load_mip_1d, false);
1178 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
1179 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
1181 *this, E, Intrinsic::amdgcn_image_load_mip_1darray, false);
1182 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
1183 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
1184 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
1186 *this, E, Intrinsic::amdgcn_image_load_mip_2d, false);
1187 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
1188 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
1189 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
1191 *this, E, Intrinsic::amdgcn_image_load_mip_2darray, false);
1192 case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
1193 case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
1195 *this, E, Intrinsic::amdgcn_image_load_mip_3d, false);
1196 case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
1197 case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
1199 *this, E, Intrinsic::amdgcn_image_load_mip_cube, false);
1200 case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
1201 case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
1203 *this, E, Intrinsic::amdgcn_image_store_1d, true);
1204 case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
1205 case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
1207 *this, E, Intrinsic::amdgcn_image_store_1darray, true);
1208 case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
1209 case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
1210 case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
1212 *this, E, Intrinsic::amdgcn_image_store_2d, true);
1213 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
1214 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
1215 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
1217 *this, E, Intrinsic::amdgcn_image_store_2darray, true);
1218 case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
1219 case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
1221 *this, E, Intrinsic::amdgcn_image_store_3d, true);
1222 case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
1223 case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
1225 *this, E, Intrinsic::amdgcn_image_store_cube, true);
1226 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
1227 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
1229 *this, E, Intrinsic::amdgcn_image_store_mip_1d, true);
1230 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
1231 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
1233 *this, E, Intrinsic::amdgcn_image_store_mip_1darray, true);
1234 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
1235 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
1236 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
1238 *this, E, Intrinsic::amdgcn_image_store_mip_2d, true);
1239 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
1240 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
1241 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
1243 *this, E, Intrinsic::amdgcn_image_store_mip_2darray, true);
1244 case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
1245 case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
1247 *this, E, Intrinsic::amdgcn_image_store_mip_3d, true);
1248 case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
1249 case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32:
1251 *this, E, Intrinsic::amdgcn_image_store_mip_cube, true);
1252 case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
1253 case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
1255 *this, E, Intrinsic::amdgcn_image_sample_1d, false);
1256 case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
1257 case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
1259 *this, E, Intrinsic::amdgcn_image_sample_1darray, false);
1260 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
1261 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
1262 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
1264 *this, E, Intrinsic::amdgcn_image_sample_2d, false);
1265 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
1266 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
1267 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
1269 *this, E, Intrinsic::amdgcn_image_sample_2darray, false);
1270 case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
1271 case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
1273 *this, E, Intrinsic::amdgcn_image_sample_3d, false);
1274 case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
1275 case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
1277 *this, E, Intrinsic::amdgcn_image_sample_cube, false);
1278 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32:
1279 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32:
1281 *this, E, Intrinsic::amdgcn_image_sample_lz_1d, false);
1282 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32:
1283 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32:
1285 *this, E, Intrinsic::amdgcn_image_sample_l_1d, false);
1286 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32:
1287 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32:
1289 *this, E, Intrinsic::amdgcn_image_sample_d_1d, false);
1290 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32:
1291 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32:
1292 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32:
1294 *this, E, Intrinsic::amdgcn_image_sample_lz_2d, false);
1295 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32:
1296 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32:
1297 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32:
1299 *this, E, Intrinsic::amdgcn_image_sample_l_2d, false);
1300 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32:
1301 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32:
1302 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32:
1304 *this, E, Intrinsic::amdgcn_image_sample_d_2d, false);
1305 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32:
1306 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32:
1308 *this, E, Intrinsic::amdgcn_image_sample_lz_3d, false);
1309 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32:
1310 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32:
1312 *this, E, Intrinsic::amdgcn_image_sample_l_3d, false);
1313 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32:
1314 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32:
1316 *this, E, Intrinsic::amdgcn_image_sample_d_3d, false);
1317 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32:
1318 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32:
1320 *this, E, Intrinsic::amdgcn_image_sample_lz_cube, false);
1321 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32:
1322 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32:
1324 *this, E, Intrinsic::amdgcn_image_sample_l_cube, false);
1325 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32:
1326 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32:
1328 *this, E, Intrinsic::amdgcn_image_sample_lz_1darray, false);
1329 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32:
1330 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32:
1332 *this, E, Intrinsic::amdgcn_image_sample_l_1darray, false);
1333 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32:
1334 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32:
1336 *this, E, Intrinsic::amdgcn_image_sample_d_1darray, false);
1337 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32:
1338 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32:
1339 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32:
1341 *this, E, Intrinsic::amdgcn_image_sample_lz_2darray, false);
1342 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32:
1343 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32:
1344 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32:
1346 *this, E, Intrinsic::amdgcn_image_sample_l_2darray, false);
1347 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32:
1348 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32:
1349 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32:
1351 *this, E, Intrinsic::amdgcn_image_sample_d_2darray, false);
1352 case clang::AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32:
1354 *this, E, Intrinsic::amdgcn_image_gather4_lz_2d, false);
1355 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
1356 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
1357 llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
1358 Function *F = CGM.getIntrinsic(
1359 BuiltinID == AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4
1360 ? Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4
1361 : Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4,
1362 {VT, VT});
1363
1365 for (unsigned I = 0, N = E->getNumArgs(); I != N; ++I)
1366 Args.push_back(EmitScalarExpr(E->getArg(I)));
1367 return Builder.CreateCall(F, Args);
1368 }
1369 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
1370 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
1371 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
1372 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
1373 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
1374 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
1375 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
1376 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
1377 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
1378 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
1379 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
1380 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
1381 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
1382 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
1383 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
1384 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
1385 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
1386 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
1387 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
1388 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
1389 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
1390 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
1391 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
1392 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
1393 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
1394 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
1395 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
1396 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
1397 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
1398 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
1399 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
1400 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
1401 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
1402 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
1403 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
1404 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
1405 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
1406 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
1407 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
1408 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
1409 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
1410 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
1411 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
1412 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
1413 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
1414 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
1415 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
1416 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
1417 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
1418 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
1419 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
1420 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
1421 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
1422 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
1423 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
1424 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
1425 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
1426 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
1427 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
1428 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
1429 // GFX1250 WMMA builtins
1430 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
1431 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
1432 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
1433 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
1434 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
1435 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
1436 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
1437 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
1438 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
1439 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
1440 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
1441 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
1442 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
1443 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
1444 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
1445 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
1446 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
1447 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
1448 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
1449 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
1450 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
1451 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
1452 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
1453 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
1454 case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
1455 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1456 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
1457 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:
1458 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:
1459 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
1460 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
1461 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
1462 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
1463 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
1464 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
1465 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
1466 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
1467 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
1468 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
1469 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
1470 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
1471 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
1472 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8: {
1473
1474 // These operations perform a matrix multiplication and accumulation of
1475 // the form:
1476 // D = A * B + C
1477 // We need to specify one type for matrices AB and one for matrices CD.
1478 // Sparse matrix operations can have different types for A and B as well as
1479 // an additional type for sparsity index.
1480 // Destination type should be put before types used for source operands.
1481 SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
1482 // On GFX12, the intrinsics with 16-bit accumulator use a packed layout.
1483 // There is no need for the variable opsel argument, so always set it to
1484 // "false".
1485 bool AppendFalseForOpselArg = false;
1486 unsigned BuiltinWMMAOp;
1487 // Need return type when D and C are of different types.
1488 bool NeedReturnType = false;
1489 // Need to remove unused neg modifiers.
1490 bool RemoveABNeg = false;
1491
1492 switch (BuiltinID) {
1493 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
1494 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
1495 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
1496 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
1497 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1498 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
1499 break;
1500 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
1501 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
1502 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
1503 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
1504 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1505 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
1506 break;
1507 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
1508 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
1509 AppendFalseForOpselArg = true;
1510 [[fallthrough]];
1511 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
1512 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
1513 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1514 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
1515 break;
1516 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
1517 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
1518 AppendFalseForOpselArg = true;
1519 [[fallthrough]];
1520 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
1521 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
1522 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1523 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
1524 break;
1525 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
1526 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
1527 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1528 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
1529 break;
1530 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
1531 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
1532 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1533 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
1534 break;
1535 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
1536 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
1537 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
1538 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
1539 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
1540 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
1541 break;
1542 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
1543 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
1544 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
1545 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
1546 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
1547 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
1548 break;
1549 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
1550 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
1551 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1552 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
1553 break;
1554 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
1555 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
1556 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1557 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
1558 break;
1559 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
1560 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
1561 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1562 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
1563 break;
1564 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
1565 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
1566 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1567 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
1568 break;
1569 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
1570 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
1571 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
1572 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
1573 break;
1574 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
1575 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
1576 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1577 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
1578 break;
1579 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
1580 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
1581 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1582 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
1583 break;
1584 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
1585 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
1586 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1587 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
1588 break;
1589 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
1590 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
1591 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1592 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
1593 break;
1594 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
1595 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
1596 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
1597 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
1598 break;
1599 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
1600 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
1601 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
1602 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
1603 break;
1604 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
1605 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
1606 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
1607 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
1608 break;
1609 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
1610 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
1611 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1612 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
1613 break;
1614 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
1615 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
1616 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1617 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
1618 break;
1619 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
1620 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
1621 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1622 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
1623 break;
1624 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
1625 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
1626 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1627 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
1628 break;
1629 // GFX1250 WMMA builtins
1630 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
1631 ArgsForMatchingMatrixTypes = {3, 0};
1632 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x4_f32;
1633 RemoveABNeg = true;
1634 break;
1635 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
1636 ArgsForMatchingMatrixTypes = {3, 0};
1637 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_bf16;
1638 RemoveABNeg = true;
1639 break;
1640 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
1641 ArgsForMatchingMatrixTypes = {3, 0};
1642 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_f16;
1643 RemoveABNeg = true;
1644 break;
1645 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
1646 ArgsForMatchingMatrixTypes = {3, 0};
1647 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x32_f16;
1648 RemoveABNeg = true;
1649 break;
1650 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
1651 ArgsForMatchingMatrixTypes = {3, 0};
1652 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16;
1653 RemoveABNeg = true;
1654 break;
1655 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
1656 NeedReturnType = true;
1657 ArgsForMatchingMatrixTypes = {0, 3};
1658 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16;
1659 RemoveABNeg = true;
1660 break;
1661 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
1662 ArgsForMatchingMatrixTypes = {3, 0};
1663 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_fp8;
1664 break;
1665 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
1666 ArgsForMatchingMatrixTypes = {3, 0};
1667 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_bf8;
1668 break;
1669 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
1670 ArgsForMatchingMatrixTypes = {3, 0};
1671 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_fp8;
1672 break;
1673 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
1674 ArgsForMatchingMatrixTypes = {3, 0};
1675 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_bf8;
1676 break;
1677 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
1678 ArgsForMatchingMatrixTypes = {3, 0};
1679 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_fp8;
1680 break;
1681 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
1682 ArgsForMatchingMatrixTypes = {3, 0};
1683 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_bf8;
1684 break;
1685 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
1686 ArgsForMatchingMatrixTypes = {3, 0};
1687 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_fp8;
1688 break;
1689 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
1690 ArgsForMatchingMatrixTypes = {3, 0};
1691 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_bf8;
1692 break;
1693 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
1694 ArgsForMatchingMatrixTypes = {3, 0};
1695 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_fp8;
1696 break;
1697 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
1698 ArgsForMatchingMatrixTypes = {3, 0};
1699 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_bf8;
1700 break;
1701 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
1702 ArgsForMatchingMatrixTypes = {3, 0};
1703 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_fp8;
1704 break;
1705 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
1706 ArgsForMatchingMatrixTypes = {3, 0};
1707 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_bf8;
1708 break;
1709 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
1710 ArgsForMatchingMatrixTypes = {3, 0};
1711 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_fp8;
1712 break;
1713 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
1714 ArgsForMatchingMatrixTypes = {3, 0};
1715 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_bf8;
1716 break;
1717 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
1718 ArgsForMatchingMatrixTypes = {3, 0};
1719 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_fp8;
1720 break;
1721 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
1722 ArgsForMatchingMatrixTypes = {3, 0};
1723 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_bf8;
1724 break;
1725 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
1726 ArgsForMatchingMatrixTypes = {4, 1};
1727 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x64_iu8;
1728 break;
1729 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
1730 ArgsForMatchingMatrixTypes = {5, 1, 3};
1731 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4;
1732 break;
1733 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1734 ArgsForMatchingMatrixTypes = {5, 1, 3};
1735 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4;
1736 break;
1737 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
1738 ArgsForMatchingMatrixTypes = {5, 1, 3};
1739 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4;
1740 break;
1741 case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
1742 ArgsForMatchingMatrixTypes = {3, 0, 1};
1743 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_32x16x128_f4;
1744 break;
1745 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:
1746 ArgsForMatchingMatrixTypes = {3, 0, 1};
1747 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_32x16x128_f4;
1748 break;
1749 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:
1750 ArgsForMatchingMatrixTypes = {3, 0, 1};
1751 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_32x16x128_f4;
1752 break;
1753 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
1754 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1755 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_f16;
1756 break;
1757 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
1758 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1759 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_bf16;
1760 break;
1761 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
1762 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1763 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x64_f16;
1764 break;
1765 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
1766 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1767 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x64_bf16;
1768 break;
1769 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
1770 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1771 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16f32_16x16x64_bf16;
1772 break;
1773 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
1774 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1775 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_fp8;
1776 break;
1777 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
1778 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1779 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_bf8;
1780 break;
1781 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
1782 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1783 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_fp8;
1784 break;
1785 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
1786 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1787 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_bf8;
1788 break;
1789 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
1790 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1791 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_fp8;
1792 break;
1793 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
1794 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1795 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_bf8;
1796 break;
1797 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
1798 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1799 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_fp8;
1800 break;
1801 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
1802 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1803 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_bf8;
1804 break;
1805 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8:
1806 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1807 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8;
1808 break;
1809 }
1810
1812 for (int i = 0, e = E->getNumArgs(); i != e; ++i) {
1813 // Remove unused neg modifiers.
1814 if (RemoveABNeg && (i == 0 || i == 2))
1815 continue;
1816 Args.push_back(EmitScalarExpr(E->getArg(i)));
1817 }
1818 if (AppendFalseForOpselArg)
1819 Args.push_back(Builder.getFalse());
1820
1821 // Handle the optional clamp argument of the following two builtins.
1822 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8) {
1823 if (Args.size() == 7)
1824 Args.push_back(Builder.getFalse());
1825 assert(Args.size() == 8 && "Expected 8 arguments");
1826 Args[7] = Builder.CreateZExtOrTrunc(Args[7], Builder.getInt1Ty());
1827 } else if (BuiltinID ==
1828 AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8) {
1829 if (Args.size() == 8)
1830 Args.push_back(Builder.getFalse());
1831 assert(Args.size() == 9 && "Expected 9 arguments");
1832 Args[8] = Builder.CreateZExtOrTrunc(Args[8], Builder.getInt1Ty());
1833 }
1834
1836 if (NeedReturnType)
1837 ArgTypes.push_back(ConvertType(E->getType()));
1838 for (auto ArgIdx : ArgsForMatchingMatrixTypes)
1839 ArgTypes.push_back(Args[ArgIdx]->getType());
1840
1841 Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
1842 return Builder.CreateCall(F, Args);
1843 }
1844 // amdgcn workgroup size
1845 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
1846 return EmitAMDGPUWorkGroupSize(*this, 0);
1847 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
1848 return EmitAMDGPUWorkGroupSize(*this, 1);
1849 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
1850 return EmitAMDGPUWorkGroupSize(*this, 2);
1851
1852 // amdgcn grid size
1853 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
1854 return EmitAMDGPUGridSize(*this, 0);
1855 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
1856 return EmitAMDGPUGridSize(*this, 1);
1857 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
1858 return EmitAMDGPUGridSize(*this, 2);
1859
1860 // r600 intrinsics
1861 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
1862 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
1864 Intrinsic::r600_recipsqrt_ieee);
1865 case AMDGPU::BI__builtin_amdgcn_alignbit: {
1866 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
1867 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
1868 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
1869 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
1870 return Builder.CreateCall(F, { Src0, Src1, Src2 });
1871 }
1872 case AMDGPU::BI__builtin_amdgcn_fence: {
1874 EmitScalarExpr(E->getArg(1)), AO, SSID);
1875 FenceInst *Fence = Builder.CreateFence(AO, SSID);
1876 if (E->getNumArgs() > 2)
1878 return Fence;
1879 }
1880 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
1881 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
1882 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
1883 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
1884 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
1885 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
1886 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
1887 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
1888 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
1889 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
1890 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
1891 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
1892 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
1893 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1894 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1895 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1896 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1897 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
1898 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1899 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1900 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
1901 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
1902 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
1903 llvm::AtomicRMWInst::BinOp BinOp;
1904 switch (BuiltinID) {
1905 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
1906 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
1907 BinOp = llvm::AtomicRMWInst::UIncWrap;
1908 break;
1909 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
1910 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
1911 BinOp = llvm::AtomicRMWInst::UDecWrap;
1912 break;
1913 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
1914 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
1915 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
1916 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
1917 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
1918 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
1919 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
1920 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1921 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1922 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1923 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1924 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
1925 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1926 BinOp = llvm::AtomicRMWInst::FAdd;
1927 break;
1928 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
1929 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1930 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
1931 BinOp = llvm::AtomicRMWInst::FMin;
1932 break;
1933 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
1934 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
1935 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
1936 BinOp = llvm::AtomicRMWInst::FMax;
1937 break;
1938 }
1939
1940 Address Ptr = CheckAtomicAlignment(*this, E);
1941 Value *Val = EmitScalarExpr(E->getArg(1));
1942 llvm::Type *OrigTy = Val->getType();
1943 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1944
1945 bool Volatile;
1946
1947 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
1948 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
1949 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
1950 // __builtin_amdgcn_ds_faddf/fminf/fmaxf has an explicit volatile argument
1951 Volatile =
1952 cast<ConstantInt>(EmitScalarExpr(E->getArg(4)))->getZExtValue();
1953 } else {
1954 // Infer volatile from the passed type.
1955 Volatile =
1956 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1957 }
1958
1959 if (E->getNumArgs() >= 4) {
1960 // Some of the builtins have explicit ordering and scope arguments.
1962 EmitScalarExpr(E->getArg(3)), AO, SSID);
1963 } else {
1964 // Most of the builtins do not have syncscope/order arguments. For DS
1965 // atomics the scope doesn't really matter, as they implicitly operate at
1966 // workgroup scope.
1967 //
1968 // The global/flat cases need to use agent scope to consistently produce
1969 // the native instruction instead of a cmpxchg expansion.
1970 if (getTarget().getTriple().isSPIRV())
1971 SSID = getLLVMContext().getOrInsertSyncScopeID("device");
1972 else
1973 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
1974 AO = AtomicOrdering::Monotonic;
1975
1976 // The v2bf16 builtin uses i16 instead of a natural bfloat type.
1977 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
1978 BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
1979 BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
1980 llvm::Type *V2BF16Ty = FixedVectorType::get(
1981 llvm::Type::getBFloatTy(Builder.getContext()), 2);
1982 Val = Builder.CreateBitCast(Val, V2BF16Ty);
1983 }
1984 }
1985
1986 llvm::AtomicRMWInst *RMW =
1987 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
1988 if (Volatile)
1989 RMW->setVolatile(true);
1990
1991 unsigned AddrSpace = Ptr.getType()->getAddressSpace();
1992 if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
1993 // Most targets require "amdgpu.no.fine.grained.memory" to emit the native
1994 // instruction for flat and global operations.
1995 llvm::MDTuple *EmptyMD = MDNode::get(getLLVMContext(), {});
1996 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
1997
1998 // Most targets require "amdgpu.ignore.denormal.mode" to emit the native
1999 // instruction, but this only matters for float fadd.
2000 if (BinOp == llvm::AtomicRMWInst::FAdd && Val->getType()->isFloatTy())
2001 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
2002 }
2003
2004 return Builder.CreateBitCast(RMW, OrigTy);
2005 }
2006 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
2007 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
2008 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2009 llvm::Type *ResultType = ConvertType(E->getType());
2010 // s_sendmsg_rtn is mangled using return type only.
2011 Function *F =
2012 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
2013 return Builder.CreateCall(F, {Arg});
2014 }
2015 case AMDGPU::BI__builtin_amdgcn_permlane16_swap:
2016 case AMDGPU::BI__builtin_amdgcn_permlane32_swap: {
2017 // Because builtin types are limited, and the intrinsic uses a struct/pair
2018 // output, marshal the pair-of-i32 to <2 x i32>.
2019 Value *VDstOld = EmitScalarExpr(E->getArg(0));
2020 Value *VSrcOld = EmitScalarExpr(E->getArg(1));
2021 Value *FI = EmitScalarExpr(E->getArg(2));
2022 Value *BoundCtrl = EmitScalarExpr(E->getArg(3));
2023 Function *F =
2024 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16_swap
2025 ? Intrinsic::amdgcn_permlane16_swap
2026 : Intrinsic::amdgcn_permlane32_swap);
2027 llvm::CallInst *Call =
2028 Builder.CreateCall(F, {VDstOld, VSrcOld, FI, BoundCtrl});
2029
2030 llvm::Value *Elt0 = Builder.CreateExtractValue(Call, 0);
2031 llvm::Value *Elt1 = Builder.CreateExtractValue(Call, 1);
2032
2033 llvm::Type *ResultType = ConvertType(E->getType());
2034
2035 llvm::Value *Insert0 = Builder.CreateInsertElement(
2036 llvm::PoisonValue::get(ResultType), Elt0, UINT64_C(0));
2037 llvm::Value *AsVector =
2038 Builder.CreateInsertElement(Insert0, Elt1, UINT64_C(1));
2039 return AsVector;
2040 }
2041 case AMDGPU::BI__builtin_amdgcn_bitop3_b32:
2042 case AMDGPU::BI__builtin_amdgcn_bitop3_b16:
2044 Intrinsic::amdgcn_bitop3);
2045 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc: {
2046 // TODO: LLVM has this overloaded to allow for fat pointers, but since
2047 // those haven't been plumbed through to Clang yet, default to creating the
2048 // resource type.
2050 for (unsigned I = 0; I < 4; ++I)
2051 Args.push_back(EmitScalarExpr(E->getArg(I)));
2052 llvm::PointerType *RetTy = llvm::PointerType::get(
2053 Builder.getContext(), llvm::AMDGPUAS::BUFFER_RESOURCE);
2054 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_make_buffer_rsrc,
2055 {RetTy, Args[0]->getType()});
2056 return Builder.CreateCall(F, Args);
2057 }
2058 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
2059 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
2060 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
2061 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
2062 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
2063 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
2065 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
2066 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_format_v4f32:
2067 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_format_v4f16:
2069 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_store_format);
2070 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
2071 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
2072 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
2073 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
2074 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
2075 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
2076 llvm::Type *RetTy = nullptr;
2077 switch (BuiltinID) {
2078 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
2079 RetTy = Int8Ty;
2080 break;
2081 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
2082 RetTy = Int16Ty;
2083 break;
2084 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
2085 RetTy = Int32Ty;
2086 break;
2087 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
2088 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/2);
2089 break;
2090 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
2091 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/3);
2092 break;
2093 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
2094 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/4);
2095 break;
2096 }
2097 Function *F =
2098 CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
2099 return Builder.CreateCall(
2100 F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
2101 EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
2102 }
2103 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_format_v4f32:
2104 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_format_v4f16: {
2105 llvm::Type *RetTy = ConvertType(E->getType());
2106 Function *F =
2107 CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load_format, {RetTy});
2108
2109 return Builder.CreateCall(
2110 F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
2111 EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
2112 }
2113 case AMDGPU::BI__builtin_amdgcn_struct_buffer_store_format_v4f32:
2114 case AMDGPU::BI__builtin_amdgcn_struct_buffer_store_format_v4f16:
2116 *this, E, Intrinsic::amdgcn_struct_ptr_buffer_store_format);
2117 case AMDGPU::BI__builtin_amdgcn_struct_buffer_load_format_v4f32:
2118 case AMDGPU::BI__builtin_amdgcn_struct_buffer_load_format_v4f16: {
2119 llvm::Type *RetTy = ConvertType(E->getType());
2120 Function *F = CGM.getIntrinsic(
2121 Intrinsic::amdgcn_struct_ptr_buffer_load_format, {RetTy});
2122
2123 return Builder.CreateCall(
2124 F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
2126 EmitScalarExpr(E->getArg(4))});
2127 }
2128 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
2130 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_add);
2131 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:
2132 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
2134 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd);
2135 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32:
2136 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64:
2138 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin);
2139 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
2140 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
2142 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax);
2143 case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
2145 *this, E, Intrinsic::amdgcn_s_prefetch_data);
2146 case Builtin::BIlogbf:
2147 case Builtin::BI__builtin_logbf: {
2148 Value *Src0 = EmitScalarExpr(E->getArg(0));
2149 Function *FrExpFunc = CGM.getIntrinsic(
2150 Intrinsic::frexp, {Src0->getType(), Builder.getInt32Ty()});
2151 CallInst *FrExp = Builder.CreateCall(FrExpFunc, Src0);
2152 Value *Exp = Builder.CreateExtractValue(FrExp, 1);
2153 Value *Add = Builder.CreateAdd(
2154 Exp, ConstantInt::getSigned(Exp->getType(), -1), "", false, true);
2155 Value *SIToFP = Builder.CreateSIToFP(Add, Builder.getFloatTy());
2156 Value *Fabs =
2157 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs);
2158 Value *FCmpONE = Builder.CreateFCmpONE(
2159 Fabs, ConstantFP::getInfinity(Builder.getFloatTy()));
2160 Value *Sel1 = Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
2161 Value *FCmpOEQ =
2162 Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(Builder.getFloatTy()));
2163 Value *Sel2 = Builder.CreateSelect(
2164 FCmpOEQ,
2165 ConstantFP::getInfinity(Builder.getFloatTy(), /*Negative=*/true), Sel1);
2166 return Sel2;
2167 }
2168 case Builtin::BIlogb:
2169 case Builtin::BI__builtin_logb: {
2170 Value *Src0 = EmitScalarExpr(E->getArg(0));
2171 Function *FrExpFunc = CGM.getIntrinsic(
2172 Intrinsic::frexp, {Src0->getType(), Builder.getInt32Ty()});
2173 CallInst *FrExp = Builder.CreateCall(FrExpFunc, Src0);
2174 Value *Exp = Builder.CreateExtractValue(FrExp, 1);
2175 Value *Add = Builder.CreateAdd(
2176 Exp, ConstantInt::getSigned(Exp->getType(), -1), "", false, true);
2177 Value *SIToFP = Builder.CreateSIToFP(Add, Builder.getDoubleTy());
2178 Value *Fabs =
2179 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs);
2180 Value *FCmpONE = Builder.CreateFCmpONE(
2181 Fabs, ConstantFP::getInfinity(Builder.getDoubleTy()));
2182 Value *Sel1 = Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
2183 Value *FCmpOEQ =
2184 Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(Builder.getDoubleTy()));
2185 Value *Sel2 = Builder.CreateSelect(
2186 FCmpOEQ,
2187 ConstantFP::getInfinity(Builder.getDoubleTy(), /*Negative=*/true),
2188 Sel1);
2189 return Sel2;
2190 }
2191 case Builtin::BIscalbnf:
2192 case Builtin::BI__builtin_scalbnf:
2193 case Builtin::BIscalbn:
2194 case Builtin::BI__builtin_scalbn:
2196 *this, E, Intrinsic::ldexp, Intrinsic::experimental_constrained_ldexp);
2197 case AMDGPU::BI__builtin_amdgcn_permlane_bcast:
2199 *this, E, Intrinsic::amdgcn_permlane_bcast);
2200 case AMDGPU::BI__builtin_amdgcn_permlane_up:
2202 Intrinsic::amdgcn_permlane_up);
2203 case AMDGPU::BI__builtin_amdgcn_permlane_down:
2205 Intrinsic::amdgcn_permlane_down);
2206 case AMDGPU::BI__builtin_amdgcn_permlane_xor:
2208 Intrinsic::amdgcn_permlane_xor);
2209 default:
2210 return nullptr;
2211 }
2212}
#define V(N, I)
Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E)
llvm::Value * emitBuiltinWithOneOverloadedType(clang::CodeGen::CodeGenFunction &CGF, const clang::CallExpr *E, unsigned IntrinsicID, llvm::StringRef Name="")
Definition CGBuiltin.h:63
static mlir::Value emitAMDGCNImageOverloadedReturnType(CIRGenFunction &cgf, const CallExpr *e, llvm::StringRef intrinsicName, bool isImageStore)
static mlir::Value emitBinaryExpMaybeConstrainedFPBuiltin(CIRGenFunction &cgf, const CallExpr *e, llvm::StringRef intrinsicName, llvm::StringRef constrainedIntrinsicName)
static StringRef getAMDGPUSyncScopeStr(CodeGenModule &CGM, unsigned ScopeInt, llvm::AtomicOrdering AO)
Definition AMDGPU.cpp:388
static Value * GetAMDGPUPredicate(CodeGenFunction &CGF, Twine Name)
Definition AMDGPU.cpp:459
static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID)
Definition AMDGPU.cpp:472
static StringRef mapScopeToSPIRV(StringRef AMDGCNScope)
Definition AMDGPU.cpp:359
static llvm::Value * loadTextureDescPtorAsVec8I32(CodeGenFunction &CGF, llvm::Value *RsrcPtr)
Definition AMDGPU.cpp:280
static Value * EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, bool isExecHi)
Definition AMDGPU.cpp:262
llvm::CallInst * emitAMDGCNImageOverloadedReturnType(clang::CodeGen::CodeGenFunction &CGF, const clang::CallExpr *E, unsigned IntrinsicID, bool IsImageStore)
Definition AMDGPU.cpp:310
static llvm::MetadataAsValue * emitScopeMD(CodeGenFunction &CGF, unsigned ScopeInt, llvm::AtomicOrdering AO=llvm::AtomicOrdering::SequentiallyConsistent)
Convert a __MEMORY_SCOPE_* integer constant to a metadata node containing the target-specific sync sc...
Definition AMDGPU.cpp:400
static Value * emitFPIntBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition AMDGPU.cpp:349
static llvm::AtomicOrdering mapCABIAtomicOrdering(unsigned AO)
Definition AMDGPU.cpp:367
TokenType getType() const
Returns the token's type, e.g.
#define X(type, name)
Definition Value.h:97
static StringRef getTriple(const Command &Job)
HLSLResourceBindingAttr::RegisterType RegisterType
Definition SemaHLSL.cpp:57
static QualType getPointeeType(const MemRegion *R)
Provides definitions for the atomic synchronization scopes.
Enumerates target-specific builtins in their own namespaces within namespace clang.
Builtin::Context & BuiltinInfo
Definition ASTContext.h:807
QualType GetBuiltinType(unsigned ID, GetBuiltinTypeError &Error, unsigned *IntegerConstantArgs=nullptr) const
Return the type for the specified builtin.
unsigned getTargetAddressSpace(LangAS AS) const
@ GE_None
No error.
Defines the generic atomic scope model.
Definition SyncScope.h:240
bool isValid(unsigned S) const override
Check if the compile-time constant sync scope value is valid.
Definition SyncScope.h:278
SyncScope map(unsigned S) const override
Maps language specific sync scope values to internal SyncScope enum.
Definition SyncScope.h:258
const char * getRequiredFeatures(unsigned ID) const
Definition Builtins.cpp:102
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition Expr.h:2946
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition Expr.h:3150
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition Expr.h:3137
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition CharUnits.h:63
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition Address.h:128
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:209
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition Address.h:204
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition CGBuilder.h:302
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition CGBuilder.h:118
llvm::LoadInst * CreateAlignedLoad(llvm::Type *Ty, llvm::Value *Addr, CharUnits Align, const llvm::Twine &Name="")
Definition CGBuilder.h:138
Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition CGBuilder.h:199
Address CreateInBoundsGEP(Address Addr, ArrayRef< llvm::Value * > IdxList, llvm::Type *ElementType, CharUnits Align, const Twine &Name="")
Definition CGBuilder.h:356
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Value * EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, const CallExpr *E)
llvm::Type * ConvertType(QualType T)
llvm::Value * EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Definition AMDGPU.cpp:518
const TargetInfo & getTarget() const
void AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst, const CallExpr *E)
Definition AMDGPU.cpp:439
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
Definition CGExpr.cpp:1598
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
llvm::LLVMContext & getLLVMContext()
void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, llvm::AtomicOrdering &AO, llvm::SyncScope::ID &SSID)
Definition AMDGPU.cpp:416
This class organizes the cross-function state that is used while generating LLVM code.
llvm::Module & getModule() const
const LangOptions & getLangOpts() const
const llvm::DataLayout & getDataLayout() const
ASTContext & getContext() const
const TargetCodeGenInfo & getTargetCodeGenInfo()
llvm::LLVMContext & getLLVMContext()
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys={})
virtual StringRef getLLVMSyncScopeStr(const LangOptions &LangOpts, SyncScope Scope, llvm::AtomicOrdering Ordering) const
Get the syncscope used in LLVM IR as a string.
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition Expr.cpp:3102
Expr * IgnoreImpCasts() LLVM_READONLY
Skip past any implicit casts which might surround this expression until reaching a fixed point.
Definition Expr.cpp:3077
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition Expr.cpp:282
QualType getType() const
Definition Expr.h:144
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition TypeBase.h:3390
A (possibly-)qualified type.
Definition TypeBase.h:937
bool isVolatileQualified() const
Determine whether this type is volatile-qualified.
Definition TypeBase.h:8524
QualType getCanonicalType() const
Definition TypeBase.h:8492
Scope - A scope is a transient data structure that is used while parsing the program.
Definition Scope.h:41
TargetOptions & getTargetOpts() const
Retrieve the target options.
Definition TargetInfo.h:327
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
unsigned getMaxOpenCLWorkGroupSize() const
Definition TargetInfo.h:879
llvm::CodeObjectVersionKind CodeObjectVersion
Code object version for AMDGPU.
QualType getType() const
Definition Value.cpp:237
bool Mul(InterpState &S, CodePtr OpPC)
Definition Interp.h:471
The JSON file list parser is used to communicate input to InstallAPI.
@ Result
The result type of a method or function.
Definition TypeBase.h:905
SyncScope
Defines sync scope values used internally by clang.
Definition SyncScope.h:42
U cast(CodeGen::Address addr)
Definition Address.h:327
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64