1//===------- AMDGPU.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGBuiltin.h"
14#include "CodeGenFunction.h"
15#include "TargetInfo.h"
19#include "llvm/Analysis/ValueTracking.h"
20#include "llvm/CodeGen/MachineFunction.h"
21#include "llvm/IR/IntrinsicsAMDGPU.h"
22#include "llvm/IR/IntrinsicsR600.h"
23#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
24#include "llvm/Support/AMDGPUAddrSpace.h"
25#include "llvm/Support/AtomicOrdering.h"
26
27using namespace clang;
28using namespace CodeGen;
29using namespace llvm;
30
31namespace {
32
33// Has a second type-mangled argument.
34static Value *
35emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E,
36 Intrinsic::ID IntrinsicID,
37 Intrinsic::ID ConstrainedIntrinsicID) {
38 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
39 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
40
41 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
42 if (CGF.Builder.getIsFPConstrained()) {
43 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
44 {Src0->getType(), Src1->getType()});
45 return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
46 }
47
48 Function *F =
49 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
50 return CGF.Builder.CreateCall(F, {Src0, Src1});
51}
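// A minimal usage sketch (illustrative; the caller below is assumed, not
// taken from this file): a builtin with both a regular and a constrained
// FP intrinsic form could be lowered as
//   return emitBinaryExpMaybeConstrainedFPBuiltin(
//       *this, E, Intrinsic::pow, Intrinsic::experimental_constrained_pow);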
52
53// If \p E is not a null pointer, insert an address space cast to match the
54// return type of \p E if necessary.
55Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
56 const CallExpr *E = nullptr) {
57 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
58 auto *Call = CGF.Builder.CreateCall(F);
59 Call->addRetAttr(
60 Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
61 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
62 if (!E)
63 return Call;
64 QualType BuiltinRetType = E->getType();
65 auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
66 if (RetTy == Call->getType())
67 return Call;
68 return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
69}
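// Sketch of the IR this produces for __builtin_amdgcn_dispatch_ptr()
// (assuming the AMDGPU constant address space, AS 4):
//   %dp = call align 4 dereferenceable(64) ptr addrspace(4)
//             @llvm.amdgcn.dispatch.ptr()
// plus an addrspacecast when the builtin's declared return type points
// into a different address space.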
70
71Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
72 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
73 auto *Call = CGF.Builder.CreateCall(F);
74 Call->addRetAttr(
75 Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
76 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
77 return Call;
78}
79
80static llvm::Intrinsic::ID getAMDGPUWorkGroupID(CodeGenFunction &CGF,
81 unsigned Index) {
82 switch (Index) {
83 case 0:
84 return llvm::Intrinsic::amdgcn_workgroup_id_x;
85 case 1:
86 return llvm::Intrinsic::amdgcn_workgroup_id_y;
87 case 2:
88 return llvm::Intrinsic::amdgcn_workgroup_id_z;
89 default:
90 llvm_unreachable("unhandled index");
91 }
92}
93
94static void setNoundefInvariantLoad(llvm::LoadInst *Ld) {
95 Ld->setMetadata(llvm::LLVMContext::MD_noundef,
96 llvm::MDNode::get(Ld->getContext(), {}));
97 Ld->setMetadata(llvm::LLVMContext::MD_invariant_load,
98 llvm::MDNode::get(Ld->getContext(), {}));
99}
100
101static void addMaxWorkGroupSizeRangeMetadata(CodeGenFunction &CGF,
102 llvm::LoadInst *GroupSize) {
103 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
104 llvm::MDNode *RNode = MDHelper.createRange(
105 APInt(16, 1), APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
106 GroupSize->setMetadata(llvm::LLVMContext::MD_range, RNode);
107 setNoundefInvariantLoad(GroupSize);
108}
109
110static Value *emitAMDGPUWorkGroupSizeV5(CodeGenFunction &CGF, unsigned Index) {
111 llvm::Value *ImplicitArgPtr = EmitAMDGPUImplicitArgPtr(CGF);
112
113 // offsetof(amdhsa_implicit_kernarg_v5, block_count[Index])
114 unsigned BlockCountOffset = 0 + Index * 4;
115 // offsetof(amdhsa_implicit_kernarg_v5, group_size[Index])
116 unsigned GroupSizeOffset = 12 + Index * 2;
117 // offsetof(amdhsa_implicit_kernarg_v5, remainder[Index])
118 unsigned RemainderOffset = 18 + Index * 2;
119
120 if (CGF.CGM.getLangOpts().OffloadUniformBlock) {
121 // Indexing the implicit kernarg segment.
122 llvm::Value *GroupSizeGEP = CGF.Builder.CreateConstInBoundsGEP1_64(
123 CGF.Int8Ty, ImplicitArgPtr, GroupSizeOffset);
124 llvm::LoadInst *GroupSize = CGF.Builder.CreateLoad(
125 Address(GroupSizeGEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
126
127 addMaxWorkGroupSizeRangeMetadata(CGF, GroupSize);
128
129 return CGF.Builder.CreateZExt(GroupSize, CGF.Int32Ty);
130 }
131
132 llvm::Value *BlockCountGEP = CGF.Builder.CreateConstGEP1_64(
133 CGF.Int8Ty, ImplicitArgPtr, BlockCountOffset);
134 llvm::LoadInst *BlockCount = CGF.Builder.CreateLoad(
135 Address(BlockCountGEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));
136 setNoundefInvariantLoad(BlockCount);
137
138 llvm::Value *WorkgroupID =
139 CGF.Builder.CreateIntrinsic(getAMDGPUWorkGroupID(CGF, Index), {});
140 llvm::Value *IsFull = CGF.Builder.CreateICmpULT(WorkgroupID, BlockCount);
141
142 llvm::Value *StructOffset = CGF.Builder.CreateSelect(
143 IsFull, ConstantInt::get(CGF.Int32Ty, GroupSizeOffset),
144 ConstantInt::get(CGF.Int32Ty, RemainderOffset));
145
146 llvm::Value *SizeGEP =
147 CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, ImplicitArgPtr, StructOffset);
148 llvm::LoadInst *Size = CGF.Builder.CreateLoad(
149 Address(SizeGEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
150 addMaxWorkGroupSizeRangeMetadata(CGF, Size);
151 setNoundefInvariantLoad(Size);
152
153 return CGF.Builder.CreateZExt(Size, CGF.Int32Ty);
154}
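// Net effect (sketch): for the given dimension this computes
//   size = workgroup_id < block_count ? group_size : remainder
// with all three fields read from the COV_5 implicit kernarg layout
// described by the offsets above.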
155
156static Value *emitAMDGPUWorkGroupSizeV4(CodeGenFunction &CGF, unsigned Index) {
157 llvm::Value *DispatchPtr = EmitAMDGPUDispatchPtr(CGF);
158
159 // Indexing the HSA kernel_dispatch_packet struct.
160 llvm::Value *GroupSizeGEP = CGF.Builder.CreateConstInBoundsGEP1_64(
161 CGF.Int8Ty, DispatchPtr, 4 + Index * 2);
162 llvm::LoadInst *GroupSizeLD = CGF.Builder.CreateLoad(
163 Address(GroupSizeGEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
164
165 addMaxWorkGroupSizeRangeMetadata(CGF, GroupSizeLD);
166
167 llvm::Value *GroupSize = CGF.Builder.CreateZExt(GroupSizeLD, CGF.Int32Ty);
168
169 if (CGF.CGM.getLangOpts().OffloadUniformBlock)
170 return GroupSize;
171
172 llvm::Value *WorkgroupID =
173 CGF.Builder.CreateIntrinsic(getAMDGPUWorkGroupID(CGF, Index), {});
174
175 llvm::Value *GridSizeGEP = CGF.Builder.CreateConstInBoundsGEP1_64(
176 CGF.Int8Ty, DispatchPtr, 12 + Index * 4);
177 llvm::LoadInst *GridSize = CGF.Builder.CreateLoad(
178 Address(GridSizeGEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));
179
180 llvm::MDBuilder MDB(CGF.getLLVMContext());
181
182 // Known non-zero.
183 GridSize->setMetadata(llvm::LLVMContext::MD_range,
184 MDB.createRange(APInt(32, 1), APInt::getZero(32)));
185 GridSize->setMetadata(llvm::LLVMContext::MD_invariant_load,
186 llvm::MDNode::get(CGF.getLLVMContext(), {}));
187
188 llvm::Value *Mul = CGF.Builder.CreateMul(WorkgroupID, GroupSize);
189 llvm::Value *Remainder = CGF.Builder.CreateSub(GridSize, Mul);
190
191 llvm::Value *IsPartial = CGF.Builder.CreateICmpULT(Remainder, GroupSize);
192
193 return CGF.Builder.CreateSelect(IsPartial, Remainder, GroupSize);
194}
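// Net effect (sketch), in terms of the dispatch packet fields:
//   remainder = grid_size - workgroup_id * group_size
//   size      = remainder < group_size ? remainder : group_size
// i.e. the trailing, partial workgroup sees its actual size and every
// other workgroup sees the full group size.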
195
196// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
197/// Emit code based on Code Object ABI version.
198/// COV_4 : Emit code to use dispatch ptr
199/// COV_5+ : Emit code to use implicitarg ptr
200/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
201/// and use its value for COV_4 or COV_5+ approach. It is used for
202/// compiling device libraries in an ABI-agnostic way.
203Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
204 auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
205
206 // Do not emit __oclc_ABI_version references with a non-empty environment.
207 if (Cov == CodeObjectVersionKind::COV_None &&
208 CGF.getTarget().getTriple().hasEnvironment())
209 Cov = CodeObjectVersionKind::COV_6;
210
211 if (Cov == CodeObjectVersionKind::COV_None) {
212 StringRef Name = "__oclc_ABI_version";
213 auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
214 if (!ABIVersionC)
215 ABIVersionC = new llvm::GlobalVariable(
216 CGF.CGM.getModule(), CGF.Int32Ty, false,
217 llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
218 llvm::GlobalVariable::NotThreadLocal,
219 CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
220
221 // This load will be eliminated by IPSCCP because it is constant
222 // weak_odr without externally_initialized. Either changing it to weak or
223 // adding externally_initialized will keep the load.
224 Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
225 CGF.CGM.getIntAlign());
226
227 Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
228 ABIVersion,
229 llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
230
231 llvm::Value *V5Impl = emitAMDGPUWorkGroupSizeV5(CGF, Index);
232 llvm::Value *V4Impl = emitAMDGPUWorkGroupSizeV4(CGF, Index);
233 return CGF.Builder.CreateSelect(IsCOV5, V5Impl, V4Impl);
234 }
235
236 return Cov >= CodeObjectVersionKind::COV_5
237 ? emitAMDGPUWorkGroupSizeV5(CGF, Index)
238 : emitAMDGPUWorkGroupSizeV4(CGF, Index);
239}
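// Device-library sketch of the COV_NONE path: the emitted code reduces to
//   __oclc_ABI_version >= COV_5 ? <implicitarg_ptr path>
//                               : <dispatch_ptr path>
// and IPSCCP folds the select away once a concrete __oclc_ABI_version
// definition is linked in.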
240
241// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
242Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
243 const unsigned XOffset = 12;
244 auto *DP = EmitAMDGPUDispatchPtr(CGF);
245 // Indexing the HSA kernel_dispatch_packet struct.
246 auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
247 auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
248 auto *LD = CGF.Builder.CreateLoad(
249 Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));
250
251 llvm::MDBuilder MDB(CGF.getLLVMContext());
252
253 // Known non-zero.
254 LD->setMetadata(llvm::LLVMContext::MD_range,
255 MDB.createRange(APInt(32, 1), APInt::getZero(32)));
256 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
257 llvm::MDNode::get(CGF.getLLVMContext(), {}));
258 return LD;
259}
260} // namespace
261
262// Generates the IR for __builtin_read_exec_*.
263// Lowers the builtin to amdgcn_ballot intrinsic.
264static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
265 llvm::Type *RegisterType,
266 llvm::Type *ValueType, bool isExecHi) {
267 CodeGen::CGBuilderTy &Builder = CGF.Builder;
268 CodeGen::CodeGenModule &CGM = CGF.CGM;
269
270 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
271 llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
272
273 if (isExecHi) {
274 Value *Rt2 = Builder.CreateLShr(Call, 32);
275 Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
276 return Rt2;
277 }
278
279 return Call;
280}
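// Lowering sketch: __builtin_amdgcn_read_exec_hi() becomes roughly
//   %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 true)
//   %shr    = lshr i64 %ballot, 32
//   %hi     = trunc i64 %shr to i32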
281
282static llvm::Value *loadTextureDescPtorValue(CodeGenFunction &CGF,
283 llvm::Value *RsrcPtr) {
284 auto &B = CGF.Builder;
285 auto *VecTy = llvm::FixedVectorType::get(B.getInt32Ty(), 8);
286
287 if (RsrcPtr->getType() == VecTy)
288 return RsrcPtr;
289
290 if (RsrcPtr->getType()->isIntegerTy(32)) {
291 llvm::PointerType *VecPtrTy =
292 llvm::PointerType::get(CGF.getLLVMContext(), 8);
293 llvm::Value *Ptr = B.CreateIntToPtr(RsrcPtr, VecPtrTy, "tex.rsrc.from.int");
294 return B.CreateAlignedLoad(VecTy, Ptr, llvm::Align(32), "tex.rsrc.val");
295 }
296
297 if (RsrcPtr->getType()->isPointerTy()) {
298 auto *VecPtrTy = llvm::PointerType::get(
299 CGF.getLLVMContext(), RsrcPtr->getType()->getPointerAddressSpace());
300 llvm::Value *Typed = B.CreateBitCast(RsrcPtr, VecPtrTy, "tex.rsrc.typed");
301 return B.CreateAlignedLoad(VecTy, Typed, llvm::Align(32), "tex.rsrc.val");
302 }
303
304 const auto &DL = CGF.CGM.getDataLayout();
305 if (DL.getTypeSizeInBits(RsrcPtr->getType()) == 256)
306 return B.CreateBitCast(RsrcPtr, VecTy, "tex.rsrc.val");
307
308 llvm::report_fatal_error("Unexpected texture resource argument form");
309}
310
311llvm::CallInst *
312EmitAMDGCNImageOverloadedReturnType(CodeGen::CodeGenFunction &CGF,
313 const clang::CallExpr *E,
314 unsigned IntrinsicID, bool IsImageStore) {
315 auto findTextureDescIndex = [&CGF](const CallExpr *E) -> unsigned {
316 QualType TexQT = CGF.getContext().AMDGPUTextureTy;
317 for (unsigned I = 0, N = E->getNumArgs(); I < N; ++I) {
318 QualType ArgTy = E->getArg(I)->getType();
319 if (ArgTy == TexQT) {
320 return I;
321 }
322
323 if (ArgTy.getCanonicalType() == TexQT.getCanonicalType()) {
324 return I;
325 }
326 }
327
328 return ~0U;
329 };
330
331 SmallVector<Value *, 10> Args;
332 unsigned RsrcIndex = findTextureDescIndex(E);
333
334 if (RsrcIndex == ~0U) {
335 llvm::report_fatal_error("Invalid argument count for image builtin");
336 }
337
338 for (unsigned I = 0; I < E->getNumArgs(); ++I) {
339 llvm::Value *V = CGF.EmitScalarExpr(E->getArg(I));
340 if (I == RsrcIndex)
341 V = loadTextureDescPtorValue(CGF, V);
342 Args.push_back(V);
343 }
344
345 llvm::Type *RetTy = IsImageStore ? CGF.VoidTy : CGF.ConvertType(E->getType());
346 llvm::CallInst *Call = CGF.Builder.CreateIntrinsic(RetTy, IntrinsicID, Args);
347 return Call;
348}
349
350// Emit an intrinsic that has 1 float or double operand, and 1 integer.
351static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
352 const CallExpr *E,
353 unsigned IntrinsicID) {
354 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
355 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
356
357 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
358 return CGF.Builder.CreateCall(F, {Src0, Src1});
359}
360
361static inline StringRef mapScopeToSPIRV(StringRef AMDGCNScope) {
362 if (AMDGCNScope == "agent")
363 return "device";
364 if (AMDGCNScope == "wavefront")
365 return "subgroup";
366 return AMDGCNScope;
367}
368
369static llvm::AtomicOrdering mapCABIAtomicOrdering(unsigned AO) {
370 // Map C11/C++11 memory ordering to LLVM memory ordering
371 assert(llvm::isValidAtomicOrderingCABI(AO));
372 switch (static_cast<llvm::AtomicOrderingCABI>(AO)) {
373 case llvm::AtomicOrderingCABI::acquire:
374 case llvm::AtomicOrderingCABI::consume:
375 return llvm::AtomicOrdering::Acquire;
376 case llvm::AtomicOrderingCABI::release:
377 return llvm::AtomicOrdering::Release;
378 case llvm::AtomicOrderingCABI::acq_rel:
379 return llvm::AtomicOrdering::AcquireRelease;
380 case llvm::AtomicOrderingCABI::seq_cst:
381 return llvm::AtomicOrdering::SequentiallyConsistent;
382 case llvm::AtomicOrderingCABI::relaxed:
383 return llvm::AtomicOrdering::Monotonic;
384 }
385 llvm_unreachable("Unknown AtomicOrderingCABI enum");
386}
387
388// For processing memory ordering and memory scope arguments of various
389// amdgcn builtins.
390// \p Order takes a C++11 compatible memory-ordering specifier and converts
391// it into LLVM's memory ordering specifier using atomic C ABI, and writes
392// to \p AO. \p Scope takes a const char * and converts it into AMDGCN
393// specific SyncScopeID and writes it to \p SSID.
394void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
395 llvm::AtomicOrdering &AO,
396 llvm::SyncScope::ID &SSID) {
397 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
398
399 // Map C11/C++11 memory ordering to LLVM memory ordering
400 AO = mapCABIAtomicOrdering(ord);
401
402 // Some of the atomic builtins take the scope as a string name.
403 StringRef scp;
404 if (llvm::getConstantStringInfo(Scope, scp)) {
405 if (getTarget().getTriple().isSPIRV())
406 scp = mapScopeToSPIRV(scp);
407 SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
408 return;
409 }
410
411 // Older builtins had an enum argument for the memory scope.
412 const char *SSN = nullptr;
413 int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
414 switch (scope) {
415 case AtomicScopeGenericModel::System: // __MEMORY_SCOPE_SYSTEM
416 SSID = llvm::SyncScope::System;
417 break;
418 case AtomicScopeGenericModel::Device: // __MEMORY_SCOPE_DEVICE
419 SSN = getTarget().getTriple().isSPIRV() ? "device" : "agent";
420 break;
421 case AtomicScopeGenericModel::Workgroup: // __MEMORY_SCOPE_WRKGRP
422 SSN = "workgroup";
423 break;
424 case AtomicScopeGenericModel::Cluster: // __MEMORY_SCOPE_CLUSTR
425 SSN = getTarget().getTriple().isSPIRV() ? "workgroup" : "cluster";
426 break;
427 case AtomicScopeGenericModel::Wavefront: // __MEMORY_SCOPE_WVFRNT
428 SSN = getTarget().getTriple().isSPIRV() ? "subgroup" : "wavefront";
429 break;
430 case AtomicScopeGenericModel::Single: // __MEMORY_SCOPE_SINGLE
431 SSID = llvm::SyncScope::SingleThread;
432 break;
433 default:
434 SSID = llvm::SyncScope::System;
435 break;
436 }
437 if (SSN)
438 SSID = getLLVMContext().getOrInsertSyncScopeID(SSN);
439}
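// Usage sketch: __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent") arrives
// here as AO = SequentiallyConsistent and SSID = syncscope("agent")
// (mapped to syncscope("device") when targeting SPIR-V).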
440
441void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
442 const CallExpr *E) {
443 constexpr const char *Tag = "amdgpu-synchronize-as";
444
445 LLVMContext &Ctx = Inst->getContext();
446 SmallVector<MMRAMetadata::TagT, 3> MMRAs;
447 for (unsigned K = 2; K < E->getNumArgs(); ++K) {
448 llvm::Value *V = EmitScalarExpr(E->getArg(K));
449 StringRef AS;
450 if (llvm::getConstantStringInfo(V, AS)) {
451 MMRAs.push_back({Tag, AS});
452 // TODO: Delete the resulting unused constant?
453 continue;
454 }
455 CGM.Error(E->getExprLoc(),
456 "expected an address space name as a string literal");
457 }
458
459 llvm::sort(MMRAs);
460 MMRAs.erase(llvm::unique(MMRAs), MMRAs.end());
461 Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
462}
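// Usage sketch (hypothetical call site):
//   __builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup", "local")
// attaches !mmra !{!"amdgpu-synchronize-as", !"local"} to the emitted
// fence instruction.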
463
464static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) {
465 switch (BuiltinID) {
466 default:
467 llvm_unreachable("Unknown BuiltinID for wave reduction");
468 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
469 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
470 return Intrinsic::amdgcn_wave_reduce_add;
471 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f32:
472 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f64:
473 return Intrinsic::amdgcn_wave_reduce_fadd;
474 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
475 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
476 return Intrinsic::amdgcn_wave_reduce_sub;
477 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f32:
478 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f64:
479 return Intrinsic::amdgcn_wave_reduce_fsub;
480 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
481 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
482 return Intrinsic::amdgcn_wave_reduce_min;
483 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f32:
484 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f64:
485 return Intrinsic::amdgcn_wave_reduce_fmin;
486 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
487 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
488 return Intrinsic::amdgcn_wave_reduce_umin;
489 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
490 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
491 return Intrinsic::amdgcn_wave_reduce_max;
492 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f32:
493 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f64:
494 return Intrinsic::amdgcn_wave_reduce_fmax;
495 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
496 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
497 return Intrinsic::amdgcn_wave_reduce_umax;
498 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
499 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
500 return Intrinsic::amdgcn_wave_reduce_and;
501 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
502 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
503 return Intrinsic::amdgcn_wave_reduce_or;
504 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
505 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64:
506 return Intrinsic::amdgcn_wave_reduce_xor;
507 }
508}
509
510Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
511 const CallExpr *E) {
512 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
513 llvm::SyncScope::ID SSID;
514 switch (BuiltinID) {
515 case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
516 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f32:
517 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f64:
518 case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
519 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f32:
520 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f64:
521 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
522 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
523 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f32:
524 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f64:
525 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
526 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
527 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f32:
528 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f64:
529 case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
530 case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
531 case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
532 case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
533 case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
534 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
535 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
536 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
537 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
538 case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
539 case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
540 case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64: {
541 Intrinsic::ID IID = getIntrinsicIDforWaveReduction(BuiltinID);
542 llvm::Value *Value = EmitScalarExpr(E->getArg(0));
543 llvm::Value *Strategy = EmitScalarExpr(E->getArg(1));
544 llvm::Function *F = CGM.getIntrinsic(IID, {Value->getType()});
545 return Builder.CreateCall(F, {Value, Strategy});
546 }
547 case AMDGPU::BI__builtin_amdgcn_div_scale:
548 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
549 // Translate from the intrinsic's struct return to the builtin's out
550 // argument.
551
552 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
553
554 llvm::Value *X = EmitScalarExpr(E->getArg(0));
555 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
556 llvm::Value *Z = EmitScalarExpr(E->getArg(2));
557
558 llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
559 X->getType());
560
561 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
562
563 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
564 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
565
566 llvm::Type *RealFlagType = FlagOutPtr.getElementType();
567
568 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
569 Builder.CreateStore(FlagExt, FlagOutPtr);
570 return Result;
571 }
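  // Shape of the lowering (sketch): @llvm.amdgcn.div.scale.* returns
  // { <ty>, i1 }; element 0 is the scaled result, and element 1 is the
  // flag, zero-extended and stored through the builtin's bool* out
  // argument.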
572 case AMDGPU::BI__builtin_amdgcn_div_fmas:
573 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
574 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
575 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
576 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
577 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
578
579 llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
580 Src0->getType());
581 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
582 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
583 }
584
585 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
586 return emitBuiltinWithOneOverloadedType<2>(*this, E,
587 Intrinsic::amdgcn_ds_swizzle);
588 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
589 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
590 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
591 llvm::SmallVector<llvm::Value *, 6> Args;
592 // Find out if any arguments are required to be integer constant
593 // expressions.
594 unsigned ICEArguments = 0;
595 ASTContext::GetBuiltinTypeError Error;
596 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
597 assert(Error == ASTContext::GE_None && "Should not codegen an error");
598 llvm::Type *DataTy = ConvertType(E->getArg(0)->getType());
599 unsigned Size = DataTy->getPrimitiveSizeInBits();
600 llvm::Type *IntTy =
601 llvm::IntegerType::get(Builder.getContext(), std::max(Size, 32u));
602 Function *F =
603 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8
604 ? Intrinsic::amdgcn_mov_dpp8
605 : Intrinsic::amdgcn_update_dpp,
606 IntTy);
607 assert(E->getNumArgs() == 5 || E->getNumArgs() == 6 ||
608 E->getNumArgs() == 2);
609 bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;
610 if (InsertOld)
611 Args.push_back(llvm::PoisonValue::get(IntTy));
612 for (unsigned I = 0; I != E->getNumArgs(); ++I) {
613 llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E);
614 if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2u : 1u) &&
615 Size < 32) {
616 if (!DataTy->isIntegerTy())
617 V = Builder.CreateBitCast(
618 V, llvm::IntegerType::get(Builder.getContext(), Size));
619 V = Builder.CreateZExtOrBitCast(V, IntTy);
620 }
621 llvm::Type *ExpTy =
622 F->getFunctionType()->getFunctionParamType(I + InsertOld);
623 Args.push_back(Builder.CreateTruncOrBitCast(V, ExpTy));
624 }
625 Value *V = Builder.CreateCall(F, Args);
626 if (Size < 32 && !DataTy->isIntegerTy())
627 V = Builder.CreateTrunc(
628 V, llvm::IntegerType::get(Builder.getContext(), Size));
629 return Builder.CreateTruncOrBitCast(V, DataTy);
630 }
631 case AMDGPU::BI__builtin_amdgcn_permlane16:
632 case AMDGPU::BI__builtin_amdgcn_permlanex16:
633 return emitBuiltinWithOneOverloadedType<6>(
634 *this, E,
635 BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
636 ? Intrinsic::amdgcn_permlane16
637 : Intrinsic::amdgcn_permlanex16);
638 case AMDGPU::BI__builtin_amdgcn_permlane64:
639 return emitBuiltinWithOneOverloadedType<1>(*this, E,
640 Intrinsic::amdgcn_permlane64);
641 case AMDGPU::BI__builtin_amdgcn_readlane:
642 return emitBuiltinWithOneOverloadedType<2>(*this, E,
643 Intrinsic::amdgcn_readlane);
644 case AMDGPU::BI__builtin_amdgcn_readfirstlane:
645 return emitBuiltinWithOneOverloadedType<1>(*this, E,
646 Intrinsic::amdgcn_readfirstlane);
647 case AMDGPU::BI__builtin_amdgcn_div_fixup:
648 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
649 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
650 return emitBuiltinWithOneOverloadedType<3>(*this, E,
651 Intrinsic::amdgcn_div_fixup);
652 case AMDGPU::BI__builtin_amdgcn_trig_preop:
653 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
654 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
655 case AMDGPU::BI__builtin_amdgcn_rcp:
656 case AMDGPU::BI__builtin_amdgcn_rcpf:
657 case AMDGPU::BI__builtin_amdgcn_rcph:
658 case AMDGPU::BI__builtin_amdgcn_rcp_bf16:
659 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rcp);
660 case AMDGPU::BI__builtin_amdgcn_sqrt:
661 case AMDGPU::BI__builtin_amdgcn_sqrtf:
662 case AMDGPU::BI__builtin_amdgcn_sqrth:
663 case AMDGPU::BI__builtin_amdgcn_sqrt_bf16:
664 return emitBuiltinWithOneOverloadedType<1>(*this, E,
665 Intrinsic::amdgcn_sqrt);
666 case AMDGPU::BI__builtin_amdgcn_rsq:
667 case AMDGPU::BI__builtin_amdgcn_rsqf:
668 case AMDGPU::BI__builtin_amdgcn_rsqh:
669 case AMDGPU::BI__builtin_amdgcn_rsq_bf16:
670 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rsq);
671 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
672 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
673 return emitBuiltinWithOneOverloadedType<1>(*this, E,
674 Intrinsic::amdgcn_rsq_clamp);
675 case AMDGPU::BI__builtin_amdgcn_sinf:
676 case AMDGPU::BI__builtin_amdgcn_sinh:
677 case AMDGPU::BI__builtin_amdgcn_sin_bf16:
678 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_sin);
679 case AMDGPU::BI__builtin_amdgcn_cosf:
680 case AMDGPU::BI__builtin_amdgcn_cosh:
681 case AMDGPU::BI__builtin_amdgcn_cos_bf16:
682 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_cos);
683 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
684 return EmitAMDGPUDispatchPtr(*this, E);
685 case AMDGPU::BI__builtin_amdgcn_logf:
686 case AMDGPU::BI__builtin_amdgcn_log_bf16:
687 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log);
688 case AMDGPU::BI__builtin_amdgcn_exp2f:
689 case AMDGPU::BI__builtin_amdgcn_exp2_bf16:
690 return emitBuiltinWithOneOverloadedType<1>(*this, E,
691 Intrinsic::amdgcn_exp2);
692 case AMDGPU::BI__builtin_amdgcn_log_clampf:
693 return emitBuiltinWithOneOverloadedType<1>(*this, E,
694 Intrinsic::amdgcn_log_clamp);
695 case AMDGPU::BI__builtin_amdgcn_ldexp:
696 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
697 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
698 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
699 llvm::Function *F =
700 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
701 return Builder.CreateCall(F, {Src0, Src1});
702 }
703 case AMDGPU::BI__builtin_amdgcn_ldexph: {
704 // The raw instruction has a different behavior for out of bounds exponent
705 // values (implicit truncation instead of saturate to short_min/short_max).
706 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
707 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
708 llvm::Function *F =
709 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
710 return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
711 }
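  // Sketch: __builtin_amdgcn_ldexph(h, e) therefore lowers to
  //   call half @llvm.ldexp.f16.i16(half %h, i16 trunc(i32 %e))
  // with the exponent truncated to i16 to match the instruction.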
712 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
713 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
714 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
715 return emitBuiltinWithOneOverloadedType<1>(*this, E,
716 Intrinsic::amdgcn_frexp_mant);
717 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
718 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
719 Value *Src0 = EmitScalarExpr(E->getArg(0));
720 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
721 { Builder.getInt32Ty(), Src0->getType() });
722 return Builder.CreateCall(F, Src0);
723 }
724 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
725 Value *Src0 = EmitScalarExpr(E->getArg(0));
726 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
727 { Builder.getInt16Ty(), Src0->getType() });
728 return Builder.CreateCall(F, Src0);
729 }
730 case AMDGPU::BI__builtin_amdgcn_fract:
731 case AMDGPU::BI__builtin_amdgcn_fractf:
732 case AMDGPU::BI__builtin_amdgcn_fracth:
733 return emitBuiltinWithOneOverloadedType<1>(*this, E,
734 Intrinsic::amdgcn_fract);
735 case AMDGPU::BI__builtin_amdgcn_lerp:
736 return emitBuiltinWithOneOverloadedType<3>(*this, E,
737 Intrinsic::amdgcn_lerp);
738 case AMDGPU::BI__builtin_amdgcn_ubfe:
739 return emitBuiltinWithOneOverloadedType<3>(*this, E,
740 Intrinsic::amdgcn_ubfe);
741 case AMDGPU::BI__builtin_amdgcn_sbfe:
742 return emitBuiltinWithOneOverloadedType<3>(*this, E,
743 Intrinsic::amdgcn_sbfe);
744 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
745 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
746 llvm::Type *ResultType = ConvertType(E->getType());
747 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
748 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
749 return Builder.CreateCall(F, {Src});
750 }
751 case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w32:
752 case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w64: {
753 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
754 Function *F =
755 CGM.getIntrinsic(Intrinsic::amdgcn_inverse_ballot, {Src->getType()});
756 return Builder.CreateCall(F, {Src});
757 }
758 case AMDGPU::BI__builtin_amdgcn_tanhf:
759 case AMDGPU::BI__builtin_amdgcn_tanhh:
760 case AMDGPU::BI__builtin_amdgcn_tanh_bf16:
761 return emitBuiltinWithOneOverloadedType<1>(*this, E,
762 Intrinsic::amdgcn_tanh);
763 case AMDGPU::BI__builtin_amdgcn_uicmp:
764 case AMDGPU::BI__builtin_amdgcn_uicmpl:
765 case AMDGPU::BI__builtin_amdgcn_sicmp:
766 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
767 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
768 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
769 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
770
771 // FIXME-GFX10: How should 32 bit mask be handled?
772 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
773 { Builder.getInt64Ty(), Src0->getType() });
774 return Builder.CreateCall(F, { Src0, Src1, Src2 });
775 }
776 case AMDGPU::BI__builtin_amdgcn_fcmp:
777 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
778 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
779 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
780 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
781
782 // FIXME-GFX10: How should 32 bit mask be handled?
783 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
784 { Builder.getInt64Ty(), Src0->getType() });
785 return Builder.CreateCall(F, { Src0, Src1, Src2 });
786 }
787 case AMDGPU::BI__builtin_amdgcn_class:
788 case AMDGPU::BI__builtin_amdgcn_classf:
789 case AMDGPU::BI__builtin_amdgcn_classh:
790 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
791 case AMDGPU::BI__builtin_amdgcn_fmed3f:
792 case AMDGPU::BI__builtin_amdgcn_fmed3h:
793 return emitBuiltinWithOneOverloadedType<3>(*this, E,
794 Intrinsic::amdgcn_fmed3);
795 case AMDGPU::BI__builtin_amdgcn_ds_append:
796 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
797 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
798 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
799 Value *Src0 = EmitScalarExpr(E->getArg(0));
800 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
801 return Builder.CreateCall(F, { Src0, Builder.getFalse() });
802 }
803 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
804 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
805 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
806 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
807 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
808 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
809 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
810 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
811 case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
812 case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
813 case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
814 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
815 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
816 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
817 case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
818 case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
819 case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
820 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
821 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
822 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
823 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
824 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
825 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
826 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
827 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
828 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16: {
829 Intrinsic::ID IID;
830 switch (BuiltinID) {
831 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
832 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
833 case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
834 IID = Intrinsic::amdgcn_global_load_tr_b64;
835 break;
836 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
837 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
838 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
839 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
840 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
841 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
842 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
843 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
844 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
845 IID = Intrinsic::amdgcn_global_load_tr_b128;
846 break;
847 case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
848 IID = Intrinsic::amdgcn_global_load_tr4_b64;
849 break;
850 case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
851 IID = Intrinsic::amdgcn_global_load_tr6_b96;
852 break;
853 case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
854 IID = Intrinsic::amdgcn_ds_load_tr4_b64;
855 break;
856 case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
857 IID = Intrinsic::amdgcn_ds_load_tr6_b96;
858 break;
859 case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
860 IID = Intrinsic::amdgcn_ds_load_tr8_b64;
861 break;
862 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
863 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
864 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
865 IID = Intrinsic::amdgcn_ds_load_tr16_b128;
866 break;
867 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
868 IID = Intrinsic::amdgcn_ds_read_tr4_b64;
869 break;
870 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
871 IID = Intrinsic::amdgcn_ds_read_tr8_b64;
872 break;
873 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
874 IID = Intrinsic::amdgcn_ds_read_tr6_b96;
875 break;
876 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16:
877 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
878 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
879 IID = Intrinsic::amdgcn_ds_read_tr16_b64;
880 break;
881 }
882 llvm::Type *LoadTy = ConvertType(E->getType());
883 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
884 llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
885 return Builder.CreateCall(F, {Addr});
886 }
887 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
888 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
889 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
890 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
891 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
892 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128: {
893
894 Intrinsic::ID IID;
895 switch (BuiltinID) {
896 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
897 IID = Intrinsic::amdgcn_global_load_monitor_b32;
898 break;
899 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
900 IID = Intrinsic::amdgcn_global_load_monitor_b64;
901 break;
902 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
903 IID = Intrinsic::amdgcn_global_load_monitor_b128;
904 break;
905 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
906 IID = Intrinsic::amdgcn_flat_load_monitor_b32;
907 break;
908 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
909 IID = Intrinsic::amdgcn_flat_load_monitor_b64;
910 break;
911 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128:
912 IID = Intrinsic::amdgcn_flat_load_monitor_b128;
913 break;
914 }
915
916 LLVMContext &Ctx = CGM.getLLVMContext();
917 llvm::Type *LoadTy = ConvertType(E->getType());
918 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
919
920 auto *AOExpr = cast<llvm::ConstantInt>(EmitScalarExpr(E->getArg(1)));
921 auto *ScopeExpr = cast<llvm::ConstantInt>(EmitScalarExpr(E->getArg(2)));
922
923 auto Scope = static_cast<SyncScope>(ScopeExpr->getZExtValue());
924 llvm::AtomicOrdering AO = mapCABIAtomicOrdering(AOExpr->getZExtValue());
925
926 StringRef ScopeStr = CGM.getTargetCodeGenInfo().getLLVMSyncScopeStr(
927 CGM.getLangOpts(), Scope, AO);
928
929 llvm::MDNode *MD =
930 llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, ScopeStr)});
931 llvm::Value *ScopeMD = llvm::MetadataAsValue::get(Ctx, MD);
932 llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
933 return Builder.CreateCall(F, {Addr, AOExpr, ScopeMD});
934 }
935 case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:
936 case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:
937 case AMDGPU::BI__builtin_amdgcn_cluster_load_b128: {
938 Intrinsic::ID IID;
939 switch (BuiltinID) {
940 case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:
941 IID = Intrinsic::amdgcn_cluster_load_b32;
942 break;
943 case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:
944 IID = Intrinsic::amdgcn_cluster_load_b64;
945 break;
946 case AMDGPU::BI__builtin_amdgcn_cluster_load_b128:
947 IID = Intrinsic::amdgcn_cluster_load_b128;
948 break;
949 }
950 SmallVector<Value *, 3> Args;
951 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
952 Args.push_back(EmitScalarExpr(E->getArg(i)));
953 llvm::Function *F = CGM.getIntrinsic(IID, {ConvertType(E->getType())});
954 return Builder.CreateCall(F, {Args});
955 }
956 case AMDGPU::BI__builtin_amdgcn_load_to_lds: {
957 // Should this have asan instrumentation?
958 return emitBuiltinWithOneOverloadedType<5>(*this, E,
959 Intrinsic::amdgcn_load_to_lds);
960 }
961 case AMDGPU::BI__builtin_amdgcn_load_async_to_lds: {
962 // Should this have asan instrumentation?
963 return emitBuiltinWithOneOverloadedType<4>(
964 *this, E, Intrinsic::amdgcn_load_async_to_lds);
965 }
966 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
967 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
968 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
969 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
970 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
971 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B: {
972 Intrinsic::ID IID;
973 switch (BuiltinID) {
974 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
975 IID = Intrinsic::amdgcn_cooperative_atomic_load_32x4B;
976 break;
977 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
978 IID = Intrinsic::amdgcn_cooperative_atomic_store_32x4B;
979 break;
980 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
981 IID = Intrinsic::amdgcn_cooperative_atomic_load_16x8B;
982 break;
983 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
984 IID = Intrinsic::amdgcn_cooperative_atomic_store_16x8B;
985 break;
986 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
987 IID = Intrinsic::amdgcn_cooperative_atomic_load_8x16B;
988 break;
989 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B:
990 IID = Intrinsic::amdgcn_cooperative_atomic_store_8x16B;
991 break;
992 }
993
994 LLVMContext &Ctx = CGM.getLLVMContext();
995 SmallVector<Value *, 4> Args;
996 // The last argument is an MD string.
997 const unsigned ScopeArg = E->getNumArgs() - 1;
998 for (unsigned i = 0; i != ScopeArg; ++i)
999 Args.push_back(EmitScalarExpr(E->getArg(i)));
1000 StringRef Arg = cast<StringLiteral>(E->getArg(ScopeArg)->IgnoreParenCasts())
1001 ->getString();
1002 llvm::MDNode *MD = llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, Arg)});
1003 Args.push_back(llvm::MetadataAsValue::get(Ctx, MD));
1004 // Intrinsic is typed based on the pointer AS. Pointer is always the first
1005 // argument.
1006 llvm::Function *F = CGM.getIntrinsic(IID, {Args[0]->getType()});
1007 return Builder.CreateCall(F, {Args});
1008 }
1009 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
1010 Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
1011 {llvm::Type::getInt64Ty(getLLVMContext())});
1012 return Builder.CreateCall(F);
1013 }
1014 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
1015 Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv,
1016 {llvm::Type::getInt64Ty(getLLVMContext())});
1017 llvm::Value *Env = EmitScalarExpr(E->getArg(0));
1018 return Builder.CreateCall(F, {Env});
1019 }
1020 case AMDGPU::BI__builtin_amdgcn_read_exec:
1021 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
1022 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
1023 return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
1024 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
1025 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
1026 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
1027 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
1028 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
1029 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
1030 llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
1031 llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
1032 llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
1033 llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
1034 llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
1035 llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
1036
1037 // The builtins take these arguments as vec4 where the last element is
1038 // ignored. The intrinsic takes them as vec3.
1039 RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
1040 {0, 1, 2});
1041 RayDir =
1042 Builder.CreateShuffleVector(RayDir, RayDir, {0, 1, 2});
1043 RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
1044 {0, 1, 2});
1045
1046 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
1047 {NodePtr->getType(), RayDir->getType()});
1048 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
1049 RayInverseDir, TextureDescr});
1050 }
1051 case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
1052 case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray: {
1053 Intrinsic::ID IID;
1054 switch (BuiltinID) {
1055 case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
1056 IID = Intrinsic::amdgcn_image_bvh8_intersect_ray;
1057 break;
1058 case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray:
1059 IID = Intrinsic::amdgcn_image_bvh_dual_intersect_ray;
1060 break;
1061 }
1062 llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
1063 llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
1064 llvm::Value *InstanceMask = EmitScalarExpr(E->getArg(2));
1065 llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(3));
1066 llvm::Value *RayDir = EmitScalarExpr(E->getArg(4));
1067 llvm::Value *Offset = EmitScalarExpr(E->getArg(5));
1068 llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(6));
1069
1070 Address RetRayOriginPtr = EmitPointerWithAlignment(E->getArg(7));
1071 Address RetRayDirPtr = EmitPointerWithAlignment(E->getArg(8));
1072
1073 llvm::Function *IntrinsicFunc = CGM.getIntrinsic(IID);
1074
1075 llvm::CallInst *CI = Builder.CreateCall(
1076 IntrinsicFunc, {NodePtr, RayExtent, InstanceMask, RayOrigin, RayDir,
1077 Offset, TextureDescr});
1078
1079 llvm::Value *RetVData = Builder.CreateExtractValue(CI, 0);
1080 llvm::Value *RetRayOrigin = Builder.CreateExtractValue(CI, 1);
1081 llvm::Value *RetRayDir = Builder.CreateExtractValue(CI, 2);
1082
1083 Builder.CreateStore(RetRayOrigin, RetRayOriginPtr);
1084 Builder.CreateStore(RetRayDir, RetRayDirPtr);
1085
1086 return RetVData;
1087 }
1088
1089 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
1090 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
1091 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
1092 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn: {
1093 Intrinsic::ID IID;
1094 switch (BuiltinID) {
1095 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
1096 IID = Intrinsic::amdgcn_ds_bvh_stack_rtn;
1097 break;
1098 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
1099 IID = Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn;
1100 break;
1101 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
1102 IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn;
1103 break;
1104 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn:
1105 IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn;
1106 break;
1107 }
1108
1109 SmallVector<Value *, 4> Args;
1110 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
1111 Args.push_back(EmitScalarExpr(E->getArg(i)));
1112
1113 Function *F = CGM.getIntrinsic(IID);
1114 Value *Call = Builder.CreateCall(F, Args);
1115 Value *Rtn = Builder.CreateExtractValue(Call, 0);
1116 Value *A = Builder.CreateExtractValue(Call, 1);
1117 llvm::Type *RetTy = ConvertType(E->getType());
1118 Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
1119 (uint64_t)0);
1120 // ds_bvh_stack_push8_pop2_rtn returns {i64, i32} but the builtin returns
1121 // <2 x i64>, so zext the second value.
1122 if (A->getType()->getPrimitiveSizeInBits() <
1123 RetTy->getScalarType()->getPrimitiveSizeInBits())
1124 A = Builder.CreateZExt(A, RetTy->getScalarType());
1125
1126 return Builder.CreateInsertElement(I0, A, 1);
1127 }
1128 case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
1129 case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
1130 return EmitAMDGCNImageOverloadedReturnType(
1131 *this, E, Intrinsic::amdgcn_image_load_1d, false);
1132 case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
1133 case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
1134 return EmitAMDGCNImageOverloadedReturnType(
1135 *this, E, Intrinsic::amdgcn_image_load_1darray, false);
1136 case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
1137 case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
1138 case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
1139 return EmitAMDGCNImageOverloadedReturnType(
1140 *this, E, Intrinsic::amdgcn_image_load_2d, false);
1141 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
1142 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
1143 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
1144 return EmitAMDGCNImageOverloadedReturnType(
1145 *this, E, Intrinsic::amdgcn_image_load_2darray, false);
1146 case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
1147 case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
1148 return EmitAMDGCNImageOverloadedReturnType(
1149 *this, E, Intrinsic::amdgcn_image_load_3d, false);
1150 case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
1151 case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
1152 return EmitAMDGCNImageOverloadedReturnType(
1153 *this, E, Intrinsic::amdgcn_image_load_cube, false);
1154 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
1155 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
1156 return EmitAMDGCNImageOverloadedReturnType(
1157 *this, E, Intrinsic::amdgcn_image_load_mip_1d, false);
1158 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
1159 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
1160 return EmitAMDGCNImageOverloadedReturnType(
1161 *this, E, Intrinsic::amdgcn_image_load_mip_1darray, false);
1162 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
1163 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
1164 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
1165 return EmitAMDGCNImageOverloadedReturnType(
1166 *this, E, Intrinsic::amdgcn_image_load_mip_2d, false);
1167 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
1168 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
1169 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
1170 return EmitAMDGCNImageOverloadedReturnType(
1171 *this, E, Intrinsic::amdgcn_image_load_mip_2darray, false);
1172 case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
1173 case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
1174 return EmitAMDGCNImageOverloadedReturnType(
1175 *this, E, Intrinsic::amdgcn_image_load_mip_3d, false);
1176 case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
1177 case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
1178 return EmitAMDGCNImageOverloadedReturnType(
1179 *this, E, Intrinsic::amdgcn_image_load_mip_cube, false);
1180 case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
1181 case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
1182 return EmitAMDGCNImageOverloadedReturnType(
1183 *this, E, Intrinsic::amdgcn_image_store_1d, true);
1184 case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
1185 case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
1186 return EmitAMDGCNImageOverloadedReturnType(
1187 *this, E, Intrinsic::amdgcn_image_store_1darray, true);
1188 case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
1189 case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
1190 case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
1191 return EmitAMDGCNImageOverloadedReturnType(
1192 *this, E, Intrinsic::amdgcn_image_store_2d, true);
1193 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
1194 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
1195 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
1196 return EmitAMDGCNImageOverloadedReturnType(
1197 *this, E, Intrinsic::amdgcn_image_store_2darray, true);
1198 case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
1199 case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
1200 return EmitAMDGCNImageOverloadedReturnType(
1201 *this, E, Intrinsic::amdgcn_image_store_3d, true);
1202 case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
1203 case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
1204 return EmitAMDGCNImageOverloadedReturnType(
1205 *this, E, Intrinsic::amdgcn_image_store_cube, true);
1206 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
1207 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
1208 return EmitAMDGCNImageOverloadedReturnType(
1209 *this, E, Intrinsic::amdgcn_image_store_mip_1d, true);
1210 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
1211 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
1212 return EmitAMDGCNImageOverloadedReturnType(
1213 *this, E, Intrinsic::amdgcn_image_store_mip_1darray, true);
1214 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
1215 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
1216 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
1217 return EmitAMDGCNImageOverloadedReturnType(
1218 *this, E, Intrinsic::amdgcn_image_store_mip_2d, true);
1219 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
1220 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
1221 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
1222 return EmitAMDGCNImageOverloadedReturnType(
1223 *this, E, Intrinsic::amdgcn_image_store_mip_2darray, true);
1224 case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
1225 case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
1226 return EmitAMDGCNImageOverloadedReturnType(
1227 *this, E, Intrinsic::amdgcn_image_store_mip_3d, true);
1228 case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
1229 case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32:
1230 return EmitAMDGCNImageOverloadedReturnType(
1231 *this, E, Intrinsic::amdgcn_image_store_mip_cube, true);
1232 case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
1233 case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
1234 return EmitAMDGCNImageOverloadedReturnType(
1235 *this, E, Intrinsic::amdgcn_image_sample_1d, false);
1236 case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
1237 case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
1238 return EmitAMDGCNImageOverloadedReturnType(
1239 *this, E, Intrinsic::amdgcn_image_sample_1darray, false);
1240 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
1241 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
1242 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
1243 return EmitAMDGCNImageOverloadedReturnType(
1244 *this, E, Intrinsic::amdgcn_image_sample_2d, false);
1245 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
1246 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
1247 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
1248 return EmitAMDGCNImageOverloadedReturnType(
1249 *this, E, Intrinsic::amdgcn_image_sample_2darray, false);
1250 case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
1251 case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
1252 return EmitAMDGCNImageOverloadedReturnType(
1253 *this, E, Intrinsic::amdgcn_image_sample_3d, false);
1254 case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
1255 case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
1256 return EmitAMDGCNImageOverloadedReturnType(
1257 *this, E, Intrinsic::amdgcn_image_sample_cube, false);
1258 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32:
1259 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32:
1260 return EmitAMDGCNImageOverloadedReturnType(
1261 *this, E, Intrinsic::amdgcn_image_sample_lz_1d, false);
1262 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32:
1263 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32:
1264 return EmitAMDGCNImageOverloadedReturnType(
1265 *this, E, Intrinsic::amdgcn_image_sample_l_1d, false);
1266 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32:
1267 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32:
1268 return EmitAMDGCNImageOverloadedReturnType(
1269 *this, E, Intrinsic::amdgcn_image_sample_d_1d, false);
1270 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32:
1271 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32:
1272 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32:
1273 return EmitAMDGCNImageOverloadedReturnType(
1274 *this, E, Intrinsic::amdgcn_image_sample_lz_2d, false);
1275 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32:
1276 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32:
1277 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32:
1278 return EmitAMDGCNImageOverloadedReturnType(
1279 *this, E, Intrinsic::amdgcn_image_sample_l_2d, false);
1280 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32:
1281 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32:
1282 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32:
1283 return EmitAMDGCNImageOverloadedReturnType(
1284 *this, E, Intrinsic::amdgcn_image_sample_d_2d, false);
1285 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32:
1286 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32:
1287 return EmitAMDGCNImageOverloadedReturnType(
1288 *this, E, Intrinsic::amdgcn_image_sample_lz_3d, false);
1289 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32:
1290 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32:
1291 return EmitAMDGCNImageOverloadedReturnType(
1292 *this, E, Intrinsic::amdgcn_image_sample_l_3d, false);
1293 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32:
1294 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32:
1295 return EmitAMDGCNImageOverloadedReturnType(
1296 *this, E, Intrinsic::amdgcn_image_sample_d_3d, false);
1297 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32:
1298 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32:
1299 return EmitAMDGCNImageOverloadedReturnType(
1300 *this, E, Intrinsic::amdgcn_image_sample_lz_cube, false);
1301 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32:
1302 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32:
1303 return EmitAMDGCNImageOverloadedReturnType(
1304 *this, E, Intrinsic::amdgcn_image_sample_l_cube, false);
1305 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32:
1306 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32:
1307 return EmitAMDGCNImageOverloadedReturnType(
1308 *this, E, Intrinsic::amdgcn_image_sample_lz_1darray, false);
1309 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32:
1310 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32:
1311 return EmitAMDGCNImageOverloadedReturnType(
1312 *this, E, Intrinsic::amdgcn_image_sample_l_1darray, false);
1313 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32:
1314 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32:
1315 return EmitAMDGCNImageOverloadedReturnType(
1316 *this, E, Intrinsic::amdgcn_image_sample_d_1darray, false);
1317 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32:
1318 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32:
1319 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32:
1320 return EmitAMDGCNImageOverloadedReturnType(
1321 *this, E, Intrinsic::amdgcn_image_sample_lz_2darray, false);
1322 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32:
1323 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32:
1324 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32:
1325 return EmitAMDGCNImageOverloadedReturnType(
1326 *this, E, Intrinsic::amdgcn_image_sample_l_2darray, false);
1327 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32:
1328 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32:
1329 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32:
1330 return EmitAMDGCNImageOverloadedReturnType(
1331 *this, E, Intrinsic::amdgcn_image_sample_d_2darray, false);
1332 case clang::AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32:
1333 return EmitAMDGCNImageOverloadedReturnType(
1333 return emitAMDGCNImageOverloadedReturnType(
1334 *this, E, Intrinsic::amdgcn_image_gather4_lz_2d, false);
1335 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
1336 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
1337 llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
1338 Function *F = CGM.getIntrinsic(
1339 BuiltinID == AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4
1340 ? Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4
1341 : Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4,
1342 {VT, VT});
1343
1344 SmallVector<Value *, 9> Args;
1345 for (unsigned I = 0, N = E->getNumArgs(); I != N; ++I)
1346 Args.push_back(EmitScalarExpr(E->getArg(I)));
1347 return Builder.CreateCall(F, Args);
1348 }
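// NOTE (editor's illustration, not part of the original source): the f8f6f4
// MFMA scale intrinsics are overloaded on both source-matrix types, so the
// {VT, VT} mangling above pins A and B to <8 x i32>. A call such as
//   d = __builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4(a, b, c, ...);
// is expected to lower to a call of
//   @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32
// with the remaining format/scale operands passed through unchanged.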
1349 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
1350 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
1351 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
1352 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
1353 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
1354 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
1355 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
1356 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
1357 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
1358 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
1359 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
1360 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
1361 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
1362 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
1363 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
1364 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
1365 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
1366 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
1367 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
1368 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
1369 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
1370 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
1371 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
1372 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
1373 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
1374 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
1375 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
1376 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
1377 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
1378 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
1379 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
1380 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
1381 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
1382 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
1383 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
1384 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
1385 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
1386 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
1387 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
1388 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
1389 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
1390 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
1391 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
1392 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
1393 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
1394 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
1395 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
1396 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
1397 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
1398 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
1399 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
1400 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
1401 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
1402 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
1403 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
1404 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
1405 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
1406 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
1407 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
1408 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
1409 // GFX1250 WMMA builtins
1410 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
1411 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
1412 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
1413 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
1414 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
1415 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
1416 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
1417 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
1418 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
1419 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
1420 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
1421 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
1422 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
1423 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
1424 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
1425 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
1426 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
1427 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
1428 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
1429 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
1430 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
1431 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
1432 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
1433 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
1434 case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
1435 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1436 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
1437 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:
1438 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:
1439 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
1440 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
1441 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
1442 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
1443 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
1444 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
1445 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
1446 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
1447 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
1448 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
1449 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
1450 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
1451 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
1452 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8: {
1453
1454 // These operations perform a matrix multiplication and accumulation of
1455 // the form:
1456 // D = A * B + C
1457 // We need to specify one type for the A/B matrices and one for the C/D
1458 // matrices. Sparse matrix operations can have different types for A and B,
1459 // as well as an additional type for the sparsity index.
1460 // The destination type should be placed before the types used for the
1460 // source operands.
1461 SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
1462 // On GFX12, the intrinsics with 16-bit accumulator use a packed layout.
1463 // There is no need for the variable opsel argument, so always set it to
1464 // "false".
1465 bool AppendFalseForOpselArg = false;
1466 unsigned BuiltinWMMAOp;
1467 // Need return type when D and C are of different types.
1468 bool NeedReturnType = false;
1469
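// NOTE (editor's illustration, not part of the original source): the index
// lists below select which call operands supply the overloaded matrix types.
// For example, for the gfx11 wave32 form, {2, 0} picks C/D (operand 2) and
// A/B (operand 0), so a call like
//   float8 d = __builtin_amdgcn_wmma_f32_16x16x16_f16_w32(a, b, c);
// is expected to resolve to an intrinsic mangled on those two types, e.g.
//   @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v16f16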
1470 switch (BuiltinID) {
1471 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
1472 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
1473 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
1474 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
1475 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1476 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
1477 break;
1478 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
1479 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
1480 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
1481 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
1482 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1483 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
1484 break;
1485 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
1486 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
1487 AppendFalseForOpselArg = true;
1488 [[fallthrough]];
1489 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
1490 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
1491 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1492 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
1493 break;
1494 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
1495 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
1496 AppendFalseForOpselArg = true;
1497 [[fallthrough]];
1498 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
1499 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
1500 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1501 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
1502 break;
1503 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
1504 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
1505 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1506 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
1507 break;
1508 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
1509 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
1510 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1511 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
1512 break;
1513 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
1514 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
1515 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
1516 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
1517 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
1518 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
1519 break;
1520 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
1521 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
1522 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
1523 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
1524 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
1525 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
1526 break;
1527 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
1528 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
1529 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1530 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
1531 break;
1532 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
1533 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
1534 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1535 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
1536 break;
1537 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
1538 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
1539 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1540 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
1541 break;
1542 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
1543 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
1544 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1545 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
1546 break;
1547 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
1548 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
1549 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
1550 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
1551 break;
1552 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
1553 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
1554 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1555 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
1556 break;
1557 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
1558 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
1559 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1560 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
1561 break;
1562 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
1563 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
1564 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1565 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
1566 break;
1567 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
1568 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
1569 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1570 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
1571 break;
1572 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
1573 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
1574 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
1575 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
1576 break;
1577 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
1578 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
1579 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
1580 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
1581 break;
1582 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
1583 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
1584 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
1585 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
1586 break;
1587 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
1588 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
1589 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1590 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
1591 break;
1592 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
1593 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
1594 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1595 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
1596 break;
1597 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
1598 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
1599 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1600 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
1601 break;
1602 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
1603 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
1604 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1605 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
1606 break;
1607 // GFX1250 WMMA builtins
1608 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
1609 ArgsForMatchingMatrixTypes = {5, 1};
1610 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x4_f32;
1611 break;
1612 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
1613 ArgsForMatchingMatrixTypes = {5, 1};
1614 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_bf16;
1615 break;
1616 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
1617 ArgsForMatchingMatrixTypes = {5, 1};
1618 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_f16;
1619 break;
1620 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
1621 ArgsForMatchingMatrixTypes = {5, 1};
1622 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x32_f16;
1623 break;
1624 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
1625 ArgsForMatchingMatrixTypes = {5, 1};
1626 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16;
1627 break;
1628 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
1629 NeedReturnType = true;
1630 ArgsForMatchingMatrixTypes = {1, 5};
1631 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16;
1632 break;
1633 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
1634 ArgsForMatchingMatrixTypes = {3, 0};
1635 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_fp8;
1636 break;
1637 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
1638 ArgsForMatchingMatrixTypes = {3, 0};
1639 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_bf8;
1640 break;
1641 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
1642 ArgsForMatchingMatrixTypes = {3, 0};
1643 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_fp8;
1644 break;
1645 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
1646 ArgsForMatchingMatrixTypes = {3, 0};
1647 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_bf8;
1648 break;
1649 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
1650 ArgsForMatchingMatrixTypes = {3, 0};
1651 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_fp8;
1652 break;
1653 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
1654 ArgsForMatchingMatrixTypes = {3, 0};
1655 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_bf8;
1656 break;
1657 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
1658 ArgsForMatchingMatrixTypes = {3, 0};
1659 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_fp8;
1660 break;
1661 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
1662 ArgsForMatchingMatrixTypes = {3, 0};
1663 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_bf8;
1664 break;
1665 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
1666 ArgsForMatchingMatrixTypes = {3, 0};
1667 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_fp8;
1668 break;
1669 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
1670 ArgsForMatchingMatrixTypes = {3, 0};
1671 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_bf8;
1672 break;
1673 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
1674 ArgsForMatchingMatrixTypes = {3, 0};
1675 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_fp8;
1676 break;
1677 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
1678 ArgsForMatchingMatrixTypes = {3, 0};
1679 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_bf8;
1680 break;
1681 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
1682 ArgsForMatchingMatrixTypes = {3, 0};
1683 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_fp8;
1684 break;
1685 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
1686 ArgsForMatchingMatrixTypes = {3, 0};
1687 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_bf8;
1688 break;
1689 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
1690 ArgsForMatchingMatrixTypes = {3, 0};
1691 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_fp8;
1692 break;
1693 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
1694 ArgsForMatchingMatrixTypes = {3, 0};
1695 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_bf8;
1696 break;
1697 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
1698 ArgsForMatchingMatrixTypes = {4, 1};
1699 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x64_iu8;
1700 break;
1701 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
1702 ArgsForMatchingMatrixTypes = {5, 1, 3};
1703 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4;
1704 break;
1705 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1706 ArgsForMatchingMatrixTypes = {5, 1, 3};
1707 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4;
1708 break;
1709 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
1710 ArgsForMatchingMatrixTypes = {5, 1, 3};
1711 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4;
1712 break;
1713 case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
1714 ArgsForMatchingMatrixTypes = {3, 0, 1};
1715 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_32x16x128_f4;
1716 break;
1717 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:
1718 ArgsForMatchingMatrixTypes = {3, 0, 1};
1719 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_32x16x128_f4;
1720 break;
1721 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:
1722 ArgsForMatchingMatrixTypes = {3, 0, 1};
1723 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_32x16x128_f4;
1724 break;
1725 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
1726 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1727 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_f16;
1728 break;
1729 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
1730 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1731 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_bf16;
1732 break;
1733 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
1734 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1735 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x64_f16;
1736 break;
1737 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
1738 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1739 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x64_bf16;
1740 break;
1741 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
1742 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1743 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16f32_16x16x64_bf16;
1744 break;
1745 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
1746 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1747 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_fp8;
1748 break;
1749 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
1750 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1751 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_bf8;
1752 break;
1753 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
1754 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1755 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_fp8;
1756 break;
1757 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
1758 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1759 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_bf8;
1760 break;
1761 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
1762 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1763 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_fp8;
1764 break;
1765 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
1766 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1767 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_bf8;
1768 break;
1769 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
1770 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1771 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_fp8;
1772 break;
1773 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
1774 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1775 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_bf8;
1776 break;
1777 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8:
1778 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1779 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8;
1780 break;
1781 }
1782
1783 SmallVector<Value *> Args;
1784 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
1785 Args.push_back(EmitScalarExpr(E->getArg(i)));
1786 if (AppendFalseForOpselArg)
1787 Args.push_back(Builder.getFalse());
1788
1789 // Handle the optional clamp argument of the following two builtins.
1790 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8) {
1791 if (Args.size() == 7)
1792 Args.push_back(Builder.getFalse());
1793 assert(Args.size() == 8 && "Expected 8 arguments");
1794 Args[7] = Builder.CreateZExtOrTrunc(Args[7], Builder.getInt1Ty());
1795 } else if (BuiltinID ==
1796 AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8) {
1797 if (Args.size() == 8)
1798 Args.push_back(Builder.getFalse());
1799 assert(Args.size() == 9 && "Expected 9 arguments");
1800 Args[8] = Builder.CreateZExtOrTrunc(Args[8], Builder.getInt1Ty());
1801 }
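// NOTE (editor's illustration, not part of the original source): e.g. a
// 7-argument call to __builtin_amdgcn_wmma_i32_16x16x64_iu8 gets a constant
// false appended as the clamp operand, while an explicitly passed clamp
// argument is normalized to i1, so the intrinsic always sees a boolean flag.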
1802
1803 SmallVector<llvm::Type *, 6> ArgTypes;
1804 if (NeedReturnType)
1805 ArgTypes.push_back(ConvertType(E->getType()));
1806 for (auto ArgIdx : ArgsForMatchingMatrixTypes)
1807 ArgTypes.push_back(Args[ArgIdx]->getType());
1808
1809 Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
1810 return Builder.CreateCall(F, Args);
1811 }
1812 // amdgcn workgroup size
1813 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
1814 return EmitAMDGPUWorkGroupSize(*this, 0);
1815 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
1816 return EmitAMDGPUWorkGroupSize(*this, 1);
1817 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
1818 return EmitAMDGPUWorkGroupSize(*this, 2);
1819
1820 // amdgcn grid size
1821 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
1822 return EmitAMDGPUGridSize(*this, 0);
1823 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
1824 return EmitAMDGPUGridSize(*this, 1);
1825 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
1826 return EmitAMDGPUGridSize(*this, 2);
1827
1828 // r600 intrinsics
1829 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
1830 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
1831 return emitBuiltinWithOneOverloadedType<1>(*this, E,
1832 Intrinsic::r600_recipsqrt_ieee);
1833 case AMDGPU::BI__builtin_amdgcn_alignbit: {
1834 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
1835 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
1836 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
1837 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
1838 return Builder.CreateCall(F, { Src0, Src1, Src2 });
1839 }
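// NOTE (editor's illustration, not part of the original source): alignbit is
// a 32-bit funnel shift right, so
//   __builtin_amdgcn_alignbit(hi, lo, s)
// computes (((uint64_t)hi << 32 | lo) >> (s & 31)) truncated to 32 bits,
// which is exactly the semantics of @llvm.fshr.i32(hi, lo, s).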
1840 case AMDGPU::BI__builtin_amdgcn_fence: {
1841 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
1842 EmitScalarExpr(E->getArg(1)), AO, SSID);
1843 FenceInst *Fence = Builder.CreateFence(AO, SSID);
1844 if (E->getNumArgs() > 2)
1845 AddAMDGPUFenceAddressSpaceMMRA(Fence, E);
1846 return Fence;
1847 }
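// NOTE (editor's illustration, not part of the original source): a call such
// as __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup") lowers to
//   fence syncscope("workgroup") seq_cst
// and any additional address-space arguments are attached to the fence as
// MMRA metadata rather than changing the ordering.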
1848 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
1849 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
1850 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
1851 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
1852 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
1853 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
1854 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
1855 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
1856 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
1857 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
1858 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
1859 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
1860 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
1861 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1862 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1863 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1864 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1865 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
1866 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1867 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1868 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
1869 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
1870 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
1871 llvm::AtomicRMWInst::BinOp BinOp;
1872 switch (BuiltinID) {
1873 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
1874 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
1875 BinOp = llvm::AtomicRMWInst::UIncWrap;
1876 break;
1877 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
1878 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
1879 BinOp = llvm::AtomicRMWInst::UDecWrap;
1880 break;
1881 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
1882 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
1883 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
1884 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
1885 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
1886 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
1887 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
1888 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1889 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1890 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1891 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1892 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
1893 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1894 BinOp = llvm::AtomicRMWInst::FAdd;
1895 break;
1896 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
1897 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1898 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
1899 BinOp = llvm::AtomicRMWInst::FMin;
1900 break;
1901 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
1902 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
1903 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
1904 BinOp = llvm::AtomicRMWInst::FMax;
1905 break;
1906 }
1907
1908 Address Ptr = CheckAtomicAlignment(*this, E);
1909 Value *Val = EmitScalarExpr(E->getArg(1));
1910 llvm::Type *OrigTy = Val->getType();
1911 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1912
1913 bool Volatile;
1914
1915 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
1916 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
1917 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
1918 // The __builtin_amdgcn_ds_faddf/fminf/fmaxf builtins take an explicit volatile argument.
1919 Volatile =
1920 cast<ConstantInt>(EmitScalarExpr(E->getArg(4)))->getZExtValue();
1921 } else {
1922 // Infer volatile from the passed type.
1923 Volatile =
1924 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1925 }
1926
1927 if (E->getNumArgs() >= 4) {
1928 // Some of the builtins have explicit ordering and scope arguments.
1929 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
1930 EmitScalarExpr(E->getArg(3)), AO, SSID);
1931 } else {
1932 // Most of the builtins do not have syncscope/order arguments. For DS
1933 // atomics the scope doesn't really matter, as they implicitly operate at
1934 // workgroup scope.
1935 //
1936 // The global/flat cases need to use agent scope to consistently produce
1937 // the native instruction instead of a cmpxchg expansion.
1938 if (getTarget().getTriple().isSPIRV())
1939 SSID = getLLVMContext().getOrInsertSyncScopeID("device");
1940 else
1941 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
1942 AO = AtomicOrdering::Monotonic;
1943
1944 // The v2bf16 builtin uses i16 instead of a natural bfloat type.
1945 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
1946 BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
1947 BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
1948 llvm::Type *V2BF16Ty = FixedVectorType::get(
1949 llvm::Type::getBFloatTy(Builder.getContext()), 2);
1950 Val = Builder.CreateBitCast(Val, V2BF16Ty);
1951 }
1952 }
1953
1954 llvm::AtomicRMWInst *RMW =
1955 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
1956 if (Volatile)
1957 RMW->setVolatile(true);
1958
1959 unsigned AddrSpace = Ptr.getType()->getAddressSpace();
1960 if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
1961 // Most targets require "amdgpu.no.fine.grained.memory" to emit the native
1962 // instruction for flat and global operations.
1963 llvm::MDTuple *EmptyMD = MDNode::get(getLLVMContext(), {});
1964 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
1965
1966 // Most targets require "amdgpu.ignore.denormal.mode" to emit the native
1967 // instruction, but this only matters for float fadd.
1968 if (BinOp == llvm::AtomicRMWInst::FAdd && Val->getType()->isFloatTy())
1969 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
1970 }
1971
1972 return Builder.CreateBitCast(RMW, OrigTy);
1973 }
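// NOTE (editor's illustration, not part of the original source): with the
// defaults chosen above, a call like
//   __builtin_amdgcn_global_atomic_fadd_f32(p, v)
// is expected to become
//   %old = atomicrmw fadd ptr addrspace(1) %p, float %v
//          syncscope("agent") monotonic,
//          !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
// which the backend can select to the native instruction.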
1974 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
1975 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
1976 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
1977 llvm::Type *ResultType = ConvertType(E->getType());
1978 // s_sendmsg_rtn is mangled using return type only.
1979 Function *F =
1980 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
1981 return Builder.CreateCall(F, {Arg});
1982 }
1983 case AMDGPU::BI__builtin_amdgcn_permlane16_swap:
1984 case AMDGPU::BI__builtin_amdgcn_permlane32_swap: {
1985 // Because builtin types are limited, and the intrinsic uses a struct/pair
1986 // output, marshal the pair-of-i32 to <2 x i32>.
1987 Value *VDstOld = EmitScalarExpr(E->getArg(0));
1988 Value *VSrcOld = EmitScalarExpr(E->getArg(1));
1989 Value *FI = EmitScalarExpr(E->getArg(2));
1990 Value *BoundCtrl = EmitScalarExpr(E->getArg(3));
1991 Function *F =
1992 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16_swap
1993 ? Intrinsic::amdgcn_permlane16_swap
1994 : Intrinsic::amdgcn_permlane32_swap);
1995 llvm::CallInst *Call =
1996 Builder.CreateCall(F, {VDstOld, VSrcOld, FI, BoundCtrl});
1997
1998 llvm::Value *Elt0 = Builder.CreateExtractValue(Call, 0);
1999 llvm::Value *Elt1 = Builder.CreateExtractValue(Call, 1);
2000
2001 llvm::Type *ResultType = ConvertType(E->getType());
2002
2003 llvm::Value *Insert0 = Builder.CreateInsertElement(
2004 llvm::PoisonValue::get(ResultType), Elt0, UINT64_C(0));
2005 llvm::Value *AsVector =
2006 Builder.CreateInsertElement(Insert0, Elt1, UINT64_C(1));
2007 return AsVector;
2008 }
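// NOTE (editor's illustration, not part of the original source): the
// intrinsic returns a {i32, i32} pair, so the code above unpacks it with two
// extractvalue instructions and repacks it as <2 x i32> by inserting into a
// poison vector, matching the builtin's vector return type.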
2009 case AMDGPU::BI__builtin_amdgcn_bitop3_b32:
2010 case AMDGPU::BI__builtin_amdgcn_bitop3_b16:
2011 return emitBuiltinWithOneOverloadedType<4>(*this, E,
2012 Intrinsic::amdgcn_bitop3);
2013 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc: {
2014 // TODO: LLVM has this overloaded to allow for fat pointers, but since
2015 // those haven't been plumbed through to Clang yet, default to creating the
2016 // resource type.
2017 SmallVector<Value *, 4> Args;
2018 for (unsigned I = 0; I < 4; ++I)
2019 Args.push_back(EmitScalarExpr(E->getArg(I)));
2020 llvm::PointerType *RetTy = llvm::PointerType::get(
2021 Builder.getContext(), llvm::AMDGPUAS::BUFFER_RESOURCE);
2022 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_make_buffer_rsrc,
2023 {RetTy, Args[0]->getType()});
2024 return Builder.CreateCall(F, Args);
2025 }
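// NOTE (editor's illustration, not part of the original source): the four
// operands are the base pointer, stride, extent (num records), and flags,
// and the result is a ptr addrspace(8) buffer resource, e.g.
//   %rsrc = call ptr addrspace(8)
//       @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr %base, i16 %stride,
//                                           i32 %numrecords, i32 %flags)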
2026 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
2027 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
2028 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
2029 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
2030 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
2031 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
2032 return emitBuiltinWithOneOverloadedType<5>(
2033 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
2034 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
2035 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
2036 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
2037 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
2038 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
2039 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
2040 llvm::Type *RetTy = nullptr;
2041 switch (BuiltinID) {
2042 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
2043 RetTy = Int8Ty;
2044 break;
2045 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
2046 RetTy = Int16Ty;
2047 break;
2048 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
2049 RetTy = Int32Ty;
2050 break;
2051 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
2052 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/2);
2053 break;
2054 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
2055 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/3);
2056 break;
2057 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
2058 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/4);
2059 break;
2060 }
2061 Function *F =
2062 CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
2063 return Builder.CreateCall(
2064 F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
2065 EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
2066 }
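// NOTE (editor's illustration, not part of the original source): the builtin
// name encodes only the bit width, so the switch above picks an integer or
// <N x i32> return type of that width. For example,
//   __builtin_amdgcn_raw_buffer_load_b96(rsrc, offset, soffset, aux)
// becomes
//   call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(
//        ptr addrspace(8) %rsrc, i32 %offset, i32 %soffset, i32 %aux)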
2067 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
2068 return emitBuiltinWithOneOverloadedType<5>(
2069 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_add);
2070 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:
2071 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
2072 return emitBuiltinWithOneOverloadedType<5>(
2073 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd);
2074 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32:
2075 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64:
2076 return emitBuiltinWithOneOverloadedType<5>(
2077 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin);
2078 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
2079 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
2080 return emitBuiltinWithOneOverloadedType<5>(
2081 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax);
2082 case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
2083 return emitBuiltinWithOneOverloadedType<2>(
2084 *this, E, Intrinsic::amdgcn_s_prefetch_data);
2085 case Builtin::BIlogbf:
2086 case Builtin::BI__builtin_logbf: {
2087 Value *Src0 = EmitScalarExpr(E->getArg(0));
2088 Function *FrExpFunc = CGM.getIntrinsic(
2089 Intrinsic::frexp, {Src0->getType(), Builder.getInt32Ty()});
2090 CallInst *FrExp = Builder.CreateCall(FrExpFunc, Src0);
2091 Value *Exp = Builder.CreateExtractValue(FrExp, 1);
2092 Value *Add = Builder.CreateAdd(
2093 Exp, ConstantInt::getSigned(Exp->getType(), -1), "", false, true);
2094 Value *SIToFP = Builder.CreateSIToFP(Add, Builder.getFloatTy());
2095 Value *Fabs =
2096 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs);
2097 Value *FCmpONE = Builder.CreateFCmpONE(
2098 Fabs, ConstantFP::getInfinity(Builder.getFloatTy()));
2099 Value *Sel1 = Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
2100 Value *FCmpOEQ =
2101 Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(Builder.getFloatTy()));
2102 Value *Sel2 = Builder.CreateSelect(
2103 FCmpOEQ,
2104 ConstantFP::getInfinity(Builder.getFloatTy(), /*Negative=*/true), Sel1);
2105 return Sel2;
2106 }
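// NOTE (editor's illustration, not part of the original source): frexp
// returns x = m * 2^e with 0.5 <= |m| < 1, so logb(x) = e - 1 for finite
// nonzero x; e.g. logbf(8.0f): m = 0.5, e = 4, result 3.0. The two selects
// handle the special cases: |x| == inf yields +inf (the fabs value) and
// x == 0 yields -inf.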
2107 case Builtin::BIlogb:
2108 case Builtin::BI__builtin_logb: {
2109 Value *Src0 = EmitScalarExpr(E->getArg(0));
2110 Function *FrExpFunc = CGM.getIntrinsic(
2111 Intrinsic::frexp, {Src0->getType(), Builder.getInt32Ty()});
2112 CallInst *FrExp = Builder.CreateCall(FrExpFunc, Src0);
2113 Value *Exp = Builder.CreateExtractValue(FrExp, 1);
2114 Value *Add = Builder.CreateAdd(
2115 Exp, ConstantInt::getSigned(Exp->getType(), -1), "", false, true);
2116 Value *SIToFP = Builder.CreateSIToFP(Add, Builder.getDoubleTy());
2117 Value *Fabs =
2118 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs);
2119 Value *FCmpONE = Builder.CreateFCmpONE(
2120 Fabs, ConstantFP::getInfinity(Builder.getDoubleTy()));
2121 Value *Sel1 = Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
2122 Value *FCmpOEQ =
2123 Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(Builder.getDoubleTy()));
2124 Value *Sel2 = Builder.CreateSelect(
2125 FCmpOEQ,
2126 ConstantFP::getInfinity(Builder.getDoubleTy(), /*Negative=*/true),
2127 Sel1);
2128 return Sel2;
2129 }
2130 case Builtin::BIscalbnf:
2131 case Builtin::BI__builtin_scalbnf:
2132 case Builtin::BIscalbn:
2133 case Builtin::BI__builtin_scalbn:
2134 return emitBinaryExpMaybeConstrainedFPBuiltin(
2135 *this, E, Intrinsic::ldexp, Intrinsic::experimental_constrained_ldexp);
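// NOTE (editor's illustration, not part of the original source):
// scalbn(x, n) computes x * 2^n exactly, which is precisely llvm.ldexp, so
// both builtins forward to the (possibly constrained) ldexp intrinsic, e.g.
//   call float @llvm.ldexp.f32.i32(float %x, i32 %n)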
2136 default:
2137 return nullptr;
2138 }
2139 }