//===------- AMDGPU.cpp - Emit LLVM Code for builtins ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGBuiltin.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
#include "llvm/Support/AtomicOrdering.h"

using namespace clang;
using namespace CodeGen;
using namespace llvm;

namespace {

// Has second type mangled argument.
static Value *
emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E,
                                       Intrinsic::ID IntrinsicID,
                                       Intrinsic::ID ConstrainedIntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
  if (CGF.Builder.getIsFPConstrained()) {
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
                                       {Src0->getType(), Src1->getType()});
    return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
  }

  Function *F =
      CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
  return CGF.Builder.CreateCall(F, {Src0, Src1});
}

// If \p E is not a null pointer, insert an address space cast to match the
// return type of \p E if necessary.
Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
                             const CallExpr *E = nullptr) {
  auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
  auto *Call = CGF.Builder.CreateCall(F);
  if (!E)
    return Call;
  QualType BuiltinRetType = E->getType();
  auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
  if (RetTy == Call->getType())
    return Call;
  return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
}

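// Emit a call to llvm.amdgcn.implicitarg.ptr and annotate the returned
// pointer: the implicit kernel-argument area is at least 256 bytes and
// 8-byte aligned, which the attributes below encode for the optimizer.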
Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
  auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
  auto *Call = CGF.Builder.CreateCall(F);
  Call->addRetAttr(
      Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
  Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
  return Call;
}

static llvm::Intrinsic::ID getAMDGPUWorkGroupID(CodeGenFunction &CGF,
                                                unsigned Index) {
  switch (Index) {
  case 0:
    return llvm::Intrinsic::amdgcn_workgroup_id_x;
  case 1:
    return llvm::Intrinsic::amdgcn_workgroup_id_y;
  case 2:
    return llvm::Intrinsic::amdgcn_workgroup_id_z;
  default:
    llvm_unreachable("unhandled index");
  }
}

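// Mark a load from the dispatch packet or implicit kernarg area as noundef
// and invariant: this data is initialized by the runtime and never changes
// for the duration of a dispatch.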
static void setNoundefInvariantLoad(llvm::LoadInst *Ld) {
  Ld->setMetadata(llvm::LLVMContext::MD_noundef,
                  llvm::MDNode::get(Ld->getContext(), {}));
  Ld->setMetadata(llvm::LLVMContext::MD_invariant_load,
                  llvm::MDNode::get(Ld->getContext(), {}));
}

static void addMaxWorkGroupSizeRangeMetadata(CodeGenFunction &CGF,
                                             llvm::LoadInst *GroupSize) {
  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
  llvm::MDNode *RNode = MDHelper.createRange(
      APInt(16, 1), APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
  GroupSize->setMetadata(llvm::LLVMContext::MD_range, RNode);
  setNoundefInvariantLoad(GroupSize);
}

static Value *emitAMDGPUWorkGroupSizeV5(CodeGenFunction &CGF, unsigned Index) {
  llvm::Value *ImplicitArgPtr = EmitAMDGPUImplicitArgPtr(CGF);

  // offsetof(amdhsa_implicit_kernarg_v5, block_count[Index])
  unsigned BlockCountOffset = 0 + Index * 4;
  // offsetof(amdhsa_implicit_kernarg_v5, group_size[Index])
  unsigned GroupSizeOffset = 12 + Index * 2;
  // offsetof(amdhsa_implicit_kernarg_v5, remainder[Index])
  unsigned RemainderOffset = 18 + Index * 2;

  if (CGF.CGM.getLangOpts().OffloadUniformBlock) {
    // Indexing the implicit kernarg segment.
    llvm::Value *GroupSizeGEP = CGF.Builder.CreateConstInBoundsGEP1_64(
        CGF.Int8Ty, ImplicitArgPtr, GroupSizeOffset);
    llvm::LoadInst *GroupSize = CGF.Builder.CreateLoad(
        Address(GroupSizeGEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));

    addMaxWorkGroupSizeRangeMetadata(CGF, GroupSize);

    return CGF.Builder.CreateZExt(GroupSize, CGF.Int32Ty);
  }

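  // Without the OffloadUniformBlock guarantee, the trailing work-group in
  // each dimension may be partial: groups whose id is below block_count load
  // group_size, the final group loads the precomputed remainder field.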
  llvm::Value *BlockCountGEP = CGF.Builder.CreateConstGEP1_64(
      CGF.Int8Ty, ImplicitArgPtr, BlockCountOffset);
  llvm::LoadInst *BlockCount = CGF.Builder.CreateLoad(
      Address(BlockCountGEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));
  setNoundefInvariantLoad(BlockCount);

  llvm::Value *WorkgroupID =
      CGF.Builder.CreateIntrinsic(getAMDGPUWorkGroupID(CGF, Index), {});
  llvm::Value *IsFull = CGF.Builder.CreateICmpULT(WorkgroupID, BlockCount);

  llvm::Value *StructOffset = CGF.Builder.CreateSelect(
      IsFull, ConstantInt::get(CGF.Int32Ty, GroupSizeOffset),
      ConstantInt::get(CGF.Int32Ty, RemainderOffset));

  llvm::Value *SizeGEP =
      CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, ImplicitArgPtr, StructOffset);
  llvm::LoadInst *Size = CGF.Builder.CreateLoad(
      Address(SizeGEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
  addMaxWorkGroupSizeRangeMetadata(CGF, Size);
  setNoundefInvariantLoad(Size);

  return CGF.Builder.CreateZExt(Size, CGF.Int32Ty);
}

static Value *emitAMDGPUWorkGroupSizeV4(CodeGenFunction &CGF, unsigned Index) {
  llvm::Value *DispatchPtr = EmitAMDGPUDispatchPtr(CGF);

  // Indexing the HSA kernel_dispatch_packet struct.
  llvm::Value *GroupSizeGEP = CGF.Builder.CreateConstInBoundsGEP1_64(
      CGF.Int8Ty, DispatchPtr, 4 + Index * 2);
  llvm::LoadInst *GroupSizeLD = CGF.Builder.CreateLoad(
      Address(GroupSizeGEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));

  addMaxWorkGroupSizeRangeMetadata(CGF, GroupSizeLD);

  llvm::Value *GroupSize = CGF.Builder.CreateZExt(GroupSizeLD, CGF.Int32Ty);

  if (CGF.CGM.getLangOpts().OffloadUniformBlock)
    return GroupSize;

  llvm::Value *WorkgroupID =
      CGF.Builder.CreateIntrinsic(getAMDGPUWorkGroupID(CGF, Index), {});

  llvm::Value *GridSizeGEP = CGF.Builder.CreateConstInBoundsGEP1_64(
      CGF.Int8Ty, DispatchPtr, 12 + Index * 4);
  llvm::LoadInst *GridSize = CGF.Builder.CreateLoad(
      Address(GridSizeGEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));

  llvm::MDBuilder MDB(CGF.getLLVMContext());

  // Known non-zero.
  GridSize->setMetadata(llvm::LLVMContext::MD_range,
                        MDB.createRange(APInt(32, 1), APInt::getZero(32)));
  GridSize->setMetadata(llvm::LLVMContext::MD_invariant_load,
                        llvm::MDNode::get(CGF.getLLVMContext(), {}));

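  // The V4 dispatch packet has no remainder field, so compute it:
  // remainder = grid_size - workgroup_id * group_size. The group is partial
  // exactly when that difference is smaller than group_size.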
  llvm::Value *Mul = CGF.Builder.CreateMul(WorkgroupID, GroupSize);
  llvm::Value *Remainder = CGF.Builder.CreateSub(GridSize, Mul);

  llvm::Value *IsPartial = CGF.Builder.CreateICmpULT(Remainder, GroupSize);

  return CGF.Builder.CreateSelect(IsPartial, Remainder, GroupSize);
}

193// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
194/// Emit code based on Code Object ABI version.
195/// COV_4 : Emit code to use dispatch ptr
196/// COV_5+ : Emit code to use implicitarg ptr
197/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
198/// and use its value for COV_4 or COV_5+ approach. It is used for
199/// compiling device libraries in an ABI-agnostic way.
200Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
201 auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
202
203 // Do not emit __oclc_ABI_version references with non-empt environment.
204 if (Cov == CodeObjectVersionKind::COV_None &&
205 CGF.getTarget().getTriple().hasEnvironment())
206 Cov = CodeObjectVersionKind::COV_6;
207
208 if (Cov == CodeObjectVersionKind::COV_None) {
209 StringRef Name = "__oclc_ABI_version";
210 auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
211 if (!ABIVersionC)
212 ABIVersionC = new llvm::GlobalVariable(
213 CGF.CGM.getModule(), CGF.Int32Ty, false,
214 llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
215 llvm::GlobalVariable::NotThreadLocal,
217
218 // This load will be eliminated by the IPSCCP because it is constant
219 // weak_odr without externally_initialized. Either changing it to weak or
220 // adding externally_initialized will keep the load.
221 Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
222 CGF.CGM.getIntAlign());
223
224 Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
225 ABIVersion,
226 llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
227
228 llvm::Value *V5Impl = emitAMDGPUWorkGroupSizeV5(CGF, Index);
229 llvm::Value *V4Impl = emitAMDGPUWorkGroupSizeV4(CGF, Index);
230 return CGF.Builder.CreateSelect(IsCOV5, V5Impl, V4Impl);
231 }
232
233 return Cov >= CodeObjectVersionKind::COV_5
234 ? emitAMDGPUWorkGroupSizeV5(CGF, Index)
235 : emitAMDGPUWorkGroupSizeV4(CGF, Index);
236}

// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
  const unsigned XOffset = 12;
  auto *DP = EmitAMDGPUDispatchPtr(CGF);
  // Indexing the HSA kernel_dispatch_packet struct.
  auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
  auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
  auto *LD = CGF.Builder.CreateLoad(
      Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));

  llvm::MDBuilder MDB(CGF.getLLVMContext());

  // Known non-zero.
  LD->setMetadata(llvm::LLVMContext::MD_range,
                  MDB.createRange(APInt(32, 1), APInt::getZero(32)));
  LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
                  llvm::MDNode::get(CGF.getLLVMContext(), {}));
  return LD;
}
} // namespace

// Generates the IR for __builtin_amdgcn_read_exec_*.
// Lowers the builtin to the amdgcn_ballot intrinsic.
static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
                                      llvm::Type *RegisterType,
                                      llvm::Type *ValueType, bool isExecHi) {
  CodeGen::CGBuilderTy &Builder = CGF.Builder;
  CodeGen::CodeGenModule &CGM = CGF.CGM;

  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
  llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});

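  // For __builtin_amdgcn_read_exec_hi, the 64-bit ballot holds the full exec
  // mask; shift the high word down and truncate it to i32.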
  if (isExecHi) {
    Value *Rt2 = Builder.CreateLShr(Call, 32);
    Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
    return Rt2;
  }

  return Call;
}

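// Normalize the several forms a texture resource argument may take (an
// <8 x i32> value, a pointer to the descriptor, or a 32-bit handle) into the
// <8 x i32> rsrc vector that the image intrinsics expect.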
static llvm::Value *loadTextureDescPtorAsVec8I32(CodeGenFunction &CGF,
                                                 llvm::Value *RsrcPtr) {
  auto &B = CGF.Builder;
  auto *VecTy = llvm::FixedVectorType::get(B.getInt32Ty(), 8);

  if (RsrcPtr->getType() == VecTy)
    return RsrcPtr;

  if (RsrcPtr->getType()->isIntegerTy(32)) {
    llvm::PointerType *VecPtrTy =
        llvm::PointerType::get(CGF.getLLVMContext(), 8);
    llvm::Value *Ptr = B.CreateIntToPtr(RsrcPtr, VecPtrTy, "tex.rsrc.from.int");
    return B.CreateAlignedLoad(VecTy, Ptr, llvm::Align(32), "tex.rsrc.val");
  }

  if (RsrcPtr->getType()->isPointerTy()) {
    auto *VecPtrTy = llvm::PointerType::get(
        CGF.getLLVMContext(), RsrcPtr->getType()->getPointerAddressSpace());
    llvm::Value *Typed = B.CreateBitCast(RsrcPtr, VecPtrTy, "tex.rsrc.typed");
    return B.CreateAlignedLoad(VecTy, Typed, llvm::Align(32), "tex.rsrc.val");
  }

  const auto &DL = CGF.CGM.getDataLayout();
  if (DL.getTypeSizeInBits(RsrcPtr->getType()) == 256)
    return B.CreateBitCast(RsrcPtr, VecTy, "tex.rsrc.val");

  llvm::report_fatal_error("Unexpected texture resource argument form");
}

llvm::CallInst *
CodeGenFunction::EmitAMDGCNImageOverloadedReturnType(
    clang::CodeGen::CodeGenFunction &CGF, const clang::CallExpr *E,
    unsigned IntrinsicID, bool IsImageStore) {
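  // Locate the texture-descriptor argument (the AMDGPUTextureTy builtin
  // type); its position varies between the image builtins.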
  auto findTextureDescIndex = [&CGF](const CallExpr *E) -> unsigned {
    QualType TexQT = CGF.getContext().AMDGPUTextureTy;
    for (unsigned I = 0, N = E->getNumArgs(); I < N; ++I) {
      QualType ArgTy = E->getArg(I)->getType();
      if (ArgTy == TexQT) {
        return I;
      }

      if (ArgTy.getCanonicalType() == TexQT.getCanonicalType()) {
        return I;
      }
    }

    return ~0U;
  };

  SmallVector<Value *, 10> Args;
  unsigned RsrcIndex = findTextureDescIndex(E);

  if (RsrcIndex == ~0U) {
    llvm::report_fatal_error("Invalid argument count for image builtin");
  }

  for (unsigned I = 0; I < E->getNumArgs(); ++I) {
    llvm::Value *V = CGF.EmitScalarExpr(E->getArg(I));
    if (I == RsrcIndex)
      V = loadTextureDescPtorAsVec8I32(CGF, V);
    Args.push_back(V);
  }

  llvm::Type *RetTy = IsImageStore ? CGF.VoidTy : CGF.ConvertType(E->getType());
  llvm::CallInst *Call = CGF.Builder.CreateIntrinsic(RetTy, IntrinsicID, Args);
  return Call;
}

// Emit an intrinsic that has 1 float or double operand, and 1 integer.
static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
                               const CallExpr *E,
                               unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, {Src0, Src1});
}

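// AMDGCN and SPIR-V name the same synchronization scopes differently;
// translate the two names that differ when targeting SPIR-V.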
static inline StringRef mapScopeToSPIRV(StringRef AMDGCNScope) {
  if (AMDGCNScope == "agent")
    return "device";
  if (AMDGCNScope == "wavefront")
    return "subgroup";
  return AMDGCNScope;
}

static llvm::AtomicOrdering mapCABIAtomicOrdering(unsigned AO) {
  // Map C11/C++11 memory ordering to LLVM memory ordering.
  assert(llvm::isValidAtomicOrderingCABI(AO));
  switch (static_cast<llvm::AtomicOrderingCABI>(AO)) {
  case llvm::AtomicOrderingCABI::acquire:
  case llvm::AtomicOrderingCABI::consume:
    return llvm::AtomicOrdering::Acquire;
  case llvm::AtomicOrderingCABI::release:
    return llvm::AtomicOrdering::Release;
  case llvm::AtomicOrderingCABI::acq_rel:
    return llvm::AtomicOrdering::AcquireRelease;
  case llvm::AtomicOrderingCABI::seq_cst:
    return llvm::AtomicOrdering::SequentiallyConsistent;
  case llvm::AtomicOrderingCABI::relaxed:
    return llvm::AtomicOrdering::Monotonic;
  }
  llvm_unreachable("Unknown AtomicOrderingCABI enum");
}

// For processing memory ordering and memory scope arguments of various
// amdgcn builtins.
// \p Order takes a C++11-compatible memory-ordering specifier and converts
// it into LLVM's memory ordering specifier using the atomic C ABI, and writes
// it to \p AO. \p Scope takes a const char * and converts it into an AMDGCN
// specific SyncScopeID and writes it to \p SSID.
void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
                                              llvm::AtomicOrdering &AO,
                                              llvm::SyncScope::ID &SSID) {
  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();

  // Map C11/C++11 memory ordering to LLVM memory ordering.
  AO = mapCABIAtomicOrdering(ord);

  // Some of the atomic builtins take the scope as a string name.
  StringRef scp;
  if (llvm::getConstantStringInfo(Scope, scp)) {
    if (getTarget().getTriple().isSPIRV())
      scp = mapScopeToSPIRV(scp);
    SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
    return;
  }

  // Older builtins had an enum argument for the memory scope.
  const char *SSN = nullptr;
  int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
  switch (scope) {
  case AtomicScopeGenericModel::System: // __MEMORY_SCOPE_SYSTEM
    SSID = llvm::SyncScope::System;
    break;
  case AtomicScopeGenericModel::Device: // __MEMORY_SCOPE_DEVICE
    SSN = getTarget().getTriple().isSPIRV() ? "device" : "agent";
    break;
  case AtomicScopeGenericModel::Workgroup: // __MEMORY_SCOPE_WRKGRP
    SSN = "workgroup";
    break;
  case AtomicScopeGenericModel::Cluster: // __MEMORY_SCOPE_CLUSTR
    SSN = getTarget().getTriple().isSPIRV() ? "workgroup" : "cluster";
    break;
  case AtomicScopeGenericModel::Wavefront: // __MEMORY_SCOPE_WVFRNT
    SSN = getTarget().getTriple().isSPIRV() ? "subgroup" : "wavefront";
    break;
  case AtomicScopeGenericModel::Single: // __MEMORY_SCOPE_SINGLE
    SSID = llvm::SyncScope::SingleThread;
    break;
  default:
    SSID = llvm::SyncScope::System;
    break;
  }
  if (SSN)
    SSID = getLLVMContext().getOrInsertSyncScopeID(SSN);
}

void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
                                                     const CallExpr *E) {
  constexpr const char *Tag = "amdgpu-synchronize-as";

  LLVMContext &Ctx = Inst->getContext();
  SmallVector<MMRAMetadata::TagT, 3> MMRAs;
  for (unsigned K = 2; K < E->getNumArgs(); ++K) {
    llvm::Value *V = EmitScalarExpr(E->getArg(K));
    StringRef AS;
    if (llvm::getConstantStringInfo(V, AS)) {
      MMRAs.push_back({Tag, AS});
      // TODO: Delete the resulting unused constant?
      continue;
    }
    CGM.Error(E->getExprLoc(),
              "expected an address space name as a string literal");
  }

  llvm::sort(MMRAs);
  MMRAs.erase(llvm::unique(MMRAs), MMRAs.end());
  Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
}

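// Map a wave-reduction builtin to its type-overloaded intrinsic. Signed and
// unsigned integer min/max select distinct intrinsics (umin/umax for the
// unsigned variants); otherwise each pair of 32- and 64-bit builtins shares
// one intrinsic per operation.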
static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) {
  switch (BuiltinID) {
  default:
    llvm_unreachable("Unknown BuiltinID for wave reduction");
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
    return Intrinsic::amdgcn_wave_reduce_add;
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f64:
    return Intrinsic::amdgcn_wave_reduce_fadd;
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
    return Intrinsic::amdgcn_wave_reduce_sub;
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f64:
    return Intrinsic::amdgcn_wave_reduce_fsub;
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
    return Intrinsic::amdgcn_wave_reduce_min;
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f64:
    return Intrinsic::amdgcn_wave_reduce_fmin;
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
    return Intrinsic::amdgcn_wave_reduce_umin;
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
    return Intrinsic::amdgcn_wave_reduce_max;
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f64:
    return Intrinsic::amdgcn_wave_reduce_fmax;
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
    return Intrinsic::amdgcn_wave_reduce_umax;
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
    return Intrinsic::amdgcn_wave_reduce_and;
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
    return Intrinsic::amdgcn_wave_reduce_or;
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64:
    return Intrinsic::amdgcn_wave_reduce_xor;
  }
}
506
508 const CallExpr *E) {
509 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
510 llvm::SyncScope::ID SSID;
511 switch (BuiltinID) {
512 case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
513 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f32:
514 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f64:
515 case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
516 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f32:
517 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f64:
518 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
519 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
520 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f32:
521 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f64:
522 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
523 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
524 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f32:
525 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f64:
526 case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
527 case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
528 case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
529 case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
530 case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
531 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
532 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
533 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
534 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
535 case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
536 case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
537 case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64: {
538 Intrinsic::ID IID = getIntrinsicIDforWaveReduction(BuiltinID);
539 llvm::Value *Value = EmitScalarExpr(E->getArg(0));
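    // The second builtin operand selects the lowering strategy for the
    // reduction and is forwarded to the intrinsic unchanged.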
    llvm::Value *Strategy = EmitScalarExpr(E->getArg(1));
    llvm::Function *F = CGM.getIntrinsic(IID, {Value->getType()});
    return Builder.CreateCall(F, {Value, Strategy});
  }
  case AMDGPU::BI__builtin_amdgcn_div_scale:
  case AMDGPU::BI__builtin_amdgcn_div_scalef: {
    // Translate from the intrinsic's struct return to the builtin's out
    // argument.

    Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));

    llvm::Value *X = EmitScalarExpr(E->getArg(0));
    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
    llvm::Value *Z = EmitScalarExpr(E->getArg(2));

    llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
                                              X->getType());

    llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});

    llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
    llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);

    llvm::Type *RealFlagType = FlagOutPtr.getElementType();

    llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
    Builder.CreateStore(FlagExt, FlagOutPtr);
    return Result;
  }
  case AMDGPU::BI__builtin_amdgcn_div_fmas:
  case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
    llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));

    llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
                                         Src0->getType());
    llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
    return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
  }

  case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
    return emitBuiltinWithOneOverloadedType<2>(*this, E,
                                               Intrinsic::amdgcn_ds_swizzle);
  case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
  case AMDGPU::BI__builtin_amdgcn_mov_dpp:
  case AMDGPU::BI__builtin_amdgcn_update_dpp: {
    llvm::SmallVector<llvm::Value *, 6> Args;
    // Find out if any arguments are required to be integer constant
    // expressions.
    unsigned ICEArguments = 0;
    ASTContext::GetBuiltinTypeError Error;
    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
    assert(Error == ASTContext::GE_None && "Should not codegen an error");
    llvm::Type *DataTy = ConvertType(E->getArg(0)->getType());
    unsigned Size = DataTy->getPrimitiveSizeInBits();
    llvm::Type *IntTy =
        llvm::IntegerType::get(Builder.getContext(), std::max(Size, 32u));
    Function *F =
        CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8
                             ? Intrinsic::amdgcn_mov_dpp8
                             : Intrinsic::amdgcn_update_dpp,
                         IntTy);
    assert(E->getNumArgs() == 5 || E->getNumArgs() == 6 ||
           E->getNumArgs() == 2);
    bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;
    if (InsertOld)
      Args.push_back(llvm::PoisonValue::get(IntTy));
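    // The DPP intrinsics operate on 32-bit (or wider) values: data operands
    // narrower than 32 bits are widened to i32 below, and the result is
    // truncated back to the original type after the call.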
    for (unsigned I = 0; I != E->getNumArgs(); ++I) {
      llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E);
      if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2u : 1u) &&
          Size < 32) {
        if (!DataTy->isIntegerTy())
          V = Builder.CreateBitCast(
              V, llvm::IntegerType::get(Builder.getContext(), Size));
        V = Builder.CreateZExtOrBitCast(V, IntTy);
      }
      llvm::Type *ExpTy =
          F->getFunctionType()->getFunctionParamType(I + InsertOld);
      Args.push_back(Builder.CreateTruncOrBitCast(V, ExpTy));
    }
    Value *V = Builder.CreateCall(F, Args);
    if (Size < 32 && !DataTy->isIntegerTy())
      V = Builder.CreateTrunc(
          V, llvm::IntegerType::get(Builder.getContext(), Size));
    return Builder.CreateTruncOrBitCast(V, DataTy);
  }
  case AMDGPU::BI__builtin_amdgcn_permlane16:
  case AMDGPU::BI__builtin_amdgcn_permlanex16:
    return emitBuiltinWithOneOverloadedType<6>(
        *this, E,
        BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
            ? Intrinsic::amdgcn_permlane16
            : Intrinsic::amdgcn_permlanex16);
  case AMDGPU::BI__builtin_amdgcn_permlane64:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_permlane64);
  case AMDGPU::BI__builtin_amdgcn_readlane:
    return emitBuiltinWithOneOverloadedType<2>(*this, E,
                                               Intrinsic::amdgcn_readlane);
  case AMDGPU::BI__builtin_amdgcn_readfirstlane:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_readfirstlane);
  case AMDGPU::BI__builtin_amdgcn_div_fixup:
  case AMDGPU::BI__builtin_amdgcn_div_fixupf:
  case AMDGPU::BI__builtin_amdgcn_div_fixuph:
    return emitBuiltinWithOneOverloadedType<3>(*this, E,
                                               Intrinsic::amdgcn_div_fixup);
  case AMDGPU::BI__builtin_amdgcn_trig_preop:
  case AMDGPU::BI__builtin_amdgcn_trig_preopf:
    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
  case AMDGPU::BI__builtin_amdgcn_rcp:
  case AMDGPU::BI__builtin_amdgcn_rcpf:
  case AMDGPU::BI__builtin_amdgcn_rcph:
  case AMDGPU::BI__builtin_amdgcn_rcp_bf16:
    return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rcp);
  case AMDGPU::BI__builtin_amdgcn_sqrt:
  case AMDGPU::BI__builtin_amdgcn_sqrtf:
  case AMDGPU::BI__builtin_amdgcn_sqrth:
  case AMDGPU::BI__builtin_amdgcn_sqrt_bf16:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_sqrt);
  case AMDGPU::BI__builtin_amdgcn_rsq:
  case AMDGPU::BI__builtin_amdgcn_rsqf:
  case AMDGPU::BI__builtin_amdgcn_rsqh:
  case AMDGPU::BI__builtin_amdgcn_rsq_bf16:
    return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rsq);
  case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
  case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_rsq_clamp);
  case AMDGPU::BI__builtin_amdgcn_sinf:
  case AMDGPU::BI__builtin_amdgcn_sinh:
  case AMDGPU::BI__builtin_amdgcn_sin_bf16:
    return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_sin);
  case AMDGPU::BI__builtin_amdgcn_cosf:
  case AMDGPU::BI__builtin_amdgcn_cosh:
  case AMDGPU::BI__builtin_amdgcn_cos_bf16:
    return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_cos);
  case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
    return EmitAMDGPUDispatchPtr(*this, E);
  case AMDGPU::BI__builtin_amdgcn_logf:
  case AMDGPU::BI__builtin_amdgcn_log_bf16:
    return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log);
  case AMDGPU::BI__builtin_amdgcn_exp2f:
  case AMDGPU::BI__builtin_amdgcn_exp2_bf16:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_exp2);
  case AMDGPU::BI__builtin_amdgcn_log_clampf:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_log_clamp);
  case AMDGPU::BI__builtin_amdgcn_ldexp:
  case AMDGPU::BI__builtin_amdgcn_ldexpf: {
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Function *F =
        CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
    return Builder.CreateCall(F, {Src0, Src1});
  }
  case AMDGPU::BI__builtin_amdgcn_ldexph: {
    // The raw instruction has a different behavior for out of bounds exponent
    // values (implicit truncation instead of saturate to short_min/short_max).
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Function *F =
        CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
    return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
  }
  case AMDGPU::BI__builtin_amdgcn_frexp_mant:
  case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
  case AMDGPU::BI__builtin_amdgcn_frexp_manth:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_frexp_mant);
  case AMDGPU::BI__builtin_amdgcn_frexp_exp:
  case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
    Value *Src0 = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
                                   {Builder.getInt32Ty(), Src0->getType()});
    return Builder.CreateCall(F, Src0);
  }
  case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
    Value *Src0 = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
                                   {Builder.getInt16Ty(), Src0->getType()});
    return Builder.CreateCall(F, Src0);
  }
  case AMDGPU::BI__builtin_amdgcn_fract:
  case AMDGPU::BI__builtin_amdgcn_fractf:
  case AMDGPU::BI__builtin_amdgcn_fracth:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_fract);
  case AMDGPU::BI__builtin_amdgcn_lerp:
    return emitBuiltinWithOneOverloadedType<3>(*this, E,
                                               Intrinsic::amdgcn_lerp);
  case AMDGPU::BI__builtin_amdgcn_ubfe:
    return emitBuiltinWithOneOverloadedType<3>(*this, E,
                                               Intrinsic::amdgcn_ubfe);
  case AMDGPU::BI__builtin_amdgcn_sbfe:
    return emitBuiltinWithOneOverloadedType<3>(*this, E,
                                               Intrinsic::amdgcn_sbfe);
  case AMDGPU::BI__builtin_amdgcn_ballot_w32:
  case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
    llvm::Type *ResultType = ConvertType(E->getType());
    llvm::Value *Src = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
    return Builder.CreateCall(F, {Src});
  }
  case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w32:
  case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w64: {
    llvm::Value *Src = EmitScalarExpr(E->getArg(0));
    Function *F =
        CGM.getIntrinsic(Intrinsic::amdgcn_inverse_ballot, {Src->getType()});
    return Builder.CreateCall(F, {Src});
  }
  case AMDGPU::BI__builtin_amdgcn_tanhf:
  case AMDGPU::BI__builtin_amdgcn_tanhh:
  case AMDGPU::BI__builtin_amdgcn_tanh_bf16:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_tanh);
  case AMDGPU::BI__builtin_amdgcn_uicmp:
  case AMDGPU::BI__builtin_amdgcn_uicmpl:
  case AMDGPU::BI__builtin_amdgcn_sicmp:
  case AMDGPU::BI__builtin_amdgcn_sicmpl: {
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));

    // FIXME-GFX10: How should 32 bit mask be handled?
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
                                   {Builder.getInt64Ty(), Src0->getType()});
    return Builder.CreateCall(F, {Src0, Src1, Src2});
  }
  case AMDGPU::BI__builtin_amdgcn_fcmp:
  case AMDGPU::BI__builtin_amdgcn_fcmpf: {
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));

    // FIXME-GFX10: How should 32 bit mask be handled?
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
                                   {Builder.getInt64Ty(), Src0->getType()});
    return Builder.CreateCall(F, {Src0, Src1, Src2});
  }
  case AMDGPU::BI__builtin_amdgcn_class:
  case AMDGPU::BI__builtin_amdgcn_classf:
  case AMDGPU::BI__builtin_amdgcn_classh:
    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
  case AMDGPU::BI__builtin_amdgcn_fmed3f:
  case AMDGPU::BI__builtin_amdgcn_fmed3h:
    return emitBuiltinWithOneOverloadedType<3>(*this, E,
                                               Intrinsic::amdgcn_fmed3);
  case AMDGPU::BI__builtin_amdgcn_ds_append:
  case AMDGPU::BI__builtin_amdgcn_ds_consume: {
    Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
        Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
    Value *Src0 = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrin, {Src0->getType()});
    return Builder.CreateCall(F, {Src0, Builder.getFalse()});
  }
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
  case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
  case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
  case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
  case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
  case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
  case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
  case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
  case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
      IID = Intrinsic::amdgcn_global_load_tr_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
      IID = Intrinsic::amdgcn_global_load_tr_b128;
      break;
    case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
      IID = Intrinsic::amdgcn_global_load_tr4_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
      IID = Intrinsic::amdgcn_global_load_tr6_b96;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
      IID = Intrinsic::amdgcn_ds_load_tr4_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
      IID = Intrinsic::amdgcn_ds_load_tr6_b96;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
      IID = Intrinsic::amdgcn_ds_load_tr8_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
    case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
    case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
      IID = Intrinsic::amdgcn_ds_load_tr16_b128;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
      IID = Intrinsic::amdgcn_ds_read_tr4_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
      IID = Intrinsic::amdgcn_ds_read_tr8_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
      IID = Intrinsic::amdgcn_ds_read_tr6_b96;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16:
    case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
    case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
      IID = Intrinsic::amdgcn_ds_read_tr16_b64;
      break;
    }
    llvm::Type *LoadTy = ConvertType(E->getType());
    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
    llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
    return Builder.CreateCall(F, {Addr});
  }
  case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
  case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
  case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
  case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
  case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
  case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128: {

    Intrinsic::ID IID;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
      IID = Intrinsic::amdgcn_global_load_monitor_b32;
      break;
    case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
      IID = Intrinsic::amdgcn_global_load_monitor_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
      IID = Intrinsic::amdgcn_global_load_monitor_b128;
      break;
    case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
      IID = Intrinsic::amdgcn_flat_load_monitor_b32;
      break;
    case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
      IID = Intrinsic::amdgcn_flat_load_monitor_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128:
      IID = Intrinsic::amdgcn_flat_load_monitor_b128;
      break;
    }

    LLVMContext &Ctx = CGM.getLLVMContext();
    llvm::Type *LoadTy = ConvertType(E->getType());
    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));

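    // The last two operands are integer constants selecting the C ABI memory
    // ordering and the synchronization scope; the scope is attached to the
    // intrinsic call as a metadata string operand.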
    auto *AOExpr = cast<llvm::ConstantInt>(EmitScalarExpr(E->getArg(1)));
    auto *ScopeExpr = cast<llvm::ConstantInt>(EmitScalarExpr(E->getArg(2)));

    auto Scope = static_cast<SyncScope>(ScopeExpr->getZExtValue());
    llvm::AtomicOrdering AO = mapCABIAtomicOrdering(AOExpr->getZExtValue());

    StringRef ScopeStr = CGM.getTargetCodeGenInfo().getLLVMSyncScopeStr(
        CGM.getLangOpts(), Scope, AO);

    llvm::MDNode *MD =
        llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, ScopeStr)});
    llvm::Value *ScopeMD = llvm::MetadataAsValue::get(Ctx, MD);
    llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
    return Builder.CreateCall(F, {Addr, AOExpr, ScopeMD});
  }
  case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:
  case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:
  case AMDGPU::BI__builtin_amdgcn_cluster_load_b128: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:
      IID = Intrinsic::amdgcn_cluster_load_b32;
      break;
    case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:
      IID = Intrinsic::amdgcn_cluster_load_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_cluster_load_b128:
      IID = Intrinsic::amdgcn_cluster_load_b128;
      break;
    }
    SmallVector<Value *, 3> Args;
    for (int i = 0, e = E->getNumArgs(); i != e; ++i)
      Args.push_back(EmitScalarExpr(E->getArg(i)));
    llvm::Function *F = CGM.getIntrinsic(IID, {ConvertType(E->getType())});
    return Builder.CreateCall(F, {Args});
  }
  case AMDGPU::BI__builtin_amdgcn_load_to_lds: {
    // Should this have asan instrumentation?
    return emitBuiltinWithOneOverloadedType<5>(*this, E,
                                               Intrinsic::amdgcn_load_to_lds);
  }
  case AMDGPU::BI__builtin_amdgcn_load_async_to_lds: {
    // Should this have asan instrumentation?
    return emitBuiltinWithOneOverloadedType<4>(
        *this, E, Intrinsic::amdgcn_load_async_to_lds);
  }
  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
      IID = Intrinsic::amdgcn_cooperative_atomic_load_32x4B;
      break;
    case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
      IID = Intrinsic::amdgcn_cooperative_atomic_store_32x4B;
      break;
    case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
      IID = Intrinsic::amdgcn_cooperative_atomic_load_16x8B;
      break;
    case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
      IID = Intrinsic::amdgcn_cooperative_atomic_store_16x8B;
      break;
    case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
      IID = Intrinsic::amdgcn_cooperative_atomic_load_8x16B;
      break;
    case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B:
      IID = Intrinsic::amdgcn_cooperative_atomic_store_8x16B;
      break;
    }

    LLVMContext &Ctx = CGM.getLLVMContext();
    SmallVector<Value *, 5> Args;
    // The last argument is a metadata string naming the scope.
    const unsigned ScopeArg = E->getNumArgs() - 1;
    for (unsigned i = 0; i != ScopeArg; ++i)
      Args.push_back(EmitScalarExpr(E->getArg(i)));
    StringRef Arg = cast<StringLiteral>(E->getArg(ScopeArg)->IgnoreParenCasts())
                        ->getString();
    llvm::MDNode *MD = llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, Arg)});
    Args.push_back(llvm::MetadataAsValue::get(Ctx, MD));
    // The intrinsic is typed based on the pointer AS. The pointer is always
    // the first argument.
    llvm::Function *F = CGM.getIntrinsic(IID, {Args[0]->getType()});
    return Builder.CreateCall(F, {Args});
  }
  case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
    Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
                                   {llvm::Type::getInt64Ty(getLLVMContext())});
    return Builder.CreateCall(F);
  }
  case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
    Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv,
                                   {llvm::Type::getInt64Ty(getLLVMContext())});
    llvm::Value *Env = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(F, {Env});
  }
  case AMDGPU::BI__builtin_amdgcn_read_exec:
    return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
  case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
    return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
  case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
    return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
    llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
    llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
    llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
    llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
    llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
    llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));

    // The builtins take these arguments as vec4 where the last element is
    // ignored. The intrinsic takes them as vec3.
    RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin, {0, 1, 2});
    RayDir = Builder.CreateShuffleVector(RayDir, RayDir, {0, 1, 2});
    RayInverseDir =
        Builder.CreateShuffleVector(RayInverseDir, RayInverseDir, {0, 1, 2});

    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
                                   {NodePtr->getType(), RayDir->getType()});
    return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
                                  RayInverseDir, TextureDescr});
  }
  case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
  case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
      IID = Intrinsic::amdgcn_image_bvh8_intersect_ray;
      break;
    case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray:
      IID = Intrinsic::amdgcn_image_bvh_dual_intersect_ray;
      break;
    }
    llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
    llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
    llvm::Value *InstanceMask = EmitScalarExpr(E->getArg(2));
    llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(3));
    llvm::Value *RayDir = EmitScalarExpr(E->getArg(4));
    llvm::Value *Offset = EmitScalarExpr(E->getArg(5));
    llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(6));

    Address RetRayOriginPtr = EmitPointerWithAlignment(E->getArg(7));
    Address RetRayDirPtr = EmitPointerWithAlignment(E->getArg(8));

    llvm::Function *IntrinsicFunc = CGM.getIntrinsic(IID);

    llvm::CallInst *CI = Builder.CreateCall(
        IntrinsicFunc, {NodePtr, RayExtent, InstanceMask, RayOrigin, RayDir,
                        Offset, TextureDescr});

    llvm::Value *RetVData = Builder.CreateExtractValue(CI, 0);
    llvm::Value *RetRayOrigin = Builder.CreateExtractValue(CI, 1);
    llvm::Value *RetRayDir = Builder.CreateExtractValue(CI, 2);

    Builder.CreateStore(RetRayOrigin, RetRayOriginPtr);
    Builder.CreateStore(RetRayDir, RetRayDirPtr);

    return RetVData;
  }

  case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
  case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
  case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
  case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
      IID = Intrinsic::amdgcn_ds_bvh_stack_rtn;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
      IID = Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
      IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn:
      IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn;
      break;
    }

    SmallVector<Value *, 4> Args;
    for (int i = 0, e = E->getNumArgs(); i != e; ++i)
      Args.push_back(EmitScalarExpr(E->getArg(i)));

    Function *F = CGM.getIntrinsic(IID);
    Value *Call = Builder.CreateCall(F, Args);
    Value *Rtn = Builder.CreateExtractValue(Call, 0);
    Value *A = Builder.CreateExtractValue(Call, 1);
    llvm::Type *RetTy = ConvertType(E->getType());
    Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
                                            (uint64_t)0);
    // ds_bvh_stack_push8_pop2_rtn returns {i64, i32} but the builtin returns
    // <2 x i64>, so zero-extend the second value.
    if (A->getType()->getPrimitiveSizeInBits() <
        RetTy->getScalarType()->getPrimitiveSizeInBits())
      A = Builder.CreateZExt(A, RetTy->getScalarType());

    return Builder.CreateInsertElement(I0, A, 1);
  }
  case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_1d, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_1darray, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_2d, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_2darray, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_3d, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_cube, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_mip_1d, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_mip_1darray, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_mip_2d, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_mip_2darray, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_mip_3d, false);
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_load_mip_cube, false);
  case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_1d, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_1darray, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_2d, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_2darray, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_3d, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_cube, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_mip_1d, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_mip_1darray, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_mip_2d, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_mip_2darray, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_mip_3d, true);
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
  case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_store_mip_cube, true);
  case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_1d, false);
  case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_1darray, false);
  case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_2d, false);
  case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_2darray, false);
  case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_3d, false);
  case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
  case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_cube, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_lz_1d, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_l_1d, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_d_1d, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_lz_2d, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_l_2d, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_d_2d, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_lz_3d, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_l_3d, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_d_3d, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_lz_cube, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_l_cube, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_lz_1darray, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_l_1darray, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_d_1darray, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_lz_2darray, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_l_2darray, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32:
  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_sample_d_2darray, false);
  case clang::AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32:
    return EmitAMDGCNImageOverloadedReturnType(
        *this, E, Intrinsic::amdgcn_image_gather4_lz_2d, false);
1332 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
1333 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
1334 llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
1335 Function *F = CGM.getIntrinsic(
1336 BuiltinID == AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4
1337 ? Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4
1338 : Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4,
1339 {VT, VT});
1340
1341 SmallVector<Value *, 9> Args;
1342 for (unsigned I = 0, N = E->getNumArgs(); I != N; ++I)
1343 Args.push_back(EmitScalarExpr(E->getArg(I)));
1344 return Builder.CreateCall(F, Args);
1345 }
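// Example (illustrative sketch, not part of the original listing): both
// overloaded operand types are fixed to <8 x i32> here, so the 16x16x128
// variant emits something like
//   %d = call <4 x float>
//        @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(...)
// where the v4f32 return type is the per-lane accumulator and is presumed
// here for illustration.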
1346 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
1347 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
1348 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
1349 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
1350 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
1351 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
1352 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
1353 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
1354 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
1355 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
1356 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
1357 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
1358 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
1359 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
1360 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
1361 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
1362 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
1363 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
1364 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
1365 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
1366 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
1367 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
1368 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
1369 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
1370 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
1371 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
1372 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
1373 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
1374 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
1375 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
1376 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
1377 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
1378 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
1379 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
1380 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
1381 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
1382 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
1383 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
1384 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
1385 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
1386 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
1387 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
1388 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
1389 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
1390 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
1391 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
1392 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
1393 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
1394 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
1395 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
1396 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
1397 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
1398 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
1399 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
1400 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
1401 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
1402 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
1403 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
1404 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
1405 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
1406 // GFX1250 WMMA builtins
1407 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
1408 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
1409 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
1410 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
1411 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
1412 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
1413 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
1414 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
1415 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
1416 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
1417 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
1418 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
1419 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
1420 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
1421 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
1422 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
1423 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
1424 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
1425 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
1426 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
1427 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
1428 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
1429 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
1430 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
1431 case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
1432 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1433 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
1434 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:
1435 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:
1436 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
1437 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
1438 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
1439 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
1440 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
1441 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
1442 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
1443 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
1444 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
1445 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
1446 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
1447 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
1448 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
1449 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8: {
1450
1451 // These operations perform a matrix multiplication and accumulation of
1452 // the form:
1453 // D = A * B + C
1454 // We need to specify one type for the A/B matrices and one for the C/D
1455 // matrices. Sparse matrix operations can have different types for A and B,
1456 // as well as an additional type for the sparsity index.
1457 // The destination type should be put before the types used for the source operands.
1458 SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
1459 // On GFX12, the intrinsics with a 16-bit accumulator use a packed layout,
1460 // so there is no need for the variable opsel argument; always set it to
1461 // "false".
1462 bool AppendFalseForOpselArg = false;
1463 unsigned BuiltinWMMAOp;
1464 // Need return type when D and C are of different types.
1465 bool NeedReturnType = false;
1466
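// Example (illustrative sketch): for __builtin_amdgcn_wmma_f32_16x16x16_f16_w32
// the arguments are (A, B, C), so ArgsForMatchingMatrixTypes = {2, 0} mangles
// the intrinsic on the C/D type (arg 2) and the A/B type (arg 0), e.g.
//   %d = call <8 x float>
//        @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v16f16(<16 x half> %a,
//            <16 x half> %b, <8 x float> %c)
// (the vector widths shown assume a wave32 configuration).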
1467 switch (BuiltinID) {
1468 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
1469 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
1470 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
1471 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
1472 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1473 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
1474 break;
1475 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
1476 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
1477 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
1478 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
1479 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1480 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
1481 break;
1482 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
1483 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
1484 AppendFalseForOpselArg = true;
1485 [[fallthrough]];
1486 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
1487 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
1488 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1489 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
1490 break;
1491 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
1492 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
1493 AppendFalseForOpselArg = true;
1494 [[fallthrough]];
1495 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
1496 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
1497 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1498 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
1499 break;
1500 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
1501 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
1502 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1503 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
1504 break;
1505 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
1506 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
1507 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1508 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
1509 break;
1510 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
1511 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
1512 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
1513 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
1514 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
1515 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
1516 break;
1517 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
1518 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
1519 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
1520 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
1521 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
1522 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
1523 break;
1524 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
1525 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
1526 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1527 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
1528 break;
1529 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
1530 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
1531 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1532 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
1533 break;
1534 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
1535 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
1536 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1537 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
1538 break;
1539 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
1540 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
1541 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
1542 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
1543 break;
1544 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
1545 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
1546 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
1547 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
1548 break;
1549 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
1550 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
1551 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1552 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
1553 break;
1554 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
1555 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
1556 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1557 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
1558 break;
1559 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
1560 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
1561 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1562 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
1563 break;
1564 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
1565 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
1566 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1567 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
1568 break;
1569 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
1570 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
1571 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
1572 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
1573 break;
1574 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
1575 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
1576 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
1577 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
1578 break;
1579 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
1580 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
1581 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
1582 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
1583 break;
1584 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
1585 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
1586 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1587 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
1588 break;
1589 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
1590 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
1591 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1592 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
1593 break;
1594 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
1595 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
1596 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1597 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
1598 break;
1599 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
1600 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
1601 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
1602 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
1603 break;
1604 // GFX1250 WMMA builtins
1605 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
1606 ArgsForMatchingMatrixTypes = {5, 1};
1607 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x4_f32;
1608 break;
1609 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
1610 ArgsForMatchingMatrixTypes = {5, 1};
1611 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_bf16;
1612 break;
1613 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
1614 ArgsForMatchingMatrixTypes = {5, 1};
1615 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_f16;
1616 break;
1617 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
1618 ArgsForMatchingMatrixTypes = {5, 1};
1619 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x32_f16;
1620 break;
1621 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
1622 ArgsForMatchingMatrixTypes = {5, 1};
1623 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16;
1624 break;
1625 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
1626 NeedReturnType = true;
1627 ArgsForMatchingMatrixTypes = {1, 5};
1628 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16;
1629 break;
1630 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
1631 ArgsForMatchingMatrixTypes = {3, 0};
1632 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_fp8;
1633 break;
1634 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
1635 ArgsForMatchingMatrixTypes = {3, 0};
1636 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_bf8;
1637 break;
1638 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
1639 ArgsForMatchingMatrixTypes = {3, 0};
1640 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_fp8;
1641 break;
1642 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
1643 ArgsForMatchingMatrixTypes = {3, 0};
1644 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_bf8;
1645 break;
1646 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
1647 ArgsForMatchingMatrixTypes = {3, 0};
1648 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_fp8;
1649 break;
1650 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
1651 ArgsForMatchingMatrixTypes = {3, 0};
1652 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_bf8;
1653 break;
1654 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
1655 ArgsForMatchingMatrixTypes = {3, 0};
1656 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_fp8;
1657 break;
1658 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
1659 ArgsForMatchingMatrixTypes = {3, 0};
1660 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_bf8;
1661 break;
1662 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
1663 ArgsForMatchingMatrixTypes = {3, 0};
1664 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_fp8;
1665 break;
1666 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
1667 ArgsForMatchingMatrixTypes = {3, 0};
1668 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_bf8;
1669 break;
1670 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
1671 ArgsForMatchingMatrixTypes = {3, 0};
1672 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_fp8;
1673 break;
1674 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
1675 ArgsForMatchingMatrixTypes = {3, 0};
1676 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_bf8;
1677 break;
1678 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
1679 ArgsForMatchingMatrixTypes = {3, 0};
1680 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_fp8;
1681 break;
1682 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
1683 ArgsForMatchingMatrixTypes = {3, 0};
1684 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_bf8;
1685 break;
1686 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
1687 ArgsForMatchingMatrixTypes = {3, 0};
1688 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_fp8;
1689 break;
1690 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
1691 ArgsForMatchingMatrixTypes = {3, 0};
1692 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_bf8;
1693 break;
1694 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
1695 ArgsForMatchingMatrixTypes = {4, 1};
1696 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x64_iu8;
1697 break;
1698 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
1699 ArgsForMatchingMatrixTypes = {5, 1, 3};
1700 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4;
1701 break;
1702 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1703 ArgsForMatchingMatrixTypes = {5, 1, 3};
1704 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4;
1705 break;
1706 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
1707 ArgsForMatchingMatrixTypes = {5, 1, 3};
1708 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4;
1709 break;
1710 case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
1711 ArgsForMatchingMatrixTypes = {3, 0, 1};
1712 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_32x16x128_f4;
1713 break;
1714 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:
1715 ArgsForMatchingMatrixTypes = {3, 0, 1};
1716 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_32x16x128_f4;
1717 break;
1718 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:
1719 ArgsForMatchingMatrixTypes = {3, 0, 1};
1720 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_32x16x128_f4;
1721 break;
1722 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
1723 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1724 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_f16;
1725 break;
1726 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
1727 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1728 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_bf16;
1729 break;
1730 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
1731 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1732 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x64_f16;
1733 break;
1734 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
1735 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1736 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x64_bf16;
1737 break;
1738 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
1739 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1740 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16f32_16x16x64_bf16;
1741 break;
1742 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
1743 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1744 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_fp8;
1745 break;
1746 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
1747 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1748 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_bf8;
1749 break;
1750 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
1751 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1752 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_fp8;
1753 break;
1754 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
1755 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1756 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_bf8;
1757 break;
1758 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
1759 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1760 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_fp8;
1761 break;
1762 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
1763 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1764 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_bf8;
1765 break;
1766 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
1767 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1768 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_fp8;
1769 break;
1770 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
1771 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1772 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_bf8;
1773 break;
1774 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8:
1775 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1776 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8;
1777 break;
1778 }
1779
1780 SmallVector<Value *> Args;
1781 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
1782 Args.push_back(EmitScalarExpr(E->getArg(i)));
1783 if (AppendFalseForOpselArg)
1784 Args.push_back(Builder.getFalse());
1785
1786 // Handle the optional clamp argument of the following two builtins.
1787 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8) {
1788 if (Args.size() == 7)
1789 Args.push_back(Builder.getFalse());
1790 assert(Args.size() == 8 && "Expected 8 arguments");
1791 Args[7] = Builder.CreateZExtOrTrunc(Args[7], Builder.getInt1Ty());
1792 } else if (BuiltinID ==
1793 AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8) {
1794 if (Args.size() == 8)
1795 Args.push_back(Builder.getFalse());
1796 assert(Args.size() == 9 && "Expected 9 arguments");
1797 Args[8] = Builder.CreateZExtOrTrunc(Args[8], Builder.getInt1Ty());
1798 }
1799
1800 SmallVector<llvm::Type *, 2> ArgTypes;
1801 if (NeedReturnType)
1802 ArgTypes.push_back(ConvertType(E->getType()));
1803 for (auto ArgIdx : ArgsForMatchingMatrixTypes)
1804 ArgTypes.push_back(Args[ArgIdx]->getType());
1805
1806 Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
1807 return Builder.CreateCall(F, Args);
1808 }
1809 // amdgcn workgroup size
1810 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
1811 return EmitAMDGPUWorkGroupSize(*this, 0);
1812 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
1813 return EmitAMDGPUWorkGroupSize(*this, 1);
1814 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
1815 return EmitAMDGPUWorkGroupSize(*this, 2);
1816
1817 // amdgcn grid size
1818 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
1819 return EmitAMDGPUGridSize(*this, 0);
1820 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
1821 return EmitAMDGPUGridSize(*this, 1);
1822 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
1823 return EmitAMDGPUGridSize(*this, 2);
1824
1825 // r600 intrinsics
1826 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
1827 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
1828 return emitBuiltinWithOneOverloadedType<1>(*this, E,
1829 Intrinsic::r600_recipsqrt_ieee);
1830 case AMDGPU::BI__builtin_amdgcn_alignbit: {
1831 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
1832 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
1833 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
1834 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
1835 return Builder.CreateCall(F, { Src0, Src1, Src2 });
1836 }
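// Note (illustrative): alignbit is a funnel shift right, so
//   __builtin_amdgcn_alignbit(a, b, c)
// lowers to
//   %r = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)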
1837 case AMDGPU::BI__builtin_amdgcn_fence: {
1838 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
1839 EmitScalarExpr(E->getArg(1)), AO, SSID);
1840 FenceInst *Fence = Builder.CreateFence(AO, SSID);
1841 if (E->getNumArgs() > 2)
1842 AddAMDGPUFenceAddressSpaceMMRA(Fence, E);
1843 return Fence;
1844 }
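// Example (illustrative): a two-argument call such as
//   __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup")
// lowers to
//   fence syncscope("workgroup") seq_cst
// while additional address-space arguments attach MMRA metadata to the fence.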
1845 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
1846 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
1847 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
1848 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
1849 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
1850 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
1851 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
1852 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
1853 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
1854 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
1855 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
1856 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
1857 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
1858 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1859 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1860 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1861 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1862 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
1863 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1864 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1865 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
1866 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
1867 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
1868 llvm::AtomicRMWInst::BinOp BinOp;
1869 switch (BuiltinID) {
1870 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
1871 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
1872 BinOp = llvm::AtomicRMWInst::UIncWrap;
1873 break;
1874 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
1875 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
1876 BinOp = llvm::AtomicRMWInst::UDecWrap;
1877 break;
1878 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
1879 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
1880 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
1881 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
1882 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
1883 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
1884 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
1885 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1886 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1887 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1888 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1889 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
1890 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1891 BinOp = llvm::AtomicRMWInst::FAdd;
1892 break;
1893 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
1894 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1895 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
1896 BinOp = llvm::AtomicRMWInst::FMin;
1897 break;
1898 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
1899 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
1900 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
1901 BinOp = llvm::AtomicRMWInst::FMax;
1902 break;
1903 }
1904
1905 Address Ptr = CheckAtomicAlignment(*this, E);
1906 Value *Val = EmitScalarExpr(E->getArg(1));
1907 llvm::Type *OrigTy = Val->getType();
1908 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1909
1910 bool Volatile;
1911
1912 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
1913 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
1914 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
1915 // __builtin_amdgcn_ds_faddf/fminf/fmaxf have an explicit volatile argument.
1916 Volatile =
1917 cast<ConstantInt>(EmitScalarExpr(E->getArg(4)))->getZExtValue();
1918 } else {
1919 // Infer volatile from the passed type.
1920 Volatile =
1921 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1922 }
1923
1924 if (E->getNumArgs() >= 4) {
1925 // Some of the builtins have explicit ordering and scope arguments.
1926 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
1927 EmitScalarExpr(E->getArg(3)), AO, SSID);
1928 } else {
1929 // Most of the builtins do not have syncscope/order arguments. For DS
1930 // atomics the scope doesn't really matter, as they implicitly operate at
1931 // workgroup scope.
1932 //
1933 // The global/flat cases need to use agent scope to consistently produce
1934 // the native instruction instead of a cmpxchg expansion.
1935 if (getTarget().getTriple().isSPIRV())
1936 SSID = getLLVMContext().getOrInsertSyncScopeID("device");
1937 else
1938 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
1939 AO = AtomicOrdering::Monotonic;
1940
1941 // The v2bf16 builtin uses i16 instead of a natural bfloat type.
1942 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
1943 BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
1944 BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
1945 llvm::Type *V2BF16Ty = FixedVectorType::get(
1946 llvm::Type::getBFloatTy(Builder.getContext()), 2);
1947 Val = Builder.CreateBitCast(Val, V2BF16Ty);
1948 }
1949 }
1950
1951 llvm::AtomicRMWInst *RMW =
1952 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
1953 if (Volatile)
1954 RMW->setVolatile(true);
1955
1956 unsigned AddrSpace = Ptr.getType()->getAddressSpace();
1957 if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
1958 // Most targets require "amdgpu.no.fine.grained.memory" to emit the native
1959 // instruction for flat and global operations.
1960 llvm::MDTuple *EmptyMD = MDNode::get(getLLVMContext(), {});
1961 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
1962
1963 // Most targets require "amdgpu.ignore.denormal.mode" to emit the native
1964 // instruction, but this only matters for float fadd.
1965 if (BinOp == llvm::AtomicRMWInst::FAdd && Val->getType()->isFloatTy())
1966 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
1967 }
1968
1969 return Builder.CreateBitCast(RMW, OrigTy);
1970 }
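// Example (illustrative sketch): with the defaults chosen above, a global
// float add such as __builtin_amdgcn_global_atomic_fadd_f32(p, v) becomes
// roughly
//   %old = atomicrmw fadd ptr addrspace(1) %p, float %v
//          syncscope("agent") monotonic, !amdgpu.no.fine.grained.memory !0,
//          !amdgpu.ignore.denormal.mode !0
// with !0 = !{} (an empty metadata tuple).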
1971 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
1972 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
1973 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
1974 llvm::Type *ResultType = ConvertType(E->getType());
1975 // s_sendmsg_rtn is mangled using return type only.
1976 Function *F =
1977 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
1978 return Builder.CreateCall(F, {Arg});
1979 }
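// Example (illustrative): mangling on the return type alone gives e.g.
//   %r = call i32 @llvm.amdgcn.s.sendmsg.rtn.i32(i32 %msg)
// or the i64 overload for the 64-bit builtin variant.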
1980 case AMDGPU::BI__builtin_amdgcn_permlane16_swap:
1981 case AMDGPU::BI__builtin_amdgcn_permlane32_swap: {
1982 // Because builtin types are limited, and the intrinsic uses a struct/pair
1983 // output, marshal the pair-of-i32 to <2 x i32>.
1984 Value *VDstOld = EmitScalarExpr(E->getArg(0));
1985 Value *VSrcOld = EmitScalarExpr(E->getArg(1));
1986 Value *FI = EmitScalarExpr(E->getArg(2));
1987 Value *BoundCtrl = EmitScalarExpr(E->getArg(3));
1988 Function *F =
1989 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16_swap
1990 ? Intrinsic::amdgcn_permlane16_swap
1991 : Intrinsic::amdgcn_permlane32_swap);
1992 llvm::CallInst *Call =
1993 Builder.CreateCall(F, {VDstOld, VSrcOld, FI, BoundCtrl});
1994
1995 llvm::Value *Elt0 = Builder.CreateExtractValue(Call, 0);
1996 llvm::Value *Elt1 = Builder.CreateExtractValue(Call, 1);
1997
1998 llvm::Type *ResultType = ConvertType(E->getType());
1999
2000 llvm::Value *Insert0 = Builder.CreateInsertElement(
2001 llvm::PoisonValue::get(ResultType), Elt0, UINT64_C(0));
2002 llvm::Value *AsVector =
2003 Builder.CreateInsertElement(Insert0, Elt1, UINT64_C(1));
2004 return AsVector;
2005 }
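// Example (illustrative sketch): the { i32, i32 } intrinsic result is
// repacked into the builtin's <2 x i32> return value:
//   %pair = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old,
//               i32 %vsrc_old, i1 %fi, i1 %bound_ctrl)
//   %e0  = extractvalue { i32, i32 } %pair, 0
//   %e1  = extractvalue { i32, i32 } %pair, 1
//   %v0  = insertelement <2 x i32> poison, i32 %e0, i64 0
//   %res = insertelement <2 x i32> %v0, i32 %e1, i64 1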
2006 case AMDGPU::BI__builtin_amdgcn_bitop3_b32:
2007 case AMDGPU::BI__builtin_amdgcn_bitop3_b16:
2008 return emitBuiltinWithOneOverloadedType<4>(*this, E,
2009 Intrinsic::amdgcn_bitop3);
2010 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc: {
2011 // TODO: LLVM has this overloaded to allow for fat pointers, but since
2012 // those haven't been plumbed through to Clang yet, default to creating the
2013 // resource type.
2014 SmallVector<Value *, 4> Args;
2015 for (unsigned I = 0; I < 4; ++I)
2016 Args.push_back(EmitScalarExpr(E->getArg(I)));
2017 llvm::PointerType *RetTy = llvm::PointerType::get(
2018 Builder.getContext(), llvm::AMDGPUAS::BUFFER_RESOURCE);
2019 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_make_buffer_rsrc,
2020 {RetTy, Args[0]->getType()});
2021 return Builder.CreateCall(F, Args);
2022 }
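// Example (illustrative, operand types assumed): mangled on the result and
// base-pointer types, this yields e.g.
//   %rsrc = call ptr addrspace(8)
//           @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr %base, i16 %stride,
//               i32 %num_records, i32 %flags)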
2023 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
2024 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
2025 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
2026 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
2027 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
2028 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
2029 return emitBuiltinWithOneOverloadedType<5>(
2030 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
2031 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
2032 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
2033 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
2034 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
2035 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
2036 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
2037 llvm::Type *RetTy = nullptr;
2038 switch (BuiltinID) {
2039 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
2040 RetTy = Int8Ty;
2041 break;
2042 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
2043 RetTy = Int16Ty;
2044 break;
2045 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
2046 RetTy = Int32Ty;
2047 break;
2048 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
2049 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/2);
2050 break;
2051 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
2052 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/3);
2053 break;
2054 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
2055 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/4);
2056 break;
2057 }
2058 Function *F =
2059 CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
2060 return Builder.CreateCall(
2061 F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
2062 EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
2063 }
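// Example (illustrative): the b96 form selects RetTy = <3 x i32>, producing
//   %r = call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(
//            ptr addrspace(8) %rsrc, i32 %offset, i32 %soffset, i32 %aux)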
2064 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
2065 return emitBuiltinWithOneOverloadedType<5>(
2066 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_add);
2067 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:
2068 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
2069 return emitBuiltinWithOneOverloadedType<5>(
2070 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd);
2071 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32:
2072 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64:
2073 return emitBuiltinWithOneOverloadedType<5>(
2074 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin);
2075 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
2076 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
2077 return emitBuiltinWithOneOverloadedType<5>(
2078 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax);
2079 case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
2080 return emitBuiltinWithOneOverloadedType<2>(*this, E,
2081 Intrinsic::amdgcn_s_prefetch_data);
2082 case Builtin::BIlogbf:
2083 case Builtin::BI__builtin_logbf: {
2084 Value *Src0 = EmitScalarExpr(E->getArg(0));
2085 Function *FrExpFunc = CGM.getIntrinsic(
2086 Intrinsic::frexp, {Src0->getType(), Builder.getInt32Ty()});
2087 CallInst *FrExp = Builder.CreateCall(FrExpFunc, Src0);
2088 Value *Exp = Builder.CreateExtractValue(FrExp, 1);
2089 Value *Add = Builder.CreateAdd(
2090 Exp, ConstantInt::getSigned(Exp->getType(), -1), "", false, true);
2091 Value *SIToFP = Builder.CreateSIToFP(Add, Builder.getFloatTy());
2092 Value *Fabs =
2093 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs);
2094 Value *FCmpONE = Builder.CreateFCmpONE(
2095 Fabs, ConstantFP::getInfinity(Builder.getFloatTy()));
2096 Value *Sel1 = Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
2097 Value *FCmpOEQ =
2098 Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(Builder.getFloatTy()));
2099 Value *Sel2 = Builder.CreateSelect(
2100 FCmpOEQ,
2101 ConstantFP::getInfinity(Builder.getFloatTy(), /*Negative=*/true), Sel1);
2102 return Sel2;
2103 }
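// The math behind the selects above (explanatory note): frexp decomposes
// x = m * 2^e with |m| in [0.5, 1), so for finite nonzero x,
// logb(x) = e - 1. The first select patches logb(+/-inf) = +inf (via |x|);
// the second patches logb(+/-0) = -inf.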
2104 case Builtin::BIlogb:
2105 case Builtin::BI__builtin_logb: {
2106 Value *Src0 = EmitScalarExpr(E->getArg(0));
2107 Function *FrExpFunc = CGM.getIntrinsic(
2108 Intrinsic::frexp, {Src0->getType(), Builder.getInt32Ty()});
2109 CallInst *FrExp = Builder.CreateCall(FrExpFunc, Src0);
2110 Value *Exp = Builder.CreateExtractValue(FrExp, 1);
2111 Value *Add = Builder.CreateAdd(
2112 Exp, ConstantInt::getSigned(Exp->getType(), -1), "", false, true);
2113 Value *SIToFP = Builder.CreateSIToFP(Add, Builder.getDoubleTy());
2114 Value *Fabs =
2115 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs);
2116 Value *FCmpONE = Builder.CreateFCmpONE(
2117 Fabs, ConstantFP::getInfinity(Builder.getDoubleTy()));
2118 Value *Sel1 = Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
2119 Value *FCmpOEQ =
2120 Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(Builder.getDoubleTy()));
2121 Value *Sel2 = Builder.CreateSelect(
2122 FCmpOEQ,
2123 ConstantFP::getInfinity(Builder.getDoubleTy(), /*Negative=*/true),
2124 Sel1);
2125 return Sel2;
2126 }
2127 case Builtin::BIscalbnf:
2128 case Builtin::BI__builtin_scalbnf:
2129 case Builtin::BIscalbn:
2130 case Builtin::BI__builtin_scalbn:
2131 return emitBinaryExpMaybeConstrainedFPBuiltin(
2132 *this, E, Intrinsic::ldexp, Intrinsic::experimental_constrained_ldexp);
2133 default:
2134 return nullptr;
2135 }
2136}