15#include "llvm/Analysis/ValueTracking.h"
16#include "llvm/IR/IntrinsicsAMDGPU.h"
17#include "llvm/IR/IntrinsicsR600.h"
18#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
19#include "llvm/Support/AMDGPUAddrSpace.h"
22using namespace CodeGen;
static Value *emitBinaryExpMaybeConstrainedFPBuiltin(
    CodeGenFunction &CGF, const CallExpr *E, Intrinsic::ID IntrinsicID,
    Intrinsic::ID ConstrainedIntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  if (CGF.Builder.getIsFPConstrained()) {
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
                                       {Src0->getType(), Src1->getType()});
    return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
  }

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID,
                                     {Src0->getType(), Src1->getType()});
  return CGF.Builder.CreateCall(F, {Src0, Src1});
}
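
// How this helper is reached -- a minimal sketch (the scalbn handling at the
// bottom of this file is the caller): under strict FP semantics the
// constrained intrinsic is selected, otherwise the plain one.
//
//   __builtin_scalbnf(x, n)
//     -> call float @llvm.ldexp.f32.i32(float %x, i32 %n)             // default
//     -> call float @llvm.experimental.constrained.ldexp.f32.i32(...) // strict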
static Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
                                    const CallExpr *E = nullptr) {
  auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
  auto *Call = CGF.Builder.CreateCall(F);
  Call->addRetAttr(
      Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
  Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
  if (!E)
    return Call;
  QualType BuiltinRetType = E->getType();
  auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
  if (RetTy == Call->getType())
    return Call;
  return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
}

static Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
  auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
  auto *Call = CGF.Builder.CreateCall(F);
  Call->addRetAttr(
      Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
  Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
  return Call;
}
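
// Note on the attribute values above (an assumption based on the AMDGPU ABI,
// not spelled out in this file): the HSA kernel dispatch packet is 64 bytes
// and the COV5 implicit kernarg block is 256 bytes, which is why the two
// helpers advertise different dereferenceable extents to the optimizer.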
static Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
  llvm::LoadInst *LD;

  auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;

  if (Cov == CodeObjectVersionKind::COV_None) {
    StringRef Name = "__oclc_ABI_version";
    auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
    if (!ABIVersionC)
      ABIVersionC = new llvm::GlobalVariable(
          CGF.CGM.getModule(), CGF.Int32Ty, false,
          llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
          llvm::GlobalVariable::NotThreadLocal,
          CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));

    Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
                                                      CGF.CGM.getIntAlign());

    Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
        ABIVersion,
        llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));

    // Indexing the implicit kernarg segment.
    Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
        CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);

    // Indexing the HSA kernel_dispatch_packet struct.
    Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(
        CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);

    auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);
    LD = CGF.Builder.CreateLoad(
        Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
  } else {
    Value *GEP = nullptr;
    if (Cov >= CodeObjectVersionKind::COV_5) {
      // Indexing the implicit kernarg segment.
      GEP = CGF.Builder.CreateConstGEP1_32(
          CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
    } else {
      // Indexing the HSA kernel_dispatch_packet struct.
      GEP = CGF.Builder.CreateConstGEP1_32(
          CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
    }
    LD = CGF.Builder.CreateLoad(
        Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
  }

  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
  llvm::MDNode *RNode = MDHelper.createRange(
      APInt(16, 1),
      APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
  LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
  LD->setMetadata(llvm::LLVMContext::MD_noundef,
                  llvm::MDNode::get(CGF.getLLVMContext(), {}));
  LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
                  llvm::MDNode::get(CGF.getLLVMContext(), {}));
  return LD;
}
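
// Offsets used above (assumed ABI layout): with COV5+ the work-group sizes
// are three u16 fields at bytes 12/14/16 of the implicit kernarg segment;
// before COV5 they are hsa_kernel_dispatch_packet_t::workgroup_size_{x,y,z},
// u16 fields starting at byte 4 of the dispatch packet. The !range metadata
// records that a work-group dimension is at least 1 and at most the maximum
// OpenCL work-group size.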
static Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
  const unsigned XOffset = 12;
  auto *DP = EmitAMDGPUDispatchPtr(CGF);
  // Indexing the HSA kernel_dispatch_packet struct.
  auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
  auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
  auto *LD = CGF.Builder.CreateLoad(
      Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));
  llvm::MDBuilder MDB(CGF.getLLVMContext());
  // The grid size is known to be non-zero.
  LD->setMetadata(llvm::LLVMContext::MD_range,
                  MDB.createRange(APInt(32, 1), APInt::getZero(32)));
  LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
                  llvm::MDNode::get(CGF.getLLVMContext(), {}));
  return LD;
}
static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
                                      llvm::Type *RegisterType,
                                      llvm::Type *ValueType, bool isExecHi) {
  CodeGen::CGBuilderTy &Builder = CGF.Builder;
  CodeGen::CodeGenModule &CGM = CGF.CGM;

  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
  llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});

  if (isExecHi) {
    Value *Rt2 = Builder.CreateLShr(Call, 32);
    Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
    return Rt2;
  }
  return Call;
}
static Value *emitFPIntBuiltin(CodeGenFunction &CGF, const CallExpr *E,
                               unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, {Src0, Src1});
}
void CodeGenFunction::ProcessOrderScopeAMDGCN(llvm::Value *Order,
                                              llvm::Value *Scope,
                                              llvm::AtomicOrdering &AO,
                                              llvm::SyncScope::ID &SSID) {
  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
  // Map C11/C++11 memory ordering to LLVM memory ordering.
  assert(llvm::isValidAtomicOrderingCABI(ord));
  switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
  case llvm::AtomicOrderingCABI::acquire:
  case llvm::AtomicOrderingCABI::consume:
    AO = llvm::AtomicOrdering::Acquire;
    break;
  case llvm::AtomicOrderingCABI::release:
    AO = llvm::AtomicOrdering::Release;
    break;
  case llvm::AtomicOrderingCABI::acq_rel:
    AO = llvm::AtomicOrdering::AcquireRelease;
    break;
  case llvm::AtomicOrderingCABI::seq_cst:
    AO = llvm::AtomicOrdering::SequentiallyConsistent;
    break;
  case llvm::AtomicOrderingCABI::relaxed:
    AO = llvm::AtomicOrdering::Monotonic;
    break;
  }
  // Some of the atomic builtins take the scope as a string name.
  StringRef scp;
  if (llvm::getConstantStringInfo(Scope, scp)) {
    SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
    return;
  }
  // Older builtins had an enum argument for the scope.
  int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
  switch (scope) {
  case 0: // __MEMORY_SCOPE_SYSTEM
    SSID = llvm::SyncScope::System;
    break;
  case 1: // __MEMORY_SCOPE_DEVICE
    SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
    break;
  case 2: // __MEMORY_SCOPE_WRKGRP
    SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
    break;
  case 3: // __MEMORY_SCOPE_WVFRNT
    SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
    break;
  case 4: // __MEMORY_SCOPE_SINGLE
    SSID = llvm::SyncScope::SingleThread;
    break;
  default:
    SSID = llvm::SyncScope::System;
    break;
  }
}
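
// Sketch of the resulting mapping, assuming the usual C ABI encodings: a call
// such as
//   __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
// yields `fence syncscope("agent") acquire`, and __ATOMIC_RELAXED becomes
// LLVM's "monotonic" ordering.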
llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
                                                          unsigned Idx,
                                                          const CallExpr *E) {
  llvm::Value *Arg = nullptr;
  if ((ICEArguments & (1 << Idx)) == 0) {
    Arg = EmitScalarExpr(E->getArg(Idx));
  } else {
    // If this is required to be a constant, constant fold it so that we know
    // that the generated intrinsic gets a ConstantInt.
    std::optional<llvm::APSInt> Result =
        E->getArg(Idx)->getIntegerConstantExpr(getContext());
    assert(Result && "Expected argument to be a constant");
    Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
  }
  return Arg;
}
void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
                                                     const CallExpr *E) {
  constexpr const char *Tag = "amdgpu-synchronize-as";

  LLVMContext &Ctx = Inst->getContext();
  SmallVector<MMRAMetadata::TagT, 3> MMRAs;
  for (unsigned K = 2; K < E->getNumArgs(); ++K) {
    llvm::Value *V = EmitScalarExpr(E->getArg(K));
    StringRef AS;
    if (llvm::getConstantStringInfo(V, AS)) {
      MMRAs.push_back({Tag, AS});
      continue;
    }
    CGM.Error(E->getExprLoc(),
              "expected an address space name as a string literal");
  }

  llvm::sort(MMRAs);
  MMRAs.erase(llvm::unique(MMRAs), MMRAs.end());
  Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
}
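
// Usage sketch (hedged): the string arguments after order and scope name the
// address spaces the fence must synchronize, e.g.
//   __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local");
// tags the fence with !mmra {"amdgpu-synchronize-as", "local"} so the backend
// may relax synchronization for the other address spaces.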
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
                                              const CallExpr *E) {
  llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
  llvm::SyncScope::ID SSID;
  switch (BuiltinID) {
  case AMDGPU::BI__builtin_amdgcn_div_scale:
  case AMDGPU::BI__builtin_amdgcn_div_scalef: {
    // Translate from the intrinsic's struct return to the builtin's out
    // argument.
    Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
    llvm::Value *X = EmitScalarExpr(E->getArg(0));
    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
    llvm::Value *Z = EmitScalarExpr(E->getArg(2));
    llvm::Function *Callee =
        CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, X->getType());
    llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
    llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
    llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
    llvm::Type *RealFlagType = FlagOutPtr.getElementType();
    llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
    Builder.CreateStore(FlagExt, FlagOutPtr);
    return Result;
  }
  case AMDGPU::BI__builtin_amdgcn_div_fmas:
  case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
    llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
    llvm::Function *F =
        CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, Src0->getType());
    llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
    return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
  }
  case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
    return emitBuiltinWithOneOverloadedType<2>(*this, E,
                                               Intrinsic::amdgcn_ds_swizzle);
  case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
  case AMDGPU::BI__builtin_amdgcn_mov_dpp:
  case AMDGPU::BI__builtin_amdgcn_update_dpp: {
    llvm::SmallVector<llvm::Value *, 6> Args;
    // Find out if any arguments are required to be integer constant
    // expressions.
    unsigned ICEArguments = 0;
    ASTContext::GetBuiltinTypeError Error;
    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
    assert(Error == ASTContext::GE_None && "Should not codegen an error");
    llvm::Type *DataTy = ConvertType(E->getArg(0)->getType());
    unsigned Size = DataTy->getPrimitiveSizeInBits();
    llvm::Type *IntTy =
        llvm::IntegerType::get(Builder.getContext(), std::max(Size, 32u));
    Function *F =
        CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8
                             ? Intrinsic::amdgcn_mov_dpp8
                             : Intrinsic::amdgcn_update_dpp,
                         IntTy);
    assert(E->getNumArgs() == 5 || E->getNumArgs() == 6 ||
           E->getNumArgs() == 2);
    bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;
    if (InsertOld)
      Args.push_back(llvm::PoisonValue::get(IntTy));
    for (unsigned I = 0; I != E->getNumArgs(); ++I) {
      llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E);
      if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2u : 1u) &&
          Size < 32) {
        if (!DataTy->isIntegerTy())
          V = Builder.CreateBitCast(
              V, llvm::IntegerType::get(Builder.getContext(), Size));
        V = Builder.CreateZExtOrBitCast(V, IntTy);
      }
      llvm::Type *ExpTy =
          F->getFunctionType()->getFunctionParamType(I + InsertOld);
      Args.push_back(Builder.CreateTruncOrBitCast(V, ExpTy));
    }
    Value *V = Builder.CreateCall(F, Args);
    if (Size < 32 && !DataTy->isIntegerTy())
      V = Builder.CreateTrunc(
          V, llvm::IntegerType::get(Builder.getContext(), Size));
    return Builder.CreateTruncOrBitCast(V, DataTy);
  }
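
  // Why the widening dance above: the DPP intrinsics are only overloaded on
  // types of 32 bits or more, so sub-32-bit payloads (half, i16, etc.) are
  // bitcast to an integer, zero-extended to i32 for the call, and then
  // truncated/bitcast back to the original type afterwards.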
  case AMDGPU::BI__builtin_amdgcn_permlane16:
  case AMDGPU::BI__builtin_amdgcn_permlanex16:
    return emitBuiltinWithOneOverloadedType<6>(
        *this, E,
        BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
            ? Intrinsic::amdgcn_permlane16
            : Intrinsic::amdgcn_permlanex16);
  case AMDGPU::BI__builtin_amdgcn_permlane64:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_permlane64);
  case AMDGPU::BI__builtin_amdgcn_readlane:
    return emitBuiltinWithOneOverloadedType<2>(*this, E,
                                               Intrinsic::amdgcn_readlane);
  case AMDGPU::BI__builtin_amdgcn_readfirstlane:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_readfirstlane);
  case AMDGPU::BI__builtin_amdgcn_div_fixup:
  case AMDGPU::BI__builtin_amdgcn_div_fixupf:
  case AMDGPU::BI__builtin_amdgcn_div_fixuph:
    return emitBuiltinWithOneOverloadedType<3>(*this, E,
                                               Intrinsic::amdgcn_div_fixup);
  case AMDGPU::BI__builtin_amdgcn_trig_preop:
  case AMDGPU::BI__builtin_amdgcn_trig_preopf:
    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
  case AMDGPU::BI__builtin_amdgcn_rcp:
  case AMDGPU::BI__builtin_amdgcn_rcpf:
  case AMDGPU::BI__builtin_amdgcn_rcph:
  case AMDGPU::BI__builtin_amdgcn_rcp_bf16:
    return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rcp);
  case AMDGPU::BI__builtin_amdgcn_sqrt:
  case AMDGPU::BI__builtin_amdgcn_sqrtf:
  case AMDGPU::BI__builtin_amdgcn_sqrth:
  case AMDGPU::BI__builtin_amdgcn_sqrt_bf16:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_sqrt);
  case AMDGPU::BI__builtin_amdgcn_rsq:
  case AMDGPU::BI__builtin_amdgcn_rsqf:
  case AMDGPU::BI__builtin_amdgcn_rsqh:
  case AMDGPU::BI__builtin_amdgcn_rsq_bf16:
    return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rsq);
  case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
  case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_rsq_clamp);
  case AMDGPU::BI__builtin_amdgcn_sinf:
  case AMDGPU::BI__builtin_amdgcn_sinh:
  case AMDGPU::BI__builtin_amdgcn_sin_bf16:
    return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_sin);
  case AMDGPU::BI__builtin_amdgcn_cosf:
  case AMDGPU::BI__builtin_amdgcn_cosh:
  case AMDGPU::BI__builtin_amdgcn_cos_bf16:
    return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_cos);
  case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
    return EmitAMDGPUDispatchPtr(*this, E);
  case AMDGPU::BI__builtin_amdgcn_logf:
  case AMDGPU::BI__builtin_amdgcn_log_bf16:
    return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log);
  case AMDGPU::BI__builtin_amdgcn_exp2f:
  case AMDGPU::BI__builtin_amdgcn_exp2_bf16:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_exp2);
  case AMDGPU::BI__builtin_amdgcn_log_clampf:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_log_clamp);
  case AMDGPU::BI__builtin_amdgcn_ldexp:
  case AMDGPU::BI__builtin_amdgcn_ldexpf: {
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Function *F =
        CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
    return Builder.CreateCall(F, {Src0, Src1});
  }
  case AMDGPU::BI__builtin_amdgcn_ldexph: {
    // The raw instruction has a different behavior for out of bounds exponent
    // values (implicit truncation instead of saturate to short_min/short_max).
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Function *F =
        CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
    return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
  }
  case AMDGPU::BI__builtin_amdgcn_frexp_mant:
  case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
  case AMDGPU::BI__builtin_amdgcn_frexp_manth:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_frexp_mant);
  case AMDGPU::BI__builtin_amdgcn_frexp_exp:
  case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
    Value *Src0 = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
                                   {Builder.getInt32Ty(), Src0->getType()});
    return Builder.CreateCall(F, Src0);
  }
  case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
    Value *Src0 = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
                                   {Builder.getInt16Ty(), Src0->getType()});
    return Builder.CreateCall(F, Src0);
  }
  case AMDGPU::BI__builtin_amdgcn_fract:
  case AMDGPU::BI__builtin_amdgcn_fractf:
  case AMDGPU::BI__builtin_amdgcn_fracth:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_fract);
  case AMDGPU::BI__builtin_amdgcn_lerp:
    return emitBuiltinWithOneOverloadedType<3>(*this, E,
                                               Intrinsic::amdgcn_lerp);
  case AMDGPU::BI__builtin_amdgcn_ubfe:
    return emitBuiltinWithOneOverloadedType<3>(*this, E,
                                               Intrinsic::amdgcn_ubfe);
  case AMDGPU::BI__builtin_amdgcn_sbfe:
    return emitBuiltinWithOneOverloadedType<3>(*this, E,
                                               Intrinsic::amdgcn_sbfe);
  case AMDGPU::BI__builtin_amdgcn_ballot_w32:
  case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
    llvm::Type *ResultType = ConvertType(E->getType());
    llvm::Value *Src = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
    return Builder.CreateCall(F, {Src});
  }
  case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w32:
  case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w64: {
    llvm::Value *Src = EmitScalarExpr(E->getArg(0));
    llvm::Function *F =
        CGM.getIntrinsic(Intrinsic::amdgcn_inverse_ballot, {Src->getType()});
    return Builder.CreateCall(F, {Src});
  }
  case AMDGPU::BI__builtin_amdgcn_tanhf:
  case AMDGPU::BI__builtin_amdgcn_tanhh:
  case AMDGPU::BI__builtin_amdgcn_tanh_bf16:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::amdgcn_tanh);
  case AMDGPU::BI__builtin_amdgcn_uicmp:
  case AMDGPU::BI__builtin_amdgcn_uicmpl:
  case AMDGPU::BI__builtin_amdgcn_sicmp:
  case AMDGPU::BI__builtin_amdgcn_sicmpl: {
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
    // FIXME-GFX10: How should 32 bit mask be handled?
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
                                   {Builder.getInt64Ty(), Src0->getType()});
    return Builder.CreateCall(F, {Src0, Src1, Src2});
  }
  case AMDGPU::BI__builtin_amdgcn_fcmp:
  case AMDGPU::BI__builtin_amdgcn_fcmpf: {
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
    // FIXME-GFX10: How should 32 bit mask be handled?
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
                                   {Builder.getInt64Ty(), Src0->getType()});
    return Builder.CreateCall(F, {Src0, Src1, Src2});
  }
  case AMDGPU::BI__builtin_amdgcn_class:
  case AMDGPU::BI__builtin_amdgcn_classf:
  case AMDGPU::BI__builtin_amdgcn_classh:
    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
  case AMDGPU::BI__builtin_amdgcn_fmed3f:
  case AMDGPU::BI__builtin_amdgcn_fmed3h:
    return emitBuiltinWithOneOverloadedType<3>(*this, E,
                                               Intrinsic::amdgcn_fmed3);
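
  // A note on the helper used throughout this switch (hedged; its definition
  // lives elsewhere in this file family): emitBuiltinWithOneOverloadedType<N>
  // emits the builtin's first N arguments unchanged and calls the named
  // intrinsic overloaded on the first argument's type, so most cases reduce
  // to a one-line builtin-to-intrinsic mapping.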
  case AMDGPU::BI__builtin_amdgcn_ds_append:
  case AMDGPU::BI__builtin_amdgcn_ds_consume: {
    Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append
                               ? Intrinsic::amdgcn_ds_append
                               : Intrinsic::amdgcn_ds_consume;
    Value *Src0 = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrin, {Src0->getType()});
    return Builder.CreateCall(F, {Src0, Builder.getFalse()});
  }
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
  case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
  case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
  case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
  case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
  case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
  case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
  case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
  case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
  case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
  case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
      IID = Intrinsic::amdgcn_global_load_tr_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
    case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
      IID = Intrinsic::amdgcn_global_load_tr_b128;
      break;
    case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
      IID = Intrinsic::amdgcn_global_load_tr4_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
      IID = Intrinsic::amdgcn_global_load_tr6_b96;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
      IID = Intrinsic::amdgcn_ds_load_tr4_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
      IID = Intrinsic::amdgcn_ds_load_tr6_b96;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
      IID = Intrinsic::amdgcn_ds_load_tr8_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
    case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
    case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
      IID = Intrinsic::amdgcn_ds_load_tr16_b128;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
      IID = Intrinsic::amdgcn_ds_read_tr4_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
      IID = Intrinsic::amdgcn_ds_read_tr8_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
      IID = Intrinsic::amdgcn_ds_read_tr6_b96;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16:
    case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
    case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
      IID = Intrinsic::amdgcn_ds_read_tr16_b64;
      break;
    }
    llvm::Type *LoadTy = ConvertType(E->getType());
    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
    llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
    return Builder.CreateCall(F, {Addr});
  }
  case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
  case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
  case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
  case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
  case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
  case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
      IID = Intrinsic::amdgcn_global_load_monitor_b32;
      break;
    case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
      IID = Intrinsic::amdgcn_global_load_monitor_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
      IID = Intrinsic::amdgcn_global_load_monitor_b128;
      break;
    case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
      IID = Intrinsic::amdgcn_flat_load_monitor_b32;
      break;
    case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
      IID = Intrinsic::amdgcn_flat_load_monitor_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128:
      IID = Intrinsic::amdgcn_flat_load_monitor_b128;
      break;
    }
    llvm::Type *LoadTy = ConvertType(E->getType());
    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
    llvm::Value *Val = EmitScalarExpr(E->getArg(1));
    llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
    return Builder.CreateCall(F, {Addr, Val});
  }
  case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:
  case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:
  case AMDGPU::BI__builtin_amdgcn_cluster_load_b128: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:
      IID = Intrinsic::amdgcn_cluster_load_b32;
      break;
    case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:
      IID = Intrinsic::amdgcn_cluster_load_b64;
      break;
    case AMDGPU::BI__builtin_amdgcn_cluster_load_b128:
      IID = Intrinsic::amdgcn_cluster_load_b128;
      break;
    }
    SmallVector<Value *, 3> Args;
    for (int i = 0, e = E->getNumArgs(); i != e; ++i)
      Args.push_back(EmitScalarExpr(E->getArg(i)));
    llvm::Function *F = CGM.getIntrinsic(IID, {ConvertType(E->getType())});
    return Builder.CreateCall(F, {Args});
  }
  case AMDGPU::BI__builtin_amdgcn_load_to_lds: {
    return emitBuiltinWithOneOverloadedType<5>(*this, E,
                                               Intrinsic::amdgcn_load_to_lds);
  }
  case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
    Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
                                   {llvm::Type::getInt64Ty(getLLVMContext())});
    return Builder.CreateCall(F);
  }
  case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
    Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv,
                                   {llvm::Type::getInt64Ty(getLLVMContext())});
    llvm::Value *Env = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(F, {Env});
  }
  case AMDGPU::BI__builtin_amdgcn_read_exec:
    return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
  case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
    return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
  case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
    return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
    llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
    llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
    llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
    llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
    llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
    llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));

    // The builtins take these arguments as vec4 where the last element is
    // ignored. The intrinsic takes them as vec3.
    RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin, {0, 1, 2});
    RayDir = Builder.CreateShuffleVector(RayDir, RayDir, {0, 1, 2});
    RayInverseDir =
        Builder.CreateShuffleVector(RayInverseDir, RayInverseDir, {0, 1, 2});

    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
                                   {NodePtr->getType(), RayDir->getType()});
    return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
                                  RayInverseDir, TextureDescr});
  }
  case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
  case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
      IID = Intrinsic::amdgcn_image_bvh8_intersect_ray;
      break;
    case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray:
      IID = Intrinsic::amdgcn_image_bvh_dual_intersect_ray;
      break;
    }
    llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
    llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
    llvm::Value *InstanceMask = EmitScalarExpr(E->getArg(2));
    llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(3));
    llvm::Value *RayDir = EmitScalarExpr(E->getArg(4));
    llvm::Value *Offset = EmitScalarExpr(E->getArg(5));
    llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(6));

    Address RetRayOriginPtr = EmitPointerWithAlignment(E->getArg(7));
    Address RetRayDirPtr = EmitPointerWithAlignment(E->getArg(8));

    llvm::Function *IntrinsicFunc = CGM.getIntrinsic(IID);

    llvm::CallInst *CI = Builder.CreateCall(
        IntrinsicFunc, {NodePtr, RayExtent, InstanceMask, RayOrigin, RayDir,
                        Offset, TextureDescr});

    llvm::Value *RetVData = Builder.CreateExtractValue(CI, 0);
    llvm::Value *RetRayOrigin = Builder.CreateExtractValue(CI, 1);
    llvm::Value *RetRayDir = Builder.CreateExtractValue(CI, 2);

    Builder.CreateStore(RetRayOrigin, RetRayOriginPtr);
    Builder.CreateStore(RetRayDir, RetRayDirPtr);

    return RetVData;
  }
  case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
  case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
  case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
  case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
      IID = Intrinsic::amdgcn_ds_bvh_stack_rtn;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
      IID = Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
      IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn:
      IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn;
      break;
    }

    SmallVector<Value *, 4> Args;
    for (int i = 0, e = E->getNumArgs(); i != e; ++i)
      Args.push_back(EmitScalarExpr(E->getArg(i)));

    Function *F = CGM.getIntrinsic(IID);
    Value *Call = Builder.CreateCall(F, Args);
    Value *Rtn = Builder.CreateExtractValue(Call, 0);
    Value *A = Builder.CreateExtractValue(Call, 1);
    llvm::Type *RetTy = ConvertType(E->getType());
    Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
                                            (uint64_t)0);
    // The second element may be narrower than the vector element type
    // (e.g. push8_pop2 returns {i64, i32}); zero-extend it if so.
    if (A->getType()->getPrimitiveSizeInBits() <
        RetTy->getScalarType()->getPrimitiveSizeInBits())
      A = Builder.CreateZExt(A, RetTy->getScalarType());

    return Builder.CreateInsertElement(I0, A, 1);
  }
  case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
  case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
    llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
    Function *F = CGM.getIntrinsic(
        BuiltinID == AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4
            ? Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4
            : Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4,
        {VT, VT});

    SmallVector<Value *, 9> Args;
    for (unsigned I = 0, N = E->getNumArgs(); I != N; ++I)
      Args.push_back(EmitScalarExpr(E->getArg(I)));
    return Builder.CreateCall(F, Args);
  }
  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
  case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
  case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
  case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
  case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
  case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
  case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
  case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
  case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
  case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
  case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
  case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
  case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
  case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
  case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:
  case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:
  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
  case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
  case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
  case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
  case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
  case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
  case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
  case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
  case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8: {
    // Indices of the call operands whose types mangle the intrinsic.
    SmallVector<unsigned, 4> ArgsForMatchingMatrixTypes;
    // On GFX12, intrinsics with a 16-bit accumulator use a packed layout;
    // the variable opsel argument is then always appended as "false".
    bool AppendFalseForOpselArg = false;
    unsigned BuiltinWMMAOp;
    // Set when the result type differs from the accumulator type and must be
    // part of the mangling.
    bool NeedReturnType = false;

    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
      ArgsForMatchingMatrixTypes = {2, 0};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
      ArgsForMatchingMatrixTypes = {2, 0};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
      AppendFalseForOpselArg = true;
      [[fallthrough]];
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
      ArgsForMatchingMatrixTypes = {2, 0};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
    case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
      AppendFalseForOpselArg = true;
      [[fallthrough]];
    case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
    case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
      ArgsForMatchingMatrixTypes = {2, 0};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
      ArgsForMatchingMatrixTypes = {2, 0};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
    case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
      ArgsForMatchingMatrixTypes = {2, 0};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
      ArgsForMatchingMatrixTypes = {4, 1};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
      ArgsForMatchingMatrixTypes = {4, 1};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
      ArgsForMatchingMatrixTypes = {2, 0};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
      ArgsForMatchingMatrixTypes = {2, 0};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
      ArgsForMatchingMatrixTypes = {2, 0};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
      ArgsForMatchingMatrixTypes = {2, 0};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
      ArgsForMatchingMatrixTypes = {4, 1};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
      ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
      ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
    case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
      ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
    case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
      ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
    case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
      ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
    case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
      ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
    case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
      ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
      ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
      ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
      ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
      ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
      ArgsForMatchingMatrixTypes = {5, 1};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x4_f32;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
      ArgsForMatchingMatrixTypes = {5, 1};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_bf16;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
      ArgsForMatchingMatrixTypes = {5, 1};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_f16;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
      ArgsForMatchingMatrixTypes = {5, 1};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x32_f16;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
      ArgsForMatchingMatrixTypes = {5, 1};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
      NeedReturnType = true;
      ArgsForMatchingMatrixTypes = {1, 5};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
      ArgsForMatchingMatrixTypes = {3, 0};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_fp8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
      ArgsForMatchingMatrixTypes = {3, 0};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_bf8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
      ArgsForMatchingMatrixTypes = {3, 0};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_fp8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
      ArgsForMatchingMatrixTypes = {3, 0};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_bf8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
      ArgsForMatchingMatrixTypes = {3, 0};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_fp8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
      ArgsForMatchingMatrixTypes = {3, 0};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_bf8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
      ArgsForMatchingMatrixTypes = {3, 0};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_fp8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
      ArgsForMatchingMatrixTypes = {3, 0};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_bf8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
      ArgsForMatchingMatrixTypes = {3, 0};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_fp8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
      ArgsForMatchingMatrixTypes = {3, 0};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_bf8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
      ArgsForMatchingMatrixTypes = {3, 0};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_fp8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
      ArgsForMatchingMatrixTypes = {3, 0};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_bf8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
      ArgsForMatchingMatrixTypes = {3, 0};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_fp8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
      ArgsForMatchingMatrixTypes = {3, 0};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_bf8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
      ArgsForMatchingMatrixTypes = {3, 0};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_fp8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
      ArgsForMatchingMatrixTypes = {3, 0};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_bf8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
      ArgsForMatchingMatrixTypes = {4, 1};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x64_iu8;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
      ArgsForMatchingMatrixTypes = {5, 1, 3};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
      ArgsForMatchingMatrixTypes = {5, 1, 3};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
      ArgsForMatchingMatrixTypes = {5, 1, 3};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
      ArgsForMatchingMatrixTypes = {3, 0, 1};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_32x16x128_f4;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:
      ArgsForMatchingMatrixTypes = {3, 0, 1};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_32x16x128_f4;
      break;
    case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:
      ArgsForMatchingMatrixTypes = {3, 0, 1};
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_32x16x128_f4;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
      ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_f16;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
      ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_bf16;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
      ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x64_f16;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
      ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x64_bf16;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
      ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16f32_16x16x64_bf16;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
      ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_fp8;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
      ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_bf8;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
      ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_fp8;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
      ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_bf8;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
      ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_fp8;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
      ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_bf8;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
      ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_fp8;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
      ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_bf8;
      break;
    case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8:
      ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8;
      break;
    }
    SmallVector<Value *, 6> Args;
    for (int i = 0, e = E->getNumArgs(); i != e; ++i)
      Args.push_back(EmitScalarExpr(E->getArg(i)));
    if (AppendFalseForOpselArg)
      Args.push_back(Builder.getFalse());

    SmallVector<llvm::Type *, 6> ArgTypes;
    if (NeedReturnType)
      ArgTypes.push_back(ConvertType(E->getType()));
    for (auto ArgIdx : ArgsForMatchingMatrixTypes)
      ArgTypes.push_back(Args[ArgIdx]->getType());

    Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
    return Builder.CreateCall(F, Args);
  }
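
  // Reading the tables above: the WMMA/SWMMAC intrinsics are overloaded on
  // the matrix element types, and ArgsForMatchingMatrixTypes lists which call
  // operands (by index) supply those types -- e.g. {2, 0} means "mangle on
  // the types of the accumulator (arg 2) and the A/B input (arg 0)". The
  // four-entry SWMMAC lists additionally name the sparsity-index operand.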
  case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
    return EmitAMDGPUWorkGroupSize(*this, 0);
  case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
    return EmitAMDGPUWorkGroupSize(*this, 1);
  case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
    return EmitAMDGPUWorkGroupSize(*this, 2);

  case AMDGPU::BI__builtin_amdgcn_grid_size_x:
    return EmitAMDGPUGridSize(*this, 0);
  case AMDGPU::BI__builtin_amdgcn_grid_size_y:
    return EmitAMDGPUGridSize(*this, 1);
  case AMDGPU::BI__builtin_amdgcn_grid_size_z:
    return EmitAMDGPUGridSize(*this, 2);
  case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
  case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
    return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                               Intrinsic::r600_recipsqrt_ieee);
  case AMDGPU::BI__builtin_amdgcn_alignbit: {
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
    Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
    return Builder.CreateCall(F, {Src0, Src1, Src2});
  }
  case AMDGPU::BI__builtin_amdgcn_fence: {
    ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
                            EmitScalarExpr(E->getArg(1)), AO, SSID);
    FenceInst *Fence = Builder.CreateFence(AO, SSID);
    if (E->getNumArgs() > 2)
      AddAMDGPUFenceAddressSpaceMMRA(Fence, E);
    return Fence;
  }
  case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
  case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
  case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
  case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
  case AMDGPU::BI__builtin_amdgcn_ds_faddf:
  case AMDGPU::BI__builtin_amdgcn_ds_fminf:
  case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
    llvm::AtomicRMWInst::BinOp BinOp;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
    case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
      BinOp = llvm::AtomicRMWInst::UIncWrap;
      break;
    case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
    case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
      BinOp = llvm::AtomicRMWInst::UDecWrap;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_faddf:
    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
      BinOp = llvm::AtomicRMWInst::FAdd;
      break;
    case AMDGPU::BI__builtin_amdgcn_ds_fminf:
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
      BinOp = llvm::AtomicRMWInst::FMin;
      break;
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
    case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
      BinOp = llvm::AtomicRMWInst::FMax;
      break;
    }

    Address Ptr = CheckAtomicAlignment(*this, E);
    Value *Val = EmitScalarExpr(E->getArg(1));
    llvm::Type *OrigTy = Val->getType();
    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();

    bool Volatile;
    if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
        BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
        BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
      // These older ds builtins take an explicit volatile argument.
      Volatile =
          cast<ConstantInt>(EmitScalarExpr(E->getArg(4)))->getZExtValue();
    } else {
      // Infer volatile from the passed pointer type.
      Volatile =
          PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
    }

    if (E->getNumArgs() >= 4) {
      // Some of the builtins have explicit ordering and scope arguments.
      ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
                              EmitScalarExpr(E->getArg(3)), AO, SSID);
    } else {
      // Most of the builtins do not have syncscope/order arguments; use agent
      // scope and a monotonic ordering.
      SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
      AO = AtomicOrdering::Monotonic;

      // The v2bf16 builtins use i16 vectors instead of a natural bfloat type.
      if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
          BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
          BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
        llvm::Type *V2BF16Ty = FixedVectorType::get(
            llvm::Type::getBFloatTy(Builder.getContext()), 2);
        Val = Builder.CreateBitCast(Val, V2BF16Ty);
      }
    }

    llvm::AtomicRMWInst *RMW =
        Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
    if (Volatile)
      RMW->setVolatile(true);

    unsigned AddrSpace = Ptr.getType()->getAddressSpace();
    if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
      // Most targets require "amdgpu.no.fine.grained.memory" to emit the
      // native instruction for float/double atomicrmw.
      llvm::MDTuple *EmptyMD = MDNode::get(getLLVMContext(), {});
      RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);

      // "amdgpu.ignore.denormal.mode" is needed to emit a native float fadd
      // instruction on targets where denormal handling would otherwise force
      // a cmpxchg expansion.
      if (BinOp == llvm::AtomicRMWInst::FAdd && Val->getType()->isFloatTy())
        RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
    }

    return Builder.CreateBitCast(RMW, OrigTy);
  }
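
  // Illustrative lowering under the defaults above (a hedged sketch):
  //   __builtin_amdgcn_global_atomic_fadd_f32(p, v);
  // becomes roughly
  //   %old = atomicrmw fadd ptr addrspace(1) %p, float %v
  //          syncscope("agent") monotonic,
  //          !amdgpu.no.fine.grained.memory !{},
  //          !amdgpu.ignore.denormal.mode !{}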
  case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
  case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *ResultType = ConvertType(E->getType());
    // The intrinsic is mangled on its return type.
    Function *F =
        CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
    return Builder.CreateCall(F, {Arg});
  }
  case AMDGPU::BI__builtin_amdgcn_permlane16_swap:
  case AMDGPU::BI__builtin_amdgcn_permlane32_swap: {
    llvm::Value *VDstOld = EmitScalarExpr(E->getArg(0));
    llvm::Value *VSrcOld = EmitScalarExpr(E->getArg(1));
    llvm::Value *FI = EmitScalarExpr(E->getArg(2));
    llvm::Value *BoundCtrl = EmitScalarExpr(E->getArg(3));

    Function *F = CGM.getIntrinsic(
        BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16_swap
            ? Intrinsic::amdgcn_permlane16_swap
            : Intrinsic::amdgcn_permlane32_swap);
    llvm::CallInst *Call =
        Builder.CreateCall(F, {VDstOld, VSrcOld, FI, BoundCtrl});

    llvm::Value *Elt0 = Builder.CreateExtractValue(Call, 0);
    llvm::Value *Elt1 = Builder.CreateExtractValue(Call, 1);

    llvm::Type *ResultType = ConvertType(E->getType());

    llvm::Value *Insert0 = Builder.CreateInsertElement(
        llvm::PoisonValue::get(ResultType), Elt0, UINT64_C(0));
    llvm::Value *AsVector =
        Builder.CreateInsertElement(Insert0, Elt1, UINT64_C(1));
    return AsVector;
  }
  case AMDGPU::BI__builtin_amdgcn_bitop3_b32:
  case AMDGPU::BI__builtin_amdgcn_bitop3_b16:
    return emitBuiltinWithOneOverloadedType<4>(*this, E,
                                               Intrinsic::amdgcn_bitop3);
  case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc: {
    SmallVector<Value *, 4> Args;
    for (unsigned I = 0; I < 4; ++I)
      Args.push_back(EmitScalarExpr(E->getArg(I)));
    llvm::PointerType *RetTy = llvm::PointerType::get(
        Builder.getContext(), llvm::AMDGPUAS::BUFFER_RESOURCE);
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_make_buffer_rsrc,
                                   {RetTy, Args[0]->getType()});
    return Builder.CreateCall(F, Args);
  }
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
    return emitBuiltinWithOneOverloadedType<5>(
        *this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
    llvm::Type *RetTy = nullptr;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
      RetTy = Int8Ty;
      break;
    case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
      RetTy = Int16Ty;
      break;
    case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
      RetTy = Int32Ty;
      break;
    case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
      RetTy = llvm::FixedVectorType::get(Int32Ty, 2);
      break;
    case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
      RetTy = llvm::FixedVectorType::get(Int32Ty, 3);
      break;
    case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
      RetTy = llvm::FixedVectorType::get(Int32Ty, 4);
      break;
    }
    Function *F =
        CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
    return Builder.CreateCall(
        F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
            EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
  }
  case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
    return emitBuiltinWithOneOverloadedType<5>(
        *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_add);
  case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:
  case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
    return emitBuiltinWithOneOverloadedType<5>(
        *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd);
  case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32:
  case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64:
    return emitBuiltinWithOneOverloadedType<5>(
        *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin);
  case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
  case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
    return emitBuiltinWithOneOverloadedType<5>(
        *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax);
  case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
    return emitBuiltinWithOneOverloadedType<2>(
        *this, E, Intrinsic::amdgcn_s_prefetch_data);
  case Builtin::BIlogbf:
  case Builtin::BI__builtin_logbf: {
    Value *Src0 = EmitScalarExpr(E->getArg(0));
    Function *FrExpFunc = CGM.getIntrinsic(
        Intrinsic::frexp, {Src0->getType(), Builder.getInt32Ty()});
    CallInst *FrExp = Builder.CreateCall(FrExpFunc, Src0);
    Value *Exp = Builder.CreateExtractValue(FrExp, 1);
    Value *Add = Builder.CreateAdd(
        Exp, ConstantInt::getSigned(Exp->getType(), -1), "", false, true);
    Value *SIToFP = Builder.CreateSIToFP(Add, Builder.getFloatTy());
    Value *Fabs =
        emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs);
    Value *FCmpONE = Builder.CreateFCmpONE(
        Fabs, ConstantFP::getInfinity(Builder.getFloatTy()));
    Value *Sel1 = Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
    Value *FCmpOEQ =
        Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(Builder.getFloatTy()));
    Value *Sel2 = Builder.CreateSelect(
        FCmpOEQ,
        ConstantFP::getInfinity(Builder.getFloatTy(), /*Negative=*/true), Sel1);
    return Sel2;
  }
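
  // The identity behind this expansion (a brief derivation): frexp decomposes
  // a finite nonzero x as x = m * 2^e with 0.5 <= |m| < 1, so logb(x) = e - 1;
  // the two selects patch up the IEEE special cases logb(+/-inf) = +inf and
  // logb(+/-0) = -inf. NaN falls through the ordered compares unchanged.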
  case Builtin::BIlogb:
  case Builtin::BI__builtin_logb: {
    Value *Src0 = EmitScalarExpr(E->getArg(0));
    Function *FrExpFunc = CGM.getIntrinsic(
        Intrinsic::frexp, {Src0->getType(), Builder.getInt32Ty()});
    CallInst *FrExp = Builder.CreateCall(FrExpFunc, Src0);
    Value *Exp = Builder.CreateExtractValue(FrExp, 1);
    Value *Add = Builder.CreateAdd(
        Exp, ConstantInt::getSigned(Exp->getType(), -1), "", false, true);
    Value *SIToFP = Builder.CreateSIToFP(Add, Builder.getDoubleTy());
    Value *Fabs =
        emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs);
    Value *FCmpONE = Builder.CreateFCmpONE(
        Fabs, ConstantFP::getInfinity(Builder.getDoubleTy()));
    Value *Sel1 = Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
    Value *FCmpOEQ =
        Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(Builder.getDoubleTy()));
    Value *Sel2 = Builder.CreateSelect(
        FCmpOEQ,
        ConstantFP::getInfinity(Builder.getDoubleTy(), /*Negative=*/true),
        Sel1);
    return Sel2;
  }
  case Builtin::BIscalbnf:
  case Builtin::BI__builtin_scalbnf:
  case Builtin::BIscalbn:
  case Builtin::BI__builtin_scalbn:
    return emitBinaryExpMaybeConstrainedFPBuiltin(
        *this, E, Intrinsic::ldexp, Intrinsic::experimental_constrained_ldexp);