512 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
513 llvm::SyncScope::ID SSID;
515 case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
516 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f32:
517 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f64:
518 case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
519 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f32:
520 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f64:
521 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
522 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
523 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f32:
524 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f64:
525 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
526 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
527 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f32:
528 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f64:
529 case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
530 case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
531 case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
532 case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
533 case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
534 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
535 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
536 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
537 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
538 case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
539 case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
540 case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64: {
547 case AMDGPU::BI__builtin_amdgcn_div_scale:
548 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
558 llvm::Function *Callee =
CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
561 llvm::Value *Tmp =
Builder.CreateCall(Callee, {
X, Y, Z});
564 llvm::Value *Flag =
Builder.CreateExtractValue(Tmp, 1);
568 llvm::Value *FlagExt =
Builder.CreateZExt(Flag, RealFlagType);
569 Builder.CreateStore(FlagExt, FlagOutPtr);
572 case AMDGPU::BI__builtin_amdgcn_div_fmas:
573 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
579 llvm::Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
581 llvm::Value *Src3ToBool =
Builder.CreateIsNotNull(Src3);
582 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
585 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
587 Intrinsic::amdgcn_ds_swizzle);
588 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
589 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
590 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
594 unsigned ICEArguments = 0;
599 unsigned Size = DataTy->getPrimitiveSizeInBits();
601 llvm::IntegerType::get(
Builder.getContext(), std::max(Size, 32u));
603 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8
604 ? Intrinsic::amdgcn_mov_dpp8
605 : Intrinsic::amdgcn_update_dpp,
609 bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;
611 Args.push_back(llvm::PoisonValue::get(
IntTy));
612 for (
unsigned I = 0; I != E->
getNumArgs(); ++I) {
614 if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2u : 1u) &&
616 if (!DataTy->isIntegerTy())
618 V, llvm::IntegerType::get(
Builder.getContext(), Size));
622 F->getFunctionType()->getFunctionParamType(I + InsertOld);
623 Args.push_back(
Builder.CreateTruncOrBitCast(
V, ExpTy));
626 if (Size < 32 && !DataTy->isIntegerTy())
628 V, llvm::IntegerType::get(
Builder.getContext(), Size));
629 return Builder.CreateTruncOrBitCast(
V, DataTy);
631 case AMDGPU::BI__builtin_amdgcn_permlane16:
632 case AMDGPU::BI__builtin_amdgcn_permlanex16:
635 BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
636 ? Intrinsic::amdgcn_permlane16
637 : Intrinsic::amdgcn_permlanex16);
638 case AMDGPU::BI__builtin_amdgcn_permlane64:
640 Intrinsic::amdgcn_permlane64);
641 case AMDGPU::BI__builtin_amdgcn_readlane:
643 Intrinsic::amdgcn_readlane);
644 case AMDGPU::BI__builtin_amdgcn_readfirstlane:
646 Intrinsic::amdgcn_readfirstlane);
647 case AMDGPU::BI__builtin_amdgcn_div_fixup:
648 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
649 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
651 Intrinsic::amdgcn_div_fixup);
652 case AMDGPU::BI__builtin_amdgcn_trig_preop:
653 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
655 case AMDGPU::BI__builtin_amdgcn_rcp:
656 case AMDGPU::BI__builtin_amdgcn_rcpf:
657 case AMDGPU::BI__builtin_amdgcn_rcph:
658 case AMDGPU::BI__builtin_amdgcn_rcp_bf16:
660 case AMDGPU::BI__builtin_amdgcn_sqrt:
661 case AMDGPU::BI__builtin_amdgcn_sqrtf:
662 case AMDGPU::BI__builtin_amdgcn_sqrth:
663 case AMDGPU::BI__builtin_amdgcn_sqrt_bf16:
665 Intrinsic::amdgcn_sqrt);
666 case AMDGPU::BI__builtin_amdgcn_rsq:
667 case AMDGPU::BI__builtin_amdgcn_rsqf:
668 case AMDGPU::BI__builtin_amdgcn_rsqh:
669 case AMDGPU::BI__builtin_amdgcn_rsq_bf16:
671 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
672 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
674 Intrinsic::amdgcn_rsq_clamp);
675 case AMDGPU::BI__builtin_amdgcn_sinf:
676 case AMDGPU::BI__builtin_amdgcn_sinh:
677 case AMDGPU::BI__builtin_amdgcn_sin_bf16:
679 case AMDGPU::BI__builtin_amdgcn_cosf:
680 case AMDGPU::BI__builtin_amdgcn_cosh:
681 case AMDGPU::BI__builtin_amdgcn_cos_bf16:
683 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
684 return EmitAMDGPUDispatchPtr(*
this, E);
685 case AMDGPU::BI__builtin_amdgcn_logf:
686 case AMDGPU::BI__builtin_amdgcn_log_bf16:
688 case AMDGPU::BI__builtin_amdgcn_exp2f:
689 case AMDGPU::BI__builtin_amdgcn_exp2_bf16:
691 Intrinsic::amdgcn_exp2);
692 case AMDGPU::BI__builtin_amdgcn_log_clampf:
694 Intrinsic::amdgcn_log_clamp);
695 case AMDGPU::BI__builtin_amdgcn_ldexp:
696 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
700 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
701 return Builder.CreateCall(F, {Src0, Src1});
703 case AMDGPU::BI__builtin_amdgcn_ldexph: {
709 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(),
Int16Ty});
712 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
713 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
714 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
716 Intrinsic::amdgcn_frexp_mant);
717 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
718 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
720 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
722 return Builder.CreateCall(F, Src0);
724 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
726 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
728 return Builder.CreateCall(F, Src0);
730 case AMDGPU::BI__builtin_amdgcn_fract:
731 case AMDGPU::BI__builtin_amdgcn_fractf:
732 case AMDGPU::BI__builtin_amdgcn_fracth:
734 Intrinsic::amdgcn_fract);
735 case AMDGPU::BI__builtin_amdgcn_lerp:
737 Intrinsic::amdgcn_lerp);
738 case AMDGPU::BI__builtin_amdgcn_ubfe:
740 Intrinsic::amdgcn_ubfe);
741 case AMDGPU::BI__builtin_amdgcn_sbfe:
743 Intrinsic::amdgcn_sbfe);
744 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
745 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
748 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
749 return Builder.CreateCall(F, {Src});
751 case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w32:
752 case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w64: {
755 CGM.getIntrinsic(Intrinsic::amdgcn_inverse_ballot, {Src->getType()});
756 return Builder.CreateCall(F, {Src});
758 case AMDGPU::BI__builtin_amdgcn_tanhf:
759 case AMDGPU::BI__builtin_amdgcn_tanhh:
760 case AMDGPU::BI__builtin_amdgcn_tanh_bf16:
762 Intrinsic::amdgcn_tanh);
763 case AMDGPU::BI__builtin_amdgcn_uicmp:
764 case AMDGPU::BI__builtin_amdgcn_uicmpl:
765 case AMDGPU::BI__builtin_amdgcn_sicmp:
766 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
772 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
773 {
Builder.getInt64Ty(), Src0->getType() });
774 return Builder.CreateCall(F, { Src0, Src1, Src2 });
776 case AMDGPU::BI__builtin_amdgcn_fcmp:
777 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
783 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
784 {
Builder.getInt64Ty(), Src0->getType() });
785 return Builder.CreateCall(F, { Src0, Src1, Src2 });
787 case AMDGPU::BI__builtin_amdgcn_class:
788 case AMDGPU::BI__builtin_amdgcn_classf:
789 case AMDGPU::BI__builtin_amdgcn_classh:
791 case AMDGPU::BI__builtin_amdgcn_fmed3f:
792 case AMDGPU::BI__builtin_amdgcn_fmed3h:
794 Intrinsic::amdgcn_fmed3);
795 case AMDGPU::BI__builtin_amdgcn_ds_append:
796 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
797 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
798 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
803 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
804 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
805 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
806 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
807 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
808 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
809 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
810 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
811 case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
812 case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
813 case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
814 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
815 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
816 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
817 case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
818 case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
819 case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
820 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
821 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
822 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
823 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
824 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
825 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
826 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
827 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
828 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16: {
831 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
832 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
833 case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
834 IID = Intrinsic::amdgcn_global_load_tr_b64;
836 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
837 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
838 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
839 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
840 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
841 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
842 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
843 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
844 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
845 IID = Intrinsic::amdgcn_global_load_tr_b128;
847 case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
848 IID = Intrinsic::amdgcn_global_load_tr4_b64;
850 case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
851 IID = Intrinsic::amdgcn_global_load_tr6_b96;
853 case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
854 IID = Intrinsic::amdgcn_ds_load_tr4_b64;
856 case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
857 IID = Intrinsic::amdgcn_ds_load_tr6_b96;
859 case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
860 IID = Intrinsic::amdgcn_ds_load_tr8_b64;
862 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
863 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
864 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
865 IID = Intrinsic::amdgcn_ds_load_tr16_b128;
867 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
868 IID = Intrinsic::amdgcn_ds_read_tr4_b64;
870 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
871 IID = Intrinsic::amdgcn_ds_read_tr8_b64;
873 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
874 IID = Intrinsic::amdgcn_ds_read_tr6_b96;
876 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16:
877 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
878 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
879 IID = Intrinsic::amdgcn_ds_read_tr16_b64;
884 llvm::Function *F =
CGM.getIntrinsic(IID, {LoadTy});
887 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
888 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
889 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
890 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
891 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
892 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128: {
896 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
897 IID = Intrinsic::amdgcn_global_load_monitor_b32;
899 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
900 IID = Intrinsic::amdgcn_global_load_monitor_b64;
902 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
903 IID = Intrinsic::amdgcn_global_load_monitor_b128;
905 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
906 IID = Intrinsic::amdgcn_flat_load_monitor_b32;
908 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
909 IID = Intrinsic::amdgcn_flat_load_monitor_b64;
911 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128:
912 IID = Intrinsic::amdgcn_flat_load_monitor_b128;
916 LLVMContext &Ctx =
CGM.getLLVMContext();
926 StringRef ScopeStr =
CGM.getTargetCodeGenInfo().getLLVMSyncScopeStr(
930 llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, ScopeStr)});
931 llvm::Value *ScopeMD = llvm::MetadataAsValue::get(Ctx, MD);
932 llvm::Function *F =
CGM.getIntrinsic(IID, {LoadTy});
933 return Builder.CreateCall(F, {
Addr, AOExpr, ScopeMD});
935 case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:
936 case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:
937 case AMDGPU::BI__builtin_amdgcn_cluster_load_b128: {
940 case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:
941 IID = Intrinsic::amdgcn_cluster_load_b32;
943 case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:
944 IID = Intrinsic::amdgcn_cluster_load_b64;
946 case AMDGPU::BI__builtin_amdgcn_cluster_load_b128:
947 IID = Intrinsic::amdgcn_cluster_load_b128;
951 for (
int i = 0, e = E->
getNumArgs(); i != e; ++i)
954 return Builder.CreateCall(F, {Args});
956 case AMDGPU::BI__builtin_amdgcn_load_to_lds: {
959 Intrinsic::amdgcn_load_to_lds);
961 case AMDGPU::BI__builtin_amdgcn_load_async_to_lds: {
964 *
this, E, Intrinsic::amdgcn_load_async_to_lds);
966 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
967 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
968 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
969 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
970 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
971 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B: {
974 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
975 IID = Intrinsic::amdgcn_cooperative_atomic_load_32x4B;
977 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
978 IID = Intrinsic::amdgcn_cooperative_atomic_store_32x4B;
980 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
981 IID = Intrinsic::amdgcn_cooperative_atomic_load_16x8B;
983 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
984 IID = Intrinsic::amdgcn_cooperative_atomic_store_16x8B;
986 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
987 IID = Intrinsic::amdgcn_cooperative_atomic_load_8x16B;
989 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B:
990 IID = Intrinsic::amdgcn_cooperative_atomic_store_8x16B;
994 LLVMContext &Ctx =
CGM.getLLVMContext();
997 const unsigned ScopeArg = E->
getNumArgs() - 1;
998 for (
unsigned i = 0; i != ScopeArg; ++i)
1002 llvm::MDNode *MD = llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, Arg)});
1003 Args.push_back(llvm::MetadataAsValue::get(Ctx, MD));
1006 llvm::Function *F =
CGM.getIntrinsic(IID, {Args[0]->getType()});
1007 return Builder.CreateCall(F, {Args});
1009 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
1010 Function *F =
CGM.getIntrinsic(Intrinsic::get_fpenv,
1014 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
1015 Function *F =
CGM.getIntrinsic(Intrinsic::set_fpenv,
1018 return Builder.CreateCall(F, {Env});
1020 case AMDGPU::BI__builtin_amdgcn_read_exec:
1022 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
1024 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
1026 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
1027 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
1028 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
1029 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
1039 RayOrigin =
Builder.CreateShuffleVector(RayOrigin, RayOrigin,
1042 Builder.CreateShuffleVector(RayDir, RayDir, {0, 1, 2});
1043 RayInverseDir =
Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
1046 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
1047 {NodePtr->getType(), RayDir->getType()});
1048 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
1049 RayInverseDir, TextureDescr});
1051 case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
1052 case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray: {
1054 switch (BuiltinID) {
1055 case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
1056 IID = Intrinsic::amdgcn_image_bvh8_intersect_ray;
1058 case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray:
1059 IID = Intrinsic::amdgcn_image_bvh_dual_intersect_ray;
1073 llvm::Function *IntrinsicFunc =
CGM.getIntrinsic(IID);
1075 llvm::CallInst *CI =
Builder.CreateCall(
1076 IntrinsicFunc, {NodePtr, RayExtent, InstanceMask, RayOrigin, RayDir,
1077 Offset, TextureDescr});
1079 llvm::Value *RetVData =
Builder.CreateExtractValue(CI, 0);
1080 llvm::Value *RetRayOrigin =
Builder.CreateExtractValue(CI, 1);
1081 llvm::Value *RetRayDir =
Builder.CreateExtractValue(CI, 2);
1083 Builder.CreateStore(RetRayOrigin, RetRayOriginPtr);
1084 Builder.CreateStore(RetRayDir, RetRayDirPtr);
1089 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
1090 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
1091 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
1092 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn: {
1094 switch (BuiltinID) {
1095 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
1096 IID = Intrinsic::amdgcn_ds_bvh_stack_rtn;
1098 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
1099 IID = Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn;
1101 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
1102 IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn;
1104 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn:
1105 IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn;
1110 for (
int i = 0, e = E->
getNumArgs(); i != e; ++i)
1118 Value *I0 =
Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
1122 if (A->
getType()->getPrimitiveSizeInBits() <
1123 RetTy->getScalarType()->getPrimitiveSizeInBits())
1124 A =
Builder.CreateZExt(A, RetTy->getScalarType());
1126 return Builder.CreateInsertElement(I0, A, 1);
1128 case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
1129 case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
1131 *
this, E, Intrinsic::amdgcn_image_load_1d,
false);
1132 case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
1133 case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
1135 *
this, E, Intrinsic::amdgcn_image_load_1darray,
false);
1136 case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
1137 case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
1138 case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
1140 *
this, E, Intrinsic::amdgcn_image_load_2d,
false);
1141 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
1142 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
1143 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
1145 *
this, E, Intrinsic::amdgcn_image_load_2darray,
false);
1146 case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
1147 case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
1149 *
this, E, Intrinsic::amdgcn_image_load_3d,
false);
1150 case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
1151 case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
1153 *
this, E, Intrinsic::amdgcn_image_load_cube,
false);
1154 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
1155 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
1157 *
this, E, Intrinsic::amdgcn_image_load_mip_1d,
false);
1158 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
1159 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
1161 *
this, E, Intrinsic::amdgcn_image_load_mip_1darray,
false);
1162 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
1163 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
1164 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
1166 *
this, E, Intrinsic::amdgcn_image_load_mip_2d,
false);
1167 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
1168 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
1169 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
1171 *
this, E, Intrinsic::amdgcn_image_load_mip_2darray,
false);
1172 case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
1173 case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
1175 *
this, E, Intrinsic::amdgcn_image_load_mip_3d,
false);
1176 case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
1177 case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
1179 *
this, E, Intrinsic::amdgcn_image_load_mip_cube,
false);
1180 case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
1181 case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
1183 *
this, E, Intrinsic::amdgcn_image_store_1d,
true);
1184 case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
1185 case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
1187 *
this, E, Intrinsic::amdgcn_image_store_1darray,
true);
1188 case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
1189 case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
1190 case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
1192 *
this, E, Intrinsic::amdgcn_image_store_2d,
true);
1193 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
1194 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
1195 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
1197 *
this, E, Intrinsic::amdgcn_image_store_2darray,
true);
1198 case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
1199 case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
1201 *
this, E, Intrinsic::amdgcn_image_store_3d,
true);
1202 case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
1203 case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
1205 *
this, E, Intrinsic::amdgcn_image_store_cube,
true);
1206 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
1207 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
1209 *
this, E, Intrinsic::amdgcn_image_store_mip_1d,
true);
1210 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
1211 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
1213 *
this, E, Intrinsic::amdgcn_image_store_mip_1darray,
true);
1214 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
1215 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
1216 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
1218 *
this, E, Intrinsic::amdgcn_image_store_mip_2d,
true);
1219 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
1220 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
1221 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
1223 *
this, E, Intrinsic::amdgcn_image_store_mip_2darray,
true);
1224 case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
1225 case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
1227 *
this, E, Intrinsic::amdgcn_image_store_mip_3d,
true);
1228 case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
1229 case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32:
1231 *
this, E, Intrinsic::amdgcn_image_store_mip_cube,
true);
1232 case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
1233 case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
1235 *
this, E, Intrinsic::amdgcn_image_sample_1d,
false);
1236 case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
1237 case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
1239 *
this, E, Intrinsic::amdgcn_image_sample_1darray,
false);
1240 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
1241 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
1242 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
1244 *
this, E, Intrinsic::amdgcn_image_sample_2d,
false);
1245 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
1246 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
1247 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
1249 *
this, E, Intrinsic::amdgcn_image_sample_2darray,
false);
1250 case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
1251 case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
1253 *
this, E, Intrinsic::amdgcn_image_sample_3d,
false);
1254 case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
1255 case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
1257 *
this, E, Intrinsic::amdgcn_image_sample_cube,
false);
1258 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32:
1259 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32:
1261 *
this, E, Intrinsic::amdgcn_image_sample_lz_1d,
false);
1262 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32:
1263 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32:
1265 *
this, E, Intrinsic::amdgcn_image_sample_l_1d,
false);
1266 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32:
1267 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32:
1269 *
this, E, Intrinsic::amdgcn_image_sample_d_1d,
false);
1270 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32:
1271 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32:
1272 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32:
1274 *
this, E, Intrinsic::amdgcn_image_sample_lz_2d,
false);
1275 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32:
1276 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32:
1277 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32:
1279 *
this, E, Intrinsic::amdgcn_image_sample_l_2d,
false);
1280 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32:
1281 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32:
1282 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32:
1284 *
this, E, Intrinsic::amdgcn_image_sample_d_2d,
false);
1285 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32:
1286 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32:
1288 *
this, E, Intrinsic::amdgcn_image_sample_lz_3d,
false);
1289 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32:
1290 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32:
1292 *
this, E, Intrinsic::amdgcn_image_sample_l_3d,
false);
1293 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32:
1294 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32:
1296 *
this, E, Intrinsic::amdgcn_image_sample_d_3d,
false);
1297 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32:
1298 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32:
1300 *
this, E, Intrinsic::amdgcn_image_sample_lz_cube,
false);
1301 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32:
1302 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32:
1304 *
this, E, Intrinsic::amdgcn_image_sample_l_cube,
false);
1305 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32:
1306 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32:
1308 *
this, E, Intrinsic::amdgcn_image_sample_lz_1darray,
false);
1309 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32:
1310 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32:
1312 *
this, E, Intrinsic::amdgcn_image_sample_l_1darray,
false);
1313 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32:
1314 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32:
1316 *
this, E, Intrinsic::amdgcn_image_sample_d_1darray,
false);
1317 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32:
1318 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32:
1319 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32:
1321 *
this, E, Intrinsic::amdgcn_image_sample_lz_2darray,
false);
1322 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32:
1323 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32:
1324 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32:
1326 *
this, E, Intrinsic::amdgcn_image_sample_l_2darray,
false);
1327 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32:
1328 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32:
1329 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32:
1331 *
this, E, Intrinsic::amdgcn_image_sample_d_2darray,
false);
1332 case clang::AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32:
1334 *
this, E, Intrinsic::amdgcn_image_gather4_lz_2d,
false);
1335 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
1336 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
1337 llvm::FixedVectorType *VT = FixedVectorType::get(
Builder.getInt32Ty(), 8);
1339 BuiltinID == AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4
1340 ? Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4
1341 : Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4,
1345 for (
unsigned I = 0, N = E->
getNumArgs(); I != N; ++I)
1347 return Builder.CreateCall(F, Args);
1349 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
1350 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
1351 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
1352 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
1353 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
1354 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
1355 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
1356 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
1357 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
1358 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
1359 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
1360 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
1361 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
1362 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
1363 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
1364 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
1365 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
1366 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
1367 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
1368 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
1369 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
1370 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
1371 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
1372 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
1373 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
1374 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
1375 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
1376 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
1377 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
1378 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
1379 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
1380 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
1381 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
1382 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
1383 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
1384 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
1385 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
1386 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
1387 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
1388 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
1389 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
1390 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
1391 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
1392 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
1393 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
1394 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
1395 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
1396 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
1397 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
1398 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
1399 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
1400 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
1401 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
1402 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
1403 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
1404 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
1405 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
1406 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
1407 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
1408 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
1410 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
1411 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
1412 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
1413 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
1414 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
1415 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
1416 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
1417 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
1418 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
1419 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
1420 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
1421 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
1422 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
1423 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
1424 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
1425 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
1426 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
1427 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
1428 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
1429 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
1430 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
1431 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
1432 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
1433 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
1434 case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
1435 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1436 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
1437 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:
1438 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:
1439 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
1440 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
1441 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
1442 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
1443 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
1444 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
1445 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
1446 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
1447 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
1448 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
1449 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
1450 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
1451 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
1452 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8: {
1465 bool AppendFalseForOpselArg =
false;
1466 unsigned BuiltinWMMAOp;
1468 bool NeedReturnType =
false;
1470 switch (BuiltinID) {
1471 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
1472 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
1473 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
1474 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
1475 ArgsForMatchingMatrixTypes = {2, 0};
1476 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
1478 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
1479 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
1480 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
1481 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
1482 ArgsForMatchingMatrixTypes = {2, 0};
1483 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
1485 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
1486 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
1487 AppendFalseForOpselArg =
true;
1489 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
1490 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
1491 ArgsForMatchingMatrixTypes = {2, 0};
1492 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
1494 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
1495 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
1496 AppendFalseForOpselArg =
true;
1498 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
1499 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
1500 ArgsForMatchingMatrixTypes = {2, 0};
1501 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
1503 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
1504 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
1505 ArgsForMatchingMatrixTypes = {2, 0};
1506 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
1508 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
1509 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
1510 ArgsForMatchingMatrixTypes = {2, 0};
1511 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
1513 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
1514 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
1515 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
1516 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
1517 ArgsForMatchingMatrixTypes = {4, 1};
1518 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
1520 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
1521 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
1522 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
1523 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
1524 ArgsForMatchingMatrixTypes = {4, 1};
1525 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
1527 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
1528 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
1529 ArgsForMatchingMatrixTypes = {2, 0};
1530 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
1532 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
1533 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
1534 ArgsForMatchingMatrixTypes = {2, 0};
1535 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
1537 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
1538 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
1539 ArgsForMatchingMatrixTypes = {2, 0};
1540 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
1542 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
1543 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
1544 ArgsForMatchingMatrixTypes = {2, 0};
1545 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
1547 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
1548 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
1549 ArgsForMatchingMatrixTypes = {4, 1};
1550 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
1552 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
1553 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
1554 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1555 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
1557 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
1558 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
1559 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1560 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
1562 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
1563 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
1564 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1565 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
1567 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
1568 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
1569 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1570 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
1572 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
1573 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
1574 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1575 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
1577 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
1578 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
1579 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1580 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
1582 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
1583 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
1584 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1585 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
1587 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
1588 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
1589 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1590 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
1592 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
1593 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
1594 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1595 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
1597 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
1598 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
1599 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1600 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
1602 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
1603 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
1604 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1605 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
1608 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
1609 ArgsForMatchingMatrixTypes = {5, 1};
1610 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x4_f32;
1612 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
1613 ArgsForMatchingMatrixTypes = {5, 1};
1614 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_bf16;
1616 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
1617 ArgsForMatchingMatrixTypes = {5, 1};
1618 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_f16;
1620 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
1621 ArgsForMatchingMatrixTypes = {5, 1};
1622 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x32_f16;
1624 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
1625 ArgsForMatchingMatrixTypes = {5, 1};
1626 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16;
1628 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
1629 NeedReturnType =
true;
1630 ArgsForMatchingMatrixTypes = {1, 5};
1631 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16;
1633 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
1634 ArgsForMatchingMatrixTypes = {3, 0};
1635 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_fp8;
1637 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
1638 ArgsForMatchingMatrixTypes = {3, 0};
1639 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_bf8;
1641 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
1642 ArgsForMatchingMatrixTypes = {3, 0};
1643 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_fp8;
1645 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
1646 ArgsForMatchingMatrixTypes = {3, 0};
1647 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_bf8;
1649 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
1650 ArgsForMatchingMatrixTypes = {3, 0};
1651 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_fp8;
1653 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
1654 ArgsForMatchingMatrixTypes = {3, 0};
1655 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_bf8;
1657 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
1658 ArgsForMatchingMatrixTypes = {3, 0};
1659 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_fp8;
1661 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
1662 ArgsForMatchingMatrixTypes = {3, 0};
1663 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_bf8;
1665 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
1666 ArgsForMatchingMatrixTypes = {3, 0};
1667 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_fp8;
1669 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
1670 ArgsForMatchingMatrixTypes = {3, 0};
1671 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_bf8;
1673 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
1674 ArgsForMatchingMatrixTypes = {3, 0};
1675 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_fp8;
1677 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
1678 ArgsForMatchingMatrixTypes = {3, 0};
1679 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_bf8;
1681 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
1682 ArgsForMatchingMatrixTypes = {3, 0};
1683 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_fp8;
1685 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
1686 ArgsForMatchingMatrixTypes = {3, 0};
1687 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_bf8;
1689 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
1690 ArgsForMatchingMatrixTypes = {3, 0};
1691 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_fp8;
1693 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
1694 ArgsForMatchingMatrixTypes = {3, 0};
1695 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_bf8;
1697 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
1698 ArgsForMatchingMatrixTypes = {4, 1};
1699 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x64_iu8;
1701 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
1702 ArgsForMatchingMatrixTypes = {5, 1, 3};
1703 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4;
1705 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1706 ArgsForMatchingMatrixTypes = {5, 1, 3};
1707 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4;
1709 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
1710 ArgsForMatchingMatrixTypes = {5, 1, 3};
1711 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4;
1713 case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
1714 ArgsForMatchingMatrixTypes = {3, 0, 1};
1715 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_32x16x128_f4;
1717 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:
1718 ArgsForMatchingMatrixTypes = {3, 0, 1};
1719 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_32x16x128_f4;
1721 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:
1722 ArgsForMatchingMatrixTypes = {3, 0, 1};
1723 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_32x16x128_f4;
1725 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
1726 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1727 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_f16;
1729 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
1730 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1731 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_bf16;
1733 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
1734 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1735 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x64_f16;
1737 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
1738 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1739 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x64_bf16;
1741 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
1742 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1743 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16f32_16x16x64_bf16;
1745 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
1746 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1747 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_fp8;
1749 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
1750 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1751 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_bf8;
1753 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
1754 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1755 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_fp8;
1757 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
1758 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1759 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_bf8;
1761 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
1762 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1763 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_fp8;
1765 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
1766 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1767 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_bf8;
1769 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
1770 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1771 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_fp8;
1773 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
1774 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1775 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_bf8;
1777 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8:
1778 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1779 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8;
1784 for (
int i = 0, e = E->
getNumArgs(); i != e; ++i)
1786 if (AppendFalseForOpselArg)
1787 Args.push_back(
Builder.getFalse());
1790 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8) {
1791 if (Args.size() == 7)
1792 Args.push_back(
Builder.getFalse());
1793 assert(Args.size() == 8 &&
"Expected 8 arguments");
1794 Args[7] =
Builder.CreateZExtOrTrunc(Args[7],
Builder.getInt1Ty());
1795 }
else if (BuiltinID ==
1796 AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8) {
1797 if (Args.size() == 8)
1798 Args.push_back(
Builder.getFalse());
1799 assert(Args.size() == 9 &&
"Expected 9 arguments");
1800 Args[8] =
Builder.CreateZExtOrTrunc(Args[8],
Builder.getInt1Ty());
1806 for (
auto ArgIdx : ArgsForMatchingMatrixTypes)
1807 ArgTypes.push_back(Args[ArgIdx]->
getType());
1809 Function *F =
CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
1810 return Builder.CreateCall(F, Args);
1813 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
1814 return EmitAMDGPUWorkGroupSize(*
this, 0);
1815 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
1816 return EmitAMDGPUWorkGroupSize(*
this, 1);
1817 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
1818 return EmitAMDGPUWorkGroupSize(*
this, 2);
1821 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
1822 return EmitAMDGPUGridSize(*
this, 0);
1823 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
1824 return EmitAMDGPUGridSize(*
this, 1);
1825 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
1826 return EmitAMDGPUGridSize(*
this, 2);
1829 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
1830 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
1832 Intrinsic::r600_recipsqrt_ieee);
1833 case AMDGPU::BI__builtin_amdgcn_alignbit: {
1837 Function *F =
CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
1838 return Builder.CreateCall(F, { Src0, Src1, Src2 });
1840 case AMDGPU::BI__builtin_amdgcn_fence: {
1843 FenceInst *Fence =
Builder.CreateFence(AO, SSID);
1848 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
1849 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
1850 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
1851 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
1852 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
1853 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
1854 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
1855 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
1856 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
1857 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
1858 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
1859 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
1860 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
1861 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1862 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1863 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1864 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1865 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
1866 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1867 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1868 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
1869 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
1870 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
1871 llvm::AtomicRMWInst::BinOp BinOp;
1872 switch (BuiltinID) {
1873 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
1874 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
1875 BinOp = llvm::AtomicRMWInst::UIncWrap;
1877 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
1878 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
1879 BinOp = llvm::AtomicRMWInst::UDecWrap;
1881 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
1882 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
1883 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
1884 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
1885 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
1886 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
1887 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
1888 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1889 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1890 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1891 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1892 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
1893 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1894 BinOp = llvm::AtomicRMWInst::FAdd;
1896 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
1897 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1898 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
1899 BinOp = llvm::AtomicRMWInst::FMin;
1901 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
1902 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
1903 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
1904 BinOp = llvm::AtomicRMWInst::FMax;
1910 llvm::Type *OrigTy = Val->
getType();
1915 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
1916 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
1917 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
1942 AO = AtomicOrdering::Monotonic;
1945 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
1946 BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
1947 BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
1948 llvm::Type *V2BF16Ty = FixedVectorType::get(
1949 llvm::Type::getBFloatTy(
Builder.getContext()), 2);
1950 Val =
Builder.CreateBitCast(Val, V2BF16Ty);
1954 llvm::AtomicRMWInst *RMW =
1955 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
1957 RMW->setVolatile(
true);
1959 unsigned AddrSpace = Ptr.
getType()->getAddressSpace();
1960 if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
1964 RMW->setMetadata(
"amdgpu.no.fine.grained.memory", EmptyMD);
1968 if (BinOp == llvm::AtomicRMWInst::FAdd && Val->
getType()->isFloatTy())
1969 RMW->setMetadata(
"amdgpu.ignore.denormal.mode", EmptyMD);
1972 return Builder.CreateBitCast(RMW, OrigTy);
1974 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
1975 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
1980 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
1981 return Builder.CreateCall(F, {Arg});
1983 case AMDGPU::BI__builtin_amdgcn_permlane16_swap:
1984 case AMDGPU::BI__builtin_amdgcn_permlane32_swap: {
1992 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16_swap
1993 ? Intrinsic::amdgcn_permlane16_swap
1994 : Intrinsic::amdgcn_permlane32_swap);
1995 llvm::CallInst *
Call =
1996 Builder.CreateCall(F, {VDstOld, VSrcOld, FI, BoundCtrl});
1998 llvm::Value *Elt0 =
Builder.CreateExtractValue(
Call, 0);
1999 llvm::Value *Elt1 =
Builder.CreateExtractValue(
Call, 1);
2003 llvm::Value *Insert0 =
Builder.CreateInsertElement(
2004 llvm::PoisonValue::get(ResultType), Elt0, UINT64_C(0));
2005 llvm::Value *AsVector =
2006 Builder.CreateInsertElement(Insert0, Elt1, UINT64_C(1));
2009 case AMDGPU::BI__builtin_amdgcn_bitop3_b32:
2010 case AMDGPU::BI__builtin_amdgcn_bitop3_b16:
2012 Intrinsic::amdgcn_bitop3);
2013 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc: {
2018 for (
unsigned I = 0; I < 4; ++I)
2020 llvm::PointerType *RetTy = llvm::PointerType::get(
2021 Builder.getContext(), llvm::AMDGPUAS::BUFFER_RESOURCE);
2022 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_make_buffer_rsrc,
2023 {RetTy, Args[0]->getType()});
2024 return Builder.CreateCall(F, Args);
2026 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
2027 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
2028 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
2029 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
2030 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
2031 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
2033 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
2034 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
2035 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
2036 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
2037 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
2038 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
2039 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
2040 llvm::Type *RetTy =
nullptr;
2041 switch (BuiltinID) {
2042 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
2045 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
2048 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
2051 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
2052 RetTy = llvm::FixedVectorType::get(
Int32Ty, 2);
2054 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
2055 RetTy = llvm::FixedVectorType::get(
Int32Ty, 3);
2057 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
2058 RetTy = llvm::FixedVectorType::get(
Int32Ty, 4);
2062 CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
2067 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
2069 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_add);
2070 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:
2071 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
2073 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd);
2074 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32:
2075 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64:
2077 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin);
2078 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
2079 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
2081 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax);
2082 case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
2084 *
this, E, Intrinsic::amdgcn_s_prefetch_data);
2085 case Builtin::BIlogbf:
2086 case Builtin::BI__builtin_logbf: {
2090 CallInst *FrExp =
Builder.CreateCall(FrExpFunc, Src0);
2093 Exp, ConstantInt::getSigned(Exp->
getType(), -1),
"",
false,
true);
2098 Fabs, ConstantFP::getInfinity(
Builder.getFloatTy()));
2099 Value *Sel1 =
Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
2101 Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(
Builder.getFloatTy()));
2104 ConstantFP::getInfinity(
Builder.getFloatTy(),
true), Sel1);
2107 case Builtin::BIlogb:
2108 case Builtin::BI__builtin_logb: {
2112 CallInst *FrExp =
Builder.CreateCall(FrExpFunc, Src0);
2115 Exp, ConstantInt::getSigned(Exp->
getType(), -1),
"",
false,
true);
2120 Fabs, ConstantFP::getInfinity(
Builder.getDoubleTy()));
2121 Value *Sel1 =
Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
2123 Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(
Builder.getDoubleTy()));
2126 ConstantFP::getInfinity(
Builder.getDoubleTy(),
true),
2130 case Builtin::BIscalbnf:
2131 case Builtin::BI__builtin_scalbnf:
2132 case Builtin::BIscalbn:
2133 case Builtin::BI__builtin_scalbn:
2135 *
this, E, Intrinsic::ldexp, Intrinsic::experimental_constrained_ldexp);