509 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
510 llvm::SyncScope::ID SSID;
512 case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
513 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f32:
514 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f64:
515 case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
516 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f32:
517 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f64:
518 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
519 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
520 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f32:
521 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f64:
522 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
523 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
524 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f32:
525 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f64:
526 case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
527 case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
528 case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
529 case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
530 case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
531 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
532 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
533 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
534 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
535 case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
536 case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
537 case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64: {
544 case AMDGPU::BI__builtin_amdgcn_div_scale:
545 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
555 llvm::Function *Callee =
CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
558 llvm::Value *Tmp =
Builder.CreateCall(Callee, {
X, Y, Z});
561 llvm::Value *Flag =
Builder.CreateExtractValue(Tmp, 1);
565 llvm::Value *FlagExt =
Builder.CreateZExt(Flag, RealFlagType);
566 Builder.CreateStore(FlagExt, FlagOutPtr);
569 case AMDGPU::BI__builtin_amdgcn_div_fmas:
570 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
576 llvm::Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
578 llvm::Value *Src3ToBool =
Builder.CreateIsNotNull(Src3);
579 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
582 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
584 Intrinsic::amdgcn_ds_swizzle);
585 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
586 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
587 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
591 unsigned ICEArguments = 0;
596 unsigned Size = DataTy->getPrimitiveSizeInBits();
598 llvm::IntegerType::get(
Builder.getContext(), std::max(Size, 32u));
600 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8
601 ? Intrinsic::amdgcn_mov_dpp8
602 : Intrinsic::amdgcn_update_dpp,
606 bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;
608 Args.push_back(llvm::PoisonValue::get(
IntTy));
609 for (
unsigned I = 0; I != E->
getNumArgs(); ++I) {
611 if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2u : 1u) &&
613 if (!DataTy->isIntegerTy())
615 V, llvm::IntegerType::get(
Builder.getContext(), Size));
619 F->getFunctionType()->getFunctionParamType(I + InsertOld);
620 Args.push_back(
Builder.CreateTruncOrBitCast(
V, ExpTy));
623 if (Size < 32 && !DataTy->isIntegerTy())
625 V, llvm::IntegerType::get(
Builder.getContext(), Size));
626 return Builder.CreateTruncOrBitCast(
V, DataTy);
628 case AMDGPU::BI__builtin_amdgcn_permlane16:
629 case AMDGPU::BI__builtin_amdgcn_permlanex16:
632 BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
633 ? Intrinsic::amdgcn_permlane16
634 : Intrinsic::amdgcn_permlanex16);
635 case AMDGPU::BI__builtin_amdgcn_permlane64:
637 Intrinsic::amdgcn_permlane64);
638 case AMDGPU::BI__builtin_amdgcn_readlane:
640 Intrinsic::amdgcn_readlane);
641 case AMDGPU::BI__builtin_amdgcn_readfirstlane:
643 Intrinsic::amdgcn_readfirstlane);
644 case AMDGPU::BI__builtin_amdgcn_div_fixup:
645 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
646 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
648 Intrinsic::amdgcn_div_fixup);
649 case AMDGPU::BI__builtin_amdgcn_trig_preop:
650 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
652 case AMDGPU::BI__builtin_amdgcn_rcp:
653 case AMDGPU::BI__builtin_amdgcn_rcpf:
654 case AMDGPU::BI__builtin_amdgcn_rcph:
655 case AMDGPU::BI__builtin_amdgcn_rcp_bf16:
657 case AMDGPU::BI__builtin_amdgcn_sqrt:
658 case AMDGPU::BI__builtin_amdgcn_sqrtf:
659 case AMDGPU::BI__builtin_amdgcn_sqrth:
660 case AMDGPU::BI__builtin_amdgcn_sqrt_bf16:
662 Intrinsic::amdgcn_sqrt);
663 case AMDGPU::BI__builtin_amdgcn_rsq:
664 case AMDGPU::BI__builtin_amdgcn_rsqf:
665 case AMDGPU::BI__builtin_amdgcn_rsqh:
666 case AMDGPU::BI__builtin_amdgcn_rsq_bf16:
668 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
669 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
671 Intrinsic::amdgcn_rsq_clamp);
672 case AMDGPU::BI__builtin_amdgcn_sinf:
673 case AMDGPU::BI__builtin_amdgcn_sinh:
674 case AMDGPU::BI__builtin_amdgcn_sin_bf16:
676 case AMDGPU::BI__builtin_amdgcn_cosf:
677 case AMDGPU::BI__builtin_amdgcn_cosh:
678 case AMDGPU::BI__builtin_amdgcn_cos_bf16:
680 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
681 return EmitAMDGPUDispatchPtr(*
this, E);
682 case AMDGPU::BI__builtin_amdgcn_logf:
683 case AMDGPU::BI__builtin_amdgcn_log_bf16:
685 case AMDGPU::BI__builtin_amdgcn_exp2f:
686 case AMDGPU::BI__builtin_amdgcn_exp2_bf16:
688 Intrinsic::amdgcn_exp2);
689 case AMDGPU::BI__builtin_amdgcn_log_clampf:
691 Intrinsic::amdgcn_log_clamp);
692 case AMDGPU::BI__builtin_amdgcn_ldexp:
693 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
697 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
698 return Builder.CreateCall(F, {Src0, Src1});
700 case AMDGPU::BI__builtin_amdgcn_ldexph: {
706 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(),
Int16Ty});
709 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
710 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
711 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
713 Intrinsic::amdgcn_frexp_mant);
714 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
715 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
717 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
719 return Builder.CreateCall(F, Src0);
721 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
723 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
725 return Builder.CreateCall(F, Src0);
727 case AMDGPU::BI__builtin_amdgcn_fract:
728 case AMDGPU::BI__builtin_amdgcn_fractf:
729 case AMDGPU::BI__builtin_amdgcn_fracth:
731 Intrinsic::amdgcn_fract);
732 case AMDGPU::BI__builtin_amdgcn_lerp:
734 Intrinsic::amdgcn_lerp);
735 case AMDGPU::BI__builtin_amdgcn_ubfe:
737 Intrinsic::amdgcn_ubfe);
738 case AMDGPU::BI__builtin_amdgcn_sbfe:
740 Intrinsic::amdgcn_sbfe);
741 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
742 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
745 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
746 return Builder.CreateCall(F, {Src});
748 case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w32:
749 case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w64: {
752 CGM.getIntrinsic(Intrinsic::amdgcn_inverse_ballot, {Src->getType()});
753 return Builder.CreateCall(F, {Src});
755 case AMDGPU::BI__builtin_amdgcn_tanhf:
756 case AMDGPU::BI__builtin_amdgcn_tanhh:
757 case AMDGPU::BI__builtin_amdgcn_tanh_bf16:
759 Intrinsic::amdgcn_tanh);
760 case AMDGPU::BI__builtin_amdgcn_uicmp:
761 case AMDGPU::BI__builtin_amdgcn_uicmpl:
762 case AMDGPU::BI__builtin_amdgcn_sicmp:
763 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
769 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
770 {
Builder.getInt64Ty(), Src0->getType() });
771 return Builder.CreateCall(F, { Src0, Src1, Src2 });
773 case AMDGPU::BI__builtin_amdgcn_fcmp:
774 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
780 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
781 {
Builder.getInt64Ty(), Src0->getType() });
782 return Builder.CreateCall(F, { Src0, Src1, Src2 });
784 case AMDGPU::BI__builtin_amdgcn_class:
785 case AMDGPU::BI__builtin_amdgcn_classf:
786 case AMDGPU::BI__builtin_amdgcn_classh:
788 case AMDGPU::BI__builtin_amdgcn_fmed3f:
789 case AMDGPU::BI__builtin_amdgcn_fmed3h:
791 Intrinsic::amdgcn_fmed3);
792 case AMDGPU::BI__builtin_amdgcn_ds_append:
793 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
794 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
795 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
800 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
801 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
802 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
803 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
804 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
805 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
806 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
807 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
808 case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
809 case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
810 case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
811 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
812 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
813 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
814 case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
815 case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
816 case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
817 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
818 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
819 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
820 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
821 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
822 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
823 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
824 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
825 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16: {
828 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
829 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
830 case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
831 IID = Intrinsic::amdgcn_global_load_tr_b64;
833 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
834 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
835 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
836 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
837 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
838 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
839 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
840 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
841 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
842 IID = Intrinsic::amdgcn_global_load_tr_b128;
844 case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
845 IID = Intrinsic::amdgcn_global_load_tr4_b64;
847 case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
848 IID = Intrinsic::amdgcn_global_load_tr6_b96;
850 case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
851 IID = Intrinsic::amdgcn_ds_load_tr4_b64;
853 case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
854 IID = Intrinsic::amdgcn_ds_load_tr6_b96;
856 case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
857 IID = Intrinsic::amdgcn_ds_load_tr8_b64;
859 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
860 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
861 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
862 IID = Intrinsic::amdgcn_ds_load_tr16_b128;
864 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
865 IID = Intrinsic::amdgcn_ds_read_tr4_b64;
867 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
868 IID = Intrinsic::amdgcn_ds_read_tr8_b64;
870 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
871 IID = Intrinsic::amdgcn_ds_read_tr6_b96;
873 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16:
874 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
875 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
876 IID = Intrinsic::amdgcn_ds_read_tr16_b64;
881 llvm::Function *F =
CGM.getIntrinsic(IID, {LoadTy});
884 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
885 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
886 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
887 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
888 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
889 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128: {
893 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
894 IID = Intrinsic::amdgcn_global_load_monitor_b32;
896 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
897 IID = Intrinsic::amdgcn_global_load_monitor_b64;
899 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
900 IID = Intrinsic::amdgcn_global_load_monitor_b128;
902 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
903 IID = Intrinsic::amdgcn_flat_load_monitor_b32;
905 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
906 IID = Intrinsic::amdgcn_flat_load_monitor_b64;
908 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128:
909 IID = Intrinsic::amdgcn_flat_load_monitor_b128;
913 LLVMContext &Ctx =
CGM.getLLVMContext();
923 StringRef ScopeStr =
CGM.getTargetCodeGenInfo().getLLVMSyncScopeStr(
927 llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, ScopeStr)});
928 llvm::Value *ScopeMD = llvm::MetadataAsValue::get(Ctx, MD);
929 llvm::Function *F =
CGM.getIntrinsic(IID, {LoadTy});
930 return Builder.CreateCall(F, {
Addr, AOExpr, ScopeMD});
932 case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:
933 case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:
934 case AMDGPU::BI__builtin_amdgcn_cluster_load_b128: {
937 case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:
938 IID = Intrinsic::amdgcn_cluster_load_b32;
940 case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:
941 IID = Intrinsic::amdgcn_cluster_load_b64;
943 case AMDGPU::BI__builtin_amdgcn_cluster_load_b128:
944 IID = Intrinsic::amdgcn_cluster_load_b128;
948 for (
int i = 0, e = E->
getNumArgs(); i != e; ++i)
951 return Builder.CreateCall(F, {Args});
953 case AMDGPU::BI__builtin_amdgcn_load_to_lds: {
956 Intrinsic::amdgcn_load_to_lds);
958 case AMDGPU::BI__builtin_amdgcn_load_async_to_lds: {
961 *
this, E, Intrinsic::amdgcn_load_async_to_lds);
963 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
964 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
965 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
966 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
967 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
968 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B: {
971 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
972 IID = Intrinsic::amdgcn_cooperative_atomic_load_32x4B;
974 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
975 IID = Intrinsic::amdgcn_cooperative_atomic_store_32x4B;
977 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
978 IID = Intrinsic::amdgcn_cooperative_atomic_load_16x8B;
980 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
981 IID = Intrinsic::amdgcn_cooperative_atomic_store_16x8B;
983 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
984 IID = Intrinsic::amdgcn_cooperative_atomic_load_8x16B;
986 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B:
987 IID = Intrinsic::amdgcn_cooperative_atomic_store_8x16B;
991 LLVMContext &Ctx =
CGM.getLLVMContext();
994 const unsigned ScopeArg = E->
getNumArgs() - 1;
995 for (
unsigned i = 0; i != ScopeArg; ++i)
999 llvm::MDNode *MD = llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, Arg)});
1000 Args.push_back(llvm::MetadataAsValue::get(Ctx, MD));
1003 llvm::Function *F =
CGM.getIntrinsic(IID, {Args[0]->getType()});
1004 return Builder.CreateCall(F, {Args});
1006 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
1007 Function *F =
CGM.getIntrinsic(Intrinsic::get_fpenv,
1011 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
1012 Function *F =
CGM.getIntrinsic(Intrinsic::set_fpenv,
1015 return Builder.CreateCall(F, {Env});
1017 case AMDGPU::BI__builtin_amdgcn_read_exec:
1019 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
1021 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
1023 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
1024 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
1025 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
1026 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
1036 RayOrigin =
Builder.CreateShuffleVector(RayOrigin, RayOrigin,
1039 Builder.CreateShuffleVector(RayDir, RayDir, {0, 1, 2});
1040 RayInverseDir =
Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
1043 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
1044 {NodePtr->getType(), RayDir->getType()});
1045 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
1046 RayInverseDir, TextureDescr});
1048 case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
1049 case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray: {
1051 switch (BuiltinID) {
1052 case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
1053 IID = Intrinsic::amdgcn_image_bvh8_intersect_ray;
1055 case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray:
1056 IID = Intrinsic::amdgcn_image_bvh_dual_intersect_ray;
1070 llvm::Function *IntrinsicFunc =
CGM.getIntrinsic(IID);
1072 llvm::CallInst *CI =
Builder.CreateCall(
1073 IntrinsicFunc, {NodePtr, RayExtent, InstanceMask, RayOrigin, RayDir,
1074 Offset, TextureDescr});
1076 llvm::Value *RetVData =
Builder.CreateExtractValue(CI, 0);
1077 llvm::Value *RetRayOrigin =
Builder.CreateExtractValue(CI, 1);
1078 llvm::Value *RetRayDir =
Builder.CreateExtractValue(CI, 2);
1080 Builder.CreateStore(RetRayOrigin, RetRayOriginPtr);
1081 Builder.CreateStore(RetRayDir, RetRayDirPtr);
1086 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
1087 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
1088 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
1089 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn: {
1091 switch (BuiltinID) {
1092 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
1093 IID = Intrinsic::amdgcn_ds_bvh_stack_rtn;
1095 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
1096 IID = Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn;
1098 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
1099 IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn;
1101 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn:
1102 IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn;
1107 for (
int i = 0, e = E->
getNumArgs(); i != e; ++i)
1115 Value *I0 =
Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
1119 if (A->
getType()->getPrimitiveSizeInBits() <
1120 RetTy->getScalarType()->getPrimitiveSizeInBits())
1121 A =
Builder.CreateZExt(A, RetTy->getScalarType());
1123 return Builder.CreateInsertElement(I0, A, 1);
1125 case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
1126 case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
1128 *
this, E, Intrinsic::amdgcn_image_load_1d,
false);
1129 case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
1130 case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
1132 *
this, E, Intrinsic::amdgcn_image_load_1darray,
false);
1133 case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
1134 case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
1135 case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
1137 *
this, E, Intrinsic::amdgcn_image_load_2d,
false);
1138 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
1139 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
1140 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
1142 *
this, E, Intrinsic::amdgcn_image_load_2darray,
false);
1143 case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
1144 case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
1146 *
this, E, Intrinsic::amdgcn_image_load_3d,
false);
1147 case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
1148 case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
1150 *
this, E, Intrinsic::amdgcn_image_load_cube,
false);
1151 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
1152 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
1154 *
this, E, Intrinsic::amdgcn_image_load_mip_1d,
false);
1155 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
1156 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
1158 *
this, E, Intrinsic::amdgcn_image_load_mip_1darray,
false);
1159 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
1160 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
1161 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
1163 *
this, E, Intrinsic::amdgcn_image_load_mip_2d,
false);
1164 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
1165 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
1166 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
1168 *
this, E, Intrinsic::amdgcn_image_load_mip_2darray,
false);
1169 case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
1170 case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
1172 *
this, E, Intrinsic::amdgcn_image_load_mip_3d,
false);
1173 case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
1174 case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
1176 *
this, E, Intrinsic::amdgcn_image_load_mip_cube,
false);
1177 case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
1178 case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
1180 *
this, E, Intrinsic::amdgcn_image_store_1d,
true);
1181 case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
1182 case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
1184 *
this, E, Intrinsic::amdgcn_image_store_1darray,
true);
1185 case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
1186 case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
1187 case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
1189 *
this, E, Intrinsic::amdgcn_image_store_2d,
true);
1190 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
1191 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
1192 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
1194 *
this, E, Intrinsic::amdgcn_image_store_2darray,
true);
1195 case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
1196 case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
1198 *
this, E, Intrinsic::amdgcn_image_store_3d,
true);
1199 case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
1200 case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
1202 *
this, E, Intrinsic::amdgcn_image_store_cube,
true);
1203 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
1204 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
1206 *
this, E, Intrinsic::amdgcn_image_store_mip_1d,
true);
1207 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
1208 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
1210 *
this, E, Intrinsic::amdgcn_image_store_mip_1darray,
true);
1211 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
1212 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
1213 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
1215 *
this, E, Intrinsic::amdgcn_image_store_mip_2d,
true);
1216 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
1217 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
1218 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
1220 *
this, E, Intrinsic::amdgcn_image_store_mip_2darray,
true);
1221 case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
1222 case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
1224 *
this, E, Intrinsic::amdgcn_image_store_mip_3d,
true);
1225 case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
1226 case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32:
1228 *
this, E, Intrinsic::amdgcn_image_store_mip_cube,
true);
1229 case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
1230 case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
1232 *
this, E, Intrinsic::amdgcn_image_sample_1d,
false);
1233 case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
1234 case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
1236 *
this, E, Intrinsic::amdgcn_image_sample_1darray,
false);
1237 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
1238 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
1239 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
1241 *
this, E, Intrinsic::amdgcn_image_sample_2d,
false);
1242 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
1243 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
1244 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
1246 *
this, E, Intrinsic::amdgcn_image_sample_2darray,
false);
1247 case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
1248 case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
1250 *
this, E, Intrinsic::amdgcn_image_sample_3d,
false);
1251 case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
1252 case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
1254 *
this, E, Intrinsic::amdgcn_image_sample_cube,
false);
1255 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32:
1256 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32:
1258 *
this, E, Intrinsic::amdgcn_image_sample_lz_1d,
false);
1259 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32:
1260 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32:
1262 *
this, E, Intrinsic::amdgcn_image_sample_l_1d,
false);
1263 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32:
1264 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32:
1266 *
this, E, Intrinsic::amdgcn_image_sample_d_1d,
false);
1267 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32:
1268 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32:
1269 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32:
1271 *
this, E, Intrinsic::amdgcn_image_sample_lz_2d,
false);
1272 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32:
1273 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32:
1274 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32:
1276 *
this, E, Intrinsic::amdgcn_image_sample_l_2d,
false);
1277 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32:
1278 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32:
1279 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32:
1281 *
this, E, Intrinsic::amdgcn_image_sample_d_2d,
false);
1282 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32:
1283 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32:
1285 *
this, E, Intrinsic::amdgcn_image_sample_lz_3d,
false);
1286 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32:
1287 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32:
1289 *
this, E, Intrinsic::amdgcn_image_sample_l_3d,
false);
1290 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32:
1291 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32:
1293 *
this, E, Intrinsic::amdgcn_image_sample_d_3d,
false);
1294 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32:
1295 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32:
1297 *
this, E, Intrinsic::amdgcn_image_sample_lz_cube,
false);
1298 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32:
1299 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32:
1301 *
this, E, Intrinsic::amdgcn_image_sample_l_cube,
false);
1302 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32:
1303 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32:
1305 *
this, E, Intrinsic::amdgcn_image_sample_lz_1darray,
false);
1306 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32:
1307 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32:
1309 *
this, E, Intrinsic::amdgcn_image_sample_l_1darray,
false);
1310 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32:
1311 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32:
1313 *
this, E, Intrinsic::amdgcn_image_sample_d_1darray,
false);
1314 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32:
1315 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32:
1316 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32:
1318 *
this, E, Intrinsic::amdgcn_image_sample_lz_2darray,
false);
1319 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32:
1320 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32:
1321 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32:
1323 *
this, E, Intrinsic::amdgcn_image_sample_l_2darray,
false);
1324 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32:
1325 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32:
1326 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32:
1328 *
this, E, Intrinsic::amdgcn_image_sample_d_2darray,
false);
1329 case clang::AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32:
1331 *
this, E, Intrinsic::amdgcn_image_gather4_lz_2d,
false);
1332 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
1333 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
1334 llvm::FixedVectorType *VT = FixedVectorType::get(
Builder.getInt32Ty(), 8);
1336 BuiltinID == AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4
1337 ? Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4
1338 : Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4,
1342 for (
unsigned I = 0, N = E->
getNumArgs(); I != N; ++I)
1344 return Builder.CreateCall(F, Args);
1346 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
1347 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
1348 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
1349 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
1350 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
1351 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
1352 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
1353 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
1354 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
1355 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
1356 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
1357 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
1358 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
1359 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
1360 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
1361 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
1362 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
1363 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
1364 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
1365 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
1366 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
1367 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
1368 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
1369 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
1370 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
1371 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
1372 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
1373 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
1374 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
1375 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
1376 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
1377 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
1378 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
1379 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
1380 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
1381 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
1382 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
1383 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
1384 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
1385 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
1386 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
1387 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
1388 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
1389 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
1390 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
1391 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
1392 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
1393 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
1394 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
1395 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
1396 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
1397 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
1398 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
1399 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
1400 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
1401 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
1402 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
1403 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
1404 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
1405 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
1407 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
1408 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
1409 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
1410 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
1411 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
1412 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
1413 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
1414 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
1415 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
1416 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
1417 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
1418 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
1419 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
1420 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
1421 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
1422 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
1423 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
1424 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
1425 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
1426 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
1427 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
1428 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
1429 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
1430 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
1431 case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
1432 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1433 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
1434 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:
1435 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:
1436 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
1437 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
1438 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
1439 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
1440 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
1441 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
1442 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
1443 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
1444 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
1445 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
1446 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
1447 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
1448 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
1449 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8: {
1462 bool AppendFalseForOpselArg =
false;
1463 unsigned BuiltinWMMAOp;
1465 bool NeedReturnType =
false;
1467 switch (BuiltinID) {
1468 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
1469 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
1470 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
1471 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
1472 ArgsForMatchingMatrixTypes = {2, 0};
1473 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
1475 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
1476 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
1477 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
1478 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
1479 ArgsForMatchingMatrixTypes = {2, 0};
1480 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
1482 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
1483 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
1484 AppendFalseForOpselArg =
true;
1486 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
1487 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
1488 ArgsForMatchingMatrixTypes = {2, 0};
1489 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
1491 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
1492 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
1493 AppendFalseForOpselArg =
true;
1495 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
1496 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
1497 ArgsForMatchingMatrixTypes = {2, 0};
1498 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
1500 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
1501 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
1502 ArgsForMatchingMatrixTypes = {2, 0};
1503 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
1505 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
1506 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
1507 ArgsForMatchingMatrixTypes = {2, 0};
1508 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
1510 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
1511 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
1512 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
1513 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
1514 ArgsForMatchingMatrixTypes = {4, 1};
1515 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
1517 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
1518 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
1519 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
1520 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
1521 ArgsForMatchingMatrixTypes = {4, 1};
1522 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
1524 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
1525 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
1526 ArgsForMatchingMatrixTypes = {2, 0};
1527 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
1529 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
1530 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
1531 ArgsForMatchingMatrixTypes = {2, 0};
1532 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
1534 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
1535 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
1536 ArgsForMatchingMatrixTypes = {2, 0};
1537 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
1539 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
1540 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
1541 ArgsForMatchingMatrixTypes = {2, 0};
1542 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
1544 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
1545 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
1546 ArgsForMatchingMatrixTypes = {4, 1};
1547 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
1549 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
1550 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
1551 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1552 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
1554 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
1555 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
1556 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1557 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
1559 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
1560 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
1561 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1562 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
1564 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
1565 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
1566 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1567 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
1569 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
1570 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
1571 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1572 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
1574 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
1575 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
1576 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1577 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
1579 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
1580 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
1581 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1582 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
1584 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
1585 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
1586 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1587 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
1589 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
1590 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
1591 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1592 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
1594 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
1595 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
1596 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1597 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
1599 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
1600 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
1601 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1602 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
1605 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
1606 ArgsForMatchingMatrixTypes = {5, 1};
1607 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x4_f32;
1609 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
1610 ArgsForMatchingMatrixTypes = {5, 1};
1611 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_bf16;
1613 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
1614 ArgsForMatchingMatrixTypes = {5, 1};
1615 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_f16;
1617 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
1618 ArgsForMatchingMatrixTypes = {5, 1};
1619 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x32_f16;
1621 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
1622 ArgsForMatchingMatrixTypes = {5, 1};
1623 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16;
1625 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
1626 NeedReturnType =
true;
1627 ArgsForMatchingMatrixTypes = {1, 5};
1628 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16;
1630 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
1631 ArgsForMatchingMatrixTypes = {3, 0};
1632 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_fp8;
1634 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
1635 ArgsForMatchingMatrixTypes = {3, 0};
1636 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_bf8;
1638 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
1639 ArgsForMatchingMatrixTypes = {3, 0};
1640 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_fp8;
1642 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
1643 ArgsForMatchingMatrixTypes = {3, 0};
1644 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_bf8;
1646 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
1647 ArgsForMatchingMatrixTypes = {3, 0};
1648 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_fp8;
1650 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
1651 ArgsForMatchingMatrixTypes = {3, 0};
1652 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_bf8;
1654 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
1655 ArgsForMatchingMatrixTypes = {3, 0};
1656 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_fp8;
1658 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
1659 ArgsForMatchingMatrixTypes = {3, 0};
1660 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_bf8;
1662 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
1663 ArgsForMatchingMatrixTypes = {3, 0};
1664 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_fp8;
1666 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
1667 ArgsForMatchingMatrixTypes = {3, 0};
1668 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_bf8;
1670 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
1671 ArgsForMatchingMatrixTypes = {3, 0};
1672 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_fp8;
1674 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
1675 ArgsForMatchingMatrixTypes = {3, 0};
1676 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_bf8;
1678 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
1679 ArgsForMatchingMatrixTypes = {3, 0};
1680 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_fp8;
1682 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
1683 ArgsForMatchingMatrixTypes = {3, 0};
1684 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_bf8;
1686 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
1687 ArgsForMatchingMatrixTypes = {3, 0};
1688 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_fp8;
1690 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
1691 ArgsForMatchingMatrixTypes = {3, 0};
1692 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_bf8;
1694 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
1695 ArgsForMatchingMatrixTypes = {4, 1};
1696 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x64_iu8;
1698 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
1699 ArgsForMatchingMatrixTypes = {5, 1, 3};
1700 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4;
1702 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1703 ArgsForMatchingMatrixTypes = {5, 1, 3};
1704 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4;
1706 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
1707 ArgsForMatchingMatrixTypes = {5, 1, 3};
1708 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4;
1710 case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
1711 ArgsForMatchingMatrixTypes = {3, 0, 1};
1712 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_32x16x128_f4;
1714 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:
1715 ArgsForMatchingMatrixTypes = {3, 0, 1};
1716 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_32x16x128_f4;
1718 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:
1719 ArgsForMatchingMatrixTypes = {3, 0, 1};
1720 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_32x16x128_f4;
1722 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
1723 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1724 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_f16;
1726 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
1727 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1728 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_bf16;
1730 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
1731 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1732 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x64_f16;
1734 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
1735 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1736 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x64_bf16;
1738 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
1739 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1740 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16f32_16x16x64_bf16;
1742 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
1743 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1744 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_fp8;
1746 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
1747 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1748 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_bf8;
1750 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
1751 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1752 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_fp8;
1754 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
1755 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1756 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_bf8;
1758 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
1759 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1760 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_fp8;
1762 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
1763 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1764 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_bf8;
1766 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
1767 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1768 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_fp8;
1770 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
1771 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1772 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_bf8;
1774 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8:
1775 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1776 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8;
1781 for (
int i = 0, e = E->
getNumArgs(); i != e; ++i)
1783 if (AppendFalseForOpselArg)
1784 Args.push_back(
Builder.getFalse());
1787 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8) {
1788 if (Args.size() == 7)
1789 Args.push_back(
Builder.getFalse());
1790 assert(Args.size() == 8 &&
"Expected 8 arguments");
1791 Args[7] =
Builder.CreateZExtOrTrunc(Args[7],
Builder.getInt1Ty());
1792 }
else if (BuiltinID ==
1793 AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8) {
1794 if (Args.size() == 8)
1795 Args.push_back(
Builder.getFalse());
1796 assert(Args.size() == 9 &&
"Expected 9 arguments");
1797 Args[8] =
Builder.CreateZExtOrTrunc(Args[8],
Builder.getInt1Ty());
1803 for (
auto ArgIdx : ArgsForMatchingMatrixTypes)
1804 ArgTypes.push_back(Args[ArgIdx]->
getType());
1806 Function *F =
CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
1807 return Builder.CreateCall(F, Args);
1810 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
1811 return EmitAMDGPUWorkGroupSize(*
this, 0);
1812 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
1813 return EmitAMDGPUWorkGroupSize(*
this, 1);
1814 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
1815 return EmitAMDGPUWorkGroupSize(*
this, 2);
1818 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
1819 return EmitAMDGPUGridSize(*
this, 0);
1820 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
1821 return EmitAMDGPUGridSize(*
this, 1);
1822 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
1823 return EmitAMDGPUGridSize(*
this, 2);
1826 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
1827 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
1829 Intrinsic::r600_recipsqrt_ieee);
1830 case AMDGPU::BI__builtin_amdgcn_alignbit: {
1834 Function *F =
CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
1835 return Builder.CreateCall(F, { Src0, Src1, Src2 });
1837 case AMDGPU::BI__builtin_amdgcn_fence: {
1840 FenceInst *Fence =
Builder.CreateFence(AO, SSID);
1845 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
1846 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
1847 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
1848 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
1849 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
1850 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
1851 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
1852 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
1853 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
1854 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
1855 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
1856 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
1857 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
1858 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1859 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1860 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1861 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1862 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
1863 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1864 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1865 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
1866 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
1867 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
1868 llvm::AtomicRMWInst::BinOp BinOp;
1869 switch (BuiltinID) {
1870 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
1871 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
1872 BinOp = llvm::AtomicRMWInst::UIncWrap;
1874 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
1875 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
1876 BinOp = llvm::AtomicRMWInst::UDecWrap;
1878 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
1879 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
1880 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
1881 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
1882 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
1883 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
1884 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
1885 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1886 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1887 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1888 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1889 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
1890 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1891 BinOp = llvm::AtomicRMWInst::FAdd;
1893 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
1894 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1895 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
1896 BinOp = llvm::AtomicRMWInst::FMin;
1898 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
1899 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
1900 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
1901 BinOp = llvm::AtomicRMWInst::FMax;
1907 llvm::Type *OrigTy = Val->
getType();
1912 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
1913 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
1914 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
1939 AO = AtomicOrdering::Monotonic;
1942 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
1943 BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
1944 BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
1945 llvm::Type *V2BF16Ty = FixedVectorType::get(
1946 llvm::Type::getBFloatTy(
Builder.getContext()), 2);
1947 Val =
Builder.CreateBitCast(Val, V2BF16Ty);
1951 llvm::AtomicRMWInst *RMW =
1952 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
1954 RMW->setVolatile(
true);
1956 unsigned AddrSpace = Ptr.
getType()->getAddressSpace();
1957 if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
1961 RMW->setMetadata(
"amdgpu.no.fine.grained.memory", EmptyMD);
1965 if (BinOp == llvm::AtomicRMWInst::FAdd && Val->
getType()->isFloatTy())
1966 RMW->setMetadata(
"amdgpu.ignore.denormal.mode", EmptyMD);
1969 return Builder.CreateBitCast(RMW, OrigTy);
1971 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
1972 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
1977 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
1978 return Builder.CreateCall(F, {Arg});
1980 case AMDGPU::BI__builtin_amdgcn_permlane16_swap:
1981 case AMDGPU::BI__builtin_amdgcn_permlane32_swap: {
1989 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16_swap
1990 ? Intrinsic::amdgcn_permlane16_swap
1991 : Intrinsic::amdgcn_permlane32_swap);
1992 llvm::CallInst *
Call =
1993 Builder.CreateCall(F, {VDstOld, VSrcOld, FI, BoundCtrl});
1995 llvm::Value *Elt0 =
Builder.CreateExtractValue(
Call, 0);
1996 llvm::Value *Elt1 =
Builder.CreateExtractValue(
Call, 1);
2000 llvm::Value *Insert0 =
Builder.CreateInsertElement(
2001 llvm::PoisonValue::get(ResultType), Elt0, UINT64_C(0));
2002 llvm::Value *AsVector =
2003 Builder.CreateInsertElement(Insert0, Elt1, UINT64_C(1));
2006 case AMDGPU::BI__builtin_amdgcn_bitop3_b32:
2007 case AMDGPU::BI__builtin_amdgcn_bitop3_b16:
2009 Intrinsic::amdgcn_bitop3);
2010 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc: {
2015 for (
unsigned I = 0; I < 4; ++I)
2017 llvm::PointerType *RetTy = llvm::PointerType::get(
2018 Builder.getContext(), llvm::AMDGPUAS::BUFFER_RESOURCE);
2019 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_make_buffer_rsrc,
2020 {RetTy, Args[0]->getType()});
2021 return Builder.CreateCall(F, Args);
2023 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
2024 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
2025 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
2026 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
2027 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
2028 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
2030 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
2031 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
2032 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
2033 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
2034 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
2035 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
2036 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
2037 llvm::Type *RetTy =
nullptr;
2038 switch (BuiltinID) {
2039 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
2042 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
2045 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
2048 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
2049 RetTy = llvm::FixedVectorType::get(
Int32Ty, 2);
2051 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
2052 RetTy = llvm::FixedVectorType::get(
Int32Ty, 3);
2054 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
2055 RetTy = llvm::FixedVectorType::get(
Int32Ty, 4);
2059 CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
2064 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
2066 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_add);
2067 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:
2068 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
2070 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd);
2071 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32:
2072 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64:
2074 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin);
2075 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
2076 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
2078 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax);
2079 case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
2081 *
this, E, Intrinsic::amdgcn_s_prefetch_data);
2082 case Builtin::BIlogbf:
2083 case Builtin::BI__builtin_logbf: {
2087 CallInst *FrExp =
Builder.CreateCall(FrExpFunc, Src0);
2090 Exp, ConstantInt::getSigned(Exp->
getType(), -1),
"",
false,
true);
2095 Fabs, ConstantFP::getInfinity(
Builder.getFloatTy()));
2096 Value *Sel1 =
Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
2098 Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(
Builder.getFloatTy()));
2101 ConstantFP::getInfinity(
Builder.getFloatTy(),
true), Sel1);
2104 case Builtin::BIlogb:
2105 case Builtin::BI__builtin_logb: {
2109 CallInst *FrExp =
Builder.CreateCall(FrExpFunc, Src0);
2112 Exp, ConstantInt::getSigned(Exp->
getType(), -1),
"",
false,
true);
2117 Fabs, ConstantFP::getInfinity(
Builder.getDoubleTy()));
2118 Value *Sel1 =
Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
2120 Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(
Builder.getDoubleTy()));
2123 ConstantFP::getInfinity(
Builder.getDoubleTy(),
true),
2127 case Builtin::BIscalbnf:
2128 case Builtin::BI__builtin_scalbnf:
2129 case Builtin::BIscalbn:
2130 case Builtin::BI__builtin_scalbn:
2132 *
this, E, Intrinsic::ldexp, Intrinsic::experimental_constrained_ldexp);