430 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
431 llvm::SyncScope::ID SSID;
433 case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
434 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f32:
435 case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
436 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f32:
437 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
438 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
439 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f32:
440 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
441 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
442 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f32:
443 case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
444 case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
445 case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
446 case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
447 case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
448 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
449 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
450 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
451 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
452 case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
453 case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
454 case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64: {
461 case AMDGPU::BI__builtin_amdgcn_div_scale:
462 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
472 llvm::Function *Callee =
CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
475 llvm::Value *Tmp =
Builder.CreateCall(Callee, {
X, Y, Z});
478 llvm::Value *Flag =
Builder.CreateExtractValue(Tmp, 1);
482 llvm::Value *FlagExt =
Builder.CreateZExt(Flag, RealFlagType);
483 Builder.CreateStore(FlagExt, FlagOutPtr);
486 case AMDGPU::BI__builtin_amdgcn_div_fmas:
487 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
493 llvm::Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
495 llvm::Value *Src3ToBool =
Builder.CreateIsNotNull(Src3);
496 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
499 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
501 Intrinsic::amdgcn_ds_swizzle);
502 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
503 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
504 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
508 unsigned ICEArguments = 0;
513 unsigned Size = DataTy->getPrimitiveSizeInBits();
515 llvm::IntegerType::get(
Builder.getContext(), std::max(Size, 32u));
517 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8
518 ? Intrinsic::amdgcn_mov_dpp8
519 : Intrinsic::amdgcn_update_dpp,
523 bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;
525 Args.push_back(llvm::PoisonValue::get(
IntTy));
526 for (
unsigned I = 0; I != E->
getNumArgs(); ++I) {
528 if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2u : 1u) &&
530 if (!DataTy->isIntegerTy())
532 V, llvm::IntegerType::get(
Builder.getContext(), Size));
536 F->getFunctionType()->getFunctionParamType(I + InsertOld);
537 Args.push_back(
Builder.CreateTruncOrBitCast(
V, ExpTy));
540 if (Size < 32 && !DataTy->isIntegerTy())
542 V, llvm::IntegerType::get(
Builder.getContext(), Size));
543 return Builder.CreateTruncOrBitCast(
V, DataTy);
545 case AMDGPU::BI__builtin_amdgcn_permlane16:
546 case AMDGPU::BI__builtin_amdgcn_permlanex16:
549 BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
550 ? Intrinsic::amdgcn_permlane16
551 : Intrinsic::amdgcn_permlanex16);
552 case AMDGPU::BI__builtin_amdgcn_permlane64:
554 Intrinsic::amdgcn_permlane64);
555 case AMDGPU::BI__builtin_amdgcn_readlane:
557 Intrinsic::amdgcn_readlane);
558 case AMDGPU::BI__builtin_amdgcn_readfirstlane:
560 Intrinsic::amdgcn_readfirstlane);
561 case AMDGPU::BI__builtin_amdgcn_div_fixup:
562 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
563 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
565 Intrinsic::amdgcn_div_fixup);
566 case AMDGPU::BI__builtin_amdgcn_trig_preop:
567 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
569 case AMDGPU::BI__builtin_amdgcn_rcp:
570 case AMDGPU::BI__builtin_amdgcn_rcpf:
571 case AMDGPU::BI__builtin_amdgcn_rcph:
572 case AMDGPU::BI__builtin_amdgcn_rcp_bf16:
574 case AMDGPU::BI__builtin_amdgcn_sqrt:
575 case AMDGPU::BI__builtin_amdgcn_sqrtf:
576 case AMDGPU::BI__builtin_amdgcn_sqrth:
577 case AMDGPU::BI__builtin_amdgcn_sqrt_bf16:
579 Intrinsic::amdgcn_sqrt);
580 case AMDGPU::BI__builtin_amdgcn_rsq:
581 case AMDGPU::BI__builtin_amdgcn_rsqf:
582 case AMDGPU::BI__builtin_amdgcn_rsqh:
583 case AMDGPU::BI__builtin_amdgcn_rsq_bf16:
585 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
586 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
588 Intrinsic::amdgcn_rsq_clamp);
589 case AMDGPU::BI__builtin_amdgcn_sinf:
590 case AMDGPU::BI__builtin_amdgcn_sinh:
591 case AMDGPU::BI__builtin_amdgcn_sin_bf16:
593 case AMDGPU::BI__builtin_amdgcn_cosf:
594 case AMDGPU::BI__builtin_amdgcn_cosh:
595 case AMDGPU::BI__builtin_amdgcn_cos_bf16:
597 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
598 return EmitAMDGPUDispatchPtr(*
this, E);
599 case AMDGPU::BI__builtin_amdgcn_logf:
600 case AMDGPU::BI__builtin_amdgcn_log_bf16:
602 case AMDGPU::BI__builtin_amdgcn_exp2f:
603 case AMDGPU::BI__builtin_amdgcn_exp2_bf16:
605 Intrinsic::amdgcn_exp2);
606 case AMDGPU::BI__builtin_amdgcn_log_clampf:
608 Intrinsic::amdgcn_log_clamp);
609 case AMDGPU::BI__builtin_amdgcn_ldexp:
610 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
614 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
615 return Builder.CreateCall(F, {Src0, Src1});
617 case AMDGPU::BI__builtin_amdgcn_ldexph: {
623 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(),
Int16Ty});
626 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
627 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
628 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
630 Intrinsic::amdgcn_frexp_mant);
631 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
632 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
634 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
636 return Builder.CreateCall(F, Src0);
638 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
640 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
642 return Builder.CreateCall(F, Src0);
644 case AMDGPU::BI__builtin_amdgcn_fract:
645 case AMDGPU::BI__builtin_amdgcn_fractf:
646 case AMDGPU::BI__builtin_amdgcn_fracth:
648 Intrinsic::amdgcn_fract);
649 case AMDGPU::BI__builtin_amdgcn_lerp:
651 Intrinsic::amdgcn_lerp);
652 case AMDGPU::BI__builtin_amdgcn_ubfe:
654 Intrinsic::amdgcn_ubfe);
655 case AMDGPU::BI__builtin_amdgcn_sbfe:
657 Intrinsic::amdgcn_sbfe);
658 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
659 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
662 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
663 return Builder.CreateCall(F, {Src});
665 case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w32:
666 case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w64: {
669 CGM.getIntrinsic(Intrinsic::amdgcn_inverse_ballot, {Src->getType()});
670 return Builder.CreateCall(F, {Src});
672 case AMDGPU::BI__builtin_amdgcn_tanhf:
673 case AMDGPU::BI__builtin_amdgcn_tanhh:
674 case AMDGPU::BI__builtin_amdgcn_tanh_bf16:
676 Intrinsic::amdgcn_tanh);
677 case AMDGPU::BI__builtin_amdgcn_uicmp:
678 case AMDGPU::BI__builtin_amdgcn_uicmpl:
679 case AMDGPU::BI__builtin_amdgcn_sicmp:
680 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
686 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
687 {
Builder.getInt64Ty(), Src0->getType() });
688 return Builder.CreateCall(F, { Src0, Src1, Src2 });
690 case AMDGPU::BI__builtin_amdgcn_fcmp:
691 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
697 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
698 {
Builder.getInt64Ty(), Src0->getType() });
699 return Builder.CreateCall(F, { Src0, Src1, Src2 });
701 case AMDGPU::BI__builtin_amdgcn_class:
702 case AMDGPU::BI__builtin_amdgcn_classf:
703 case AMDGPU::BI__builtin_amdgcn_classh:
705 case AMDGPU::BI__builtin_amdgcn_fmed3f:
706 case AMDGPU::BI__builtin_amdgcn_fmed3h:
708 Intrinsic::amdgcn_fmed3);
709 case AMDGPU::BI__builtin_amdgcn_ds_append:
710 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
711 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
712 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
717 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
718 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
719 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
720 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
721 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
722 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
723 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
724 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
725 case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
726 case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
727 case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
728 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
729 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
730 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
731 case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
732 case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
733 case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
734 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
735 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
736 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
737 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
738 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
739 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
740 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
741 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
742 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16: {
745 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
746 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
747 case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
748 IID = Intrinsic::amdgcn_global_load_tr_b64;
750 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
751 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
752 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
753 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
754 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
755 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
756 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
757 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
758 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
759 IID = Intrinsic::amdgcn_global_load_tr_b128;
761 case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
762 IID = Intrinsic::amdgcn_global_load_tr4_b64;
764 case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
765 IID = Intrinsic::amdgcn_global_load_tr6_b96;
767 case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
768 IID = Intrinsic::amdgcn_ds_load_tr4_b64;
770 case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
771 IID = Intrinsic::amdgcn_ds_load_tr6_b96;
773 case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
774 IID = Intrinsic::amdgcn_ds_load_tr8_b64;
776 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
777 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
778 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
779 IID = Intrinsic::amdgcn_ds_load_tr16_b128;
781 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
782 IID = Intrinsic::amdgcn_ds_read_tr4_b64;
784 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
785 IID = Intrinsic::amdgcn_ds_read_tr8_b64;
787 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
788 IID = Intrinsic::amdgcn_ds_read_tr6_b96;
790 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16:
791 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
792 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
793 IID = Intrinsic::amdgcn_ds_read_tr16_b64;
798 llvm::Function *F =
CGM.getIntrinsic(IID, {LoadTy});
801 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
802 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
803 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
804 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
805 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
806 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128: {
810 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
811 IID = Intrinsic::amdgcn_global_load_monitor_b32;
813 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
814 IID = Intrinsic::amdgcn_global_load_monitor_b64;
816 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
817 IID = Intrinsic::amdgcn_global_load_monitor_b128;
819 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
820 IID = Intrinsic::amdgcn_flat_load_monitor_b32;
822 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
823 IID = Intrinsic::amdgcn_flat_load_monitor_b64;
825 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128:
826 IID = Intrinsic::amdgcn_flat_load_monitor_b128;
833 llvm::Function *F =
CGM.getIntrinsic(IID, {LoadTy});
836 case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:
837 case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:
838 case AMDGPU::BI__builtin_amdgcn_cluster_load_b128: {
841 case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:
842 IID = Intrinsic::amdgcn_cluster_load_b32;
844 case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:
845 IID = Intrinsic::amdgcn_cluster_load_b64;
847 case AMDGPU::BI__builtin_amdgcn_cluster_load_b128:
848 IID = Intrinsic::amdgcn_cluster_load_b128;
852 for (
int i = 0, e = E->
getNumArgs(); i != e; ++i)
855 return Builder.CreateCall(F, {Args});
857 case AMDGPU::BI__builtin_amdgcn_load_to_lds: {
860 Intrinsic::amdgcn_load_to_lds);
862 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
863 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
864 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
865 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
866 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
867 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B: {
870 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
871 IID = Intrinsic::amdgcn_cooperative_atomic_load_32x4B;
873 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
874 IID = Intrinsic::amdgcn_cooperative_atomic_store_32x4B;
876 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
877 IID = Intrinsic::amdgcn_cooperative_atomic_load_16x8B;
879 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
880 IID = Intrinsic::amdgcn_cooperative_atomic_store_16x8B;
882 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
883 IID = Intrinsic::amdgcn_cooperative_atomic_load_8x16B;
885 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B:
886 IID = Intrinsic::amdgcn_cooperative_atomic_store_8x16B;
890 LLVMContext &Ctx =
CGM.getLLVMContext();
893 const unsigned ScopeArg = E->
getNumArgs() - 1;
894 for (
unsigned i = 0; i != ScopeArg; ++i)
898 llvm::MDNode *MD = llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, Arg)});
899 Args.push_back(llvm::MetadataAsValue::get(Ctx, MD));
902 llvm::Function *F =
CGM.getIntrinsic(IID, {Args[0]->getType()});
903 return Builder.CreateCall(F, {Args});
905 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
906 Function *F =
CGM.getIntrinsic(Intrinsic::get_fpenv,
910 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
911 Function *F =
CGM.getIntrinsic(Intrinsic::set_fpenv,
914 return Builder.CreateCall(F, {Env});
916 case AMDGPU::BI__builtin_amdgcn_read_exec:
918 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
920 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
922 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
923 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
924 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
925 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
935 RayOrigin =
Builder.CreateShuffleVector(RayOrigin, RayOrigin,
938 Builder.CreateShuffleVector(RayDir, RayDir, {0, 1, 2});
939 RayInverseDir =
Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
942 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
943 {NodePtr->getType(), RayDir->getType()});
944 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
945 RayInverseDir, TextureDescr});
947 case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
948 case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray: {
951 case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
952 IID = Intrinsic::amdgcn_image_bvh8_intersect_ray;
954 case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray:
955 IID = Intrinsic::amdgcn_image_bvh_dual_intersect_ray;
969 llvm::Function *IntrinsicFunc =
CGM.getIntrinsic(IID);
971 llvm::CallInst *CI =
Builder.CreateCall(
972 IntrinsicFunc, {NodePtr, RayExtent, InstanceMask, RayOrigin, RayDir,
973 Offset, TextureDescr});
975 llvm::Value *RetVData =
Builder.CreateExtractValue(CI, 0);
976 llvm::Value *RetRayOrigin =
Builder.CreateExtractValue(CI, 1);
977 llvm::Value *RetRayDir =
Builder.CreateExtractValue(CI, 2);
979 Builder.CreateStore(RetRayOrigin, RetRayOriginPtr);
980 Builder.CreateStore(RetRayDir, RetRayDirPtr);
985 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
986 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
987 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
988 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn: {
991 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
992 IID = Intrinsic::amdgcn_ds_bvh_stack_rtn;
994 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
995 IID = Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn;
997 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
998 IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn;
1000 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn:
1001 IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn;
1006 for (
int i = 0, e = E->
getNumArgs(); i != e; ++i)
1014 Value *I0 =
Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
1018 if (A->
getType()->getPrimitiveSizeInBits() <
1019 RetTy->getScalarType()->getPrimitiveSizeInBits())
1020 A =
Builder.CreateZExt(A, RetTy->getScalarType());
1022 return Builder.CreateInsertElement(I0, A, 1);
1024 case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
1025 case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
1027 *
this, E, Intrinsic::amdgcn_image_load_1d,
false);
1028 case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
1029 case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
1031 *
this, E, Intrinsic::amdgcn_image_load_1darray,
false);
1032 case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
1033 case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
1034 case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
1036 *
this, E, Intrinsic::amdgcn_image_load_2d,
false);
1037 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
1038 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
1039 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
1041 *
this, E, Intrinsic::amdgcn_image_load_2darray,
false);
1042 case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
1043 case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
1045 *
this, E, Intrinsic::amdgcn_image_load_3d,
false);
1046 case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
1047 case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
1049 *
this, E, Intrinsic::amdgcn_image_load_cube,
false);
1050 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
1051 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
1053 *
this, E, Intrinsic::amdgcn_image_load_mip_1d,
false);
1054 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
1055 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
1057 *
this, E, Intrinsic::amdgcn_image_load_mip_1darray,
false);
1058 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
1059 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
1060 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
1062 *
this, E, Intrinsic::amdgcn_image_load_mip_2d,
false);
1063 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
1064 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
1065 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
1067 *
this, E, Intrinsic::amdgcn_image_load_mip_2darray,
false);
1068 case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
1069 case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
1071 *
this, E, Intrinsic::amdgcn_image_load_mip_3d,
false);
1072 case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
1073 case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
1075 *
this, E, Intrinsic::amdgcn_image_load_mip_cube,
false);
1076 case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
1077 case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
1079 *
this, E, Intrinsic::amdgcn_image_store_1d,
true);
1080 case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
1081 case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
1083 *
this, E, Intrinsic::amdgcn_image_store_1darray,
true);
1084 case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
1085 case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
1086 case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
1088 *
this, E, Intrinsic::amdgcn_image_store_2d,
true);
1089 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
1090 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
1091 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
1093 *
this, E, Intrinsic::amdgcn_image_store_2darray,
true);
1094 case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
1095 case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
1097 *
this, E, Intrinsic::amdgcn_image_store_3d,
true);
1098 case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
1099 case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
1101 *
this, E, Intrinsic::amdgcn_image_store_cube,
true);
1102 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
1103 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
1105 *
this, E, Intrinsic::amdgcn_image_store_mip_1d,
true);
1106 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
1107 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
1109 *
this, E, Intrinsic::amdgcn_image_store_mip_1darray,
true);
1110 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
1111 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
1112 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
1114 *
this, E, Intrinsic::amdgcn_image_store_mip_2d,
true);
1115 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
1116 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
1117 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
1119 *
this, E, Intrinsic::amdgcn_image_store_mip_2darray,
true);
1120 case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
1121 case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
1123 *
this, E, Intrinsic::amdgcn_image_store_mip_3d,
true);
1124 case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
1125 case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32:
1127 *
this, E, Intrinsic::amdgcn_image_store_mip_cube,
true);
1128 case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
1129 case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
1131 *
this, E, Intrinsic::amdgcn_image_sample_1d,
false);
1132 case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
1133 case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
1135 *
this, E, Intrinsic::amdgcn_image_sample_1darray,
false);
1136 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
1137 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
1138 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
1140 *
this, E, Intrinsic::amdgcn_image_sample_2d,
false);
1141 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
1142 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
1143 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
1145 *
this, E, Intrinsic::amdgcn_image_sample_2darray,
false);
1146 case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
1147 case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
1149 *
this, E, Intrinsic::amdgcn_image_sample_3d,
false);
1150 case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
1151 case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
1153 *
this, E, Intrinsic::amdgcn_image_sample_cube,
false);
1154 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32:
1155 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32:
1157 *
this, E, Intrinsic::amdgcn_image_sample_lz_1d,
false);
1158 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32:
1159 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32:
1161 *
this, E, Intrinsic::amdgcn_image_sample_l_1d,
false);
1162 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32:
1163 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32:
1165 *
this, E, Intrinsic::amdgcn_image_sample_d_1d,
false);
1166 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32:
1167 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32:
1168 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32:
1170 *
this, E, Intrinsic::amdgcn_image_sample_lz_2d,
false);
1171 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32:
1172 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32:
1173 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32:
1175 *
this, E, Intrinsic::amdgcn_image_sample_l_2d,
false);
1176 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32:
1177 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32:
1178 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32:
1180 *
this, E, Intrinsic::amdgcn_image_sample_d_2d,
false);
1181 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32:
1182 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32:
1184 *
this, E, Intrinsic::amdgcn_image_sample_lz_3d,
false);
1185 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32:
1186 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32:
1188 *
this, E, Intrinsic::amdgcn_image_sample_l_3d,
false);
1189 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32:
1190 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32:
1192 *
this, E, Intrinsic::amdgcn_image_sample_d_3d,
false);
1193 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32:
1194 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32:
1196 *
this, E, Intrinsic::amdgcn_image_sample_lz_cube,
false);
1197 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32:
1198 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32:
1200 *
this, E, Intrinsic::amdgcn_image_sample_l_cube,
false);
1201 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32:
1202 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32:
1204 *
this, E, Intrinsic::amdgcn_image_sample_lz_1darray,
false);
1205 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32:
1206 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32:
1208 *
this, E, Intrinsic::amdgcn_image_sample_l_1darray,
false);
1209 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32:
1210 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32:
1212 *
this, E, Intrinsic::amdgcn_image_sample_d_1darray,
false);
1213 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32:
1214 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32:
1215 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32:
1217 *
this, E, Intrinsic::amdgcn_image_sample_lz_2darray,
false);
1218 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32:
1219 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32:
1220 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32:
1222 *
this, E, Intrinsic::amdgcn_image_sample_l_2darray,
false);
1223 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32:
1224 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32:
1225 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32:
1227 *
this, E, Intrinsic::amdgcn_image_sample_d_2darray,
false);
1228 case clang::AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32:
1230 *
this, E, Intrinsic::amdgcn_image_gather4_lz_2d,
false);
1231 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
1232 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
1233 llvm::FixedVectorType *VT = FixedVectorType::get(
Builder.getInt32Ty(), 8);
1235 BuiltinID == AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4
1236 ? Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4
1237 : Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4,
1241 for (
unsigned I = 0, N = E->
getNumArgs(); I != N; ++I)
1243 return Builder.CreateCall(F, Args);
1245 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
1246 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
1247 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
1248 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
1249 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
1250 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
1251 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
1252 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
1253 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
1254 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
1255 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
1256 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
1257 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
1258 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
1259 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
1260 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
1261 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
1262 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
1263 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
1264 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
1265 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
1266 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
1267 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
1268 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
1269 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
1270 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
1271 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
1272 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
1273 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
1274 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
1275 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
1276 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
1277 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
1278 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
1279 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
1280 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
1281 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
1282 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
1283 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
1284 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
1285 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
1286 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
1287 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
1288 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
1289 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
1290 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
1291 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
1292 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
1293 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
1294 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
1295 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
1296 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
1297 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
1298 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
1299 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
1300 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
1301 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
1302 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
1303 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
1304 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
1306 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
1307 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
1308 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
1309 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
1310 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
1311 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
1312 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
1313 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
1314 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
1315 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
1316 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
1317 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
1318 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
1319 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
1320 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
1321 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
1322 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
1323 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
1324 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
1325 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
1326 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
1327 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
1328 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
1329 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
1330 case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
1331 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1332 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
1333 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:
1334 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:
1335 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
1336 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
1337 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
1338 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
1339 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
1340 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
1341 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
1342 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
1343 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
1344 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
1345 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
1346 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
1347 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
1348 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8: {
1361 bool AppendFalseForOpselArg =
false;
1362 unsigned BuiltinWMMAOp;
1364 bool NeedReturnType =
false;
1366 switch (BuiltinID) {
1367 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
1368 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
1369 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
1370 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
1371 ArgsForMatchingMatrixTypes = {2, 0};
1372 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
1374 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
1375 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
1376 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
1377 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
1378 ArgsForMatchingMatrixTypes = {2, 0};
1379 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
1381 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
1382 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
1383 AppendFalseForOpselArg =
true;
1385 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
1386 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
1387 ArgsForMatchingMatrixTypes = {2, 0};
1388 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
1390 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
1391 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
1392 AppendFalseForOpselArg =
true;
1394 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
1395 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
1396 ArgsForMatchingMatrixTypes = {2, 0};
1397 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
1399 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
1400 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
1401 ArgsForMatchingMatrixTypes = {2, 0};
1402 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
1404 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
1405 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
1406 ArgsForMatchingMatrixTypes = {2, 0};
1407 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
1409 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
1410 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
1411 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
1412 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
1413 ArgsForMatchingMatrixTypes = {4, 1};
1414 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
1416 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
1417 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
1418 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
1419 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
1420 ArgsForMatchingMatrixTypes = {4, 1};
1421 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
1423 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
1424 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
1425 ArgsForMatchingMatrixTypes = {2, 0};
1426 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
1428 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
1429 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
1430 ArgsForMatchingMatrixTypes = {2, 0};
1431 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
1433 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
1434 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
1435 ArgsForMatchingMatrixTypes = {2, 0};
1436 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
1438 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
1439 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
1440 ArgsForMatchingMatrixTypes = {2, 0};
1441 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
1443 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
1444 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
1445 ArgsForMatchingMatrixTypes = {4, 1};
1446 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
1448 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
1449 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
1450 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1451 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
1453 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
1454 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
1455 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1456 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
1458 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
1459 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
1460 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1461 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
1463 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
1464 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
1465 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1466 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
1468 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
1469 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
1470 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1471 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
1473 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
1474 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
1475 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1476 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
1478 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
1479 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
1480 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1481 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
1483 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
1484 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
1485 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1486 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
1488 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
1489 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
1490 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1491 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
1493 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
1494 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
1495 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1496 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
1498 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
1499 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
1500 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1501 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
1504 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
1505 ArgsForMatchingMatrixTypes = {5, 1};
1506 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x4_f32;
1508 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
1509 ArgsForMatchingMatrixTypes = {5, 1};
1510 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_bf16;
1512 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
1513 ArgsForMatchingMatrixTypes = {5, 1};
1514 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_f16;
1516 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
1517 ArgsForMatchingMatrixTypes = {5, 1};
1518 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x32_f16;
1520 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
1521 ArgsForMatchingMatrixTypes = {5, 1};
1522 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16;
1524 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
1525 NeedReturnType =
true;
1526 ArgsForMatchingMatrixTypes = {1, 5};
1527 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16;
1529 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
1530 ArgsForMatchingMatrixTypes = {3, 0};
1531 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_fp8;
1533 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
1534 ArgsForMatchingMatrixTypes = {3, 0};
1535 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_bf8;
1537 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
1538 ArgsForMatchingMatrixTypes = {3, 0};
1539 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_fp8;
1541 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
1542 ArgsForMatchingMatrixTypes = {3, 0};
1543 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_bf8;
1545 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
1546 ArgsForMatchingMatrixTypes = {3, 0};
1547 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_fp8;
1549 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
1550 ArgsForMatchingMatrixTypes = {3, 0};
1551 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_bf8;
1553 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
1554 ArgsForMatchingMatrixTypes = {3, 0};
1555 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_fp8;
1557 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
1558 ArgsForMatchingMatrixTypes = {3, 0};
1559 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_bf8;
1561 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
1562 ArgsForMatchingMatrixTypes = {3, 0};
1563 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_fp8;
1565 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
1566 ArgsForMatchingMatrixTypes = {3, 0};
1567 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_bf8;
1569 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
1570 ArgsForMatchingMatrixTypes = {3, 0};
1571 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_fp8;
1573 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
1574 ArgsForMatchingMatrixTypes = {3, 0};
1575 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_bf8;
1577 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
1578 ArgsForMatchingMatrixTypes = {3, 0};
1579 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_fp8;
1581 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
1582 ArgsForMatchingMatrixTypes = {3, 0};
1583 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_bf8;
1585 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
1586 ArgsForMatchingMatrixTypes = {3, 0};
1587 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_fp8;
1589 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
1590 ArgsForMatchingMatrixTypes = {3, 0};
1591 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_bf8;
1593 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
1594 ArgsForMatchingMatrixTypes = {4, 1};
1595 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x64_iu8;
1597 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
1598 ArgsForMatchingMatrixTypes = {5, 1, 3};
1599 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4;
1601 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1602 ArgsForMatchingMatrixTypes = {5, 1, 3};
1603 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4;
1605 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
1606 ArgsForMatchingMatrixTypes = {5, 1, 3};
1607 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4;
1609 case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
1610 ArgsForMatchingMatrixTypes = {3, 0, 1};
1611 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_32x16x128_f4;
1613 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:
1614 ArgsForMatchingMatrixTypes = {3, 0, 1};
1615 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_32x16x128_f4;
1617 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:
1618 ArgsForMatchingMatrixTypes = {3, 0, 1};
1619 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_32x16x128_f4;
1621 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
1622 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1623 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_f16;
1625 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
1626 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1627 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_bf16;
1629 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
1630 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1631 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x64_f16;
1633 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
1634 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1635 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x64_bf16;
1637 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
1638 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1639 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16f32_16x16x64_bf16;
1641 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
1642 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1643 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_fp8;
1645 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
1646 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1647 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_bf8;
1649 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
1650 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1651 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_fp8;
1653 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
1654 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1655 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_bf8;
1657 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
1658 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1659 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_fp8;
1661 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
1662 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1663 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_bf8;
1665 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
1666 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1667 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_fp8;
1669 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
1670 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1671 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_bf8;
1673 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8:
1674 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1675 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8;
1680 for (
int i = 0, e = E->
getNumArgs(); i != e; ++i)
1682 if (AppendFalseForOpselArg)
1683 Args.push_back(
Builder.getFalse());
1688 for (
auto ArgIdx : ArgsForMatchingMatrixTypes)
1689 ArgTypes.push_back(Args[ArgIdx]->
getType());
1691 Function *F =
CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
1692 return Builder.CreateCall(F, Args);
1695 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
1696 return EmitAMDGPUWorkGroupSize(*
this, 0);
1697 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
1698 return EmitAMDGPUWorkGroupSize(*
this, 1);
1699 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
1700 return EmitAMDGPUWorkGroupSize(*
this, 2);
1703 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
1704 return EmitAMDGPUGridSize(*
this, 0);
1705 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
1706 return EmitAMDGPUGridSize(*
this, 1);
1707 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
1708 return EmitAMDGPUGridSize(*
this, 2);
1711 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
1712 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
1714 Intrinsic::r600_recipsqrt_ieee);
1715 case AMDGPU::BI__builtin_amdgcn_alignbit: {
1719 Function *F =
CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
1720 return Builder.CreateCall(F, { Src0, Src1, Src2 });
1722 case AMDGPU::BI__builtin_amdgcn_fence: {
1725 FenceInst *Fence =
Builder.CreateFence(AO, SSID);
1730 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
1731 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
1732 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
1733 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
1734 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
1735 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
1736 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
1737 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
1738 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
1739 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
1740 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
1741 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
1742 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
1743 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1744 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1745 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1746 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1747 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
1748 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1749 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1750 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
1751 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
1752 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
1753 llvm::AtomicRMWInst::BinOp BinOp;
1754 switch (BuiltinID) {
1755 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
1756 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
1757 BinOp = llvm::AtomicRMWInst::UIncWrap;
1759 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
1760 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
1761 BinOp = llvm::AtomicRMWInst::UDecWrap;
1763 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
1764 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
1765 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
1766 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
1767 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
1768 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
1769 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
1770 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1771 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1772 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1773 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1774 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
1775 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1776 BinOp = llvm::AtomicRMWInst::FAdd;
1778 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
1779 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1780 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
1781 BinOp = llvm::AtomicRMWInst::FMin;
1783 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
1784 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
1785 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
1786 BinOp = llvm::AtomicRMWInst::FMax;
1792 llvm::Type *OrigTy = Val->
getType();
1797 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
1798 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
1799 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
1824 AO = AtomicOrdering::Monotonic;
1827 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
1828 BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
1829 BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
1830 llvm::Type *V2BF16Ty = FixedVectorType::get(
1831 llvm::Type::getBFloatTy(
Builder.getContext()), 2);
1832 Val =
Builder.CreateBitCast(Val, V2BF16Ty);
1836 llvm::AtomicRMWInst *RMW =
1837 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
1839 RMW->setVolatile(
true);
1841 unsigned AddrSpace = Ptr.
getType()->getAddressSpace();
1842 if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
1846 RMW->setMetadata(
"amdgpu.no.fine.grained.memory", EmptyMD);
1850 if (BinOp == llvm::AtomicRMWInst::FAdd && Val->
getType()->isFloatTy())
1851 RMW->setMetadata(
"amdgpu.ignore.denormal.mode", EmptyMD);
1854 return Builder.CreateBitCast(RMW, OrigTy);
1856 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
1857 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
1862 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
1863 return Builder.CreateCall(F, {Arg});
1865 case AMDGPU::BI__builtin_amdgcn_permlane16_swap:
1866 case AMDGPU::BI__builtin_amdgcn_permlane32_swap: {
1874 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16_swap
1875 ? Intrinsic::amdgcn_permlane16_swap
1876 : Intrinsic::amdgcn_permlane32_swap);
1877 llvm::CallInst *
Call =
1878 Builder.CreateCall(F, {VDstOld, VSrcOld, FI, BoundCtrl});
1880 llvm::Value *Elt0 =
Builder.CreateExtractValue(
Call, 0);
1881 llvm::Value *Elt1 =
Builder.CreateExtractValue(
Call, 1);
1885 llvm::Value *Insert0 =
Builder.CreateInsertElement(
1886 llvm::PoisonValue::get(ResultType), Elt0, UINT64_C(0));
1887 llvm::Value *AsVector =
1888 Builder.CreateInsertElement(Insert0, Elt1, UINT64_C(1));
1891 case AMDGPU::BI__builtin_amdgcn_bitop3_b32:
1892 case AMDGPU::BI__builtin_amdgcn_bitop3_b16:
1894 Intrinsic::amdgcn_bitop3);
1895 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc: {
1900 for (
unsigned I = 0; I < 4; ++I)
1902 llvm::PointerType *RetTy = llvm::PointerType::get(
1903 Builder.getContext(), llvm::AMDGPUAS::BUFFER_RESOURCE);
1904 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_make_buffer_rsrc,
1905 {RetTy, Args[0]->getType()});
1906 return Builder.CreateCall(F, Args);
1908 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
1909 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
1910 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
1911 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
1912 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
1913 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
1915 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
1916 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
1917 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
1918 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
1919 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
1920 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
1921 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
1922 llvm::Type *RetTy =
nullptr;
1923 switch (BuiltinID) {
1924 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
1927 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
1930 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
1933 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
1934 RetTy = llvm::FixedVectorType::get(
Int32Ty, 2);
1936 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
1937 RetTy = llvm::FixedVectorType::get(
Int32Ty, 3);
1939 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
1940 RetTy = llvm::FixedVectorType::get(
Int32Ty, 4);
1944 CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
1949 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
1951 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_add);
1952 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:
1953 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
1955 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd);
1956 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32:
1957 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64:
1959 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin);
1960 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
1961 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
1963 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax);
1964 case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
1966 *
this, E, Intrinsic::amdgcn_s_prefetch_data);
1967 case Builtin::BIlogbf:
1968 case Builtin::BI__builtin_logbf: {
1972 CallInst *FrExp =
Builder.CreateCall(FrExpFunc, Src0);
1975 Exp, ConstantInt::getSigned(Exp->
getType(), -1),
"",
false,
true);
1980 Fabs, ConstantFP::getInfinity(
Builder.getFloatTy()));
1981 Value *Sel1 =
Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
1983 Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(
Builder.getFloatTy()));
1986 ConstantFP::getInfinity(
Builder.getFloatTy(),
true), Sel1);
1989 case Builtin::BIlogb:
1990 case Builtin::BI__builtin_logb: {
1994 CallInst *FrExp =
Builder.CreateCall(FrExpFunc, Src0);
1997 Exp, ConstantInt::getSigned(Exp->
getType(), -1),
"",
false,
true);
2002 Fabs, ConstantFP::getInfinity(
Builder.getDoubleTy()));
2003 Value *Sel1 =
Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
2005 Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(
Builder.getDoubleTy()));
2008 ConstantFP::getInfinity(
Builder.getDoubleTy(),
true),
2012 case Builtin::BIscalbnf:
2013 case Builtin::BI__builtin_scalbnf:
2014 case Builtin::BIscalbn:
2015 case Builtin::BI__builtin_scalbn:
2017 *
this, E, Intrinsic::ldexp, Intrinsic::experimental_constrained_ldexp);