421 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
422 llvm::SyncScope::ID SSID;
424 case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
425 case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
426 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
427 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
428 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
429 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
430 case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
431 case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
432 case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
433 case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
434 case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
435 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
436 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
437 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
438 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
439 case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
440 case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
441 case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64: {
448 case AMDGPU::BI__builtin_amdgcn_div_scale:
449 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
459 llvm::Function *Callee =
CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
462 llvm::Value *Tmp =
Builder.CreateCall(Callee, {
X, Y, Z});
465 llvm::Value *Flag =
Builder.CreateExtractValue(Tmp, 1);
469 llvm::Value *FlagExt =
Builder.CreateZExt(Flag, RealFlagType);
470 Builder.CreateStore(FlagExt, FlagOutPtr);
473 case AMDGPU::BI__builtin_amdgcn_div_fmas:
474 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
480 llvm::Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
482 llvm::Value *Src3ToBool =
Builder.CreateIsNotNull(Src3);
483 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
486 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
488 Intrinsic::amdgcn_ds_swizzle);
489 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
490 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
491 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
495 unsigned ICEArguments = 0;
500 unsigned Size = DataTy->getPrimitiveSizeInBits();
502 llvm::IntegerType::get(
Builder.getContext(), std::max(Size, 32u));
504 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8
505 ? Intrinsic::amdgcn_mov_dpp8
506 : Intrinsic::amdgcn_update_dpp,
510 bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;
512 Args.push_back(llvm::PoisonValue::get(
IntTy));
513 for (
unsigned I = 0; I != E->
getNumArgs(); ++I) {
515 if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2u : 1u) &&
517 if (!DataTy->isIntegerTy())
519 V, llvm::IntegerType::get(
Builder.getContext(), Size));
523 F->getFunctionType()->getFunctionParamType(I + InsertOld);
524 Args.push_back(
Builder.CreateTruncOrBitCast(
V, ExpTy));
527 if (Size < 32 && !DataTy->isIntegerTy())
529 V, llvm::IntegerType::get(
Builder.getContext(), Size));
530 return Builder.CreateTruncOrBitCast(
V, DataTy);
532 case AMDGPU::BI__builtin_amdgcn_permlane16:
533 case AMDGPU::BI__builtin_amdgcn_permlanex16:
536 BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
537 ? Intrinsic::amdgcn_permlane16
538 : Intrinsic::amdgcn_permlanex16);
539 case AMDGPU::BI__builtin_amdgcn_permlane64:
541 Intrinsic::amdgcn_permlane64);
542 case AMDGPU::BI__builtin_amdgcn_readlane:
544 Intrinsic::amdgcn_readlane);
545 case AMDGPU::BI__builtin_amdgcn_readfirstlane:
547 Intrinsic::amdgcn_readfirstlane);
548 case AMDGPU::BI__builtin_amdgcn_div_fixup:
549 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
550 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
552 Intrinsic::amdgcn_div_fixup);
553 case AMDGPU::BI__builtin_amdgcn_trig_preop:
554 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
556 case AMDGPU::BI__builtin_amdgcn_rcp:
557 case AMDGPU::BI__builtin_amdgcn_rcpf:
558 case AMDGPU::BI__builtin_amdgcn_rcph:
559 case AMDGPU::BI__builtin_amdgcn_rcp_bf16:
561 case AMDGPU::BI__builtin_amdgcn_sqrt:
562 case AMDGPU::BI__builtin_amdgcn_sqrtf:
563 case AMDGPU::BI__builtin_amdgcn_sqrth:
564 case AMDGPU::BI__builtin_amdgcn_sqrt_bf16:
566 Intrinsic::amdgcn_sqrt);
567 case AMDGPU::BI__builtin_amdgcn_rsq:
568 case AMDGPU::BI__builtin_amdgcn_rsqf:
569 case AMDGPU::BI__builtin_amdgcn_rsqh:
570 case AMDGPU::BI__builtin_amdgcn_rsq_bf16:
572 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
573 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
575 Intrinsic::amdgcn_rsq_clamp);
576 case AMDGPU::BI__builtin_amdgcn_sinf:
577 case AMDGPU::BI__builtin_amdgcn_sinh:
578 case AMDGPU::BI__builtin_amdgcn_sin_bf16:
580 case AMDGPU::BI__builtin_amdgcn_cosf:
581 case AMDGPU::BI__builtin_amdgcn_cosh:
582 case AMDGPU::BI__builtin_amdgcn_cos_bf16:
584 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
585 return EmitAMDGPUDispatchPtr(*
this, E);
586 case AMDGPU::BI__builtin_amdgcn_logf:
587 case AMDGPU::BI__builtin_amdgcn_log_bf16:
589 case AMDGPU::BI__builtin_amdgcn_exp2f:
590 case AMDGPU::BI__builtin_amdgcn_exp2_bf16:
592 Intrinsic::amdgcn_exp2);
593 case AMDGPU::BI__builtin_amdgcn_log_clampf:
595 Intrinsic::amdgcn_log_clamp);
596 case AMDGPU::BI__builtin_amdgcn_ldexp:
597 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
601 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
602 return Builder.CreateCall(F, {Src0, Src1});
604 case AMDGPU::BI__builtin_amdgcn_ldexph: {
610 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(),
Int16Ty});
613 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
614 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
615 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
617 Intrinsic::amdgcn_frexp_mant);
618 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
619 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
621 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
623 return Builder.CreateCall(F, Src0);
625 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
627 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
629 return Builder.CreateCall(F, Src0);
631 case AMDGPU::BI__builtin_amdgcn_fract:
632 case AMDGPU::BI__builtin_amdgcn_fractf:
633 case AMDGPU::BI__builtin_amdgcn_fracth:
635 Intrinsic::amdgcn_fract);
636 case AMDGPU::BI__builtin_amdgcn_lerp:
638 Intrinsic::amdgcn_lerp);
639 case AMDGPU::BI__builtin_amdgcn_ubfe:
641 Intrinsic::amdgcn_ubfe);
642 case AMDGPU::BI__builtin_amdgcn_sbfe:
644 Intrinsic::amdgcn_sbfe);
645 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
646 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
649 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
650 return Builder.CreateCall(F, { Src });
652 case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w32:
653 case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w64: {
656 CGM.getIntrinsic(Intrinsic::amdgcn_inverse_ballot, {Src->getType()});
657 return Builder.CreateCall(F, {Src});
659 case AMDGPU::BI__builtin_amdgcn_tanhf:
660 case AMDGPU::BI__builtin_amdgcn_tanhh:
661 case AMDGPU::BI__builtin_amdgcn_tanh_bf16:
663 Intrinsic::amdgcn_tanh);
664 case AMDGPU::BI__builtin_amdgcn_uicmp:
665 case AMDGPU::BI__builtin_amdgcn_uicmpl:
666 case AMDGPU::BI__builtin_amdgcn_sicmp:
667 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
673 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
674 {
Builder.getInt64Ty(), Src0->getType() });
675 return Builder.CreateCall(F, { Src0, Src1, Src2 });
677 case AMDGPU::BI__builtin_amdgcn_fcmp:
678 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
684 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
685 {
Builder.getInt64Ty(), Src0->getType() });
686 return Builder.CreateCall(F, { Src0, Src1, Src2 });
688 case AMDGPU::BI__builtin_amdgcn_class:
689 case AMDGPU::BI__builtin_amdgcn_classf:
690 case AMDGPU::BI__builtin_amdgcn_classh:
692 case AMDGPU::BI__builtin_amdgcn_fmed3f:
693 case AMDGPU::BI__builtin_amdgcn_fmed3h:
695 Intrinsic::amdgcn_fmed3);
696 case AMDGPU::BI__builtin_amdgcn_ds_append:
697 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
698 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
699 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
704 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
705 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
706 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
707 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
708 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
709 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
710 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
711 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
712 case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
713 case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
714 case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
715 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
716 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
717 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
718 case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
719 case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
720 case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
721 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
722 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
723 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
724 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
725 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
726 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
727 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
728 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
729 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16: {
732 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
733 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
734 case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
735 IID = Intrinsic::amdgcn_global_load_tr_b64;
737 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
738 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
739 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
740 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
741 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
742 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
743 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
744 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
745 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
746 IID = Intrinsic::amdgcn_global_load_tr_b128;
748 case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
749 IID = Intrinsic::amdgcn_global_load_tr4_b64;
751 case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
752 IID = Intrinsic::amdgcn_global_load_tr6_b96;
754 case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
755 IID = Intrinsic::amdgcn_ds_load_tr4_b64;
757 case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
758 IID = Intrinsic::amdgcn_ds_load_tr6_b96;
760 case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
761 IID = Intrinsic::amdgcn_ds_load_tr8_b64;
763 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
764 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
765 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
766 IID = Intrinsic::amdgcn_ds_load_tr16_b128;
768 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
769 IID = Intrinsic::amdgcn_ds_read_tr4_b64;
771 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
772 IID = Intrinsic::amdgcn_ds_read_tr8_b64;
774 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
775 IID = Intrinsic::amdgcn_ds_read_tr6_b96;
777 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16:
778 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
779 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
780 IID = Intrinsic::amdgcn_ds_read_tr16_b64;
785 llvm::Function *F =
CGM.getIntrinsic(IID, {LoadTy});
788 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
789 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
790 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
791 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
792 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
793 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128: {
797 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
798 IID = Intrinsic::amdgcn_global_load_monitor_b32;
800 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
801 IID = Intrinsic::amdgcn_global_load_monitor_b64;
803 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
804 IID = Intrinsic::amdgcn_global_load_monitor_b128;
806 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
807 IID = Intrinsic::amdgcn_flat_load_monitor_b32;
809 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
810 IID = Intrinsic::amdgcn_flat_load_monitor_b64;
812 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128:
813 IID = Intrinsic::amdgcn_flat_load_monitor_b128;
820 llvm::Function *F =
CGM.getIntrinsic(IID, {LoadTy});
823 case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:
824 case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:
825 case AMDGPU::BI__builtin_amdgcn_cluster_load_b128: {
828 case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:
829 IID = Intrinsic::amdgcn_cluster_load_b32;
831 case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:
832 IID = Intrinsic::amdgcn_cluster_load_b64;
834 case AMDGPU::BI__builtin_amdgcn_cluster_load_b128:
835 IID = Intrinsic::amdgcn_cluster_load_b128;
839 for (
int i = 0, e = E->
getNumArgs(); i != e; ++i)
842 return Builder.CreateCall(F, {Args});
844 case AMDGPU::BI__builtin_amdgcn_load_to_lds: {
847 Intrinsic::amdgcn_load_to_lds);
849 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
850 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
851 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
852 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
853 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
854 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B: {
857 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
858 IID = Intrinsic::amdgcn_cooperative_atomic_load_32x4B;
860 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
861 IID = Intrinsic::amdgcn_cooperative_atomic_store_32x4B;
863 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
864 IID = Intrinsic::amdgcn_cooperative_atomic_load_16x8B;
866 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
867 IID = Intrinsic::amdgcn_cooperative_atomic_store_16x8B;
869 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
870 IID = Intrinsic::amdgcn_cooperative_atomic_load_8x16B;
872 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B:
873 IID = Intrinsic::amdgcn_cooperative_atomic_store_8x16B;
877 LLVMContext &Ctx =
CGM.getLLVMContext();
880 const unsigned ScopeArg = E->
getNumArgs() - 1;
881 for (
unsigned i = 0; i != ScopeArg; ++i)
885 llvm::MDNode *MD = llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, Arg)});
886 Args.push_back(llvm::MetadataAsValue::get(Ctx, MD));
889 llvm::Function *F =
CGM.getIntrinsic(IID, {Args[0]->getType()});
890 return Builder.CreateCall(F, {Args});
892 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
893 Function *F =
CGM.getIntrinsic(Intrinsic::get_fpenv,
897 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
898 Function *F =
CGM.getIntrinsic(Intrinsic::set_fpenv,
901 return Builder.CreateCall(F, {Env});
903 case AMDGPU::BI__builtin_amdgcn_read_exec:
905 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
907 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
909 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
910 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
911 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
912 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
922 RayOrigin =
Builder.CreateShuffleVector(RayOrigin, RayOrigin,
925 Builder.CreateShuffleVector(RayDir, RayDir, {0, 1, 2});
926 RayInverseDir =
Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
929 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
930 {NodePtr->getType(), RayDir->getType()});
931 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
932 RayInverseDir, TextureDescr});
934 case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
935 case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray: {
938 case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
939 IID = Intrinsic::amdgcn_image_bvh8_intersect_ray;
941 case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray:
942 IID = Intrinsic::amdgcn_image_bvh_dual_intersect_ray;
956 llvm::Function *IntrinsicFunc =
CGM.getIntrinsic(IID);
958 llvm::CallInst *CI =
Builder.CreateCall(
959 IntrinsicFunc, {NodePtr, RayExtent, InstanceMask, RayOrigin, RayDir,
960 Offset, TextureDescr});
962 llvm::Value *RetVData =
Builder.CreateExtractValue(CI, 0);
963 llvm::Value *RetRayOrigin =
Builder.CreateExtractValue(CI, 1);
964 llvm::Value *RetRayDir =
Builder.CreateExtractValue(CI, 2);
966 Builder.CreateStore(RetRayOrigin, RetRayOriginPtr);
967 Builder.CreateStore(RetRayDir, RetRayDirPtr);
972 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
973 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
974 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
975 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn: {
978 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
979 IID = Intrinsic::amdgcn_ds_bvh_stack_rtn;
981 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
982 IID = Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn;
984 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
985 IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn;
987 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn:
988 IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn;
993 for (
int i = 0, e = E->
getNumArgs(); i != e; ++i)
1001 Value *I0 =
Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
1005 if (A->
getType()->getPrimitiveSizeInBits() <
1006 RetTy->getScalarType()->getPrimitiveSizeInBits())
1007 A =
Builder.CreateZExt(A, RetTy->getScalarType());
1009 return Builder.CreateInsertElement(I0, A, 1);
1011 case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
1012 case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
1014 *
this, E, Intrinsic::amdgcn_image_load_1d,
false);
1015 case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
1016 case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
1018 *
this, E, Intrinsic::amdgcn_image_load_1darray,
false);
1019 case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
1020 case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
1021 case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
1023 *
this, E, Intrinsic::amdgcn_image_load_2d,
false);
1024 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
1025 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
1026 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
1028 *
this, E, Intrinsic::amdgcn_image_load_2darray,
false);
1029 case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
1030 case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
1032 *
this, E, Intrinsic::amdgcn_image_load_3d,
false);
1033 case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
1034 case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
1036 *
this, E, Intrinsic::amdgcn_image_load_cube,
false);
1037 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
1038 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
1040 *
this, E, Intrinsic::amdgcn_image_load_mip_1d,
false);
1041 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
1042 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
1044 *
this, E, Intrinsic::amdgcn_image_load_mip_1darray,
false);
1045 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
1046 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
1047 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
1049 *
this, E, Intrinsic::amdgcn_image_load_mip_2d,
false);
1050 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
1051 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
1052 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
1054 *
this, E, Intrinsic::amdgcn_image_load_mip_2darray,
false);
1055 case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
1056 case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
1058 *
this, E, Intrinsic::amdgcn_image_load_mip_3d,
false);
1059 case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
1060 case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
1062 *
this, E, Intrinsic::amdgcn_image_load_mip_cube,
false);
1063 case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
1064 case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
1066 *
this, E, Intrinsic::amdgcn_image_store_1d,
true);
1067 case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
1068 case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
1070 *
this, E, Intrinsic::amdgcn_image_store_1darray,
true);
1071 case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
1072 case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
1073 case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
1075 *
this, E, Intrinsic::amdgcn_image_store_2d,
true);
1076 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
1077 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
1078 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
1080 *
this, E, Intrinsic::amdgcn_image_store_2darray,
true);
1081 case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
1082 case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
1084 *
this, E, Intrinsic::amdgcn_image_store_3d,
true);
1085 case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
1086 case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
1088 *
this, E, Intrinsic::amdgcn_image_store_cube,
true);
1089 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
1090 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
1092 *
this, E, Intrinsic::amdgcn_image_store_mip_1d,
true);
1093 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
1094 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
1096 *
this, E, Intrinsic::amdgcn_image_store_mip_1darray,
true);
1097 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
1098 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
1099 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
1101 *
this, E, Intrinsic::amdgcn_image_store_mip_2d,
true);
1102 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
1103 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
1104 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
1106 *
this, E, Intrinsic::amdgcn_image_store_mip_2darray,
true);
1107 case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
1108 case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
1110 *
this, E, Intrinsic::amdgcn_image_store_mip_3d,
true);
1111 case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
1112 case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32:
1114 *
this, E, Intrinsic::amdgcn_image_store_mip_cube,
true);
1115 case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
1116 case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
1118 *
this, E, Intrinsic::amdgcn_image_sample_1d,
false);
1119 case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
1120 case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
1122 *
this, E, Intrinsic::amdgcn_image_sample_1darray,
false);
1123 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
1124 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
1125 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
1127 *
this, E, Intrinsic::amdgcn_image_sample_2d,
false);
1128 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
1129 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
1130 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
1132 *
this, E, Intrinsic::amdgcn_image_sample_2darray,
false);
1133 case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
1134 case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
1136 *
this, E, Intrinsic::amdgcn_image_sample_3d,
false);
1137 case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
1138 case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
1140 *
this, E, Intrinsic::amdgcn_image_sample_cube,
false);
1141 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
1142 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
1143 llvm::FixedVectorType *VT = FixedVectorType::get(
Builder.getInt32Ty(), 8);
1145 BuiltinID == AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4
1146 ? Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4
1147 : Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4,
1151 for (
unsigned I = 0, N = E->
getNumArgs(); I != N; ++I)
1153 return Builder.CreateCall(F, Args);
1155 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
1156 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
1157 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
1158 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
1159 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
1160 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
1161 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
1162 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
1163 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
1164 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
1165 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
1166 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
1167 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
1168 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
1169 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
1170 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
1171 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
1172 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
1173 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
1174 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
1175 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
1176 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
1177 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
1178 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
1179 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
1180 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
1181 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
1182 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
1183 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
1184 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
1185 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
1186 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
1187 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
1188 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
1189 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
1190 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
1191 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
1192 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
1193 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
1194 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
1195 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
1196 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
1197 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
1198 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
1199 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
1200 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
1201 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
1202 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
1203 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
1204 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
1205 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
1206 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
1207 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
1208 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
1209 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
1210 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
1211 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
1212 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
1213 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
1214 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
1216 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
1217 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
1218 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
1219 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
1220 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
1221 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
1222 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
1223 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
1224 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
1225 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
1226 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
1227 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
1228 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
1229 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
1230 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
1231 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
1232 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
1233 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
1234 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
1235 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
1236 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
1237 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
1238 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
1239 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
1240 case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
1241 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1242 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
1243 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:
1244 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:
1245 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
1246 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
1247 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
1248 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
1249 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
1250 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
1251 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
1252 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
1253 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
1254 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
1255 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
1256 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
1257 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
1258 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8: {
1271 bool AppendFalseForOpselArg =
false;
1272 unsigned BuiltinWMMAOp;
1274 bool NeedReturnType =
false;
1276 switch (BuiltinID) {
1277 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
1278 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
1279 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
1280 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
1281 ArgsForMatchingMatrixTypes = {2, 0};
1282 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
1284 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
1285 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
1286 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
1287 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
1288 ArgsForMatchingMatrixTypes = {2, 0};
1289 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
1291 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
1292 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
1293 AppendFalseForOpselArg =
true;
1295 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
1296 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
1297 ArgsForMatchingMatrixTypes = {2, 0};
1298 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
1300 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
1301 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
1302 AppendFalseForOpselArg =
true;
1304 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
1305 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
1306 ArgsForMatchingMatrixTypes = {2, 0};
1307 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
1309 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
1310 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
1311 ArgsForMatchingMatrixTypes = {2, 0};
1312 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
1314 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
1315 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
1316 ArgsForMatchingMatrixTypes = {2, 0};
1317 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
1319 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
1320 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
1321 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
1322 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
1323 ArgsForMatchingMatrixTypes = {4, 1};
1324 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
1326 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
1327 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
1328 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
1329 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
1330 ArgsForMatchingMatrixTypes = {4, 1};
1331 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
1333 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
1334 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
1335 ArgsForMatchingMatrixTypes = {2, 0};
1336 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
1338 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
1339 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
1340 ArgsForMatchingMatrixTypes = {2, 0};
1341 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
1343 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
1344 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
1345 ArgsForMatchingMatrixTypes = {2, 0};
1346 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
1348 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
1349 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
1350 ArgsForMatchingMatrixTypes = {2, 0};
1351 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
1353 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
1354 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
1355 ArgsForMatchingMatrixTypes = {4, 1};
1356 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
1358 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
1359 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
1360 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1361 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
1363 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
1364 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
1365 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1366 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
1368 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
1369 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
1370 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1371 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
1373 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
1374 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
1375 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1376 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
1378 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
1379 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
1380 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1381 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
1383 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
1384 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
1385 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1386 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
1388 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
1389 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
1390 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1391 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
1393 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
1394 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
1395 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1396 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
1398 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
1399 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
1400 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1401 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
1403 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
1404 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
1405 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1406 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
1408 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
1409 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
1410 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1411 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
1414 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
1415 ArgsForMatchingMatrixTypes = {5, 1};
1416 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x4_f32;
1418 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
1419 ArgsForMatchingMatrixTypes = {5, 1};
1420 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_bf16;
1422 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
1423 ArgsForMatchingMatrixTypes = {5, 1};
1424 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_f16;
1426 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
1427 ArgsForMatchingMatrixTypes = {5, 1};
1428 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x32_f16;
1430 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
1431 ArgsForMatchingMatrixTypes = {5, 1};
1432 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16;
1434 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
1435 NeedReturnType =
true;
1436 ArgsForMatchingMatrixTypes = {1, 5};
1437 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16;
1439 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
1440 ArgsForMatchingMatrixTypes = {3, 0};
1441 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_fp8;
1443 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
1444 ArgsForMatchingMatrixTypes = {3, 0};
1445 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_bf8;
1447 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
1448 ArgsForMatchingMatrixTypes = {3, 0};
1449 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_fp8;
1451 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
1452 ArgsForMatchingMatrixTypes = {3, 0};
1453 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_bf8;
1455 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
1456 ArgsForMatchingMatrixTypes = {3, 0};
1457 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_fp8;
1459 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
1460 ArgsForMatchingMatrixTypes = {3, 0};
1461 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_bf8;
1463 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
1464 ArgsForMatchingMatrixTypes = {3, 0};
1465 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_fp8;
1467 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
1468 ArgsForMatchingMatrixTypes = {3, 0};
1469 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_bf8;
1471 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
1472 ArgsForMatchingMatrixTypes = {3, 0};
1473 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_fp8;
1475 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
1476 ArgsForMatchingMatrixTypes = {3, 0};
1477 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_bf8;
1479 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
1480 ArgsForMatchingMatrixTypes = {3, 0};
1481 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_fp8;
1483 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
1484 ArgsForMatchingMatrixTypes = {3, 0};
1485 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_bf8;
1487 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
1488 ArgsForMatchingMatrixTypes = {3, 0};
1489 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_fp8;
1491 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
1492 ArgsForMatchingMatrixTypes = {3, 0};
1493 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_bf8;
1495 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
1496 ArgsForMatchingMatrixTypes = {3, 0};
1497 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_fp8;
1499 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
1500 ArgsForMatchingMatrixTypes = {3, 0};
1501 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_bf8;
1503 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
1504 ArgsForMatchingMatrixTypes = {4, 1};
1505 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x64_iu8;
1507 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
1508 ArgsForMatchingMatrixTypes = {5, 1, 3};
1509 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4;
1511 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1512 ArgsForMatchingMatrixTypes = {5, 1, 3};
1513 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4;
1515 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
1516 ArgsForMatchingMatrixTypes = {5, 1, 3};
1517 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4;
1519 case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
1520 ArgsForMatchingMatrixTypes = {3, 0, 1};
1521 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_32x16x128_f4;
1523 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:
1524 ArgsForMatchingMatrixTypes = {3, 0, 1};
1525 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_32x16x128_f4;
1527 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:
1528 ArgsForMatchingMatrixTypes = {3, 0, 1};
1529 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_32x16x128_f4;
1531 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
1532 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1533 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_f16;
1535 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
1536 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1537 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_bf16;
1539 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
1540 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1541 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x64_f16;
1543 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
1544 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1545 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x64_bf16;
1547 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
1548 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1549 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16f32_16x16x64_bf16;
1551 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
1552 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1553 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_fp8;
1555 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
1556 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1557 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_bf8;
1559 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
1560 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1561 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_fp8;
1563 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
1564 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1565 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_bf8;
1567 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
1568 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1569 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_fp8;
1571 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
1572 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1573 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_bf8;
1575 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
1576 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1577 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_fp8;
1579 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
1580 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1581 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_bf8;
1583 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8:
1584 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1585 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8;
1590 for (
int i = 0, e = E->
getNumArgs(); i != e; ++i)
1592 if (AppendFalseForOpselArg)
1593 Args.push_back(
Builder.getFalse());
1598 for (
auto ArgIdx : ArgsForMatchingMatrixTypes)
1599 ArgTypes.push_back(Args[ArgIdx]->
getType());
1601 Function *F =
CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
1602 return Builder.CreateCall(F, Args);
1605 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
1606 return EmitAMDGPUWorkGroupSize(*
this, 0);
1607 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
1608 return EmitAMDGPUWorkGroupSize(*
this, 1);
1609 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
1610 return EmitAMDGPUWorkGroupSize(*
this, 2);
1613 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
1614 return EmitAMDGPUGridSize(*
this, 0);
1615 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
1616 return EmitAMDGPUGridSize(*
this, 1);
1617 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
1618 return EmitAMDGPUGridSize(*
this, 2);
1621 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
1622 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
1624 Intrinsic::r600_recipsqrt_ieee);
1625 case AMDGPU::BI__builtin_amdgcn_alignbit: {
1629 Function *F =
CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
1630 return Builder.CreateCall(F, { Src0, Src1, Src2 });
1632 case AMDGPU::BI__builtin_amdgcn_fence: {
1635 FenceInst *Fence =
Builder.CreateFence(AO, SSID);
1640 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
1641 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
1642 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
1643 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
1644 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
1645 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
1646 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
1647 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
1648 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
1649 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
1650 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
1651 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
1652 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
1653 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1654 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1655 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1656 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1657 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
1658 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1659 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1660 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
1661 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
1662 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
1663 llvm::AtomicRMWInst::BinOp BinOp;
1664 switch (BuiltinID) {
1665 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
1666 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
1667 BinOp = llvm::AtomicRMWInst::UIncWrap;
1669 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
1670 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
1671 BinOp = llvm::AtomicRMWInst::UDecWrap;
1673 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
1674 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
1675 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
1676 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
1677 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
1678 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
1679 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
1680 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1681 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1682 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1683 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1684 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
1685 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1686 BinOp = llvm::AtomicRMWInst::FAdd;
1688 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
1689 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1690 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
1691 BinOp = llvm::AtomicRMWInst::FMin;
1693 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
1694 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
1695 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
1696 BinOp = llvm::AtomicRMWInst::FMax;
1702 llvm::Type *OrigTy = Val->
getType();
1707 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
1708 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
1709 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
1734 AO = AtomicOrdering::Monotonic;
1737 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
1738 BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
1739 BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
1740 llvm::Type *V2BF16Ty = FixedVectorType::get(
1741 llvm::Type::getBFloatTy(
Builder.getContext()), 2);
1742 Val =
Builder.CreateBitCast(Val, V2BF16Ty);
1746 llvm::AtomicRMWInst *RMW =
1747 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
1749 RMW->setVolatile(
true);
1751 unsigned AddrSpace = Ptr.
getType()->getAddressSpace();
1752 if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
1756 RMW->setMetadata(
"amdgpu.no.fine.grained.memory", EmptyMD);
1760 if (BinOp == llvm::AtomicRMWInst::FAdd && Val->
getType()->isFloatTy())
1761 RMW->setMetadata(
"amdgpu.ignore.denormal.mode", EmptyMD);
1764 return Builder.CreateBitCast(RMW, OrigTy);
1766 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
1767 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
1772 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
1773 return Builder.CreateCall(F, {Arg});
1775 case AMDGPU::BI__builtin_amdgcn_permlane16_swap:
1776 case AMDGPU::BI__builtin_amdgcn_permlane32_swap: {
1784 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16_swap
1785 ? Intrinsic::amdgcn_permlane16_swap
1786 : Intrinsic::amdgcn_permlane32_swap);
1787 llvm::CallInst *
Call =
1788 Builder.CreateCall(F, {VDstOld, VSrcOld, FI, BoundCtrl});
1790 llvm::Value *Elt0 =
Builder.CreateExtractValue(
Call, 0);
1791 llvm::Value *Elt1 =
Builder.CreateExtractValue(
Call, 1);
1795 llvm::Value *Insert0 =
Builder.CreateInsertElement(
1796 llvm::PoisonValue::get(ResultType), Elt0, UINT64_C(0));
1797 llvm::Value *AsVector =
1798 Builder.CreateInsertElement(Insert0, Elt1, UINT64_C(1));
1801 case AMDGPU::BI__builtin_amdgcn_bitop3_b32:
1802 case AMDGPU::BI__builtin_amdgcn_bitop3_b16:
1804 Intrinsic::amdgcn_bitop3);
1805 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc: {
1810 for (
unsigned I = 0; I < 4; ++I)
1812 llvm::PointerType *RetTy = llvm::PointerType::get(
1813 Builder.getContext(), llvm::AMDGPUAS::BUFFER_RESOURCE);
1814 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_make_buffer_rsrc,
1815 {RetTy, Args[0]->getType()});
1816 return Builder.CreateCall(F, Args);
1818 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
1819 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
1820 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
1821 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
1822 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
1823 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
1825 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
1826 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
1827 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
1828 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
1829 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
1830 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
1831 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
1832 llvm::Type *RetTy =
nullptr;
1833 switch (BuiltinID) {
1834 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
1837 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
1840 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
1843 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
1844 RetTy = llvm::FixedVectorType::get(
Int32Ty, 2);
1846 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
1847 RetTy = llvm::FixedVectorType::get(
Int32Ty, 3);
1849 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
1850 RetTy = llvm::FixedVectorType::get(
Int32Ty, 4);
1854 CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
1859 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
1861 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_add);
1862 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:
1863 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
1865 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd);
1866 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32:
1867 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64:
1869 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin);
1870 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
1871 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
1873 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax);
1874 case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
1876 *
this, E, Intrinsic::amdgcn_s_prefetch_data);
1877 case Builtin::BIlogbf:
1878 case Builtin::BI__builtin_logbf: {
1882 CallInst *FrExp =
Builder.CreateCall(FrExpFunc, Src0);
1885 Exp, ConstantInt::getSigned(Exp->
getType(), -1),
"",
false,
true);
1890 Fabs, ConstantFP::getInfinity(
Builder.getFloatTy()));
1891 Value *Sel1 =
Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
1893 Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(
Builder.getFloatTy()));
1896 ConstantFP::getInfinity(
Builder.getFloatTy(),
true), Sel1);
1899 case Builtin::BIlogb:
1900 case Builtin::BI__builtin_logb: {
1904 CallInst *FrExp =
Builder.CreateCall(FrExpFunc, Src0);
1907 Exp, ConstantInt::getSigned(Exp->
getType(), -1),
"",
false,
true);
1912 Fabs, ConstantFP::getInfinity(
Builder.getDoubleTy()));
1913 Value *Sel1 =
Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
1915 Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(
Builder.getDoubleTy()));
1918 ConstantFP::getInfinity(
Builder.getDoubleTy(),
true),
1922 case Builtin::BIscalbnf:
1923 case Builtin::BI__builtin_scalbnf:
1924 case Builtin::BIscalbn:
1925 case Builtin::BI__builtin_scalbn:
1927 *
this, E, Intrinsic::ldexp, Intrinsic::experimental_constrained_ldexp);