417 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
418 llvm::SyncScope::ID SSID;
420 case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
421 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f32:
422 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f64:
423 case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
424 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f32:
425 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f64:
426 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
427 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
428 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f32:
429 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f64:
430 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
431 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
432 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f32:
433 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f64:
434 case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
435 case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
436 case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
437 case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
438 case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
439 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
440 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
441 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
442 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
443 case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
444 case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
445 case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64: {
452 case AMDGPU::BI__builtin_amdgcn_div_scale:
453 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
463 llvm::Function *Callee =
CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
466 llvm::Value *Tmp =
Builder.CreateCall(Callee, {
X, Y, Z});
469 llvm::Value *Flag =
Builder.CreateExtractValue(Tmp, 1);
473 llvm::Value *FlagExt =
Builder.CreateZExt(Flag, RealFlagType);
474 Builder.CreateStore(FlagExt, FlagOutPtr);
477 case AMDGPU::BI__builtin_amdgcn_div_fmas:
478 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
484 llvm::Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
486 llvm::Value *Src3ToBool =
Builder.CreateIsNotNull(Src3);
487 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
490 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
492 Intrinsic::amdgcn_ds_swizzle);
493 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
494 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
495 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
499 unsigned ICEArguments = 0;
504 unsigned Size = DataTy->getPrimitiveSizeInBits();
506 llvm::IntegerType::get(
Builder.getContext(), std::max(Size, 32u));
508 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8
509 ? Intrinsic::amdgcn_mov_dpp8
510 : Intrinsic::amdgcn_update_dpp,
514 bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;
516 Args.push_back(llvm::PoisonValue::get(
IntTy));
517 for (
unsigned I = 0; I != E->
getNumArgs(); ++I) {
519 if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2u : 1u) &&
521 if (!DataTy->isIntegerTy())
523 V, llvm::IntegerType::get(
Builder.getContext(), Size));
527 F->getFunctionType()->getFunctionParamType(I + InsertOld);
528 Args.push_back(
Builder.CreateTruncOrBitCast(
V, ExpTy));
531 if (Size < 32 && !DataTy->isIntegerTy())
533 V, llvm::IntegerType::get(
Builder.getContext(), Size));
534 return Builder.CreateTruncOrBitCast(
V, DataTy);
536 case AMDGPU::BI__builtin_amdgcn_permlane16:
537 case AMDGPU::BI__builtin_amdgcn_permlanex16:
540 BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
541 ? Intrinsic::amdgcn_permlane16
542 : Intrinsic::amdgcn_permlanex16);
543 case AMDGPU::BI__builtin_amdgcn_permlane64:
545 Intrinsic::amdgcn_permlane64);
546 case AMDGPU::BI__builtin_amdgcn_readlane:
548 Intrinsic::amdgcn_readlane);
549 case AMDGPU::BI__builtin_amdgcn_readfirstlane:
551 Intrinsic::amdgcn_readfirstlane);
552 case AMDGPU::BI__builtin_amdgcn_div_fixup:
553 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
554 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
556 Intrinsic::amdgcn_div_fixup);
557 case AMDGPU::BI__builtin_amdgcn_trig_preop:
558 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
560 case AMDGPU::BI__builtin_amdgcn_rcp:
561 case AMDGPU::BI__builtin_amdgcn_rcpf:
562 case AMDGPU::BI__builtin_amdgcn_rcph:
563 case AMDGPU::BI__builtin_amdgcn_rcp_bf16:
565 case AMDGPU::BI__builtin_amdgcn_sqrt:
566 case AMDGPU::BI__builtin_amdgcn_sqrtf:
567 case AMDGPU::BI__builtin_amdgcn_sqrth:
568 case AMDGPU::BI__builtin_amdgcn_sqrt_bf16:
570 Intrinsic::amdgcn_sqrt);
571 case AMDGPU::BI__builtin_amdgcn_rsq:
572 case AMDGPU::BI__builtin_amdgcn_rsqf:
573 case AMDGPU::BI__builtin_amdgcn_rsqh:
574 case AMDGPU::BI__builtin_amdgcn_rsq_bf16:
576 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
577 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
579 Intrinsic::amdgcn_rsq_clamp);
580 case AMDGPU::BI__builtin_amdgcn_sinf:
581 case AMDGPU::BI__builtin_amdgcn_sinh:
582 case AMDGPU::BI__builtin_amdgcn_sin_bf16:
584 case AMDGPU::BI__builtin_amdgcn_cosf:
585 case AMDGPU::BI__builtin_amdgcn_cosh:
586 case AMDGPU::BI__builtin_amdgcn_cos_bf16:
588 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
589 return EmitAMDGPUDispatchPtr(*
this, E);
590 case AMDGPU::BI__builtin_amdgcn_logf:
591 case AMDGPU::BI__builtin_amdgcn_log_bf16:
593 case AMDGPU::BI__builtin_amdgcn_exp2f:
594 case AMDGPU::BI__builtin_amdgcn_exp2_bf16:
596 Intrinsic::amdgcn_exp2);
597 case AMDGPU::BI__builtin_amdgcn_log_clampf:
599 Intrinsic::amdgcn_log_clamp);
600 case AMDGPU::BI__builtin_amdgcn_ldexp:
601 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
605 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
606 return Builder.CreateCall(F, {Src0, Src1});
608 case AMDGPU::BI__builtin_amdgcn_ldexph: {
614 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(),
Int16Ty});
617 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
618 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
619 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
621 Intrinsic::amdgcn_frexp_mant);
622 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
623 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
625 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
627 return Builder.CreateCall(F, Src0);
629 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
631 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
633 return Builder.CreateCall(F, Src0);
635 case AMDGPU::BI__builtin_amdgcn_fract:
636 case AMDGPU::BI__builtin_amdgcn_fractf:
637 case AMDGPU::BI__builtin_amdgcn_fracth:
639 Intrinsic::amdgcn_fract);
640 case AMDGPU::BI__builtin_amdgcn_lerp:
642 Intrinsic::amdgcn_lerp);
643 case AMDGPU::BI__builtin_amdgcn_ubfe:
645 Intrinsic::amdgcn_ubfe);
646 case AMDGPU::BI__builtin_amdgcn_sbfe:
648 Intrinsic::amdgcn_sbfe);
649 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
650 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
653 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
654 return Builder.CreateCall(F, {Src});
656 case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w32:
657 case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w64: {
660 CGM.getIntrinsic(Intrinsic::amdgcn_inverse_ballot, {Src->getType()});
661 return Builder.CreateCall(F, {Src});
663 case AMDGPU::BI__builtin_amdgcn_tanhf:
664 case AMDGPU::BI__builtin_amdgcn_tanhh:
665 case AMDGPU::BI__builtin_amdgcn_tanh_bf16:
667 Intrinsic::amdgcn_tanh);
668 case AMDGPU::BI__builtin_amdgcn_uicmp:
669 case AMDGPU::BI__builtin_amdgcn_uicmpl:
670 case AMDGPU::BI__builtin_amdgcn_sicmp:
671 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
677 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
678 {
Builder.getInt64Ty(), Src0->getType() });
679 return Builder.CreateCall(F, { Src0, Src1, Src2 });
681 case AMDGPU::BI__builtin_amdgcn_fcmp:
682 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
688 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
689 {
Builder.getInt64Ty(), Src0->getType() });
690 return Builder.CreateCall(F, { Src0, Src1, Src2 });
692 case AMDGPU::BI__builtin_amdgcn_class:
693 case AMDGPU::BI__builtin_amdgcn_classf:
694 case AMDGPU::BI__builtin_amdgcn_classh:
696 case AMDGPU::BI__builtin_amdgcn_fmed3f:
697 case AMDGPU::BI__builtin_amdgcn_fmed3h:
699 Intrinsic::amdgcn_fmed3);
700 case AMDGPU::BI__builtin_amdgcn_ds_append:
701 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
702 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
703 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
708 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
709 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
710 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
711 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
712 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
713 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
714 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
715 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
716 case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
717 case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
718 case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
719 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
720 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
721 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
722 case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
723 case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
724 case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
725 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
726 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
727 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
728 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
729 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
730 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
731 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
732 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
733 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16: {
736 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
737 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
738 case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
739 IID = Intrinsic::amdgcn_global_load_tr_b64;
741 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
742 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
743 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
744 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
745 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
746 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
747 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
748 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
749 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
750 IID = Intrinsic::amdgcn_global_load_tr_b128;
752 case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
753 IID = Intrinsic::amdgcn_global_load_tr4_b64;
755 case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
756 IID = Intrinsic::amdgcn_global_load_tr6_b96;
758 case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
759 IID = Intrinsic::amdgcn_ds_load_tr4_b64;
761 case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
762 IID = Intrinsic::amdgcn_ds_load_tr6_b96;
764 case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
765 IID = Intrinsic::amdgcn_ds_load_tr8_b64;
767 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
768 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
769 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
770 IID = Intrinsic::amdgcn_ds_load_tr16_b128;
772 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
773 IID = Intrinsic::amdgcn_ds_read_tr4_b64;
775 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
776 IID = Intrinsic::amdgcn_ds_read_tr8_b64;
778 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
779 IID = Intrinsic::amdgcn_ds_read_tr6_b96;
781 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16:
782 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
783 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
784 IID = Intrinsic::amdgcn_ds_read_tr16_b64;
789 llvm::Function *F =
CGM.getIntrinsic(IID, {LoadTy});
792 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
793 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
794 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
795 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
796 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
797 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128: {
801 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
802 IID = Intrinsic::amdgcn_global_load_monitor_b32;
804 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
805 IID = Intrinsic::amdgcn_global_load_monitor_b64;
807 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
808 IID = Intrinsic::amdgcn_global_load_monitor_b128;
810 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
811 IID = Intrinsic::amdgcn_flat_load_monitor_b32;
813 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
814 IID = Intrinsic::amdgcn_flat_load_monitor_b64;
816 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128:
817 IID = Intrinsic::amdgcn_flat_load_monitor_b128;
824 llvm::Function *F =
CGM.getIntrinsic(IID, {LoadTy});
827 case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:
828 case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:
829 case AMDGPU::BI__builtin_amdgcn_cluster_load_b128: {
832 case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:
833 IID = Intrinsic::amdgcn_cluster_load_b32;
835 case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:
836 IID = Intrinsic::amdgcn_cluster_load_b64;
838 case AMDGPU::BI__builtin_amdgcn_cluster_load_b128:
839 IID = Intrinsic::amdgcn_cluster_load_b128;
843 for (
int i = 0, e = E->
getNumArgs(); i != e; ++i)
846 return Builder.CreateCall(F, {Args});
848 case AMDGPU::BI__builtin_amdgcn_load_to_lds: {
851 Intrinsic::amdgcn_load_to_lds);
853 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
854 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
855 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
856 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
857 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
858 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B: {
861 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
862 IID = Intrinsic::amdgcn_cooperative_atomic_load_32x4B;
864 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
865 IID = Intrinsic::amdgcn_cooperative_atomic_store_32x4B;
867 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
868 IID = Intrinsic::amdgcn_cooperative_atomic_load_16x8B;
870 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
871 IID = Intrinsic::amdgcn_cooperative_atomic_store_16x8B;
873 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
874 IID = Intrinsic::amdgcn_cooperative_atomic_load_8x16B;
876 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B:
877 IID = Intrinsic::amdgcn_cooperative_atomic_store_8x16B;
881 LLVMContext &Ctx =
CGM.getLLVMContext();
884 const unsigned ScopeArg = E->
getNumArgs() - 1;
885 for (
unsigned i = 0; i != ScopeArg; ++i)
889 llvm::MDNode *MD = llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, Arg)});
890 Args.push_back(llvm::MetadataAsValue::get(Ctx, MD));
893 llvm::Function *F =
CGM.getIntrinsic(IID, {Args[0]->getType()});
894 return Builder.CreateCall(F, {Args});
896 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
897 Function *F =
CGM.getIntrinsic(Intrinsic::get_fpenv,
901 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
902 Function *F =
CGM.getIntrinsic(Intrinsic::set_fpenv,
905 return Builder.CreateCall(F, {Env});
907 case AMDGPU::BI__builtin_amdgcn_read_exec:
909 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
911 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
913 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
914 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
915 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
916 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
926 RayOrigin =
Builder.CreateShuffleVector(RayOrigin, RayOrigin,
929 Builder.CreateShuffleVector(RayDir, RayDir, {0, 1, 2});
930 RayInverseDir =
Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
933 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
934 {NodePtr->getType(), RayDir->getType()});
935 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
936 RayInverseDir, TextureDescr});
938 case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
939 case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray: {
942 case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
943 IID = Intrinsic::amdgcn_image_bvh8_intersect_ray;
945 case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray:
946 IID = Intrinsic::amdgcn_image_bvh_dual_intersect_ray;
960 llvm::Function *IntrinsicFunc =
CGM.getIntrinsic(IID);
962 llvm::CallInst *CI =
Builder.CreateCall(
963 IntrinsicFunc, {NodePtr, RayExtent, InstanceMask, RayOrigin, RayDir,
964 Offset, TextureDescr});
966 llvm::Value *RetVData =
Builder.CreateExtractValue(CI, 0);
967 llvm::Value *RetRayOrigin =
Builder.CreateExtractValue(CI, 1);
968 llvm::Value *RetRayDir =
Builder.CreateExtractValue(CI, 2);
970 Builder.CreateStore(RetRayOrigin, RetRayOriginPtr);
971 Builder.CreateStore(RetRayDir, RetRayDirPtr);
976 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
977 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
978 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
979 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn: {
982 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
983 IID = Intrinsic::amdgcn_ds_bvh_stack_rtn;
985 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
986 IID = Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn;
988 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
989 IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn;
991 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn:
992 IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn;
997 for (
int i = 0, e = E->
getNumArgs(); i != e; ++i)
1005 Value *I0 =
Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
1009 if (A->
getType()->getPrimitiveSizeInBits() <
1010 RetTy->getScalarType()->getPrimitiveSizeInBits())
1011 A =
Builder.CreateZExt(A, RetTy->getScalarType());
1013 return Builder.CreateInsertElement(I0, A, 1);
1015 case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
1016 case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
1018 *
this, E, Intrinsic::amdgcn_image_load_1d,
false);
1019 case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
1020 case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
1022 *
this, E, Intrinsic::amdgcn_image_load_1darray,
false);
1023 case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
1024 case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
1025 case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
1027 *
this, E, Intrinsic::amdgcn_image_load_2d,
false);
1028 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
1029 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
1030 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
1032 *
this, E, Intrinsic::amdgcn_image_load_2darray,
false);
1033 case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
1034 case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
1036 *
this, E, Intrinsic::amdgcn_image_load_3d,
false);
1037 case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
1038 case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
1040 *
this, E, Intrinsic::amdgcn_image_load_cube,
false);
1041 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
1042 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
1044 *
this, E, Intrinsic::amdgcn_image_load_mip_1d,
false);
1045 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
1046 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
1048 *
this, E, Intrinsic::amdgcn_image_load_mip_1darray,
false);
1049 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
1050 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
1051 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
1053 *
this, E, Intrinsic::amdgcn_image_load_mip_2d,
false);
1054 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
1055 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
1056 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
1058 *
this, E, Intrinsic::amdgcn_image_load_mip_2darray,
false);
1059 case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
1060 case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
1062 *
this, E, Intrinsic::amdgcn_image_load_mip_3d,
false);
1063 case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
1064 case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
1066 *
this, E, Intrinsic::amdgcn_image_load_mip_cube,
false);
1067 case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
1068 case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
1070 *
this, E, Intrinsic::amdgcn_image_store_1d,
true);
1071 case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
1072 case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
1074 *
this, E, Intrinsic::amdgcn_image_store_1darray,
true);
1075 case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
1076 case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
1077 case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
1079 *
this, E, Intrinsic::amdgcn_image_store_2d,
true);
1080 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
1081 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
1082 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
1084 *
this, E, Intrinsic::amdgcn_image_store_2darray,
true);
1085 case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
1086 case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
1088 *
this, E, Intrinsic::amdgcn_image_store_3d,
true);
1089 case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
1090 case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
1092 *
this, E, Intrinsic::amdgcn_image_store_cube,
true);
1093 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
1094 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
1096 *
this, E, Intrinsic::amdgcn_image_store_mip_1d,
true);
1097 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
1098 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
1100 *
this, E, Intrinsic::amdgcn_image_store_mip_1darray,
true);
1101 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
1102 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
1103 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
1105 *
this, E, Intrinsic::amdgcn_image_store_mip_2d,
true);
1106 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
1107 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
1108 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
1110 *
this, E, Intrinsic::amdgcn_image_store_mip_2darray,
true);
1111 case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
1112 case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
1114 *
this, E, Intrinsic::amdgcn_image_store_mip_3d,
true);
1115 case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
1116 case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32:
1118 *
this, E, Intrinsic::amdgcn_image_store_mip_cube,
true);
1119 case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
1120 case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
1122 *
this, E, Intrinsic::amdgcn_image_sample_1d,
false);
1123 case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
1124 case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
1126 *
this, E, Intrinsic::amdgcn_image_sample_1darray,
false);
1127 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
1128 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
1129 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
1131 *
this, E, Intrinsic::amdgcn_image_sample_2d,
false);
1132 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
1133 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
1134 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
1136 *
this, E, Intrinsic::amdgcn_image_sample_2darray,
false);
1137 case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
1138 case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
1140 *
this, E, Intrinsic::amdgcn_image_sample_3d,
false);
1141 case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
1142 case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
1144 *
this, E, Intrinsic::amdgcn_image_sample_cube,
false);
1145 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32:
1146 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32:
1148 *
this, E, Intrinsic::amdgcn_image_sample_lz_1d,
false);
1149 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32:
1150 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32:
1152 *
this, E, Intrinsic::amdgcn_image_sample_l_1d,
false);
1153 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32:
1154 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32:
1156 *
this, E, Intrinsic::amdgcn_image_sample_d_1d,
false);
1157 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32:
1158 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32:
1159 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32:
1161 *
this, E, Intrinsic::amdgcn_image_sample_lz_2d,
false);
1162 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32:
1163 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32:
1164 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32:
1166 *
this, E, Intrinsic::amdgcn_image_sample_l_2d,
false);
1167 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32:
1168 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32:
1169 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32:
1171 *
this, E, Intrinsic::amdgcn_image_sample_d_2d,
false);
1172 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32:
1173 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32:
1175 *
this, E, Intrinsic::amdgcn_image_sample_lz_3d,
false);
1176 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32:
1177 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32:
1179 *
this, E, Intrinsic::amdgcn_image_sample_l_3d,
false);
1180 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32:
1181 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32:
1183 *
this, E, Intrinsic::amdgcn_image_sample_d_3d,
false);
1184 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32:
1185 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32:
1187 *
this, E, Intrinsic::amdgcn_image_sample_lz_cube,
false);
1188 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32:
1189 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32:
1191 *
this, E, Intrinsic::amdgcn_image_sample_l_cube,
false);
1192 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32:
1193 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32:
1195 *
this, E, Intrinsic::amdgcn_image_sample_lz_1darray,
false);
1196 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32:
1197 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32:
1199 *
this, E, Intrinsic::amdgcn_image_sample_l_1darray,
false);
1200 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32:
1201 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32:
1203 *
this, E, Intrinsic::amdgcn_image_sample_d_1darray,
false);
1204 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32:
1205 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32:
1206 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32:
1208 *
this, E, Intrinsic::amdgcn_image_sample_lz_2darray,
false);
1209 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32:
1210 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32:
1211 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32:
1213 *
this, E, Intrinsic::amdgcn_image_sample_l_2darray,
false);
1214 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32:
1215 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32:
1216 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32:
1218 *
this, E, Intrinsic::amdgcn_image_sample_d_2darray,
false);
1219 case clang::AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32:
1221 *
this, E, Intrinsic::amdgcn_image_gather4_lz_2d,
false);
1222 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
1223 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
1224 llvm::FixedVectorType *VT = FixedVectorType::get(
Builder.getInt32Ty(), 8);
1226 BuiltinID == AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4
1227 ? Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4
1228 : Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4,
1232 for (
unsigned I = 0, N = E->
getNumArgs(); I != N; ++I)
1234 return Builder.CreateCall(F, Args);
1236 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
1237 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
1238 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
1239 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
1240 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
1241 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
1242 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
1243 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
1244 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
1245 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
1246 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
1247 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
1248 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
1249 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
1250 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
1251 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
1252 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
1253 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
1254 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
1255 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
1256 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
1257 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
1258 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
1259 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
1260 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
1261 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
1262 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
1263 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
1264 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
1265 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
1266 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
1267 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
1268 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
1269 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
1270 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
1271 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
1272 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
1273 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
1274 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
1275 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
1276 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
1277 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
1278 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
1279 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
1280 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
1281 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
1282 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
1283 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
1284 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
1285 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
1286 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
1287 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
1288 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
1289 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
1290 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
1291 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
1292 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
1293 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
1294 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
1295 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
1297 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
1298 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
1299 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
1300 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
1301 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
1302 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
1303 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
1304 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
1305 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
1306 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
1307 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
1308 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
1309 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
1310 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
1311 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
1312 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
1313 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
1314 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
1315 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
1316 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
1317 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
1318 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
1319 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
1320 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
1321 case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
1322 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1323 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
1324 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:
1325 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:
1326 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
1327 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
1328 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
1329 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
1330 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
1331 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
1332 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
1333 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
1334 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
1335 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
1336 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
1337 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
1338 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
1339 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8: {
1352 bool AppendFalseForOpselArg =
false;
1353 unsigned BuiltinWMMAOp;
1355 bool NeedReturnType =
false;
1357 switch (BuiltinID) {
1358 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
1359 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
1360 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
1361 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
1362 ArgsForMatchingMatrixTypes = {2, 0};
1363 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
1365 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
1366 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
1367 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
1368 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
1369 ArgsForMatchingMatrixTypes = {2, 0};
1370 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
1372 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
1373 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
1374 AppendFalseForOpselArg =
true;
1376 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
1377 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
1378 ArgsForMatchingMatrixTypes = {2, 0};
1379 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
1381 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
1382 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
1383 AppendFalseForOpselArg =
true;
1385 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
1386 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
1387 ArgsForMatchingMatrixTypes = {2, 0};
1388 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
1390 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
1391 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
1392 ArgsForMatchingMatrixTypes = {2, 0};
1393 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
1395 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
1396 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
1397 ArgsForMatchingMatrixTypes = {2, 0};
1398 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
1400 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
1401 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
1402 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
1403 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
1404 ArgsForMatchingMatrixTypes = {4, 1};
1405 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
1407 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
1408 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
1409 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
1410 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
1411 ArgsForMatchingMatrixTypes = {4, 1};
1412 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
1414 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
1415 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
1416 ArgsForMatchingMatrixTypes = {2, 0};
1417 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
1419 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
1420 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
1421 ArgsForMatchingMatrixTypes = {2, 0};
1422 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
1424 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
1425 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
1426 ArgsForMatchingMatrixTypes = {2, 0};
1427 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
1429 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
1430 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
1431 ArgsForMatchingMatrixTypes = {2, 0};
1432 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
1434 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
1435 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
1436 ArgsForMatchingMatrixTypes = {4, 1};
1437 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
1439 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
1440 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
1441 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1442 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
1444 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
1445 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
1446 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1447 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
1449 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
1450 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
1451 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1452 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
1454 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
1455 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
1456 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1457 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
1459 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
1460 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
1461 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1462 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
1464 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
1465 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
1466 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1467 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
1469 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
1470 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
1471 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1472 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
1474 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
1475 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
1476 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1477 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
1479 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
1480 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
1481 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1482 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
1484 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
1485 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
1486 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1487 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
1489 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
1490 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
1491 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1492 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
1495 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
1496 ArgsForMatchingMatrixTypes = {5, 1};
1497 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x4_f32;
1499 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
1500 ArgsForMatchingMatrixTypes = {5, 1};
1501 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_bf16;
1503 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
1504 ArgsForMatchingMatrixTypes = {5, 1};
1505 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_f16;
1507 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
1508 ArgsForMatchingMatrixTypes = {5, 1};
1509 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x32_f16;
1511 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
1512 ArgsForMatchingMatrixTypes = {5, 1};
1513 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16;
1515 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
1516 NeedReturnType =
true;
1517 ArgsForMatchingMatrixTypes = {1, 5};
1518 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16;
1520 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
1521 ArgsForMatchingMatrixTypes = {3, 0};
1522 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_fp8;
1524 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
1525 ArgsForMatchingMatrixTypes = {3, 0};
1526 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_bf8;
1528 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
1529 ArgsForMatchingMatrixTypes = {3, 0};
1530 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_fp8;
1532 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
1533 ArgsForMatchingMatrixTypes = {3, 0};
1534 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_bf8;
1536 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
1537 ArgsForMatchingMatrixTypes = {3, 0};
1538 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_fp8;
1540 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
1541 ArgsForMatchingMatrixTypes = {3, 0};
1542 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_bf8;
1544 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
1545 ArgsForMatchingMatrixTypes = {3, 0};
1546 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_fp8;
1548 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
1549 ArgsForMatchingMatrixTypes = {3, 0};
1550 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_bf8;
1552 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
1553 ArgsForMatchingMatrixTypes = {3, 0};
1554 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_fp8;
1556 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
1557 ArgsForMatchingMatrixTypes = {3, 0};
1558 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_bf8;
1560 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
1561 ArgsForMatchingMatrixTypes = {3, 0};
1562 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_fp8;
1564 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
1565 ArgsForMatchingMatrixTypes = {3, 0};
1566 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_bf8;
1568 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
1569 ArgsForMatchingMatrixTypes = {3, 0};
1570 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_fp8;
1572 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
1573 ArgsForMatchingMatrixTypes = {3, 0};
1574 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_bf8;
1576 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
1577 ArgsForMatchingMatrixTypes = {3, 0};
1578 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_fp8;
1580 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
1581 ArgsForMatchingMatrixTypes = {3, 0};
1582 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_bf8;
1584 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
1585 ArgsForMatchingMatrixTypes = {4, 1};
1586 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x64_iu8;
1588 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
1589 ArgsForMatchingMatrixTypes = {5, 1, 3};
1590 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4;
1592 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1593 ArgsForMatchingMatrixTypes = {5, 1, 3};
1594 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4;
1596 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
1597 ArgsForMatchingMatrixTypes = {5, 1, 3};
1598 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4;
1600 case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
1601 ArgsForMatchingMatrixTypes = {3, 0, 1};
1602 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_32x16x128_f4;
1604 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:
1605 ArgsForMatchingMatrixTypes = {3, 0, 1};
1606 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_32x16x128_f4;
1608 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:
1609 ArgsForMatchingMatrixTypes = {3, 0, 1};
1610 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_32x16x128_f4;
1612 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
1613 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1614 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_f16;
1616 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
1617 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1618 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_bf16;
1620 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
1621 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1622 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x64_f16;
1624 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
1625 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1626 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x64_bf16;
1628 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
1629 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1630 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16f32_16x16x64_bf16;
1632 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
1633 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1634 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_fp8;
1636 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
1637 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1638 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_bf8;
1640 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
1641 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1642 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_fp8;
1644 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
1645 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1646 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_bf8;
1648 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
1649 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1650 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_fp8;
1652 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
1653 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1654 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_bf8;
1656 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
1657 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1658 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_fp8;
1660 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
1661 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1662 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_bf8;
1664 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8:
1665 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1666 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8;
1671 for (
int i = 0, e = E->
getNumArgs(); i != e; ++i)
1673 if (AppendFalseForOpselArg)
1674 Args.push_back(
Builder.getFalse());
1677 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8) {
1678 if (Args.size() == 7)
1679 Args.push_back(
Builder.getFalse());
1680 assert(Args.size() == 8 &&
"Expected 8 arguments");
1681 Args[7] =
Builder.CreateZExtOrTrunc(Args[7],
Builder.getInt1Ty());
1682 }
else if (BuiltinID ==
1683 AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8) {
1684 if (Args.size() == 8)
1685 Args.push_back(
Builder.getFalse());
1686 assert(Args.size() == 9 &&
"Expected 9 arguments");
1687 Args[8] =
Builder.CreateZExtOrTrunc(Args[8],
Builder.getInt1Ty());
1693 for (
auto ArgIdx : ArgsForMatchingMatrixTypes)
1694 ArgTypes.push_back(Args[ArgIdx]->
getType());
1696 Function *F =
CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
1697 return Builder.CreateCall(F, Args);
1700 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
1701 return EmitAMDGPUWorkGroupSize(*
this, 0);
1702 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
1703 return EmitAMDGPUWorkGroupSize(*
this, 1);
1704 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
1705 return EmitAMDGPUWorkGroupSize(*
this, 2);
1708 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
1709 return EmitAMDGPUGridSize(*
this, 0);
1710 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
1711 return EmitAMDGPUGridSize(*
this, 1);
1712 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
1713 return EmitAMDGPUGridSize(*
this, 2);
1716 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
1717 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
1719 Intrinsic::r600_recipsqrt_ieee);
1720 case AMDGPU::BI__builtin_amdgcn_alignbit: {
1724 Function *F =
CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
1725 return Builder.CreateCall(F, { Src0, Src1, Src2 });
1727 case AMDGPU::BI__builtin_amdgcn_fence: {
1730 FenceInst *Fence =
Builder.CreateFence(AO, SSID);
1735 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
1736 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
1737 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
1738 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
1739 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
1740 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
1741 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
1742 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
1743 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
1744 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
1745 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
1746 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
1747 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
1748 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1749 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1750 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1751 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1752 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
1753 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1754 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1755 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
1756 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
1757 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
1758 llvm::AtomicRMWInst::BinOp BinOp;
1759 switch (BuiltinID) {
1760 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
1761 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
1762 BinOp = llvm::AtomicRMWInst::UIncWrap;
1764 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
1765 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
1766 BinOp = llvm::AtomicRMWInst::UDecWrap;
1768 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
1769 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
1770 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
1771 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
1772 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
1773 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
1774 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
1775 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1776 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1777 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1778 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1779 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
1780 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1781 BinOp = llvm::AtomicRMWInst::FAdd;
1783 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
1784 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1785 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
1786 BinOp = llvm::AtomicRMWInst::FMin;
1788 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
1789 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
1790 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
1791 BinOp = llvm::AtomicRMWInst::FMax;
1797 llvm::Type *OrigTy = Val->
getType();
1802 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
1803 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
1804 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
1829 AO = AtomicOrdering::Monotonic;
1832 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
1833 BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
1834 BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
1835 llvm::Type *V2BF16Ty = FixedVectorType::get(
1836 llvm::Type::getBFloatTy(
Builder.getContext()), 2);
1837 Val =
Builder.CreateBitCast(Val, V2BF16Ty);
1841 llvm::AtomicRMWInst *RMW =
1842 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
1844 RMW->setVolatile(
true);
1846 unsigned AddrSpace = Ptr.
getType()->getAddressSpace();
1847 if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
1851 RMW->setMetadata(
"amdgpu.no.fine.grained.memory", EmptyMD);
1855 if (BinOp == llvm::AtomicRMWInst::FAdd && Val->
getType()->isFloatTy())
1856 RMW->setMetadata(
"amdgpu.ignore.denormal.mode", EmptyMD);
1859 return Builder.CreateBitCast(RMW, OrigTy);
1861 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
1862 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
1867 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
1868 return Builder.CreateCall(F, {Arg});
1870 case AMDGPU::BI__builtin_amdgcn_permlane16_swap:
1871 case AMDGPU::BI__builtin_amdgcn_permlane32_swap: {
1879 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16_swap
1880 ? Intrinsic::amdgcn_permlane16_swap
1881 : Intrinsic::amdgcn_permlane32_swap);
1882 llvm::CallInst *
Call =
1883 Builder.CreateCall(F, {VDstOld, VSrcOld, FI, BoundCtrl});
1885 llvm::Value *Elt0 =
Builder.CreateExtractValue(
Call, 0);
1886 llvm::Value *Elt1 =
Builder.CreateExtractValue(
Call, 1);
1890 llvm::Value *Insert0 =
Builder.CreateInsertElement(
1891 llvm::PoisonValue::get(ResultType), Elt0, UINT64_C(0));
1892 llvm::Value *AsVector =
1893 Builder.CreateInsertElement(Insert0, Elt1, UINT64_C(1));
1896 case AMDGPU::BI__builtin_amdgcn_bitop3_b32:
1897 case AMDGPU::BI__builtin_amdgcn_bitop3_b16:
1899 Intrinsic::amdgcn_bitop3);
1900 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc: {
1905 for (
unsigned I = 0; I < 4; ++I)
1907 llvm::PointerType *RetTy = llvm::PointerType::get(
1908 Builder.getContext(), llvm::AMDGPUAS::BUFFER_RESOURCE);
1909 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_make_buffer_rsrc,
1910 {RetTy, Args[0]->getType()});
1911 return Builder.CreateCall(F, Args);
1913 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
1914 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
1915 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
1916 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
1917 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
1918 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
1920 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
1921 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
1922 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
1923 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
1924 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
1925 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
1926 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
1927 llvm::Type *RetTy =
nullptr;
1928 switch (BuiltinID) {
1929 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
1932 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
1935 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
1938 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
1939 RetTy = llvm::FixedVectorType::get(
Int32Ty, 2);
1941 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
1942 RetTy = llvm::FixedVectorType::get(
Int32Ty, 3);
1944 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
1945 RetTy = llvm::FixedVectorType::get(
Int32Ty, 4);
1949 CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
1954 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
1956 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_add);
1957 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:
1958 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
1960 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd);
1961 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32:
1962 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64:
1964 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin);
1965 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
1966 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
1968 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax);
1969 case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
1971 *
this, E, Intrinsic::amdgcn_s_prefetch_data);
1972 case Builtin::BIlogbf:
1973 case Builtin::BI__builtin_logbf: {
1977 CallInst *FrExp =
Builder.CreateCall(FrExpFunc, Src0);
1980 Exp, ConstantInt::getSigned(Exp->
getType(), -1),
"",
false,
true);
1985 Fabs, ConstantFP::getInfinity(
Builder.getFloatTy()));
1986 Value *Sel1 =
Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
1988 Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(
Builder.getFloatTy()));
1991 ConstantFP::getInfinity(
Builder.getFloatTy(),
true), Sel1);
1994 case Builtin::BIlogb:
1995 case Builtin::BI__builtin_logb: {
1999 CallInst *FrExp =
Builder.CreateCall(FrExpFunc, Src0);
2002 Exp, ConstantInt::getSigned(Exp->
getType(), -1),
"",
false,
true);
2007 Fabs, ConstantFP::getInfinity(
Builder.getDoubleTy()));
2008 Value *Sel1 =
Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
2010 Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(
Builder.getDoubleTy()));
2013 ConstantFP::getInfinity(
Builder.getDoubleTy(),
true),
2017 case Builtin::BIscalbnf:
2018 case Builtin::BI__builtin_scalbnf:
2019 case Builtin::BIscalbn:
2020 case Builtin::BI__builtin_scalbn:
2022 *
this, E, Intrinsic::ldexp, Intrinsic::experimental_constrained_ldexp);