449 case NVPTX::BI__nvvm_atom_add_gen_i:
450 case NVPTX::BI__nvvm_atom_add_gen_l:
451 case NVPTX::BI__nvvm_atom_add_gen_ll:
453 AtomicOrdering::Monotonic);
455 case NVPTX::BI__nvvm_atom_sub_gen_i:
456 case NVPTX::BI__nvvm_atom_sub_gen_l:
457 case NVPTX::BI__nvvm_atom_sub_gen_ll:
459 AtomicOrdering::Monotonic);
461 case NVPTX::BI__nvvm_atom_and_gen_i:
462 case NVPTX::BI__nvvm_atom_and_gen_l:
463 case NVPTX::BI__nvvm_atom_and_gen_ll:
465 AtomicOrdering::Monotonic);
467 case NVPTX::BI__nvvm_atom_or_gen_i:
468 case NVPTX::BI__nvvm_atom_or_gen_l:
469 case NVPTX::BI__nvvm_atom_or_gen_ll:
471 AtomicOrdering::Monotonic);
473 case NVPTX::BI__nvvm_atom_xor_gen_i:
474 case NVPTX::BI__nvvm_atom_xor_gen_l:
475 case NVPTX::BI__nvvm_atom_xor_gen_ll:
477 AtomicOrdering::Monotonic);
479 case NVPTX::BI__nvvm_atom_xchg_gen_i:
480 case NVPTX::BI__nvvm_atom_xchg_gen_l:
481 case NVPTX::BI__nvvm_atom_xchg_gen_ll:
483 AtomicOrdering::Monotonic);
485 case NVPTX::BI__nvvm_atom_max_gen_i:
486 case NVPTX::BI__nvvm_atom_max_gen_l:
487 case NVPTX::BI__nvvm_atom_max_gen_ll:
489 AtomicOrdering::Monotonic);
491 case NVPTX::BI__nvvm_atom_max_gen_ui:
492 case NVPTX::BI__nvvm_atom_max_gen_ul:
493 case NVPTX::BI__nvvm_atom_max_gen_ull:
495 AtomicOrdering::Monotonic);
497 case NVPTX::BI__nvvm_atom_min_gen_i:
498 case NVPTX::BI__nvvm_atom_min_gen_l:
499 case NVPTX::BI__nvvm_atom_min_gen_ll:
501 AtomicOrdering::Monotonic);
503 case NVPTX::BI__nvvm_atom_min_gen_ui:
504 case NVPTX::BI__nvvm_atom_min_gen_ul:
505 case NVPTX::BI__nvvm_atom_min_gen_ull:
507 AtomicOrdering::Monotonic);
509 case NVPTX::BI__nvvm_atom_cas_gen_us:
510 case NVPTX::BI__nvvm_atom_cas_gen_i:
511 case NVPTX::BI__nvvm_atom_cas_gen_l:
512 case NVPTX::BI__nvvm_atom_cas_gen_ll:
516 AtomicOrdering::Monotonic,
517 AtomicOrdering::Monotonic);
519 case NVPTX::BI__nvvm_atom_add_gen_f:
520 case NVPTX::BI__nvvm_atom_add_gen_d: {
524 return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,
525 AtomicOrdering::Monotonic);
528 case NVPTX::BI__nvvm_atom_inc_gen_ui:
530 AtomicOrdering::Monotonic);
532 case NVPTX::BI__nvvm_atom_dec_gen_ui:
534 AtomicOrdering::Monotonic);
536 case NVPTX::BI__nvvm_ldg_c:
537 case NVPTX::BI__nvvm_ldg_sc:
538 case NVPTX::BI__nvvm_ldg_c2:
539 case NVPTX::BI__nvvm_ldg_sc2:
540 case NVPTX::BI__nvvm_ldg_c4:
541 case NVPTX::BI__nvvm_ldg_sc4:
542 case NVPTX::BI__nvvm_ldg_s:
543 case NVPTX::BI__nvvm_ldg_s2:
544 case NVPTX::BI__nvvm_ldg_s4:
545 case NVPTX::BI__nvvm_ldg_i:
546 case NVPTX::BI__nvvm_ldg_i2:
547 case NVPTX::BI__nvvm_ldg_i4:
548 case NVPTX::BI__nvvm_ldg_l:
549 case NVPTX::BI__nvvm_ldg_l2:
550 case NVPTX::BI__nvvm_ldg_ll:
551 case NVPTX::BI__nvvm_ldg_ll2:
552 case NVPTX::BI__nvvm_ldg_uc:
553 case NVPTX::BI__nvvm_ldg_uc2:
554 case NVPTX::BI__nvvm_ldg_uc4:
555 case NVPTX::BI__nvvm_ldg_us:
556 case NVPTX::BI__nvvm_ldg_us2:
557 case NVPTX::BI__nvvm_ldg_us4:
558 case NVPTX::BI__nvvm_ldg_ui:
559 case NVPTX::BI__nvvm_ldg_ui2:
560 case NVPTX::BI__nvvm_ldg_ui4:
561 case NVPTX::BI__nvvm_ldg_ul:
562 case NVPTX::BI__nvvm_ldg_ul2:
563 case NVPTX::BI__nvvm_ldg_ull:
564 case NVPTX::BI__nvvm_ldg_ull2:
565 case NVPTX::BI__nvvm_ldg_f:
566 case NVPTX::BI__nvvm_ldg_f2:
567 case NVPTX::BI__nvvm_ldg_f4:
568 case NVPTX::BI__nvvm_ldg_d:
569 case NVPTX::BI__nvvm_ldg_d2:
573 return MakeLdg(*
this, E);
575 case NVPTX::BI__nvvm_ldu_c:
576 case NVPTX::BI__nvvm_ldu_sc:
577 case NVPTX::BI__nvvm_ldu_c2:
578 case NVPTX::BI__nvvm_ldu_sc2:
579 case NVPTX::BI__nvvm_ldu_c4:
580 case NVPTX::BI__nvvm_ldu_sc4:
581 case NVPTX::BI__nvvm_ldu_s:
582 case NVPTX::BI__nvvm_ldu_s2:
583 case NVPTX::BI__nvvm_ldu_s4:
584 case NVPTX::BI__nvvm_ldu_i:
585 case NVPTX::BI__nvvm_ldu_i2:
586 case NVPTX::BI__nvvm_ldu_i4:
587 case NVPTX::BI__nvvm_ldu_l:
588 case NVPTX::BI__nvvm_ldu_l2:
589 case NVPTX::BI__nvvm_ldu_ll:
590 case NVPTX::BI__nvvm_ldu_ll2:
591 case NVPTX::BI__nvvm_ldu_uc:
592 case NVPTX::BI__nvvm_ldu_uc2:
593 case NVPTX::BI__nvvm_ldu_uc4:
594 case NVPTX::BI__nvvm_ldu_us:
595 case NVPTX::BI__nvvm_ldu_us2:
596 case NVPTX::BI__nvvm_ldu_us4:
597 case NVPTX::BI__nvvm_ldu_ui:
598 case NVPTX::BI__nvvm_ldu_ui2:
599 case NVPTX::BI__nvvm_ldu_ui4:
600 case NVPTX::BI__nvvm_ldu_ul:
601 case NVPTX::BI__nvvm_ldu_ul2:
602 case NVPTX::BI__nvvm_ldu_ull:
603 case NVPTX::BI__nvvm_ldu_ull2:
604 return MakeLdu(Intrinsic::nvvm_ldu_global_i, *
this, E);
605 case NVPTX::BI__nvvm_ldu_f:
606 case NVPTX::BI__nvvm_ldu_f2:
607 case NVPTX::BI__nvvm_ldu_f4:
608 case NVPTX::BI__nvvm_ldu_d:
609 case NVPTX::BI__nvvm_ldu_d2:
610 return MakeLdu(Intrinsic::nvvm_ldu_global_f, *
this, E);
612 case NVPTX::BI__nvvm_atom_cta_add_gen_i:
613 case NVPTX::BI__nvvm_atom_cta_add_gen_l:
614 case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
615 return MakeScopedAtomicRMW(*
this, E, llvm::AtomicRMWInst::Add,
"block");
616 case NVPTX::BI__nvvm_atom_sys_add_gen_i:
617 case NVPTX::BI__nvvm_atom_sys_add_gen_l:
618 case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
619 return MakeScopedAtomicRMW(*
this, E, llvm::AtomicRMWInst::Add,
"");
620 case NVPTX::BI__nvvm_atom_cta_add_gen_f:
621 case NVPTX::BI__nvvm_atom_cta_add_gen_d:
622 return MakeScopedAtomicRMW(*
this, E, llvm::AtomicRMWInst::FAdd,
"block");
623 case NVPTX::BI__nvvm_atom_sys_add_gen_f:
624 case NVPTX::BI__nvvm_atom_sys_add_gen_d:
625 return MakeScopedAtomicRMW(*
this, E, llvm::AtomicRMWInst::FAdd,
"");
626 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
627 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
628 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
629 return MakeScopedAtomicRMW(*
this, E, llvm::AtomicRMWInst::Xchg,
"block");
630 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
631 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
632 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
633 return MakeScopedAtomicRMW(*
this, E, llvm::AtomicRMWInst::Xchg,
"");
634 case NVPTX::BI__nvvm_atom_cta_max_gen_i:
635 case NVPTX::BI__nvvm_atom_cta_max_gen_l:
636 case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
637 return MakeScopedAtomicRMW(*
this, E, llvm::AtomicRMWInst::Max,
"block");
638 case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
639 case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
640 case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
641 return MakeScopedAtomicRMW(*
this, E, llvm::AtomicRMWInst::UMax,
"block");
642 case NVPTX::BI__nvvm_atom_sys_max_gen_i:
643 case NVPTX::BI__nvvm_atom_sys_max_gen_l:
644 case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
645 return MakeScopedAtomicRMW(*
this, E, llvm::AtomicRMWInst::Max,
"");
646 case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
647 case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
648 case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
649 return MakeScopedAtomicRMW(*
this, E, llvm::AtomicRMWInst::UMax,
"");
650 case NVPTX::BI__nvvm_atom_cta_min_gen_i:
651 case NVPTX::BI__nvvm_atom_cta_min_gen_l:
652 case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
653 return MakeScopedAtomicRMW(*
this, E, llvm::AtomicRMWInst::Min,
"block");
654 case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
655 case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
656 case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
657 return MakeScopedAtomicRMW(*
this, E, llvm::AtomicRMWInst::UMin,
"block");
658 case NVPTX::BI__nvvm_atom_sys_min_gen_i:
659 case NVPTX::BI__nvvm_atom_sys_min_gen_l:
660 case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
661 return MakeScopedAtomicRMW(*
this, E, llvm::AtomicRMWInst::Min,
"");
662 case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
663 case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
664 case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
665 return MakeScopedAtomicRMW(*
this, E, llvm::AtomicRMWInst::UMin,
"");
666 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
667 return MakeScopedAtomicRMW(*
this, E, llvm::AtomicRMWInst::UIncWrap,
669 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
670 return MakeScopedAtomicRMW(*
this, E, llvm::AtomicRMWInst::UDecWrap,
672 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
673 return MakeScopedAtomicRMW(*
this, E, llvm::AtomicRMWInst::UIncWrap,
"");
674 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
675 return MakeScopedAtomicRMW(*
this, E, llvm::AtomicRMWInst::UDecWrap,
"");
676 case NVPTX::BI__nvvm_atom_cta_and_gen_i:
677 case NVPTX::BI__nvvm_atom_cta_and_gen_l:
678 case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
679 return MakeScopedAtomicRMW(*
this, E, llvm::AtomicRMWInst::And,
"block");
680 case NVPTX::BI__nvvm_atom_sys_and_gen_i:
681 case NVPTX::BI__nvvm_atom_sys_and_gen_l:
682 case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
683 return MakeScopedAtomicRMW(*
this, E, llvm::AtomicRMWInst::And,
"");
684 case NVPTX::BI__nvvm_atom_cta_or_gen_i:
685 case NVPTX::BI__nvvm_atom_cta_or_gen_l:
686 case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
687 return MakeScopedAtomicRMW(*
this, E, llvm::AtomicRMWInst::Or,
"block");
688 case NVPTX::BI__nvvm_atom_sys_or_gen_i:
689 case NVPTX::BI__nvvm_atom_sys_or_gen_l:
690 case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
691 return MakeScopedAtomicRMW(*
this, E, llvm::AtomicRMWInst::Or,
"");
692 case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
693 case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
694 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
695 return MakeScopedAtomicRMW(*
this, E, llvm::AtomicRMWInst::Xor,
"block");
696 case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
697 case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
698 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
699 return MakeScopedAtomicRMW(*
this, E, llvm::AtomicRMWInst::Xor,
"");
700 case NVPTX::BI__nvvm_atom_cta_cas_gen_us:
701 case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
702 case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
703 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll:
704 return MakeScopedAtomicCAS(*
this, E,
"block");
705 case NVPTX::BI__nvvm_atom_sys_cas_gen_us:
706 case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
707 case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
708 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll:
709 return MakeScopedAtomicCAS(*
this, E,
"");
710 case NVPTX::BI__nvvm_match_all_sync_i32p:
711 case NVPTX::BI__nvvm_match_all_sync_i64p: {
716 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
717 ? Intrinsic::nvvm_match_all_sync_i32p
718 : Intrinsic::nvvm_match_all_sync_i64p),
722 Builder.CreateStore(Pred, PredOutPtr);
723 return Builder.CreateExtractValue(ResultPair, 0);
727 case NVPTX::BI__hmma_m16n16k16_ld_a:
728 case NVPTX::BI__hmma_m16n16k16_ld_b:
729 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
730 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
731 case NVPTX::BI__hmma_m32n8k16_ld_a:
732 case NVPTX::BI__hmma_m32n8k16_ld_b:
733 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
734 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
735 case NVPTX::BI__hmma_m8n32k16_ld_a:
736 case NVPTX::BI__hmma_m8n32k16_ld_b:
737 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
738 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
740 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
741 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
742 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
743 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
744 case NVPTX::BI__imma_m16n16k16_ld_c:
745 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
746 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
747 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
748 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
749 case NVPTX::BI__imma_m32n8k16_ld_c:
750 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
751 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
752 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
753 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
754 case NVPTX::BI__imma_m8n32k16_ld_c:
756 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
757 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
758 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
759 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
760 case NVPTX::BI__imma_m8n8k32_ld_c:
761 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
762 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
763 case NVPTX::BI__bmma_m8n8k128_ld_c:
765 case NVPTX::BI__dmma_m8n8k4_ld_a:
766 case NVPTX::BI__dmma_m8n8k4_ld_b:
767 case NVPTX::BI__dmma_m8n8k4_ld_c:
769 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
770 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
771 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
772 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
773 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
774 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
775 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
776 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
777 case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
781 std::optional<llvm::APSInt> isColMajorArg =
785 bool isColMajor = isColMajorArg->getSExtValue();
786 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
787 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
795 assert(II.NumResults);
796 if (II.NumResults == 1) {
800 for (
unsigned i = 0; i < II.NumResults; ++i) {
805 llvm::ConstantInt::get(
IntTy, i)),
812 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
813 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
814 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
815 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
816 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
817 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
818 case NVPTX::BI__imma_m16n16k16_st_c_i32:
819 case NVPTX::BI__imma_m32n8k16_st_c_i32:
820 case NVPTX::BI__imma_m8n32k16_st_c_i32:
821 case NVPTX::BI__imma_m8n8k32_st_c_i32:
822 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
823 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
824 case NVPTX::BI__mma_m16n16k8_st_c_f32: {
828 std::optional<llvm::APSInt> isColMajorArg =
832 bool isColMajor = isColMajorArg->getSExtValue();
833 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
834 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
839 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
841 for (
unsigned i = 0; i < II.NumResults; ++i) {
845 llvm::ConstantInt::get(
IntTy, i)),
847 Values.push_back(
Builder.CreateBitCast(
V, ParamType));
849 Values.push_back(Ldm);
856 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
857 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
858 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
859 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
860 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
861 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
862 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
863 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
864 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
865 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
866 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
867 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
868 case NVPTX::BI__imma_m16n16k16_mma_s8:
869 case NVPTX::BI__imma_m16n16k16_mma_u8:
870 case NVPTX::BI__imma_m32n8k16_mma_s8:
871 case NVPTX::BI__imma_m32n8k16_mma_u8:
872 case NVPTX::BI__imma_m8n32k16_mma_s8:
873 case NVPTX::BI__imma_m8n32k16_mma_u8:
874 case NVPTX::BI__imma_m8n8k32_mma_s4:
875 case NVPTX::BI__imma_m8n8k32_mma_u4:
876 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
877 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
878 case NVPTX::BI__dmma_m8n8k4_mma_f64:
879 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
880 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
881 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
882 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
887 std::optional<llvm::APSInt> LayoutArg =
891 int Layout = LayoutArg->getSExtValue();
892 if (Layout < 0 || Layout > 3)
894 llvm::APSInt SatfArg;
895 if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
896 BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
898 else if (std::optional<llvm::APSInt> OptSatfArg =
900 SatfArg = *OptSatfArg;
903 bool Satf = SatfArg.getSExtValue();
904 NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
905 unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
911 llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
913 for (
unsigned i = 0; i < MI.NumEltsA; ++i) {
917 llvm::ConstantInt::get(
IntTy, i)),
919 Values.push_back(
Builder.CreateBitCast(
V, AType));
922 llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
923 for (
unsigned i = 0; i < MI.NumEltsB; ++i) {
927 llvm::ConstantInt::get(
IntTy, i)),
929 Values.push_back(
Builder.CreateBitCast(
V, BType));
933 Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
934 for (
unsigned i = 0; i < MI.NumEltsC; ++i) {
938 llvm::ConstantInt::get(
IntTy, i)),
940 Values.push_back(
Builder.CreateBitCast(
V, CType));
944 for (
unsigned i = 0; i < MI.NumEltsD; ++i)
948 llvm::ConstantInt::get(
IntTy, i)),
953 case NVPTX::BI__nvvm_ex2_approx_f16:
955 CGM.getIntrinsic(Intrinsic::nvvm_ex2_approx,
Builder.getHalfTy()),
956 BuiltinID, E, *
this);
957 case NVPTX::BI__nvvm_ex2_approx_f16x2:
959 CGM.getIntrinsic(Intrinsic::nvvm_ex2_approx,
960 FixedVectorType::get(
Builder.getHalfTy(), 2)),
961 BuiltinID, E, *
this);
962 case NVPTX::BI__nvvm_ff2f16x2_rn:
963 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *
this);
964 case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
965 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *
this);
966 case NVPTX::BI__nvvm_ff2f16x2_rz:
967 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *
this);
968 case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
969 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *
this);
970 case NVPTX::BI__nvvm_fma_rn_f16:
971 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *
this);
972 case NVPTX::BI__nvvm_fma_rn_f16x2:
973 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *
this);
974 case NVPTX::BI__nvvm_fma_rn_ftz_f16:
975 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *
this);
976 case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
977 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *
this);
978 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
979 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
981 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
982 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
984 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
985 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,
987 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
988 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,
990 case NVPTX::BI__nvvm_fma_rn_relu_f16:
991 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *
this);
992 case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
993 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *
this);
994 case NVPTX::BI__nvvm_fma_rn_sat_f16:
995 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *
this);
996 case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
997 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *
this);
998 case NVPTX::BI__nvvm_fma_rn_oob_f16:
999 return MakeFMAOOB(Intrinsic::nvvm_fma_rn_oob,
Builder.getHalfTy(), E,
1001 case NVPTX::BI__nvvm_fma_rn_oob_f16x2:
1002 return MakeFMAOOB(Intrinsic::nvvm_fma_rn_oob,
1003 llvm::FixedVectorType::get(
Builder.getHalfTy(), 2), E,
1005 case NVPTX::BI__nvvm_fma_rn_oob_bf16:
1006 return MakeFMAOOB(Intrinsic::nvvm_fma_rn_oob,
Builder.getBFloatTy(), E,
1008 case NVPTX::BI__nvvm_fma_rn_oob_bf16x2:
1009 return MakeFMAOOB(Intrinsic::nvvm_fma_rn_oob,
1010 llvm::FixedVectorType::get(
Builder.getBFloatTy(), 2), E,
1012 case NVPTX::BI__nvvm_fma_rn_oob_relu_f16:
1013 return MakeFMAOOB(Intrinsic::nvvm_fma_rn_oob_relu,
Builder.getHalfTy(), E,
1015 case NVPTX::BI__nvvm_fma_rn_oob_relu_f16x2:
1016 return MakeFMAOOB(Intrinsic::nvvm_fma_rn_oob_relu,
1017 llvm::FixedVectorType::get(
Builder.getHalfTy(), 2), E,
1019 case NVPTX::BI__nvvm_fma_rn_oob_relu_bf16:
1020 return MakeFMAOOB(Intrinsic::nvvm_fma_rn_oob_relu,
Builder.getBFloatTy(), E,
1022 case NVPTX::BI__nvvm_fma_rn_oob_relu_bf16x2:
1023 return MakeFMAOOB(Intrinsic::nvvm_fma_rn_oob_relu,
1024 llvm::FixedVectorType::get(
Builder.getBFloatTy(), 2), E,
1026 case NVPTX::BI__nvvm_fmax_f16:
1027 return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *
this);
1028 case NVPTX::BI__nvvm_fmax_f16x2:
1029 return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *
this);
1030 case NVPTX::BI__nvvm_fmax_ftz_f16:
1031 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *
this);
1032 case NVPTX::BI__nvvm_fmax_ftz_f16x2:
1033 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *
this);
1034 case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
1035 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *
this);
1036 case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
1037 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
1039 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
1040 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
1042 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
1043 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
1044 BuiltinID, E, *
this);
1045 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
1046 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
1048 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
1049 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
1051 case NVPTX::BI__nvvm_fmax_nan_f16:
1052 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *
this);
1053 case NVPTX::BI__nvvm_fmax_nan_f16x2:
1054 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *
this);
1055 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
1056 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
1058 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
1059 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
1061 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
1062 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
1064 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
1065 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
1067 case NVPTX::BI__nvvm_fmin_f16:
1068 return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *
this);
1069 case NVPTX::BI__nvvm_fmin_f16x2:
1070 return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *
this);
1071 case NVPTX::BI__nvvm_fmin_ftz_f16:
1072 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *
this);
1073 case NVPTX::BI__nvvm_fmin_ftz_f16x2:
1074 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *
this);
1075 case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
1076 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *
this);
1077 case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
1078 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
1080 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
1081 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
1083 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
1084 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
1085 BuiltinID, E, *
this);
1086 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
1087 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
1089 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
1090 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
1092 case NVPTX::BI__nvvm_fmin_nan_f16:
1093 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *
this);
1094 case NVPTX::BI__nvvm_fmin_nan_f16x2:
1095 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *
this);
1096 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
1097 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
1099 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
1100 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
1102 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
1103 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
1105 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
1106 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
1108 case NVPTX::BI__nvvm_fabs_f:
1109 case NVPTX::BI__nvvm_abs_bf16:
1110 case NVPTX::BI__nvvm_abs_bf16x2:
1111 case NVPTX::BI__nvvm_fabs_f16:
1112 case NVPTX::BI__nvvm_fabs_f16x2:
1113 return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs,
1115 case NVPTX::BI__nvvm_fabs_ftz_f:
1116 case NVPTX::BI__nvvm_fabs_ftz_f16:
1117 case NVPTX::BI__nvvm_fabs_ftz_f16x2:
1118 return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs_ftz,
1120 case NVPTX::BI__nvvm_fabs_d:
1122 case NVPTX::BI__nvvm_ex2_approx_d:
1123 case NVPTX::BI__nvvm_ex2_approx_f:
1124 return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_ex2_approx,
1126 case NVPTX::BI__nvvm_ex2_approx_ftz_f:
1127 return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_ex2_approx_ftz,
1129 case NVPTX::BI__nvvm_ldg_h:
1130 case NVPTX::BI__nvvm_ldg_h2:
1131 return EnsureNativeHalfSupport(BuiltinID, E, *
this) ? MakeLdg(*
this, E)
1133 case NVPTX::BI__nvvm_ldu_h:
1134 case NVPTX::BI__nvvm_ldu_h2:
1135 return EnsureNativeHalfSupport(BuiltinID, E, *
this)
1136 ? MakeLdu(Intrinsic::nvvm_ldu_global_f, *
this, E)
1138 case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
1139 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
1140 Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *
this, E,
1142 case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
1143 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
1144 Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *
this, E,
1146 case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
1147 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
1148 Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *
this, E,
1150 case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
1151 return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
1152 Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *
this, E,
1154 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
1156 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
1157 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
1159 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
1160 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
1162 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
1163 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
1165 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
1166 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
1168 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
1169 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
1171 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
1172 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
1174 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
1175 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
1177 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
1178 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
1180 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
1181 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
1183 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
1184 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
1186 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
1187 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
1189 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
1190 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
1192 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
1193 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
1195 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
1196 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
1198 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
1199 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
1201 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
1202 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
1204 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
1205 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
1207 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
1208 case NVPTX::BI__nvvm_is_explicit_cluster:
1210 CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
1211 case NVPTX::BI__nvvm_isspacep_shared_cluster:
1213 CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
1215 case NVPTX::BI__nvvm_mapa:
1217 CGM.getIntrinsic(Intrinsic::nvvm_mapa),
1218 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
1219 case NVPTX::BI__nvvm_mapa_shared_cluster:
1221 CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
1222 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
1223 case NVPTX::BI__nvvm_getctarank:
1225 CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
1227 case NVPTX::BI__nvvm_getctarank_shared_cluster:
1229 CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
1231 case NVPTX::BI__nvvm_barrier_cluster_arrive:
1233 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));
1234 case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
1236 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
1237 case NVPTX::BI__nvvm_barrier_cluster_wait:
1239 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
1240 case NVPTX::BI__nvvm_fence_sc_cluster:
1242 CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
1243 case NVPTX::BI__nvvm_bar_sync:
1245 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all),
1247 case NVPTX::BI__syncthreads:
1249 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all),
1251 case NVPTX::BI__nvvm_barrier_sync:
1253 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all),
1255 case NVPTX::BI__nvvm_barrier_sync_cnt:
1257 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count),
1258 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
1259 case NVPTX::BI__nvvm_bar0_and:
1262 Intrinsic::nvvm_barrier_cta_red_and_aligned_all, {},
1263 {Builder.getInt32(0),
1264 Builder.CreateICmpNE(EmitScalarExpr(E->getArg(0)),
1265 Builder.getInt32(0))}),
1267 case NVPTX::BI__nvvm_bar0_or:
1270 Intrinsic::nvvm_barrier_cta_red_or_aligned_all, {},
1271 {Builder.getInt32(0),
1272 Builder.CreateICmpNE(EmitScalarExpr(E->getArg(0)),
1273 Builder.getInt32(0))}),
1275 case NVPTX::BI__nvvm_bar0_popc:
1276 return Builder.CreateIntrinsic(
1277 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all, {},