431 case NVPTX::BI__nvvm_atom_add_gen_i:
432 case NVPTX::BI__nvvm_atom_add_gen_l:
433 case NVPTX::BI__nvvm_atom_add_gen_ll:
435 AtomicOrdering::Monotonic);
437 case NVPTX::BI__nvvm_atom_sub_gen_i:
438 case NVPTX::BI__nvvm_atom_sub_gen_l:
439 case NVPTX::BI__nvvm_atom_sub_gen_ll:
441 AtomicOrdering::Monotonic);
443 case NVPTX::BI__nvvm_atom_and_gen_i:
444 case NVPTX::BI__nvvm_atom_and_gen_l:
445 case NVPTX::BI__nvvm_atom_and_gen_ll:
447 AtomicOrdering::Monotonic);
449 case NVPTX::BI__nvvm_atom_or_gen_i:
450 case NVPTX::BI__nvvm_atom_or_gen_l:
451 case NVPTX::BI__nvvm_atom_or_gen_ll:
453 AtomicOrdering::Monotonic);
455 case NVPTX::BI__nvvm_atom_xor_gen_i:
456 case NVPTX::BI__nvvm_atom_xor_gen_l:
457 case NVPTX::BI__nvvm_atom_xor_gen_ll:
459 AtomicOrdering::Monotonic);
461 case NVPTX::BI__nvvm_atom_xchg_gen_i:
462 case NVPTX::BI__nvvm_atom_xchg_gen_l:
463 case NVPTX::BI__nvvm_atom_xchg_gen_ll:
465 AtomicOrdering::Monotonic);
467 case NVPTX::BI__nvvm_atom_max_gen_i:
468 case NVPTX::BI__nvvm_atom_max_gen_l:
469 case NVPTX::BI__nvvm_atom_max_gen_ll:
471 AtomicOrdering::Monotonic);
473 case NVPTX::BI__nvvm_atom_max_gen_ui:
474 case NVPTX::BI__nvvm_atom_max_gen_ul:
475 case NVPTX::BI__nvvm_atom_max_gen_ull:
477 AtomicOrdering::Monotonic);
479 case NVPTX::BI__nvvm_atom_min_gen_i:
480 case NVPTX::BI__nvvm_atom_min_gen_l:
481 case NVPTX::BI__nvvm_atom_min_gen_ll:
483 AtomicOrdering::Monotonic);
485 case NVPTX::BI__nvvm_atom_min_gen_ui:
486 case NVPTX::BI__nvvm_atom_min_gen_ul:
487 case NVPTX::BI__nvvm_atom_min_gen_ull:
489 AtomicOrdering::Monotonic);
491 case NVPTX::BI__nvvm_atom_cas_gen_us:
492 case NVPTX::BI__nvvm_atom_cas_gen_i:
493 case NVPTX::BI__nvvm_atom_cas_gen_l:
494 case NVPTX::BI__nvvm_atom_cas_gen_ll:
498 AtomicOrdering::Monotonic,
499 AtomicOrdering::Monotonic);
501 case NVPTX::BI__nvvm_atom_add_gen_f:
502 case NVPTX::BI__nvvm_atom_add_gen_d: {
506 return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,
507 AtomicOrdering::Monotonic);
510 case NVPTX::BI__nvvm_atom_inc_gen_ui:
512 AtomicOrdering::Monotonic);
514 case NVPTX::BI__nvvm_atom_dec_gen_ui:
516 AtomicOrdering::Monotonic);
518 case NVPTX::BI__nvvm_ldg_c:
519 case NVPTX::BI__nvvm_ldg_sc:
520 case NVPTX::BI__nvvm_ldg_c2:
521 case NVPTX::BI__nvvm_ldg_sc2:
522 case NVPTX::BI__nvvm_ldg_c4:
523 case NVPTX::BI__nvvm_ldg_sc4:
524 case NVPTX::BI__nvvm_ldg_s:
525 case NVPTX::BI__nvvm_ldg_s2:
526 case NVPTX::BI__nvvm_ldg_s4:
527 case NVPTX::BI__nvvm_ldg_i:
528 case NVPTX::BI__nvvm_ldg_i2:
529 case NVPTX::BI__nvvm_ldg_i4:
530 case NVPTX::BI__nvvm_ldg_l:
531 case NVPTX::BI__nvvm_ldg_l2:
532 case NVPTX::BI__nvvm_ldg_ll:
533 case NVPTX::BI__nvvm_ldg_ll2:
534 case NVPTX::BI__nvvm_ldg_uc:
535 case NVPTX::BI__nvvm_ldg_uc2:
536 case NVPTX::BI__nvvm_ldg_uc4:
537 case NVPTX::BI__nvvm_ldg_us:
538 case NVPTX::BI__nvvm_ldg_us2:
539 case NVPTX::BI__nvvm_ldg_us4:
540 case NVPTX::BI__nvvm_ldg_ui:
541 case NVPTX::BI__nvvm_ldg_ui2:
542 case NVPTX::BI__nvvm_ldg_ui4:
543 case NVPTX::BI__nvvm_ldg_ul:
544 case NVPTX::BI__nvvm_ldg_ul2:
545 case NVPTX::BI__nvvm_ldg_ull:
546 case NVPTX::BI__nvvm_ldg_ull2:
547 case NVPTX::BI__nvvm_ldg_f:
548 case NVPTX::BI__nvvm_ldg_f2:
549 case NVPTX::BI__nvvm_ldg_f4:
550 case NVPTX::BI__nvvm_ldg_d:
551 case NVPTX::BI__nvvm_ldg_d2:
555 return MakeLdg(*
this, E);
557 case NVPTX::BI__nvvm_ldu_c:
558 case NVPTX::BI__nvvm_ldu_sc:
559 case NVPTX::BI__nvvm_ldu_c2:
560 case NVPTX::BI__nvvm_ldu_sc2:
561 case NVPTX::BI__nvvm_ldu_c4:
562 case NVPTX::BI__nvvm_ldu_sc4:
563 case NVPTX::BI__nvvm_ldu_s:
564 case NVPTX::BI__nvvm_ldu_s2:
565 case NVPTX::BI__nvvm_ldu_s4:
566 case NVPTX::BI__nvvm_ldu_i:
567 case NVPTX::BI__nvvm_ldu_i2:
568 case NVPTX::BI__nvvm_ldu_i4:
569 case NVPTX::BI__nvvm_ldu_l:
570 case NVPTX::BI__nvvm_ldu_l2:
571 case NVPTX::BI__nvvm_ldu_ll:
572 case NVPTX::BI__nvvm_ldu_ll2:
573 case NVPTX::BI__nvvm_ldu_uc:
574 case NVPTX::BI__nvvm_ldu_uc2:
575 case NVPTX::BI__nvvm_ldu_uc4:
576 case NVPTX::BI__nvvm_ldu_us:
577 case NVPTX::BI__nvvm_ldu_us2:
578 case NVPTX::BI__nvvm_ldu_us4:
579 case NVPTX::BI__nvvm_ldu_ui:
580 case NVPTX::BI__nvvm_ldu_ui2:
581 case NVPTX::BI__nvvm_ldu_ui4:
582 case NVPTX::BI__nvvm_ldu_ul:
583 case NVPTX::BI__nvvm_ldu_ul2:
584 case NVPTX::BI__nvvm_ldu_ull:
585 case NVPTX::BI__nvvm_ldu_ull2:
586 return MakeLdu(Intrinsic::nvvm_ldu_global_i, *
this, E);
587 case NVPTX::BI__nvvm_ldu_f:
588 case NVPTX::BI__nvvm_ldu_f2:
589 case NVPTX::BI__nvvm_ldu_f4:
590 case NVPTX::BI__nvvm_ldu_d:
591 case NVPTX::BI__nvvm_ldu_d2:
592 return MakeLdu(Intrinsic::nvvm_ldu_global_f, *
this, E);
594 case NVPTX::BI__nvvm_atom_cta_add_gen_i:
595 case NVPTX::BI__nvvm_atom_cta_add_gen_l:
596 case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
597 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *
this, E);
598 case NVPTX::BI__nvvm_atom_sys_add_gen_i:
599 case NVPTX::BI__nvvm_atom_sys_add_gen_l:
600 case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
601 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *
this, E);
602 case NVPTX::BI__nvvm_atom_cta_add_gen_f:
603 case NVPTX::BI__nvvm_atom_cta_add_gen_d:
604 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *
this, E);
605 case NVPTX::BI__nvvm_atom_sys_add_gen_f:
606 case NVPTX::BI__nvvm_atom_sys_add_gen_d:
607 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *
this, E);
608 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
609 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
610 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
611 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *
this, E);
612 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
613 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
614 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
615 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *
this, E);
616 case NVPTX::BI__nvvm_atom_cta_max_gen_i:
617 case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
618 case NVPTX::BI__nvvm_atom_cta_max_gen_l:
619 case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
620 case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
621 case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
622 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *
this, E);
623 case NVPTX::BI__nvvm_atom_sys_max_gen_i:
624 case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
625 case NVPTX::BI__nvvm_atom_sys_max_gen_l:
626 case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
627 case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
628 case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
629 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *
this, E);
630 case NVPTX::BI__nvvm_atom_cta_min_gen_i:
631 case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
632 case NVPTX::BI__nvvm_atom_cta_min_gen_l:
633 case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
634 case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
635 case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
636 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *
this, E);
637 case NVPTX::BI__nvvm_atom_sys_min_gen_i:
638 case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
639 case NVPTX::BI__nvvm_atom_sys_min_gen_l:
640 case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
641 case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
642 case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
643 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *
this, E);
644 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
645 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *
this, E);
646 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
647 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *
this, E);
648 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
649 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *
this, E);
650 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
651 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *
this, E);
652 case NVPTX::BI__nvvm_atom_cta_and_gen_i:
653 case NVPTX::BI__nvvm_atom_cta_and_gen_l:
654 case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
655 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *
this, E);
656 case NVPTX::BI__nvvm_atom_sys_and_gen_i:
657 case NVPTX::BI__nvvm_atom_sys_and_gen_l:
658 case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
659 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *
this, E);
660 case NVPTX::BI__nvvm_atom_cta_or_gen_i:
661 case NVPTX::BI__nvvm_atom_cta_or_gen_l:
662 case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
663 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *
this, E);
664 case NVPTX::BI__nvvm_atom_sys_or_gen_i:
665 case NVPTX::BI__nvvm_atom_sys_or_gen_l:
666 case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
667 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *
this, E);
668 case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
669 case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
670 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
671 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *
this, E);
672 case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
673 case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
674 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
675 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *
this, E);
676 case NVPTX::BI__nvvm_atom_cta_cas_gen_us:
677 case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
678 case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
679 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
685 Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
686 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
688 case NVPTX::BI__nvvm_atom_sys_cas_gen_us:
689 case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
690 case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
691 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
697 Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
698 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
700 case NVPTX::BI__nvvm_match_all_sync_i32p:
701 case NVPTX::BI__nvvm_match_all_sync_i64p: {
706 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
707 ? Intrinsic::nvvm_match_all_sync_i32p
708 : Intrinsic::nvvm_match_all_sync_i64p),
712 Builder.CreateStore(Pred, PredOutPtr);
713 return Builder.CreateExtractValue(ResultPair, 0);
717 case NVPTX::BI__hmma_m16n16k16_ld_a:
718 case NVPTX::BI__hmma_m16n16k16_ld_b:
719 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
720 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
721 case NVPTX::BI__hmma_m32n8k16_ld_a:
722 case NVPTX::BI__hmma_m32n8k16_ld_b:
723 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
724 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
725 case NVPTX::BI__hmma_m8n32k16_ld_a:
726 case NVPTX::BI__hmma_m8n32k16_ld_b:
727 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
728 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
730 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
731 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
732 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
733 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
734 case NVPTX::BI__imma_m16n16k16_ld_c:
735 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
736 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
737 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
738 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
739 case NVPTX::BI__imma_m32n8k16_ld_c:
740 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
741 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
742 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
743 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
744 case NVPTX::BI__imma_m8n32k16_ld_c:
746 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
747 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
748 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
749 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
750 case NVPTX::BI__imma_m8n8k32_ld_c:
751 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
752 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
753 case NVPTX::BI__bmma_m8n8k128_ld_c:
755 case NVPTX::BI__dmma_m8n8k4_ld_a:
756 case NVPTX::BI__dmma_m8n8k4_ld_b:
757 case NVPTX::BI__dmma_m8n8k4_ld_c:
759 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
760 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
761 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
762 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
763 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
764 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
765 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
766 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
767 case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
771 std::optional<llvm::APSInt> isColMajorArg =
775 bool isColMajor = isColMajorArg->getSExtValue();
776 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
777 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
785 assert(II.NumResults);
786 if (II.NumResults == 1) {
790 for (
unsigned i = 0; i < II.NumResults; ++i) {
795 llvm::ConstantInt::get(
IntTy, i)),
802 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
803 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
804 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
805 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
806 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
807 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
808 case NVPTX::BI__imma_m16n16k16_st_c_i32:
809 case NVPTX::BI__imma_m32n8k16_st_c_i32:
810 case NVPTX::BI__imma_m8n32k16_st_c_i32:
811 case NVPTX::BI__imma_m8n8k32_st_c_i32:
812 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
813 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
814 case NVPTX::BI__mma_m16n16k8_st_c_f32: {
818 std::optional<llvm::APSInt> isColMajorArg =
822 bool isColMajor = isColMajorArg->getSExtValue();
823 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
824 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
829 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
831 for (
unsigned i = 0; i < II.NumResults; ++i) {
835 llvm::ConstantInt::get(
IntTy, i)),
837 Values.push_back(
Builder.CreateBitCast(
V, ParamType));
839 Values.push_back(Ldm);
846 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
847 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
848 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
849 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
850 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
851 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
852 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
853 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
854 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
855 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
856 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
857 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
858 case NVPTX::BI__imma_m16n16k16_mma_s8:
859 case NVPTX::BI__imma_m16n16k16_mma_u8:
860 case NVPTX::BI__imma_m32n8k16_mma_s8:
861 case NVPTX::BI__imma_m32n8k16_mma_u8:
862 case NVPTX::BI__imma_m8n32k16_mma_s8:
863 case NVPTX::BI__imma_m8n32k16_mma_u8:
864 case NVPTX::BI__imma_m8n8k32_mma_s4:
865 case NVPTX::BI__imma_m8n8k32_mma_u4:
866 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
867 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
868 case NVPTX::BI__dmma_m8n8k4_mma_f64:
869 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
870 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
871 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
872 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
877 std::optional<llvm::APSInt> LayoutArg =
881 int Layout = LayoutArg->getSExtValue();
882 if (Layout < 0 || Layout > 3)
884 llvm::APSInt SatfArg;
885 if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
886 BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
888 else if (std::optional<llvm::APSInt> OptSatfArg =
890 SatfArg = *OptSatfArg;
893 bool Satf = SatfArg.getSExtValue();
894 NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
895 unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
901 llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
903 for (
unsigned i = 0; i < MI.NumEltsA; ++i) {
907 llvm::ConstantInt::get(
IntTy, i)),
909 Values.push_back(
Builder.CreateBitCast(
V, AType));
912 llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
913 for (
unsigned i = 0; i < MI.NumEltsB; ++i) {
917 llvm::ConstantInt::get(
IntTy, i)),
919 Values.push_back(
Builder.CreateBitCast(
V, BType));
923 Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
924 for (
unsigned i = 0; i < MI.NumEltsC; ++i) {
928 llvm::ConstantInt::get(
IntTy, i)),
930 Values.push_back(
Builder.CreateBitCast(
V, CType));
934 for (
unsigned i = 0; i < MI.NumEltsD; ++i)
938 llvm::ConstantInt::get(
IntTy, i)),
943 case NVPTX::BI__nvvm_ex2_approx_f16:
945 CGM.getIntrinsic(Intrinsic::nvvm_ex2_approx,
Builder.getHalfTy()),
946 BuiltinID, E, *
this);
947 case NVPTX::BI__nvvm_ex2_approx_f16x2:
949 CGM.getIntrinsic(Intrinsic::nvvm_ex2_approx,
950 FixedVectorType::get(
Builder.getHalfTy(), 2)),
951 BuiltinID, E, *
this);
952 case NVPTX::BI__nvvm_ff2f16x2_rn:
953 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *
this);
954 case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
955 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *
this);
956 case NVPTX::BI__nvvm_ff2f16x2_rz:
957 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *
this);
958 case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
959 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *
this);
960 case NVPTX::BI__nvvm_fma_rn_f16:
961 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *
this);
962 case NVPTX::BI__nvvm_fma_rn_f16x2:
963 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *
this);
964 case NVPTX::BI__nvvm_fma_rn_ftz_f16:
965 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *
this);
966 case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
967 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *
this);
968 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
969 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
971 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
972 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
974 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
975 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,
977 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
978 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,
980 case NVPTX::BI__nvvm_fma_rn_relu_f16:
981 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *
this);
982 case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
983 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *
this);
984 case NVPTX::BI__nvvm_fma_rn_sat_f16:
985 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *
this);
986 case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
987 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *
this);
988 case NVPTX::BI__nvvm_fma_rn_oob_f16:
989 return MakeFMAOOB(Intrinsic::nvvm_fma_rn_oob,
Builder.getHalfTy(), E,
991 case NVPTX::BI__nvvm_fma_rn_oob_f16x2:
992 return MakeFMAOOB(Intrinsic::nvvm_fma_rn_oob,
993 llvm::FixedVectorType::get(
Builder.getHalfTy(), 2), E,
995 case NVPTX::BI__nvvm_fma_rn_oob_bf16:
996 return MakeFMAOOB(Intrinsic::nvvm_fma_rn_oob,
Builder.getBFloatTy(), E,
998 case NVPTX::BI__nvvm_fma_rn_oob_bf16x2:
999 return MakeFMAOOB(Intrinsic::nvvm_fma_rn_oob,
1000 llvm::FixedVectorType::get(
Builder.getBFloatTy(), 2), E,
1002 case NVPTX::BI__nvvm_fma_rn_oob_relu_f16:
1003 return MakeFMAOOB(Intrinsic::nvvm_fma_rn_oob_relu,
Builder.getHalfTy(), E,
1005 case NVPTX::BI__nvvm_fma_rn_oob_relu_f16x2:
1006 return MakeFMAOOB(Intrinsic::nvvm_fma_rn_oob_relu,
1007 llvm::FixedVectorType::get(
Builder.getHalfTy(), 2), E,
1009 case NVPTX::BI__nvvm_fma_rn_oob_relu_bf16:
1010 return MakeFMAOOB(Intrinsic::nvvm_fma_rn_oob_relu,
Builder.getBFloatTy(), E,
1012 case NVPTX::BI__nvvm_fma_rn_oob_relu_bf16x2:
1013 return MakeFMAOOB(Intrinsic::nvvm_fma_rn_oob_relu,
1014 llvm::FixedVectorType::get(
Builder.getBFloatTy(), 2), E,
1016 case NVPTX::BI__nvvm_fmax_f16:
1017 return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *
this);
1018 case NVPTX::BI__nvvm_fmax_f16x2:
1019 return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *
this);
1020 case NVPTX::BI__nvvm_fmax_ftz_f16:
1021 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *
this);
1022 case NVPTX::BI__nvvm_fmax_ftz_f16x2:
1023 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *
this);
1024 case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
1025 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *
this);
1026 case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
1027 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
1029 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
1030 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
1032 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
1033 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
1034 BuiltinID, E, *
this);
1035 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
1036 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
1038 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
1039 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
1041 case NVPTX::BI__nvvm_fmax_nan_f16:
1042 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *
this);
1043 case NVPTX::BI__nvvm_fmax_nan_f16x2:
1044 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *
this);
1045 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
1046 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
1048 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
1049 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
1051 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
1052 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
1054 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
1055 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
1057 case NVPTX::BI__nvvm_fmin_f16:
1058 return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *
this);
1059 case NVPTX::BI__nvvm_fmin_f16x2:
1060 return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *
this);
1061 case NVPTX::BI__nvvm_fmin_ftz_f16:
1062 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *
this);
1063 case NVPTX::BI__nvvm_fmin_ftz_f16x2:
1064 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *
this);
1065 case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
1066 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *
this);
1067 case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
1068 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
1070 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
1071 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
1073 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
1074 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
1075 BuiltinID, E, *
this);
1076 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
1077 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
1079 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
1080 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
1082 case NVPTX::BI__nvvm_fmin_nan_f16:
1083 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *
this);
1084 case NVPTX::BI__nvvm_fmin_nan_f16x2:
1085 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *
this);
1086 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
1087 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
1089 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
1090 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
1092 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
1093 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
1095 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
1096 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
1098 case NVPTX::BI__nvvm_fabs_f:
1099 case NVPTX::BI__nvvm_abs_bf16:
1100 case NVPTX::BI__nvvm_abs_bf16x2:
1101 case NVPTX::BI__nvvm_fabs_f16:
1102 case NVPTX::BI__nvvm_fabs_f16x2:
1103 return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs,
1105 case NVPTX::BI__nvvm_fabs_ftz_f:
1106 case NVPTX::BI__nvvm_fabs_ftz_f16:
1107 case NVPTX::BI__nvvm_fabs_ftz_f16x2:
1108 return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs_ftz,
1110 case NVPTX::BI__nvvm_fabs_d:
1111 return Builder.CreateUnaryIntrinsic(Intrinsic::fabs,
1113 case NVPTX::BI__nvvm_ex2_approx_d:
1114 case NVPTX::BI__nvvm_ex2_approx_f:
1115 return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_ex2_approx,
1117 case NVPTX::BI__nvvm_ex2_approx_ftz_f:
1118 return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_ex2_approx_ftz,
1120 case NVPTX::BI__nvvm_ldg_h:
1121 case NVPTX::BI__nvvm_ldg_h2:
1122 return EnsureNativeHalfSupport(BuiltinID, E, *
this) ? MakeLdg(*
this, E)
1124 case NVPTX::BI__nvvm_ldu_h:
1125 case NVPTX::BI__nvvm_ldu_h2:
1126 return EnsureNativeHalfSupport(BuiltinID, E, *
this)
1127 ? MakeLdu(Intrinsic::nvvm_ldu_global_f, *
this, E)
1129 case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
1130 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
1131 Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *
this, E,
1133 case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
1134 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
1135 Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *
this, E,
1137 case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
1138 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
1139 Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *
this, E,
1141 case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
1142 return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
1143 Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *
this, E,
1145 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
1147 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
1148 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
1150 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
1151 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
1153 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
1154 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
1156 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
1157 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
1159 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
1160 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
1162 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
1163 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
1165 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
1166 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
1168 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
1169 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
1171 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
1172 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
1174 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
1175 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
1177 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
1178 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
1180 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
1181 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
1183 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
1184 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
1186 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
1187 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
1189 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
1190 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
1192 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
1193 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
1195 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
1196 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
1198 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
1199 case NVPTX::BI__nvvm_is_explicit_cluster:
1201 CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
1202 case NVPTX::BI__nvvm_isspacep_shared_cluster:
1204 CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
1206 case NVPTX::BI__nvvm_mapa:
1208 CGM.getIntrinsic(Intrinsic::nvvm_mapa),
1209 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
1210 case NVPTX::BI__nvvm_mapa_shared_cluster:
1212 CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
1213 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
1214 case NVPTX::BI__nvvm_getctarank:
1216 CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
1218 case NVPTX::BI__nvvm_getctarank_shared_cluster:
1220 CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
1222 case NVPTX::BI__nvvm_barrier_cluster_arrive:
1224 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));
1225 case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
1227 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
1228 case NVPTX::BI__nvvm_barrier_cluster_wait:
1230 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
1231 case NVPTX::BI__nvvm_fence_sc_cluster:
1233 CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
1234 case NVPTX::BI__nvvm_bar_sync:
1236 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all),
1238 case NVPTX::BI__syncthreads:
1240 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all),
1242 case NVPTX::BI__nvvm_barrier_sync:
1244 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all),
1246 case NVPTX::BI__nvvm_barrier_sync_cnt:
1248 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count),
1249 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
1250 case NVPTX::BI__nvvm_bar0_and:
1253 Intrinsic::nvvm_barrier_cta_red_and_aligned_all, {},
1254 {Builder.getInt32(0),
1255 Builder.CreateICmpNE(EmitScalarExpr(E->getArg(0)),
1256 Builder.getInt32(0))}),
1258 case NVPTX::BI__nvvm_bar0_or:
1261 Intrinsic::nvvm_barrier_cta_red_or_aligned_all, {},
1262 {Builder.getInt32(0),
1263 Builder.CreateICmpNE(EmitScalarExpr(E->getArg(0)),
1264 Builder.getInt32(0))}),
1266 case NVPTX::BI__nvvm_bar0_popc:
1267 return Builder.CreateIntrinsic(
1268 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all, {},