431 case NVPTX::BI__nvvm_atom_add_gen_i:
432 case NVPTX::BI__nvvm_atom_add_gen_l:
433 case NVPTX::BI__nvvm_atom_add_gen_ll:
436 case NVPTX::BI__nvvm_atom_sub_gen_i:
437 case NVPTX::BI__nvvm_atom_sub_gen_l:
438 case NVPTX::BI__nvvm_atom_sub_gen_ll:
441 case NVPTX::BI__nvvm_atom_and_gen_i:
442 case NVPTX::BI__nvvm_atom_and_gen_l:
443 case NVPTX::BI__nvvm_atom_and_gen_ll:
446 case NVPTX::BI__nvvm_atom_or_gen_i:
447 case NVPTX::BI__nvvm_atom_or_gen_l:
448 case NVPTX::BI__nvvm_atom_or_gen_ll:
451 case NVPTX::BI__nvvm_atom_xor_gen_i:
452 case NVPTX::BI__nvvm_atom_xor_gen_l:
453 case NVPTX::BI__nvvm_atom_xor_gen_ll:
456 case NVPTX::BI__nvvm_atom_xchg_gen_i:
457 case NVPTX::BI__nvvm_atom_xchg_gen_l:
458 case NVPTX::BI__nvvm_atom_xchg_gen_ll:
461 case NVPTX::BI__nvvm_atom_max_gen_i:
462 case NVPTX::BI__nvvm_atom_max_gen_l:
463 case NVPTX::BI__nvvm_atom_max_gen_ll:
466 case NVPTX::BI__nvvm_atom_max_gen_ui:
467 case NVPTX::BI__nvvm_atom_max_gen_ul:
468 case NVPTX::BI__nvvm_atom_max_gen_ull:
471 case NVPTX::BI__nvvm_atom_min_gen_i:
472 case NVPTX::BI__nvvm_atom_min_gen_l:
473 case NVPTX::BI__nvvm_atom_min_gen_ll:
476 case NVPTX::BI__nvvm_atom_min_gen_ui:
477 case NVPTX::BI__nvvm_atom_min_gen_ul:
478 case NVPTX::BI__nvvm_atom_min_gen_ull:
481 case NVPTX::BI__nvvm_atom_cas_gen_us:
482 case NVPTX::BI__nvvm_atom_cas_gen_i:
483 case NVPTX::BI__nvvm_atom_cas_gen_l:
484 case NVPTX::BI__nvvm_atom_cas_gen_ll:
489 case NVPTX::BI__nvvm_atom_add_gen_f:
490 case NVPTX::BI__nvvm_atom_add_gen_d: {
494 return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,
495 AtomicOrdering::SequentiallyConsistent);
498 case NVPTX::BI__nvvm_atom_inc_gen_ui:
501 case NVPTX::BI__nvvm_atom_dec_gen_ui:
504 case NVPTX::BI__nvvm_ldg_c:
505 case NVPTX::BI__nvvm_ldg_sc:
506 case NVPTX::BI__nvvm_ldg_c2:
507 case NVPTX::BI__nvvm_ldg_sc2:
508 case NVPTX::BI__nvvm_ldg_c4:
509 case NVPTX::BI__nvvm_ldg_sc4:
510 case NVPTX::BI__nvvm_ldg_s:
511 case NVPTX::BI__nvvm_ldg_s2:
512 case NVPTX::BI__nvvm_ldg_s4:
513 case NVPTX::BI__nvvm_ldg_i:
514 case NVPTX::BI__nvvm_ldg_i2:
515 case NVPTX::BI__nvvm_ldg_i4:
516 case NVPTX::BI__nvvm_ldg_l:
517 case NVPTX::BI__nvvm_ldg_l2:
518 case NVPTX::BI__nvvm_ldg_ll:
519 case NVPTX::BI__nvvm_ldg_ll2:
520 case NVPTX::BI__nvvm_ldg_uc:
521 case NVPTX::BI__nvvm_ldg_uc2:
522 case NVPTX::BI__nvvm_ldg_uc4:
523 case NVPTX::BI__nvvm_ldg_us:
524 case NVPTX::BI__nvvm_ldg_us2:
525 case NVPTX::BI__nvvm_ldg_us4:
526 case NVPTX::BI__nvvm_ldg_ui:
527 case NVPTX::BI__nvvm_ldg_ui2:
528 case NVPTX::BI__nvvm_ldg_ui4:
529 case NVPTX::BI__nvvm_ldg_ul:
530 case NVPTX::BI__nvvm_ldg_ul2:
531 case NVPTX::BI__nvvm_ldg_ull:
532 case NVPTX::BI__nvvm_ldg_ull2:
533 case NVPTX::BI__nvvm_ldg_f:
534 case NVPTX::BI__nvvm_ldg_f2:
535 case NVPTX::BI__nvvm_ldg_f4:
536 case NVPTX::BI__nvvm_ldg_d:
537 case NVPTX::BI__nvvm_ldg_d2:
541 return MakeLdg(*
this, E);
543 case NVPTX::BI__nvvm_ldu_c:
544 case NVPTX::BI__nvvm_ldu_sc:
545 case NVPTX::BI__nvvm_ldu_c2:
546 case NVPTX::BI__nvvm_ldu_sc2:
547 case NVPTX::BI__nvvm_ldu_c4:
548 case NVPTX::BI__nvvm_ldu_sc4:
549 case NVPTX::BI__nvvm_ldu_s:
550 case NVPTX::BI__nvvm_ldu_s2:
551 case NVPTX::BI__nvvm_ldu_s4:
552 case NVPTX::BI__nvvm_ldu_i:
553 case NVPTX::BI__nvvm_ldu_i2:
554 case NVPTX::BI__nvvm_ldu_i4:
555 case NVPTX::BI__nvvm_ldu_l:
556 case NVPTX::BI__nvvm_ldu_l2:
557 case NVPTX::BI__nvvm_ldu_ll:
558 case NVPTX::BI__nvvm_ldu_ll2:
559 case NVPTX::BI__nvvm_ldu_uc:
560 case NVPTX::BI__nvvm_ldu_uc2:
561 case NVPTX::BI__nvvm_ldu_uc4:
562 case NVPTX::BI__nvvm_ldu_us:
563 case NVPTX::BI__nvvm_ldu_us2:
564 case NVPTX::BI__nvvm_ldu_us4:
565 case NVPTX::BI__nvvm_ldu_ui:
566 case NVPTX::BI__nvvm_ldu_ui2:
567 case NVPTX::BI__nvvm_ldu_ui4:
568 case NVPTX::BI__nvvm_ldu_ul:
569 case NVPTX::BI__nvvm_ldu_ul2:
570 case NVPTX::BI__nvvm_ldu_ull:
571 case NVPTX::BI__nvvm_ldu_ull2:
572 return MakeLdu(Intrinsic::nvvm_ldu_global_i, *
this, E);
573 case NVPTX::BI__nvvm_ldu_f:
574 case NVPTX::BI__nvvm_ldu_f2:
575 case NVPTX::BI__nvvm_ldu_f4:
576 case NVPTX::BI__nvvm_ldu_d:
577 case NVPTX::BI__nvvm_ldu_d2:
578 return MakeLdu(Intrinsic::nvvm_ldu_global_f, *
this, E);
580 case NVPTX::BI__nvvm_atom_cta_add_gen_i:
581 case NVPTX::BI__nvvm_atom_cta_add_gen_l:
582 case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
583 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *
this, E);
584 case NVPTX::BI__nvvm_atom_sys_add_gen_i:
585 case NVPTX::BI__nvvm_atom_sys_add_gen_l:
586 case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
587 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *
this, E);
588 case NVPTX::BI__nvvm_atom_cta_add_gen_f:
589 case NVPTX::BI__nvvm_atom_cta_add_gen_d:
590 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *
this, E);
591 case NVPTX::BI__nvvm_atom_sys_add_gen_f:
592 case NVPTX::BI__nvvm_atom_sys_add_gen_d:
593 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *
this, E);
594 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
595 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
596 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
597 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *
this, E);
598 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
599 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
600 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
601 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *
this, E);
602 case NVPTX::BI__nvvm_atom_cta_max_gen_i:
603 case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
604 case NVPTX::BI__nvvm_atom_cta_max_gen_l:
605 case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
606 case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
607 case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
608 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *
this, E);
609 case NVPTX::BI__nvvm_atom_sys_max_gen_i:
610 case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
611 case NVPTX::BI__nvvm_atom_sys_max_gen_l:
612 case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
613 case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
614 case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
615 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *
this, E);
616 case NVPTX::BI__nvvm_atom_cta_min_gen_i:
617 case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
618 case NVPTX::BI__nvvm_atom_cta_min_gen_l:
619 case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
620 case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
621 case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
622 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *
this, E);
623 case NVPTX::BI__nvvm_atom_sys_min_gen_i:
624 case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
625 case NVPTX::BI__nvvm_atom_sys_min_gen_l:
626 case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
627 case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
628 case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
629 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *
this, E);
630 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
631 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *
this, E);
632 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
633 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *
this, E);
634 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
635 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *
this, E);
636 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
637 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *
this, E);
638 case NVPTX::BI__nvvm_atom_cta_and_gen_i:
639 case NVPTX::BI__nvvm_atom_cta_and_gen_l:
640 case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
641 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *
this, E);
642 case NVPTX::BI__nvvm_atom_sys_and_gen_i:
643 case NVPTX::BI__nvvm_atom_sys_and_gen_l:
644 case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
645 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *
this, E);
646 case NVPTX::BI__nvvm_atom_cta_or_gen_i:
647 case NVPTX::BI__nvvm_atom_cta_or_gen_l:
648 case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
649 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *
this, E);
650 case NVPTX::BI__nvvm_atom_sys_or_gen_i:
651 case NVPTX::BI__nvvm_atom_sys_or_gen_l:
652 case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
653 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *
this, E);
654 case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
655 case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
656 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
657 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *
this, E);
658 case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
659 case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
660 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
661 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *
this, E);
662 case NVPTX::BI__nvvm_atom_cta_cas_gen_us:
663 case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
664 case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
665 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
671 Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
672 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
674 case NVPTX::BI__nvvm_atom_sys_cas_gen_us:
675 case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
676 case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
677 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
683 Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
684 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
686 case NVPTX::BI__nvvm_match_all_sync_i32p:
687 case NVPTX::BI__nvvm_match_all_sync_i64p: {
692 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
693 ? Intrinsic::nvvm_match_all_sync_i32p
694 : Intrinsic::nvvm_match_all_sync_i64p),
698 Builder.CreateStore(Pred, PredOutPtr);
699 return Builder.CreateExtractValue(ResultPair, 0);
703 case NVPTX::BI__hmma_m16n16k16_ld_a:
704 case NVPTX::BI__hmma_m16n16k16_ld_b:
705 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
706 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
707 case NVPTX::BI__hmma_m32n8k16_ld_a:
708 case NVPTX::BI__hmma_m32n8k16_ld_b:
709 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
710 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
711 case NVPTX::BI__hmma_m8n32k16_ld_a:
712 case NVPTX::BI__hmma_m8n32k16_ld_b:
713 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
714 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
716 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
717 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
718 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
719 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
720 case NVPTX::BI__imma_m16n16k16_ld_c:
721 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
722 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
723 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
724 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
725 case NVPTX::BI__imma_m32n8k16_ld_c:
726 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
727 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
728 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
729 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
730 case NVPTX::BI__imma_m8n32k16_ld_c:
732 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
733 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
734 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
735 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
736 case NVPTX::BI__imma_m8n8k32_ld_c:
737 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
738 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
739 case NVPTX::BI__bmma_m8n8k128_ld_c:
741 case NVPTX::BI__dmma_m8n8k4_ld_a:
742 case NVPTX::BI__dmma_m8n8k4_ld_b:
743 case NVPTX::BI__dmma_m8n8k4_ld_c:
745 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
746 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
747 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
748 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
749 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
750 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
751 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
752 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
753 case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
757 std::optional<llvm::APSInt> isColMajorArg =
761 bool isColMajor = isColMajorArg->getSExtValue();
762 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
763 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
771 assert(II.NumResults);
772 if (II.NumResults == 1) {
776 for (
unsigned i = 0; i < II.NumResults; ++i) {
781 llvm::ConstantInt::get(
IntTy, i)),
788 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
789 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
790 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
791 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
792 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
793 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
794 case NVPTX::BI__imma_m16n16k16_st_c_i32:
795 case NVPTX::BI__imma_m32n8k16_st_c_i32:
796 case NVPTX::BI__imma_m8n32k16_st_c_i32:
797 case NVPTX::BI__imma_m8n8k32_st_c_i32:
798 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
799 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
800 case NVPTX::BI__mma_m16n16k8_st_c_f32: {
804 std::optional<llvm::APSInt> isColMajorArg =
808 bool isColMajor = isColMajorArg->getSExtValue();
809 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
810 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
815 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
817 for (
unsigned i = 0; i < II.NumResults; ++i) {
821 llvm::ConstantInt::get(
IntTy, i)),
823 Values.push_back(
Builder.CreateBitCast(
V, ParamType));
825 Values.push_back(Ldm);
832 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
833 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
834 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
835 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
836 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
837 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
838 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
839 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
840 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
841 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
842 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
843 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
844 case NVPTX::BI__imma_m16n16k16_mma_s8:
845 case NVPTX::BI__imma_m16n16k16_mma_u8:
846 case NVPTX::BI__imma_m32n8k16_mma_s8:
847 case NVPTX::BI__imma_m32n8k16_mma_u8:
848 case NVPTX::BI__imma_m8n32k16_mma_s8:
849 case NVPTX::BI__imma_m8n32k16_mma_u8:
850 case NVPTX::BI__imma_m8n8k32_mma_s4:
851 case NVPTX::BI__imma_m8n8k32_mma_u4:
852 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
853 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
854 case NVPTX::BI__dmma_m8n8k4_mma_f64:
855 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
856 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
857 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
858 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
863 std::optional<llvm::APSInt> LayoutArg =
867 int Layout = LayoutArg->getSExtValue();
868 if (Layout < 0 || Layout > 3)
870 llvm::APSInt SatfArg;
871 if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
872 BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
874 else if (std::optional<llvm::APSInt> OptSatfArg =
876 SatfArg = *OptSatfArg;
879 bool Satf = SatfArg.getSExtValue();
880 NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
881 unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
887 llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
889 for (
unsigned i = 0; i < MI.NumEltsA; ++i) {
893 llvm::ConstantInt::get(
IntTy, i)),
895 Values.push_back(
Builder.CreateBitCast(
V, AType));
898 llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
899 for (
unsigned i = 0; i < MI.NumEltsB; ++i) {
903 llvm::ConstantInt::get(
IntTy, i)),
905 Values.push_back(
Builder.CreateBitCast(
V, BType));
909 Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
910 for (
unsigned i = 0; i < MI.NumEltsC; ++i) {
914 llvm::ConstantInt::get(
IntTy, i)),
916 Values.push_back(
Builder.CreateBitCast(
V, CType));
920 for (
unsigned i = 0; i < MI.NumEltsD; ++i)
924 llvm::ConstantInt::get(
IntTy, i)),
929 case NVPTX::BI__nvvm_ex2_approx_f16:
931 CGM.getIntrinsic(Intrinsic::nvvm_ex2_approx,
Builder.getHalfTy()),
932 BuiltinID, E, *
this);
933 case NVPTX::BI__nvvm_ex2_approx_f16x2:
935 CGM.getIntrinsic(Intrinsic::nvvm_ex2_approx,
936 FixedVectorType::get(
Builder.getHalfTy(), 2)),
937 BuiltinID, E, *
this);
938 case NVPTX::BI__nvvm_ff2f16x2_rn:
939 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *
this);
940 case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
941 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *
this);
942 case NVPTX::BI__nvvm_ff2f16x2_rz:
943 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *
this);
944 case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
945 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *
this);
946 case NVPTX::BI__nvvm_fma_rn_f16:
947 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *
this);
948 case NVPTX::BI__nvvm_fma_rn_f16x2:
949 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *
this);
950 case NVPTX::BI__nvvm_fma_rn_ftz_f16:
951 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *
this);
952 case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
953 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *
this);
954 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
955 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
957 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
958 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
960 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
961 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,
963 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
964 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,
966 case NVPTX::BI__nvvm_fma_rn_relu_f16:
967 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *
this);
968 case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
969 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *
this);
970 case NVPTX::BI__nvvm_fma_rn_sat_f16:
971 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *
this);
972 case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
973 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *
this);
974 case NVPTX::BI__nvvm_fma_rn_oob_f16:
975 return MakeFMAOOB(Intrinsic::nvvm_fma_rn_oob,
Builder.getHalfTy(), E,
977 case NVPTX::BI__nvvm_fma_rn_oob_f16x2:
978 return MakeFMAOOB(Intrinsic::nvvm_fma_rn_oob,
979 llvm::FixedVectorType::get(
Builder.getHalfTy(), 2), E,
981 case NVPTX::BI__nvvm_fma_rn_oob_bf16:
982 return MakeFMAOOB(Intrinsic::nvvm_fma_rn_oob,
Builder.getBFloatTy(), E,
984 case NVPTX::BI__nvvm_fma_rn_oob_bf16x2:
985 return MakeFMAOOB(Intrinsic::nvvm_fma_rn_oob,
986 llvm::FixedVectorType::get(
Builder.getBFloatTy(), 2), E,
988 case NVPTX::BI__nvvm_fma_rn_oob_relu_f16:
989 return MakeFMAOOB(Intrinsic::nvvm_fma_rn_oob_relu,
Builder.getHalfTy(), E,
991 case NVPTX::BI__nvvm_fma_rn_oob_relu_f16x2:
992 return MakeFMAOOB(Intrinsic::nvvm_fma_rn_oob_relu,
993 llvm::FixedVectorType::get(
Builder.getHalfTy(), 2), E,
995 case NVPTX::BI__nvvm_fma_rn_oob_relu_bf16:
996 return MakeFMAOOB(Intrinsic::nvvm_fma_rn_oob_relu,
Builder.getBFloatTy(), E,
998 case NVPTX::BI__nvvm_fma_rn_oob_relu_bf16x2:
999 return MakeFMAOOB(Intrinsic::nvvm_fma_rn_oob_relu,
1000 llvm::FixedVectorType::get(
Builder.getBFloatTy(), 2), E,
1002 case NVPTX::BI__nvvm_fmax_f16:
1003 return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *
this);
1004 case NVPTX::BI__nvvm_fmax_f16x2:
1005 return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *
this);
1006 case NVPTX::BI__nvvm_fmax_ftz_f16:
1007 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *
this);
1008 case NVPTX::BI__nvvm_fmax_ftz_f16x2:
1009 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *
this);
1010 case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
1011 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *
this);
1012 case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
1013 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
1015 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
1016 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
1018 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
1019 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
1020 BuiltinID, E, *
this);
1021 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
1022 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
1024 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
1025 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
1027 case NVPTX::BI__nvvm_fmax_nan_f16:
1028 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *
this);
1029 case NVPTX::BI__nvvm_fmax_nan_f16x2:
1030 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *
this);
1031 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
1032 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
1034 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
1035 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
1037 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
1038 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
1040 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
1041 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
1043 case NVPTX::BI__nvvm_fmin_f16:
1044 return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *
this);
1045 case NVPTX::BI__nvvm_fmin_f16x2:
1046 return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *
this);
1047 case NVPTX::BI__nvvm_fmin_ftz_f16:
1048 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *
this);
1049 case NVPTX::BI__nvvm_fmin_ftz_f16x2:
1050 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *
this);
1051 case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
1052 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *
this);
1053 case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
1054 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
1056 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
1057 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
1059 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
1060 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
1061 BuiltinID, E, *
this);
1062 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
1063 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
1065 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
1066 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
1068 case NVPTX::BI__nvvm_fmin_nan_f16:
1069 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *
this);
1070 case NVPTX::BI__nvvm_fmin_nan_f16x2:
1071 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *
this);
1072 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
1073 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
1075 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
1076 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
1078 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
1079 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
1081 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
1082 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
1084 case NVPTX::BI__nvvm_fabs_f:
1085 case NVPTX::BI__nvvm_abs_bf16:
1086 case NVPTX::BI__nvvm_abs_bf16x2:
1087 case NVPTX::BI__nvvm_fabs_f16:
1088 case NVPTX::BI__nvvm_fabs_f16x2:
1089 return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs,
1091 case NVPTX::BI__nvvm_fabs_ftz_f:
1092 case NVPTX::BI__nvvm_fabs_ftz_f16:
1093 case NVPTX::BI__nvvm_fabs_ftz_f16x2:
1094 return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs_ftz,
1096 case NVPTX::BI__nvvm_fabs_d:
1097 return Builder.CreateUnaryIntrinsic(Intrinsic::fabs,
1099 case NVPTX::BI__nvvm_ex2_approx_d:
1100 case NVPTX::BI__nvvm_ex2_approx_f:
1101 return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_ex2_approx,
1103 case NVPTX::BI__nvvm_ex2_approx_ftz_f:
1104 return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_ex2_approx_ftz,
1106 case NVPTX::BI__nvvm_ldg_h:
1107 case NVPTX::BI__nvvm_ldg_h2:
1108 return EnsureNativeHalfSupport(BuiltinID, E, *
this) ? MakeLdg(*
this, E)
1110 case NVPTX::BI__nvvm_ldu_h:
1111 case NVPTX::BI__nvvm_ldu_h2:
1112 return EnsureNativeHalfSupport(BuiltinID, E, *
this)
1113 ? MakeLdu(Intrinsic::nvvm_ldu_global_f, *
this, E)
1115 case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
1116 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
1117 Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *
this, E,
1119 case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
1120 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
1121 Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *
this, E,
1123 case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
1124 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
1125 Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *
this, E,
1127 case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
1128 return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
1129 Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *
this, E,
1131 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
1133 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
1134 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
1136 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
1137 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
1139 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
1140 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
1142 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
1143 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
1145 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
1146 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
1148 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
1149 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
1151 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
1152 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
1154 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
1155 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
1157 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
1158 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
1160 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
1161 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
1163 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
1164 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
1166 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
1167 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
1169 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
1170 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
1172 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
1173 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
1175 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
1176 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
1178 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
1179 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
1181 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
1182 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
1184 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
1185 case NVPTX::BI__nvvm_is_explicit_cluster:
1187 CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
1188 case NVPTX::BI__nvvm_isspacep_shared_cluster:
1190 CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
1192 case NVPTX::BI__nvvm_mapa:
1194 CGM.getIntrinsic(Intrinsic::nvvm_mapa),
1195 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
1196 case NVPTX::BI__nvvm_mapa_shared_cluster:
1198 CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
1199 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
1200 case NVPTX::BI__nvvm_getctarank:
1202 CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
1204 case NVPTX::BI__nvvm_getctarank_shared_cluster:
1206 CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
1208 case NVPTX::BI__nvvm_barrier_cluster_arrive:
1210 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));
1211 case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
1213 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
1214 case NVPTX::BI__nvvm_barrier_cluster_wait:
1216 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
1217 case NVPTX::BI__nvvm_fence_sc_cluster:
1219 CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
1220 case NVPTX::BI__nvvm_bar_sync:
1222 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all),
1224 case NVPTX::BI__syncthreads:
1226 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all),
1228 case NVPTX::BI__nvvm_barrier_sync:
1230 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all),
1232 case NVPTX::BI__nvvm_barrier_sync_cnt:
1234 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count),
1235 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
1236 case NVPTX::BI__nvvm_bar0_and:
1239 Intrinsic::nvvm_barrier_cta_red_and_aligned_all, {},
1240 {Builder.getInt32(0),
1241 Builder.CreateICmpNE(EmitScalarExpr(E->getArg(0)),
1242 Builder.getInt32(0))}),
1244 case NVPTX::BI__nvvm_bar0_or:
1247 Intrinsic::nvvm_barrier_cta_red_or_aligned_all, {},
1248 {Builder.getInt32(0),
1249 Builder.CreateICmpNE(EmitScalarExpr(E->getArg(0)),
1250 Builder.getInt32(0))}),
1252 case NVPTX::BI__nvvm_bar0_popc:
1253 return Builder.CreateIntrinsic(
1254 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all, {},