423 case NVPTX::BI__nvvm_atom_add_gen_i:
424 case NVPTX::BI__nvvm_atom_add_gen_l:
425 case NVPTX::BI__nvvm_atom_add_gen_ll:
428 case NVPTX::BI__nvvm_atom_sub_gen_i:
429 case NVPTX::BI__nvvm_atom_sub_gen_l:
430 case NVPTX::BI__nvvm_atom_sub_gen_ll:
433 case NVPTX::BI__nvvm_atom_and_gen_i:
434 case NVPTX::BI__nvvm_atom_and_gen_l:
435 case NVPTX::BI__nvvm_atom_and_gen_ll:
438 case NVPTX::BI__nvvm_atom_or_gen_i:
439 case NVPTX::BI__nvvm_atom_or_gen_l:
440 case NVPTX::BI__nvvm_atom_or_gen_ll:
443 case NVPTX::BI__nvvm_atom_xor_gen_i:
444 case NVPTX::BI__nvvm_atom_xor_gen_l:
445 case NVPTX::BI__nvvm_atom_xor_gen_ll:
448 case NVPTX::BI__nvvm_atom_xchg_gen_i:
449 case NVPTX::BI__nvvm_atom_xchg_gen_l:
450 case NVPTX::BI__nvvm_atom_xchg_gen_ll:
453 case NVPTX::BI__nvvm_atom_max_gen_i:
454 case NVPTX::BI__nvvm_atom_max_gen_l:
455 case NVPTX::BI__nvvm_atom_max_gen_ll:
458 case NVPTX::BI__nvvm_atom_max_gen_ui:
459 case NVPTX::BI__nvvm_atom_max_gen_ul:
460 case NVPTX::BI__nvvm_atom_max_gen_ull:
463 case NVPTX::BI__nvvm_atom_min_gen_i:
464 case NVPTX::BI__nvvm_atom_min_gen_l:
465 case NVPTX::BI__nvvm_atom_min_gen_ll:
468 case NVPTX::BI__nvvm_atom_min_gen_ui:
469 case NVPTX::BI__nvvm_atom_min_gen_ul:
470 case NVPTX::BI__nvvm_atom_min_gen_ull:
473 case NVPTX::BI__nvvm_atom_cas_gen_us:
474 case NVPTX::BI__nvvm_atom_cas_gen_i:
475 case NVPTX::BI__nvvm_atom_cas_gen_l:
476 case NVPTX::BI__nvvm_atom_cas_gen_ll:
481 case NVPTX::BI__nvvm_atom_add_gen_f:
482 case NVPTX::BI__nvvm_atom_add_gen_d: {
486 return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,
487 AtomicOrdering::SequentiallyConsistent);
490 case NVPTX::BI__nvvm_atom_inc_gen_ui:
493 case NVPTX::BI__nvvm_atom_dec_gen_ui:
496 case NVPTX::BI__nvvm_ldg_c:
497 case NVPTX::BI__nvvm_ldg_sc:
498 case NVPTX::BI__nvvm_ldg_c2:
499 case NVPTX::BI__nvvm_ldg_sc2:
500 case NVPTX::BI__nvvm_ldg_c4:
501 case NVPTX::BI__nvvm_ldg_sc4:
502 case NVPTX::BI__nvvm_ldg_s:
503 case NVPTX::BI__nvvm_ldg_s2:
504 case NVPTX::BI__nvvm_ldg_s4:
505 case NVPTX::BI__nvvm_ldg_i:
506 case NVPTX::BI__nvvm_ldg_i2:
507 case NVPTX::BI__nvvm_ldg_i4:
508 case NVPTX::BI__nvvm_ldg_l:
509 case NVPTX::BI__nvvm_ldg_l2:
510 case NVPTX::BI__nvvm_ldg_ll:
511 case NVPTX::BI__nvvm_ldg_ll2:
512 case NVPTX::BI__nvvm_ldg_uc:
513 case NVPTX::BI__nvvm_ldg_uc2:
514 case NVPTX::BI__nvvm_ldg_uc4:
515 case NVPTX::BI__nvvm_ldg_us:
516 case NVPTX::BI__nvvm_ldg_us2:
517 case NVPTX::BI__nvvm_ldg_us4:
518 case NVPTX::BI__nvvm_ldg_ui:
519 case NVPTX::BI__nvvm_ldg_ui2:
520 case NVPTX::BI__nvvm_ldg_ui4:
521 case NVPTX::BI__nvvm_ldg_ul:
522 case NVPTX::BI__nvvm_ldg_ul2:
523 case NVPTX::BI__nvvm_ldg_ull:
524 case NVPTX::BI__nvvm_ldg_ull2:
525 case NVPTX::BI__nvvm_ldg_f:
526 case NVPTX::BI__nvvm_ldg_f2:
527 case NVPTX::BI__nvvm_ldg_f4:
528 case NVPTX::BI__nvvm_ldg_d:
529 case NVPTX::BI__nvvm_ldg_d2:
533 return MakeLdg(*
this, E);
535 case NVPTX::BI__nvvm_ldu_c:
536 case NVPTX::BI__nvvm_ldu_sc:
537 case NVPTX::BI__nvvm_ldu_c2:
538 case NVPTX::BI__nvvm_ldu_sc2:
539 case NVPTX::BI__nvvm_ldu_c4:
540 case NVPTX::BI__nvvm_ldu_sc4:
541 case NVPTX::BI__nvvm_ldu_s:
542 case NVPTX::BI__nvvm_ldu_s2:
543 case NVPTX::BI__nvvm_ldu_s4:
544 case NVPTX::BI__nvvm_ldu_i:
545 case NVPTX::BI__nvvm_ldu_i2:
546 case NVPTX::BI__nvvm_ldu_i4:
547 case NVPTX::BI__nvvm_ldu_l:
548 case NVPTX::BI__nvvm_ldu_l2:
549 case NVPTX::BI__nvvm_ldu_ll:
550 case NVPTX::BI__nvvm_ldu_ll2:
551 case NVPTX::BI__nvvm_ldu_uc:
552 case NVPTX::BI__nvvm_ldu_uc2:
553 case NVPTX::BI__nvvm_ldu_uc4:
554 case NVPTX::BI__nvvm_ldu_us:
555 case NVPTX::BI__nvvm_ldu_us2:
556 case NVPTX::BI__nvvm_ldu_us4:
557 case NVPTX::BI__nvvm_ldu_ui:
558 case NVPTX::BI__nvvm_ldu_ui2:
559 case NVPTX::BI__nvvm_ldu_ui4:
560 case NVPTX::BI__nvvm_ldu_ul:
561 case NVPTX::BI__nvvm_ldu_ul2:
562 case NVPTX::BI__nvvm_ldu_ull:
563 case NVPTX::BI__nvvm_ldu_ull2:
564 return MakeLdu(Intrinsic::nvvm_ldu_global_i, *
this, E);
565 case NVPTX::BI__nvvm_ldu_f:
566 case NVPTX::BI__nvvm_ldu_f2:
567 case NVPTX::BI__nvvm_ldu_f4:
568 case NVPTX::BI__nvvm_ldu_d:
569 case NVPTX::BI__nvvm_ldu_d2:
570 return MakeLdu(Intrinsic::nvvm_ldu_global_f, *
this, E);
572 case NVPTX::BI__nvvm_atom_cta_add_gen_i:
573 case NVPTX::BI__nvvm_atom_cta_add_gen_l:
574 case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
575 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *
this, E);
576 case NVPTX::BI__nvvm_atom_sys_add_gen_i:
577 case NVPTX::BI__nvvm_atom_sys_add_gen_l:
578 case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
579 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *
this, E);
580 case NVPTX::BI__nvvm_atom_cta_add_gen_f:
581 case NVPTX::BI__nvvm_atom_cta_add_gen_d:
582 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *
this, E);
583 case NVPTX::BI__nvvm_atom_sys_add_gen_f:
584 case NVPTX::BI__nvvm_atom_sys_add_gen_d:
585 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *
this, E);
586 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
587 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
588 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
589 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *
this, E);
590 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
591 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
592 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
593 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *
this, E);
594 case NVPTX::BI__nvvm_atom_cta_max_gen_i:
595 case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
596 case NVPTX::BI__nvvm_atom_cta_max_gen_l:
597 case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
598 case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
599 case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
600 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *
this, E);
601 case NVPTX::BI__nvvm_atom_sys_max_gen_i:
602 case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
603 case NVPTX::BI__nvvm_atom_sys_max_gen_l:
604 case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
605 case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
606 case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
607 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *
this, E);
608 case NVPTX::BI__nvvm_atom_cta_min_gen_i:
609 case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
610 case NVPTX::BI__nvvm_atom_cta_min_gen_l:
611 case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
612 case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
613 case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
614 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *
this, E);
615 case NVPTX::BI__nvvm_atom_sys_min_gen_i:
616 case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
617 case NVPTX::BI__nvvm_atom_sys_min_gen_l:
618 case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
619 case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
620 case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
621 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *
this, E);
622 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
623 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *
this, E);
624 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
625 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *
this, E);
626 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
627 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *
this, E);
628 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
629 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *
this, E);
630 case NVPTX::BI__nvvm_atom_cta_and_gen_i:
631 case NVPTX::BI__nvvm_atom_cta_and_gen_l:
632 case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
633 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *
this, E);
634 case NVPTX::BI__nvvm_atom_sys_and_gen_i:
635 case NVPTX::BI__nvvm_atom_sys_and_gen_l:
636 case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
637 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *
this, E);
638 case NVPTX::BI__nvvm_atom_cta_or_gen_i:
639 case NVPTX::BI__nvvm_atom_cta_or_gen_l:
640 case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
641 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *
this, E);
642 case NVPTX::BI__nvvm_atom_sys_or_gen_i:
643 case NVPTX::BI__nvvm_atom_sys_or_gen_l:
644 case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
645 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *
this, E);
646 case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
647 case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
648 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
649 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *
this, E);
650 case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
651 case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
652 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
653 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *
this, E);
654 case NVPTX::BI__nvvm_atom_cta_cas_gen_us:
655 case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
656 case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
657 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
663 Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
664 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
666 case NVPTX::BI__nvvm_atom_sys_cas_gen_us:
667 case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
668 case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
669 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
675 Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
676 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
678 case NVPTX::BI__nvvm_match_all_sync_i32p:
679 case NVPTX::BI__nvvm_match_all_sync_i64p: {
684 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
685 ? Intrinsic::nvvm_match_all_sync_i32p
686 : Intrinsic::nvvm_match_all_sync_i64p),
690 Builder.CreateStore(Pred, PredOutPtr);
691 return Builder.CreateExtractValue(ResultPair, 0);
695 case NVPTX::BI__hmma_m16n16k16_ld_a:
696 case NVPTX::BI__hmma_m16n16k16_ld_b:
697 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
698 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
699 case NVPTX::BI__hmma_m32n8k16_ld_a:
700 case NVPTX::BI__hmma_m32n8k16_ld_b:
701 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
702 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
703 case NVPTX::BI__hmma_m8n32k16_ld_a:
704 case NVPTX::BI__hmma_m8n32k16_ld_b:
705 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
706 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
708 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
709 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
710 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
711 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
712 case NVPTX::BI__imma_m16n16k16_ld_c:
713 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
714 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
715 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
716 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
717 case NVPTX::BI__imma_m32n8k16_ld_c:
718 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
719 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
720 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
721 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
722 case NVPTX::BI__imma_m8n32k16_ld_c:
724 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
725 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
726 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
727 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
728 case NVPTX::BI__imma_m8n8k32_ld_c:
729 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
730 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
731 case NVPTX::BI__bmma_m8n8k128_ld_c:
733 case NVPTX::BI__dmma_m8n8k4_ld_a:
734 case NVPTX::BI__dmma_m8n8k4_ld_b:
735 case NVPTX::BI__dmma_m8n8k4_ld_c:
737 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
738 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
739 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
740 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
741 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
742 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
743 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
744 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
745 case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
749 std::optional<llvm::APSInt> isColMajorArg =
753 bool isColMajor = isColMajorArg->getSExtValue();
754 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
755 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
763 assert(II.NumResults);
764 if (II.NumResults == 1) {
768 for (
unsigned i = 0; i < II.NumResults; ++i) {
773 llvm::ConstantInt::get(
IntTy, i)),
780 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
781 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
782 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
783 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
784 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
785 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
786 case NVPTX::BI__imma_m16n16k16_st_c_i32:
787 case NVPTX::BI__imma_m32n8k16_st_c_i32:
788 case NVPTX::BI__imma_m8n32k16_st_c_i32:
789 case NVPTX::BI__imma_m8n8k32_st_c_i32:
790 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
791 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
792 case NVPTX::BI__mma_m16n16k8_st_c_f32: {
796 std::optional<llvm::APSInt> isColMajorArg =
800 bool isColMajor = isColMajorArg->getSExtValue();
801 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
802 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
807 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
809 for (
unsigned i = 0; i < II.NumResults; ++i) {
813 llvm::ConstantInt::get(
IntTy, i)),
815 Values.push_back(
Builder.CreateBitCast(
V, ParamType));
817 Values.push_back(Ldm);
824 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
825 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
826 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
827 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
828 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
829 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
830 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
831 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
832 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
833 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
834 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
835 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
836 case NVPTX::BI__imma_m16n16k16_mma_s8:
837 case NVPTX::BI__imma_m16n16k16_mma_u8:
838 case NVPTX::BI__imma_m32n8k16_mma_s8:
839 case NVPTX::BI__imma_m32n8k16_mma_u8:
840 case NVPTX::BI__imma_m8n32k16_mma_s8:
841 case NVPTX::BI__imma_m8n32k16_mma_u8:
842 case NVPTX::BI__imma_m8n8k32_mma_s4:
843 case NVPTX::BI__imma_m8n8k32_mma_u4:
844 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
845 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
846 case NVPTX::BI__dmma_m8n8k4_mma_f64:
847 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
848 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
849 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
850 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
855 std::optional<llvm::APSInt> LayoutArg =
859 int Layout = LayoutArg->getSExtValue();
860 if (Layout < 0 || Layout > 3)
862 llvm::APSInt SatfArg;
863 if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
864 BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
866 else if (std::optional<llvm::APSInt> OptSatfArg =
868 SatfArg = *OptSatfArg;
871 bool Satf = SatfArg.getSExtValue();
872 NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
873 unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
879 llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
881 for (
unsigned i = 0; i < MI.NumEltsA; ++i) {
885 llvm::ConstantInt::get(
IntTy, i)),
887 Values.push_back(
Builder.CreateBitCast(
V, AType));
890 llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
891 for (
unsigned i = 0; i < MI.NumEltsB; ++i) {
895 llvm::ConstantInt::get(
IntTy, i)),
897 Values.push_back(
Builder.CreateBitCast(
V, BType));
901 Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
902 for (
unsigned i = 0; i < MI.NumEltsC; ++i) {
906 llvm::ConstantInt::get(
IntTy, i)),
908 Values.push_back(
Builder.CreateBitCast(
V, CType));
912 for (
unsigned i = 0; i < MI.NumEltsD; ++i)
916 llvm::ConstantInt::get(
IntTy, i)),
921 case NVPTX::BI__nvvm_ex2_approx_f16:
923 CGM.getIntrinsic(Intrinsic::nvvm_ex2_approx,
Builder.getHalfTy()),
924 BuiltinID, E, *
this);
925 case NVPTX::BI__nvvm_ex2_approx_f16x2:
927 CGM.getIntrinsic(Intrinsic::nvvm_ex2_approx,
928 FixedVectorType::get(
Builder.getHalfTy(), 2)),
929 BuiltinID, E, *
this);
930 case NVPTX::BI__nvvm_ff2f16x2_rn:
931 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *
this);
932 case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
933 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *
this);
934 case NVPTX::BI__nvvm_ff2f16x2_rz:
935 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *
this);
936 case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
937 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *
this);
938 case NVPTX::BI__nvvm_fma_rn_f16:
939 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *
this);
940 case NVPTX::BI__nvvm_fma_rn_f16x2:
941 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *
this);
942 case NVPTX::BI__nvvm_fma_rn_ftz_f16:
943 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *
this);
944 case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
945 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *
this);
946 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
947 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
949 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
950 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
952 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
953 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,
955 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
956 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,
958 case NVPTX::BI__nvvm_fma_rn_relu_f16:
959 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *
this);
960 case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
961 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *
this);
962 case NVPTX::BI__nvvm_fma_rn_sat_f16:
963 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *
this);
964 case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
965 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *
this);
966 case NVPTX::BI__nvvm_fmax_f16:
967 return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *
this);
968 case NVPTX::BI__nvvm_fmax_f16x2:
969 return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *
this);
970 case NVPTX::BI__nvvm_fmax_ftz_f16:
971 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *
this);
972 case NVPTX::BI__nvvm_fmax_ftz_f16x2:
973 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *
this);
974 case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
975 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *
this);
976 case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
977 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
979 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
980 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
982 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
983 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
984 BuiltinID, E, *
this);
985 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
986 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
988 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
989 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
991 case NVPTX::BI__nvvm_fmax_nan_f16:
992 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *
this);
993 case NVPTX::BI__nvvm_fmax_nan_f16x2:
994 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *
this);
995 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
996 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
998 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
999 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
1001 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
1002 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
1004 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
1005 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
1007 case NVPTX::BI__nvvm_fmin_f16:
1008 return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *
this);
1009 case NVPTX::BI__nvvm_fmin_f16x2:
1010 return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *
this);
1011 case NVPTX::BI__nvvm_fmin_ftz_f16:
1012 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *
this);
1013 case NVPTX::BI__nvvm_fmin_ftz_f16x2:
1014 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *
this);
1015 case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
1016 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *
this);
1017 case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
1018 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
1020 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
1021 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
1023 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
1024 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
1025 BuiltinID, E, *
this);
1026 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
1027 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
1029 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
1030 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
1032 case NVPTX::BI__nvvm_fmin_nan_f16:
1033 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *
this);
1034 case NVPTX::BI__nvvm_fmin_nan_f16x2:
1035 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *
this);
1036 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
1037 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
1039 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
1040 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
1042 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
1043 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
1045 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
1046 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
1048 case NVPTX::BI__nvvm_fabs_f:
1049 case NVPTX::BI__nvvm_abs_bf16:
1050 case NVPTX::BI__nvvm_abs_bf16x2:
1051 case NVPTX::BI__nvvm_fabs_f16:
1052 case NVPTX::BI__nvvm_fabs_f16x2:
1053 return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs,
1055 case NVPTX::BI__nvvm_fabs_ftz_f:
1056 case NVPTX::BI__nvvm_fabs_ftz_f16:
1057 case NVPTX::BI__nvvm_fabs_ftz_f16x2:
1058 return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs_ftz,
1060 case NVPTX::BI__nvvm_fabs_d:
1061 return Builder.CreateUnaryIntrinsic(Intrinsic::fabs,
1063 case NVPTX::BI__nvvm_ex2_approx_d:
1064 case NVPTX::BI__nvvm_ex2_approx_f:
1065 return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_ex2_approx,
1067 case NVPTX::BI__nvvm_ex2_approx_ftz_f:
1068 return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_ex2_approx_ftz,
1070 case NVPTX::BI__nvvm_ldg_h:
1071 case NVPTX::BI__nvvm_ldg_h2:
1072 return EnsureNativeHalfSupport(BuiltinID, E, *
this) ? MakeLdg(*
this, E)
1074 case NVPTX::BI__nvvm_ldu_h:
1075 case NVPTX::BI__nvvm_ldu_h2:
1076 return EnsureNativeHalfSupport(BuiltinID, E, *
this)
1077 ? MakeLdu(Intrinsic::nvvm_ldu_global_f, *
this, E)
1079 case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
1080 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
1081 Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *
this, E,
1083 case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
1084 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
1085 Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *
this, E,
1087 case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
1088 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
1089 Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *
this, E,
1091 case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
1092 return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
1093 Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *
this, E,
1095 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
1097 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
1098 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
1100 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
1101 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
1103 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
1104 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
1106 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
1107 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
1109 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
1110 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
1112 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
1113 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
1115 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
1116 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
1118 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
1119 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
1121 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
1122 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
1124 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
1125 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
1127 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
1128 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
1130 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
1131 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
1133 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
1134 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
1136 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
1137 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
1139 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
1140 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
1142 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
1143 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
1145 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
1146 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
1148 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
1149 case NVPTX::BI__nvvm_is_explicit_cluster:
1151 CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
1152 case NVPTX::BI__nvvm_isspacep_shared_cluster:
1154 CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
1156 case NVPTX::BI__nvvm_mapa:
1158 CGM.getIntrinsic(Intrinsic::nvvm_mapa),
1159 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
1160 case NVPTX::BI__nvvm_mapa_shared_cluster:
1162 CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
1163 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
1164 case NVPTX::BI__nvvm_getctarank:
1166 CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
1168 case NVPTX::BI__nvvm_getctarank_shared_cluster:
1170 CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
1172 case NVPTX::BI__nvvm_barrier_cluster_arrive:
1174 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));
1175 case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
1177 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
1178 case NVPTX::BI__nvvm_barrier_cluster_wait:
1180 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
1181 case NVPTX::BI__nvvm_fence_sc_cluster:
1183 CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
1184 case NVPTX::BI__nvvm_bar_sync:
1186 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all),
1188 case NVPTX::BI__syncthreads:
1190 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all),
1192 case NVPTX::BI__nvvm_barrier_sync:
1194 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all),
1196 case NVPTX::BI__nvvm_barrier_sync_cnt:
1198 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count),
1199 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});