350 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
351 llvm::SyncScope::ID SSID;
353 case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
354 case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
355 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
356 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
357 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
358 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
359 case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
360 case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
361 case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
362 case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
363 case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
364 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
365 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
366 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
367 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
368 case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
369 case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
370 case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64: {
377 case AMDGPU::BI__builtin_amdgcn_div_scale:
378 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
388 llvm::Function *Callee =
CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
391 llvm::Value *Tmp =
Builder.CreateCall(Callee, {
X, Y, Z});
394 llvm::Value *Flag =
Builder.CreateExtractValue(Tmp, 1);
398 llvm::Value *FlagExt =
Builder.CreateZExt(Flag, RealFlagType);
399 Builder.CreateStore(FlagExt, FlagOutPtr);
402 case AMDGPU::BI__builtin_amdgcn_div_fmas:
403 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
409 llvm::Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
411 llvm::Value *Src3ToBool =
Builder.CreateIsNotNull(Src3);
412 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
415 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
417 Intrinsic::amdgcn_ds_swizzle);
418 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
419 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
420 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
424 unsigned ICEArguments = 0;
429 unsigned Size = DataTy->getPrimitiveSizeInBits();
431 llvm::IntegerType::get(
Builder.getContext(), std::max(Size, 32u));
433 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8
434 ? Intrinsic::amdgcn_mov_dpp8
435 : Intrinsic::amdgcn_update_dpp,
439 bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;
441 Args.push_back(llvm::PoisonValue::get(
IntTy));
442 for (
unsigned I = 0; I != E->
getNumArgs(); ++I) {
444 if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2u : 1u) &&
446 if (!DataTy->isIntegerTy())
448 V, llvm::IntegerType::get(
Builder.getContext(), Size));
452 F->getFunctionType()->getFunctionParamType(I + InsertOld);
453 Args.push_back(
Builder.CreateTruncOrBitCast(
V, ExpTy));
456 if (Size < 32 && !DataTy->isIntegerTy())
458 V, llvm::IntegerType::get(
Builder.getContext(), Size));
459 return Builder.CreateTruncOrBitCast(
V, DataTy);
461 case AMDGPU::BI__builtin_amdgcn_permlane16:
462 case AMDGPU::BI__builtin_amdgcn_permlanex16:
465 BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
466 ? Intrinsic::amdgcn_permlane16
467 : Intrinsic::amdgcn_permlanex16);
468 case AMDGPU::BI__builtin_amdgcn_permlane64:
470 Intrinsic::amdgcn_permlane64);
471 case AMDGPU::BI__builtin_amdgcn_readlane:
473 Intrinsic::amdgcn_readlane);
474 case AMDGPU::BI__builtin_amdgcn_readfirstlane:
476 Intrinsic::amdgcn_readfirstlane);
477 case AMDGPU::BI__builtin_amdgcn_div_fixup:
478 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
479 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
481 Intrinsic::amdgcn_div_fixup);
482 case AMDGPU::BI__builtin_amdgcn_trig_preop:
483 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
485 case AMDGPU::BI__builtin_amdgcn_rcp:
486 case AMDGPU::BI__builtin_amdgcn_rcpf:
487 case AMDGPU::BI__builtin_amdgcn_rcph:
488 case AMDGPU::BI__builtin_amdgcn_rcp_bf16:
490 case AMDGPU::BI__builtin_amdgcn_sqrt:
491 case AMDGPU::BI__builtin_amdgcn_sqrtf:
492 case AMDGPU::BI__builtin_amdgcn_sqrth:
493 case AMDGPU::BI__builtin_amdgcn_sqrt_bf16:
495 Intrinsic::amdgcn_sqrt);
496 case AMDGPU::BI__builtin_amdgcn_rsq:
497 case AMDGPU::BI__builtin_amdgcn_rsqf:
498 case AMDGPU::BI__builtin_amdgcn_rsqh:
499 case AMDGPU::BI__builtin_amdgcn_rsq_bf16:
501 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
502 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
504 Intrinsic::amdgcn_rsq_clamp);
505 case AMDGPU::BI__builtin_amdgcn_sinf:
506 case AMDGPU::BI__builtin_amdgcn_sinh:
507 case AMDGPU::BI__builtin_amdgcn_sin_bf16:
509 case AMDGPU::BI__builtin_amdgcn_cosf:
510 case AMDGPU::BI__builtin_amdgcn_cosh:
511 case AMDGPU::BI__builtin_amdgcn_cos_bf16:
513 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
514 return EmitAMDGPUDispatchPtr(*
this, E);
515 case AMDGPU::BI__builtin_amdgcn_logf:
516 case AMDGPU::BI__builtin_amdgcn_log_bf16:
518 case AMDGPU::BI__builtin_amdgcn_exp2f:
519 case AMDGPU::BI__builtin_amdgcn_exp2_bf16:
521 Intrinsic::amdgcn_exp2);
522 case AMDGPU::BI__builtin_amdgcn_log_clampf:
524 Intrinsic::amdgcn_log_clamp);
525 case AMDGPU::BI__builtin_amdgcn_ldexp:
526 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
530 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
531 return Builder.CreateCall(F, {Src0, Src1});
533 case AMDGPU::BI__builtin_amdgcn_ldexph: {
539 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(),
Int16Ty});
542 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
543 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
544 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
546 Intrinsic::amdgcn_frexp_mant);
547 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
548 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
550 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
552 return Builder.CreateCall(F, Src0);
554 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
556 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
558 return Builder.CreateCall(F, Src0);
560 case AMDGPU::BI__builtin_amdgcn_fract:
561 case AMDGPU::BI__builtin_amdgcn_fractf:
562 case AMDGPU::BI__builtin_amdgcn_fracth:
564 Intrinsic::amdgcn_fract);
565 case AMDGPU::BI__builtin_amdgcn_lerp:
567 Intrinsic::amdgcn_lerp);
568 case AMDGPU::BI__builtin_amdgcn_ubfe:
570 Intrinsic::amdgcn_ubfe);
571 case AMDGPU::BI__builtin_amdgcn_sbfe:
573 Intrinsic::amdgcn_sbfe);
574 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
575 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
578 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
579 return Builder.CreateCall(F, { Src });
581 case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w32:
582 case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w64: {
585 CGM.getIntrinsic(Intrinsic::amdgcn_inverse_ballot, {Src->getType()});
586 return Builder.CreateCall(F, {Src});
588 case AMDGPU::BI__builtin_amdgcn_tanhf:
589 case AMDGPU::BI__builtin_amdgcn_tanhh:
590 case AMDGPU::BI__builtin_amdgcn_tanh_bf16:
592 Intrinsic::amdgcn_tanh);
593 case AMDGPU::BI__builtin_amdgcn_uicmp:
594 case AMDGPU::BI__builtin_amdgcn_uicmpl:
595 case AMDGPU::BI__builtin_amdgcn_sicmp:
596 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
602 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
603 {
Builder.getInt64Ty(), Src0->getType() });
604 return Builder.CreateCall(F, { Src0, Src1, Src2 });
606 case AMDGPU::BI__builtin_amdgcn_fcmp:
607 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
613 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
614 {
Builder.getInt64Ty(), Src0->getType() });
615 return Builder.CreateCall(F, { Src0, Src1, Src2 });
617 case AMDGPU::BI__builtin_amdgcn_class:
618 case AMDGPU::BI__builtin_amdgcn_classf:
619 case AMDGPU::BI__builtin_amdgcn_classh:
621 case AMDGPU::BI__builtin_amdgcn_fmed3f:
622 case AMDGPU::BI__builtin_amdgcn_fmed3h:
624 Intrinsic::amdgcn_fmed3);
625 case AMDGPU::BI__builtin_amdgcn_ds_append:
626 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
627 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
628 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
633 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
634 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
635 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
636 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
637 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
638 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
639 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
640 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
641 case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
642 case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
643 case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
644 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
645 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
646 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
647 case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
648 case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
649 case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
650 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
651 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
652 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
653 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
654 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
655 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
656 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
657 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
658 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16: {
661 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
662 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
663 case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
664 IID = Intrinsic::amdgcn_global_load_tr_b64;
666 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
667 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
668 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
669 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
670 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
671 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
672 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
673 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
674 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
675 IID = Intrinsic::amdgcn_global_load_tr_b128;
677 case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
678 IID = Intrinsic::amdgcn_global_load_tr4_b64;
680 case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
681 IID = Intrinsic::amdgcn_global_load_tr6_b96;
683 case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
684 IID = Intrinsic::amdgcn_ds_load_tr4_b64;
686 case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
687 IID = Intrinsic::amdgcn_ds_load_tr6_b96;
689 case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
690 IID = Intrinsic::amdgcn_ds_load_tr8_b64;
692 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
693 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
694 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
695 IID = Intrinsic::amdgcn_ds_load_tr16_b128;
697 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
698 IID = Intrinsic::amdgcn_ds_read_tr4_b64;
700 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
701 IID = Intrinsic::amdgcn_ds_read_tr8_b64;
703 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
704 IID = Intrinsic::amdgcn_ds_read_tr6_b96;
706 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16:
707 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
708 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
709 IID = Intrinsic::amdgcn_ds_read_tr16_b64;
714 llvm::Function *F =
CGM.getIntrinsic(IID, {LoadTy});
717 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
718 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
719 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
720 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
721 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
722 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128: {
726 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
727 IID = Intrinsic::amdgcn_global_load_monitor_b32;
729 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
730 IID = Intrinsic::amdgcn_global_load_monitor_b64;
732 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
733 IID = Intrinsic::amdgcn_global_load_monitor_b128;
735 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
736 IID = Intrinsic::amdgcn_flat_load_monitor_b32;
738 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
739 IID = Intrinsic::amdgcn_flat_load_monitor_b64;
741 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128:
742 IID = Intrinsic::amdgcn_flat_load_monitor_b128;
749 llvm::Function *F =
CGM.getIntrinsic(IID, {LoadTy});
752 case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:
753 case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:
754 case AMDGPU::BI__builtin_amdgcn_cluster_load_b128: {
757 case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:
758 IID = Intrinsic::amdgcn_cluster_load_b32;
760 case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:
761 IID = Intrinsic::amdgcn_cluster_load_b64;
763 case AMDGPU::BI__builtin_amdgcn_cluster_load_b128:
764 IID = Intrinsic::amdgcn_cluster_load_b128;
768 for (
int i = 0, e = E->
getNumArgs(); i != e; ++i)
771 return Builder.CreateCall(F, {Args});
773 case AMDGPU::BI__builtin_amdgcn_load_to_lds: {
776 Intrinsic::amdgcn_load_to_lds);
778 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
779 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
780 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
781 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
782 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
783 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B: {
786 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
787 IID = Intrinsic::amdgcn_cooperative_atomic_load_32x4B;
789 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
790 IID = Intrinsic::amdgcn_cooperative_atomic_store_32x4B;
792 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
793 IID = Intrinsic::amdgcn_cooperative_atomic_load_16x8B;
795 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
796 IID = Intrinsic::amdgcn_cooperative_atomic_store_16x8B;
798 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
799 IID = Intrinsic::amdgcn_cooperative_atomic_load_8x16B;
801 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B:
802 IID = Intrinsic::amdgcn_cooperative_atomic_store_8x16B;
806 LLVMContext &Ctx =
CGM.getLLVMContext();
809 const unsigned ScopeArg = E->
getNumArgs() - 1;
810 for (
unsigned i = 0; i != ScopeArg; ++i)
814 llvm::MDNode *MD = llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, Arg)});
815 Args.push_back(llvm::MetadataAsValue::get(Ctx, MD));
818 llvm::Function *F =
CGM.getIntrinsic(IID, {Args[0]->getType()});
819 return Builder.CreateCall(F, {Args});
821 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
822 Function *F =
CGM.getIntrinsic(Intrinsic::get_fpenv,
826 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
827 Function *F =
CGM.getIntrinsic(Intrinsic::set_fpenv,
830 return Builder.CreateCall(F, {Env});
832 case AMDGPU::BI__builtin_amdgcn_read_exec:
834 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
836 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
838 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
839 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
840 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
841 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
851 RayOrigin =
Builder.CreateShuffleVector(RayOrigin, RayOrigin,
854 Builder.CreateShuffleVector(RayDir, RayDir, {0, 1, 2});
855 RayInverseDir =
Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
858 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
859 {NodePtr->getType(), RayDir->getType()});
860 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
861 RayInverseDir, TextureDescr});
863 case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
864 case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray: {
867 case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
868 IID = Intrinsic::amdgcn_image_bvh8_intersect_ray;
870 case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray:
871 IID = Intrinsic::amdgcn_image_bvh_dual_intersect_ray;
885 llvm::Function *IntrinsicFunc =
CGM.getIntrinsic(IID);
887 llvm::CallInst *CI =
Builder.CreateCall(
888 IntrinsicFunc, {NodePtr, RayExtent, InstanceMask, RayOrigin, RayDir,
889 Offset, TextureDescr});
891 llvm::Value *RetVData =
Builder.CreateExtractValue(CI, 0);
892 llvm::Value *RetRayOrigin =
Builder.CreateExtractValue(CI, 1);
893 llvm::Value *RetRayDir =
Builder.CreateExtractValue(CI, 2);
895 Builder.CreateStore(RetRayOrigin, RetRayOriginPtr);
896 Builder.CreateStore(RetRayDir, RetRayDirPtr);
901 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
902 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
903 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
904 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn: {
907 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
908 IID = Intrinsic::amdgcn_ds_bvh_stack_rtn;
910 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
911 IID = Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn;
913 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
914 IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn;
916 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn:
917 IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn;
922 for (
int i = 0, e = E->
getNumArgs(); i != e; ++i)
930 Value *I0 =
Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
934 if (A->
getType()->getPrimitiveSizeInBits() <
935 RetTy->getScalarType()->getPrimitiveSizeInBits())
936 A =
Builder.CreateZExt(A, RetTy->getScalarType());
938 return Builder.CreateInsertElement(I0, A, 1);
940 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
941 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
942 llvm::FixedVectorType *VT = FixedVectorType::get(
Builder.getInt32Ty(), 8);
944 BuiltinID == AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4
945 ? Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4
946 : Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4,
950 for (
unsigned I = 0, N = E->
getNumArgs(); I != N; ++I)
952 return Builder.CreateCall(F, Args);
954 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
955 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
956 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
957 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
958 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
959 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
960 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
961 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
962 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
963 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
964 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
965 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
966 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
967 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
968 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
969 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
970 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
971 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
972 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
973 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
974 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
975 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
976 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
977 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
978 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
979 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
980 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
981 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
982 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
983 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
984 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
985 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
986 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
987 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
988 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
989 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
990 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
991 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
992 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
993 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
994 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
995 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
996 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
997 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
998 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
999 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
1000 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
1001 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
1002 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
1003 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
1004 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
1005 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
1006 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
1007 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
1008 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
1009 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
1010 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
1011 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
1012 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
1013 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
1015 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
1016 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
1017 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
1018 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
1019 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
1020 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
1021 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
1022 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
1023 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
1024 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
1025 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
1026 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
1027 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
1028 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
1029 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
1030 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
1031 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
1032 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
1033 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
1034 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
1035 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
1036 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
1037 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
1038 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
1039 case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
1040 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1041 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
1042 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:
1043 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:
1044 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
1045 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
1046 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
1047 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
1048 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
1049 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
1050 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
1051 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
1052 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
1053 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
1054 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
1055 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
1056 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
1057 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8: {
1070 bool AppendFalseForOpselArg =
false;
1071 unsigned BuiltinWMMAOp;
1073 bool NeedReturnType =
false;
1075 switch (BuiltinID) {
1076 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
1077 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
1078 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
1079 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
1080 ArgsForMatchingMatrixTypes = {2, 0};
1081 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
1083 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
1084 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
1085 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
1086 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
1087 ArgsForMatchingMatrixTypes = {2, 0};
1088 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
1090 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
1091 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
1092 AppendFalseForOpselArg =
true;
1094 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
1095 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
1096 ArgsForMatchingMatrixTypes = {2, 0};
1097 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
1099 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
1100 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
1101 AppendFalseForOpselArg =
true;
1103 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
1104 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
1105 ArgsForMatchingMatrixTypes = {2, 0};
1106 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
1108 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
1109 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
1110 ArgsForMatchingMatrixTypes = {2, 0};
1111 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
1113 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
1114 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
1115 ArgsForMatchingMatrixTypes = {2, 0};
1116 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
1118 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
1119 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
1120 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
1121 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
1122 ArgsForMatchingMatrixTypes = {4, 1};
1123 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
1125 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
1126 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
1127 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
1128 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
1129 ArgsForMatchingMatrixTypes = {4, 1};
1130 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
1132 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
1133 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
1134 ArgsForMatchingMatrixTypes = {2, 0};
1135 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
1137 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
1138 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
1139 ArgsForMatchingMatrixTypes = {2, 0};
1140 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
1142 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
1143 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
1144 ArgsForMatchingMatrixTypes = {2, 0};
1145 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
1147 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
1148 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
1149 ArgsForMatchingMatrixTypes = {2, 0};
1150 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
1152 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
1153 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
1154 ArgsForMatchingMatrixTypes = {4, 1};
1155 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
1157 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
1158 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
1159 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1160 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
1162 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
1163 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
1164 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1165 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
1167 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
1168 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
1169 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1170 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
1172 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
1173 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
1174 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1175 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
1177 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
1178 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
1179 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1180 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
1182 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
1183 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
1184 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1185 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
1187 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
1188 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
1189 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1190 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
1192 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
1193 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
1194 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1195 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
1197 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
1198 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
1199 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1200 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
1202 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
1203 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
1204 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1205 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
1207 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
1208 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
1209 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1210 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
1213 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
1214 ArgsForMatchingMatrixTypes = {5, 1};
1215 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x4_f32;
1217 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
1218 ArgsForMatchingMatrixTypes = {5, 1};
1219 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_bf16;
1221 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
1222 ArgsForMatchingMatrixTypes = {5, 1};
1223 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_f16;
1225 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
1226 ArgsForMatchingMatrixTypes = {5, 1};
1227 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x32_f16;
1229 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
1230 ArgsForMatchingMatrixTypes = {5, 1};
1231 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16;
1233 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
1234 NeedReturnType =
true;
1235 ArgsForMatchingMatrixTypes = {1, 5};
1236 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16;
1238 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
1239 ArgsForMatchingMatrixTypes = {3, 0};
1240 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_fp8;
1242 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
1243 ArgsForMatchingMatrixTypes = {3, 0};
1244 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_bf8;
1246 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
1247 ArgsForMatchingMatrixTypes = {3, 0};
1248 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_fp8;
1250 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
1251 ArgsForMatchingMatrixTypes = {3, 0};
1252 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_bf8;
1254 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
1255 ArgsForMatchingMatrixTypes = {3, 0};
1256 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_fp8;
1258 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
1259 ArgsForMatchingMatrixTypes = {3, 0};
1260 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_bf8;
1262 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
1263 ArgsForMatchingMatrixTypes = {3, 0};
1264 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_fp8;
1266 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
1267 ArgsForMatchingMatrixTypes = {3, 0};
1268 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_bf8;
1270 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
1271 ArgsForMatchingMatrixTypes = {3, 0};
1272 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_fp8;
1274 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
1275 ArgsForMatchingMatrixTypes = {3, 0};
1276 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_bf8;
1278 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
1279 ArgsForMatchingMatrixTypes = {3, 0};
1280 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_fp8;
1282 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
1283 ArgsForMatchingMatrixTypes = {3, 0};
1284 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_bf8;
1286 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
1287 ArgsForMatchingMatrixTypes = {3, 0};
1288 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_fp8;
1290 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
1291 ArgsForMatchingMatrixTypes = {3, 0};
1292 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_bf8;
1294 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
1295 ArgsForMatchingMatrixTypes = {3, 0};
1296 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_fp8;
1298 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
1299 ArgsForMatchingMatrixTypes = {3, 0};
1300 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_bf8;
1302 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
1303 ArgsForMatchingMatrixTypes = {4, 1};
1304 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x64_iu8;
1306 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
1307 ArgsForMatchingMatrixTypes = {5, 1, 3};
1308 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4;
1310 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1311 ArgsForMatchingMatrixTypes = {5, 1, 3};
1312 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4;
1314 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
1315 ArgsForMatchingMatrixTypes = {5, 1, 3};
1316 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4;
1318 case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
1319 ArgsForMatchingMatrixTypes = {3, 0, 1};
1320 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_32x16x128_f4;
1322 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:
1323 ArgsForMatchingMatrixTypes = {3, 0, 1};
1324 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_32x16x128_f4;
1326 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:
1327 ArgsForMatchingMatrixTypes = {3, 0, 1};
1328 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_32x16x128_f4;
1330 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
1331 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1332 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_f16;
1334 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
1335 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1336 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_bf16;
1338 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
1339 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1340 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x64_f16;
1342 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
1343 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1344 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x64_bf16;
1346 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
1347 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1348 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16f32_16x16x64_bf16;
1350 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
1351 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1352 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_fp8;
1354 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
1355 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1356 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_bf8;
1358 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
1359 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1360 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_fp8;
1362 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
1363 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1364 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_bf8;
1366 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
1367 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1368 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_fp8;
1370 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
1371 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1372 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_bf8;
1374 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
1375 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1376 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_fp8;
1378 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
1379 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1380 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_bf8;
1382 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8:
1383 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1384 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8;
1389 for (
int i = 0, e = E->
getNumArgs(); i != e; ++i)
1391 if (AppendFalseForOpselArg)
1392 Args.push_back(
Builder.getFalse());
1397 for (
auto ArgIdx : ArgsForMatchingMatrixTypes)
1398 ArgTypes.push_back(Args[ArgIdx]->
getType());
1400 Function *F =
CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
1401 return Builder.CreateCall(F, Args);
1404 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
1405 return EmitAMDGPUWorkGroupSize(*
this, 0);
1406 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
1407 return EmitAMDGPUWorkGroupSize(*
this, 1);
1408 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
1409 return EmitAMDGPUWorkGroupSize(*
this, 2);
1412 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
1413 return EmitAMDGPUGridSize(*
this, 0);
1414 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
1415 return EmitAMDGPUGridSize(*
this, 1);
1416 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
1417 return EmitAMDGPUGridSize(*
this, 2);
1420 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
1421 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
1423 Intrinsic::r600_recipsqrt_ieee);
1424 case AMDGPU::BI__builtin_amdgcn_alignbit: {
1428 Function *F =
CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
1429 return Builder.CreateCall(F, { Src0, Src1, Src2 });
1431 case AMDGPU::BI__builtin_amdgcn_fence: {
1434 FenceInst *Fence =
Builder.CreateFence(AO, SSID);
1439 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
1440 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
1441 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
1442 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
1443 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
1444 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
1445 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
1446 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
1447 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
1448 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
1449 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
1450 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
1451 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
1452 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1453 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1454 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1455 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1456 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
1457 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1458 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1459 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
1460 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
1461 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
1462 llvm::AtomicRMWInst::BinOp BinOp;
1463 switch (BuiltinID) {
1464 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
1465 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
1466 BinOp = llvm::AtomicRMWInst::UIncWrap;
1468 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
1469 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
1470 BinOp = llvm::AtomicRMWInst::UDecWrap;
1472 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
1473 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
1474 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
1475 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
1476 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
1477 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
1478 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
1479 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1480 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1481 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1482 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1483 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
1484 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1485 BinOp = llvm::AtomicRMWInst::FAdd;
1487 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
1488 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1489 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
1490 BinOp = llvm::AtomicRMWInst::FMin;
1492 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
1493 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
1494 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
1495 BinOp = llvm::AtomicRMWInst::FMax;
1501 llvm::Type *OrigTy = Val->
getType();
1506 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
1507 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
1508 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
1533 AO = AtomicOrdering::Monotonic;
1536 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
1537 BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
1538 BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
1539 llvm::Type *V2BF16Ty = FixedVectorType::get(
1540 llvm::Type::getBFloatTy(
Builder.getContext()), 2);
1541 Val =
Builder.CreateBitCast(Val, V2BF16Ty);
1545 llvm::AtomicRMWInst *RMW =
1546 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
1548 RMW->setVolatile(
true);
1550 unsigned AddrSpace = Ptr.
getType()->getAddressSpace();
1551 if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
1555 RMW->setMetadata(
"amdgpu.no.fine.grained.memory", EmptyMD);
1559 if (BinOp == llvm::AtomicRMWInst::FAdd && Val->
getType()->isFloatTy())
1560 RMW->setMetadata(
"amdgpu.ignore.denormal.mode", EmptyMD);
1563 return Builder.CreateBitCast(RMW, OrigTy);
1565 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
1566 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
1571 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
1572 return Builder.CreateCall(F, {Arg});
1574 case AMDGPU::BI__builtin_amdgcn_permlane16_swap:
1575 case AMDGPU::BI__builtin_amdgcn_permlane32_swap: {
1583 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16_swap
1584 ? Intrinsic::amdgcn_permlane16_swap
1585 : Intrinsic::amdgcn_permlane32_swap);
1586 llvm::CallInst *
Call =
1587 Builder.CreateCall(F, {VDstOld, VSrcOld, FI, BoundCtrl});
1589 llvm::Value *Elt0 =
Builder.CreateExtractValue(
Call, 0);
1590 llvm::Value *Elt1 =
Builder.CreateExtractValue(
Call, 1);
1594 llvm::Value *Insert0 =
Builder.CreateInsertElement(
1595 llvm::PoisonValue::get(ResultType), Elt0, UINT64_C(0));
1596 llvm::Value *AsVector =
1597 Builder.CreateInsertElement(Insert0, Elt1, UINT64_C(1));
1600 case AMDGPU::BI__builtin_amdgcn_bitop3_b32:
1601 case AMDGPU::BI__builtin_amdgcn_bitop3_b16:
1603 Intrinsic::amdgcn_bitop3);
1604 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc: {
1609 for (
unsigned I = 0; I < 4; ++I)
1611 llvm::PointerType *RetTy = llvm::PointerType::get(
1612 Builder.getContext(), llvm::AMDGPUAS::BUFFER_RESOURCE);
1613 Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_make_buffer_rsrc,
1614 {RetTy, Args[0]->getType()});
1615 return Builder.CreateCall(F, Args);
1617 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
1618 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
1619 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
1620 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
1621 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
1622 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
1624 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
1625 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
1626 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
1627 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
1628 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
1629 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
1630 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
1631 llvm::Type *RetTy =
nullptr;
1632 switch (BuiltinID) {
1633 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
1636 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
1639 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
1642 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
1643 RetTy = llvm::FixedVectorType::get(
Int32Ty, 2);
1645 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
1646 RetTy = llvm::FixedVectorType::get(
Int32Ty, 3);
1648 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
1649 RetTy = llvm::FixedVectorType::get(
Int32Ty, 4);
1653 CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
1658 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
1660 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_add);
1661 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:
1662 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
1664 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd);
1665 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32:
1666 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64:
1668 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin);
1669 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
1670 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
1672 *
this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax);
1673 case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
1675 *
this, E, Intrinsic::amdgcn_s_prefetch_data);
1676 case Builtin::BIlogbf:
1677 case Builtin::BI__builtin_logbf: {
1681 CallInst *FrExp =
Builder.CreateCall(FrExpFunc, Src0);
1684 Exp, ConstantInt::getSigned(Exp->
getType(), -1),
"",
false,
true);
1689 Fabs, ConstantFP::getInfinity(
Builder.getFloatTy()));
1690 Value *Sel1 =
Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
1692 Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(
Builder.getFloatTy()));
1695 ConstantFP::getInfinity(
Builder.getFloatTy(),
true), Sel1);
1698 case Builtin::BIlogb:
1699 case Builtin::BI__builtin_logb: {
1703 CallInst *FrExp =
Builder.CreateCall(FrExpFunc, Src0);
1706 Exp, ConstantInt::getSigned(Exp->
getType(), -1),
"",
false,
true);
1711 Fabs, ConstantFP::getInfinity(
Builder.getDoubleTy()));
1712 Value *Sel1 =
Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
1714 Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(
Builder.getDoubleTy()));
1717 ConstantFP::getInfinity(
Builder.getDoubleTy(),
true),
1721 case Builtin::BIscalbnf:
1722 case Builtin::BI__builtin_scalbnf:
1723 case Builtin::BIscalbn:
1724 case Builtin::BI__builtin_scalbn:
1726 *
this, E, Intrinsic::ldexp, Intrinsic::experimental_constrained_ldexp);