clang 23.0.0git
CGHLSLBuiltins.cpp
Go to the documentation of this file.
1//===------- CGHLSLBuiltins.cpp - Emit LLVM Code for HLSL builtins --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit HLSL Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGBuiltin.h"
14#include "CGHLSLRuntime.h"
15#include "CodeGenFunction.h"
16#include "llvm/IR/MatrixBuilder.h"
17
18using namespace clang;
19using namespace CodeGen;
20using namespace llvm;
21
25 "asdouble operands types mismatch");
26 Value *OpLowBits = CGF.EmitScalarExpr(E->getArg(0));
27 Value *OpHighBits = CGF.EmitScalarExpr(E->getArg(1));
28
29 llvm::Type *ResultType = CGF.DoubleTy;
30 int N = 1;
31 if (auto *VTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) {
32 N = VTy->getNumElements();
33 ResultType = llvm::FixedVectorType::get(CGF.DoubleTy, N);
34 }
35
36 if (CGF.CGM.getTarget().getTriple().isDXIL())
37 return CGF.Builder.CreateIntrinsic(
38 /*ReturnType=*/ResultType, Intrinsic::dx_asdouble,
39 {OpLowBits, OpHighBits}, nullptr, "hlsl.asdouble");
40
41 if (!E->getArg(0)->getType()->isVectorType()) {
42 OpLowBits = CGF.Builder.CreateVectorSplat(1, OpLowBits);
43 OpHighBits = CGF.Builder.CreateVectorSplat(1, OpHighBits);
44 }
45
47 for (int i = 0; i < N; i++) {
48 Mask.push_back(i);
49 Mask.push_back(i + N);
50 }
51
52 Value *BitVec = CGF.Builder.CreateShuffleVector(OpLowBits, OpHighBits, Mask);
53
54 return CGF.Builder.CreateBitCast(BitVec, ResultType);
55}
56
58 Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
59
60 Constant *FZeroConst = ConstantFP::getZero(CGF->FloatTy);
61 Value *CMP;
62 Value *LastInstr;
63
64 if (const auto *VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) {
65 FZeroConst = ConstantVector::getSplat(
66 ElementCount::getFixed(VecTy->getNumElements()), FZeroConst);
67 auto *FCompInst = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst);
68 CMP = CGF->Builder.CreateIntrinsic(
69 CGF->Builder.getInt1Ty(), CGF->CGM.getHLSLRuntime().getAnyIntrinsic(),
70 {FCompInst});
71 } else {
72 CMP = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst);
73 }
74
75 if (CGF->CGM.getTarget().getTriple().isDXIL()) {
76 LastInstr = CGF->Builder.CreateIntrinsic(Intrinsic::dx_discard, {CMP});
77 } else if (CGF->CGM.getTarget().getTriple().isSPIRV()) {
78 BasicBlock *LT0 = CGF->createBasicBlock("lt0", CGF->CurFn);
79 BasicBlock *End = CGF->createBasicBlock("end", CGF->CurFn);
80
81 CGF->Builder.CreateCondBr(CMP, LT0, End);
82
83 CGF->Builder.SetInsertPoint(LT0);
84
85 CGF->Builder.CreateIntrinsic(Intrinsic::spv_discard, {});
86
87 LastInstr = CGF->Builder.CreateBr(End);
88 CGF->Builder.SetInsertPoint(End);
89 } else {
90 llvm_unreachable("Backend Codegen not supported.");
91 }
92
93 return LastInstr;
94}
95
97 Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
98 const auto *OutArg1 = dyn_cast<HLSLOutArgExpr>(E->getArg(1));
99 const auto *OutArg2 = dyn_cast<HLSLOutArgExpr>(E->getArg(2));
100
101 CallArgList Args;
102 LValue Op1TmpLValue =
103 CGF->EmitHLSLOutArgExpr(OutArg1, Args, OutArg1->getType());
104 LValue Op2TmpLValue =
105 CGF->EmitHLSLOutArgExpr(OutArg2, Args, OutArg2->getType());
106
108 Args.reverseWritebacks();
109
110 Value *LowBits = nullptr;
111 Value *HighBits = nullptr;
112
113 if (CGF->CGM.getTarget().getTriple().isDXIL()) {
114 llvm::Type *RetElementTy = CGF->Int32Ty;
115 if (auto *Op0VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>())
116 RetElementTy = llvm::VectorType::get(
117 CGF->Int32Ty, ElementCount::getFixed(Op0VecTy->getNumElements()));
118 auto *RetTy = llvm::StructType::get(RetElementTy, RetElementTy);
119
120 CallInst *CI = CGF->Builder.CreateIntrinsic(
121 RetTy, Intrinsic::dx_splitdouble, {Op0}, nullptr, "hlsl.splitdouble");
122
123 LowBits = CGF->Builder.CreateExtractValue(CI, 0);
124 HighBits = CGF->Builder.CreateExtractValue(CI, 1);
125 } else {
126 // For Non DXIL targets we generate the instructions.
127
128 if (!Op0->getType()->isVectorTy()) {
129 FixedVectorType *DestTy = FixedVectorType::get(CGF->Int32Ty, 2);
130 Value *Bitcast = CGF->Builder.CreateBitCast(Op0, DestTy);
131
132 LowBits = CGF->Builder.CreateExtractElement(Bitcast, (uint64_t)0);
133 HighBits = CGF->Builder.CreateExtractElement(Bitcast, 1);
134 } else {
135 int NumElements = 1;
136 if (const auto *VecTy =
138 NumElements = VecTy->getNumElements();
139
140 FixedVectorType *Uint32VecTy =
141 FixedVectorType::get(CGF->Int32Ty, NumElements * 2);
142 Value *Uint32Vec = CGF->Builder.CreateBitCast(Op0, Uint32VecTy);
143 if (NumElements == 1) {
144 LowBits = CGF->Builder.CreateExtractElement(Uint32Vec, (uint64_t)0);
145 HighBits = CGF->Builder.CreateExtractElement(Uint32Vec, 1);
146 } else {
147 SmallVector<int> EvenMask, OddMask;
148 for (int I = 0, E = NumElements; I != E; ++I) {
149 EvenMask.push_back(I * 2);
150 OddMask.push_back(I * 2 + 1);
151 }
152 LowBits = CGF->Builder.CreateShuffleVector(Uint32Vec, EvenMask);
153 HighBits = CGF->Builder.CreateShuffleVector(Uint32Vec, OddMask);
154 }
155 }
156 }
157 CGF->Builder.CreateStore(LowBits, Op1TmpLValue.getAddress());
158 auto *LastInst =
159 CGF->Builder.CreateStore(HighBits, Op2TmpLValue.getAddress());
160 CGF->EmitWritebacks(Args);
161 return LastInst;
162}
163
165 const CallExpr *E) {
166 Value *Cond = CGF.EmitScalarExpr(E->getArg(0));
167 llvm::Type *I32 = CGF.Int32Ty;
168
169 llvm::Type *Vec4I32 = llvm::FixedVectorType::get(I32, 4);
170 [[maybe_unused]] llvm::StructType *Struct4I32 =
171 llvm::StructType::get(CGF.getLLVMContext(), {I32, I32, I32, I32});
172
173 if (CGF.CGM.getTarget().getTriple().isDXIL()) {
174 // Call DXIL intrinsic: returns { i32, i32, i32, i32 }
175 Value *StructVal =
176 CGF.EmitIntrinsicCall(Intrinsic::dx_wave_ballot, {I32}, {Cond});
177 assert(StructVal->getType() == Struct4I32 &&
178 "dx.wave.ballot must return {i32,i32,i32,i32}");
179
180 // Reassemble struct to <4 x i32>
181 llvm::Value *VecVal = llvm::PoisonValue::get(Vec4I32);
182 for (unsigned I = 0; I < 4; ++I) {
183 Value *Elt = CGF.Builder.CreateExtractValue(StructVal, I);
184 VecVal =
185 CGF.Builder.CreateInsertElement(VecVal, Elt, CGF.Builder.getInt32(I));
186 }
187
188 return VecVal;
189 }
190
191 if (CGF.CGM.getTarget().getTriple().isSPIRV())
192 return CGF.EmitIntrinsicCall(Intrinsic::spv_subgroup_ballot, {Cond});
193
194 llvm_unreachable(
195 "WaveActiveBallot is only supported for DXIL and SPIRV targets");
196}
197
199 const CallExpr *E) {
200 Value *Op0 = CGF.EmitScalarExpr(E->getArg(0));
201 QualType Op0Ty = E->getArg(0)->getType();
202 llvm::Type *ResType = CGF.FloatTy;
203 uint64_t NumElements = 0;
204 if (Op0->getType()->isVectorTy()) {
205 NumElements =
206 E->getArg(0)->getType()->castAs<clang::VectorType>()->getNumElements();
207 ResType =
208 llvm::VectorType::get(ResType, ElementCount::getFixed(NumElements));
209 }
211 llvm_unreachable(
212 "f16tof32 operand must have an unsigned int representation");
213
214 if (CGF.CGM.getTriple().isDXIL())
215 return CGF.Builder.CreateIntrinsic(ResType, Intrinsic::dx_legacyf16tof32,
216 ArrayRef<Value *>{Op0}, nullptr,
217 "hlsl.f16tof32");
218
219 if (CGF.CGM.getTriple().isSPIRV()) {
220 // We use the SPIRV UnpackHalf2x16 operation to avoid the need for the
221 // Int16 and Float16 capabilities
222 auto *UnpackType =
223 llvm::VectorType::get(CGF.FloatTy, ElementCount::getFixed(2));
224
225 if (NumElements == 0) {
226 // a scalar input - simply extract the first element of the unpacked
227 // vector
228 Value *Unpack = CGF.Builder.CreateIntrinsic(
229 UnpackType, Intrinsic::spv_unpackhalf2x16, ArrayRef<Value *>{Op0});
230 return CGF.Builder.CreateExtractElement(Unpack, (uint64_t)0);
231 }
232
233 // a vector input - build a congruent output vector by iterating through
234 // the input vector calling unpackhalf2x16 for each element
235 Value *Result = PoisonValue::get(ResType);
236 for (uint64_t I = 0; I < NumElements; I++) {
237 Value *InVal = CGF.Builder.CreateExtractElement(Op0, I);
238 Value *Unpack = CGF.Builder.CreateIntrinsic(
239 UnpackType, Intrinsic::spv_unpackhalf2x16, ArrayRef<Value *>{InVal});
240 Value *Res = CGF.Builder.CreateExtractElement(Unpack, (uint64_t)0);
241 Result = CGF.Builder.CreateInsertElement(Result, Res, I);
242 }
243 return Result;
244 }
245
246 llvm_unreachable("Intrinsic F16ToF32 not supported by target architecture");
247}
248
250 const CallExpr *E) {
251 Value *Op0 = CGF.EmitScalarExpr(E->getArg(0));
252 QualType Op0Ty = E->getArg(0)->getType();
253 llvm::Type *ResType = CGF.IntTy;
254 uint64_t NumElements = 0;
255 if (Op0->getType()->isVectorTy()) {
256 NumElements =
257 E->getArg(0)->getType()->castAs<clang::VectorType>()->getNumElements();
258 ResType =
259 llvm::VectorType::get(ResType, ElementCount::getFixed(NumElements));
260 }
261 if (!Op0Ty->hasFloatingRepresentation())
262 llvm_unreachable("f32tof16 operand must have a float representation");
263
264 if (CGF.CGM.getTriple().isDXIL())
265 return CGF.Builder.CreateIntrinsic(ResType, Intrinsic::dx_legacyf32tof16,
266 ArrayRef<Value *>{Op0}, nullptr,
267 "hlsl.f32tof16");
268
269 if (CGF.CGM.getTriple().isSPIRV()) {
270 // We use the SPIRV PackHalf2x16 operation to avoid the need for the
271 // Int16 and Float16 capabilities
272 auto *PackType =
273 llvm::VectorType::get(CGF.FloatTy, ElementCount::getFixed(2));
274
275 if (NumElements == 0) {
276 // a scalar input - simply insert the scalar in the first element
277 // of the 2 element float vector
278 Value *Float2 = Constant::getNullValue(PackType);
279 Float2 = CGF.Builder.CreateInsertElement(Float2, Op0, (uint64_t)0);
280 Value *Result = CGF.Builder.CreateIntrinsic(
281 ResType, Intrinsic::spv_packhalf2x16, ArrayRef<Value *>{Float2});
282 return Result;
283 }
284
285 // a vector input - build a congruent output vector by iterating through
286 // the input vector calling packhalf2x16 for each element
287 Value *Result = PoisonValue::get(ResType);
288 for (uint64_t I = 0; I < NumElements; I++) {
289 Value *Float2 = Constant::getNullValue(PackType);
290 Value *InVal = CGF.Builder.CreateExtractElement(Op0, I);
291 Float2 = CGF.Builder.CreateInsertElement(Float2, InVal, (uint64_t)0);
292 Value *Res = CGF.Builder.CreateIntrinsic(
293 CGF.IntTy, Intrinsic::spv_packhalf2x16, ArrayRef<Value *>{Float2});
294 Result = CGF.Builder.CreateInsertElement(Result, Res, I);
295 }
296 return Result;
297 }
298
299 llvm_unreachable("Intrinsic F32ToF16 not supported by target architecture");
300}
301
302static Value *emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr,
303 LValue &Stride) {
304 // Figure out the stride of the buffer elements from the handle type.
305 auto *HandleTy =
307 QualType ElementTy = HandleTy->getContainedType();
308 Value *StrideValue = CGF->getTypeSize(ElementTy);
309 return CGF->Builder.CreateStore(StrideValue, Stride.getAddress());
310}
311
312// Return dot product intrinsic that corresponds to the QT scalar type
313static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) {
314 if (QT->isFloatingType())
315 return RT.getFDotIntrinsic();
316 if (QT->isSignedIntegerType())
317 return RT.getSDotIntrinsic();
318 assert(QT->isUnsignedIntegerType());
319 return RT.getUDotIntrinsic();
320}
321
322static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT) {
324 return RT.getFirstBitSHighIntrinsic();
325 }
326
328 return RT.getFirstBitUHighIntrinsic();
329}
330
331// Return wave active sum that corresponds to the QT scalar type
332static Intrinsic::ID getWaveActiveSumIntrinsic(llvm::Triple::ArchType Arch,
333 QualType QT) {
334 switch (Arch) {
335 case llvm::Triple::spirv:
336 return Intrinsic::spv_wave_reduce_sum;
337 case llvm::Triple::dxil: {
338 if (QT->isUnsignedIntegerType())
339 return Intrinsic::dx_wave_reduce_usum;
340 return Intrinsic::dx_wave_reduce_sum;
341 }
342 default:
343 llvm_unreachable("Intrinsic WaveActiveSum"
344 " not supported by target architecture");
345 }
346}
347
348// Return wave active product that corresponds to the QT scalar type
349static Intrinsic::ID getWaveActiveProductIntrinsic(llvm::Triple::ArchType Arch,
350 QualType QT) {
351 switch (Arch) {
352 case llvm::Triple::spirv:
353 return Intrinsic::spv_wave_product;
354 case llvm::Triple::dxil: {
355 if (QT->isUnsignedIntegerType())
356 return Intrinsic::dx_wave_uproduct;
357 return Intrinsic::dx_wave_product;
358 }
359 default:
360 llvm_unreachable("Intrinsic WaveActiveProduct"
361 " not supported by target architecture");
362 }
363}
364
365static Intrinsic::ID getPrefixCountBitsIntrinsic(llvm::Triple::ArchType Arch) {
366 switch (Arch) {
367 case llvm::Triple::spirv:
368 return Intrinsic::spv_subgroup_prefix_bit_count;
369 case llvm::Triple::dxil: {
370 return Intrinsic::dx_wave_prefix_bit_count;
371 }
372 default:
373 llvm_unreachable(
374 "WavePrefixOp instruction not supported by target architecture");
375 }
376}
377
378// Return wave prefix sum that corresponds to the QT scalar type
379static Intrinsic::ID getWavePrefixSumIntrinsic(llvm::Triple::ArchType Arch,
380 QualType QT) {
381 switch (Arch) {
382 case llvm::Triple::spirv:
383 return Intrinsic::spv_wave_prefix_sum;
384 case llvm::Triple::dxil: {
385 if (QT->isUnsignedIntegerType())
386 return Intrinsic::dx_wave_prefix_usum;
387 return Intrinsic::dx_wave_prefix_sum;
388 }
389 default:
390 llvm_unreachable("Intrinsic WavePrefixSum"
391 " not supported by target architecture");
392 }
393}
394
395// Return wave prefix product that corresponds to the QT scalar type
396static Intrinsic::ID getWavePrefixProductIntrinsic(llvm::Triple::ArchType Arch,
397 QualType QT) {
398 switch (Arch) {
399 case llvm::Triple::spirv:
400 return Intrinsic::spv_wave_prefix_product;
401 case llvm::Triple::dxil: {
402 if (QT->isUnsignedIntegerType())
403 return Intrinsic::dx_wave_prefix_uproduct;
404 return Intrinsic::dx_wave_prefix_product;
405 }
406 default:
407 llvm_unreachable("Intrinsic WavePrefixProduct"
408 " not supported by target architecture");
409 }
410}
411
412// Returns the mangled name for a builtin function that the SPIR-V backend
413// will expand into a spec Constant.
414static std::string getSpecConstantFunctionName(clang::QualType SpecConstantType,
415 ASTContext &Context) {
416 // The parameter types for our conceptual intrinsic function.
417 QualType ClangParamTypes[] = {Context.IntTy, SpecConstantType};
418
419 // Create a temporary FunctionDecl for the builtin function. It won't be
420 // added to the AST.
422 QualType FnType =
423 Context.getFunctionType(SpecConstantType, ClangParamTypes, EPI);
424 DeclarationName FuncName = &Context.Idents.get("__spirv_SpecConstant");
425 FunctionDecl *FnDeclForMangling = FunctionDecl::Create(
426 Context, Context.getTranslationUnitDecl(), SourceLocation(),
427 SourceLocation(), FuncName, FnType, /*TSI=*/nullptr, SC_Extern);
428
429 // Attach the created parameter declarations to the function declaration.
431 for (QualType ParamType : ClangParamTypes) {
433 Context, FnDeclForMangling, SourceLocation(), SourceLocation(),
434 /*IdentifierInfo*/ nullptr, ParamType, /*TSI*/ nullptr, SC_None,
435 /*DefaultArg*/ nullptr);
436 ParamDecls.push_back(PD);
437 }
438 FnDeclForMangling->setParams(ParamDecls);
439
440 // Get the mangled name.
441 std::string Name;
442 llvm::raw_string_ostream MangledNameStream(Name);
443 std::unique_ptr<MangleContext> Mangler(Context.createMangleContext());
444 Mangler->mangleName(FnDeclForMangling, MangledNameStream);
445 MangledNameStream.flush();
446
447 return Name;
448}
449
450static llvm::Type *getOffsetType(CodeGenModule &CGM, llvm::Type *CoordTy) {
451 llvm::Type *Int32Ty = CGM.Int32Ty;
452 if (auto *VT = dyn_cast<llvm::FixedVectorType>(CoordTy))
453 return llvm::FixedVectorType::get(Int32Ty, VT->getNumElements());
454 return Int32Ty;
455}
456
458 unsigned OffsetArgIndex, llvm::Type *OffsetTy) {
459 if (E->getNumArgs() > OffsetArgIndex)
460 return CGF.EmitScalarExpr(E->getArg(OffsetArgIndex));
461
462 return llvm::Constant::getNullValue(OffsetTy);
463}
464
466 unsigned ClampArgIndex) {
467 Value *Clamp = CGF.EmitScalarExpr(E->getArg(ClampArgIndex));
468 // The builtin is defined with variadic arguments, so the clamp parameter
469 // might have been promoted to double. The intrinsic requires a 32-bit
470 // float.
471 if (Clamp->getType() != CGF.Builder.getFloatTy())
472 Clamp = CGF.Builder.CreateFPCast(Clamp, CGF.Builder.getFloatTy());
473 return Clamp;
474}
475
477 unsigned IntrinsicID, unsigned NumRetComps,
478 bool HasLod) {
479 Value *Handle = CGF.EmitScalarExpr(E->getArg(0));
480
481 SmallVector<Value *> Args{Handle};
482 if (HasLod)
483 Args.push_back(CGF.EmitScalarExpr(E->getArg(1)));
484
485 Value *DimValue =
486 CGF.Builder.CreateIntrinsic(IntrinsicID, {Handle->getType()}, Args);
487
488 Value *LastStore = nullptr;
489 unsigned ArgIndex = HasLod ? 2 : 1;
490 for (unsigned i = 0; i < NumRetComps; ++i) {
491 const Expr *Arg = E->getArg(ArgIndex++);
492 LValue DimOut = CGF.EmitLValue(Arg);
493 Value *Elem = DimValue;
494 if (NumRetComps > 1)
495 Elem = CGF.Builder.CreateExtractElement(DimValue, i);
496
497 // Handle float casting if needed
498 if (Arg->getType()->isFloatingType())
499 Elem = CGF.Builder.CreateUIToFP(
500 Elem, llvm::Type::getFloatTy(CGF.getLLVMContext()));
501
502 LastStore = CGF.Builder.CreateStore(Elem, DimOut.getAddress());
503 }
504 return LastStore;
505}
506
508 const CallExpr *E,
510 if (!getLangOpts().HLSL)
511 return nullptr;
512
513 switch (BuiltinID) {
514 case Builtin::BI__builtin_hlsl_adduint64: {
515 Value *OpA = EmitScalarExpr(E->getArg(0));
516 Value *OpB = EmitScalarExpr(E->getArg(1));
517 QualType Arg0Ty = E->getArg(0)->getType();
518 uint64_t NumElements = Arg0Ty->castAs<VectorType>()->getNumElements();
519 assert(Arg0Ty == E->getArg(1)->getType() &&
520 "AddUint64 operand types must match");
521 assert(Arg0Ty->hasIntegerRepresentation() &&
522 "AddUint64 operands must have an integer representation");
523 assert((NumElements == 2 || NumElements == 4) &&
524 "AddUint64 operands must have 2 or 4 elements");
525
526 llvm::Value *LowA;
527 llvm::Value *HighA;
528 llvm::Value *LowB;
529 llvm::Value *HighB;
530
531 // Obtain low and high words of inputs A and B
532 if (NumElements == 2) {
533 LowA = Builder.CreateExtractElement(OpA, (uint64_t)0, "LowA");
534 HighA = Builder.CreateExtractElement(OpA, (uint64_t)1, "HighA");
535 LowB = Builder.CreateExtractElement(OpB, (uint64_t)0, "LowB");
536 HighB = Builder.CreateExtractElement(OpB, (uint64_t)1, "HighB");
537 } else {
538 LowA = Builder.CreateShuffleVector(OpA, {0, 2}, "LowA");
539 HighA = Builder.CreateShuffleVector(OpA, {1, 3}, "HighA");
540 LowB = Builder.CreateShuffleVector(OpB, {0, 2}, "LowB");
541 HighB = Builder.CreateShuffleVector(OpB, {1, 3}, "HighB");
542 }
543
544 // Use an uadd_with_overflow to compute the sum of low words and obtain a
545 // carry value
546 llvm::Value *Carry;
547 llvm::Value *LowSum = EmitOverflowIntrinsic(
548 *this, Intrinsic::uadd_with_overflow, LowA, LowB, Carry);
549 llvm::Value *ZExtCarry =
550 Builder.CreateZExt(Carry, HighA->getType(), "CarryZExt");
551
552 // Sum the high words and the carry
553 llvm::Value *HighSum = Builder.CreateAdd(HighA, HighB, "HighSum");
554 llvm::Value *HighSumPlusCarry =
555 Builder.CreateAdd(HighSum, ZExtCarry, "HighSumPlusCarry");
556
557 if (NumElements == 4) {
558 return Builder.CreateShuffleVector(LowSum, HighSumPlusCarry, {0, 2, 1, 3},
559 "hlsl.AddUint64");
560 }
561
562 llvm::Value *Result = PoisonValue::get(OpA->getType());
563 Result = Builder.CreateInsertElement(Result, LowSum, (uint64_t)0,
564 "hlsl.AddUint64.upto0");
565 Result = Builder.CreateInsertElement(Result, HighSumPlusCarry, (uint64_t)1,
566 "hlsl.AddUint64");
567 return Result;
568 }
569 case Builtin::BI__builtin_hlsl_resource_getpointer:
570 case Builtin::BI__builtin_hlsl_resource_getpointer_typed: {
571 Value *HandleOp = EmitScalarExpr(E->getArg(0));
572 bool IsIndexed =
573 BuiltinID == Builtin::BI__builtin_hlsl_resource_getpointer_typed ||
574 E->getNumArgs() > 1;
575
576 llvm::Type *RetTy = ConvertType(E->getType());
577 llvm::Function *IntrFn = nullptr;
578 llvm::CallInst *CI = nullptr;
579 if (IsIndexed) {
580 Value *IndexOp = EmitScalarExpr(E->getArg(1));
581 IntrFn = llvm::Intrinsic::getOrInsertDeclaration(
582 &CGM.getModule(),
583 CGM.getHLSLRuntime().getCreateResourceGetPointerIntrinsic(),
584 {RetTy, HandleOp->getType(), IndexOp->getType()});
585 CI = EmitRuntimeCall(IntrFn, {HandleOp, IndexOp});
586 } else {
587 IntrFn = llvm::Intrinsic::getOrInsertDeclaration(
588 &CGM.getModule(),
589 CGM.getHLSLRuntime().getCreateResourceGetBasePointerIntrinsic(),
590 {RetTy, HandleOp->getType()});
591 CI = EmitRuntimeCall(IntrFn, {HandleOp});
592 }
593 CI->setCallingConv(IntrFn->getCallingConv());
594 return CI;
595 }
596 case Builtin::BI__builtin_hlsl_resource_sample: {
597 Value *HandleOp = EmitScalarExpr(E->getArg(0));
598 Value *SamplerOp = EmitScalarExpr(E->getArg(1));
599 Value *CoordOp = EmitScalarExpr(E->getArg(2));
600
602 Args.push_back(HandleOp);
603 Args.push_back(SamplerOp);
604 Args.push_back(CoordOp);
605 Args.push_back(
606 emitHlslOffset(*this, E, 3, getOffsetType(CGM, CoordOp->getType())));
607
608 llvm::Type *RetTy = ConvertType(E->getType());
609 if (E->getNumArgs() <= 4) {
610 return Builder.CreateIntrinsic(
611 RetTy, CGM.getHLSLRuntime().getSampleIntrinsic(), Args);
612 }
613
614 Args.push_back(emitHlslClamp(*this, E, 4));
615 return Builder.CreateIntrinsic(
616 RetTy, CGM.getHLSLRuntime().getSampleClampIntrinsic(), Args);
617 }
618 case Builtin::BI__builtin_hlsl_resource_sample_bias: {
619 Value *HandleOp = EmitScalarExpr(E->getArg(0));
620 Value *SamplerOp = EmitScalarExpr(E->getArg(1));
621 Value *CoordOp = EmitScalarExpr(E->getArg(2));
622 Value *BiasOp = EmitScalarExpr(E->getArg(3));
623 if (BiasOp->getType() != Builder.getFloatTy())
624 BiasOp = Builder.CreateFPCast(BiasOp, Builder.getFloatTy());
625
626 SmallVector<Value *, 6> Args; // Max 6 arguments for SampleBias
627 Args.push_back(HandleOp);
628 Args.push_back(SamplerOp);
629 Args.push_back(CoordOp);
630 Args.push_back(BiasOp);
631 Args.push_back(
632 emitHlslOffset(*this, E, 4, getOffsetType(CGM, CoordOp->getType())));
633
634 llvm::Type *RetTy = ConvertType(E->getType());
635 if (E->getNumArgs() <= 5)
636 return Builder.CreateIntrinsic(
637 RetTy, CGM.getHLSLRuntime().getSampleBiasIntrinsic(), Args);
638
639 Args.push_back(emitHlslClamp(*this, E, 5));
640 return Builder.CreateIntrinsic(
641 RetTy, CGM.getHLSLRuntime().getSampleBiasClampIntrinsic(), Args);
642 }
643 case Builtin::BI__builtin_hlsl_resource_sample_grad: {
644 Value *HandleOp = EmitScalarExpr(E->getArg(0));
645 Value *SamplerOp = EmitScalarExpr(E->getArg(1));
646 Value *CoordOp = EmitScalarExpr(E->getArg(2));
647 Value *DDXOp = EmitScalarExpr(E->getArg(3));
648 Value *DDYOp = EmitScalarExpr(E->getArg(4));
649
651 Args.push_back(HandleOp);
652 Args.push_back(SamplerOp);
653 Args.push_back(CoordOp);
654 Args.push_back(DDXOp);
655 Args.push_back(DDYOp);
656 Args.push_back(
657 emitHlslOffset(*this, E, 5, getOffsetType(CGM, CoordOp->getType())));
658
659 llvm::Type *RetTy = ConvertType(E->getType());
660
661 if (E->getNumArgs() <= 6) {
662 return Builder.CreateIntrinsic(
663 RetTy, CGM.getHLSLRuntime().getSampleGradIntrinsic(), Args);
664 }
665
666 Args.push_back(emitHlslClamp(*this, E, 6));
667 return Builder.CreateIntrinsic(
668 RetTy, CGM.getHLSLRuntime().getSampleGradClampIntrinsic(), Args);
669 }
670 case Builtin::BI__builtin_hlsl_resource_sample_level: {
671 Value *HandleOp = EmitScalarExpr(E->getArg(0));
672 Value *SamplerOp = EmitScalarExpr(E->getArg(1));
673 Value *CoordOp = EmitScalarExpr(E->getArg(2));
674 Value *LODOp = EmitScalarExpr(E->getArg(3));
675 if (LODOp->getType() != Builder.getFloatTy())
676 LODOp = Builder.CreateFPCast(LODOp, Builder.getFloatTy());
677
678 SmallVector<Value *, 5> Args; // Max 5 arguments for SampleLevel
679 Args.push_back(HandleOp);
680 Args.push_back(SamplerOp);
681 Args.push_back(CoordOp);
682 Args.push_back(LODOp);
683 Args.push_back(
684 emitHlslOffset(*this, E, 4, getOffsetType(CGM, CoordOp->getType())));
685
686 llvm::Type *RetTy = ConvertType(E->getType());
687 return Builder.CreateIntrinsic(
688 RetTy, CGM.getHLSLRuntime().getSampleLevelIntrinsic(), Args);
689 }
690 case Builtin::BI__builtin_hlsl_resource_load_level: {
691 Value *HandleOp = EmitScalarExpr(E->getArg(0));
692 Value *CoordLODOp = EmitScalarExpr(E->getArg(1));
693
694 auto *CoordLODVecTy = cast<llvm::FixedVectorType>(CoordLODOp->getType());
695 unsigned NumElts = CoordLODVecTy->getNumElements();
696 assert(NumElts >= 2 && "CoordLOD must have at least 2 elements");
697
698 // Split CoordLOD into Coord and LOD
700 for (unsigned I = 0; I < NumElts - 1; ++I)
701 Mask.push_back(I);
702
703 Value *CoordOp =
704 Builder.CreateShuffleVector(CoordLODOp, Mask, "hlsl.load.coord");
705 Value *LODOp =
706 Builder.CreateExtractElement(CoordLODOp, NumElts - 1, "hlsl.load.lod");
707
709 Args.push_back(HandleOp);
710 Args.push_back(CoordOp);
711 Args.push_back(LODOp);
712 Args.push_back(
713 emitHlslOffset(*this, E, 2, getOffsetType(CGM, CoordOp->getType())));
714
715 llvm::Type *RetTy = ConvertType(E->getType());
716 return Builder.CreateIntrinsic(
717 RetTy, CGM.getHLSLRuntime().getLoadLevelIntrinsic(), Args);
718 }
719 case Builtin::BI__builtin_hlsl_resource_sample_cmp: {
720 Value *HandleOp = EmitScalarExpr(E->getArg(0));
721 Value *SamplerOp = EmitScalarExpr(E->getArg(1));
722 Value *CoordOp = EmitScalarExpr(E->getArg(2));
723 Value *CmpOp = EmitScalarExpr(E->getArg(3));
724 if (CmpOp->getType() != Builder.getFloatTy())
725 CmpOp = Builder.CreateFPCast(CmpOp, Builder.getFloatTy());
726
727 SmallVector<Value *, 6> Args; // Max 6 arguments for SampleCmp
728 Args.push_back(HandleOp);
729 Args.push_back(SamplerOp);
730 Args.push_back(CoordOp);
731 Args.push_back(CmpOp);
732 Args.push_back(
733 emitHlslOffset(*this, E, 4, getOffsetType(CGM, CoordOp->getType())));
734
735 llvm::Type *RetTy = ConvertType(E->getType());
736 if (E->getNumArgs() <= 5) {
737 return Builder.CreateIntrinsic(
738 RetTy, CGM.getHLSLRuntime().getSampleCmpIntrinsic(), Args);
739 }
740
741 Args.push_back(emitHlslClamp(*this, E, 5));
742 return Builder.CreateIntrinsic(
743 RetTy, CGM.getHLSLRuntime().getSampleCmpClampIntrinsic(), Args);
744 }
745 case Builtin::BI__builtin_hlsl_resource_sample_cmp_level_zero: {
746 Value *HandleOp = EmitScalarExpr(E->getArg(0));
747 Value *SamplerOp = EmitScalarExpr(E->getArg(1));
748 Value *CoordOp = EmitScalarExpr(E->getArg(2));
749 Value *CmpOp = EmitScalarExpr(E->getArg(3));
750 if (CmpOp->getType() != Builder.getFloatTy())
751 CmpOp = Builder.CreateFPCast(CmpOp, Builder.getFloatTy());
752
754 Args.push_back(HandleOp);
755 Args.push_back(SamplerOp);
756 Args.push_back(CoordOp);
757 Args.push_back(CmpOp);
758
759 Args.push_back(
760 emitHlslOffset(*this, E, 4, getOffsetType(CGM, CoordOp->getType())));
761
762 llvm::Type *RetTy = ConvertType(E->getType());
763 return Builder.CreateIntrinsic(
764 RetTy, CGM.getHLSLRuntime().getSampleCmpLevelZeroIntrinsic(), Args);
765 }
766 case Builtin::BI__builtin_hlsl_resource_calculate_lod: {
767 Value *HandleOp = EmitScalarExpr(E->getArg(0));
768 Value *SamplerOp = EmitScalarExpr(E->getArg(1));
769 Value *CoordOp = EmitScalarExpr(E->getArg(2));
770
771 return Builder.CreateIntrinsic(
772 ConvertType(E->getType()),
773 CGM.getHLSLRuntime().getCalculateLodIntrinsic(),
774 {HandleOp, SamplerOp, CoordOp});
775 }
776 case Builtin::BI__builtin_hlsl_resource_calculate_lod_unclamped: {
777 Value *HandleOp = EmitScalarExpr(E->getArg(0));
778 Value *SamplerOp = EmitScalarExpr(E->getArg(1));
779 Value *CoordOp = EmitScalarExpr(E->getArg(2));
780
781 return Builder.CreateIntrinsic(
782 ConvertType(E->getType()),
783 CGM.getHLSLRuntime().getCalculateLodUnclampedIntrinsic(),
784 {HandleOp, SamplerOp, CoordOp});
785 }
786 case Builtin::BI__builtin_hlsl_resource_gather: {
787 Value *HandleOp = EmitScalarExpr(E->getArg(0));
788 Value *SamplerOp = EmitScalarExpr(E->getArg(1));
789 Value *CoordOp = EmitScalarExpr(E->getArg(2));
790 Value *ComponentOp = EmitScalarExpr(E->getArg(3));
791 if (ComponentOp->getType() != Builder.getInt32Ty())
792 ComponentOp = Builder.CreateIntCast(ComponentOp, Builder.getInt32Ty(),
793 /*isSigned=*/false);
794
796 Args.push_back(HandleOp);
797 Args.push_back(SamplerOp);
798 Args.push_back(CoordOp);
799 Args.push_back(ComponentOp);
800 Args.push_back(
801 emitHlslOffset(*this, E, 4, getOffsetType(CGM, CoordOp->getType())));
802
803 llvm::Type *RetTy = ConvertType(E->getType());
804 return Builder.CreateIntrinsic(
805 RetTy, CGM.getHLSLRuntime().getGatherIntrinsic(), Args);
806 }
807 case Builtin::BI__builtin_hlsl_resource_gather_cmp: {
808 Value *HandleOp = EmitScalarExpr(E->getArg(0));
809 Value *SamplerOp = EmitScalarExpr(E->getArg(1));
810 Value *CoordOp = EmitScalarExpr(E->getArg(2));
811 Value *CompareOp = EmitScalarExpr(E->getArg(3));
812 if (CompareOp->getType() != Builder.getFloatTy())
813 CompareOp = Builder.CreateFPCast(CompareOp, Builder.getFloatTy());
814
816 Args.push_back(HandleOp);
817 Args.push_back(SamplerOp);
818 Args.push_back(CoordOp);
819 Args.push_back(CompareOp);
820
821 if (CGM.getTarget().getTriple().isDXIL()) {
822 Value *ComponentOp = EmitScalarExpr(E->getArg(4));
823 if (ComponentOp->getType() != Builder.getInt32Ty())
824 ComponentOp = Builder.CreateIntCast(ComponentOp, Builder.getInt32Ty(),
825 /*isSigned=*/false);
826 Args.push_back(ComponentOp);
827 }
828
829 Args.push_back(
830 emitHlslOffset(*this, E, 5, getOffsetType(CGM, CoordOp->getType())));
831
832 llvm::Type *RetTy = ConvertType(E->getType());
833 return Builder.CreateIntrinsic(
834 RetTy, CGM.getHLSLRuntime().getGatherCmpIntrinsic(), Args);
835 }
836 case Builtin::BI__builtin_hlsl_resource_load_with_status:
837 case Builtin::BI__builtin_hlsl_resource_load_with_status_typed: {
838 Value *HandleOp = EmitScalarExpr(E->getArg(0));
839 Value *IndexOp = EmitScalarExpr(E->getArg(1));
840
841 // Get the *address* of the status argument to write to it by reference
842 LValue StatusLVal = EmitLValue(E->getArg(2));
843 Address StatusAddr = StatusLVal.getAddress();
844
845 QualType HandleTy = E->getArg(0)->getType();
846 const HLSLAttributedResourceType *RT =
847 HandleTy->getAs<HLSLAttributedResourceType>();
848 assert(CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil &&
849 "Only DXIL currently implements load with status");
850
851 Intrinsic::ID IntrID = RT->getAttrs().RawBuffer
852 ? llvm::Intrinsic::dx_resource_load_rawbuffer
853 : llvm::Intrinsic::dx_resource_load_typedbuffer;
854
855 llvm::Type *DataTy = ConvertType(E->getType());
856 llvm::Type *RetTy = llvm::StructType::get(Builder.getContext(),
857 {DataTy, Builder.getInt1Ty()});
858
860 Args.push_back(HandleOp);
861 Args.push_back(IndexOp);
862
863 if (RT->isRaw()) {
864 Value *Offset = Builder.getInt32(0);
865 // The offset parameter needs to be poison for ByteAddressBuffer
866 if (!RT->isStructured())
867 Offset = llvm::PoisonValue::get(Builder.getInt32Ty());
868 Args.push_back(Offset);
869 }
870
871 // The load intrinsics give us a (T value, i1 status) pair -
872 // shepherd these into the return value and out reference respectively.
873 Value *ResRet =
874 Builder.CreateIntrinsic(RetTy, IntrID, Args, {}, "ld.struct");
875 Value *LoadedValue = Builder.CreateExtractValue(ResRet, {0}, "ld.value");
876 Value *StatusBit = Builder.CreateExtractValue(ResRet, {1}, "ld.status");
877 Value *ExtendedStatus =
878 Builder.CreateZExt(StatusBit, Builder.getInt32Ty(), "ld.status.ext");
879 Builder.CreateStore(ExtendedStatus, StatusAddr);
880
881 return LoadedValue;
882 }
883 case Builtin::BI__builtin_hlsl_resource_uninitializedhandle: {
884 llvm::Type *HandleTy = CGM.getTypes().ConvertType(E->getType());
885 return llvm::PoisonValue::get(HandleTy);
886 }
887 case Builtin::BI__builtin_hlsl_resource_handlefrombinding: {
888 llvm::Type *HandleTy = CGM.getTypes().ConvertType(E->getType());
889 Value *RegisterOp = EmitScalarExpr(E->getArg(1));
890 Value *SpaceOp = EmitScalarExpr(E->getArg(2));
891 Value *RangeOp = EmitScalarExpr(E->getArg(3));
892 Value *IndexOp = EmitScalarExpr(E->getArg(4));
893 Value *Name = EmitScalarExpr(E->getArg(5));
894 llvm::Intrinsic::ID IntrinsicID =
895 CGM.getHLSLRuntime().getCreateHandleFromBindingIntrinsic();
896 SmallVector<Value *> Args{SpaceOp, RegisterOp, RangeOp, IndexOp, Name};
897 return Builder.CreateIntrinsic(HandleTy, IntrinsicID, Args);
898 }
899 case Builtin::BI__builtin_hlsl_resource_handlefromimplicitbinding: {
900 llvm::Type *HandleTy = CGM.getTypes().ConvertType(E->getType());
901 Value *OrderID = EmitScalarExpr(E->getArg(1));
902 Value *SpaceOp = EmitScalarExpr(E->getArg(2));
903 Value *RangeOp = EmitScalarExpr(E->getArg(3));
904 Value *IndexOp = EmitScalarExpr(E->getArg(4));
905 Value *Name = EmitScalarExpr(E->getArg(5));
906 llvm::Intrinsic::ID IntrinsicID =
907 CGM.getHLSLRuntime().getCreateHandleFromImplicitBindingIntrinsic();
908 SmallVector<Value *> Args{OrderID, SpaceOp, RangeOp, IndexOp, Name};
909 return Builder.CreateIntrinsic(HandleTy, IntrinsicID, Args);
910 }
911 case Builtin::BI__builtin_hlsl_resource_counterhandlefromimplicitbinding: {
912 Value *MainHandle = EmitScalarExpr(E->getArg(0));
913 if (!CGM.getTriple().isSPIRV())
914 return MainHandle;
915
916 llvm::Type *HandleTy = CGM.getTypes().ConvertType(E->getType());
917 Value *OrderID = EmitScalarExpr(E->getArg(1));
918 Value *SpaceOp = EmitScalarExpr(E->getArg(2));
919 llvm::Intrinsic::ID IntrinsicID =
920 llvm::Intrinsic::spv_resource_counterhandlefromimplicitbinding;
921 SmallVector<Value *> Args{MainHandle, OrderID, SpaceOp};
922 return Builder.CreateIntrinsic(HandleTy, IntrinsicID, Args);
923 }
924 case Builtin::BI__builtin_hlsl_resource_nonuniformindex: {
925 Value *IndexOp = EmitScalarExpr(E->getArg(0));
926 llvm::Type *RetTy = ConvertType(E->getType());
927 return Builder.CreateIntrinsic(
928 RetTy, CGM.getHLSLRuntime().getNonUniformResourceIndexIntrinsic(),
929 ArrayRef<Value *>{IndexOp});
930 }
931 case Builtin::BI__builtin_hlsl_resource_getdimensions_x:
932 case Builtin::BI__builtin_hlsl_resource_getdimensions_x_float:
933 return emitGetDimensions(*this, E,
934 CGM.getHLSLRuntime().getGetDimensionsXIntrinsic(),
935 1, /*HasLod=*/false);
936 case Builtin::BI__builtin_hlsl_resource_getdimensions_xy:
937 case Builtin::BI__builtin_hlsl_resource_getdimensions_xy_float:
938 return emitGetDimensions(*this, E,
939 CGM.getHLSLRuntime().getGetDimensionsXYIntrinsic(),
940 2, /*HasLod=*/false);
941 case Builtin::BI__builtin_hlsl_resource_getdimensions_levels_xy:
942 case Builtin::BI__builtin_hlsl_resource_getdimensions_levels_xy_float:
943 return emitGetDimensions(
944 *this, E, CGM.getHLSLRuntime().getGetDimensionsLevelsXYIntrinsic(), 3,
945 /*HasLod=*/true);
946 case Builtin::BI__builtin_hlsl_resource_getstride: {
947 LValue Stride = EmitLValue(E->getArg(1));
948 return emitBufferStride(this, E->getArg(0), Stride);
949 }
950 case Builtin::BI__builtin_hlsl_all: {
951 Value *Op0 = EmitScalarExpr(E->getArg(0));
952 return Builder.CreateIntrinsic(
953 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
954 CGM.getHLSLRuntime().getAllIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
955 "hlsl.all");
956 }
957 case Builtin::BI__builtin_hlsl_and: {
958 Value *Op0 = EmitScalarExpr(E->getArg(0));
959 Value *Op1 = EmitScalarExpr(E->getArg(1));
960 return Builder.CreateAnd(Op0, Op1, "hlsl.and");
961 }
962 case Builtin::BI__builtin_hlsl_or: {
963 Value *Op0 = EmitScalarExpr(E->getArg(0));
964 Value *Op1 = EmitScalarExpr(E->getArg(1));
965 return Builder.CreateOr(Op0, Op1, "hlsl.or");
966 }
967 case Builtin::BI__builtin_hlsl_any: {
968 Value *Op0 = EmitScalarExpr(E->getArg(0));
969 return Builder.CreateIntrinsic(
970 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
971 CGM.getHLSLRuntime().getAnyIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
972 "hlsl.any");
973 }
974 case Builtin::BI__builtin_hlsl_asdouble:
975 return handleAsDoubleBuiltin(*this, E);
976 case Builtin::BI__builtin_hlsl_elementwise_clamp: {
977 Value *OpX = EmitScalarExpr(E->getArg(0));
978 Value *OpMin = EmitScalarExpr(E->getArg(1));
979 Value *OpMax = EmitScalarExpr(E->getArg(2));
980
981 QualType Ty = E->getArg(0)->getType();
982 if (auto *VecTy = Ty->getAs<VectorType>())
983 Ty = VecTy->getElementType();
984
985 Intrinsic::ID Intr;
986 if (Ty->isFloatingType()) {
987 Intr = CGM.getHLSLRuntime().getNClampIntrinsic();
988 } else if (Ty->isUnsignedIntegerType()) {
989 Intr = CGM.getHLSLRuntime().getUClampIntrinsic();
990 } else {
991 assert(Ty->isSignedIntegerType());
992 Intr = CGM.getHLSLRuntime().getSClampIntrinsic();
993 }
994 return Builder.CreateIntrinsic(
995 /*ReturnType=*/OpX->getType(), Intr,
996 ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "hlsl.clamp");
997 }
998 case Builtin::BI__builtin_hlsl_crossf16:
999 case Builtin::BI__builtin_hlsl_crossf32: {
1000 Value *Op0 = EmitScalarExpr(E->getArg(0));
1001 Value *Op1 = EmitScalarExpr(E->getArg(1));
1002 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
1004 "cross operands must have a float representation");
1005 // make sure each vector has exactly 3 elements
1006 assert(
1007 E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
1008 E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
1009 "input vectors must have 3 elements each");
1010 return Builder.CreateIntrinsic(
1011 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getCrossIntrinsic(),
1012 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.cross");
1013 }
1014 case Builtin::BI__builtin_hlsl_dot: {
1015 Value *Op0 = EmitScalarExpr(E->getArg(0));
1016 Value *Op1 = EmitScalarExpr(E->getArg(1));
1017 llvm::Type *T0 = Op0->getType();
1018 llvm::Type *T1 = Op1->getType();
1019
1020 // If the arguments are scalars, just emit a multiply
1021 if (!T0->isVectorTy() && !T1->isVectorTy()) {
1022 if (T0->isFloatingPointTy())
1023 return Builder.CreateFMul(Op0, Op1, "hlsl.dot");
1024
1025 if (T0->isIntegerTy())
1026 return Builder.CreateMul(Op0, Op1, "hlsl.dot");
1027
1028 llvm_unreachable(
1029 "Scalar dot product is only supported on ints and floats.");
1030 }
1031 // For vectors, validate types and emit the appropriate intrinsic
1032 assert(CGM.getContext().hasSameUnqualifiedType(E->getArg(0)->getType(),
1033 E->getArg(1)->getType()) &&
1034 "Dot product operands must have the same type.");
1035
1036 auto *VecTy0 = E->getArg(0)->getType()->castAs<VectorType>();
1037 assert(VecTy0 && "Dot product argument must be a vector.");
1038
1039 return Builder.CreateIntrinsic(
1040 /*ReturnType=*/T0->getScalarType(),
1041 getDotProductIntrinsic(CGM.getHLSLRuntime(), VecTy0->getElementType()),
1042 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.dot");
1043 }
1044 case Builtin::BI__builtin_hlsl_dot4add_i8packed: {
1045 Value *X = EmitScalarExpr(E->getArg(0));
1046 Value *Y = EmitScalarExpr(E->getArg(1));
1047 Value *Acc = EmitScalarExpr(E->getArg(2));
1048
1049 Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddI8PackedIntrinsic();
1050 // Note that the argument order disagrees between the builtin and the
1051 // intrinsic here.
1052 return Builder.CreateIntrinsic(
1053 /*ReturnType=*/Acc->getType(), ID, ArrayRef<Value *>{Acc, X, Y},
1054 nullptr, "hlsl.dot4add.i8packed");
1055 }
1056 case Builtin::BI__builtin_hlsl_dot4add_u8packed: {
1057 Value *X = EmitScalarExpr(E->getArg(0));
1058 Value *Y = EmitScalarExpr(E->getArg(1));
1059 Value *Acc = EmitScalarExpr(E->getArg(2));
1060
1061 Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddU8PackedIntrinsic();
1062 // Note that the argument order disagrees between the builtin and the
1063 // intrinsic here.
1064 return Builder.CreateIntrinsic(
1065 /*ReturnType=*/Acc->getType(), ID, ArrayRef<Value *>{Acc, X, Y},
1066 nullptr, "hlsl.dot4add.u8packed");
1067 }
1068 case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
1069 Value *X = EmitScalarExpr(E->getArg(0));
1070
1071 return Builder.CreateIntrinsic(
1072 /*ReturnType=*/ConvertType(E->getType()),
1073 getFirstBitHighIntrinsic(CGM.getHLSLRuntime(), E->getArg(0)->getType()),
1074 ArrayRef<Value *>{X}, nullptr, "hlsl.firstbithigh");
1075 }
1076 case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
1077 Value *X = EmitScalarExpr(E->getArg(0));
1078
1079 return Builder.CreateIntrinsic(
1080 /*ReturnType=*/ConvertType(E->getType()),
1081 CGM.getHLSLRuntime().getFirstBitLowIntrinsic(), ArrayRef<Value *>{X},
1082 nullptr, "hlsl.firstbitlow");
1083 }
1084 case Builtin::BI__builtin_hlsl_lerp: {
1085 Value *X = EmitScalarExpr(E->getArg(0));
1086 Value *Y = EmitScalarExpr(E->getArg(1));
1087 Value *S = EmitScalarExpr(E->getArg(2));
1088 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
1089 llvm_unreachable("lerp operand must have a float representation");
1090 return Builder.CreateIntrinsic(
1091 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
1092 ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
1093 }
1094 case Builtin::BI__builtin_hlsl_normalize: {
1095 Value *X = EmitScalarExpr(E->getArg(0));
1096
1097 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
1098 "normalize operand must have a float representation");
1099
1100 return Builder.CreateIntrinsic(
1101 /*ReturnType=*/X->getType(),
1102 CGM.getHLSLRuntime().getNormalizeIntrinsic(), ArrayRef<Value *>{X},
1103 nullptr, "hlsl.normalize");
1104 }
1105 case Builtin::BI__builtin_hlsl_elementwise_degrees: {
1106 Value *X = EmitScalarExpr(E->getArg(0));
1107
1108 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
1109 "degree operand must have a float representation");
1110
1111 return Builder.CreateIntrinsic(
1112 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getDegreesIntrinsic(),
1113 ArrayRef<Value *>{X}, nullptr, "hlsl.degrees");
1114 }
1115 case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
1116 return handleElementwiseF16ToF32(*this, E);
1117 }
1118 case Builtin::BI__builtin_hlsl_elementwise_f32tof16: {
1119 return handleElementwiseF32ToF16(*this, E);
1120 }
1121 case Builtin::BI__builtin_hlsl_elementwise_frac: {
1122 Value *Op0 = EmitScalarExpr(E->getArg(0));
1123 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
1124 llvm_unreachable("frac operand must have a float representation");
1125 return Builder.CreateIntrinsic(
1126 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getFracIntrinsic(),
1127 ArrayRef<Value *>{Op0}, nullptr, "hlsl.frac");
1128 }
1129 case Builtin::BI__builtin_hlsl_elementwise_isinf: {
1130 Value *Op0 = EmitScalarExpr(E->getArg(0));
1131 llvm::Type *Xty = Op0->getType();
1132 llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext());
1133 if (Xty->isVectorTy()) {
1134 auto *XVecTy = E->getArg(0)->getType()->castAs<VectorType>();
1135 retType = llvm::VectorType::get(
1136 retType, ElementCount::getFixed(XVecTy->getNumElements()));
1137 }
1138 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
1139 llvm_unreachable("isinf operand must have a float representation");
1140 return Builder.CreateIntrinsic(
1141 retType, CGM.getHLSLRuntime().getIsInfIntrinsic(),
1142 ArrayRef<Value *>{Op0}, nullptr, "hlsl.isinf");
1143 }
1144 case Builtin::BI__builtin_hlsl_elementwise_isnan: {
1145 Value *Op0 = EmitScalarExpr(E->getArg(0));
1146 llvm::Type *Xty = Op0->getType();
1147 llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext());
1148 if (Xty->isVectorTy()) {
1149 auto *XVecTy = E->getArg(0)->getType()->castAs<VectorType>();
1150 retType = llvm::VectorType::get(
1151 retType, ElementCount::getFixed(XVecTy->getNumElements()));
1152 }
1153 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
1154 llvm_unreachable("isnan operand must have a float representation");
1155 return Builder.CreateIntrinsic(
1156 retType, CGM.getHLSLRuntime().getIsNaNIntrinsic(),
1157 ArrayRef<Value *>{Op0}, nullptr, "hlsl.isnan");
1158 }
1159 case Builtin::BI__builtin_hlsl_mad: {
1160 Value *M = EmitScalarExpr(E->getArg(0));
1161 Value *A = EmitScalarExpr(E->getArg(1));
1162 Value *B = EmitScalarExpr(E->getArg(2));
1164 return Builder.CreateIntrinsic(
1165 /*ReturnType*/ M->getType(), Intrinsic::fmuladd,
1166 ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad");
1167
1169 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
1170 return Builder.CreateIntrinsic(
1171 /*ReturnType*/ M->getType(), Intrinsic::dx_imad,
1172 ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad");
1173
1174 Value *Mul = Builder.CreateNSWMul(M, A);
1175 return Builder.CreateNSWAdd(Mul, B);
1176 }
1178 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
1179 return Builder.CreateIntrinsic(
1180 /*ReturnType=*/M->getType(), Intrinsic::dx_umad,
1181 ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");
1182
1183 Value *Mul = Builder.CreateNUWMul(M, A);
1184 return Builder.CreateNUWAdd(Mul, B);
1185 }
1186 case Builtin::BI__builtin_hlsl_mul: {
1187 Value *Op0 = EmitScalarExpr(E->getArg(0));
1188 Value *Op1 = EmitScalarExpr(E->getArg(1));
1189 QualType QTy0 = E->getArg(0)->getType();
1190 QualType QTy1 = E->getArg(1)->getType();
1191
1192 bool IsVec0 = QTy0->isVectorType();
1193 bool IsVec1 = QTy1->isVectorType();
1194 bool IsMat0 = QTy0->isConstantMatrixType();
1195 bool IsMat1 = QTy1->isConstantMatrixType();
1196
1197 // The matrix multiply intrinsic only operates on column-major order
1198 // matrices. Therefore matrix memory layout transforms must be inserted
1199 // before and after matrix multiply intrinsics.
1200 bool IsRowMajor = getLangOpts().getDefaultMatrixMemoryLayout() ==
1202
1203 llvm::MatrixBuilder MB(Builder);
1204 if (IsVec0 && IsMat1) {
1205 unsigned N = QTy0->castAs<VectorType>()->getNumElements();
1206 auto *MatTy = QTy1->castAs<ConstantMatrixType>();
1207 unsigned Rows = MatTy->getNumRows();
1208 unsigned Cols = MatTy->getNumColumns();
1209 assert(N == Rows && "vector length must match matrix row count");
1210 if (IsRowMajor)
1211 Op1 = MB.CreateRowMajorToColumnMajorTransform(Op1, Rows, Cols);
1212 return MB.CreateMatrixMultiply(Op0, Op1, 1, N, Cols, "hlsl.mul");
1213 }
1214 if (IsMat0 && IsVec1) {
1215 auto *MatTy = QTy0->castAs<ConstantMatrixType>();
1216 unsigned Rows = MatTy->getNumRows();
1217 unsigned Cols = MatTy->getNumColumns();
1218 assert(QTy1->castAs<VectorType>()->getNumElements() == Cols &&
1219 "vector length must match matrix column count");
1220 if (IsRowMajor)
1221 Op0 = MB.CreateRowMajorToColumnMajorTransform(Op0, Rows, Cols);
1222 return MB.CreateMatrixMultiply(Op0, Op1, Rows, Cols, 1, "hlsl.mul");
1223 }
1224 assert(IsMat0 && IsMat1);
1225 auto *MatTy0 = QTy0->castAs<ConstantMatrixType>();
1226 auto *MatTy1 = QTy1->castAs<ConstantMatrixType>();
1227 unsigned Rows0 = MatTy0->getNumRows();
1228 unsigned Rows1 = MatTy1->getNumRows();
1229 unsigned Cols0 = MatTy0->getNumColumns();
1230 unsigned Cols1 = MatTy1->getNumColumns();
1231 assert(Cols0 == Rows1 &&
1232 "inner matrix dimensions must match for multiplication");
1233 if (IsRowMajor) {
1234 Op0 = MB.CreateRowMajorToColumnMajorTransform(Op0, Rows0, Cols0);
1235 Op1 = MB.CreateRowMajorToColumnMajorTransform(Op1, Rows1, Cols1);
1236 }
1237 Value *Result =
1238 MB.CreateMatrixMultiply(Op0, Op1, Rows0, Cols0, Cols1, "hlsl.mul");
1239 if (IsRowMajor)
1240 Result = MB.CreateColumnMajorToRowMajorTransform(Result, Rows0, Cols1);
1241 return Result;
1242 }
1243 case Builtin::BI__builtin_hlsl_transpose: {
1244 Value *Op0 = EmitScalarExpr(E->getArg(0));
1245 auto *MatTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
1246 unsigned Rows = MatTy->getNumRows();
1247 unsigned Cols = MatTy->getNumColumns();
1248 llvm::MatrixBuilder MB(Builder);
1249 // The matrix transpose intrinsic operates on column-major matrices.
1250 // For row-major, a row-major RxC matrix is equivalent to a column-major
1251 // CxR matrix, so transposing with swapped dimensions produces the correct
1252 // row-major CxR result directly.
1253 bool IsRowMajor = getLangOpts().getDefaultMatrixMemoryLayout() ==
1255 if (IsRowMajor)
1256 return MB.CreateMatrixTranspose(Op0, Cols, Rows);
1257 return MB.CreateMatrixTranspose(Op0, Rows, Cols);
1258 }
1259 case Builtin::BI__builtin_hlsl_elementwise_rcp: {
1260 Value *Op0 = EmitScalarExpr(E->getArg(0));
1261 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
1262 llvm_unreachable("rcp operand must have a float representation");
1263 llvm::Type *Ty = Op0->getType();
1264 llvm::Type *EltTy = Ty->getScalarType();
1265 Constant *One = Ty->isVectorTy()
1266 ? ConstantVector::getSplat(
1267 ElementCount::getFixed(
1268 cast<FixedVectorType>(Ty)->getNumElements()),
1269 ConstantFP::get(EltTy, 1.0))
1270 : ConstantFP::get(EltTy, 1.0);
1271 return Builder.CreateFDiv(One, Op0, "hlsl.rcp");
1272 }
1273 case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
1274 Value *Op0 = EmitScalarExpr(E->getArg(0));
1275 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
1276 llvm_unreachable("rsqrt operand must have a float representation");
1277 return Builder.CreateIntrinsic(
1278 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getRsqrtIntrinsic(),
1279 ArrayRef<Value *>{Op0}, nullptr, "hlsl.rsqrt");
1280 }
1281 case Builtin::BI__builtin_hlsl_elementwise_saturate: {
1282 Value *Op0 = EmitScalarExpr(E->getArg(0));
1283 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
1284 "saturate operand must have a float representation");
1285 return Builder.CreateIntrinsic(
1286 /*ReturnType=*/Op0->getType(),
1287 CGM.getHLSLRuntime().getSaturateIntrinsic(), ArrayRef<Value *>{Op0},
1288 nullptr, "hlsl.saturate");
1289 }
1290 case Builtin::BI__builtin_hlsl_wave_prefix_count_bits: {
1291 Value *Op = EmitScalarExpr(E->getArg(0));
1292 assert(Op->getType()->isIntegerTy(1) &&
1293 "WavePrefixBitCount operand must be a boolean type");
1294
1295 Intrinsic::ID IID =
1297
1298 return EmitIntrinsicCall(IID, ArrayRef{Op}, "hlsl.wave.prefix.bit.count");
1299 }
1300 case Builtin::BI__builtin_hlsl_select: {
1301 Value *OpCond = EmitScalarExpr(E->getArg(0));
1302 RValue RValTrue = EmitAnyExpr(E->getArg(1));
1303 Value *OpTrue =
1304 RValTrue.isScalar()
1305 ? RValTrue.getScalarVal()
1306 : Builder.CreateLoad(RValTrue.getAggregateAddress(), "true_val");
1307 RValue RValFalse = EmitAnyExpr(E->getArg(2));
1308 Value *OpFalse =
1309 RValFalse.isScalar()
1310 ? RValFalse.getScalarVal()
1311 : Builder.CreateLoad(RValFalse.getAggregateAddress(), "false_val");
1312 if (auto *VTy = E->getType()->getAs<VectorType>()) {
1313 if (!OpTrue->getType()->isVectorTy())
1314 OpTrue =
1315 Builder.CreateVectorSplat(VTy->getNumElements(), OpTrue, "splat");
1316 if (!OpFalse->getType()->isVectorTy())
1317 OpFalse =
1318 Builder.CreateVectorSplat(VTy->getNumElements(), OpFalse, "splat");
1319 }
1320
1321 Value *SelectVal =
1322 Builder.CreateSelect(OpCond, OpTrue, OpFalse, "hlsl.select");
1323 if (!RValTrue.isScalar())
1324 Builder.CreateStore(SelectVal, ReturnValue.getAddress(),
1325 ReturnValue.isVolatile());
1326
1327 return SelectVal;
1328 }
1329 case Builtin::BI__builtin_hlsl_step: {
1330 Value *Op0 = EmitScalarExpr(E->getArg(0));
1331 Value *Op1 = EmitScalarExpr(E->getArg(1));
1332 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
1334 "step operands must have a float representation");
1335 return Builder.CreateIntrinsic(
1336 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getStepIntrinsic(),
1337 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.step");
1338 }
1339 case Builtin::BI__builtin_hlsl_wave_active_all_equal: {
1340 Value *Op = EmitScalarExpr(E->getArg(0));
1341
1342 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllEqualIntrinsic();
1343 return EmitIntrinsicCall(ID, {Op->getType()}, {Op});
1344 }
1345 case Builtin::BI__builtin_hlsl_wave_active_all_true: {
1346 Value *Op = EmitScalarExpr(E->getArg(0));
1347 assert(Op->getType()->isIntegerTy(1) &&
1348 "Intrinsic WaveActiveAllTrue operand must be a bool");
1349
1350 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllTrueIntrinsic();
1351 return EmitIntrinsicCall(ID, {Op});
1352 }
1353 case Builtin::BI__builtin_hlsl_wave_active_any_true: {
1354 Value *Op = EmitScalarExpr(E->getArg(0));
1355 assert(Op->getType()->isIntegerTy(1) &&
1356 "Intrinsic WaveActiveAnyTrue operand must be a bool");
1357
1358 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAnyTrueIntrinsic();
1359 return EmitIntrinsicCall(ID, {Op});
1360 }
1361 case Builtin::BI__builtin_hlsl_wave_active_bit_or: {
1362 Value *Op = EmitScalarExpr(E->getArg(0));
1363 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
1364 "Intrinsic WaveActiveBitOr operand must have an unsigned integer "
1365 "representation");
1366
1367 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitOrIntrinsic();
1368 return EmitIntrinsicCall(ID, {Op->getType()}, ArrayRef{Op},
1369 "hlsl.wave.active.bit.or");
1370 }
1371 case Builtin::BI__builtin_hlsl_wave_active_bit_xor: {
1372 Value *Op = EmitScalarExpr(E->getArg(0));
1373 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
1374 "Intrinsic WaveActiveBitXor operand must have an unsigned integer "
1375 "representation");
1376
1377 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitXorIntrinsic();
1378 return EmitIntrinsicCall(ID, {Op->getType()}, ArrayRef{Op},
1379 "hlsl.wave.active.bit.xor");
1380 }
1381 case Builtin::BI__builtin_hlsl_wave_active_bit_and: {
1382 Value *Op = EmitScalarExpr(E->getArg(0));
1383 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
1384 "Intrinsic WaveActiveBitAnd operand must have an unsigned integer "
1385 "representation");
1386
1387 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitAndIntrinsic();
1388 return EmitIntrinsicCall(ID, {Op->getType()}, ArrayRef{Op},
1389 "hlsl.wave.active.bit.and");
1390 }
1391 case Builtin::BI__builtin_hlsl_wave_active_ballot: {
1392 [[maybe_unused]] Value *Op = EmitScalarExpr(E->getArg(0));
1393 assert(Op->getType()->isIntegerTy(1) &&
1394 "Intrinsic WaveActiveBallot operand must be a bool");
1395
1396 return handleHlslWaveActiveBallot(*this, E);
1397 }
1398 case Builtin::BI__builtin_hlsl_wave_active_count_bits: {
1399 Value *OpExpr = EmitScalarExpr(E->getArg(0));
1400 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveCountBitsIntrinsic();
1401 return EmitIntrinsicCall(ID, ArrayRef{OpExpr});
1402 }
1403 case Builtin::BI__builtin_hlsl_wave_active_sum: {
1404 // Due to the use of variadic arguments, explicitly retrieve argument
1405 Value *OpExpr = EmitScalarExpr(E->getArg(0));
1406 Intrinsic::ID IID = getWaveActiveSumIntrinsic(
1407 getTarget().getTriple().getArch(), E->getArg(0)->getType());
1408
1409 return EmitIntrinsicCall(IID, {OpExpr->getType()}, ArrayRef{OpExpr},
1410 "hlsl.wave.active.sum");
1411 }
1412 case Builtin::BI__builtin_hlsl_wave_active_product: {
1413 // Due to the use of variadic arguments, explicitly retrieve argument
1414 Value *OpExpr = EmitScalarExpr(E->getArg(0));
1415 Intrinsic::ID IID = getWaveActiveProductIntrinsic(
1416 getTarget().getTriple().getArch(), E->getArg(0)->getType());
1417
1418 return EmitIntrinsicCall(IID, {OpExpr->getType()}, ArrayRef{OpExpr},
1419 "hlsl.wave.active.product");
1420 }
1421 case Builtin::BI__builtin_hlsl_wave_active_max: {
1422 // Due to the use of variadic arguments, explicitly retrieve argument
1423 Value *OpExpr = EmitScalarExpr(E->getArg(0));
1424 QualType QT = E->getArg(0)->getType();
1425 Intrinsic::ID IID;
1426 if (QT->isUnsignedIntegerType())
1427 IID = CGM.getHLSLRuntime().getWaveActiveUMaxIntrinsic();
1428 else
1429 IID = CGM.getHLSLRuntime().getWaveActiveMaxIntrinsic();
1430
1431 return EmitIntrinsicCall(IID, {OpExpr->getType()}, ArrayRef{OpExpr},
1432 "hlsl.wave.active.max");
1433 }
1434 case Builtin::BI__builtin_hlsl_wave_active_min: {
1435 // Due to the use of variadic arguments, explicitly retrieve argument
1436 Value *OpExpr = EmitScalarExpr(E->getArg(0));
1437 QualType QT = E->getArg(0)->getType();
1438 Intrinsic::ID IID;
1439 if (QT->isUnsignedIntegerType())
1440 IID = CGM.getHLSLRuntime().getWaveActiveUMinIntrinsic();
1441 else
1442 IID = CGM.getHLSLRuntime().getWaveActiveMinIntrinsic();
1443
1444 return EmitIntrinsicCall(IID, {OpExpr->getType()}, ArrayRef{OpExpr},
1445 "hlsl.wave.active.min");
1446 }
1447 case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
1448 // We don't define a SPIR-V intrinsic, instead it is a SPIR-V built-in
1449 // defined in SPIRVBuiltins.td. So instead we manually get the matching name
1450 // for the DirectX intrinsic and the demangled builtin name
1451 switch (CGM.getTarget().getTriple().getArch()) {
1452 case llvm::Triple::dxil:
1453 return EmitIntrinsicCall(Intrinsic::dx_wave_getlaneindex);
1454 case llvm::Triple::spirv:
1455 return EmitRuntimeCall(CGM.CreateRuntimeFunction(
1456 llvm::FunctionType::get(IntTy, {}, false),
1457 "__hlsl_wave_get_lane_index", {}, false, true));
1458 default:
1459 llvm_unreachable(
1460 "Intrinsic WaveGetLaneIndex not supported by target architecture");
1461 }
1462 }
1463 case Builtin::BI__builtin_hlsl_wave_is_first_lane: {
1464 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveIsFirstLaneIntrinsic();
1465 return EmitIntrinsicCall(ID);
1466 }
1467 case Builtin::BI__builtin_hlsl_wave_get_lane_count: {
1468 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveGetLaneCountIntrinsic();
1469 return EmitIntrinsicCall(ID);
1470 }
1471 case Builtin::BI__builtin_hlsl_wave_read_lane_at: {
1472 // Due to the use of variadic arguments we must explicitly retrieve them and
1473 // create our function type.
1474 Value *OpExpr = EmitScalarExpr(E->getArg(0));
1475 Value *OpIndex = EmitScalarExpr(E->getArg(1));
1476 return EmitIntrinsicCall(CGM.getHLSLRuntime().getWaveReadLaneAtIntrinsic(),
1477 {OpExpr->getType()}, ArrayRef{OpExpr, OpIndex},
1478 "hlsl.wave.readlane");
1479 }
1480 case Builtin::BI__builtin_hlsl_wave_prefix_sum: {
1481 Value *OpExpr = EmitScalarExpr(E->getArg(0));
1482 Intrinsic::ID IID = getWavePrefixSumIntrinsic(
1483 getTarget().getTriple().getArch(), E->getArg(0)->getType());
1484 return EmitIntrinsicCall(IID, {OpExpr->getType()}, ArrayRef{OpExpr},
1485 "hlsl.wave.prefix.sum");
1486 }
1487 case Builtin::BI__builtin_hlsl_wave_prefix_product: {
1488 Value *OpExpr = EmitScalarExpr(E->getArg(0));
1489 Intrinsic::ID IID = getWavePrefixProductIntrinsic(
1490 getTarget().getTriple().getArch(), E->getArg(0)->getType());
1491 return EmitIntrinsicCall(IID, {OpExpr->getType()}, ArrayRef{OpExpr},
1492 "hlsl.wave.prefix.product");
1493 }
1494 case Builtin::BI__builtin_hlsl_quad_read_across_x: {
1495 Value *OpExpr = EmitScalarExpr(E->getArg(0));
1496 Intrinsic::ID ID = CGM.getHLSLRuntime().getQuadReadAcrossXIntrinsic();
1497 return EmitIntrinsicCall(ID, {OpExpr->getType()}, ArrayRef{OpExpr},
1498 "hlsl.quad.read.across.x");
1499 }
1500 case Builtin::BI__builtin_hlsl_quad_read_across_y: {
1501 Value *OpExpr = EmitScalarExpr(E->getArg(0));
1502 Intrinsic::ID ID = CGM.getHLSLRuntime().getQuadReadAcrossYIntrinsic();
1503 return EmitIntrinsicCall(ID, {OpExpr->getType()}, ArrayRef{OpExpr},
1504 "hlsl.quad.read.across.y");
1505 }
1506 case Builtin::BI__builtin_hlsl_elementwise_sign: {
1507 auto *Arg0 = E->getArg(0);
1508 Value *Op0 = EmitScalarExpr(Arg0);
1509 llvm::Type *Xty = Op0->getType();
1510 llvm::Type *retType = llvm::Type::getInt32Ty(this->getLLVMContext());
1511 if (Xty->isVectorTy()) {
1512 auto *XVecTy = Arg0->getType()->castAs<VectorType>();
1513 retType = llvm::VectorType::get(
1514 retType, ElementCount::getFixed(XVecTy->getNumElements()));
1515 }
1516 assert((Arg0->getType()->hasFloatingRepresentation() ||
1517 Arg0->getType()->hasIntegerRepresentation()) &&
1518 "sign operand must have a float or int representation");
1519
1520 if (Arg0->getType()->hasUnsignedIntegerRepresentation()) {
1521 Value *Cmp = Builder.CreateICmpEQ(Op0, ConstantInt::get(Xty, 0));
1522 return Builder.CreateSelect(Cmp, ConstantInt::get(retType, 0),
1523 ConstantInt::get(retType, 1), "hlsl.sign");
1524 }
1525
1526 return Builder.CreateIntrinsic(
1527 retType, CGM.getHLSLRuntime().getSignIntrinsic(),
1528 ArrayRef<Value *>{Op0}, nullptr, "hlsl.sign");
1529 }
1530 case Builtin::BI__builtin_hlsl_elementwise_radians: {
1531 Value *Op0 = EmitScalarExpr(E->getArg(0));
1532 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
1533 "radians operand must have a float representation");
1534 return Builder.CreateIntrinsic(
1535 /*ReturnType=*/Op0->getType(),
1536 CGM.getHLSLRuntime().getRadiansIntrinsic(), ArrayRef<Value *>{Op0},
1537 nullptr, "hlsl.radians");
1538 }
1539 case Builtin::BI__builtin_hlsl_buffer_update_counter: {
1540 Value *ResHandle = EmitScalarExpr(E->getArg(0));
1541 Value *Offset = EmitScalarExpr(E->getArg(1));
1542 Value *OffsetI8 = Builder.CreateIntCast(Offset, Int8Ty, true);
1543 return Builder.CreateIntrinsic(
1544 /*ReturnType=*/Offset->getType(),
1545 CGM.getHLSLRuntime().getBufferUpdateCounterIntrinsic(),
1546 ArrayRef<Value *>{ResHandle, OffsetI8}, nullptr);
1547 }
1548 case Builtin::BI__builtin_hlsl_elementwise_splitdouble: {
1549
1550 assert((E->getArg(0)->getType()->hasFloatingRepresentation() &&
1553 "asuint operands types mismatch");
1554 return handleHlslSplitdouble(E, this);
1555 }
1556 case Builtin::BI__builtin_hlsl_elementwise_clip:
1557 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
1558 "clip operands types mismatch");
1559 return handleHlslClip(E, this);
1560 case Builtin::BI__builtin_hlsl_all_memory_barrier: {
1561 Intrinsic::ID ID = CGM.getHLSLRuntime().getAllMemoryBarrierIntrinsic();
1562 return EmitIntrinsicCall(ID);
1563 }
1564 case Builtin::BI__builtin_hlsl_all_memory_barrier_with_group_sync: {
1565 Intrinsic::ID ID =
1566 CGM.getHLSLRuntime().getAllMemoryBarrierWithGroupSyncIntrinsic();
1567 return EmitIntrinsicCall(ID);
1568 }
1569 case Builtin::BI__builtin_hlsl_device_memory_barrier: {
1570 Intrinsic::ID ID = CGM.getHLSLRuntime().getDeviceMemoryBarrierIntrinsic();
1571 return EmitIntrinsicCall(ID);
1572 }
1573 case Builtin::BI__builtin_hlsl_device_memory_barrier_with_group_sync: {
1574 Intrinsic::ID ID =
1575 CGM.getHLSLRuntime().getDeviceMemoryBarrierWithGroupSyncIntrinsic();
1576 return EmitIntrinsicCall(ID);
1577 }
1578 case Builtin::BI__builtin_hlsl_group_memory_barrier: {
1579 Intrinsic::ID ID = CGM.getHLSLRuntime().getGroupMemoryBarrierIntrinsic();
1580 return EmitIntrinsicCall(ID);
1581 }
1582 case Builtin::BI__builtin_hlsl_group_memory_barrier_with_group_sync: {
1583 Intrinsic::ID ID =
1584 CGM.getHLSLRuntime().getGroupMemoryBarrierWithGroupSyncIntrinsic();
1585 return EmitIntrinsicCall(ID);
1586 }
1587 case Builtin::BI__builtin_hlsl_elementwise_ddx_coarse: {
1588 Value *Op0 = EmitScalarExpr(E->getArg(0));
1589 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
1590 llvm_unreachable("ddx_coarse operand must have a float representation");
1591 Intrinsic::ID ID = CGM.getHLSLRuntime().getDdxCoarseIntrinsic();
1592 return Builder.CreateIntrinsic(/*ReturnType=*/Op0->getType(), ID,
1593 ArrayRef<Value *>{Op0}, nullptr,
1594 "hlsl.ddx.coarse");
1595 }
1596 case Builtin::BI__builtin_hlsl_elementwise_ddy_coarse: {
1597 Value *Op0 = EmitScalarExpr(E->getArg(0));
1598 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
1599 llvm_unreachable("ddy_coarse operand must have a float representation");
1600 Intrinsic::ID ID = CGM.getHLSLRuntime().getDdyCoarseIntrinsic();
1601 return Builder.CreateIntrinsic(/*ReturnType=*/Op0->getType(), ID,
1602 ArrayRef<Value *>{Op0}, nullptr,
1603 "hlsl.ddy.coarse");
1604 }
1605 case Builtin::BI__builtin_hlsl_elementwise_ddx_fine: {
1606 Value *Op0 = EmitScalarExpr(E->getArg(0));
1607 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
1608 llvm_unreachable("ddx_fine operand must have a float representation");
1609 Intrinsic::ID ID = CGM.getHLSLRuntime().getDdxFineIntrinsic();
1610 return Builder.CreateIntrinsic(/*ReturnType=*/Op0->getType(), ID,
1611 ArrayRef<Value *>{Op0}, nullptr,
1612 "hlsl.ddx.fine");
1613 }
1614 case Builtin::BI__builtin_hlsl_elementwise_ddy_fine: {
1615 Value *Op0 = EmitScalarExpr(E->getArg(0));
1616 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
1617 llvm_unreachable("ddy_fine operand must have a float representation");
1618 Intrinsic::ID ID = CGM.getHLSLRuntime().getDdyFineIntrinsic();
1619 return Builder.CreateIntrinsic(/*ReturnType=*/Op0->getType(), ID,
1620 ArrayRef<Value *>{Op0}, nullptr,
1621 "hlsl.ddy.fine");
1622 }
1623 case Builtin::BI__builtin_get_spirv_spec_constant_bool:
1624 case Builtin::BI__builtin_get_spirv_spec_constant_short:
1625 case Builtin::BI__builtin_get_spirv_spec_constant_ushort:
1626 case Builtin::BI__builtin_get_spirv_spec_constant_int:
1627 case Builtin::BI__builtin_get_spirv_spec_constant_uint:
1628 case Builtin::BI__builtin_get_spirv_spec_constant_longlong:
1629 case Builtin::BI__builtin_get_spirv_spec_constant_ulonglong:
1630 case Builtin::BI__builtin_get_spirv_spec_constant_half:
1631 case Builtin::BI__builtin_get_spirv_spec_constant_float:
1632 case Builtin::BI__builtin_get_spirv_spec_constant_double: {
1633 llvm::Function *SpecConstantFn = getSpecConstantFunction(E->getType());
1634 llvm::Value *SpecId = EmitScalarExpr(E->getArg(0));
1635 llvm::Value *DefaultVal = EmitScalarExpr(E->getArg(1));
1636 llvm::Value *Args[] = {SpecId, DefaultVal};
1637 return Builder.CreateCall(SpecConstantFn, Args);
1638 }
1639 }
1640 return nullptr;
1641}
1642
1644 const clang::QualType &SpecConstantType) {
1645
// Returns the module-level function used to materialize a SPIR-V
// specialization constant of type SpecConstantType, declaring it on first
// use. The builtin lowering in this file calls the result with two operands:
// the spec-constant id and the default value.
1646 // Find or create the declaration for the function.
1647 llvm::Module *M = &CGM.getModule();
// The lookup key is the name produced by getSpecConstantFunctionName for
// this type, so repeated requests for the same name reuse one declaration.
1648 std::string MangledName =
1649 getSpecConstantFunctionName(SpecConstantType, getContext());
1650 llvm::Function *SpecConstantFn = M->getFunction(MangledName);
1651
// Not declared in this module yet: build the signature
// `RetTy (int, RetTy)` (non-variadic) and add an external-linkage function
// with that name. No body is attached here.
1652 if (!SpecConstantFn) {
1653 llvm::Type *IntType = ConvertType(getContext().IntTy);
1654 llvm::Type *RetTy = ConvertType(SpecConstantType);
// The second parameter has the same LLVM type as the return value.
1655 llvm::Type *ArgTypes[] = {IntType, RetTy};
1656 llvm::FunctionType *FnTy = llvm::FunctionType::get(RetTy, ArgTypes, false);
1657 SpecConstantFn = llvm::Function::Create(
1658 FnTy, llvm::GlobalValue::ExternalLinkage, MangledName, M);
1659 }
// Either the pre-existing function or the one just created.
1660 return SpecConstantFn;
1661}
llvm::Value * EmitOverflowIntrinsic(CodeGenFunction &CGF, const Intrinsic::ID IntrinsicID, llvm::Value *X, llvm::Value *Y, llvm::Value *&Carry)
Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.* depending on IntrinsicID.
static Intrinsic::ID getWavePrefixSumIntrinsic(llvm::Triple::ArchType Arch, QualType QT)
static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT)
static Intrinsic::ID getPrefixCountBitsIntrinsic(llvm::Triple::ArchType Arch)
static Intrinsic::ID getWaveActiveSumIntrinsic(llvm::Triple::ArchType Arch, QualType QT)
static std::string getSpecConstantFunctionName(clang::QualType SpecConstantType, ASTContext &Context)
static Value * handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF)
static Value * emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr, LValue &Stride)
static Intrinsic::ID getWavePrefixProductIntrinsic(llvm::Triple::ArchType Arch, QualType QT)
static Value * emitHlslClamp(CodeGenFunction &CGF, const CallExpr *E, unsigned ClampArgIndex)
static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT)
static llvm::Type * getOffsetType(CodeGenModule &CGM, llvm::Type *CoordTy)
static Value * emitGetDimensions(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned NumRetComps, bool HasLod)
static Value * handleElementwiseF16ToF32(CodeGenFunction &CGF, const CallExpr *E)
static Value * handleAsDoubleBuiltin(CodeGenFunction &CGF, const CallExpr *E)
static Value * handleHlslWaveActiveBallot(CodeGenFunction &CGF, const CallExpr *E)
static Value * emitHlslOffset(CodeGenFunction &CGF, const CallExpr *E, unsigned OffsetArgIndex, llvm::Type *OffsetTy)
static Value * handleElementwiseF32ToF16(CodeGenFunction &CGF, const CallExpr *E)
static Intrinsic::ID getWaveActiveProductIntrinsic(llvm::Triple::ArchType Arch, QualType QT)
static Value * handleHlslClip(const CallExpr *E, CodeGenFunction *CGF)
Result
Implement __builtin_bit_cast and related operations.
#define X(type, name)
Definition Value.h:97
static StringRef getTriple(const Command &Job)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:229
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition Expr.h:2946
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition Expr.h:3150
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition Expr.h:3137
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition Address.h:128
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition CGBuilder.h:146
CallArgList - Type for representing both the value and type of arguments in a call.
Definition CGCall.h:275
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Type * ConvertType(QualType T)
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
const LangOptions & getLangOpts() const
const TargetInfo & getTarget() const
llvm::Function * getSpecConstantFunction(const clang::QualType &SpecConstantType)
LValue EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, CallArgList &Args, QualType Ty)
Definition CGExpr.cpp:6377
void EmitWritebacks(const CallArgList &Args)
EmitWritebacks - Emit the writebacks for the arguments of a call.
Definition CGCall.cpp:5058
llvm::CallInst * EmitIntrinsicCall(llvm::Intrinsic::ID ID, const Twine &Name="")
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
RValue EmitAnyExpr(const Expr *E, AggValueSlot aggSlot=AggValueSlot::ignored(), bool ignoreResult=false)
EmitAnyExpr - Emit code to compute the specified expression which can have any type.
Definition CGExpr.cpp:279
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
Definition CGExpr.cpp:1713
llvm::LLVMContext & getLLVMContext()
llvm::Value * EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
This class organizes the cross-function state that is used while generating LLVM code.
CGHLSLRuntime & getHLSLRuntime()
Return a reference to the configured HLSL runtime.
const TargetInfo & getTarget() const
const llvm::Triple & getTriple() const
LValue - This represents an lvalue reference.
Definition CGValue.h:183
Address getAddress() const
Definition CGValue.h:373
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition CGValue.h:42
bool isScalar() const
Definition CGValue.h:64
Address getAggregateAddress() const
getAggregateAddress() - Return the address of the aggregate.
Definition CGValue.h:84
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition CGValue.h:72
ReturnValueSlot - Contains the address where the return value of a function can be stored,...
Definition CGCall.h:382
Represents a concrete matrix type with constant number of rows and columns.
Definition TypeBase.h:4449
unsigned getNumColumns() const
Returns the number of columns in the matrix.
Definition TypeBase.h:4468
unsigned getNumRows() const
Returns the number of rows in the matrix.
Definition TypeBase.h:4465
The name of a declaration.
This represents one expression.
Definition Expr.h:112
QualType getType() const
Definition Expr.h:144
Represents a function declaration or definition.
Definition Decl.h:2018
static FunctionDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation NLoc, DeclarationName N, QualType T, TypeSourceInfo *TInfo, StorageClass SC, bool UsesFPIntrin=false, bool isInlineSpecified=false, bool hasWrittenPrototype=true, ConstexprSpecKind ConstexprKind=ConstexprSpecKind::Unspecified, const AssociatedConstraint &TrailingRequiresClause={})
Definition Decl.h:2207
Represents a parameter to a function.
Definition Decl.h:1808
static ParmVarDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, StorageClass S, Expr *DefArg)
Definition Decl.cpp:2931
A (possibly-)qualified type.
Definition TypeBase.h:937
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition TypeBase.h:8445
Encodes a location in the source.
bool areArgsDestroyedLeftToRightInCallee() const
Are arguments to a call destroyed left to right in the callee?
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
TargetCXXABI getCXXABI() const
Get the C++ ABI currently in use.
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition Type.cpp:2266
bool hasIntegerRepresentation() const
Determine whether this type has an integer representation of some sort, e.g., it is an integer type o...
Definition Type.cpp:2119
bool isConstantMatrixType() const
Definition TypeBase.h:8849
const T * castAs() const
Member-template castAs<specific type>.
Definition TypeBase.h:9342
bool hasUnsignedIntegerRepresentation() const
Determine whether this type has an unsigned integer representation of some sort, e....
Definition Type.cpp:2376
bool hasSignedIntegerRepresentation() const
Determine whether this type has a signed integer representation of some sort, e.g....
Definition Type.cpp:2310
bool hasFloatingRepresentation() const
Determine whether this type has a floating-point representation of some sort, e.g....
Definition Type.cpp:2397
bool isVectorType() const
Definition TypeBase.h:8821
bool isFloatingType() const
Definition Type.cpp:2389
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition Type.cpp:2332
const T * getAs() const
Member-template getAs<specific type>.
Definition TypeBase.h:9275
QualType getType() const
Definition Value.cpp:237
Represents a GCC generic vector type.
Definition TypeBase.h:4237
unsigned getNumElements() const
Definition TypeBase.h:4252
The JSON file list parser is used to communicate input to InstallAPI.
@ SC_Extern
Definition Specifiers.h:252
@ SC_None
Definition Specifiers.h:251
Expr * Cond
};
@ Result
The result type of a method or function.
Definition TypeBase.h:905
U cast(CodeGen::Address addr)
Definition Address.h:327
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
Extra information about a function prototype.
Definition TypeBase.h:5454