clang 23.0.0git
CGHLSLBuiltins.cpp
Go to the documentation of this file.
1//===------- CGHLSLBuiltins.cpp - Emit LLVM Code for HLSL builtins --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit HLSL Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGBuiltin.h"
14#include "CGHLSLRuntime.h"
15#include "CodeGenFunction.h"
16#include "llvm/IR/MatrixBuilder.h"
17
18using namespace clang;
19using namespace CodeGen;
20using namespace llvm;
21
25 "asdouble operands types mismatch");
26 Value *OpLowBits = CGF.EmitScalarExpr(E->getArg(0));
27 Value *OpHighBits = CGF.EmitScalarExpr(E->getArg(1));
28
29 llvm::Type *ResultType = CGF.DoubleTy;
30 int N = 1;
31 if (auto *VTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) {
32 N = VTy->getNumElements();
33 ResultType = llvm::FixedVectorType::get(CGF.DoubleTy, N);
34 }
35
36 if (CGF.CGM.getTarget().getTriple().isDXIL())
37 return CGF.Builder.CreateIntrinsic(
38 /*ReturnType=*/ResultType, Intrinsic::dx_asdouble,
39 {OpLowBits, OpHighBits}, nullptr, "hlsl.asdouble");
40
41 if (!E->getArg(0)->getType()->isVectorType()) {
42 OpLowBits = CGF.Builder.CreateVectorSplat(1, OpLowBits);
43 OpHighBits = CGF.Builder.CreateVectorSplat(1, OpHighBits);
44 }
45
47 for (int i = 0; i < N; i++) {
48 Mask.push_back(i);
49 Mask.push_back(i + N);
50 }
51
52 Value *BitVec = CGF.Builder.CreateShuffleVector(OpLowBits, OpHighBits, Mask);
53
54 return CGF.Builder.CreateBitCast(BitVec, ResultType);
55}
56
58 Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
59
60 Constant *FZeroConst = ConstantFP::getZero(CGF->FloatTy);
61 Value *CMP;
62 Value *LastInstr;
63
64 if (const auto *VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) {
65 FZeroConst = ConstantVector::getSplat(
66 ElementCount::getFixed(VecTy->getNumElements()), FZeroConst);
67 auto *FCompInst = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst);
68 CMP = CGF->Builder.CreateIntrinsic(
69 CGF->Builder.getInt1Ty(), CGF->CGM.getHLSLRuntime().getAnyIntrinsic(),
70 {FCompInst});
71 } else {
72 CMP = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst);
73 }
74
75 if (CGF->CGM.getTarget().getTriple().isDXIL()) {
76 LastInstr = CGF->Builder.CreateIntrinsic(Intrinsic::dx_discard, {CMP});
77 } else if (CGF->CGM.getTarget().getTriple().isSPIRV()) {
78 BasicBlock *LT0 = CGF->createBasicBlock("lt0", CGF->CurFn);
79 BasicBlock *End = CGF->createBasicBlock("end", CGF->CurFn);
80
81 CGF->Builder.CreateCondBr(CMP, LT0, End);
82
83 CGF->Builder.SetInsertPoint(LT0);
84
85 CGF->Builder.CreateIntrinsic(Intrinsic::spv_discard, {});
86
87 LastInstr = CGF->Builder.CreateBr(End);
88 CGF->Builder.SetInsertPoint(End);
89 } else {
90 llvm_unreachable("Backend Codegen not supported.");
91 }
92
93 return LastInstr;
94}
95
97 Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
98 const auto *OutArg1 = dyn_cast<HLSLOutArgExpr>(E->getArg(1));
99 const auto *OutArg2 = dyn_cast<HLSLOutArgExpr>(E->getArg(2));
100
101 CallArgList Args;
102 LValue Op1TmpLValue =
103 CGF->EmitHLSLOutArgExpr(OutArg1, Args, OutArg1->getType());
104 LValue Op2TmpLValue =
105 CGF->EmitHLSLOutArgExpr(OutArg2, Args, OutArg2->getType());
106
108 Args.reverseWritebacks();
109
110 Value *LowBits = nullptr;
111 Value *HighBits = nullptr;
112
113 if (CGF->CGM.getTarget().getTriple().isDXIL()) {
114 llvm::Type *RetElementTy = CGF->Int32Ty;
115 if (auto *Op0VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>())
116 RetElementTy = llvm::VectorType::get(
117 CGF->Int32Ty, ElementCount::getFixed(Op0VecTy->getNumElements()));
118 auto *RetTy = llvm::StructType::get(RetElementTy, RetElementTy);
119
120 CallInst *CI = CGF->Builder.CreateIntrinsic(
121 RetTy, Intrinsic::dx_splitdouble, {Op0}, nullptr, "hlsl.splitdouble");
122
123 LowBits = CGF->Builder.CreateExtractValue(CI, 0);
124 HighBits = CGF->Builder.CreateExtractValue(CI, 1);
125 } else {
126 // For non-DXIL targets we generate the instructions.
127
128 if (!Op0->getType()->isVectorTy()) {
129 FixedVectorType *DestTy = FixedVectorType::get(CGF->Int32Ty, 2);
130 Value *Bitcast = CGF->Builder.CreateBitCast(Op0, DestTy);
131
132 LowBits = CGF->Builder.CreateExtractElement(Bitcast, (uint64_t)0);
133 HighBits = CGF->Builder.CreateExtractElement(Bitcast, 1);
134 } else {
135 int NumElements = 1;
136 if (const auto *VecTy =
138 NumElements = VecTy->getNumElements();
139
140 FixedVectorType *Uint32VecTy =
141 FixedVectorType::get(CGF->Int32Ty, NumElements * 2);
142 Value *Uint32Vec = CGF->Builder.CreateBitCast(Op0, Uint32VecTy);
143 if (NumElements == 1) {
144 LowBits = CGF->Builder.CreateExtractElement(Uint32Vec, (uint64_t)0);
145 HighBits = CGF->Builder.CreateExtractElement(Uint32Vec, 1);
146 } else {
147 SmallVector<int> EvenMask, OddMask;
148 for (int I = 0, E = NumElements; I != E; ++I) {
149 EvenMask.push_back(I * 2);
150 OddMask.push_back(I * 2 + 1);
151 }
152 LowBits = CGF->Builder.CreateShuffleVector(Uint32Vec, EvenMask);
153 HighBits = CGF->Builder.CreateShuffleVector(Uint32Vec, OddMask);
154 }
155 }
156 }
157 CGF->Builder.CreateStore(LowBits, Op1TmpLValue.getAddress());
158 auto *LastInst =
159 CGF->Builder.CreateStore(HighBits, Op2TmpLValue.getAddress());
160 CGF->EmitWritebacks(Args);
161 return LastInst;
162}
163
165 const CallExpr *E) {
166 Value *Cond = CGF.EmitScalarExpr(E->getArg(0));
167 llvm::Type *I32 = CGF.Int32Ty;
168
169 llvm::Type *Vec4I32 = llvm::FixedVectorType::get(I32, 4);
170 [[maybe_unused]] llvm::StructType *Struct4I32 =
171 llvm::StructType::get(CGF.getLLVMContext(), {I32, I32, I32, I32});
172
173 if (CGF.CGM.getTarget().getTriple().isDXIL()) {
174 // Call DXIL intrinsic: returns { i32, i32, i32, i32 }
175 llvm::Function *Fn = CGF.CGM.getIntrinsic(Intrinsic::dx_wave_ballot, {I32});
176
177 Value *StructVal = CGF.EmitRuntimeCall(Fn, Cond);
178 assert(StructVal->getType() == Struct4I32 &&
179 "dx.wave.ballot must return {i32,i32,i32,i32}");
180
181 // Reassemble struct to <4 x i32>
182 llvm::Value *VecVal = llvm::PoisonValue::get(Vec4I32);
183 for (unsigned I = 0; I < 4; ++I) {
184 Value *Elt = CGF.Builder.CreateExtractValue(StructVal, I);
185 VecVal =
186 CGF.Builder.CreateInsertElement(VecVal, Elt, CGF.Builder.getInt32(I));
187 }
188
189 return VecVal;
190 }
191
192 if (CGF.CGM.getTarget().getTriple().isSPIRV())
193 return CGF.EmitRuntimeCall(
194 CGF.CGM.getIntrinsic(Intrinsic::spv_subgroup_ballot), Cond);
195
196 llvm_unreachable(
197 "WaveActiveBallot is only supported for DXIL and SPIRV targets");
198}
199
201 const CallExpr *E) {
202 Value *Op0 = CGF.EmitScalarExpr(E->getArg(0));
203 QualType Op0Ty = E->getArg(0)->getType();
204 llvm::Type *ResType = CGF.FloatTy;
205 uint64_t NumElements = 0;
206 if (Op0->getType()->isVectorTy()) {
207 NumElements =
208 E->getArg(0)->getType()->castAs<clang::VectorType>()->getNumElements();
209 ResType =
210 llvm::VectorType::get(ResType, ElementCount::getFixed(NumElements));
211 }
213 llvm_unreachable(
214 "f16tof32 operand must have an unsigned int representation");
215
216 if (CGF.CGM.getTriple().isDXIL())
217 return CGF.Builder.CreateIntrinsic(ResType, Intrinsic::dx_legacyf16tof32,
218 ArrayRef<Value *>{Op0}, nullptr,
219 "hlsl.f16tof32");
220
221 if (CGF.CGM.getTriple().isSPIRV()) {
222 // We use the SPIRV UnpackHalf2x16 operation to avoid the need for the
223 // Int16 and Float16 capabilities
224 auto *UnpackType =
225 llvm::VectorType::get(CGF.FloatTy, ElementCount::getFixed(2));
226
227 if (NumElements == 0) {
228 // a scalar input - simply extract the first element of the unpacked
229 // vector
230 Value *Unpack = CGF.Builder.CreateIntrinsic(
231 UnpackType, Intrinsic::spv_unpackhalf2x16, ArrayRef<Value *>{Op0});
232 return CGF.Builder.CreateExtractElement(Unpack, (uint64_t)0);
233 }
234
235 // a vector input - build a congruent output vector by iterating through
236 // the input vector calling unpackhalf2x16 for each element
237 Value *Result = PoisonValue::get(ResType);
238 for (uint64_t I = 0; I < NumElements; I++) {
239 Value *InVal = CGF.Builder.CreateExtractElement(Op0, I);
240 Value *Unpack = CGF.Builder.CreateIntrinsic(
241 UnpackType, Intrinsic::spv_unpackhalf2x16, ArrayRef<Value *>{InVal});
242 Value *Res = CGF.Builder.CreateExtractElement(Unpack, (uint64_t)0);
243 Result = CGF.Builder.CreateInsertElement(Result, Res, I);
244 }
245 return Result;
246 }
247
248 llvm_unreachable("Intrinsic F16ToF32 not supported by target architecture");
249}
250
252 const CallExpr *E) {
253 Value *Op0 = CGF.EmitScalarExpr(E->getArg(0));
254 QualType Op0Ty = E->getArg(0)->getType();
255 llvm::Type *ResType = CGF.IntTy;
256 uint64_t NumElements = 0;
257 if (Op0->getType()->isVectorTy()) {
258 NumElements =
259 E->getArg(0)->getType()->castAs<clang::VectorType>()->getNumElements();
260 ResType =
261 llvm::VectorType::get(ResType, ElementCount::getFixed(NumElements));
262 }
263 if (!Op0Ty->hasFloatingRepresentation())
264 llvm_unreachable("f32tof16 operand must have a float representation");
265
266 if (CGF.CGM.getTriple().isDXIL())
267 return CGF.Builder.CreateIntrinsic(ResType, Intrinsic::dx_legacyf32tof16,
268 ArrayRef<Value *>{Op0}, nullptr,
269 "hlsl.f32tof16");
270
271 if (CGF.CGM.getTriple().isSPIRV()) {
272 // We use the SPIRV PackHalf2x16 operation to avoid the need for the
273 // Int16 and Float16 capabilities
274 auto *PackType =
275 llvm::VectorType::get(CGF.FloatTy, ElementCount::getFixed(2));
276
277 if (NumElements == 0) {
278 // a scalar input - simply insert the scalar in the first element
279 // of the 2 element float vector
280 Value *Float2 = Constant::getNullValue(PackType);
281 Float2 = CGF.Builder.CreateInsertElement(Float2, Op0, (uint64_t)0);
282 Value *Result = CGF.Builder.CreateIntrinsic(
283 ResType, Intrinsic::spv_packhalf2x16, ArrayRef<Value *>{Float2});
284 return Result;
285 }
286
287 // a vector input - build a congruent output vector by iterating through
288 // the input vector calling packhalf2x16 for each element
289 Value *Result = PoisonValue::get(ResType);
290 for (uint64_t I = 0; I < NumElements; I++) {
291 Value *Float2 = Constant::getNullValue(PackType);
292 Value *InVal = CGF.Builder.CreateExtractElement(Op0, I);
293 Float2 = CGF.Builder.CreateInsertElement(Float2, InVal, (uint64_t)0);
294 Value *Res = CGF.Builder.CreateIntrinsic(
295 CGF.IntTy, Intrinsic::spv_packhalf2x16, ArrayRef<Value *>{Float2});
296 Result = CGF.Builder.CreateInsertElement(Result, Res, I);
297 }
298 return Result;
299 }
300
301 llvm_unreachable("Intrinsic F32ToF16 not supported by target architecture");
302}
303
304static Value *emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr,
305 LValue &Stride) {
306 // Figure out the stride of the buffer elements from the handle type.
307 auto *HandleTy =
309 QualType ElementTy = HandleTy->getContainedType();
310 Value *StrideValue = CGF->getTypeSize(ElementTy);
311 return CGF->Builder.CreateStore(StrideValue, Stride.getAddress());
312}
313
314// Return dot product intrinsic that corresponds to the QT scalar type
315static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) {
316 if (QT->isFloatingType())
317 return RT.getFDotIntrinsic();
318 if (QT->isSignedIntegerType())
319 return RT.getSDotIntrinsic();
320 assert(QT->isUnsignedIntegerType());
321 return RT.getUDotIntrinsic();
322}
323
324static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT) {
326 return RT.getFirstBitSHighIntrinsic();
327 }
328
330 return RT.getFirstBitUHighIntrinsic();
331}
332
333// Return wave active sum that corresponds to the QT scalar type
334static Intrinsic::ID getWaveActiveSumIntrinsic(llvm::Triple::ArchType Arch,
335 QualType QT) {
336 switch (Arch) {
337 case llvm::Triple::spirv:
338 return Intrinsic::spv_wave_reduce_sum;
339 case llvm::Triple::dxil: {
340 if (QT->isUnsignedIntegerType())
341 return Intrinsic::dx_wave_reduce_usum;
342 return Intrinsic::dx_wave_reduce_sum;
343 }
344 default:
345 llvm_unreachable("Intrinsic WaveActiveSum"
346 " not supported by target architecture");
347 }
348}
349
350// Return wave active product that corresponds to the QT scalar type
351static Intrinsic::ID getWaveActiveProductIntrinsic(llvm::Triple::ArchType Arch,
352 QualType QT) {
353 switch (Arch) {
354 case llvm::Triple::spirv:
355 return Intrinsic::spv_wave_product;
356 case llvm::Triple::dxil: {
357 if (QT->isUnsignedIntegerType())
358 return Intrinsic::dx_wave_uproduct;
359 return Intrinsic::dx_wave_product;
360 }
361 default:
362 llvm_unreachable("Intrinsic WaveActiveProduct"
363 " not supported by target architecture");
364 }
365}
366
367static Intrinsic::ID getPrefixCountBitsIntrinsic(llvm::Triple::ArchType Arch) {
368 switch (Arch) {
369 case llvm::Triple::spirv:
370 return Intrinsic::spv_subgroup_prefix_bit_count;
371 case llvm::Triple::dxil: {
372 return Intrinsic::dx_wave_prefix_bit_count;
373 }
374 default:
375 llvm_unreachable(
376 "WavePrefixOp instruction not supported by target architecture");
377 }
378}
379
380// Return wave prefix sum that corresponds to the QT scalar type
381static Intrinsic::ID getWavePrefixSumIntrinsic(llvm::Triple::ArchType Arch,
382 QualType QT) {
383 switch (Arch) {
384 case llvm::Triple::spirv:
385 return Intrinsic::spv_wave_prefix_sum;
386 case llvm::Triple::dxil: {
387 if (QT->isUnsignedIntegerType())
388 return Intrinsic::dx_wave_prefix_usum;
389 return Intrinsic::dx_wave_prefix_sum;
390 }
391 default:
392 llvm_unreachable("Intrinsic WavePrefixSum"
393 " not supported by target architecture");
394 }
395}
396
397// Return wave prefix product that corresponds to the QT scalar type
398static Intrinsic::ID getWavePrefixProductIntrinsic(llvm::Triple::ArchType Arch,
399 QualType QT) {
400 switch (Arch) {
401 case llvm::Triple::spirv:
402 return Intrinsic::spv_wave_prefix_product;
403 case llvm::Triple::dxil: {
404 if (QT->isUnsignedIntegerType())
405 return Intrinsic::dx_wave_prefix_uproduct;
406 return Intrinsic::dx_wave_prefix_product;
407 }
408 default:
409 llvm_unreachable("Intrinsic WavePrefixProduct"
410 " not supported by target architecture");
411 }
412}
413
414// Returns the mangled name for a builtin function that the SPIR-V backend
415// will expand into a spec Constant.
416static std::string getSpecConstantFunctionName(clang::QualType SpecConstantType,
417 ASTContext &Context) {
418 // The parameter types for our conceptual intrinsic function.
419 QualType ClangParamTypes[] = {Context.IntTy, SpecConstantType};
420
421 // Create a temporary FunctionDecl for the builtin fuction. It won't be
422 // added to the AST.
424 QualType FnType =
425 Context.getFunctionType(SpecConstantType, ClangParamTypes, EPI);
426 DeclarationName FuncName = &Context.Idents.get("__spirv_SpecConstant");
427 FunctionDecl *FnDeclForMangling = FunctionDecl::Create(
428 Context, Context.getTranslationUnitDecl(), SourceLocation(),
429 SourceLocation(), FuncName, FnType, /*TSI=*/nullptr, SC_Extern);
430
431 // Attach the created parameter declarations to the function declaration.
433 for (QualType ParamType : ClangParamTypes) {
435 Context, FnDeclForMangling, SourceLocation(), SourceLocation(),
436 /*IdentifierInfo*/ nullptr, ParamType, /*TSI*/ nullptr, SC_None,
437 /*DefaultArg*/ nullptr);
438 ParamDecls.push_back(PD);
439 }
440 FnDeclForMangling->setParams(ParamDecls);
441
442 // Get the mangled name.
443 std::string Name;
444 llvm::raw_string_ostream MangledNameStream(Name);
445 std::unique_ptr<MangleContext> Mangler(Context.createMangleContext());
446 Mangler->mangleName(FnDeclForMangling, MangledNameStream);
447 MangledNameStream.flush();
448
449 return Name;
450}
451
452static llvm::Type *getOffsetType(CodeGenModule &CGM, llvm::Type *CoordTy) {
453 llvm::Type *Int32Ty = CGM.Int32Ty;
454 if (auto *VT = dyn_cast<llvm::FixedVectorType>(CoordTy))
455 return llvm::FixedVectorType::get(Int32Ty, VT->getNumElements());
456 return Int32Ty;
457}
458
460 unsigned OffsetArgIndex, llvm::Type *OffsetTy) {
461 if (E->getNumArgs() > OffsetArgIndex)
462 return CGF.EmitScalarExpr(E->getArg(OffsetArgIndex));
463
464 return llvm::Constant::getNullValue(OffsetTy);
465}
466
468 unsigned ClampArgIndex) {
469 Value *Clamp = CGF.EmitScalarExpr(E->getArg(ClampArgIndex));
470 // The builtin is defined with variadic arguments, so the clamp parameter
471 // might have been promoted to double. The intrinsic requires a 32-bit
472 // float.
473 if (Clamp->getType() != CGF.Builder.getFloatTy())
474 Clamp = CGF.Builder.CreateFPCast(Clamp, CGF.Builder.getFloatTy());
475 return Clamp;
476}
477
479 unsigned IntrinsicID, unsigned NumRetComps,
480 bool HasLod) {
481 Value *Handle = CGF.EmitScalarExpr(E->getArg(0));
482
483 SmallVector<Value *> Args{Handle};
484 if (HasLod)
485 Args.push_back(CGF.EmitScalarExpr(E->getArg(1)));
486
487 Value *DimValue =
488 CGF.Builder.CreateIntrinsic(IntrinsicID, {Handle->getType()}, Args);
489
490 Value *LastStore = nullptr;
491 unsigned ArgIndex = HasLod ? 2 : 1;
492 for (unsigned i = 0; i < NumRetComps; ++i) {
493 const Expr *Arg = E->getArg(ArgIndex++);
494 LValue DimOut = CGF.EmitLValue(Arg);
495 Value *Elem = DimValue;
496 if (NumRetComps > 1)
497 Elem = CGF.Builder.CreateExtractElement(DimValue, i);
498
499 // Handle float casting if needed
500 if (Arg->getType()->isFloatingType())
501 Elem = CGF.Builder.CreateUIToFP(
502 Elem, llvm::Type::getFloatTy(CGF.getLLVMContext()));
503
504 LastStore = CGF.Builder.CreateStore(Elem, DimOut.getAddress());
505 }
506 return LastStore;
507}
508
510 const CallExpr *E,
512 if (!getLangOpts().HLSL)
513 return nullptr;
514
515 switch (BuiltinID) {
516 case Builtin::BI__builtin_hlsl_adduint64: {
517 Value *OpA = EmitScalarExpr(E->getArg(0));
518 Value *OpB = EmitScalarExpr(E->getArg(1));
519 QualType Arg0Ty = E->getArg(0)->getType();
520 uint64_t NumElements = Arg0Ty->castAs<VectorType>()->getNumElements();
521 assert(Arg0Ty == E->getArg(1)->getType() &&
522 "AddUint64 operand types must match");
523 assert(Arg0Ty->hasIntegerRepresentation() &&
524 "AddUint64 operands must have an integer representation");
525 assert((NumElements == 2 || NumElements == 4) &&
526 "AddUint64 operands must have 2 or 4 elements");
527
528 llvm::Value *LowA;
529 llvm::Value *HighA;
530 llvm::Value *LowB;
531 llvm::Value *HighB;
532
533 // Obtain low and high words of inputs A and B
534 if (NumElements == 2) {
535 LowA = Builder.CreateExtractElement(OpA, (uint64_t)0, "LowA");
536 HighA = Builder.CreateExtractElement(OpA, (uint64_t)1, "HighA");
537 LowB = Builder.CreateExtractElement(OpB, (uint64_t)0, "LowB");
538 HighB = Builder.CreateExtractElement(OpB, (uint64_t)1, "HighB");
539 } else {
540 LowA = Builder.CreateShuffleVector(OpA, {0, 2}, "LowA");
541 HighA = Builder.CreateShuffleVector(OpA, {1, 3}, "HighA");
542 LowB = Builder.CreateShuffleVector(OpB, {0, 2}, "LowB");
543 HighB = Builder.CreateShuffleVector(OpB, {1, 3}, "HighB");
544 }
545
546 // Use a uadd_with_overflow to compute the sum of low words and obtain a
547 // carry value
548 llvm::Value *Carry;
549 llvm::Value *LowSum = EmitOverflowIntrinsic(
550 *this, Intrinsic::uadd_with_overflow, LowA, LowB, Carry);
551 llvm::Value *ZExtCarry =
552 Builder.CreateZExt(Carry, HighA->getType(), "CarryZExt");
553
554 // Sum the high words and the carry
555 llvm::Value *HighSum = Builder.CreateAdd(HighA, HighB, "HighSum");
556 llvm::Value *HighSumPlusCarry =
557 Builder.CreateAdd(HighSum, ZExtCarry, "HighSumPlusCarry");
558
559 if (NumElements == 4) {
560 return Builder.CreateShuffleVector(LowSum, HighSumPlusCarry, {0, 2, 1, 3},
561 "hlsl.AddUint64");
562 }
563
564 llvm::Value *Result = PoisonValue::get(OpA->getType());
565 Result = Builder.CreateInsertElement(Result, LowSum, (uint64_t)0,
566 "hlsl.AddUint64.upto0");
567 Result = Builder.CreateInsertElement(Result, HighSumPlusCarry, (uint64_t)1,
568 "hlsl.AddUint64");
569 return Result;
570 }
571 case Builtin::BI__builtin_hlsl_resource_getpointer:
572 case Builtin::BI__builtin_hlsl_resource_getpointer_typed: {
573 Value *HandleOp = EmitScalarExpr(E->getArg(0));
574 bool IsIndexed =
575 BuiltinID == Builtin::BI__builtin_hlsl_resource_getpointer_typed ||
576 E->getNumArgs() > 1;
577
578 llvm::Type *RetTy = ConvertType(E->getType());
579 if (IsIndexed) {
580 Value *IndexOp = EmitScalarExpr(E->getArg(1));
581 return Builder.CreateIntrinsic(
582 RetTy, CGM.getHLSLRuntime().getCreateResourceGetPointerIntrinsic(),
583 ArrayRef<Value *>{HandleOp, IndexOp});
584 }
585 return Builder.CreateIntrinsic(
586 RetTy, CGM.getHLSLRuntime().getCreateResourceGetBasePointerIntrinsic(),
587 ArrayRef<Value *>{HandleOp});
588 }
589 case Builtin::BI__builtin_hlsl_resource_sample: {
590 Value *HandleOp = EmitScalarExpr(E->getArg(0));
591 Value *SamplerOp = EmitScalarExpr(E->getArg(1));
592 Value *CoordOp = EmitScalarExpr(E->getArg(2));
593
595 Args.push_back(HandleOp);
596 Args.push_back(SamplerOp);
597 Args.push_back(CoordOp);
598 Args.push_back(
599 emitHlslOffset(*this, E, 3, getOffsetType(CGM, CoordOp->getType())));
600
601 llvm::Type *RetTy = ConvertType(E->getType());
602 if (E->getNumArgs() <= 4) {
603 return Builder.CreateIntrinsic(
604 RetTy, CGM.getHLSLRuntime().getSampleIntrinsic(), Args);
605 }
606
607 Args.push_back(emitHlslClamp(*this, E, 4));
608 return Builder.CreateIntrinsic(
609 RetTy, CGM.getHLSLRuntime().getSampleClampIntrinsic(), Args);
610 }
611 case Builtin::BI__builtin_hlsl_resource_sample_bias: {
612 Value *HandleOp = EmitScalarExpr(E->getArg(0));
613 Value *SamplerOp = EmitScalarExpr(E->getArg(1));
614 Value *CoordOp = EmitScalarExpr(E->getArg(2));
615 Value *BiasOp = EmitScalarExpr(E->getArg(3));
616 if (BiasOp->getType() != Builder.getFloatTy())
617 BiasOp = Builder.CreateFPCast(BiasOp, Builder.getFloatTy());
618
619 SmallVector<Value *, 6> Args; // Max 6 arguments for SampleBias
620 Args.push_back(HandleOp);
621 Args.push_back(SamplerOp);
622 Args.push_back(CoordOp);
623 Args.push_back(BiasOp);
624 Args.push_back(
625 emitHlslOffset(*this, E, 4, getOffsetType(CGM, CoordOp->getType())));
626
627 llvm::Type *RetTy = ConvertType(E->getType());
628 if (E->getNumArgs() <= 5)
629 return Builder.CreateIntrinsic(
630 RetTy, CGM.getHLSLRuntime().getSampleBiasIntrinsic(), Args);
631
632 Args.push_back(emitHlslClamp(*this, E, 5));
633 return Builder.CreateIntrinsic(
634 RetTy, CGM.getHLSLRuntime().getSampleBiasClampIntrinsic(), Args);
635 }
636 case Builtin::BI__builtin_hlsl_resource_sample_grad: {
637 Value *HandleOp = EmitScalarExpr(E->getArg(0));
638 Value *SamplerOp = EmitScalarExpr(E->getArg(1));
639 Value *CoordOp = EmitScalarExpr(E->getArg(2));
640 Value *DDXOp = EmitScalarExpr(E->getArg(3));
641 Value *DDYOp = EmitScalarExpr(E->getArg(4));
642
644 Args.push_back(HandleOp);
645 Args.push_back(SamplerOp);
646 Args.push_back(CoordOp);
647 Args.push_back(DDXOp);
648 Args.push_back(DDYOp);
649 Args.push_back(
650 emitHlslOffset(*this, E, 5, getOffsetType(CGM, CoordOp->getType())));
651
652 llvm::Type *RetTy = ConvertType(E->getType());
653
654 if (E->getNumArgs() <= 6) {
655 return Builder.CreateIntrinsic(
656 RetTy, CGM.getHLSLRuntime().getSampleGradIntrinsic(), Args);
657 }
658
659 Args.push_back(emitHlslClamp(*this, E, 6));
660 return Builder.CreateIntrinsic(
661 RetTy, CGM.getHLSLRuntime().getSampleGradClampIntrinsic(), Args);
662 }
663 case Builtin::BI__builtin_hlsl_resource_sample_level: {
664 Value *HandleOp = EmitScalarExpr(E->getArg(0));
665 Value *SamplerOp = EmitScalarExpr(E->getArg(1));
666 Value *CoordOp = EmitScalarExpr(E->getArg(2));
667 Value *LODOp = EmitScalarExpr(E->getArg(3));
668 if (LODOp->getType() != Builder.getFloatTy())
669 LODOp = Builder.CreateFPCast(LODOp, Builder.getFloatTy());
670
671 SmallVector<Value *, 5> Args; // Max 5 arguments for SampleLevel
672 Args.push_back(HandleOp);
673 Args.push_back(SamplerOp);
674 Args.push_back(CoordOp);
675 Args.push_back(LODOp);
676 Args.push_back(
677 emitHlslOffset(*this, E, 4, getOffsetType(CGM, CoordOp->getType())));
678
679 llvm::Type *RetTy = ConvertType(E->getType());
680 return Builder.CreateIntrinsic(
681 RetTy, CGM.getHLSLRuntime().getSampleLevelIntrinsic(), Args);
682 }
683 case Builtin::BI__builtin_hlsl_resource_load_level: {
684 Value *HandleOp = EmitScalarExpr(E->getArg(0));
685 Value *CoordLODOp = EmitScalarExpr(E->getArg(1));
686
687 auto *CoordLODVecTy = cast<llvm::FixedVectorType>(CoordLODOp->getType());
688 unsigned NumElts = CoordLODVecTy->getNumElements();
689 assert(NumElts >= 2 && "CoordLOD must have at least 2 elements");
690
691 // Split CoordLOD into Coord and LOD
693 for (unsigned I = 0; I < NumElts - 1; ++I)
694 Mask.push_back(I);
695
696 Value *CoordOp =
697 Builder.CreateShuffleVector(CoordLODOp, Mask, "hlsl.load.coord");
698 Value *LODOp =
699 Builder.CreateExtractElement(CoordLODOp, NumElts - 1, "hlsl.load.lod");
700
702 Args.push_back(HandleOp);
703 Args.push_back(CoordOp);
704 Args.push_back(LODOp);
705 Args.push_back(
706 emitHlslOffset(*this, E, 2, getOffsetType(CGM, CoordOp->getType())));
707
708 llvm::Type *RetTy = ConvertType(E->getType());
709 return Builder.CreateIntrinsic(
710 RetTy, CGM.getHLSLRuntime().getLoadLevelIntrinsic(), Args);
711 }
712 case Builtin::BI__builtin_hlsl_resource_sample_cmp: {
713 Value *HandleOp = EmitScalarExpr(E->getArg(0));
714 Value *SamplerOp = EmitScalarExpr(E->getArg(1));
715 Value *CoordOp = EmitScalarExpr(E->getArg(2));
716 Value *CmpOp = EmitScalarExpr(E->getArg(3));
717 if (CmpOp->getType() != Builder.getFloatTy())
718 CmpOp = Builder.CreateFPCast(CmpOp, Builder.getFloatTy());
719
720 SmallVector<Value *, 6> Args; // Max 6 arguments for SampleCmp
721 Args.push_back(HandleOp);
722 Args.push_back(SamplerOp);
723 Args.push_back(CoordOp);
724 Args.push_back(CmpOp);
725 Args.push_back(
726 emitHlslOffset(*this, E, 4, getOffsetType(CGM, CoordOp->getType())));
727
728 llvm::Type *RetTy = ConvertType(E->getType());
729 if (E->getNumArgs() <= 5) {
730 return Builder.CreateIntrinsic(
731 RetTy, CGM.getHLSLRuntime().getSampleCmpIntrinsic(), Args);
732 }
733
734 Args.push_back(emitHlslClamp(*this, E, 5));
735 return Builder.CreateIntrinsic(
736 RetTy, CGM.getHLSLRuntime().getSampleCmpClampIntrinsic(), Args);
737 }
738 case Builtin::BI__builtin_hlsl_resource_sample_cmp_level_zero: {
739 Value *HandleOp = EmitScalarExpr(E->getArg(0));
740 Value *SamplerOp = EmitScalarExpr(E->getArg(1));
741 Value *CoordOp = EmitScalarExpr(E->getArg(2));
742 Value *CmpOp = EmitScalarExpr(E->getArg(3));
743 if (CmpOp->getType() != Builder.getFloatTy())
744 CmpOp = Builder.CreateFPCast(CmpOp, Builder.getFloatTy());
745
747 Args.push_back(HandleOp);
748 Args.push_back(SamplerOp);
749 Args.push_back(CoordOp);
750 Args.push_back(CmpOp);
751
752 Args.push_back(
753 emitHlslOffset(*this, E, 4, getOffsetType(CGM, CoordOp->getType())));
754
755 llvm::Type *RetTy = ConvertType(E->getType());
756 return Builder.CreateIntrinsic(
757 RetTy, CGM.getHLSLRuntime().getSampleCmpLevelZeroIntrinsic(), Args);
758 }
759 case Builtin::BI__builtin_hlsl_resource_calculate_lod: {
760 Value *HandleOp = EmitScalarExpr(E->getArg(0));
761 Value *SamplerOp = EmitScalarExpr(E->getArg(1));
762 Value *CoordOp = EmitScalarExpr(E->getArg(2));
763
764 return Builder.CreateIntrinsic(
765 ConvertType(E->getType()),
766 CGM.getHLSLRuntime().getCalculateLodIntrinsic(),
767 {HandleOp, SamplerOp, CoordOp});
768 }
769 case Builtin::BI__builtin_hlsl_resource_calculate_lod_unclamped: {
770 Value *HandleOp = EmitScalarExpr(E->getArg(0));
771 Value *SamplerOp = EmitScalarExpr(E->getArg(1));
772 Value *CoordOp = EmitScalarExpr(E->getArg(2));
773
774 return Builder.CreateIntrinsic(
775 ConvertType(E->getType()),
776 CGM.getHLSLRuntime().getCalculateLodUnclampedIntrinsic(),
777 {HandleOp, SamplerOp, CoordOp});
778 }
779 case Builtin::BI__builtin_hlsl_resource_gather: {
780 Value *HandleOp = EmitScalarExpr(E->getArg(0));
781 Value *SamplerOp = EmitScalarExpr(E->getArg(1));
782 Value *CoordOp = EmitScalarExpr(E->getArg(2));
783 Value *ComponentOp = EmitScalarExpr(E->getArg(3));
784 if (ComponentOp->getType() != Builder.getInt32Ty())
785 ComponentOp = Builder.CreateIntCast(ComponentOp, Builder.getInt32Ty(),
786 /*isSigned=*/false);
787
789 Args.push_back(HandleOp);
790 Args.push_back(SamplerOp);
791 Args.push_back(CoordOp);
792 Args.push_back(ComponentOp);
793 Args.push_back(
794 emitHlslOffset(*this, E, 4, getOffsetType(CGM, CoordOp->getType())));
795
796 llvm::Type *RetTy = ConvertType(E->getType());
797 return Builder.CreateIntrinsic(
798 RetTy, CGM.getHLSLRuntime().getGatherIntrinsic(), Args);
799 }
800 case Builtin::BI__builtin_hlsl_resource_gather_cmp: {
801 Value *HandleOp = EmitScalarExpr(E->getArg(0));
802 Value *SamplerOp = EmitScalarExpr(E->getArg(1));
803 Value *CoordOp = EmitScalarExpr(E->getArg(2));
804 Value *CompareOp = EmitScalarExpr(E->getArg(3));
805 if (CompareOp->getType() != Builder.getFloatTy())
806 CompareOp = Builder.CreateFPCast(CompareOp, Builder.getFloatTy());
807
809 Args.push_back(HandleOp);
810 Args.push_back(SamplerOp);
811 Args.push_back(CoordOp);
812 Args.push_back(CompareOp);
813
814 if (CGM.getTarget().getTriple().isDXIL()) {
815 Value *ComponentOp = EmitScalarExpr(E->getArg(4));
816 if (ComponentOp->getType() != Builder.getInt32Ty())
817 ComponentOp = Builder.CreateIntCast(ComponentOp, Builder.getInt32Ty(),
818 /*isSigned=*/false);
819 Args.push_back(ComponentOp);
820 }
821
822 Args.push_back(
823 emitHlslOffset(*this, E, 5, getOffsetType(CGM, CoordOp->getType())));
824
825 llvm::Type *RetTy = ConvertType(E->getType());
826 return Builder.CreateIntrinsic(
827 RetTy, CGM.getHLSLRuntime().getGatherCmpIntrinsic(), Args);
828 }
829 case Builtin::BI__builtin_hlsl_resource_load_with_status:
830 case Builtin::BI__builtin_hlsl_resource_load_with_status_typed: {
831 Value *HandleOp = EmitScalarExpr(E->getArg(0));
832 Value *IndexOp = EmitScalarExpr(E->getArg(1));
833
834 // Get the *address* of the status argument to write to it by reference
835 LValue StatusLVal = EmitLValue(E->getArg(2));
836 Address StatusAddr = StatusLVal.getAddress();
837
838 QualType HandleTy = E->getArg(0)->getType();
839 const HLSLAttributedResourceType *RT =
840 HandleTy->getAs<HLSLAttributedResourceType>();
841 assert(CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil &&
842 "Only DXIL currently implements load with status");
843
844 Intrinsic::ID IntrID = RT->getAttrs().RawBuffer
845 ? llvm::Intrinsic::dx_resource_load_rawbuffer
846 : llvm::Intrinsic::dx_resource_load_typedbuffer;
847
848 llvm::Type *DataTy = ConvertType(E->getType());
849 llvm::Type *RetTy = llvm::StructType::get(Builder.getContext(),
850 {DataTy, Builder.getInt1Ty()});
851
853 Args.push_back(HandleOp);
854 Args.push_back(IndexOp);
855
856 if (RT->isRaw()) {
857 Value *Offset = Builder.getInt32(0);
858 // The offset parameter needs to be poison for ByteAddressBuffer
859 if (!RT->isStructured())
860 Offset = llvm::PoisonValue::get(Builder.getInt32Ty());
861 Args.push_back(Offset);
862 }
863
864 // The load intrinsics give us a (T value, i1 status) pair -
865 // shepherd these into the return value and out reference respectively.
866 Value *ResRet =
867 Builder.CreateIntrinsic(RetTy, IntrID, Args, {}, "ld.struct");
868 Value *LoadedValue = Builder.CreateExtractValue(ResRet, {0}, "ld.value");
869 Value *StatusBit = Builder.CreateExtractValue(ResRet, {1}, "ld.status");
870 Value *ExtendedStatus =
871 Builder.CreateZExt(StatusBit, Builder.getInt32Ty(), "ld.status.ext");
872 Builder.CreateStore(ExtendedStatus, StatusAddr);
873
874 return LoadedValue;
875 }
876 case Builtin::BI__builtin_hlsl_resource_uninitializedhandle: {
877 llvm::Type *HandleTy = CGM.getTypes().ConvertType(E->getType());
878 return llvm::PoisonValue::get(HandleTy);
879 }
880 case Builtin::BI__builtin_hlsl_resource_handlefrombinding: {
881 llvm::Type *HandleTy = CGM.getTypes().ConvertType(E->getType());
882 Value *RegisterOp = EmitScalarExpr(E->getArg(1));
883 Value *SpaceOp = EmitScalarExpr(E->getArg(2));
884 Value *RangeOp = EmitScalarExpr(E->getArg(3));
885 Value *IndexOp = EmitScalarExpr(E->getArg(4));
886 Value *Name = EmitScalarExpr(E->getArg(5));
887 llvm::Intrinsic::ID IntrinsicID =
888 CGM.getHLSLRuntime().getCreateHandleFromBindingIntrinsic();
889 SmallVector<Value *> Args{SpaceOp, RegisterOp, RangeOp, IndexOp, Name};
890 return Builder.CreateIntrinsic(HandleTy, IntrinsicID, Args);
891 }
892 case Builtin::BI__builtin_hlsl_resource_handlefromimplicitbinding: {
893 llvm::Type *HandleTy = CGM.getTypes().ConvertType(E->getType());
894 Value *OrderID = EmitScalarExpr(E->getArg(1));
895 Value *SpaceOp = EmitScalarExpr(E->getArg(2));
896 Value *RangeOp = EmitScalarExpr(E->getArg(3));
897 Value *IndexOp = EmitScalarExpr(E->getArg(4));
898 Value *Name = EmitScalarExpr(E->getArg(5));
899 llvm::Intrinsic::ID IntrinsicID =
900 CGM.getHLSLRuntime().getCreateHandleFromImplicitBindingIntrinsic();
901 SmallVector<Value *> Args{OrderID, SpaceOp, RangeOp, IndexOp, Name};
902 return Builder.CreateIntrinsic(HandleTy, IntrinsicID, Args);
903 }
904 case Builtin::BI__builtin_hlsl_resource_counterhandlefromimplicitbinding: {
905 Value *MainHandle = EmitScalarExpr(E->getArg(0));
906 if (!CGM.getTriple().isSPIRV())
907 return MainHandle;
908
909 llvm::Type *HandleTy = CGM.getTypes().ConvertType(E->getType());
910 Value *OrderID = EmitScalarExpr(E->getArg(1));
911 Value *SpaceOp = EmitScalarExpr(E->getArg(2));
912 llvm::Intrinsic::ID IntrinsicID =
913 llvm::Intrinsic::spv_resource_counterhandlefromimplicitbinding;
914 SmallVector<Value *> Args{MainHandle, OrderID, SpaceOp};
915 return Builder.CreateIntrinsic(HandleTy, IntrinsicID, Args);
916 }
917 case Builtin::BI__builtin_hlsl_resource_nonuniformindex: {
918 Value *IndexOp = EmitScalarExpr(E->getArg(0));
919 llvm::Type *RetTy = ConvertType(E->getType());
920 return Builder.CreateIntrinsic(
921 RetTy, CGM.getHLSLRuntime().getNonUniformResourceIndexIntrinsic(),
922 ArrayRef<Value *>{IndexOp});
923 }
924 case Builtin::BI__builtin_hlsl_resource_getdimensions_x:
925 case Builtin::BI__builtin_hlsl_resource_getdimensions_x_float:
926 return emitGetDimensions(*this, E,
927 CGM.getHLSLRuntime().getGetDimensionsXIntrinsic(),
928 1, /*HasLod=*/false);
929 case Builtin::BI__builtin_hlsl_resource_getdimensions_xy:
930 case Builtin::BI__builtin_hlsl_resource_getdimensions_xy_float:
931 return emitGetDimensions(*this, E,
932 CGM.getHLSLRuntime().getGetDimensionsXYIntrinsic(),
933 2, /*HasLod=*/false);
934 case Builtin::BI__builtin_hlsl_resource_getdimensions_levels_xy:
935 case Builtin::BI__builtin_hlsl_resource_getdimensions_levels_xy_float:
936 return emitGetDimensions(
937 *this, E, CGM.getHLSLRuntime().getGetDimensionsLevelsXYIntrinsic(), 3,
938 /*HasLod=*/true);
939 case Builtin::BI__builtin_hlsl_resource_getstride: {
940 LValue Stride = EmitLValue(E->getArg(1));
941 return emitBufferStride(this, E->getArg(0), Stride);
942 }
943 case Builtin::BI__builtin_hlsl_all: {
944 Value *Op0 = EmitScalarExpr(E->getArg(0));
945 return Builder.CreateIntrinsic(
946 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
947 CGM.getHLSLRuntime().getAllIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
948 "hlsl.all");
949 }
950 case Builtin::BI__builtin_hlsl_and: {
951 Value *Op0 = EmitScalarExpr(E->getArg(0));
952 Value *Op1 = EmitScalarExpr(E->getArg(1));
953 return Builder.CreateAnd(Op0, Op1, "hlsl.and");
954 }
955 case Builtin::BI__builtin_hlsl_or: {
956 Value *Op0 = EmitScalarExpr(E->getArg(0));
957 Value *Op1 = EmitScalarExpr(E->getArg(1));
958 return Builder.CreateOr(Op0, Op1, "hlsl.or");
959 }
960 case Builtin::BI__builtin_hlsl_any: {
961 Value *Op0 = EmitScalarExpr(E->getArg(0));
962 return Builder.CreateIntrinsic(
963 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
964 CGM.getHLSLRuntime().getAnyIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
965 "hlsl.any");
966 }
967 case Builtin::BI__builtin_hlsl_asdouble:
968 return handleAsDoubleBuiltin(*this, E);
969 case Builtin::BI__builtin_hlsl_elementwise_clamp: {
970 Value *OpX = EmitScalarExpr(E->getArg(0));
971 Value *OpMin = EmitScalarExpr(E->getArg(1));
972 Value *OpMax = EmitScalarExpr(E->getArg(2));
973
974 QualType Ty = E->getArg(0)->getType();
975 if (auto *VecTy = Ty->getAs<VectorType>())
976 Ty = VecTy->getElementType();
977
978 Intrinsic::ID Intr;
979 if (Ty->isFloatingType()) {
980 Intr = CGM.getHLSLRuntime().getNClampIntrinsic();
981 } else if (Ty->isUnsignedIntegerType()) {
982 Intr = CGM.getHLSLRuntime().getUClampIntrinsic();
983 } else {
984 assert(Ty->isSignedIntegerType());
985 Intr = CGM.getHLSLRuntime().getSClampIntrinsic();
986 }
987 return Builder.CreateIntrinsic(
988 /*ReturnType=*/OpX->getType(), Intr,
989 ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "hlsl.clamp");
990 }
991 case Builtin::BI__builtin_hlsl_crossf16:
992 case Builtin::BI__builtin_hlsl_crossf32: {
993 Value *Op0 = EmitScalarExpr(E->getArg(0));
994 Value *Op1 = EmitScalarExpr(E->getArg(1));
995 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
997 "cross operands must have a float representation");
998 // make sure each vector has exactly 3 elements
999 assert(
1000 E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
1001 E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
1002 "input vectors must have 3 elements each");
1003 return Builder.CreateIntrinsic(
1004 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getCrossIntrinsic(),
1005 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.cross");
1006 }
1007 case Builtin::BI__builtin_hlsl_dot: {
1008 Value *Op0 = EmitScalarExpr(E->getArg(0));
1009 Value *Op1 = EmitScalarExpr(E->getArg(1));
1010 llvm::Type *T0 = Op0->getType();
1011 llvm::Type *T1 = Op1->getType();
1012
1013 // If the arguments are scalars, just emit a multiply
1014 if (!T0->isVectorTy() && !T1->isVectorTy()) {
1015 if (T0->isFloatingPointTy())
1016 return Builder.CreateFMul(Op0, Op1, "hlsl.dot");
1017
1018 if (T0->isIntegerTy())
1019 return Builder.CreateMul(Op0, Op1, "hlsl.dot");
1020
1021 llvm_unreachable(
1022 "Scalar dot product is only supported on ints and floats.");
1023 }
1024 // For vectors, validate types and emit the appropriate intrinsic
1025 assert(CGM.getContext().hasSameUnqualifiedType(E->getArg(0)->getType(),
1026 E->getArg(1)->getType()) &&
1027 "Dot product operands must have the same type.");
1028
1029 auto *VecTy0 = E->getArg(0)->getType()->castAs<VectorType>();
1030 assert(VecTy0 && "Dot product argument must be a vector.");
1031
1032 return Builder.CreateIntrinsic(
1033 /*ReturnType=*/T0->getScalarType(),
1034 getDotProductIntrinsic(CGM.getHLSLRuntime(), VecTy0->getElementType()),
1035 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.dot");
1036 }
1037 case Builtin::BI__builtin_hlsl_dot4add_i8packed: {
1038 Value *X = EmitScalarExpr(E->getArg(0));
1039 Value *Y = EmitScalarExpr(E->getArg(1));
1040 Value *Acc = EmitScalarExpr(E->getArg(2));
1041
1042 Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddI8PackedIntrinsic();
1043 // Note that the argument order disagrees between the builtin and the
1044 // intrinsic here.
1045 return Builder.CreateIntrinsic(
1046 /*ReturnType=*/Acc->getType(), ID, ArrayRef<Value *>{Acc, X, Y},
1047 nullptr, "hlsl.dot4add.i8packed");
1048 }
1049 case Builtin::BI__builtin_hlsl_dot4add_u8packed: {
1050 Value *X = EmitScalarExpr(E->getArg(0));
1051 Value *Y = EmitScalarExpr(E->getArg(1));
1052 Value *Acc = EmitScalarExpr(E->getArg(2));
1053
1054 Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddU8PackedIntrinsic();
1055 // Note that the argument order disagrees between the builtin and the
1056 // intrinsic here.
1057 return Builder.CreateIntrinsic(
1058 /*ReturnType=*/Acc->getType(), ID, ArrayRef<Value *>{Acc, X, Y},
1059 nullptr, "hlsl.dot4add.u8packed");
1060 }
1061 case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
1062 Value *X = EmitScalarExpr(E->getArg(0));
1063
1064 return Builder.CreateIntrinsic(
1065 /*ReturnType=*/ConvertType(E->getType()),
1066 getFirstBitHighIntrinsic(CGM.getHLSLRuntime(), E->getArg(0)->getType()),
1067 ArrayRef<Value *>{X}, nullptr, "hlsl.firstbithigh");
1068 }
1069 case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
1070 Value *X = EmitScalarExpr(E->getArg(0));
1071
1072 return Builder.CreateIntrinsic(
1073 /*ReturnType=*/ConvertType(E->getType()),
1074 CGM.getHLSLRuntime().getFirstBitLowIntrinsic(), ArrayRef<Value *>{X},
1075 nullptr, "hlsl.firstbitlow");
1076 }
1077 case Builtin::BI__builtin_hlsl_lerp: {
1078 Value *X = EmitScalarExpr(E->getArg(0));
1079 Value *Y = EmitScalarExpr(E->getArg(1));
1080 Value *S = EmitScalarExpr(E->getArg(2));
1081 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
1082 llvm_unreachable("lerp operand must have a float representation");
1083 return Builder.CreateIntrinsic(
1084 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
1085 ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
1086 }
1087 case Builtin::BI__builtin_hlsl_normalize: {
1088 Value *X = EmitScalarExpr(E->getArg(0));
1089
1090 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
1091 "normalize operand must have a float representation");
1092
1093 return Builder.CreateIntrinsic(
1094 /*ReturnType=*/X->getType(),
1095 CGM.getHLSLRuntime().getNormalizeIntrinsic(), ArrayRef<Value *>{X},
1096 nullptr, "hlsl.normalize");
1097 }
1098 case Builtin::BI__builtin_hlsl_elementwise_degrees: {
1099 Value *X = EmitScalarExpr(E->getArg(0));
1100
1101 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
1102 "degree operand must have a float representation");
1103
1104 return Builder.CreateIntrinsic(
1105 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getDegreesIntrinsic(),
1106 ArrayRef<Value *>{X}, nullptr, "hlsl.degrees");
1107 }
1108 case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
1109 return handleElementwiseF16ToF32(*this, E);
1110 }
1111 case Builtin::BI__builtin_hlsl_elementwise_f32tof16: {
1112 return handleElementwiseF32ToF16(*this, E);
1113 }
1114 case Builtin::BI__builtin_hlsl_elementwise_frac: {
1115 Value *Op0 = EmitScalarExpr(E->getArg(0));
1116 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
1117 llvm_unreachable("frac operand must have a float representation");
1118 return Builder.CreateIntrinsic(
1119 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getFracIntrinsic(),
1120 ArrayRef<Value *>{Op0}, nullptr, "hlsl.frac");
1121 }
1122 case Builtin::BI__builtin_hlsl_elementwise_isinf: {
1123 Value *Op0 = EmitScalarExpr(E->getArg(0));
1124 llvm::Type *Xty = Op0->getType();
1125 llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext());
1126 if (Xty->isVectorTy()) {
1127 auto *XVecTy = E->getArg(0)->getType()->castAs<VectorType>();
1128 retType = llvm::VectorType::get(
1129 retType, ElementCount::getFixed(XVecTy->getNumElements()));
1130 }
1131 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
1132 llvm_unreachable("isinf operand must have a float representation");
1133 return Builder.CreateIntrinsic(
1134 retType, CGM.getHLSLRuntime().getIsInfIntrinsic(),
1135 ArrayRef<Value *>{Op0}, nullptr, "hlsl.isinf");
1136 }
1137 case Builtin::BI__builtin_hlsl_elementwise_isnan: {
1138 Value *Op0 = EmitScalarExpr(E->getArg(0));
1139 llvm::Type *Xty = Op0->getType();
1140 llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext());
1141 if (Xty->isVectorTy()) {
1142 auto *XVecTy = E->getArg(0)->getType()->castAs<VectorType>();
1143 retType = llvm::VectorType::get(
1144 retType, ElementCount::getFixed(XVecTy->getNumElements()));
1145 }
1146 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
1147 llvm_unreachable("isnan operand must have a float representation");
1148 return Builder.CreateIntrinsic(
1149 retType, CGM.getHLSLRuntime().getIsNaNIntrinsic(),
1150 ArrayRef<Value *>{Op0}, nullptr, "hlsl.isnan");
1151 }
1152 case Builtin::BI__builtin_hlsl_mad: {
1153 Value *M = EmitScalarExpr(E->getArg(0));
1154 Value *A = EmitScalarExpr(E->getArg(1));
1155 Value *B = EmitScalarExpr(E->getArg(2));
1157 return Builder.CreateIntrinsic(
1158 /*ReturnType*/ M->getType(), Intrinsic::fmuladd,
1159 ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad");
1160
1162 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
1163 return Builder.CreateIntrinsic(
1164 /*ReturnType*/ M->getType(), Intrinsic::dx_imad,
1165 ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad");
1166
1167 Value *Mul = Builder.CreateNSWMul(M, A);
1168 return Builder.CreateNSWAdd(Mul, B);
1169 }
1171 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
1172 return Builder.CreateIntrinsic(
1173 /*ReturnType=*/M->getType(), Intrinsic::dx_umad,
1174 ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");
1175
1176 Value *Mul = Builder.CreateNUWMul(M, A);
1177 return Builder.CreateNUWAdd(Mul, B);
1178 }
1179 case Builtin::BI__builtin_hlsl_mul: {
1180 Value *Op0 = EmitScalarExpr(E->getArg(0));
1181 Value *Op1 = EmitScalarExpr(E->getArg(1));
1182 QualType QTy0 = E->getArg(0)->getType();
1183 QualType QTy1 = E->getArg(1)->getType();
1184
1185 bool IsVec0 = QTy0->isVectorType();
1186 bool IsVec1 = QTy1->isVectorType();
1187 bool IsMat0 = QTy0->isConstantMatrixType();
1188 bool IsMat1 = QTy1->isConstantMatrixType();
1189
1190 // The matrix multiply intrinsic only operates on column-major order
1191 // matrices. Therefore matrix memory layout transforms must be inserted
1192 // before and after matrix multiply intrinsics.
1193 bool IsRowMajor = getLangOpts().getDefaultMatrixMemoryLayout() ==
1195
1196 llvm::MatrixBuilder MB(Builder);
1197 if (IsVec0 && IsMat1) {
1198 unsigned N = QTy0->castAs<VectorType>()->getNumElements();
1199 auto *MatTy = QTy1->castAs<ConstantMatrixType>();
1200 unsigned Rows = MatTy->getNumRows();
1201 unsigned Cols = MatTy->getNumColumns();
1202 assert(N == Rows && "vector length must match matrix row count");
1203 if (IsRowMajor)
1204 Op1 = MB.CreateRowMajorToColumnMajorTransform(Op1, Rows, Cols);
1205 return MB.CreateMatrixMultiply(Op0, Op1, 1, N, Cols, "hlsl.mul");
1206 }
1207 if (IsMat0 && IsVec1) {
1208 auto *MatTy = QTy0->castAs<ConstantMatrixType>();
1209 unsigned Rows = MatTy->getNumRows();
1210 unsigned Cols = MatTy->getNumColumns();
1211 assert(QTy1->castAs<VectorType>()->getNumElements() == Cols &&
1212 "vector length must match matrix column count");
1213 if (IsRowMajor)
1214 Op0 = MB.CreateRowMajorToColumnMajorTransform(Op0, Rows, Cols);
1215 return MB.CreateMatrixMultiply(Op0, Op1, Rows, Cols, 1, "hlsl.mul");
1216 }
1217 assert(IsMat0 && IsMat1);
1218 auto *MatTy0 = QTy0->castAs<ConstantMatrixType>();
1219 auto *MatTy1 = QTy1->castAs<ConstantMatrixType>();
1220 unsigned Rows0 = MatTy0->getNumRows();
1221 unsigned Rows1 = MatTy1->getNumRows();
1222 unsigned Cols0 = MatTy0->getNumColumns();
1223 unsigned Cols1 = MatTy1->getNumColumns();
1224 assert(Cols0 == Rows1 &&
1225 "inner matrix dimensions must match for multiplication");
1226 if (IsRowMajor) {
1227 Op0 = MB.CreateRowMajorToColumnMajorTransform(Op0, Rows0, Cols0);
1228 Op1 = MB.CreateRowMajorToColumnMajorTransform(Op1, Rows1, Cols1);
1229 }
1230 Value *Result =
1231 MB.CreateMatrixMultiply(Op0, Op1, Rows0, Cols0, Cols1, "hlsl.mul");
1232 if (IsRowMajor)
1233 Result = MB.CreateColumnMajorToRowMajorTransform(Result, Rows0, Cols1);
1234 return Result;
1235 }
1236 case Builtin::BI__builtin_hlsl_transpose: {
1237 Value *Op0 = EmitScalarExpr(E->getArg(0));
1238 auto *MatTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
1239 unsigned Rows = MatTy->getNumRows();
1240 unsigned Cols = MatTy->getNumColumns();
1241 llvm::MatrixBuilder MB(Builder);
1242 // The matrix transpose intrinsic operates on column-major matrices.
1243 // For row-major, a row-major RxC matrix is equivalent to a column-major
1244 // CxR matrix, so transposing with swapped dimensions produces the correct
1245 // row-major CxR result directly.
1246 bool IsRowMajor = getLangOpts().getDefaultMatrixMemoryLayout() ==
1248 if (IsRowMajor)
1249 return MB.CreateMatrixTranspose(Op0, Cols, Rows);
1250 return MB.CreateMatrixTranspose(Op0, Rows, Cols);
1251 }
1252 case Builtin::BI__builtin_hlsl_elementwise_rcp: {
1253 Value *Op0 = EmitScalarExpr(E->getArg(0));
1254 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
1255 llvm_unreachable("rcp operand must have a float representation");
1256 llvm::Type *Ty = Op0->getType();
1257 llvm::Type *EltTy = Ty->getScalarType();
1258 Constant *One = Ty->isVectorTy()
1259 ? ConstantVector::getSplat(
1260 ElementCount::getFixed(
1261 cast<FixedVectorType>(Ty)->getNumElements()),
1262 ConstantFP::get(EltTy, 1.0))
1263 : ConstantFP::get(EltTy, 1.0);
1264 return Builder.CreateFDiv(One, Op0, "hlsl.rcp");
1265 }
1266 case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
1267 Value *Op0 = EmitScalarExpr(E->getArg(0));
1268 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
1269 llvm_unreachable("rsqrt operand must have a float representation");
1270 return Builder.CreateIntrinsic(
1271 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getRsqrtIntrinsic(),
1272 ArrayRef<Value *>{Op0}, nullptr, "hlsl.rsqrt");
1273 }
1274 case Builtin::BI__builtin_hlsl_elementwise_saturate: {
1275 Value *Op0 = EmitScalarExpr(E->getArg(0));
1276 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
1277 "saturate operand must have a float representation");
1278 return Builder.CreateIntrinsic(
1279 /*ReturnType=*/Op0->getType(),
1280 CGM.getHLSLRuntime().getSaturateIntrinsic(), ArrayRef<Value *>{Op0},
1281 nullptr, "hlsl.saturate");
1282 }
1283 case Builtin::BI__builtin_hlsl_wave_prefix_count_bits: {
1284 Value *Op = EmitScalarExpr(E->getArg(0));
1285 assert(Op->getType()->isIntegerTy(1) &&
1286 "WavePrefixBitCount operand must be a boolean type");
1287
1288 Intrinsic::ID IID =
1290
1291 return EmitRuntimeCall(
1292 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), IID), ArrayRef{Op},
1293 "hlsl.wave.prefix.bit.count");
1294 }
1295 case Builtin::BI__builtin_hlsl_select: {
1296 Value *OpCond = EmitScalarExpr(E->getArg(0));
1297 RValue RValTrue = EmitAnyExpr(E->getArg(1));
1298 Value *OpTrue =
1299 RValTrue.isScalar()
1300 ? RValTrue.getScalarVal()
1301 : Builder.CreateLoad(RValTrue.getAggregateAddress(), "true_val");
1302 RValue RValFalse = EmitAnyExpr(E->getArg(2));
1303 Value *OpFalse =
1304 RValFalse.isScalar()
1305 ? RValFalse.getScalarVal()
1306 : Builder.CreateLoad(RValFalse.getAggregateAddress(), "false_val");
1307 if (auto *VTy = E->getType()->getAs<VectorType>()) {
1308 if (!OpTrue->getType()->isVectorTy())
1309 OpTrue =
1310 Builder.CreateVectorSplat(VTy->getNumElements(), OpTrue, "splat");
1311 if (!OpFalse->getType()->isVectorTy())
1312 OpFalse =
1313 Builder.CreateVectorSplat(VTy->getNumElements(), OpFalse, "splat");
1314 }
1315
1316 Value *SelectVal =
1317 Builder.CreateSelect(OpCond, OpTrue, OpFalse, "hlsl.select");
1318 if (!RValTrue.isScalar())
1319 Builder.CreateStore(SelectVal, ReturnValue.getAddress(),
1320 ReturnValue.isVolatile());
1321
1322 return SelectVal;
1323 }
1324 case Builtin::BI__builtin_hlsl_step: {
1325 Value *Op0 = EmitScalarExpr(E->getArg(0));
1326 Value *Op1 = EmitScalarExpr(E->getArg(1));
1327 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
1329 "step operands must have a float representation");
1330 return Builder.CreateIntrinsic(
1331 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getStepIntrinsic(),
1332 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.step");
1333 }
1334 case Builtin::BI__builtin_hlsl_wave_active_all_equal: {
1335 Value *Op = EmitScalarExpr(E->getArg(0));
1336
1337 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllEqualIntrinsic();
1338 return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
1339 &CGM.getModule(), ID, {Op->getType()}),
1340 {Op});
1341 }
1342 case Builtin::BI__builtin_hlsl_wave_active_all_true: {
1343 Value *Op = EmitScalarExpr(E->getArg(0));
1344 assert(Op->getType()->isIntegerTy(1) &&
1345 "Intrinsic WaveActiveAllTrue operand must be a bool");
1346
1347 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllTrueIntrinsic();
1348 return EmitRuntimeCall(
1349 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op});
1350 }
1351 case Builtin::BI__builtin_hlsl_wave_active_any_true: {
1352 Value *Op = EmitScalarExpr(E->getArg(0));
1353 assert(Op->getType()->isIntegerTy(1) &&
1354 "Intrinsic WaveActiveAnyTrue operand must be a bool");
1355
1356 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAnyTrueIntrinsic();
1357 return EmitRuntimeCall(
1358 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op});
1359 }
1360 case Builtin::BI__builtin_hlsl_wave_active_bit_or: {
1361 Value *Op = EmitScalarExpr(E->getArg(0));
1362 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
1363 "Intrinsic WaveActiveBitOr operand must have an unsigned integer "
1364 "representation");
1365
1366 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitOrIntrinsic();
1367 return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
1368 &CGM.getModule(), ID, {Op->getType()}),
1369 ArrayRef{Op}, "hlsl.wave.active.bit.or");
1370 }
1371 case Builtin::BI__builtin_hlsl_wave_active_bit_xor: {
1372 Value *Op = EmitScalarExpr(E->getArg(0));
1373 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
1374 "Intrinsic WaveActiveBitXor operand must have an unsigned integer "
1375 "representation");
1376
1377 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitXorIntrinsic();
1378 return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
1379 &CGM.getModule(), ID, {Op->getType()}),
1380 ArrayRef{Op}, "hlsl.wave.active.bit.xor");
1381 }
1382 case Builtin::BI__builtin_hlsl_wave_active_bit_and: {
1383 Value *Op = EmitScalarExpr(E->getArg(0));
1384 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
1385 "Intrinsic WaveActiveBitAnd operand must have an unsigned integer "
1386 "representation");
1387
1388 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitAndIntrinsic();
1389 return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
1390 &CGM.getModule(), ID, {Op->getType()}),
1391 ArrayRef{Op}, "hlsl.wave.active.bit.and");
1392 }
1393 case Builtin::BI__builtin_hlsl_wave_active_ballot: {
1394 [[maybe_unused]] Value *Op = EmitScalarExpr(E->getArg(0));
1395 assert(Op->getType()->isIntegerTy(1) &&
1396 "Intrinsic WaveActiveBallot operand must be a bool");
1397
1398 return handleHlslWaveActiveBallot(*this, E);
1399 }
1400 case Builtin::BI__builtin_hlsl_wave_active_count_bits: {
1401 Value *OpExpr = EmitScalarExpr(E->getArg(0));
1402 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveCountBitsIntrinsic();
1403 return EmitRuntimeCall(
1404 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID),
1405 ArrayRef{OpExpr});
1406 }
1407 case Builtin::BI__builtin_hlsl_wave_active_sum: {
1408 // Due to the use of variadic arguments, explicitly retrieve argument
1409 Value *OpExpr = EmitScalarExpr(E->getArg(0));
1410 Intrinsic::ID IID = getWaveActiveSumIntrinsic(
1411 getTarget().getTriple().getArch(), E->getArg(0)->getType());
1412
1413 return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
1414 &CGM.getModule(), IID, {OpExpr->getType()}),
1415 ArrayRef{OpExpr}, "hlsl.wave.active.sum");
1416 }
1417 case Builtin::BI__builtin_hlsl_wave_active_product: {
1418 // Due to the use of variadic arguments, explicitly retrieve argument
1419 Value *OpExpr = EmitScalarExpr(E->getArg(0));
1420 Intrinsic::ID IID = getWaveActiveProductIntrinsic(
1421 getTarget().getTriple().getArch(), E->getArg(0)->getType());
1422
1423 return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
1424 &CGM.getModule(), IID, {OpExpr->getType()}),
1425 ArrayRef{OpExpr}, "hlsl.wave.active.product");
1426 }
1427 case Builtin::BI__builtin_hlsl_wave_active_max: {
1428 // Due to the use of variadic arguments, explicitly retrieve argument
1429 Value *OpExpr = EmitScalarExpr(E->getArg(0));
1430 QualType QT = E->getArg(0)->getType();
1431 Intrinsic::ID IID;
1432 if (QT->isUnsignedIntegerType())
1433 IID = CGM.getHLSLRuntime().getWaveActiveUMaxIntrinsic();
1434 else
1435 IID = CGM.getHLSLRuntime().getWaveActiveMaxIntrinsic();
1436
1437 return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
1438 &CGM.getModule(), IID, {OpExpr->getType()}),
1439 ArrayRef{OpExpr}, "hlsl.wave.active.max");
1440 }
1441 case Builtin::BI__builtin_hlsl_wave_active_min: {
1442 // Due to the use of variadic arguments, explicitly retrieve argument
1443 Value *OpExpr = EmitScalarExpr(E->getArg(0));
1444 QualType QT = E->getArg(0)->getType();
1445 Intrinsic::ID IID;
1446 if (QT->isUnsignedIntegerType())
1447 IID = CGM.getHLSLRuntime().getWaveActiveUMinIntrinsic();
1448 else
1449 IID = CGM.getHLSLRuntime().getWaveActiveMinIntrinsic();
1450
1451 return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
1452 &CGM.getModule(), IID, {OpExpr->getType()}),
1453 ArrayRef{OpExpr}, "hlsl.wave.active.min");
1454 }
1455 case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
1456 // We don't define a SPIR-V intrinsic; instead it is a SPIR-V built-in
1457 // defined in SPIRVBuiltins.td. So we manually get the matching name for
1458 // the DirectX intrinsic and the demangled builtin name.
1459 switch (CGM.getTarget().getTriple().getArch()) {
1460 case llvm::Triple::dxil:
1461 return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
1462 &CGM.getModule(), Intrinsic::dx_wave_getlaneindex));
1463 case llvm::Triple::spirv:
1464 return EmitRuntimeCall(CGM.CreateRuntimeFunction(
1465 llvm::FunctionType::get(IntTy, {}, false),
1466 "__hlsl_wave_get_lane_index", {}, false, true));
1467 default:
1468 llvm_unreachable(
1469 "Intrinsic WaveGetLaneIndex not supported by target architecture");
1470 }
1471 }
1472 case Builtin::BI__builtin_hlsl_wave_is_first_lane: {
1473 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveIsFirstLaneIntrinsic();
1474 return EmitRuntimeCall(
1475 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
1476 }
1477 case Builtin::BI__builtin_hlsl_wave_get_lane_count: {
1478 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveGetLaneCountIntrinsic();
1479 return EmitRuntimeCall(
1480 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
1481 }
1482 case Builtin::BI__builtin_hlsl_wave_read_lane_at: {
1483 // Due to the use of variadic arguments we must explicitly retrieve them and
1484 // create our function type.
1485 Value *OpExpr = EmitScalarExpr(E->getArg(0));
1486 Value *OpIndex = EmitScalarExpr(E->getArg(1));
1487 return EmitRuntimeCall(
1488 Intrinsic::getOrInsertDeclaration(
1489 &CGM.getModule(), CGM.getHLSLRuntime().getWaveReadLaneAtIntrinsic(),
1490 {OpExpr->getType()}),
1491 ArrayRef{OpExpr, OpIndex}, "hlsl.wave.readlane");
1492 }
1493 case Builtin::BI__builtin_hlsl_wave_prefix_sum: {
1494 Value *OpExpr = EmitScalarExpr(E->getArg(0));
1495 Intrinsic::ID IID = getWavePrefixSumIntrinsic(
1496 getTarget().getTriple().getArch(), E->getArg(0)->getType());
1497 return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
1498 &CGM.getModule(), IID, {OpExpr->getType()}),
1499 ArrayRef{OpExpr}, "hlsl.wave.prefix.sum");
1500 }
1501 case Builtin::BI__builtin_hlsl_wave_prefix_product: {
1502 Value *OpExpr = EmitScalarExpr(E->getArg(0));
1503 Intrinsic::ID IID = getWavePrefixProductIntrinsic(
1504 getTarget().getTriple().getArch(), E->getArg(0)->getType());
1505 return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
1506 &CGM.getModule(), IID, {OpExpr->getType()}),
1507 ArrayRef{OpExpr}, "hlsl.wave.prefix.product");
1508 }
1509 case Builtin::BI__builtin_hlsl_quad_read_across_x: {
1510 Value *OpExpr = EmitScalarExpr(E->getArg(0));
1511 Intrinsic::ID ID = CGM.getHLSLRuntime().getQuadReadAcrossXIntrinsic();
1512 return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
1513 &CGM.getModule(), ID, {OpExpr->getType()}),
1514 ArrayRef{OpExpr}, "hlsl.quad.read.across.x");
1515 }
1516 case Builtin::BI__builtin_hlsl_quad_read_across_y: {
1517 Value *OpExpr = EmitScalarExpr(E->getArg(0));
1518 Intrinsic::ID ID = CGM.getHLSLRuntime().getQuadReadAcrossYIntrinsic();
1519 return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
1520 &CGM.getModule(), ID, {OpExpr->getType()}),
1521 ArrayRef{OpExpr}, "hlsl.quad.read.across.y");
1522 }
1523 case Builtin::BI__builtin_hlsl_elementwise_sign: {
1524 auto *Arg0 = E->getArg(0);
1525 Value *Op0 = EmitScalarExpr(Arg0);
1526 llvm::Type *Xty = Op0->getType();
1527 llvm::Type *retType = llvm::Type::getInt32Ty(this->getLLVMContext());
1528 if (Xty->isVectorTy()) {
1529 auto *XVecTy = Arg0->getType()->castAs<VectorType>();
1530 retType = llvm::VectorType::get(
1531 retType, ElementCount::getFixed(XVecTy->getNumElements()));
1532 }
1533 assert((Arg0->getType()->hasFloatingRepresentation() ||
1534 Arg0->getType()->hasIntegerRepresentation()) &&
1535 "sign operand must have a float or int representation");
1536
1537 if (Arg0->getType()->hasUnsignedIntegerRepresentation()) {
1538 Value *Cmp = Builder.CreateICmpEQ(Op0, ConstantInt::get(Xty, 0));
1539 return Builder.CreateSelect(Cmp, ConstantInt::get(retType, 0),
1540 ConstantInt::get(retType, 1), "hlsl.sign");
1541 }
1542
1543 return Builder.CreateIntrinsic(
1544 retType, CGM.getHLSLRuntime().getSignIntrinsic(),
1545 ArrayRef<Value *>{Op0}, nullptr, "hlsl.sign");
1546 }
1547 case Builtin::BI__builtin_hlsl_elementwise_radians: {
1548 Value *Op0 = EmitScalarExpr(E->getArg(0));
1549 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
1550 "radians operand must have a float representation");
1551 return Builder.CreateIntrinsic(
1552 /*ReturnType=*/Op0->getType(),
1553 CGM.getHLSLRuntime().getRadiansIntrinsic(), ArrayRef<Value *>{Op0},
1554 nullptr, "hlsl.radians");
1555 }
1556 case Builtin::BI__builtin_hlsl_buffer_update_counter: {
1557 Value *ResHandle = EmitScalarExpr(E->getArg(0));
1558 Value *Offset = EmitScalarExpr(E->getArg(1));
1559 Value *OffsetI8 = Builder.CreateIntCast(Offset, Int8Ty, true);
1560 return Builder.CreateIntrinsic(
1561 /*ReturnType=*/Offset->getType(),
1562 CGM.getHLSLRuntime().getBufferUpdateCounterIntrinsic(),
1563 ArrayRef<Value *>{ResHandle, OffsetI8}, nullptr);
1564 }
1565 case Builtin::BI__builtin_hlsl_elementwise_splitdouble: {
1566
1567 assert((E->getArg(0)->getType()->hasFloatingRepresentation() &&
1570 "asuint operands types mismatch");
1571 return handleHlslSplitdouble(E, this);
1572 }
1573 case Builtin::BI__builtin_hlsl_elementwise_clip:
1574 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
1575 "clip operands types mismatch");
1576 return handleHlslClip(E, this);
1577 case Builtin::BI__builtin_hlsl_all_memory_barrier: {
1578 Intrinsic::ID ID = CGM.getHLSLRuntime().getAllMemoryBarrierIntrinsic();
1579 return EmitRuntimeCall(
1580 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
1581 }
1582 case Builtin::BI__builtin_hlsl_all_memory_barrier_with_group_sync: {
1583 Intrinsic::ID ID =
1584 CGM.getHLSLRuntime().getAllMemoryBarrierWithGroupSyncIntrinsic();
1585 return EmitRuntimeCall(
1586 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
1587 }
1588 case Builtin::BI__builtin_hlsl_device_memory_barrier: {
1589 Intrinsic::ID ID = CGM.getHLSLRuntime().getDeviceMemoryBarrierIntrinsic();
1590 return EmitRuntimeCall(
1591 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
1592 }
1593 case Builtin::BI__builtin_hlsl_device_memory_barrier_with_group_sync: {
1594 Intrinsic::ID ID =
1595 CGM.getHLSLRuntime().getDeviceMemoryBarrierWithGroupSyncIntrinsic();
1596 return EmitRuntimeCall(
1597 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
1598 }
1599 case Builtin::BI__builtin_hlsl_group_memory_barrier: {
1600 Intrinsic::ID ID = CGM.getHLSLRuntime().getGroupMemoryBarrierIntrinsic();
1601 return EmitRuntimeCall(
1602 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
1603 }
1604 case Builtin::BI__builtin_hlsl_group_memory_barrier_with_group_sync: {
1605 Intrinsic::ID ID =
1606 CGM.getHLSLRuntime().getGroupMemoryBarrierWithGroupSyncIntrinsic();
1607 return EmitRuntimeCall(
1608 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
1609 }
1610 case Builtin::BI__builtin_hlsl_elementwise_ddx_coarse: {
1611 Value *Op0 = EmitScalarExpr(E->getArg(0));
1612 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
1613 llvm_unreachable("ddx_coarse operand must have a float representation");
1614 Intrinsic::ID ID = CGM.getHLSLRuntime().getDdxCoarseIntrinsic();
1615 return Builder.CreateIntrinsic(/*ReturnType=*/Op0->getType(), ID,
1616 ArrayRef<Value *>{Op0}, nullptr,
1617 "hlsl.ddx.coarse");
1618 }
1619 case Builtin::BI__builtin_hlsl_elementwise_ddy_coarse: {
1620 Value *Op0 = EmitScalarExpr(E->getArg(0));
1621 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
1622 llvm_unreachable("ddy_coarse operand must have a float representation");
1623 Intrinsic::ID ID = CGM.getHLSLRuntime().getDdyCoarseIntrinsic();
1624 return Builder.CreateIntrinsic(/*ReturnType=*/Op0->getType(), ID,
1625 ArrayRef<Value *>{Op0}, nullptr,
1626 "hlsl.ddy.coarse");
1627 }
1628 case Builtin::BI__builtin_hlsl_elementwise_ddx_fine: {
1629 Value *Op0 = EmitScalarExpr(E->getArg(0));
1630 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
1631 llvm_unreachable("ddx_fine operand must have a float representation");
1632 Intrinsic::ID ID = CGM.getHLSLRuntime().getDdxFineIntrinsic();
1633 return Builder.CreateIntrinsic(/*ReturnType=*/Op0->getType(), ID,
1634 ArrayRef<Value *>{Op0}, nullptr,
1635 "hlsl.ddx.fine");
1636 }
1637 case Builtin::BI__builtin_hlsl_elementwise_ddy_fine: {
1638 Value *Op0 = EmitScalarExpr(E->getArg(0));
1639 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
1640 llvm_unreachable("ddy_fine operand must have a float representation");
1641 Intrinsic::ID ID = CGM.getHLSLRuntime().getDdyFineIntrinsic();
1642 return Builder.CreateIntrinsic(/*ReturnType=*/Op0->getType(), ID,
1643 ArrayRef<Value *>{Op0}, nullptr,
1644 "hlsl.ddy.fine");
1645 }
1646 case Builtin::BI__builtin_get_spirv_spec_constant_bool:
1647 case Builtin::BI__builtin_get_spirv_spec_constant_short:
1648 case Builtin::BI__builtin_get_spirv_spec_constant_ushort:
1649 case Builtin::BI__builtin_get_spirv_spec_constant_int:
1650 case Builtin::BI__builtin_get_spirv_spec_constant_uint:
1651 case Builtin::BI__builtin_get_spirv_spec_constant_longlong:
1652 case Builtin::BI__builtin_get_spirv_spec_constant_ulonglong:
1653 case Builtin::BI__builtin_get_spirv_spec_constant_half:
1654 case Builtin::BI__builtin_get_spirv_spec_constant_float:
1655 case Builtin::BI__builtin_get_spirv_spec_constant_double: {
1656 llvm::Function *SpecConstantFn = getSpecConstantFunction(E->getType());
1657 llvm::Value *SpecId = EmitScalarExpr(E->getArg(0));
1658 llvm::Value *DefaultVal = EmitScalarExpr(E->getArg(1));
1659 llvm::Value *Args[] = {SpecId, DefaultVal};
1660 return Builder.CreateCall(SpecConstantFn, Args);
1661 }
1662 }
1663 return nullptr;
1664}
1665
1667 const clang::QualType &SpecConstantType) {
1668
1669 // Find or create the declaration for the function.
1670 llvm::Module *M = &CGM.getModule();
1671 std::string MangledName =
1672 getSpecConstantFunctionName(SpecConstantType, getContext());
1673 llvm::Function *SpecConstantFn = M->getFunction(MangledName);
1674
1675 if (!SpecConstantFn) {
1676 llvm::Type *IntType = ConvertType(getContext().IntTy);
1677 llvm::Type *RetTy = ConvertType(SpecConstantType);
1678 llvm::Type *ArgTypes[] = {IntType, RetTy};
1679 llvm::FunctionType *FnTy = llvm::FunctionType::get(RetTy, ArgTypes, false);
1680 SpecConstantFn = llvm::Function::Create(
1681 FnTy, llvm::GlobalValue::ExternalLinkage, MangledName, M);
1682 }
1683 return SpecConstantFn;
1684}
llvm::Value * EmitOverflowIntrinsic(CodeGenFunction &CGF, const Intrinsic::ID IntrinsicID, llvm::Value *X, llvm::Value *Y, llvm::Value *&Carry)
Emit a call to the llvm.*.with.overflow.* intrinsic selected by IntrinsicID.
static Intrinsic::ID getWavePrefixSumIntrinsic(llvm::Triple::ArchType Arch, QualType QT)
static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT)
static Intrinsic::ID getPrefixCountBitsIntrinsic(llvm::Triple::ArchType Arch)
static Intrinsic::ID getWaveActiveSumIntrinsic(llvm::Triple::ArchType Arch, QualType QT)
static std::string getSpecConstantFunctionName(clang::QualType SpecConstantType, ASTContext &Context)
static Value * handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF)
static Value * emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr, LValue &Stride)
static Intrinsic::ID getWavePrefixProductIntrinsic(llvm::Triple::ArchType Arch, QualType QT)
static Value * emitHlslClamp(CodeGenFunction &CGF, const CallExpr *E, unsigned ClampArgIndex)
static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT)
static llvm::Type * getOffsetType(CodeGenModule &CGM, llvm::Type *CoordTy)
static Value * emitGetDimensions(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned NumRetComps, bool HasLod)
static Value * handleElementwiseF16ToF32(CodeGenFunction &CGF, const CallExpr *E)
static Value * handleAsDoubleBuiltin(CodeGenFunction &CGF, const CallExpr *E)
static Value * handleHlslWaveActiveBallot(CodeGenFunction &CGF, const CallExpr *E)
static Value * emitHlslOffset(CodeGenFunction &CGF, const CallExpr *E, unsigned OffsetArgIndex, llvm::Type *OffsetTy)
static Value * handleElementwiseF32ToF16(CodeGenFunction &CGF, const CallExpr *E)
static Intrinsic::ID getWaveActiveProductIntrinsic(llvm::Triple::ArchType Arch, QualType QT)
static Value * handleHlslClip(const CallExpr *E, CodeGenFunction *CGF)
Result
Implement __builtin_bit_cast and related operations.
#define X(type, name)
Definition Value.h:97
static StringRef getTriple(const Command &Job)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:227
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition Expr.h:2946
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition Expr.h:3150
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition Expr.h:3137
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition Address.h:128
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition CGBuilder.h:146
CallArgList - Type for representing both the value and type of arguments in a call.
Definition CGCall.h:274
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Type * ConvertType(QualType T)
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
const LangOptions & getLangOpts() const
const TargetInfo & getTarget() const
llvm::Function * getSpecConstantFunction(const clang::QualType &SpecConstantType)
LValue EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, CallArgList &Args, QualType Ty)
Definition CGExpr.cpp:6367
void EmitWritebacks(const CallArgList &Args)
EmitWriteback - Emit callbacks for function.
Definition CGCall.cpp:5061
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
RValue EmitAnyExpr(const Expr *E, AggValueSlot aggSlot=AggValueSlot::ignored(), bool ignoreResult=false)
EmitAnyExpr - Emit code to compute the specified expression which can have any type.
Definition CGExpr.cpp:278
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
Definition CGExpr.cpp:1712
llvm::LLVMContext & getLLVMContext()
llvm::Value * EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
This class organizes the cross-function state that is used while generating LLVM code.
CGHLSLRuntime & getHLSLRuntime()
Return a reference to the configured HLSL runtime.
const TargetInfo & getTarget() const
const llvm::Triple & getTriple() const
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys={})
LValue - This represents an lvalue reference.
Definition CGValue.h:183
Address getAddress() const
Definition CGValue.h:373
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition CGValue.h:42
bool isScalar() const
Definition CGValue.h:64
Address getAggregateAddress() const
getAggregateAddr() - Return the Value* of the address of the aggregate.
Definition CGValue.h:84
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition CGValue.h:72
ReturnValueSlot - Contains the address where the return value of a function can be stored,...
Definition CGCall.h:381
Represents a concrete matrix type with constant number of rows and columns.
Definition TypeBase.h:4442
unsigned getNumColumns() const
Returns the number of columns in the matrix.
Definition TypeBase.h:4461
unsigned getNumRows() const
Returns the number of rows in the matrix.
Definition TypeBase.h:4458
The name of a declaration.
This represents one expression.
Definition Expr.h:112
QualType getType() const
Definition Expr.h:144
Represents a function declaration or definition.
Definition Decl.h:2018
static FunctionDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation NLoc, DeclarationName N, QualType T, TypeSourceInfo *TInfo, StorageClass SC, bool UsesFPIntrin=false, bool isInlineSpecified=false, bool hasWrittenPrototype=true, ConstexprSpecKind ConstexprKind=ConstexprSpecKind::Unspecified, const AssociatedConstraint &TrailingRequiresClause={})
Definition Decl.h:2207
Represents a parameter to a function.
Definition Decl.h:1808
static ParmVarDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, StorageClass S, Expr *DefArg)
Definition Decl.cpp:2952
A (possibly-)qualified type.
Definition TypeBase.h:937
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition TypeBase.h:8436
Encodes a location in the source.
bool areArgsDestroyedLeftToRightInCallee() const
Are arguments to a call destroyed left to right in the callee?
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
TargetCXXABI getCXXABI() const
Get the C++ ABI currently in use.
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition Type.cpp:2266
bool hasIntegerRepresentation() const
Determine whether this type has an integer representation of some sort, e.g., it is an integer type o...
Definition Type.cpp:2119
bool isConstantMatrixType() const
Definition TypeBase.h:8840
const T * castAs() const
Member-template castAs<specific type>.
Definition TypeBase.h:9333
bool hasUnsignedIntegerRepresentation() const
Determine whether this type has an unsigned integer representation of some sort, e....
Definition Type.cpp:2376
bool hasSignedIntegerRepresentation() const
Determine whether this type has a signed integer representation of some sort, e.g....
Definition Type.cpp:2310
bool hasFloatingRepresentation() const
Determine whether this type has a floating-point representation of some sort, e.g....
Definition Type.cpp:2397
bool isVectorType() const
Definition TypeBase.h:8812
bool isFloatingType() const
Definition Type.cpp:2389
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition Type.cpp:2332
const T * getAs() const
Member-template getAs<specific type>.
Definition TypeBase.h:9266
QualType getType() const
Definition Value.cpp:237
Represents a GCC generic vector type.
Definition TypeBase.h:4230
unsigned getNumElements() const
Definition TypeBase.h:4245
The JSON file list parser is used to communicate input to InstallAPI.
@ SC_Extern
Definition Specifiers.h:252
@ SC_None
Definition Specifiers.h:251
Expr * Cond
};
@ Result
The result type of a method or function.
Definition TypeBase.h:905
U cast(CodeGen::Address addr)
Definition Address.h:327
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
Extra information about a function prototype.
Definition TypeBase.h:5447