//===--- CIRGenBuiltinX86.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit x86/x86_64 Builtin calls as CIR or a function
// call to be later resolved.
//
//===----------------------------------------------------------------------===//

#include "CIRGenBuilder.h"
#include "CIRGenFunction.h"
#include "CIRGenModule.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/Location.h"
#include "mlir/IR/Types.h"
#include "mlir/IR/ValueRange.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <numeric>
#include <optional>
#include <string>

using namespace clang;
using namespace clang::CIRGen;

template <typename... Operands>
static mlir::Value emitIntrinsicCallOp(CIRGenBuilderTy &builder,
                                       mlir::Location loc, const StringRef str,
                                       const mlir::Type &resTy,
                                       Operands &&...op) {
  return cir::LLVMIntrinsicCallOp::create(builder, loc,
                                          builder.getStringAttr(str), resTy,
                                          std::forward<Operands>(op)...)
      .getResult();
}

// OG has unordered comparison as a form of optimization in addition to
// ordered comparison, while CIR doesn't.
//
// This means that we can't encode the comparison code of UGT (unordered
// greater than), at least not at the CIR level.
//
// The boolean shouldInvert compensates for this.
// For example: to get the comparison code UGT, we pass in
// emitVectorFCmp(OLE, shouldInvert = true), since OLE is the inverse of UGT.

// There are several ways to support this otherwise:
// - Register an extra CmpOpKind for the unordered comparison types and build
//   the translation code to go from CIR -> LLVM dialect. Notice we get this
//   naturally with shouldInvert, benefiting from existing infrastructure,
//   albeit having to generate an extra `not` at the CIR level.
// - Just add the extra comparison codes to a new VecCmpOpKind instead of
//   cluttering CmpOpKind.
// - Add a boolean to VecCmpOp to indicate whether it's doing an unordered or
//   ordered comparison.
// - Just emit the intrinsic call instead of calling this helper; see how the
//   LLVM lowering handles this.
static mlir::Value emitVectorFCmp(CIRGenBuilderTy &builder,
                                  ArrayRef<mlir::Value> ops,
                                  mlir::Location loc, cir::CmpOpKind pred,
                                  bool shouldInvert) {
  // TODO(cir): Add isSignaling boolean once emitConstrainedFPCall implemented
  mlir::Value cmp = builder.createVecCompare(loc, pred, ops[0], ops[1]);
  mlir::Value bitCast = builder.createBitcast(
      shouldInvert ? builder.createNot(cmp) : cmp, ops[0].getType());
  return bitCast;
}
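
// For example, a caller wanting UGT (unordered greater than) would request
// the inverse ordered predicate and invert the result (a sketch, assuming a
// two-element `ops` pair {a, b}):
//
//   // !(a OLE b) == (a UGT b) for every lane.
//   mlir::Value ugt = emitVectorFCmp(builder, ops, loc, cir::CmpOpKind::le,
//                                    /*shouldInvert=*/true);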

static mlir::Value getMaskVecValue(CIRGenBuilderTy &builder, mlir::Location loc,
                                   mlir::Value mask, unsigned numElems) {
  auto maskTy = cir::VectorType::get(
      builder.getSIntNTy(1), cast<cir::IntType>(mask.getType()).getWidth());
  mlir::Value maskVec = builder.createBitcast(mask, maskTy);

  // If we have fewer than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
  if (numElems < 8) {
    llvm::SmallVector<mlir::Attribute, 4> indices;
    mlir::Type i32Ty = builder.getSInt32Ty();
    for (auto i : llvm::seq<unsigned>(0, numElems))
      indices.push_back(cir::IntAttr::get(i32Ty, i));

    maskVec = builder.createVecShuffle(loc, maskVec, maskVec, indices);
  }
  return maskVec;
}
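
// E.g. a 4-element mask still arrives as an i8: it is bitcast to an 8 x i1
// vector first, and the shuffle above then keeps only elements 0..3.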

// Builds the VecShuffleOp for the pshuflw and pshufhw x86 builtins.
//
// The vector is split into lanes of 8 word elements (16 bits each). The lower
// or upper half of each lane, controlled by `isLow`, is shuffled in the
// following way: the immediate is truncated to 8 bits and split into four
// 2-bit fields. The i-th field's value gives the resulting index of the i-th
// element in the half lane after shuffling. The other half of the lane
// remains unchanged.
static cir::VecShuffleOp emitPshufWord(CIRGenBuilderTy &builder,
                                       const mlir::Value vec,
                                       const mlir::Value immediate,
                                       const mlir::Location loc,
                                       const bool isLow) {
  uint32_t imm = CIRGenFunction::getZExtIntValueFromConstOp(immediate);

  auto vecTy = cast<cir::VectorType>(vec.getType());
  unsigned numElts = vecTy.getSize();

  unsigned firstHalfStart = isLow ? 0 : 4;
  unsigned secondHalfStart = 4 - firstHalfStart;

  // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
  imm = (imm & 0xff) * 0x01010101;

  int64_t indices[32];
  for (unsigned l = 0; l != numElts; l += 8) {
    for (unsigned i = firstHalfStart; i != firstHalfStart + 4; ++i) {
      indices[l + i] = l + (imm & 3) + firstHalfStart;
      imm >>= 2;
    }
    for (unsigned i = secondHalfStart; i != secondHalfStart + 4; ++i)
      indices[l + i] = l + i;
  }

  return builder.createVecShuffle(loc, vec, ArrayRef(indices, numElts));
}
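
// Worked example (illustrative): for pshuflw with imm = 0x1B (0b00011011),
// the four 2-bit fields decode (low bits first) to 3, 2, 1, 0, so the low
// half of each 8-word lane becomes elements {3, 2, 1, 0} while elements 4..7
// pass through unchanged.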

// Builds the shuffle mask for the pshufd and shufpd/shufps x86 builtins.
// The shuffle mask is written to outIndices.
static void
computeFullLaneShuffleMask(CIRGenFunction &cgf, const mlir::Value vec,
                           uint32_t imm, const bool isShufP,
                           llvm::SmallVectorImpl<int64_t> &outIndices) {
  auto vecTy = cast<cir::VectorType>(vec.getType());
  unsigned numElts = vecTy.getSize();
  unsigned numLanes = cgf.cgm.getDataLayout().getTypeSizeInBits(vecTy) / 128;
  unsigned numLaneElts = numElts / numLanes;

  // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
  imm = (imm & 0xff) * 0x01010101;

  // Size the output before writing to it, since callers may pass an empty
  // vector.
  outIndices.resize(numElts);

  for (unsigned l = 0; l != numElts; l += numLaneElts) {
    for (unsigned i = 0; i != numLaneElts; ++i) {
      uint32_t idx = imm % numLaneElts;
      imm /= numLaneElts;
      if (isShufP && i >= (numLaneElts / 2))
        idx += numElts;
      outIndices[l + i] = l + idx;
    }
  }
}
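
// Worked example (illustrative): pshufd with imm = 0x1B on a 4 x i32 vector
// decodes the four base-4 digits of 0x1B as 3, 2, 1, 0, producing indices
// [3, 2, 1, 0], i.e. a full lane reversal. For shufpd/shufps (isShufP), the
// indices for the upper half of each lane additionally get numElts added so
// that they select from the second operand of the two-input shuffle.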

static mlir::Value emitX86CompressExpand(CIRGenBuilderTy &builder,
                                         mlir::Location loc, mlir::Value source,
                                         mlir::Value mask,
                                         mlir::Value inputVector,
                                         const std::string &id) {
  auto resultTy = cast<cir::VectorType>(mask.getType());
  mlir::Value maskValue =
      getMaskVecValue(builder, loc, inputVector, resultTy.getSize());
  return emitIntrinsicCallOp(builder, loc, id, resultTy,
                             mlir::ValueRange{source, mask, maskValue});
}

static mlir::Value emitX86Select(CIRGenBuilderTy &builder, mlir::Location loc,
                                 mlir::Value mask, mlir::Value op0,
                                 mlir::Value op1) {
  auto constOp = mlir::dyn_cast_or_null<cir::ConstantOp>(mask.getDefiningOp());
  // If the mask is all ones, just return the first argument.
  if (constOp && constOp.isAllOnesValue())
    return op0;

  mask = getMaskVecValue(builder, loc, mask,
                         cast<cir::VectorType>(op0.getType()).getSize());

  return cir::VecTernaryOp::create(builder, loc, mask, op0, op1);
}
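
// Note that this implements the AVX-512 write-mask semantics:
// result[i] = mask[i] ? op0[i] : op1[i].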

static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder,
                                       mlir::Location loc,
                                       const std::string &intrinsicName,
                                       ArrayRef<mlir::Value> ops) {
  auto intTy = cast<cir::IntType>(ops[0].getType());
  unsigned numElts = intTy.getWidth();
  mlir::Value lhsVec = getMaskVecValue(builder, loc, ops[0], numElts);
  mlir::Value rhsVec = getMaskVecValue(builder, loc, ops[1], numElts);
  mlir::Type vecTy = lhsVec.getType();
  mlir::Value resVec = emitIntrinsicCallOp(builder, loc, intrinsicName, vecTy,
                                           mlir::ValueRange{lhsVec, rhsVec});
  return builder.createBitcast(resVec, ops[0].getType());
}

static mlir::Value emitX86MaskUnpack(CIRGenBuilderTy &builder,
                                     mlir::Location loc,
                                     const std::string &intrinsicName,
                                     ArrayRef<mlir::Value> ops) {
  unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth();

  // Convert both operands to mask vectors.
  mlir::Value lhs = getMaskVecValue(builder, loc, ops[0], numElems);
  mlir::Value rhs = getMaskVecValue(builder, loc, ops[1], numElems);

  mlir::Type i32Ty = builder.getSInt32Ty();

  // Create indices for extracting the first half of each vector.
  llvm::SmallVector<mlir::Attribute, 32> halfIndices;
  for (auto i : llvm::seq<unsigned>(0, numElems / 2))
    halfIndices.push_back(cir::IntAttr::get(i32Ty, i));

  // Extract the first half of each vector. This gives better codegen than
  // doing it in a single shuffle.
  mlir::Value lhsHalf = builder.createVecShuffle(loc, lhs, lhs, halfIndices);
  mlir::Value rhsHalf = builder.createVecShuffle(loc, rhs, rhs, halfIndices);

  // Create indices for concatenating the vectors.
  // NOTE: Operands are swapped to match the intrinsic definition.
  // After the half extraction, both vectors have numElems/2 elements.
  // In createVecShuffle(rhsHalf, lhsHalf, indices), indices [0..numElems/2-1]
  // select from rhsHalf, and indices [numElems/2..numElems-1] select from
  // lhsHalf.
  llvm::SmallVector<mlir::Attribute, 64> concatIndices;
  for (auto i : llvm::seq<unsigned>(0, numElems))
    concatIndices.push_back(cir::IntAttr::get(i32Ty, i));

  // Concat the vectors (RHS first, then LHS).
  mlir::Value res =
      builder.createVecShuffle(loc, rhsHalf, lhsHalf, concatIndices);
  return builder.createBitcast(res, ops[0].getType());
}

static mlir::Value emitX86MaskLogic(CIRGenBuilderTy &builder,
                                    mlir::Location loc,
                                    cir::BinOpKind binOpKind,
                                    ArrayRef<mlir::Value> ops,
                                    bool invertLHS = false) {
  unsigned numElts = cast<cir::IntType>(ops[0].getType()).getWidth();
  mlir::Value lhs = getMaskVecValue(builder, loc, ops[0], numElts);
  mlir::Value rhs = getMaskVecValue(builder, loc, ops[1], numElts);

  if (invertLHS)
    lhs = builder.createNot(lhs);
  return builder.createBitcast(builder.createBinop(loc, lhs, binOpKind, rhs),
                               ops[0].getType());
}

static mlir::Value emitX86MaskTest(CIRGenBuilderTy &builder, mlir::Location loc,
                                   const std::string &intrinsicName,
                                   ArrayRef<mlir::Value> ops) {
  auto intTy = cast<cir::IntType>(ops[0].getType());
  unsigned numElts = intTy.getWidth();
  mlir::Value lhsVec = getMaskVecValue(builder, loc, ops[0], numElts);
  mlir::Value rhsVec = getMaskVecValue(builder, loc, ops[1], numElts);
  mlir::Type resTy = builder.getSInt32Ty();
  return emitIntrinsicCallOp(builder, loc, intrinsicName, resTy,
                             mlir::ValueRange{lhsVec, rhsVec});
}

// TODO: The cgf parameter should be removed when all the NYI cases are
// implemented.
static std::optional<mlir::Value>
emitX86MaskedCompareResult(CIRGenFunction &cgf, CIRGenBuilderTy &builder,
                           mlir::Value cmp, unsigned numElts,
                           mlir::Value maskIn, mlir::Location loc) {
  if (maskIn) {
    cgf.cgm.errorNYI(loc, "emitX86MaskedCompareResult");
    return {};
  }
  if (numElts < 8) {
    llvm::SmallVector<mlir::Attribute, 8> indices;
    mlir::Type i64Ty = builder.getSInt64Ty();

    for (unsigned i = 0; i != numElts; ++i)
      indices.push_back(cir::IntAttr::get(i64Ty, i));
    for (unsigned i = numElts; i != 8; ++i)
      indices.push_back(cir::IntAttr::get(i64Ty, i % numElts + numElts));
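    // E.g. with numElts == 4 the indices are [0, 1, 2, 3, 4, 5, 6, 7]: the
    // first four select the compare results and the rest select zeros from
    // the null vector below, padding the result to the minimum 8-bit mask
    // width.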
285
286 // This should shuffle between cmp (first vector) and null (second vector)
287 mlir::Value nullVec = builder.getNullValue(cmp.getType(), loc);
288 cmp = builder.createVecShuffle(loc, cmp, nullVec, indices);
289 }
290 return builder.createBitcast(cmp, builder.getUIntNTy(std::max(numElts, 8U)));
291}

// TODO: The cgf parameter should be removed when all the NYI cases are
// implemented.
static std::optional<mlir::Value>
emitX86MaskedCompare(CIRGenFunction &cgf, CIRGenBuilderTy &builder, unsigned cc,
                     bool isSigned, ArrayRef<mlir::Value> ops,
                     mlir::Location loc) {
  assert((ops.size() == 2 || ops.size() == 4) &&
         "Unexpected number of arguments");
  unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
  mlir::Value cmp;

  if (cc == 3) {
    cgf.cgm.errorNYI(loc, "emitX86MaskedCompare: cc == 3");
    return {};
  } else if (cc == 7) {
    cgf.cgm.errorNYI(loc, "emitX86MaskedCompare: cc == 7");
    return {};
  } else {
    cir::CmpOpKind pred;
    switch (cc) {
    default:
      llvm_unreachable("Unknown condition code");
    case 0:
      pred = cir::CmpOpKind::eq;
      break;
    case 1:
      pred = cir::CmpOpKind::lt;
      break;
    case 2:
      pred = cir::CmpOpKind::le;
      break;
    case 4:
      pred = cir::CmpOpKind::ne;
      break;
    case 5:
      pred = cir::CmpOpKind::ge;
      break;
    case 6:
      pred = cir::CmpOpKind::gt;
      break;
    }

    auto resultTy = cir::VectorType::get(builder.getSIntNTy(1), numElts);
    cmp = cir::VecCmpOp::create(builder, loc, resultTy, pred, ops[0], ops[1]);
  }

  mlir::Value maskIn;
  if (ops.size() == 4)
    maskIn = ops[3];

  return emitX86MaskedCompareResult(cgf, builder, cmp, numElts, maskIn, loc);
}

// TODO: The cgf parameter should be removed when all the NYI cases are
// implemented.
static std::optional<mlir::Value> emitX86ConvertToMask(CIRGenFunction &cgf,
                                                       CIRGenBuilderTy &builder,
                                                       mlir::Value in,
                                                       mlir::Location loc) {
  cir::ConstantOp zero = builder.getNullValue(in.getType(), loc);
  return emitX86MaskedCompare(cgf, builder, 1, true, {in, zero}, loc);
}

static std::optional<mlir::Value> emitX86SExtMask(CIRGenBuilderTy &builder,
                                                  mlir::Value op,
                                                  mlir::Type dstTy,
                                                  mlir::Location loc) {
  unsigned numberOfElements = cast<cir::VectorType>(dstTy).getSize();
  mlir::Value mask = getMaskVecValue(builder, loc, op, numberOfElements);

  return builder.createCast(loc, cir::CastKind::integral, mask, dstTy);
}

static mlir::Value emitVecInsert(CIRGenBuilderTy &builder, mlir::Location loc,
                                 mlir::Value vec, mlir::Value value,
                                 mlir::Value indexOp) {
  unsigned numElts = cast<cir::VectorType>(vec.getType()).getSize();

  uint64_t index =
      indexOp.getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue();

  index &= numElts - 1;

  cir::ConstantOp indexVal = builder.getUInt64(index, loc);

  return cir::VecInsertOp::create(builder, loc, vec, value, indexVal);
}
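
// E.g. for an 8-element vector, an out-of-range index of 11 wraps to
// 11 & 7 == 3, keeping the insertion index within the vector.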

static mlir::Value emitX86FunnelShift(CIRGenBuilderTy &builder,
                                      mlir::Location location, mlir::Value &op0,
                                      mlir::Value &op1, mlir::Value &amt,
                                      bool isRight) {
  mlir::Type op0Ty = op0.getType();

  // The amount may be a scalar immediate, in which case create a splat vector.
  // Funnel shift amounts are treated as modulo, and types are all power-of-2,
  // so we only care about the lowest log2 bits anyway.
  if (amt.getType() != op0Ty) {
    auto vecTy = mlir::cast<cir::VectorType>(op0Ty);
    uint64_t numElems = vecTy.getSize();

    auto amtTy = mlir::cast<cir::IntType>(amt.getType());
    auto vecElemTy = mlir::cast<cir::IntType>(vecTy.getElementType());

    // If signed, cast to the same width but unsigned first to
    // ensure zero-extension when casting to a bigger unsigned `vecElemTy`.
    if (amtTy.isSigned()) {
      cir::IntType unsignedAmtTy = builder.getUIntNTy(amtTy.getWidth());
      amt = builder.createIntCast(amt, unsignedAmtTy);
    }
    cir::IntType unsignedVecElemType = builder.getUIntNTy(vecElemTy.getWidth());
    amt = builder.createIntCast(amt, unsignedVecElemType);
    amt = cir::VecSplatOp::create(
        builder, location, cir::VectorType::get(unsignedVecElemType, numElems),
        amt);
  }

  const StringRef intrinsicName = isRight ? "fshr" : "fshl";
  return emitIntrinsicCallOp(builder, location, intrinsicName, op0Ty,
                             mlir::ValueRange{op0, op1, amt});
}
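
// Note that when both value operands are the same vector, as in the
// prol/pror rotate builtins below, the funnel shift degenerates to a rotate:
// fshl(x, x, n) rotates x left by n.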

static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
                                bool isSigned, ArrayRef<mlir::Value> ops,
                                unsigned opTypePrimitiveSizeInBits) {
  mlir::Type ty = cir::VectorType::get(builder.getSInt64Ty(),
                                       opTypePrimitiveSizeInBits / 64);
  mlir::Value lhs = builder.createBitcast(loc, ops[0], ty);
  mlir::Value rhs = builder.createBitcast(loc, ops[1], ty);
  if (isSigned) {
    cir::ConstantOp shiftAmt =
        builder.getConstant(loc, cir::IntAttr::get(builder.getSInt64Ty(), 32));
    cir::VecSplatOp shiftSplatVecOp =
        cir::VecSplatOp::create(builder, loc, ty, shiftAmt.getResult());
    mlir::Value shiftSplatValue = shiftSplatVecOp.getResult();
    // In CIR, right-shift operations are automatically lowered to either an
    // arithmetic or logical shift depending on the operand type. The purpose
    // of the shifts here is to propagate the sign bit of the 32-bit input
    // into the upper bits of each vector lane.
    lhs = builder.createShift(loc, lhs, shiftSplatValue, true);
    lhs = builder.createShift(loc, lhs, shiftSplatValue, false);
    rhs = builder.createShift(loc, rhs, shiftSplatValue, true);
    rhs = builder.createShift(loc, rhs, shiftSplatValue, false);
  } else {
    cir::ConstantOp maskScalar = builder.getConstant(
        loc, cir::IntAttr::get(builder.getSInt64Ty(), 0xffffffff));
    cir::VecSplatOp mask =
        cir::VecSplatOp::create(builder, loc, ty, maskScalar.getResult());
    // Clear the upper bits.
    lhs = builder.createAnd(loc, lhs, mask);
    rhs = builder.createAnd(loc, rhs, mask);
  }
  return builder.createMul(loc, lhs, rhs);
}
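
// Worked example (illustrative lane value): for a lane holding
// 0x00000000'80000000, the left shift by 32 gives 0x80000000'00000000 and
// the arithmetic right shift by 32 gives 0xFFFFFFFF'80000000, i.e. the low
// 32 bits sign-extended to 64 bits.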

// Convert f16 half values to floats.
static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
                                            mlir::Location loc,
                                            ArrayRef<mlir::Value> ops,
                                            mlir::Type dstTy) {
  assert((ops.size() == 1 || ops.size() == 3 || ops.size() == 4) &&
         "Unknown cvtph2ps intrinsic");

  // If the SAE intrinsic doesn't use default rounding, then we can't upgrade.
  if (ops.size() == 4) {
    auto constOp = ops[3].getDefiningOp<cir::ConstantOp>();
    assert(constOp && "Expected constant operand");
    if (constOp.getIntValue().getZExtValue() != 4) {
      return emitIntrinsicCallOp(builder, loc, "x86.avx512.mask.vcvtph2ps.512",
                                 dstTy, ops);
    }
  }

  unsigned numElts = cast<cir::VectorType>(dstTy).getSize();
  mlir::Value src = ops[0];

  // Extract the subvector.
  if (numElts != cast<cir::VectorType>(src.getType()).getSize()) {
    assert(numElts == 4 && "Unexpected vector size");
    src = builder.createVecShuffle(loc, src, {0, 1, 2, 3});
  }

  // Bitcast from vXi16 to vXf16.
  cir::VectorType halfTy =
      cir::VectorType::get(cir::FP16Type::get(builder.getContext()), numElts);

  src = builder.createCast(cir::CastKind::bitcast, src, halfTy);

  // Perform the fp-extension.
  mlir::Value res = builder.createCast(cir::CastKind::floating, src, dstTy);

  if (ops.size() >= 3)
    res = emitX86Select(builder, loc, ops[2], res, ops[1]);
  return res;
}

static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
                                ArrayRef<mlir::Value> ops, bool isSigned) {
  mlir::Value op0 = ops[0];
  mlir::Value op1 = ops[1];

  cir::VectorType ty = cast<cir::VectorType>(op0.getType());
  cir::IntType elementTy = cast<cir::IntType>(ty.getElementType());

  uint64_t imm = CIRGenFunction::getZExtIntValueFromConstOp(ops[2]) & 0x7;

  cir::CmpOpKind pred;
  switch (imm) {
  case 0x0:
    pred = cir::CmpOpKind::lt;
    break;
  case 0x1:
    pred = cir::CmpOpKind::le;
    break;
  case 0x2:
    pred = cir::CmpOpKind::gt;
    break;
  case 0x3:
    pred = cir::CmpOpKind::ge;
    break;
  case 0x4:
    pred = cir::CmpOpKind::eq;
    break;
  case 0x5:
    pred = cir::CmpOpKind::ne;
    break;
  case 0x6:
    return builder.getNullValue(ty, loc); // FALSE
  case 0x7: {
    llvm::APInt allOnes = llvm::APInt::getAllOnes(elementTy.getWidth());
    return cir::VecSplatOp::create(
        builder, loc, ty,
        builder.getConstAPInt(loc, elementTy, allOnes)); // TRUE
  }
  default:
    llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
  }

  if ((!isSigned && elementTy.isSigned()) ||
      (isSigned && elementTy.isUnsigned())) {
    elementTy = elementTy.isSigned() ? builder.getUIntNTy(elementTy.getWidth())
                                     : builder.getSIntNTy(elementTy.getWidth());
    ty = cir::VectorType::get(elementTy, ty.getSize());
    op0 = builder.createBitcast(op0, ty);
    op1 = builder.createBitcast(op1, ty);
  }

  return builder.createVecCompare(loc, pred, op0, op1);
}

std::optional<mlir::Value>
CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
  if (builtinID == Builtin::BI__builtin_cpu_is) {
    cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_is");
    return mlir::Value{};
  }
  if (builtinID == Builtin::BI__builtin_cpu_supports) {
    cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_supports");
    return mlir::Value{};
  }
  if (builtinID == Builtin::BI__builtin_cpu_init) {
    cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_init");
    return mlir::Value{};
  }

  // Handle MSVC intrinsics before argument evaluation to prevent double
  // evaluation.

  // The operands of the builtin call.
  llvm::SmallVector<mlir::Value, 4> ops;

  // Find out if any arguments are required to be integer constant expressions.
  // `iceArguments` is a bitmap indicating whether the argument at the i-th bit
  // is required to be a constant integer expression.
  unsigned iceArguments = 0;
  ASTContext::GetBuiltinTypeError error;
  getContext().GetBuiltinType(builtinID, error, &iceArguments);
  assert(error == ASTContext::GE_None && "Error while getting builtin type.");

  for (auto [idx, arg] : llvm::enumerate(expr->arguments()))
    ops.push_back(emitScalarOrConstFoldImmArg(iceArguments, idx, arg));

  CIRGenBuilderTy &builder = getBuilder();
  mlir::Type voidTy = builder.getVoidTy();

  switch (builtinID) {
  default:
    return std::nullopt;
  case X86::BI_mm_clflush:
    return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
                               "x86.sse2.clflush", voidTy, ops[0]);
  case X86::BI_mm_lfence:
    return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
                               "x86.sse2.lfence", voidTy);
  case X86::BI_mm_pause:
    return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
                               "x86.sse2.pause", voidTy);
  case X86::BI_mm_mfence:
    return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
                               "x86.sse2.mfence", voidTy);
  case X86::BI_mm_sfence:
    return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
                               "x86.sse.sfence", voidTy);
  case X86::BI_mm_prefetch:
  case X86::BI__rdtsc:
  case X86::BI__builtin_ia32_rdtscp: {
    cgm.errorNYI(expr->getSourceRange(),
                 std::string("unimplemented X86 builtin call: ") +
                     getContext().BuiltinInfo.getName(builtinID));
    return mlir::Value{};
  }
  case X86::BI__builtin_ia32_lzcnt_u16:
  case X86::BI__builtin_ia32_lzcnt_u32:
  case X86::BI__builtin_ia32_lzcnt_u64: {
    mlir::Location loc = getLoc(expr->getExprLoc());
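    // Unlike the generic __builtin_clz, lzcnt is well defined for a zero
    // input (it returns the operand width), so zero is not poison here.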
    mlir::Value isZeroPoison = builder.getFalse(loc);
    return emitIntrinsicCallOp(builder, loc, "ctlz", ops[0].getType(),
                               mlir::ValueRange{ops[0], isZeroPoison});
  }
  case X86::BI__builtin_ia32_tzcnt_u16:
  case X86::BI__builtin_ia32_tzcnt_u32:
  case X86::BI__builtin_ia32_tzcnt_u64: {
    mlir::Location loc = getLoc(expr->getExprLoc());
    mlir::Value isZeroPoison = builder.getFalse(loc);
    return emitIntrinsicCallOp(builder, loc, "cttz", ops[0].getType(),
                               mlir::ValueRange{ops[0], isZeroPoison});
  }
  case X86::BI__builtin_ia32_undef128:
  case X86::BI__builtin_ia32_undef256:
  case X86::BI__builtin_ia32_undef512:
    // The x86 definition of "undef" is not the same as the LLVM definition
    // (PR32176). We leave optimizing away an unnecessary zero constant to the
    // IR optimizer and backend.
    // TODO: If we had a "freeze" IR instruction to generate a fixed undef
    // value, we should use that here instead of a zero.
    return builder.getNullValue(convertType(expr->getType()),
                                getLoc(expr->getExprLoc()));
  case X86::BI__builtin_ia32_vec_ext_v4hi:
  case X86::BI__builtin_ia32_vec_ext_v16qi:
  case X86::BI__builtin_ia32_vec_ext_v8hi:
  case X86::BI__builtin_ia32_vec_ext_v4si:
  case X86::BI__builtin_ia32_vec_ext_v4sf:
  case X86::BI__builtin_ia32_vec_ext_v2di:
  case X86::BI__builtin_ia32_vec_ext_v32qi:
  case X86::BI__builtin_ia32_vec_ext_v16hi:
  case X86::BI__builtin_ia32_vec_ext_v8si:
  case X86::BI__builtin_ia32_vec_ext_v4di: {
    unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();

    uint64_t index = getZExtIntValueFromConstOp(ops[1]);
    index &= numElts - 1;

    cir::ConstantOp indexVal =
        builder.getUInt64(index, getLoc(expr->getExprLoc()));

    // These builtins exist so we can ensure the index is an ICE and in range.
    // Otherwise we could just do this in the header file.
    return cir::VecExtractOp::create(builder, getLoc(expr->getExprLoc()),
                                     ops[0], indexVal);
  }
  case X86::BI__builtin_ia32_vec_set_v4hi:
  case X86::BI__builtin_ia32_vec_set_v16qi:
  case X86::BI__builtin_ia32_vec_set_v8hi:
  case X86::BI__builtin_ia32_vec_set_v4si:
  case X86::BI__builtin_ia32_vec_set_v2di:
  case X86::BI__builtin_ia32_vec_set_v32qi:
  case X86::BI__builtin_ia32_vec_set_v16hi:
  case X86::BI__builtin_ia32_vec_set_v8si:
  case X86::BI__builtin_ia32_vec_set_v4di: {
    return emitVecInsert(builder, getLoc(expr->getExprLoc()), ops[0], ops[1],
                         ops[2]);
  }
  case X86::BI__builtin_ia32_kunpckhi:
    return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
                             "x86.avx512.kunpackb", ops);
  case X86::BI__builtin_ia32_kunpcksi:
    return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
                             "x86.avx512.kunpackw", ops);
  case X86::BI__builtin_ia32_kunpckdi:
    return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
                             "x86.avx512.kunpackd", ops);
  case X86::BI_mm_setcsr:
  case X86::BI__builtin_ia32_ldmxcsr: {
    mlir::Location loc = getLoc(expr->getExprLoc());
    Address tmp = createMemTemp(expr->getArg(0)->getType(), loc);
    builder.createStore(loc, ops[0], tmp);
    return emitIntrinsicCallOp(builder, loc, "x86.sse.ldmxcsr",
                               builder.getVoidTy(), tmp.getPointer());
  }
  case X86::BI_mm_getcsr:
  case X86::BI__builtin_ia32_stmxcsr: {
    mlir::Location loc = getLoc(expr->getExprLoc());
    Address tmp = createMemTemp(expr->getType(), loc);
    emitIntrinsicCallOp(builder, loc, "x86.sse.stmxcsr", builder.getVoidTy(),
                        tmp.getPointer());
    return builder.createLoad(loc, tmp);
  }
  case X86::BI__builtin_ia32_xsave:
  case X86::BI__builtin_ia32_xsave64:
  case X86::BI__builtin_ia32_xrstor:
  case X86::BI__builtin_ia32_xrstor64:
  case X86::BI__builtin_ia32_xsaveopt:
  case X86::BI__builtin_ia32_xsaveopt64:
  case X86::BI__builtin_ia32_xrstors:
  case X86::BI__builtin_ia32_xrstors64:
  case X86::BI__builtin_ia32_xsavec:
  case X86::BI__builtin_ia32_xsavec64:
  case X86::BI__builtin_ia32_xsaves:
  case X86::BI__builtin_ia32_xsaves64:
  case X86::BI__builtin_ia32_xsetbv:
  case X86::BI_xsetbv: {
    mlir::Location loc = getLoc(expr->getExprLoc());
    StringRef intrinsicName;
    switch (builtinID) {
    default:
      llvm_unreachable("Unexpected builtin");
    case X86::BI__builtin_ia32_xsave:
      intrinsicName = "x86.xsave";
      break;
    case X86::BI__builtin_ia32_xsave64:
      intrinsicName = "x86.xsave64";
      break;
    case X86::BI__builtin_ia32_xrstor:
      intrinsicName = "x86.xrstor";
      break;
    case X86::BI__builtin_ia32_xrstor64:
      intrinsicName = "x86.xrstor64";
      break;
    case X86::BI__builtin_ia32_xsaveopt:
      intrinsicName = "x86.xsaveopt";
      break;
    case X86::BI__builtin_ia32_xsaveopt64:
      intrinsicName = "x86.xsaveopt64";
      break;
    case X86::BI__builtin_ia32_xrstors:
      intrinsicName = "x86.xrstors";
      break;
    case X86::BI__builtin_ia32_xrstors64:
      intrinsicName = "x86.xrstors64";
      break;
    case X86::BI__builtin_ia32_xsavec:
      intrinsicName = "x86.xsavec";
      break;
    case X86::BI__builtin_ia32_xsavec64:
      intrinsicName = "x86.xsavec64";
      break;
    case X86::BI__builtin_ia32_xsaves:
      intrinsicName = "x86.xsaves";
      break;
    case X86::BI__builtin_ia32_xsaves64:
      intrinsicName = "x86.xsaves64";
      break;
    case X86::BI__builtin_ia32_xsetbv:
    case X86::BI_xsetbv:
      intrinsicName = "x86.xsetbv";
      break;
    }

    // The xsave family of instructions take a 64-bit mask that specifies
    // which processor state components to save/restore. The hardware expects
    // this mask split into two 32-bit registers: EDX (high 32 bits) and
    // EAX (low 32 bits).
    mlir::Type i32Ty = builder.getSInt32Ty();

    // mhi = (uint32_t)(ops[1] >> 32) - extract the high 32 bits via a right
    // shift.
    cir::ConstantOp shift32 = builder.getSInt64(32, loc);
    mlir::Value mhi = builder.createShift(loc, ops[1], shift32.getResult(),
                                          /*isShiftLeft=*/false);
    mhi = builder.createIntCast(mhi, i32Ty);

    // mlo = (uint32_t)ops[1] - extract the low 32 bits by truncation.
    mlir::Value mlo = builder.createIntCast(ops[1], i32Ty);
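    // E.g. a hypothetical mask of 0x0000000200000007 splits into
    // mhi = 0x00000002 and mlo = 0x00000007 (bits 0-2: x87/SSE/AVX state).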

    return emitIntrinsicCallOp(builder, loc, intrinsicName, voidTy,
                               mlir::ValueRange{ops[0], mhi, mlo});
  }
  case X86::BI__builtin_ia32_xgetbv:
  case X86::BI_xgetbv:
    // xgetbv reads the extended control register specified by ops[0] (ECX)
    // and returns the 64-bit value.
    return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
                               "x86.xgetbv", builder.getUInt64Ty(), ops[0]);
  case X86::BI__builtin_ia32_storedqudi128_mask:
  case X86::BI__builtin_ia32_storedqusi128_mask:
  case X86::BI__builtin_ia32_storedquhi128_mask:
  case X86::BI__builtin_ia32_storedquqi128_mask:
  case X86::BI__builtin_ia32_storeupd128_mask:
  case X86::BI__builtin_ia32_storeups128_mask:
  case X86::BI__builtin_ia32_storedqudi256_mask:
  case X86::BI__builtin_ia32_storedqusi256_mask:
  case X86::BI__builtin_ia32_storedquhi256_mask:
  case X86::BI__builtin_ia32_storedquqi256_mask:
  case X86::BI__builtin_ia32_storeupd256_mask:
  case X86::BI__builtin_ia32_storeups256_mask:
  case X86::BI__builtin_ia32_storedqudi512_mask:
  case X86::BI__builtin_ia32_storedqusi512_mask:
  case X86::BI__builtin_ia32_storedquhi512_mask:
  case X86::BI__builtin_ia32_storedquqi512_mask:
  case X86::BI__builtin_ia32_storeupd512_mask:
  case X86::BI__builtin_ia32_storeups512_mask:
  case X86::BI__builtin_ia32_storesbf16128_mask:
  case X86::BI__builtin_ia32_storesh128_mask:
  case X86::BI__builtin_ia32_storess128_mask:
  case X86::BI__builtin_ia32_storesd128_mask:
    cgm.errorNYI(expr->getSourceRange(),
                 std::string("unimplemented x86 builtin call: ") +
                     getContext().BuiltinInfo.getName(builtinID));
    return mlir::Value{};
  case X86::BI__builtin_ia32_cvtmask2b128:
  case X86::BI__builtin_ia32_cvtmask2b256:
  case X86::BI__builtin_ia32_cvtmask2b512:
  case X86::BI__builtin_ia32_cvtmask2w128:
  case X86::BI__builtin_ia32_cvtmask2w256:
  case X86::BI__builtin_ia32_cvtmask2w512:
  case X86::BI__builtin_ia32_cvtmask2d128:
  case X86::BI__builtin_ia32_cvtmask2d256:
  case X86::BI__builtin_ia32_cvtmask2d512:
  case X86::BI__builtin_ia32_cvtmask2q128:
  case X86::BI__builtin_ia32_cvtmask2q256:
  case X86::BI__builtin_ia32_cvtmask2q512:
    return emitX86SExtMask(this->getBuilder(), ops[0],
                           convertType(expr->getType()),
                           getLoc(expr->getExprLoc()));
  case X86::BI__builtin_ia32_cvtb2mask128:
  case X86::BI__builtin_ia32_cvtb2mask256:
  case X86::BI__builtin_ia32_cvtb2mask512:
  case X86::BI__builtin_ia32_cvtw2mask128:
  case X86::BI__builtin_ia32_cvtw2mask256:
  case X86::BI__builtin_ia32_cvtw2mask512:
  case X86::BI__builtin_ia32_cvtd2mask128:
  case X86::BI__builtin_ia32_cvtd2mask256:
  case X86::BI__builtin_ia32_cvtd2mask512:
  case X86::BI__builtin_ia32_cvtq2mask128:
  case X86::BI__builtin_ia32_cvtq2mask256:
  case X86::BI__builtin_ia32_cvtq2mask512:
    return emitX86ConvertToMask(*this, this->getBuilder(), ops[0],
                                getLoc(expr->getExprLoc()));
  case X86::BI__builtin_ia32_cvtdq2ps512_mask:
  case X86::BI__builtin_ia32_cvtqq2ps512_mask:
  case X86::BI__builtin_ia32_cvtqq2pd512_mask:
  case X86::BI__builtin_ia32_vcvtw2ph512_mask:
  case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
  case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
  case X86::BI__builtin_ia32_cvtudq2ps512_mask:
  case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
  case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
  case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
  case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
  case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
  case X86::BI__builtin_ia32_vfmaddsh3_mask:
  case X86::BI__builtin_ia32_vfmaddss3_mask:
  case X86::BI__builtin_ia32_vfmaddsd3_mask:
  case X86::BI__builtin_ia32_vfmaddsh3_maskz:
  case X86::BI__builtin_ia32_vfmaddss3_maskz:
  case X86::BI__builtin_ia32_vfmaddsd3_maskz:
  case X86::BI__builtin_ia32_vfmaddsh3_mask3:
  case X86::BI__builtin_ia32_vfmaddss3_mask3:
  case X86::BI__builtin_ia32_vfmaddsd3_mask3:
  case X86::BI__builtin_ia32_vfmsubsh3_mask3:
  case X86::BI__builtin_ia32_vfmsubss3_mask3:
  case X86::BI__builtin_ia32_vfmsubsd3_mask3:
  case X86::BI__builtin_ia32_vfmaddph512_mask:
  case X86::BI__builtin_ia32_vfmaddph512_maskz:
  case X86::BI__builtin_ia32_vfmaddph512_mask3:
  case X86::BI__builtin_ia32_vfmaddps512_mask:
  case X86::BI__builtin_ia32_vfmaddps512_maskz:
  case X86::BI__builtin_ia32_vfmaddps512_mask3:
  case X86::BI__builtin_ia32_vfmsubps512_mask3:
  case X86::BI__builtin_ia32_vfmaddpd512_mask:
  case X86::BI__builtin_ia32_vfmaddpd512_maskz:
  case X86::BI__builtin_ia32_vfmaddpd512_mask3:
  case X86::BI__builtin_ia32_vfmsubpd512_mask3:
  case X86::BI__builtin_ia32_vfmsubph512_mask3:
  case X86::BI__builtin_ia32_vfmaddsubph512_mask:
  case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
  case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
  case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
  case X86::BI__builtin_ia32_vfmaddsubps512_mask:
  case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
  case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
  case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
  case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
  case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
  case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
  case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
  case X86::BI__builtin_ia32_movdqa32store128_mask:
  case X86::BI__builtin_ia32_movdqa64store128_mask:
  case X86::BI__builtin_ia32_storeaps128_mask:
  case X86::BI__builtin_ia32_storeapd128_mask:
  case X86::BI__builtin_ia32_movdqa32store256_mask:
  case X86::BI__builtin_ia32_movdqa64store256_mask:
  case X86::BI__builtin_ia32_storeaps256_mask:
  case X86::BI__builtin_ia32_storeapd256_mask:
  case X86::BI__builtin_ia32_movdqa32store512_mask:
  case X86::BI__builtin_ia32_movdqa64store512_mask:
  case X86::BI__builtin_ia32_storeaps512_mask:
  case X86::BI__builtin_ia32_storeapd512_mask:
  case X86::BI__builtin_ia32_loadups128_mask:
  case X86::BI__builtin_ia32_loadups256_mask:
  case X86::BI__builtin_ia32_loadups512_mask:
  case X86::BI__builtin_ia32_loadupd128_mask:
  case X86::BI__builtin_ia32_loadupd256_mask:
  case X86::BI__builtin_ia32_loadupd512_mask:
  case X86::BI__builtin_ia32_loaddquqi128_mask:
  case X86::BI__builtin_ia32_loaddquqi256_mask:
  case X86::BI__builtin_ia32_loaddquqi512_mask:
  case X86::BI__builtin_ia32_loaddquhi128_mask:
  case X86::BI__builtin_ia32_loaddquhi256_mask:
  case X86::BI__builtin_ia32_loaddquhi512_mask:
  case X86::BI__builtin_ia32_loaddqusi128_mask:
  case X86::BI__builtin_ia32_loaddqusi256_mask:
  case X86::BI__builtin_ia32_loaddqusi512_mask:
  case X86::BI__builtin_ia32_loaddqudi128_mask:
  case X86::BI__builtin_ia32_loaddqudi256_mask:
  case X86::BI__builtin_ia32_loaddqudi512_mask:
  case X86::BI__builtin_ia32_loadsbf16128_mask:
  case X86::BI__builtin_ia32_loadsh128_mask:
  case X86::BI__builtin_ia32_loadss128_mask:
  case X86::BI__builtin_ia32_loadsd128_mask:
  case X86::BI__builtin_ia32_loadaps128_mask:
  case X86::BI__builtin_ia32_loadaps256_mask:
  case X86::BI__builtin_ia32_loadaps512_mask:
  case X86::BI__builtin_ia32_loadapd128_mask:
  case X86::BI__builtin_ia32_loadapd256_mask:
  case X86::BI__builtin_ia32_loadapd512_mask:
  case X86::BI__builtin_ia32_movdqa32load128_mask:
  case X86::BI__builtin_ia32_movdqa32load256_mask:
  case X86::BI__builtin_ia32_movdqa32load512_mask:
  case X86::BI__builtin_ia32_movdqa64load128_mask:
  case X86::BI__builtin_ia32_movdqa64load256_mask:
  case X86::BI__builtin_ia32_movdqa64load512_mask:
  case X86::BI__builtin_ia32_expandloaddf128_mask:
  case X86::BI__builtin_ia32_expandloaddf256_mask:
  case X86::BI__builtin_ia32_expandloaddf512_mask:
  case X86::BI__builtin_ia32_expandloadsf128_mask:
  case X86::BI__builtin_ia32_expandloadsf256_mask:
  case X86::BI__builtin_ia32_expandloadsf512_mask:
  case X86::BI__builtin_ia32_expandloaddi128_mask:
  case X86::BI__builtin_ia32_expandloaddi256_mask:
  case X86::BI__builtin_ia32_expandloaddi512_mask:
  case X86::BI__builtin_ia32_expandloadsi128_mask:
  case X86::BI__builtin_ia32_expandloadsi256_mask:
  case X86::BI__builtin_ia32_expandloadsi512_mask:
  case X86::BI__builtin_ia32_expandloadhi128_mask:
  case X86::BI__builtin_ia32_expandloadhi256_mask:
  case X86::BI__builtin_ia32_expandloadhi512_mask:
  case X86::BI__builtin_ia32_expandloadqi128_mask:
  case X86::BI__builtin_ia32_expandloadqi256_mask:
  case X86::BI__builtin_ia32_expandloadqi512_mask:
  case X86::BI__builtin_ia32_compressstoredf128_mask:
  case X86::BI__builtin_ia32_compressstoredf256_mask:
  case X86::BI__builtin_ia32_compressstoredf512_mask:
  case X86::BI__builtin_ia32_compressstoresf128_mask:
  case X86::BI__builtin_ia32_compressstoresf256_mask:
  case X86::BI__builtin_ia32_compressstoresf512_mask:
  case X86::BI__builtin_ia32_compressstoredi128_mask:
  case X86::BI__builtin_ia32_compressstoredi256_mask:
  case X86::BI__builtin_ia32_compressstoredi512_mask:
  case X86::BI__builtin_ia32_compressstoresi128_mask:
  case X86::BI__builtin_ia32_compressstoresi256_mask:
  case X86::BI__builtin_ia32_compressstoresi512_mask:
  case X86::BI__builtin_ia32_compressstorehi128_mask:
  case X86::BI__builtin_ia32_compressstorehi256_mask:
  case X86::BI__builtin_ia32_compressstorehi512_mask:
  case X86::BI__builtin_ia32_compressstoreqi128_mask:
  case X86::BI__builtin_ia32_compressstoreqi256_mask:
  case X86::BI__builtin_ia32_compressstoreqi512_mask:
    cgm.errorNYI(expr->getSourceRange(),
                 std::string("unimplemented X86 builtin call: ") +
                     getContext().BuiltinInfo.getName(builtinID));
    return mlir::Value{};
  case X86::BI__builtin_ia32_expanddf128_mask:
  case X86::BI__builtin_ia32_expanddf256_mask:
  case X86::BI__builtin_ia32_expanddf512_mask:
  case X86::BI__builtin_ia32_expandsf128_mask:
  case X86::BI__builtin_ia32_expandsf256_mask:
  case X86::BI__builtin_ia32_expandsf512_mask:
  case X86::BI__builtin_ia32_expanddi128_mask:
  case X86::BI__builtin_ia32_expanddi256_mask:
  case X86::BI__builtin_ia32_expanddi512_mask:
  case X86::BI__builtin_ia32_expandsi128_mask:
  case X86::BI__builtin_ia32_expandsi256_mask:
  case X86::BI__builtin_ia32_expandsi512_mask:
  case X86::BI__builtin_ia32_expandhi128_mask:
  case X86::BI__builtin_ia32_expandhi256_mask:
  case X86::BI__builtin_ia32_expandhi512_mask:
  case X86::BI__builtin_ia32_expandqi128_mask:
  case X86::BI__builtin_ia32_expandqi256_mask:
  case X86::BI__builtin_ia32_expandqi512_mask: {
    mlir::Location loc = getLoc(expr->getExprLoc());
    return emitX86CompressExpand(builder, loc, ops[0], ops[1], ops[2],
                                 "x86.avx512.mask.expand");
  }
  case X86::BI__builtin_ia32_compressdf128_mask:
  case X86::BI__builtin_ia32_compressdf256_mask:
  case X86::BI__builtin_ia32_compressdf512_mask:
  case X86::BI__builtin_ia32_compresssf128_mask:
  case X86::BI__builtin_ia32_compresssf256_mask:
  case X86::BI__builtin_ia32_compresssf512_mask:
  case X86::BI__builtin_ia32_compressdi128_mask:
  case X86::BI__builtin_ia32_compressdi256_mask:
  case X86::BI__builtin_ia32_compressdi512_mask:
  case X86::BI__builtin_ia32_compresssi128_mask:
  case X86::BI__builtin_ia32_compresssi256_mask:
  case X86::BI__builtin_ia32_compresssi512_mask:
  case X86::BI__builtin_ia32_compresshi128_mask:
  case X86::BI__builtin_ia32_compresshi256_mask:
  case X86::BI__builtin_ia32_compresshi512_mask:
  case X86::BI__builtin_ia32_compressqi128_mask:
  case X86::BI__builtin_ia32_compressqi256_mask:
  case X86::BI__builtin_ia32_compressqi512_mask: {
    mlir::Location loc = getLoc(expr->getExprLoc());
    return emitX86CompressExpand(builder, loc, ops[0], ops[1], ops[2],
                                 "x86.avx512.mask.compress");
  }
  case X86::BI__builtin_ia32_gather3div2df:
  case X86::BI__builtin_ia32_gather3div2di:
  case X86::BI__builtin_ia32_gather3div4df:
  case X86::BI__builtin_ia32_gather3div4di:
  case X86::BI__builtin_ia32_gather3div4sf:
  case X86::BI__builtin_ia32_gather3div4si:
  case X86::BI__builtin_ia32_gather3div8sf:
  case X86::BI__builtin_ia32_gather3div8si:
  case X86::BI__builtin_ia32_gather3siv2df:
  case X86::BI__builtin_ia32_gather3siv2di:
  case X86::BI__builtin_ia32_gather3siv4df:
  case X86::BI__builtin_ia32_gather3siv4di:
  case X86::BI__builtin_ia32_gather3siv4sf:
  case X86::BI__builtin_ia32_gather3siv4si:
  case X86::BI__builtin_ia32_gather3siv8sf:
  case X86::BI__builtin_ia32_gather3siv8si:
  case X86::BI__builtin_ia32_gathersiv8df:
  case X86::BI__builtin_ia32_gathersiv16sf:
  case X86::BI__builtin_ia32_gatherdiv8df:
  case X86::BI__builtin_ia32_gatherdiv16sf:
  case X86::BI__builtin_ia32_gathersiv8di:
  case X86::BI__builtin_ia32_gathersiv16si:
  case X86::BI__builtin_ia32_gatherdiv8di:
  case X86::BI__builtin_ia32_gatherdiv16si: {
    StringRef intrinsicName;
    switch (builtinID) {
    default:
      llvm_unreachable("Unexpected builtin");
    case X86::BI__builtin_ia32_gather3div2df:
      intrinsicName = "x86.avx512.mask.gather3div2.df";
      break;
    case X86::BI__builtin_ia32_gather3div2di:
      intrinsicName = "x86.avx512.mask.gather3div2.di";
      break;
    case X86::BI__builtin_ia32_gather3div4df:
      intrinsicName = "x86.avx512.mask.gather3div4.df";
      break;
    case X86::BI__builtin_ia32_gather3div4di:
      intrinsicName = "x86.avx512.mask.gather3div4.di";
      break;
    case X86::BI__builtin_ia32_gather3div4sf:
      intrinsicName = "x86.avx512.mask.gather3div4.sf";
      break;
    case X86::BI__builtin_ia32_gather3div4si:
      intrinsicName = "x86.avx512.mask.gather3div4.si";
      break;
    case X86::BI__builtin_ia32_gather3div8sf:
      intrinsicName = "x86.avx512.mask.gather3div8.sf";
      break;
    case X86::BI__builtin_ia32_gather3div8si:
      intrinsicName = "x86.avx512.mask.gather3div8.si";
      break;
    case X86::BI__builtin_ia32_gather3siv2df:
      intrinsicName = "x86.avx512.mask.gather3siv2.df";
      break;
    case X86::BI__builtin_ia32_gather3siv2di:
      intrinsicName = "x86.avx512.mask.gather3siv2.di";
      break;
    case X86::BI__builtin_ia32_gather3siv4df:
      intrinsicName = "x86.avx512.mask.gather3siv4.df";
      break;
    case X86::BI__builtin_ia32_gather3siv4di:
      intrinsicName = "x86.avx512.mask.gather3siv4.di";
      break;
    case X86::BI__builtin_ia32_gather3siv4sf:
      intrinsicName = "x86.avx512.mask.gather3siv4.sf";
      break;
    case X86::BI__builtin_ia32_gather3siv4si:
      intrinsicName = "x86.avx512.mask.gather3siv4.si";
      break;
    case X86::BI__builtin_ia32_gather3siv8sf:
      intrinsicName = "x86.avx512.mask.gather3siv8.sf";
      break;
    case X86::BI__builtin_ia32_gather3siv8si:
      intrinsicName = "x86.avx512.mask.gather3siv8.si";
      break;
    case X86::BI__builtin_ia32_gathersiv8df:
      intrinsicName = "x86.avx512.mask.gather.dpd.512";
      break;
    case X86::BI__builtin_ia32_gathersiv16sf:
      intrinsicName = "x86.avx512.mask.gather.dps.512";
      break;
    case X86::BI__builtin_ia32_gatherdiv8df:
      intrinsicName = "x86.avx512.mask.gather.qpd.512";
      break;
    case X86::BI__builtin_ia32_gatherdiv16sf:
      intrinsicName = "x86.avx512.mask.gather.qps.512";
      break;
    case X86::BI__builtin_ia32_gathersiv8di:
      intrinsicName = "x86.avx512.mask.gather.dpq.512";
      break;
    case X86::BI__builtin_ia32_gathersiv16si:
      intrinsicName = "x86.avx512.mask.gather.dpi.512";
      break;
    case X86::BI__builtin_ia32_gatherdiv8di:
      intrinsicName = "x86.avx512.mask.gather.qpq.512";
      break;
    case X86::BI__builtin_ia32_gatherdiv16si:
      intrinsicName = "x86.avx512.mask.gather.qpi.512";
      break;
    }

    mlir::Location loc = getLoc(expr->getExprLoc());
    unsigned minElts =
        std::min(cast<cir::VectorType>(ops[0].getType()).getSize(),
                 cast<cir::VectorType>(ops[2].getType()).getSize());
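    // The mask carries one bit per gathered element; since the index and
    // result vectors can differ in length, the narrower of the two bounds
    // the number of active lanes.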
    ops[3] = getMaskVecValue(builder, loc, ops[3], minElts);
    return emitIntrinsicCallOp(builder, loc, intrinsicName,
                               convertType(expr->getType()), ops);
  }
  case X86::BI__builtin_ia32_scattersiv8df:
  case X86::BI__builtin_ia32_scattersiv16sf:
  case X86::BI__builtin_ia32_scatterdiv8df:
  case X86::BI__builtin_ia32_scatterdiv16sf:
  case X86::BI__builtin_ia32_scattersiv8di:
  case X86::BI__builtin_ia32_scattersiv16si:
  case X86::BI__builtin_ia32_scatterdiv8di:
  case X86::BI__builtin_ia32_scatterdiv16si:
  case X86::BI__builtin_ia32_scatterdiv2df:
  case X86::BI__builtin_ia32_scatterdiv2di:
  case X86::BI__builtin_ia32_scatterdiv4df:
  case X86::BI__builtin_ia32_scatterdiv4di:
  case X86::BI__builtin_ia32_scatterdiv4sf:
  case X86::BI__builtin_ia32_scatterdiv4si:
  case X86::BI__builtin_ia32_scatterdiv8sf:
  case X86::BI__builtin_ia32_scatterdiv8si:
  case X86::BI__builtin_ia32_scattersiv2df:
  case X86::BI__builtin_ia32_scattersiv2di:
  case X86::BI__builtin_ia32_scattersiv4df:
  case X86::BI__builtin_ia32_scattersiv4di:
  case X86::BI__builtin_ia32_scattersiv4sf:
  case X86::BI__builtin_ia32_scattersiv4si:
  case X86::BI__builtin_ia32_scattersiv8sf:
  case X86::BI__builtin_ia32_scattersiv8si: {
    llvm::StringRef intrinsicName;
    switch (builtinID) {
    default:
      llvm_unreachable("Unexpected builtin");
    case X86::BI__builtin_ia32_scattersiv8df:
      intrinsicName = "x86.avx512.mask.scatter.dpd.512";
      break;
    case X86::BI__builtin_ia32_scattersiv16sf:
      intrinsicName = "x86.avx512.mask.scatter.dps.512";
      break;
    case X86::BI__builtin_ia32_scatterdiv8df:
      intrinsicName = "x86.avx512.mask.scatter.qpd.512";
      break;
    case X86::BI__builtin_ia32_scatterdiv16sf:
      intrinsicName = "x86.avx512.mask.scatter.qps.512";
      break;
    case X86::BI__builtin_ia32_scattersiv8di:
      intrinsicName = "x86.avx512.mask.scatter.dpq.512";
      break;
    case X86::BI__builtin_ia32_scattersiv16si:
      intrinsicName = "x86.avx512.mask.scatter.dpi.512";
      break;
    case X86::BI__builtin_ia32_scatterdiv8di:
      intrinsicName = "x86.avx512.mask.scatter.qpq.512";
      break;
    case X86::BI__builtin_ia32_scatterdiv16si:
      intrinsicName = "x86.avx512.mask.scatter.qpi.512";
      break;
    case X86::BI__builtin_ia32_scatterdiv2df:
      intrinsicName = "x86.avx512.mask.scatterdiv2.df";
      break;
    case X86::BI__builtin_ia32_scatterdiv2di:
      intrinsicName = "x86.avx512.mask.scatterdiv2.di";
      break;
    case X86::BI__builtin_ia32_scatterdiv4df:
      intrinsicName = "x86.avx512.mask.scatterdiv4.df";
      break;
    case X86::BI__builtin_ia32_scatterdiv4di:
      intrinsicName = "x86.avx512.mask.scatterdiv4.di";
      break;
    case X86::BI__builtin_ia32_scatterdiv4sf:
      intrinsicName = "x86.avx512.mask.scatterdiv4.sf";
      break;
    case X86::BI__builtin_ia32_scatterdiv4si:
      intrinsicName = "x86.avx512.mask.scatterdiv4.si";
      break;
    case X86::BI__builtin_ia32_scatterdiv8sf:
      intrinsicName = "x86.avx512.mask.scatterdiv8.sf";
      break;
    case X86::BI__builtin_ia32_scatterdiv8si:
      intrinsicName = "x86.avx512.mask.scatterdiv8.si";
      break;
    case X86::BI__builtin_ia32_scattersiv2df:
      intrinsicName = "x86.avx512.mask.scattersiv2.df";
      break;
    case X86::BI__builtin_ia32_scattersiv2di:
      intrinsicName = "x86.avx512.mask.scattersiv2.di";
      break;
    case X86::BI__builtin_ia32_scattersiv4df:
      intrinsicName = "x86.avx512.mask.scattersiv4.df";
      break;
    case X86::BI__builtin_ia32_scattersiv4di:
      intrinsicName = "x86.avx512.mask.scattersiv4.di";
      break;
    case X86::BI__builtin_ia32_scattersiv4sf:
      intrinsicName = "x86.avx512.mask.scattersiv4.sf";
      break;
    case X86::BI__builtin_ia32_scattersiv4si:
      intrinsicName = "x86.avx512.mask.scattersiv4.si";
      break;
    case X86::BI__builtin_ia32_scattersiv8sf:
      intrinsicName = "x86.avx512.mask.scattersiv8.sf";
      break;
    case X86::BI__builtin_ia32_scattersiv8si:
      intrinsicName = "x86.avx512.mask.scattersiv8.si";
      break;
    }

    mlir::Location loc = getLoc(expr->getExprLoc());
    unsigned minElts =
        std::min(cast<cir::VectorType>(ops[2].getType()).getSize(),
                 cast<cir::VectorType>(ops[3].getType()).getSize());
    ops[1] = getMaskVecValue(builder, loc, ops[1], minElts);

    return emitIntrinsicCallOp(builder, loc, intrinsicName,
                               convertType(expr->getType()), ops);
  }
  case X86::BI__builtin_ia32_vextractf128_pd256:
  case X86::BI__builtin_ia32_vextractf128_ps256:
  case X86::BI__builtin_ia32_vextractf128_si256:
  case X86::BI__builtin_ia32_extract128i256:
  case X86::BI__builtin_ia32_extractf64x4_mask:
  case X86::BI__builtin_ia32_extractf32x4_mask:
  case X86::BI__builtin_ia32_extracti64x4_mask:
  case X86::BI__builtin_ia32_extracti32x4_mask:
  case X86::BI__builtin_ia32_extractf32x8_mask:
  case X86::BI__builtin_ia32_extracti32x8_mask:
  case X86::BI__builtin_ia32_extractf32x4_256_mask:
  case X86::BI__builtin_ia32_extracti32x4_256_mask:
  case X86::BI__builtin_ia32_extractf64x2_256_mask:
  case X86::BI__builtin_ia32_extracti64x2_256_mask:
  case X86::BI__builtin_ia32_extractf64x2_512_mask:
  case X86::BI__builtin_ia32_extracti64x2_512_mask: {
    mlir::Location loc = getLoc(expr->getExprLoc());
    cir::VectorType dstTy =
        cast<cir::VectorType>(convertType(expr->getType()));
    unsigned numElts = dstTy.getSize();
    unsigned srcNumElts = cast<cir::VectorType>(ops[0].getType()).getSize();
    unsigned subVectors = srcNumElts / numElts;
    assert(llvm::isPowerOf2_32(subVectors) && "Expected power of 2 subvectors");
    unsigned index =
        ops[1].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue();

    index &= subVectors - 1; // Remove any extra bits.
    index *= numElts;

    int64_t indices[16];
    std::iota(indices, indices + numElts, index);

    mlir::Value poison =
        builder.getConstant(loc, cir::PoisonAttr::get(ops[0].getType()));
    mlir::Value res = builder.createVecShuffle(loc, ops[0], poison,
                                               ArrayRef(indices, numElts));
    if (ops.size() == 4)
      res = emitX86Select(builder, loc, ops[3], res, ops[2]);

    return res;
  }
  case X86::BI__builtin_ia32_vinsertf128_pd256:
  case X86::BI__builtin_ia32_vinsertf128_ps256:
  case X86::BI__builtin_ia32_vinsertf128_si256:
  case X86::BI__builtin_ia32_insert128i256:
  case X86::BI__builtin_ia32_insertf64x4:
  case X86::BI__builtin_ia32_insertf32x4:
  case X86::BI__builtin_ia32_inserti64x4:
  case X86::BI__builtin_ia32_inserti32x4:
  case X86::BI__builtin_ia32_insertf32x8:
  case X86::BI__builtin_ia32_inserti32x8:
  case X86::BI__builtin_ia32_insertf32x4_256:
  case X86::BI__builtin_ia32_inserti32x4_256:
  case X86::BI__builtin_ia32_insertf64x2_256:
  case X86::BI__builtin_ia32_inserti64x2_256:
  case X86::BI__builtin_ia32_insertf64x2_512:
  case X86::BI__builtin_ia32_inserti64x2_512: {
    unsigned dstNumElts = cast<cir::VectorType>(ops[0].getType()).getSize();
    unsigned srcNumElts = cast<cir::VectorType>(ops[1].getType()).getSize();
    unsigned subVectors = dstNumElts / srcNumElts;
    assert(llvm::isPowerOf2_32(subVectors) && "Expected power of 2 subvectors");
    assert(dstNumElts <= 16);

    uint64_t index = getZExtIntValueFromConstOp(ops[2]);
    index &= subVectors - 1; // Remove any extra bits.
    index *= srcNumElts;

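    // The insertion is done as two shuffles: the first widens the source
    // subvector to the destination element count, and the second picks, for
    // each destination element, either the original ops[0] element or the
    // corresponding widened source element.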
    llvm::SmallVector<int64_t, 16> mask(dstNumElts);
    for (unsigned i = 0; i != dstNumElts; ++i)
      mask[i] = (i >= srcNumElts) ? srcNumElts + (i % srcNumElts) : i;

    mlir::Value op1 =
        builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[1], mask);

    for (unsigned i = 0; i != dstNumElts; ++i) {
      if (i >= index && i < (index + srcNumElts))
        mask[i] = (i - index) + dstNumElts;
      else
        mask[i] = i;
    }

    return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], op1,
                                    mask);
  }
  case X86::BI__builtin_ia32_pmovqd512_mask:
  case X86::BI__builtin_ia32_pmovwb512_mask:
  case X86::BI__builtin_ia32_pblendw128:
  case X86::BI__builtin_ia32_blendpd:
  case X86::BI__builtin_ia32_blendps:
  case X86::BI__builtin_ia32_blendpd256:
  case X86::BI__builtin_ia32_blendps256:
  case X86::BI__builtin_ia32_pblendw256:
  case X86::BI__builtin_ia32_pblendd128:
  case X86::BI__builtin_ia32_pblendd256:
    cgm.errorNYI(expr->getSourceRange(),
                 std::string("unimplemented X86 builtin call: ") +
                     getContext().BuiltinInfo.getName(builtinID));
    return mlir::Value{};
  case X86::BI__builtin_ia32_pshuflw:
  case X86::BI__builtin_ia32_pshuflw256:
  case X86::BI__builtin_ia32_pshuflw512:
    return emitPshufWord(builder, ops[0], ops[1], getLoc(expr->getExprLoc()),
                         /*isLow=*/true);
  case X86::BI__builtin_ia32_pshufhw:
  case X86::BI__builtin_ia32_pshufhw256:
  case X86::BI__builtin_ia32_pshufhw512:
    return emitPshufWord(builder, ops[0], ops[1], getLoc(expr->getExprLoc()),
                         /*isLow=*/false);
  case X86::BI__builtin_ia32_pshufd:
  case X86::BI__builtin_ia32_pshufd256:
  case X86::BI__builtin_ia32_pshufd512:
  case X86::BI__builtin_ia32_vpermilpd:
  case X86::BI__builtin_ia32_vpermilps:
  case X86::BI__builtin_ia32_vpermilpd256:
  case X86::BI__builtin_ia32_vpermilps256:
  case X86::BI__builtin_ia32_vpermilpd512:
  case X86::BI__builtin_ia32_vpermilps512: {
    const uint32_t imm = getSExtIntValueFromConstOp(ops[1]);

    llvm::SmallVector<int64_t, 16> mask;
    computeFullLaneShuffleMask(*this, ops[0], imm, /*isShufP=*/false, mask);

    return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], mask);
  }
  case X86::BI__builtin_ia32_shufpd:
  case X86::BI__builtin_ia32_shufpd256:
  case X86::BI__builtin_ia32_shufpd512:
  case X86::BI__builtin_ia32_shufps:
  case X86::BI__builtin_ia32_shufps256:
  case X86::BI__builtin_ia32_shufps512: {
    const uint32_t imm = getZExtIntValueFromConstOp(ops[2]);

    llvm::SmallVector<int64_t, 16> mask;
    computeFullLaneShuffleMask(*this, ops[0], imm, /*isShufP=*/true, mask);

    return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], ops[1],
                                    mask);
  }
  case X86::BI__builtin_ia32_permdi256:
  case X86::BI__builtin_ia32_permdf256:
  case X86::BI__builtin_ia32_permdi512:
  case X86::BI__builtin_ia32_permdf512:
  case X86::BI__builtin_ia32_palignr128:
  case X86::BI__builtin_ia32_palignr256:
  case X86::BI__builtin_ia32_palignr512:
    cgm.errorNYI(expr->getSourceRange(),
                 std::string("unimplemented X86 builtin call: ") +
                     getContext().BuiltinInfo.getName(builtinID));
    return mlir::Value{};
  case X86::BI__builtin_ia32_alignd128:
  case X86::BI__builtin_ia32_alignd256:
  case X86::BI__builtin_ia32_alignd512:
  case X86::BI__builtin_ia32_alignq128:
  case X86::BI__builtin_ia32_alignq256:
  case X86::BI__builtin_ia32_alignq512: {
    unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
    unsigned shiftVal =
        ops[2].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() &
        0xff;

    // Mask the shift amount to the width of a vector.
    shiftVal &= numElts - 1;

    llvm::SmallVector<mlir::Attribute, 16> indices;
    mlir::Type i32Ty = builder.getSInt32Ty();
    for (unsigned i = 0; i != numElts; ++i)
      indices.push_back(cir::IntAttr::get(i32Ty, i + shiftVal));

    return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], ops[1],
                                    indices);
  }
  case X86::BI__builtin_ia32_shuf_f32x4_256:
  case X86::BI__builtin_ia32_shuf_f64x2_256:
  case X86::BI__builtin_ia32_shuf_i32x4_256:
  case X86::BI__builtin_ia32_shuf_i64x2_256:
  case X86::BI__builtin_ia32_shuf_f32x4:
  case X86::BI__builtin_ia32_shuf_f64x2:
  case X86::BI__builtin_ia32_shuf_i32x4:
  case X86::BI__builtin_ia32_shuf_i64x2:
  case X86::BI__builtin_ia32_vperm2f128_pd256:
  case X86::BI__builtin_ia32_vperm2f128_ps256:
  case X86::BI__builtin_ia32_vperm2f128_si256:
  case X86::BI__builtin_ia32_permti256:
  case X86::BI__builtin_ia32_pslldqi128_byteshift:
  case X86::BI__builtin_ia32_pslldqi256_byteshift:
  case X86::BI__builtin_ia32_pslldqi512_byteshift:
  case X86::BI__builtin_ia32_psrldqi128_byteshift:
  case X86::BI__builtin_ia32_psrldqi256_byteshift:
  case X86::BI__builtin_ia32_psrldqi512_byteshift:
    cgm.errorNYI(expr->getSourceRange(),
                 std::string("unimplemented X86 builtin call: ") +
                     getContext().BuiltinInfo.getName(builtinID));
    return mlir::Value{};
1426 case X86::BI__builtin_ia32_kshiftliqi:
1427 case X86::BI__builtin_ia32_kshiftlihi:
1428 case X86::BI__builtin_ia32_kshiftlisi:
1429 case X86::BI__builtin_ia32_kshiftlidi: {
1430 mlir::Location loc = getLoc(expr->getExprLoc());
1431 unsigned shiftVal =
1432 ops[1].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() &
1433 0xff;
1434 unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth();
1435
1436 if (shiftVal >= numElems)
1437 return builder.getNullValue(ops[0].getType(), loc);
1438
1439 mlir::Value in = getMaskVecValue(builder, loc, ops[0], numElems);
1440
1441 llvm::SmallVector<mlir::Attribute, 64> indices;
1442 mlir::Type i32Ty = builder.getSInt32Ty();
1443 for (auto i : llvm::seq<unsigned>(0, numElems))
1444 indices.push_back(cir::IntAttr::get(i32Ty, numElems + i - shiftVal));
1445
1446 mlir::Value zero = builder.getNullValue(in.getType(), loc);
1447 mlir::Value sv = builder.createVecShuffle(loc, zero, in, indices);
1448 return builder.createBitcast(sv, ops[0].getType());
1449 }
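// Illustration: the left shift of a k-mask becomes a shuffle over
// concat(zero, in). With numElems = 8 and shiftVal = 2 the indices are
// {6,...,13}, giving {0, 0, in[0], ..., in[5]}, i.e. (mask << 2) truncated
// back to 8 bits after the bitcast.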
1450 case X86::BI__builtin_ia32_kshiftriqi:
1451 case X86::BI__builtin_ia32_kshiftrihi:
1452 case X86::BI__builtin_ia32_kshiftrisi:
1453 case X86::BI__builtin_ia32_kshiftridi: {
1454 mlir::Location loc = getLoc(expr->getExprLoc());
1455 unsigned shiftVal =
1456 ops[1].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() &
1457 0xff;
1458 unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth();
1459
1460 if (shiftVal >= numElems)
1461 return builder.getNullValue(ops[0].getType(), loc);
1462
1463 mlir::Value in = getMaskVecValue(builder, loc, ops[0], numElems);
1464
1465 llvm::SmallVector<mlir::Attribute, 64> indices;
1466 mlir::Type i32Ty = builder.getSInt32Ty();
1467 for (auto i : llvm::seq<unsigned>(0, numElems))
1468 indices.push_back(cir::IntAttr::get(i32Ty, i + shiftVal));
1469
1470 mlir::Value zero = builder.getNullValue(in.getType(), loc);
1471 mlir::Value sv = builder.createVecShuffle(loc, in, zero, indices);
1472 return builder.createBitcast(sv, ops[0].getType());
1473 }
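// The right shift mirrors this with concat(in, zero) and indices offset by
// +shiftVal, dropping the low shiftVal mask bits and zero-filling the top.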
1474 case X86::BI__builtin_ia32_vprotbi:
1475 case X86::BI__builtin_ia32_vprotwi:
1476 case X86::BI__builtin_ia32_vprotdi:
1477 case X86::BI__builtin_ia32_vprotqi:
1478 case X86::BI__builtin_ia32_prold128:
1479 case X86::BI__builtin_ia32_prold256:
1480 case X86::BI__builtin_ia32_prold512:
1481 case X86::BI__builtin_ia32_prolq128:
1482 case X86::BI__builtin_ia32_prolq256:
1483 case X86::BI__builtin_ia32_prolq512:
1484 return emitX86FunnelShift(builder, getLoc(expr->getExprLoc()), ops[0],
1485 ops[0], ops[1], false);
1486 case X86::BI__builtin_ia32_prord128:
1487 case X86::BI__builtin_ia32_prord256:
1488 case X86::BI__builtin_ia32_prord512:
1489 case X86::BI__builtin_ia32_prorq128:
1490 case X86::BI__builtin_ia32_prorq256:
1491 case X86::BI__builtin_ia32_prorq512:
1492 return emitX86FunnelShift(builder, getLoc(expr->getExprLoc()), ops[0],
1493 ops[0], ops[1], true);
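// Rotates reuse the funnel-shift helper with both data operands equal,
// relying on the identities rotl(x, s) == fshl(x, x, s) and
// rotr(x, s) == fshr(x, x, s); isRight picks the fshr form.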
1494 case X86::BI__builtin_ia32_selectb_128:
1495 case X86::BI__builtin_ia32_selectb_256:
1496 case X86::BI__builtin_ia32_selectb_512:
1497 case X86::BI__builtin_ia32_selectw_128:
1498 case X86::BI__builtin_ia32_selectw_256:
1499 case X86::BI__builtin_ia32_selectw_512:
1500 case X86::BI__builtin_ia32_selectd_128:
1501 case X86::BI__builtin_ia32_selectd_256:
1502 case X86::BI__builtin_ia32_selectd_512:
1503 case X86::BI__builtin_ia32_selectq_128:
1504 case X86::BI__builtin_ia32_selectq_256:
1505 case X86::BI__builtin_ia32_selectq_512:
1506 case X86::BI__builtin_ia32_selectph_128:
1507 case X86::BI__builtin_ia32_selectph_256:
1508 case X86::BI__builtin_ia32_selectph_512:
1509 case X86::BI__builtin_ia32_selectpbf_128:
1510 case X86::BI__builtin_ia32_selectpbf_256:
1511 case X86::BI__builtin_ia32_selectpbf_512:
1512 case X86::BI__builtin_ia32_selectps_128:
1513 case X86::BI__builtin_ia32_selectps_256:
1514 case X86::BI__builtin_ia32_selectps_512:
1515 case X86::BI__builtin_ia32_selectpd_128:
1516 case X86::BI__builtin_ia32_selectpd_256:
1517 case X86::BI__builtin_ia32_selectpd_512:
1518 case X86::BI__builtin_ia32_selectsh_128:
1519 case X86::BI__builtin_ia32_selectsbf_128:
1520 case X86::BI__builtin_ia32_selectss_128:
1521 case X86::BI__builtin_ia32_selectsd_128:
1522 case X86::BI__builtin_ia32_cmpb128_mask:
1523 case X86::BI__builtin_ia32_cmpb256_mask:
1524 case X86::BI__builtin_ia32_cmpb512_mask:
1525 case X86::BI__builtin_ia32_cmpw128_mask:
1526 case X86::BI__builtin_ia32_cmpw256_mask:
1527 case X86::BI__builtin_ia32_cmpw512_mask:
1528 case X86::BI__builtin_ia32_cmpd128_mask:
1529 case X86::BI__builtin_ia32_cmpd256_mask:
1530 case X86::BI__builtin_ia32_cmpd512_mask:
1531 case X86::BI__builtin_ia32_cmpq128_mask:
1532 case X86::BI__builtin_ia32_cmpq256_mask:
1533 case X86::BI__builtin_ia32_cmpq512_mask:
1534 case X86::BI__builtin_ia32_ucmpb128_mask:
1535 case X86::BI__builtin_ia32_ucmpb256_mask:
1536 case X86::BI__builtin_ia32_ucmpb512_mask:
1537 case X86::BI__builtin_ia32_ucmpw128_mask:
1538 case X86::BI__builtin_ia32_ucmpw256_mask:
1539 case X86::BI__builtin_ia32_ucmpw512_mask:
1540 case X86::BI__builtin_ia32_ucmpd128_mask:
1541 case X86::BI__builtin_ia32_ucmpd256_mask:
1542 case X86::BI__builtin_ia32_ucmpd512_mask:
1543 case X86::BI__builtin_ia32_ucmpq128_mask:
1544 case X86::BI__builtin_ia32_ucmpq256_mask:
1545 case X86::BI__builtin_ia32_ucmpq512_mask:
1546 cgm.errorNYI(expr->getSourceRange(),
1547 std::string("unimplemented X86 builtin call: ") +
1548 getContext().BuiltinInfo.getName(builtinID));
1549 return mlir::Value{};
1550 case X86::BI__builtin_ia32_vpcomb:
1551 case X86::BI__builtin_ia32_vpcomw:
1552 case X86::BI__builtin_ia32_vpcomd:
1553 case X86::BI__builtin_ia32_vpcomq:
1554 return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, true);
1555 case X86::BI__builtin_ia32_vpcomub:
1556 case X86::BI__builtin_ia32_vpcomuw:
1557 case X86::BI__builtin_ia32_vpcomud:
1558 case X86::BI__builtin_ia32_vpcomuq:
1559 return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, false);
1560 case X86::BI__builtin_ia32_kortestcqi:
1561 case X86::BI__builtin_ia32_kortestchi:
1562 case X86::BI__builtin_ia32_kortestcsi:
1563 case X86::BI__builtin_ia32_kortestcdi: {
1564 mlir::Location loc = getLoc(expr->getExprLoc());
1565 cir::IntType ty = cast<cir::IntType>(ops[0].getType());
1566 mlir::Value allOnesOp =
1567 builder.getConstAPInt(loc, ty, APInt::getAllOnes(ty.getWidth()));
1568 mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops);
1569 mlir::Value cmp =
1570 cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allOnesOp);
1571 return builder.createCast(cir::CastKind::bool_to_int, cmp,
1572 cgm.convertType(expr->getType()));
1573 }
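// kortestc implements the CF-style test: assuming AVX-512 kortest
// semantics, the result is 1 exactly when (ops[0] | ops[1]) is all ones,
// e.g. _kortestc_mask16_u8(a, b) == 1 iff (a | b) == 0xFFFF; the i1
// comparison is then widened to the builtin's integer return type.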
1574 case X86::BI__builtin_ia32_kortestzqi:
1575 case X86::BI__builtin_ia32_kortestzhi:
1576 case X86::BI__builtin_ia32_kortestzsi:
1577 case X86::BI__builtin_ia32_kortestzdi: {
1578 mlir::Location loc = getLoc(expr->getExprLoc());
1579 cir::IntType ty = cast<cir::IntType>(ops[0].getType());
1580 mlir::Value allZerosOp = builder.getNullValue(ty, loc).getResult();
1581 mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops);
1582 mlir::Value cmp =
1583 cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allZerosOp);
1584 return builder.createCast(cir::CastKind::bool_to_int, cmp,
1585 cgm.convertType(expr->getType()));
1586 }
1587 case X86::BI__builtin_ia32_ktestcqi:
1588 return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
1589 "x86.avx512.ktestc.b", ops);
1590 case X86::BI__builtin_ia32_ktestzqi:
1591 return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
1592 "x86.avx512.ktestz.b", ops);
1593 case X86::BI__builtin_ia32_ktestchi:
1594 return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
1595 "x86.avx512.ktestc.w", ops);
1596 case X86::BI__builtin_ia32_ktestzhi:
1597 return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
1598 "x86.avx512.ktestz.w", ops);
1599 case X86::BI__builtin_ia32_ktestcsi:
1600 return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
1601 "x86.avx512.ktestc.d", ops);
1602 case X86::BI__builtin_ia32_ktestzsi:
1603 return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
1604 "x86.avx512.ktestz.d", ops);
1605 case X86::BI__builtin_ia32_ktestcdi:
1606 return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
1607 "x86.avx512.ktestc.q", ops);
1608 case X86::BI__builtin_ia32_ktestzdi:
1609 return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
1610 "x86.avx512.ktestz.q", ops);
1611 case X86::BI__builtin_ia32_kaddqi:
1612 return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
1613 "x86.avx512.kadd.b", ops);
1614 case X86::BI__builtin_ia32_kaddhi:
1615 return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
1616 "x86.avx512.kadd.w", ops);
1617 case X86::BI__builtin_ia32_kaddsi:
1618 return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
1619 "x86.avx512.kadd.d", ops);
1620 case X86::BI__builtin_ia32_kadddi:
1621 return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
1622 "x86.avx512.kadd.q", ops);
1623 case X86::BI__builtin_ia32_kandqi:
1624 case X86::BI__builtin_ia32_kandhi:
1625 case X86::BI__builtin_ia32_kandsi:
1626 case X86::BI__builtin_ia32_kanddi:
1627 return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
1628 cir::BinOpKind::And, ops);
1629 case X86::BI__builtin_ia32_kandnqi:
1630 case X86::BI__builtin_ia32_kandnhi:
1631 case X86::BI__builtin_ia32_kandnsi:
1632 case X86::BI__builtin_ia32_kandndi:
1633 return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
1634 cir::BinOpKind::And, ops, true);
1635 case X86::BI__builtin_ia32_korqi:
1636 case X86::BI__builtin_ia32_korhi:
1637 case X86::BI__builtin_ia32_korsi:
1638 case X86::BI__builtin_ia32_kordi:
1639 return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
1640 cir::BinOpKind::Or, ops);
1641 case X86::BI__builtin_ia32_kxnorqi:
1642 case X86::BI__builtin_ia32_kxnorhi:
1643 case X86::BI__builtin_ia32_kxnorsi:
1644 case X86::BI__builtin_ia32_kxnordi:
1645 return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
1646 cir::BinOpKind::Xor, ops, true);
1647 case X86::BI__builtin_ia32_kxorqi:
1648 case X86::BI__builtin_ia32_kxorhi:
1649 case X86::BI__builtin_ia32_kxorsi:
1650 case X86::BI__builtin_ia32_kxordi:
1651 return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
1652 cir::BinOpKind::Xor, ops);
1653 case X86::BI__builtin_ia32_knotqi:
1654 case X86::BI__builtin_ia32_knothi:
1655 case X86::BI__builtin_ia32_knotsi:
1656 case X86::BI__builtin_ia32_knotdi: {
1657 cir::IntType intTy = cast<cir::IntType>(ops[0].getType());
1658 unsigned numElts = intTy.getWidth();
1659 mlir::Value resVec =
1660 getMaskVecValue(builder, getLoc(expr->getExprLoc()), ops[0], numElts);
1661 return builder.createBitcast(builder.createNot(resVec), ops[0].getType());
1662 }
1663 case X86::BI__builtin_ia32_kmovb:
1664 case X86::BI__builtin_ia32_kmovw:
1665 case X86::BI__builtin_ia32_kmovd:
1666 case X86::BI__builtin_ia32_kmovq: {
1667 // Bitcast to vXi1 type and then back to integer. This gets the mask
1668 // register type into the IR, but might be optimized out depending on
1669 // what's around it.
1670 cir::IntType intTy = cast<cir::IntType>(ops[0].getType());
1671 unsigned numElts = intTy.getWidth();
1672 mlir::Value resVec =
1673 getMaskVecValue(builder, getLoc(expr->getExprLoc()), ops[0], numElts);
1674 return builder.createBitcast(resVec, ops[0].getType());
1675 }
1676 case X86::BI__builtin_ia32_sqrtsh_round_mask:
1677 case X86::BI__builtin_ia32_sqrtsd_round_mask:
1678 case X86::BI__builtin_ia32_sqrtss_round_mask:
1679 cgm.errorNYI(expr->getSourceRange(),
1680 std::string("unimplemented X86 builtin call: ") +
1681 getContext().BuiltinInfo.getName(builtinID));
1682 return mlir::Value{};
1683 case X86::BI__builtin_ia32_sqrtph512:
1684 case X86::BI__builtin_ia32_sqrtps512:
1685 case X86::BI__builtin_ia32_sqrtpd512: {
1686 mlir::Location loc = getLoc(expr->getExprLoc());
1687 mlir::Value arg = ops[0];
1688 return cir::SqrtOp::create(builder, loc, arg.getType(), arg).getResult();
1689 }
1690 case X86::BI__builtin_ia32_pmuludq128:
1691 case X86::BI__builtin_ia32_pmuludq256:
1692 case X86::BI__builtin_ia32_pmuludq512: {
1693 unsigned opTypePrimitiveSizeInBits =
1694 cgm.getDataLayout().getTypeSizeInBits(ops[0].getType());
1695 return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned*/ false,
1696 ops, opTypePrimitiveSizeInBits);
1697 }
1698 case X86::BI__builtin_ia32_pmuldq128:
1699 case X86::BI__builtin_ia32_pmuldq256:
1700 case X86::BI__builtin_ia32_pmuldq512: {
1701 unsigned opTypePrimitiveSizeInBits =
1702 cgm.getDataLayout().getTypeSizeInBits(ops[0].getType());
1703 return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned*/ true,
1704 ops, opTypePrimitiveSizeInBits);
1705 }
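// pmuludq/pmuldq multiply the even-numbered 32-bit elements of each
// source, widened to 64 bits; e.g. _mm_mul_epu32(a, b) yields
// {(uint64_t)a0 * b0, (uint64_t)a2 * b2}. The vector's total bit width is
// passed along, presumably so the helper can form the matching 64-bit
// element count.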
1706 case X86::BI__builtin_ia32_pternlogd512_mask:
1707 case X86::BI__builtin_ia32_pternlogq512_mask:
1708 case X86::BI__builtin_ia32_pternlogd128_mask:
1709 case X86::BI__builtin_ia32_pternlogd256_mask:
1710 case X86::BI__builtin_ia32_pternlogq128_mask:
1711 case X86::BI__builtin_ia32_pternlogq256_mask:
1712 case X86::BI__builtin_ia32_pternlogd512_maskz:
1713 case X86::BI__builtin_ia32_pternlogq512_maskz:
1714 case X86::BI__builtin_ia32_pternlogd128_maskz:
1715 case X86::BI__builtin_ia32_pternlogd256_maskz:
1716 case X86::BI__builtin_ia32_pternlogq128_maskz:
1717 case X86::BI__builtin_ia32_pternlogq256_maskz:
1718 case X86::BI__builtin_ia32_vpshldd128:
1719 case X86::BI__builtin_ia32_vpshldd256:
1720 case X86::BI__builtin_ia32_vpshldd512:
1721 case X86::BI__builtin_ia32_vpshldq128:
1722 case X86::BI__builtin_ia32_vpshldq256:
1723 case X86::BI__builtin_ia32_vpshldq512:
1724 case X86::BI__builtin_ia32_vpshldw128:
1725 case X86::BI__builtin_ia32_vpshldw256:
1726 case X86::BI__builtin_ia32_vpshldw512:
1727 case X86::BI__builtin_ia32_vpshrdd128:
1728 case X86::BI__builtin_ia32_vpshrdd256:
1729 case X86::BI__builtin_ia32_vpshrdd512:
1730 case X86::BI__builtin_ia32_vpshrdq128:
1731 case X86::BI__builtin_ia32_vpshrdq256:
1732 case X86::BI__builtin_ia32_vpshrdq512:
1733 case X86::BI__builtin_ia32_vpshrdw128:
1734 case X86::BI__builtin_ia32_vpshrdw256:
1735 case X86::BI__builtin_ia32_vpshrdw512:
1736 cgm.errorNYI(expr->getSourceRange(),
1737 std::string("unimplemented X86 builtin call: ") +
1738 getContext().BuiltinInfo.getName(builtinID));
1739 return {};
1740 case X86::BI__builtin_ia32_reduce_fadd_pd512:
1741 case X86::BI__builtin_ia32_reduce_fadd_ps512:
1742 case X86::BI__builtin_ia32_reduce_fadd_ph512:
1743 case X86::BI__builtin_ia32_reduce_fadd_ph256:
1744 case X86::BI__builtin_ia32_reduce_fadd_ph128: {
1745 assert(!cir::MissingFeatures::fastMathFlags());
1746 return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
1747 "vector.reduce.fadd", ops[0].getType(),
1748 mlir::ValueRange{ops[0], ops[1]});
1749 }
1750 case X86::BI__builtin_ia32_reduce_fmul_pd512:
1751 case X86::BI__builtin_ia32_reduce_fmul_ps512:
1752 case X86::BI__builtin_ia32_reduce_fmul_ph512:
1753 case X86::BI__builtin_ia32_reduce_fmul_ph256:
1754 case X86::BI__builtin_ia32_reduce_fmul_ph128: {
1755 assert(!cir::MissingFeatures::fastMathFlags());
1756 return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
1757 "vector.reduce.fmul", ops[0].getType(),
1758 mlir::ValueRange{ops[0], ops[1]});
1759 }
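// These map to the LLVM vector.reduce.fadd/fmul intrinsics, which take a
// scalar start value (ops[0]) and the vector to reduce (ops[1]) and return
// a scalar; without fast-math flags the reduction is sequential, hence the
// ops[0].getType() result type.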
1760 case X86::BI__builtin_ia32_reduce_fmax_pd512:
1761 case X86::BI__builtin_ia32_reduce_fmax_ps512:
1762 case X86::BI__builtin_ia32_reduce_fmax_ph512:
1763 case X86::BI__builtin_ia32_reduce_fmax_ph256:
1764 case X86::BI__builtin_ia32_reduce_fmax_ph128: {
1765 assert(!cir::MissingFeatures::fastMathFlags());
1766 cir::VectorType vecTy = cast<cir::VectorType>(ops[0].getType());
1767 return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
1768 "vector.reduce.fmax", vecTy.getElementType(),
1769 mlir::ValueRange{ops[0]});
1770 }
1771 case X86::BI__builtin_ia32_reduce_fmin_pd512:
1772 case X86::BI__builtin_ia32_reduce_fmin_ps512:
1773 case X86::BI__builtin_ia32_reduce_fmin_ph512:
1774 case X86::BI__builtin_ia32_reduce_fmin_ph256:
1775 case X86::BI__builtin_ia32_reduce_fmin_ph128: {
1776 assert(!cir::MissingFeatures::fastMathFlags());
1777 cir::VectorType vecTy = cast<cir::VectorType>(ops[0].getType());
1778 return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
1779 "vector.reduce.fmin", vecTy.getElementType(),
1780 mlir::ValueRange{ops[0]});
1781 }
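// The fmax/fmin reductions, by contrast, take only the vector operand and
// return its element type; vector.reduce.fmax/fmin have no start value.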
1782 case X86::BI__builtin_ia32_rdrand16_step:
1783 case X86::BI__builtin_ia32_rdrand32_step:
1784 case X86::BI__builtin_ia32_rdrand64_step:
1785 case X86::BI__builtin_ia32_rdseed16_step:
1786 case X86::BI__builtin_ia32_rdseed32_step:
1787 case X86::BI__builtin_ia32_rdseed64_step:
1788 case X86::BI__builtin_ia32_addcarryx_u32:
1789 case X86::BI__builtin_ia32_addcarryx_u64:
1790 case X86::BI__builtin_ia32_subborrow_u32:
1791 case X86::BI__builtin_ia32_subborrow_u64:
1792 case X86::BI__builtin_ia32_fpclassps128_mask:
1793 case X86::BI__builtin_ia32_fpclassps256_mask:
1794 case X86::BI__builtin_ia32_fpclassps512_mask:
1795 case X86::BI__builtin_ia32_vfpclassbf16128_mask:
1796 case X86::BI__builtin_ia32_vfpclassbf16256_mask:
1797 case X86::BI__builtin_ia32_vfpclassbf16512_mask:
1798 case X86::BI__builtin_ia32_fpclassph128_mask:
1799 case X86::BI__builtin_ia32_fpclassph256_mask:
1800 case X86::BI__builtin_ia32_fpclassph512_mask:
1801 case X86::BI__builtin_ia32_fpclasspd128_mask:
1802 case X86::BI__builtin_ia32_fpclasspd256_mask:
1803 case X86::BI__builtin_ia32_fpclasspd512_mask:
1804 case X86::BI__builtin_ia32_vp2intersect_q_512:
1805 case X86::BI__builtin_ia32_vp2intersect_q_256:
1806 case X86::BI__builtin_ia32_vp2intersect_q_128:
1807 case X86::BI__builtin_ia32_vp2intersect_d_512:
1808 case X86::BI__builtin_ia32_vp2intersect_d_256:
1809 case X86::BI__builtin_ia32_vp2intersect_d_128:
1810 case X86::BI__builtin_ia32_vpmultishiftqb128:
1811 case X86::BI__builtin_ia32_vpmultishiftqb256:
1812 case X86::BI__builtin_ia32_vpmultishiftqb512:
1813 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
1814 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
1815 case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
1816 case X86::BI__builtin_ia32_cmpeqps:
1817 case X86::BI__builtin_ia32_cmpeqpd:
1818 case X86::BI__builtin_ia32_cmpltps:
1819 case X86::BI__builtin_ia32_cmpltpd:
1820 case X86::BI__builtin_ia32_cmpleps:
1821 case X86::BI__builtin_ia32_cmplepd:
1822 case X86::BI__builtin_ia32_cmpunordps:
1823 case X86::BI__builtin_ia32_cmpunordpd:
1824 case X86::BI__builtin_ia32_cmpneqps:
1825 case X86::BI__builtin_ia32_cmpneqpd:
1826 cgm.errorNYI(expr->getSourceRange(),
1827 std::string("unimplemented X86 builtin call: ") +
1828 getContext().BuiltinInfo.getName(builtinID));
1829 return mlir::Value{};
1830 case X86::BI__builtin_ia32_cmpnltps:
1831 case X86::BI__builtin_ia32_cmpnltpd:
1832 return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()),
1833 cir::CmpOpKind::lt, /*shouldInvert=*/true);
1834 case X86::BI__builtin_ia32_cmpnleps:
1835 case X86::BI__builtin_ia32_cmpnlepd:
1836 return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()),
1837 cir::CmpOpKind::le, /*shouldInvert=*/true);
1838 case X86::BI__builtin_ia32_cmpordps:
1839 case X86::BI__builtin_ia32_cmpordpd:
1840 case X86::BI__builtin_ia32_cmpph128_mask:
1841 case X86::BI__builtin_ia32_cmpph256_mask:
1842 case X86::BI__builtin_ia32_cmpph512_mask:
1843 case X86::BI__builtin_ia32_cmpps128_mask:
1844 case X86::BI__builtin_ia32_cmpps256_mask:
1845 case X86::BI__builtin_ia32_cmpps512_mask:
1846 case X86::BI__builtin_ia32_cmppd128_mask:
1847 case X86::BI__builtin_ia32_cmppd256_mask:
1848 case X86::BI__builtin_ia32_cmppd512_mask:
1849 case X86::BI__builtin_ia32_vcmpbf16512_mask:
1850 case X86::BI__builtin_ia32_vcmpbf16256_mask:
1851 case X86::BI__builtin_ia32_vcmpbf16128_mask:
1852 case X86::BI__builtin_ia32_cmpps:
1853 case X86::BI__builtin_ia32_cmpps256:
1854 case X86::BI__builtin_ia32_cmppd:
1855 case X86::BI__builtin_ia32_cmppd256:
1856 case X86::BI__builtin_ia32_cmpeqss:
1857 case X86::BI__builtin_ia32_cmpltss:
1858 case X86::BI__builtin_ia32_cmpless:
1859 case X86::BI__builtin_ia32_cmpunordss:
1860 case X86::BI__builtin_ia32_cmpneqss:
1861 case X86::BI__builtin_ia32_cmpnltss:
1862 case X86::BI__builtin_ia32_cmpnless:
1863 case X86::BI__builtin_ia32_cmpordss:
1864 case X86::BI__builtin_ia32_cmpeqsd:
1865 case X86::BI__builtin_ia32_cmpltsd:
1866 case X86::BI__builtin_ia32_cmplesd:
1867 case X86::BI__builtin_ia32_cmpunordsd:
1868 case X86::BI__builtin_ia32_cmpneqsd:
1869 case X86::BI__builtin_ia32_cmpnltsd:
1870 case X86::BI__builtin_ia32_cmpnlesd:
1871 case X86::BI__builtin_ia32_cmpordsd:
1872 cgm.errorNYI(expr->getSourceRange(),
1873 std::string("unimplemented X86 builtin call: ") +
1874 getContext().BuiltinInfo.getName(builtinID));
1875 return {};
1876 case X86::BI__builtin_ia32_vcvtph2ps_mask:
1877 case X86::BI__builtin_ia32_vcvtph2ps256_mask:
1878 case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
1879 mlir::Location loc = getLoc(expr->getExprLoc());
1880 return emitX86CvtF16ToFloatExpr(builder, loc, ops,
1881 convertType(expr->getType()));
1882 }
1883 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
1884 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
1885 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
1886 case X86::BI__cpuid:
1887 case X86::BI__cpuidex:
1888 case X86::BI__emul:
1889 case X86::BI__emulu:
1890 case X86::BI__mulh:
1891 case X86::BI__umulh:
1892 case X86::BI_mul128:
1893 case X86::BI_umul128: {
1894 cgm.errorNYI(expr->getSourceRange(),
1895 std::string("unimplemented X86 builtin call: ") +
1896 getContext().BuiltinInfo.getName(builtinID));
1897 return mlir::Value{};
1898 }
1899 case X86::BI__faststorefence: {
1900 cir::AtomicFenceOp::create(
1901 builder, getLoc(expr->getExprLoc()),
1902 cir::MemOrder::SequentiallyConsistent,
1903 cir::SyncScopeKindAttr::get(&getMLIRContext(),
1904 cir::SyncScopeKind::System));
1905 return mlir::Value{};
1906 }
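// __faststorefence is modeled as a sequentially consistent fence at system
// scope, which on x86 typically lowers to an mfence-strength barrier.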
1907 case X86::BI__shiftleft128:
1908 case X86::BI__shiftright128: {
1909 cgm.errorNYI(expr->getSourceRange(),
1910 std::string("unimplemented X86 builtin call: ") +
1911 getContext().BuiltinInfo.getName(builtinID));
1912 return mlir::Value{};
1913 }
1914 case X86::BI_ReadWriteBarrier:
1915 case X86::BI_ReadBarrier:
1916 case X86::BI_WriteBarrier: {
1917 cir::AtomicFenceOp::create(
1918 builder, getLoc(expr->getExprLoc()),
1919 cir::MemOrder::SequentiallyConsistent,
1920 cir::SyncScopeKindAttr::get(&getMLIRContext(),
1921 cir::SyncScopeKind::SingleThread));
1922 return mlir::Value{};
1923 }
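// The MSVC read/write barriers only need to block compiler reordering, so
// SingleThread scope is used: a single-thread seq_cst fence emits no
// machine instruction but still acts as a compiler barrier.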
1924 case X86::BI_AddressOfReturnAddress: {
1925 mlir::Location loc = getLoc(expr->getExprLoc());
1926 mlir::Value addr =
1927 cir::AddrOfReturnAddrOp::create(builder, loc, allocaInt8PtrTy);
1928 return builder.createCast(loc, cir::CastKind::bitcast, addr, voidPtrTy);
1929 }
1930 case X86::BI__stosb:
1931 case X86::BI__ud2:
1932 case X86::BI__int2c:
1933 case X86::BI__readfsbyte:
1934 case X86::BI__readfsword:
1935 case X86::BI__readfsdword:
1936 case X86::BI__readfsqword:
1937 case X86::BI__readgsbyte:
1938 case X86::BI__readgsword:
1939 case X86::BI__readgsdword:
1940 case X86::BI__readgsqword:
1941 case X86::BI__builtin_ia32_encodekey128_u32:
1942 case X86::BI__builtin_ia32_encodekey256_u32:
1943 case X86::BI__builtin_ia32_aesenc128kl_u8:
1944 case X86::BI__builtin_ia32_aesdec128kl_u8:
1945 case X86::BI__builtin_ia32_aesenc256kl_u8:
1946 case X86::BI__builtin_ia32_aesdec256kl_u8:
1947 case X86::BI__builtin_ia32_aesencwide128kl_u8:
1948 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
1949 case X86::BI__builtin_ia32_aesencwide256kl_u8:
1950 case X86::BI__builtin_ia32_aesdecwide256kl_u8:
1951 case X86::BI__builtin_ia32_vfcmaddcph512_mask:
1952 case X86::BI__builtin_ia32_vfmaddcph512_mask:
1953 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
1954 case X86::BI__builtin_ia32_vfmaddcsh_round_mask:
1955 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
1956 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3:
1957 case X86::BI__builtin_ia32_prefetchi:
1958 cgm.errorNYI(expr->getSourceRange(),
1959 std::string("unimplemented X86 builtin call: ") +
1960 getContext().BuiltinInfo.getName(builtinID));
1961 return mlir::Value{};
1962 }
1963}