//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit x86/x86_64 Builtin calls as CIR or a function
// call to be later resolved.
//
//===----------------------------------------------------------------------===//

#include "CIRGenBuilder.h"
#include "CIRGenFunction.h"
#include "CIRGenModule.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/Location.h"
#include "mlir/IR/Types.h"
#include "mlir/IR/ValueRange.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/Support/ErrorHandling.h"
#include <string>

using namespace clang;
using namespace clang::CIRGen;

// Classic codegen (OG) supports unordered comparisons in addition to ordered
// comparisons, while CIR doesn't.
//
// This means that we can't encode the comparison code of UGT (unordered
// greater than), at least not at the CIR level.
//
// The boolean shouldInvert compensates for this.
// For example: to get to the comparison code UGT, we pass in
// emitVectorFCmp(OLE, shouldInvert = true), since OLE is the inverse of UGT.
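// The inversion also yields the correct unordered semantics: OLE(a, b) is
// false whenever a or b is NaN, so not(OLE(a, b)) is true exactly when
// a > b or the operands are unordered, which is what UGT requires.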

// There are several ways to support this otherwise:
// - Register an extra CmpOpKind for the unordered comparison types and build
//   the translation code to go from CIR to the LLVM dialect. Note that we get
//   this naturally with shouldInvert, benefiting from existing infrastructure,
//   albeit at the cost of an extra `not` at the CIR level.
// - Just add the extra comparison codes to a new VecCmpOpKind instead of
//   cluttering CmpOpKind.
// - Add a boolean to VecCmpOp to indicate whether it's doing an unordered or
//   ordered comparison.
// - Just emit the intrinsic call instead of calling this helper; see how the
//   LLVM lowering handles this.
static mlir::Value emitVectorFCmp(CIRGenBuilderTy &builder,
                                  ArrayRef<mlir::Value> ops,
                                  mlir::Location loc, cir::CmpOpKind pred,
                                  bool shouldInvert) {
  // TODO(cir): Add isSignaling boolean once emitConstrainedFPCall implemented
  mlir::Value cmp = builder.createVecCompare(loc, pred, ops[0], ops[1]);
  mlir::Value bitCast = builder.createBitcast(
      shouldInvert ? builder.createNot(cmp) : cmp, ops[0].getType());
  return bitCast;
}

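// Bitcasts an integer mask (e.g. an i8 or i16) to a vector of i1, truncating
// to numElems elements when the mask is wider than the vector it governs.
// For example, mask = 0b0101 with numElems = 4 yields <1, 0, 1, 0>: bit i of
// the mask becomes element i of the vector.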
static mlir::Value getMaskVecValue(CIRGenBuilderTy &builder, mlir::Location loc,
                                   mlir::Value mask, unsigned numElems) {
  auto maskTy = cir::VectorType::get(
      builder.getSIntNTy(1), cast<cir::IntType>(mask.getType()).getWidth());
  mlir::Value maskVec = builder.createBitcast(mask, maskTy);

  // If we have fewer than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
  if (numElems < 8) {
    llvm::SmallVector<mlir::Attribute, 4> indices;
    mlir::Type i32Ty = builder.getSInt32Ty();
    for (auto i : llvm::seq<unsigned>(0, numElems))
      indices.push_back(cir::IntAttr::get(i32Ty, i));

    maskVec = builder.createVecShuffle(loc, maskVec, maskVec, indices);
  }
  return maskVec;
}

// Builds the VecShuffleOp for the pshuflw and pshufhw x86 builtins.
//
// The vector is split into lanes of 8 word elements (16 bits each). The lower
// or upper half of each lane, controlled by `isLow`, is shuffled in the
// following way: the immediate is truncated to 8 bits and split into four
// 2-bit fields. The i-th field's value is the resulting index of the i-th
// element in the half lane after shuffling. The other half of the lane
// remains unchanged.
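//
// For example, pshuflw with imm = 0x1B (fields 3, 2, 1, 0, reading from bit 0
// upward) on a single 8-element lane reverses the low half: the shuffle
// indices become [3, 2, 1, 0, 4, 5, 6, 7].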
static cir::VecShuffleOp emitPshufWord(CIRGenBuilderTy &builder,
                                       const mlir::Value vec,
                                       const mlir::Value immediate,
                                       const mlir::Location loc,
                                       const bool isLow) {
  uint32_t imm = CIRGenFunction::getZExtIntValueFromConstOp(immediate);

  auto vecTy = cast<cir::VectorType>(vec.getType());
  unsigned numElts = vecTy.getSize();

  unsigned firstHalfStart = isLow ? 0 : 4;
  unsigned secondHalfStart = 4 - firstHalfStart;

  // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
  imm = (imm & 0xff) * 0x01010101;

  int64_t indices[32];
  for (unsigned l = 0; l != numElts; l += 8) {
    for (unsigned i = firstHalfStart; i != firstHalfStart + 4; ++i) {
      indices[l + i] = l + (imm & 3) + firstHalfStart;
      imm >>= 2;
    }
    for (unsigned i = secondHalfStart; i != secondHalfStart + 4; ++i)
      indices[l + i] = l + i;
  }

  return builder.createVecShuffle(loc, vec, ArrayRef(indices, numElts));
}

// Builds the shuffle mask for the pshufd and shufpd/shufps x86 builtins.
// The shuffle mask is written to outIndices.
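//
// For example, pshufd with imm = 0x1B on a 4-element lane produces the
// indices [3, 2, 1, 0]. For shufpd/shufps (isShufP), the upper half of each
// lane selects from the second source vector, so numElts is added to those
// indices.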
static void
computeFullLaneShuffleMask(CIRGenFunction &cgf, const mlir::Value vec,
                           uint32_t imm, const bool isShufP,
                           llvm::SmallVectorImpl<int64_t> &outIndices) {
  auto vecTy = cast<cir::VectorType>(vec.getType());
  unsigned numElts = vecTy.getSize();
  unsigned numLanes = cgf.cgm.getDataLayout().getTypeSizeInBits(vecTy) / 128;
  unsigned numLaneElts = numElts / numLanes;

  // Size the output before indexing into it below.
  outIndices.resize(numElts);

  // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
  imm = (imm & 0xff) * 0x01010101;

  for (unsigned l = 0; l != numElts; l += numLaneElts) {
    for (unsigned i = 0; i != numLaneElts; ++i) {
      uint32_t idx = imm % numLaneElts;
      imm /= numLaneElts;
      if (isShufP && i >= (numLaneElts / 2))
        idx += numElts;
      outIndices[l + i] = l + idx;
    }
  }
}

static mlir::Value emitPrefetch(CIRGenFunction &cgf, unsigned builtinID,
                                const CallExpr *e,
                                const SmallVector<mlir::Value> &ops) {
  CIRGenBuilderTy &builder = cgf.getBuilder();
  mlir::Location location = cgf.getLoc(e->getExprLoc());
  mlir::Type voidTy = builder.getVoidTy();
  mlir::Value address = builder.createPtrBitcast(ops[0], voidTy);
  bool isWrite{};
  int locality{};

  assert((builtinID == X86::BI_mm_prefetch ||
          builtinID == X86::BI_m_prefetchw ||
          builtinID == X86::BI_m_prefetch) &&
         "Expected prefetch builtin");

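  // The _mm_prefetch hint encodes the access kind in bit 2 and the temporal
  // locality in bits 1:0. For example, _MM_HINT_T0 (3) decodes to a read with
  // locality 3, while _MM_HINT_ET0 (7) decodes to a write with locality 3.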
  if (builtinID == X86::BI_mm_prefetch) {
    int hint = cgf.getSExtIntValueFromConstOp(ops[1]);
    isWrite = (hint >> 2) & 0x1;
    locality = hint & 0x3;
  } else {
    isWrite = (builtinID == X86::BI_m_prefetchw);
    locality = 0x3;
  }

  cir::PrefetchOp::create(builder, location, address, locality, isWrite);
  return {};
}

static mlir::Value emitX86CompressExpand(CIRGenBuilderTy &builder,
                                         mlir::Location loc, mlir::Value source,
                                         mlir::Value mask,
                                         mlir::Value inputVector,
                                         const std::string &id) {
  auto resultTy = cast<cir::VectorType>(mask.getType());
  mlir::Value maskValue =
      getMaskVecValue(builder, loc, inputVector, resultTy.getSize());
  return builder.emitIntrinsicCallOp(loc, id, resultTy,
                                     mlir::ValueRange{source, mask, maskValue});
}

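// Emits a masked vector select: element i of the result is op0[i] where bit i
// of the mask is set and op1[i] otherwise, matching the writemask semantics
// of the AVX-512 *_mask builtins.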
static mlir::Value emitX86Select(CIRGenBuilderTy &builder, mlir::Location loc,
                                 mlir::Value mask, mlir::Value op0,
                                 mlir::Value op1) {
  auto constOp = mlir::dyn_cast_or_null<cir::ConstantOp>(mask.getDefiningOp());
  // If the mask is all ones, just return the first argument.
  if (constOp && constOp.isAllOnesValue())
    return op0;

  mask = getMaskVecValue(builder, loc, mask,
                         cast<cir::VectorType>(op0.getType()).getSize());

  return cir::VecTernaryOp::create(builder, loc, mask, op0, op1);
}

// Helper function to extract the zero bit from a mask as a boolean.
static mlir::Value getMaskZeroBitAsBool(CIRGenBuilderTy &builder,
                                        mlir::Location loc, mlir::Value mask) {
  // Get the mask as a vector of i1 and extract bit 0.
  auto intTy = mlir::dyn_cast<cir::IntType>(mask.getType());
  assert(intTy && "mask must be an integer type");
  unsigned width = intTy.getWidth();

  auto maskVecTy = cir::VectorType::get(builder.getSIntNTy(1), width);
  mlir::Value maskVec = builder.createBitcast(mask, maskVecTy);

  // Extract bit 0 from the mask vector.
  mlir::Value bit0 = builder.createExtractElement(loc, maskVec, uint64_t(0));

  // Convert i1 to bool for select.
  auto boolTy = cir::BoolType::get(builder.getContext());
  return cir::CastOp::create(builder, loc, boolTy, cir::CastKind::int_to_bool,
                             bit0);
}

static mlir::Value emitX86ScalarSelect(CIRGenBuilderTy &builder,
                                       mlir::Location loc, mlir::Value mask,
                                       mlir::Value op0, mlir::Value op1) {

  // If the mask is all ones, just return the first argument.
  if (auto c = mlir::dyn_cast_or_null<cir::ConstantOp>(mask.getDefiningOp()))
    if (c.isAllOnesValue())
      return op0;

  mlir::Value cond = getMaskZeroBitAsBool(builder, loc, mask);
  return builder.createSelect(loc, cond, op0, op1);
}

static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder,
                                       mlir::Location loc,
                                       const std::string &intrinsicName,
                                       ArrayRef<mlir::Value> ops) {

  auto intTy = cast<cir::IntType>(ops[0].getType());
  unsigned numElts = intTy.getWidth();
  mlir::Value lhsVec = getMaskVecValue(builder, loc, ops[0], numElts);
  mlir::Value rhsVec = getMaskVecValue(builder, loc, ops[1], numElts);
  mlir::Type vecTy = lhsVec.getType();
  mlir::Value resVec = builder.emitIntrinsicCallOp(
      loc, intrinsicName, vecTy, mlir::ValueRange{lhsVec, rhsVec});
  return builder.createBitcast(resVec, ops[0].getType());
}

static mlir::Value emitX86MaskUnpack(CIRGenBuilderTy &builder,
                                     mlir::Location loc,
                                     const std::string &intrinsicName,
                                     ArrayRef<mlir::Value> ops) {
  unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth();

  // Convert both operands to mask vectors.
  mlir::Value lhs = getMaskVecValue(builder, loc, ops[0], numElems);
  mlir::Value rhs = getMaskVecValue(builder, loc, ops[1], numElems);

  mlir::Type i32Ty = builder.getSInt32Ty();

  // Create indices for extracting the first half of each vector.
  llvm::SmallVector<mlir::Attribute> halfIndices;
  for (auto i : llvm::seq<unsigned>(0, numElems / 2))
    halfIndices.push_back(cir::IntAttr::get(i32Ty, i));

  // Extract the first half of each vector. This gives better codegen than
  // doing it in a single shuffle.
  mlir::Value lhsHalf = builder.createVecShuffle(loc, lhs, lhs, halfIndices);
  mlir::Value rhsHalf = builder.createVecShuffle(loc, rhs, rhs, halfIndices);

  // Create indices for concatenating the vectors.
  // NOTE: Operands are swapped to match the intrinsic definition.
  // After the half extraction, both vectors have numElems/2 elements.
  // In createVecShuffle(rhsHalf, lhsHalf, indices), indices [0..numElems/2-1]
  // select from rhsHalf, and indices [numElems/2..numElems-1] select from
  // lhsHalf.
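  // For example, for kunpckhi (numElems = 16) the result is
  // rhs[0..7] ++ lhs[0..7]: the low half of the result mask comes from rhs
  // and the high half from lhs.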
  llvm::SmallVector<mlir::Attribute> concatIndices;
  for (auto i : llvm::seq<unsigned>(0, numElems))
    concatIndices.push_back(cir::IntAttr::get(i32Ty, i));

  // Concat the vectors (RHS first, then LHS).
  mlir::Value res =
      builder.createVecShuffle(loc, rhsHalf, lhsHalf, concatIndices);
  return builder.createBitcast(res, ops[0].getType());
}

static mlir::Value emitX86MaskLogic(CIRGenBuilderTy &builder,
                                    mlir::Location loc,
                                    cir::BinOpKind binOpKind,
                                    ArrayRef<mlir::Value> ops,
                                    bool invertLHS = false) {
  unsigned numElts = cast<cir::IntType>(ops[0].getType()).getWidth();
  mlir::Value lhs = getMaskVecValue(builder, loc, ops[0], numElts);
  mlir::Value rhs = getMaskVecValue(builder, loc, ops[1], numElts);

  if (invertLHS)
    lhs = builder.createNot(lhs);
  return builder.createBitcast(builder.createBinop(loc, lhs, binOpKind, rhs),
                               ops[0].getType());
}

static mlir::Value emitX86MaskTest(CIRGenBuilderTy &builder, mlir::Location loc,
                                   const std::string &intrinsicName,
                                   ArrayRef<mlir::Value> ops) {
  auto intTy = cast<cir::IntType>(ops[0].getType());
  unsigned numElts = intTy.getWidth();
  mlir::Value lhsVec = getMaskVecValue(builder, loc, ops[0], numElts);
  mlir::Value rhsVec = getMaskVecValue(builder, loc, ops[1], numElts);
  mlir::Type resTy = builder.getSInt32Ty();
  return builder.emitIntrinsicCallOp(loc, intrinsicName, resTy,
                                     mlir::ValueRange{lhsVec, rhsVec});
}

static mlir::Value emitX86MaskedCompareResult(CIRGenBuilderTy &builder,
                                              mlir::Value cmp, unsigned numElts,
                                              mlir::Value maskIn,
                                              mlir::Location loc) {
  if (maskIn) {
    auto c = mlir::dyn_cast_or_null<cir::ConstantOp>(maskIn.getDefiningOp());
    if (!c || !c.isAllOnesValue())
      cmp = builder.createAnd(loc, cmp,
                              getMaskVecValue(builder, loc, maskIn, numElts));
  }
  if (numElts < 8) {
    llvm::SmallVector<mlir::Attribute, 8> indices;
    mlir::Type i64Ty = builder.getSInt64Ty();

    for (unsigned i = 0; i != numElts; ++i)
      indices.push_back(cir::IntAttr::get(i64Ty, i));
    for (unsigned i = numElts; i != 8; ++i)
      indices.push_back(cir::IntAttr::get(i64Ty, i % numElts + numElts));

    // This should shuffle between cmp (first vector) and null (second vector).
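    // For example, with numElts = 4 the indices are [0, 1, 2, 3, 4, 5, 6, 7]:
    // the first four select the compare results and the last four (>= numElts)
    // select zeros from nullVec, padding the mask out to 8 bits.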
    mlir::Value nullVec = builder.getNullValue(cmp.getType(), loc);
    cmp = builder.createVecShuffle(loc, cmp, nullVec, indices);
  }
  return builder.createBitcast(cmp, builder.getUIntNTy(std::max(numElts, 8U)));
}

// TODO: The cgf parameter should be removed when all the NYI cases are
// implemented.
static std::optional<mlir::Value>
emitX86MaskedCompare(CIRGenFunction &cgf, CIRGenBuilderTy &builder, unsigned cc,
                     bool isSigned, ArrayRef<mlir::Value> ops,
                     mlir::Location loc) {
  assert((ops.size() == 2 || ops.size() == 4) &&
         "Unexpected number of arguments");
  unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
  mlir::Value cmp;

  if (cc == 3) {
    cgf.cgm.errorNYI(loc, "emitX86MaskedCompare: cc == 3");
    return {};
  } else if (cc == 7) {
    cgf.cgm.errorNYI(loc, "emitX86MaskedCompare: cc == 7");
    return {};
  } else {
    cir::CmpOpKind pred;
    switch (cc) {
    default:
      llvm_unreachable("Unknown condition code");
    case 0:
      pred = cir::CmpOpKind::eq;
      break;
    case 1:
      pred = cir::CmpOpKind::lt;
      break;
    case 2:
      pred = cir::CmpOpKind::le;
      break;
    case 4:
      pred = cir::CmpOpKind::ne;
      break;
    case 5:
      pred = cir::CmpOpKind::ge;
      break;
    case 6:
      pred = cir::CmpOpKind::gt;
      break;
    }

    auto resultTy = cir::VectorType::get(builder.getSIntNTy(1), numElts);
    cmp = cir::VecCmpOp::create(builder, loc, resultTy, pred, ops[0], ops[1]);
  }

  mlir::Value maskIn;
  if (ops.size() == 4)
    maskIn = ops[3];

  return emitX86MaskedCompareResult(builder, cmp, numElts, maskIn, loc);
}

// TODO: The cgf parameter should be removed when all the NYI cases are
// implemented.
static std::optional<mlir::Value> emitX86ConvertToMask(CIRGenFunction &cgf,
                                                       CIRGenBuilderTy &builder,
                                                       mlir::Value in,
                                                       mlir::Location loc) {
  cir::ConstantOp zero = builder.getNullValue(in.getType(), loc);
  return emitX86MaskedCompare(cgf, builder, 1, true, {in, zero}, loc);
}

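// Sign-extends an integer mask to a vector: each i1 mask bit becomes an
// all-ones (-1) or all-zeros element of dstTy, as used by the cvtmask2*
// builtins. For example, op = 0b10 with a 2-element destination yields
// <0, -1>.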
static std::optional<mlir::Value> emitX86SExtMask(CIRGenBuilderTy &builder,
                                                  mlir::Value op,
                                                  mlir::Type dstTy,
                                                  mlir::Location loc) {
  unsigned numberOfElements = cast<cir::VectorType>(dstTy).getSize();
  mlir::Value mask = getMaskVecValue(builder, loc, op, numberOfElements);

  return builder.createCast(loc, cir::CastKind::integral, mask, dstTy);
}

static mlir::Value emitVecInsert(CIRGenBuilderTy &builder, mlir::Location loc,
                                 mlir::Value vec, mlir::Value value,
                                 mlir::Value indexOp) {
  unsigned numElts = cast<cir::VectorType>(vec.getType()).getSize();

  uint64_t index =
      indexOp.getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue();

  // Mask off any extra bits so the index is always in range.
  index &= numElts - 1;

  cir::ConstantOp indexVal = builder.getUInt64(index, loc);

  return cir::VecInsertOp::create(builder, loc, vec, value, indexVal);
}

static mlir::Value emitX86FunnelShift(CIRGenBuilderTy &builder,
                                      mlir::Location location, mlir::Value &op0,
                                      mlir::Value &op1, mlir::Value &amt,
                                      bool isRight) {
  mlir::Type op0Ty = op0.getType();

  // The amount may be a scalar immediate, in which case we create a splat
  // vector. Funnel shift amounts are treated as modulo, and the types are all
  // powers of 2, so we only care about the lowest log2 bits anyway.
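  // For reference: with element width w and s = amt % w, fshl(a, b, s)
  // concatenates (a:b) into a 2w-bit value, shifts it left by s, and returns
  // the top w bits; fshr shifts right and returns the bottom w bits.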
  if (amt.getType() != op0Ty) {
    auto vecTy = mlir::cast<cir::VectorType>(op0Ty);
    uint64_t numElems = vecTy.getSize();

    auto amtTy = mlir::cast<cir::IntType>(amt.getType());
    auto vecElemTy = mlir::cast<cir::IntType>(vecTy.getElementType());

    // If signed, cast to the same width but unsigned first to
    // ensure zero-extension when casting to a bigger unsigned `vecElemTy`.
    if (amtTy.isSigned()) {
      cir::IntType unsignedAmtTy = builder.getUIntNTy(amtTy.getWidth());
      amt = builder.createIntCast(amt, unsignedAmtTy);
    }
    cir::IntType unsignedVecElemType = builder.getUIntNTy(vecElemTy.getWidth());
    amt = builder.createIntCast(amt, unsignedVecElemType);
    amt = cir::VecSplatOp::create(
        builder, location, cir::VectorType::get(unsignedVecElemType, numElems),
        amt);
  }

  const StringRef intrinsicName = isRight ? "fshr" : "fshl";
  return builder.emitIntrinsicCallOp(location, intrinsicName, op0Ty,
                                     mlir::ValueRange{op0, op1, amt});
}

static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
                                bool isSigned,
                                ArrayRef<mlir::Value> ops,
                                unsigned opTypePrimitiveSizeInBits) {
  mlir::Type ty = cir::VectorType::get(builder.getSInt64Ty(),
                                       opTypePrimitiveSizeInBits / 64);
  mlir::Value lhs = builder.createBitcast(loc, ops[0], ty);
  mlir::Value rhs = builder.createBitcast(loc, ops[1], ty);
  if (isSigned) {
    cir::ConstantOp shiftAmt =
        builder.getConstant(loc, cir::IntAttr::get(builder.getSInt64Ty(), 32));
    cir::VecSplatOp shiftSplatVecOp =
        cir::VecSplatOp::create(builder, loc, ty, shiftAmt.getResult());
    mlir::Value shiftSplatValue = shiftSplatVecOp.getResult();
    // In CIR, right-shift operations are automatically lowered to either an
    // arithmetic or logical shift depending on the operand type. The purpose
    // of the shifts here is to propagate the sign bit of the 32-bit input
    // into the upper bits of each vector lane.
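    // For example, a lane holding 0x00000000'80000000 becomes
    // 0x80000000'00000000 after the left shift and 0xFFFFFFFF'80000000 after
    // the arithmetic right shift, i.e. the sign-extended 64-bit value.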
    lhs = builder.createShift(loc, lhs, shiftSplatValue, true);
    lhs = builder.createShift(loc, lhs, shiftSplatValue, false);
    rhs = builder.createShift(loc, rhs, shiftSplatValue, true);
    rhs = builder.createShift(loc, rhs, shiftSplatValue, false);
  } else {
    cir::ConstantOp maskScalar = builder.getConstant(
        loc, cir::IntAttr::get(builder.getSInt64Ty(), 0xffffffff));
    cir::VecSplatOp mask =
        cir::VecSplatOp::create(builder, loc, ty, maskScalar.getResult());
    // Clear the upper bits.
    lhs = builder.createAnd(loc, lhs, mask);
    rhs = builder.createAnd(loc, rhs, mask);
  }
  return builder.createMul(loc, lhs, rhs);
}

// Convert f16 half values to floats.
static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
                                            mlir::Location loc,
                                            ArrayRef<mlir::Value> ops,
                                            mlir::Type dstTy) {
  assert((ops.size() == 1 || ops.size() == 3 || ops.size() == 4) &&
         "Unknown cvtph2ps intrinsic");

  // If the SAE intrinsic doesn't use default rounding, then we can't upgrade.
  if (ops.size() == 4) {
    auto constOp = ops[3].getDefiningOp<cir::ConstantOp>();
    assert(constOp && "Expected constant operand");
    if (constOp.getIntValue().getZExtValue() != 4) {
      return builder.emitIntrinsicCallOp(loc, "x86.avx512.mask.vcvtph2ps.512",
                                         dstTy, ops);
    }
  }

  unsigned numElts = cast<cir::VectorType>(dstTy).getSize();
  mlir::Value src = ops[0];

  // Extract the subvector.
  if (numElts != cast<cir::VectorType>(src.getType()).getSize()) {
    assert(numElts == 4 && "Unexpected vector size");
    src = builder.createVecShuffle(loc, src, {0, 1, 2, 3});
  }

  // Bitcast from vXi16 to vXf16.
  cir::VectorType halfTy =
      cir::VectorType::get(cir::FP16Type::get(builder.getContext()), numElts);

  src = builder.createCast(cir::CastKind::bitcast, src, halfTy);

  // Perform the fp-extension.
  mlir::Value res = builder.createCast(cir::CastKind::floating, src, dstTy);

  if (ops.size() >= 3)
    res = emitX86Select(builder, loc, ops[2], res, ops[1]);
  return res;
}

static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
                                ArrayRef<mlir::Value> ops,
                                bool isSigned) {
  mlir::Value op0 = ops[0];
  mlir::Value op1 = ops[1];

  cir::VectorType ty = cast<cir::VectorType>(op0.getType());
  cir::IntType elementTy = cast<cir::IntType>(ty.getElementType());

  uint64_t imm = CIRGenFunction::getZExtIntValueFromConstOp(ops[2]) & 0x7;

  cir::CmpOpKind pred;
  switch (imm) {
  case 0x0:
    pred = cir::CmpOpKind::lt;
    break;
  case 0x1:
    pred = cir::CmpOpKind::le;
    break;
  case 0x2:
    pred = cir::CmpOpKind::gt;
    break;
  case 0x3:
    pred = cir::CmpOpKind::ge;
    break;
  case 0x4:
    pred = cir::CmpOpKind::eq;
    break;
  case 0x5:
    pred = cir::CmpOpKind::ne;
    break;
  case 0x6:
    return builder.getNullValue(ty, loc); // FALSE
  case 0x7: {
    llvm::APInt allOnes = llvm::APInt::getAllOnes(elementTy.getWidth());
    return cir::VecSplatOp::create(
        builder, loc, ty,
        builder.getConstAPInt(loc, elementTy, allOnes)); // TRUE
  }
  default:
    llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
  }

  if ((!isSigned && elementTy.isSigned()) ||
      (isSigned && elementTy.isUnsigned())) {
    elementTy = elementTy.isSigned() ? builder.getUIntNTy(elementTy.getWidth())
                                     : builder.getSIntNTy(elementTy.getWidth());
    ty = cir::VectorType::get(elementTy, ty.getSize());
    op0 = builder.createBitcast(op0, ty);
    op1 = builder.createBitcast(op1, ty);
  }

  return builder.createVecCompare(loc, pred, op0, op1);
}

static mlir::Value emitX86Fpclass(CIRGenBuilderTy &builder, mlir::Location loc,
                                  unsigned builtinID,
                                  llvm::SmallVectorImpl<mlir::Value> &ops) {
  unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
  mlir::Value maskIn = ops[2];
  ops.erase(ops.begin() + 2);

  StringRef intrinsicName;
  switch (builtinID) {
  default:
    llvm_unreachable("Unsupported fpclass builtin");
  case X86::BI__builtin_ia32_vfpclassbf16128_mask:
    intrinsicName = "x86.avx10.fpclass.bf16.128";
    break;
  case X86::BI__builtin_ia32_vfpclassbf16256_mask:
    intrinsicName = "x86.avx10.fpclass.bf16.256";
    break;
  case X86::BI__builtin_ia32_vfpclassbf16512_mask:
    intrinsicName = "x86.avx10.fpclass.bf16.512";
    break;
  case X86::BI__builtin_ia32_fpclassph128_mask:
    intrinsicName = "x86.avx512fp16.fpclass.ph.128";
    break;
  case X86::BI__builtin_ia32_fpclassph256_mask:
    intrinsicName = "x86.avx512fp16.fpclass.ph.256";
    break;
  case X86::BI__builtin_ia32_fpclassph512_mask:
    intrinsicName = "x86.avx512fp16.fpclass.ph.512";
    break;
  case X86::BI__builtin_ia32_fpclassps128_mask:
    intrinsicName = "x86.avx512.fpclass.ps.128";
    break;
  case X86::BI__builtin_ia32_fpclassps256_mask:
    intrinsicName = "x86.avx512.fpclass.ps.256";
    break;
  case X86::BI__builtin_ia32_fpclassps512_mask:
    intrinsicName = "x86.avx512.fpclass.ps.512";
    break;
  case X86::BI__builtin_ia32_fpclasspd128_mask:
    intrinsicName = "x86.avx512.fpclass.pd.128";
    break;
  case X86::BI__builtin_ia32_fpclasspd256_mask:
    intrinsicName = "x86.avx512.fpclass.pd.256";
    break;
  case X86::BI__builtin_ia32_fpclasspd512_mask:
    intrinsicName = "x86.avx512.fpclass.pd.512";
    break;
  }

  auto cmpResultTy = cir::VectorType::get(builder.getSIntNTy(1), numElts);
  mlir::Value fpclass =
      builder.emitIntrinsicCallOp(loc, intrinsicName, cmpResultTy, ops);
  return emitX86MaskedCompareResult(builder, fpclass, numElts, maskIn, loc);
}

static mlir::Value emitX86Aes(CIRGenBuilderTy &builder, mlir::Location loc,
                              llvm::StringRef intrinsicName, mlir::Type retType,
                              ArrayRef<mlir::Value> ops) {
  // Create the return struct type and call the intrinsic function.
  mlir::Type vecType =
      mlir::cast<cir::PointerType>(ops[0].getType()).getPointee();
  cir::RecordType rstRecTy = builder.getAnonRecordTy({retType, vecType});
  mlir::Value rstValueRec = builder.emitIntrinsicCallOp(
      loc, intrinsicName, rstRecTy, mlir::ValueRange{ops[1], ops[2]});

  // Extract the first return value and truncate it to 1 bit, then cast the
  // result to a bool value.
  mlir::Value flag =
      cir::ExtractMemberOp::create(builder, loc, rstValueRec, /*index=*/0);
  mlir::Value flagBit0 = builder.createCast(loc, cir::CastKind::integral, flag,
                                            builder.getUIntNTy(1));
  mlir::Value succ = builder.createCast(loc, cir::CastKind::int_to_bool,
                                        flagBit0, builder.getBoolTy());

  // Extract the second return value and store it to the output address on
  // success; store zero otherwise.
  mlir::Value out =
      cir::ExtractMemberOp::create(builder, loc, rstValueRec, /*index=*/1);
  Address outAddr(ops[0], /*align=*/CharUnits::fromQuantity(16));
  cir::IfOp::create(
      builder, loc, succ, /*withElseRegion=*/true,
      /*thenBuilder=*/
      [&](mlir::OpBuilder &b, mlir::Location) {
        builder.createStore(loc, out, outAddr);
        builder.createYield(loc);
      },
      /*elseBuilder=*/
      [&](mlir::OpBuilder &b, mlir::Location) {
        mlir::Value zero = builder.getNullValue(vecType, loc);
        builder.createStore(loc, zero, outAddr);
        builder.createYield(loc);
      });

  return cir::ExtractMemberOp::create(builder, loc, rstValueRec, /*index=*/0);
}

static mlir::Value emitX86Aeswide(CIRGenBuilderTy &builder, mlir::Location loc,
                                  llvm::StringRef intrinsicName,
                                  mlir::Type retType,
                                  ArrayRef<mlir::Value> ops) {
  mlir::Type vecType =
      mlir::cast<cir::PointerType>(ops[1].getType()).getPointee();

  // Create a struct for the return type and load the input arguments, then
  // call the intrinsic function.
  mlir::Type recTypes[9] = {retType, vecType, vecType, vecType, vecType,
                            vecType, vecType, vecType, vecType};
  mlir::Value arguments[9];
  arguments[0] = ops[2];
  for (int i = 0; i < 8; i++) {
    // Load each vector argument from the input address.
    cir::ConstantOp idx = builder.getUInt32(i, loc);
    mlir::Value nextInElePtr =
        builder.getArrayElement(loc, loc, ops[1], vecType, idx,
                                /*shouldDecay=*/false);
    arguments[i + 1] =
        builder.createAlignedLoad(loc, vecType, nextInElePtr,
                                  /*align=*/CharUnits::fromQuantity(16));
  }
  cir::RecordType rstRecTy = builder.getAnonRecordTy(recTypes);
  mlir::Value rstValueRec =
      builder.emitIntrinsicCallOp(loc, intrinsicName, rstRecTy, arguments);

  // Extract the first return value and truncate it to 1 bit, then cast the
  // result to a bool value.
  mlir::Value flag =
      cir::ExtractMemberOp::create(builder, loc, rstValueRec, /*index=*/0);
  mlir::Value flagBit0 = builder.createCast(loc, cir::CastKind::integral, flag,
                                            builder.getUIntNTy(1));
  mlir::Value succ = builder.createCast(loc, cir::CastKind::int_to_bool,
                                        flagBit0, builder.getBoolTy());

  // Extract the other return values and store them to the output address on
  // success; store zeros otherwise.
  cir::IfOp::create(
      builder, loc, succ, /*withElseRegion=*/true,
      /*thenBuilder=*/
      [&](mlir::OpBuilder &b, mlir::Location) {
        for (int i = 0; i < 8; i++) {
          mlir::Value out =
              cir::ExtractMemberOp::create(builder, loc, rstValueRec,
                                           /*index=*/i + 1);
          cir::ConstantOp idx = builder.getUInt32(i, loc);
          mlir::Value nextOutEleAddr =
              builder.getArrayElement(loc, loc, ops[0], vecType, idx,
                                      /*shouldDecay=*/false);
          Address outAddr(nextOutEleAddr,
                          /*align=*/CharUnits::fromQuantity(16));
          builder.createStore(loc, out, outAddr);
        }
        builder.createYield(loc);
      },
      /*elseBuilder=*/
      [&](mlir::OpBuilder &b, mlir::Location) {
        mlir::Value zero = builder.getNullValue(vecType, loc);
        for (int i = 0; i < 8; i++) {
          cir::ConstantOp idx = builder.getUInt32(i, loc);
          mlir::Value nextOutEleAddr =
              builder.getArrayElement(loc, loc, ops[0], vecType, idx,
                                      /*shouldDecay=*/false);
          Address outAddr(nextOutEleAddr,
                          /*align=*/CharUnits::fromQuantity(16));
          builder.createStore(loc, zero, outAddr);
        }
        builder.createYield(loc);
      });

  return cir::ExtractMemberOp::create(builder, loc, rstValueRec, /*index=*/0);
}

std::optional<mlir::Value>
CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
  if (builtinID == Builtin::BI__builtin_cpu_is) {
    cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_is");
    return mlir::Value{};
  }
  if (builtinID == Builtin::BI__builtin_cpu_supports) {
    cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_supports");
    return mlir::Value{};
  }
  if (builtinID == Builtin::BI__builtin_cpu_init) {
    cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_init");
    return mlir::Value{};
  }

  // Handle MSVC intrinsics before argument evaluation to prevent double
  // evaluation.

  // Find out if any arguments are required to be integer constant
  // expressions; `iceArguments` is a bitmap indicating whether the argument
  // at the i-th bit is required to be a constant integer expression.
  unsigned iceArguments = 0;
  ASTContext::GetBuiltinTypeError error;
  getContext().GetBuiltinType(builtinID, error, &iceArguments);
  assert(error == ASTContext::GE_None && "Error while getting builtin type.");

  // The operands of the builtin call.
  llvm::SmallVector<mlir::Value> ops;
  for (auto [idx, arg] : llvm::enumerate(expr->arguments()))
    ops.push_back(emitScalarOrConstFoldImmArg(iceArguments, idx, arg));

  CIRGenBuilderTy &builder = getBuilder();
  mlir::Type voidTy = builder.getVoidTy();

  switch (builtinID) {
  default:
    return std::nullopt;
  case X86::BI_mm_clflush:
    return builder.emitIntrinsicCallOp(getLoc(expr->getExprLoc()),
                                       "x86.sse2.clflush", voidTy, ops[0]);
  case X86::BI_mm_lfence:
    return builder.emitIntrinsicCallOp(getLoc(expr->getExprLoc()),
                                       "x86.sse2.lfence", voidTy);
  case X86::BI_mm_pause:
    return builder.emitIntrinsicCallOp(getLoc(expr->getExprLoc()),
                                       "x86.sse2.pause", voidTy);
  case X86::BI_mm_mfence:
    return builder.emitIntrinsicCallOp(getLoc(expr->getExprLoc()),
                                       "x86.sse2.mfence", voidTy);
  case X86::BI_mm_sfence:
    return builder.emitIntrinsicCallOp(getLoc(expr->getExprLoc()),
                                       "x86.sse.sfence", voidTy);
  case X86::BI_mm_prefetch:
  case X86::BI_m_prefetch:
  case X86::BI_m_prefetchw:
    return emitPrefetch(*this, builtinID, expr, ops);
  case X86::BI__rdtsc:
  case X86::BI__builtin_ia32_rdtscp: {
    cgm.errorNYI(expr->getSourceRange(),
                 std::string("unimplemented X86 builtin call: ") +
                     getContext().BuiltinInfo.getName(builtinID));
    return mlir::Value{};
  }
  case X86::BI__builtin_ia32_lzcnt_u16:
  case X86::BI__builtin_ia32_lzcnt_u32:
  case X86::BI__builtin_ia32_lzcnt_u64: {
    mlir::Location loc = getLoc(expr->getExprLoc());
    mlir::Value isZeroPoison = builder.getFalse(loc);
    return builder.emitIntrinsicCallOp(loc, "ctlz", ops[0].getType(),
                                       mlir::ValueRange{ops[0], isZeroPoison});
  }
  case X86::BI__builtin_ia32_tzcnt_u16:
  case X86::BI__builtin_ia32_tzcnt_u32:
  case X86::BI__builtin_ia32_tzcnt_u64: {
    mlir::Location loc = getLoc(expr->getExprLoc());
    mlir::Value isZeroPoison = builder.getFalse(loc);
    return builder.emitIntrinsicCallOp(loc, "cttz", ops[0].getType(),
                                       mlir::ValueRange{ops[0], isZeroPoison});
  }
  case X86::BI__builtin_ia32_undef128:
  case X86::BI__builtin_ia32_undef256:
  case X86::BI__builtin_ia32_undef512:
    // The x86 definition of "undef" is not the same as the LLVM definition
    // (PR32176). We leave optimizing away an unnecessary zero constant to the
    // IR optimizer and backend.
    // TODO: If we had a "freeze" IR instruction to generate a fixed undef
    // value, we should use that here instead of a zero.
    return builder.getNullValue(convertType(expr->getType()),
                                getLoc(expr->getExprLoc()));
  case X86::BI__builtin_ia32_vec_ext_v4hi:
  case X86::BI__builtin_ia32_vec_ext_v16qi:
  case X86::BI__builtin_ia32_vec_ext_v8hi:
  case X86::BI__builtin_ia32_vec_ext_v4si:
  case X86::BI__builtin_ia32_vec_ext_v4sf:
  case X86::BI__builtin_ia32_vec_ext_v2di:
  case X86::BI__builtin_ia32_vec_ext_v32qi:
  case X86::BI__builtin_ia32_vec_ext_v16hi:
  case X86::BI__builtin_ia32_vec_ext_v8si:
  case X86::BI__builtin_ia32_vec_ext_v4di: {
    unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();

    uint64_t index = getZExtIntValueFromConstOp(ops[1]);
    index &= numElts - 1;

    cir::ConstantOp indexVal =
        builder.getUInt64(index, getLoc(expr->getExprLoc()));

    // These builtins exist so we can ensure the index is an ICE and in range.
    // Otherwise we could just do this in the header file.
    return cir::VecExtractOp::create(builder, getLoc(expr->getExprLoc()),
                                     ops[0], indexVal);
  }
  case X86::BI__builtin_ia32_vec_set_v4hi:
  case X86::BI__builtin_ia32_vec_set_v16qi:
  case X86::BI__builtin_ia32_vec_set_v8hi:
  case X86::BI__builtin_ia32_vec_set_v4si:
  case X86::BI__builtin_ia32_vec_set_v2di:
  case X86::BI__builtin_ia32_vec_set_v32qi:
  case X86::BI__builtin_ia32_vec_set_v16hi:
  case X86::BI__builtin_ia32_vec_set_v8si:
  case X86::BI__builtin_ia32_vec_set_v4di: {
    return emitVecInsert(builder, getLoc(expr->getExprLoc()), ops[0], ops[1],
                         ops[2]);
  }
  case X86::BI__builtin_ia32_kunpckhi:
    return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
                             "x86.avx512.kunpackb", ops);
  case X86::BI__builtin_ia32_kunpcksi:
    return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
                             "x86.avx512.kunpackw", ops);
  case X86::BI__builtin_ia32_kunpckdi:
    return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
                             "x86.avx512.kunpackd", ops);
  case X86::BI_mm_setcsr:
  case X86::BI__builtin_ia32_ldmxcsr: {
    mlir::Location loc = getLoc(expr->getExprLoc());
    Address tmp = createMemTemp(expr->getArg(0)->getType(), loc);
    builder.createStore(loc, ops[0], tmp);
    return builder.emitIntrinsicCallOp(loc, "x86.sse.ldmxcsr",
                                       builder.getVoidTy(), tmp.getPointer());
  }
  case X86::BI_mm_getcsr:
  case X86::BI__builtin_ia32_stmxcsr: {
    mlir::Location loc = getLoc(expr->getExprLoc());
    Address tmp = createMemTemp(expr->getType(), loc);
    builder.emitIntrinsicCallOp(loc, "x86.sse.stmxcsr", builder.getVoidTy(),
                                tmp.getPointer());
    return builder.createLoad(loc, tmp);
  }
  case X86::BI__builtin_ia32_xsave:
  case X86::BI__builtin_ia32_xsave64:
  case X86::BI__builtin_ia32_xrstor:
  case X86::BI__builtin_ia32_xrstor64:
  case X86::BI__builtin_ia32_xsaveopt:
  case X86::BI__builtin_ia32_xsaveopt64:
  case X86::BI__builtin_ia32_xrstors:
  case X86::BI__builtin_ia32_xrstors64:
  case X86::BI__builtin_ia32_xsavec:
  case X86::BI__builtin_ia32_xsavec64:
  case X86::BI__builtin_ia32_xsaves:
  case X86::BI__builtin_ia32_xsaves64:
  case X86::BI__builtin_ia32_xsetbv:
  case X86::BI_xsetbv: {
    mlir::Location loc = getLoc(expr->getExprLoc());
    StringRef intrinsicName;
    switch (builtinID) {
    default:
      llvm_unreachable("Unexpected builtin");
    case X86::BI__builtin_ia32_xsave:
      intrinsicName = "x86.xsave";
      break;
    case X86::BI__builtin_ia32_xsave64:
      intrinsicName = "x86.xsave64";
      break;
    case X86::BI__builtin_ia32_xrstor:
      intrinsicName = "x86.xrstor";
      break;
    case X86::BI__builtin_ia32_xrstor64:
      intrinsicName = "x86.xrstor64";
      break;
    case X86::BI__builtin_ia32_xsaveopt:
      intrinsicName = "x86.xsaveopt";
      break;
    case X86::BI__builtin_ia32_xsaveopt64:
      intrinsicName = "x86.xsaveopt64";
      break;
    case X86::BI__builtin_ia32_xrstors:
      intrinsicName = "x86.xrstors";
      break;
    case X86::BI__builtin_ia32_xrstors64:
      intrinsicName = "x86.xrstors64";
      break;
    case X86::BI__builtin_ia32_xsavec:
      intrinsicName = "x86.xsavec";
      break;
    case X86::BI__builtin_ia32_xsavec64:
      intrinsicName = "x86.xsavec64";
      break;
    case X86::BI__builtin_ia32_xsaves:
      intrinsicName = "x86.xsaves";
      break;
    case X86::BI__builtin_ia32_xsaves64:
      intrinsicName = "x86.xsaves64";
      break;
    case X86::BI__builtin_ia32_xsetbv:
    case X86::BI_xsetbv:
      intrinsicName = "x86.xsetbv";
      break;
    }

    // The xsave family of instructions take a 64-bit mask that specifies
    // which processor state components to save/restore. The hardware expects
    // this mask split into two 32-bit registers: EDX (high 32 bits) and
    // EAX (low 32 bits).
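    // For example, a mask of 0x0000000700000003 is passed as EDX = 0x7 and
    // EAX = 0x3.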
980 mlir::Type i32Ty = builder.getSInt32Ty();
981
982 // Mhi = (uint32_t)(ops[1] >> 32) - extract high 32 bits via right shift
983 cir::ConstantOp shift32 = builder.getSInt64(32, loc);
984 mlir::Value mhi = builder.createShift(loc, ops[1], shift32.getResult(),
985 /*isShiftLeft=*/false);
986 mhi = builder.createIntCast(mhi, i32Ty);
987
988 // Mlo = (uint32_t)ops[1] - extract low 32 bits by truncation
989 mlir::Value mlo = builder.createIntCast(ops[1], i32Ty);
990
991 return builder.emitIntrinsicCallOp(loc, intrinsicName, voidTy,
992 mlir::ValueRange{ops[0], mhi, mlo});
993 }
994 case X86::BI__builtin_ia32_xgetbv:
995 case X86::BI_xgetbv:
996 // xgetbv reads the extended control register specified by ops[0] (ECX)
997 // and returns the 64-bit value
998 return builder.emitIntrinsicCallOp(getLoc(expr->getExprLoc()), "x86.xgetbv",
999 builder.getUInt64Ty(), ops[0]);
1000 case X86::BI__builtin_ia32_storedqudi128_mask:
1001 case X86::BI__builtin_ia32_storedqusi128_mask:
1002 case X86::BI__builtin_ia32_storedquhi128_mask:
1003 case X86::BI__builtin_ia32_storedquqi128_mask:
1004 case X86::BI__builtin_ia32_storeupd128_mask:
1005 case X86::BI__builtin_ia32_storeups128_mask:
1006 case X86::BI__builtin_ia32_storedqudi256_mask:
1007 case X86::BI__builtin_ia32_storedqusi256_mask:
1008 case X86::BI__builtin_ia32_storedquhi256_mask:
1009 case X86::BI__builtin_ia32_storedquqi256_mask:
1010 case X86::BI__builtin_ia32_storeupd256_mask:
1011 case X86::BI__builtin_ia32_storeups256_mask:
1012 case X86::BI__builtin_ia32_storedqudi512_mask:
1013 case X86::BI__builtin_ia32_storedqusi512_mask:
1014 case X86::BI__builtin_ia32_storedquhi512_mask:
1015 case X86::BI__builtin_ia32_storedquqi512_mask:
1016 case X86::BI__builtin_ia32_storeupd512_mask:
1017 case X86::BI__builtin_ia32_storeups512_mask:
1018 case X86::BI__builtin_ia32_storesbf16128_mask:
1019 case X86::BI__builtin_ia32_storesh128_mask:
1020 case X86::BI__builtin_ia32_storess128_mask:
1021 case X86::BI__builtin_ia32_storesd128_mask:
1022 cgm.errorNYI(expr->getSourceRange(),
1023 std::string("unimplemented x86 builtin call: ") +
1024 getContext().BuiltinInfo.getName(builtinID));
1025 return mlir::Value{};
1026 case X86::BI__builtin_ia32_cvtmask2b128:
1027 case X86::BI__builtin_ia32_cvtmask2b256:
1028 case X86::BI__builtin_ia32_cvtmask2b512:
1029 case X86::BI__builtin_ia32_cvtmask2w128:
1030 case X86::BI__builtin_ia32_cvtmask2w256:
1031 case X86::BI__builtin_ia32_cvtmask2w512:
1032 case X86::BI__builtin_ia32_cvtmask2d128:
1033 case X86::BI__builtin_ia32_cvtmask2d256:
1034 case X86::BI__builtin_ia32_cvtmask2d512:
1035 case X86::BI__builtin_ia32_cvtmask2q128:
1036 case X86::BI__builtin_ia32_cvtmask2q256:
1037 case X86::BI__builtin_ia32_cvtmask2q512:
1038 return emitX86SExtMask(this->getBuilder(), ops[0],
1039 convertType(expr->getType()),
1040 getLoc(expr->getExprLoc()));
1041 case X86::BI__builtin_ia32_cvtb2mask128:
1042 case X86::BI__builtin_ia32_cvtb2mask256:
1043 case X86::BI__builtin_ia32_cvtb2mask512:
1044 case X86::BI__builtin_ia32_cvtw2mask128:
1045 case X86::BI__builtin_ia32_cvtw2mask256:
1046 case X86::BI__builtin_ia32_cvtw2mask512:
1047 case X86::BI__builtin_ia32_cvtd2mask128:
1048 case X86::BI__builtin_ia32_cvtd2mask256:
1049 case X86::BI__builtin_ia32_cvtd2mask512:
1050 case X86::BI__builtin_ia32_cvtq2mask128:
1051 case X86::BI__builtin_ia32_cvtq2mask256:
1052 case X86::BI__builtin_ia32_cvtq2mask512:
1053 return emitX86ConvertToMask(*this, this->getBuilder(), ops[0],
1054 getLoc(expr->getExprLoc()));
1055 case X86::BI__builtin_ia32_cvtdq2ps512_mask:
1056 case X86::BI__builtin_ia32_cvtqq2ps512_mask:
1057 case X86::BI__builtin_ia32_cvtqq2pd512_mask:
1058 case X86::BI__builtin_ia32_vcvtw2ph512_mask:
1059 case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
1060 case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
1061 case X86::BI__builtin_ia32_cvtudq2ps512_mask:
1062 case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
1063 case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
1064 case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
1065 case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
1066 case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
1067 case X86::BI__builtin_ia32_vfmaddsh3_mask:
1068 case X86::BI__builtin_ia32_vfmaddss3_mask:
1069 case X86::BI__builtin_ia32_vfmaddsd3_mask:
1070 case X86::BI__builtin_ia32_vfmaddsh3_maskz:
1071 case X86::BI__builtin_ia32_vfmaddss3_maskz:
1072 case X86::BI__builtin_ia32_vfmaddsd3_maskz:
1073 case X86::BI__builtin_ia32_vfmaddsh3_mask3:
1074 case X86::BI__builtin_ia32_vfmaddss3_mask3:
1075 case X86::BI__builtin_ia32_vfmaddsd3_mask3:
1076 case X86::BI__builtin_ia32_vfmsubsh3_mask3:
1077 case X86::BI__builtin_ia32_vfmsubss3_mask3:
1078 case X86::BI__builtin_ia32_vfmsubsd3_mask3:
1079 case X86::BI__builtin_ia32_vfmaddph512_mask:
1080 case X86::BI__builtin_ia32_vfmaddph512_maskz:
1081 case X86::BI__builtin_ia32_vfmaddph512_mask3:
1082 case X86::BI__builtin_ia32_vfmaddps512_mask:
1083 case X86::BI__builtin_ia32_vfmaddps512_maskz:
1084 case X86::BI__builtin_ia32_vfmaddps512_mask3:
1085 case X86::BI__builtin_ia32_vfmsubps512_mask3:
1086 case X86::BI__builtin_ia32_vfmaddpd512_mask:
1087 case X86::BI__builtin_ia32_vfmaddpd512_maskz:
1088 case X86::BI__builtin_ia32_vfmaddpd512_mask3:
1089 case X86::BI__builtin_ia32_vfmsubpd512_mask3:
1090 case X86::BI__builtin_ia32_vfmsubph512_mask3:
1091 case X86::BI__builtin_ia32_vfmaddsubph512_mask:
1092 case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
1093 case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
1094 case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
1095 case X86::BI__builtin_ia32_vfmaddsubps512_mask:
1096 case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
1097 case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
1098 case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
1099 case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
1100 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
1101 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
1102 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
1103 case X86::BI__builtin_ia32_movdqa32store128_mask:
1104 case X86::BI__builtin_ia32_movdqa64store128_mask:
1105 case X86::BI__builtin_ia32_storeaps128_mask:
1106 case X86::BI__builtin_ia32_storeapd128_mask:
1107 case X86::BI__builtin_ia32_movdqa32store256_mask:
1108 case X86::BI__builtin_ia32_movdqa64store256_mask:
1109 case X86::BI__builtin_ia32_storeaps256_mask:
1110 case X86::BI__builtin_ia32_storeapd256_mask:
1111 case X86::BI__builtin_ia32_movdqa32store512_mask:
1112 case X86::BI__builtin_ia32_movdqa64store512_mask:
1113 case X86::BI__builtin_ia32_storeaps512_mask:
1114 case X86::BI__builtin_ia32_storeapd512_mask:
1115 case X86::BI__builtin_ia32_loadups128_mask:
1116 case X86::BI__builtin_ia32_loadups256_mask:
1117 case X86::BI__builtin_ia32_loadups512_mask:
1118 case X86::BI__builtin_ia32_loadupd128_mask:
1119 case X86::BI__builtin_ia32_loadupd256_mask:
1120 case X86::BI__builtin_ia32_loadupd512_mask:
1121 case X86::BI__builtin_ia32_loaddquqi128_mask:
1122 case X86::BI__builtin_ia32_loaddquqi256_mask:
1123 case X86::BI__builtin_ia32_loaddquqi512_mask:
1124 case X86::BI__builtin_ia32_loaddquhi128_mask:
1125 case X86::BI__builtin_ia32_loaddquhi256_mask:
1126 case X86::BI__builtin_ia32_loaddquhi512_mask:
1127 case X86::BI__builtin_ia32_loaddqusi128_mask:
1128 case X86::BI__builtin_ia32_loaddqusi256_mask:
1129 case X86::BI__builtin_ia32_loaddqusi512_mask:
1130 case X86::BI__builtin_ia32_loaddqudi128_mask:
1131 case X86::BI__builtin_ia32_loaddqudi256_mask:
1132 case X86::BI__builtin_ia32_loaddqudi512_mask:
1133 case X86::BI__builtin_ia32_loadsbf16128_mask:
1134 case X86::BI__builtin_ia32_loadsh128_mask:
1135 case X86::BI__builtin_ia32_loadss128_mask:
1136 case X86::BI__builtin_ia32_loadsd128_mask:
1137 case X86::BI__builtin_ia32_loadaps128_mask:
1138 case X86::BI__builtin_ia32_loadaps256_mask:
1139 case X86::BI__builtin_ia32_loadaps512_mask:
1140 case X86::BI__builtin_ia32_loadapd128_mask:
1141 case X86::BI__builtin_ia32_loadapd256_mask:
1142 case X86::BI__builtin_ia32_loadapd512_mask:
1143 case X86::BI__builtin_ia32_movdqa32load128_mask:
1144 case X86::BI__builtin_ia32_movdqa32load256_mask:
1145 case X86::BI__builtin_ia32_movdqa32load512_mask:
1146 case X86::BI__builtin_ia32_movdqa64load128_mask:
1147 case X86::BI__builtin_ia32_movdqa64load256_mask:
1148 case X86::BI__builtin_ia32_movdqa64load512_mask:
1149 case X86::BI__builtin_ia32_expandloaddf128_mask:
1150 case X86::BI__builtin_ia32_expandloaddf256_mask:
1151 case X86::BI__builtin_ia32_expandloaddf512_mask:
1152 case X86::BI__builtin_ia32_expandloadsf128_mask:
1153 case X86::BI__builtin_ia32_expandloadsf256_mask:
1154 case X86::BI__builtin_ia32_expandloadsf512_mask:
1155 case X86::BI__builtin_ia32_expandloaddi128_mask:
1156 case X86::BI__builtin_ia32_expandloaddi256_mask:
1157 case X86::BI__builtin_ia32_expandloaddi512_mask:
1158 case X86::BI__builtin_ia32_expandloadsi128_mask:
1159 case X86::BI__builtin_ia32_expandloadsi256_mask:
1160 case X86::BI__builtin_ia32_expandloadsi512_mask:
1161 case X86::BI__builtin_ia32_expandloadhi128_mask:
1162 case X86::BI__builtin_ia32_expandloadhi256_mask:
1163 case X86::BI__builtin_ia32_expandloadhi512_mask:
1164 case X86::BI__builtin_ia32_expandloadqi128_mask:
1165 case X86::BI__builtin_ia32_expandloadqi256_mask:
1166 case X86::BI__builtin_ia32_expandloadqi512_mask:
1167 case X86::BI__builtin_ia32_compressstoredf128_mask:
1168 case X86::BI__builtin_ia32_compressstoredf256_mask:
1169 case X86::BI__builtin_ia32_compressstoredf512_mask:
1170 case X86::BI__builtin_ia32_compressstoresf128_mask:
1171 case X86::BI__builtin_ia32_compressstoresf256_mask:
1172 case X86::BI__builtin_ia32_compressstoresf512_mask:
1173 case X86::BI__builtin_ia32_compressstoredi128_mask:
1174 case X86::BI__builtin_ia32_compressstoredi256_mask:
1175 case X86::BI__builtin_ia32_compressstoredi512_mask:
1176 case X86::BI__builtin_ia32_compressstoresi128_mask:
1177 case X86::BI__builtin_ia32_compressstoresi256_mask:
1178 case X86::BI__builtin_ia32_compressstoresi512_mask:
1179 case X86::BI__builtin_ia32_compressstorehi128_mask:
1180 case X86::BI__builtin_ia32_compressstorehi256_mask:
1181 case X86::BI__builtin_ia32_compressstorehi512_mask:
1182 case X86::BI__builtin_ia32_compressstoreqi128_mask:
1183 case X86::BI__builtin_ia32_compressstoreqi256_mask:
1184 case X86::BI__builtin_ia32_compressstoreqi512_mask:
1185 cgm.errorNYI(expr->getSourceRange(),
1186 std::string("unimplemented X86 builtin call: ") +
1187 getContext().BuiltinInfo.getName(builtinID));
1188 return mlir::Value{};
1189 case X86::BI__builtin_ia32_expanddf128_mask:
1190 case X86::BI__builtin_ia32_expanddf256_mask:
1191 case X86::BI__builtin_ia32_expanddf512_mask:
1192 case X86::BI__builtin_ia32_expandsf128_mask:
1193 case X86::BI__builtin_ia32_expandsf256_mask:
1194 case X86::BI__builtin_ia32_expandsf512_mask:
1195 case X86::BI__builtin_ia32_expanddi128_mask:
1196 case X86::BI__builtin_ia32_expanddi256_mask:
1197 case X86::BI__builtin_ia32_expanddi512_mask:
1198 case X86::BI__builtin_ia32_expandsi128_mask:
1199 case X86::BI__builtin_ia32_expandsi256_mask:
1200 case X86::BI__builtin_ia32_expandsi512_mask:
1201 case X86::BI__builtin_ia32_expandhi128_mask:
1202 case X86::BI__builtin_ia32_expandhi256_mask:
1203 case X86::BI__builtin_ia32_expandhi512_mask:
1204 case X86::BI__builtin_ia32_expandqi128_mask:
1205 case X86::BI__builtin_ia32_expandqi256_mask:
1206 case X86::BI__builtin_ia32_expandqi512_mask: {
1207 mlir::Location loc = getLoc(expr->getExprLoc());
1208 return emitX86CompressExpand(builder, loc, ops[0], ops[1], ops[2],
1209 "x86.avx512.mask.expand");
1210 }
1211 case X86::BI__builtin_ia32_compressdf128_mask:
1212 case X86::BI__builtin_ia32_compressdf256_mask:
1213 case X86::BI__builtin_ia32_compressdf512_mask:
1214 case X86::BI__builtin_ia32_compresssf128_mask:
1215 case X86::BI__builtin_ia32_compresssf256_mask:
1216 case X86::BI__builtin_ia32_compresssf512_mask:
1217 case X86::BI__builtin_ia32_compressdi128_mask:
1218 case X86::BI__builtin_ia32_compressdi256_mask:
1219 case X86::BI__builtin_ia32_compressdi512_mask:
1220 case X86::BI__builtin_ia32_compresssi128_mask:
1221 case X86::BI__builtin_ia32_compresssi256_mask:
1222 case X86::BI__builtin_ia32_compresssi512_mask:
1223 case X86::BI__builtin_ia32_compresshi128_mask:
1224 case X86::BI__builtin_ia32_compresshi256_mask:
1225 case X86::BI__builtin_ia32_compresshi512_mask:
1226 case X86::BI__builtin_ia32_compressqi128_mask:
1227 case X86::BI__builtin_ia32_compressqi256_mask:
1228 case X86::BI__builtin_ia32_compressqi512_mask: {
1229 mlir::Location loc = getLoc(expr->getExprLoc());
1230 return emitX86CompressExpand(builder, loc, ops[0], ops[1], ops[2],
1231 "x86.avx512.mask.compress");
1232 }
1233 case X86::BI__builtin_ia32_gather3div2df:
1234 case X86::BI__builtin_ia32_gather3div2di:
1235 case X86::BI__builtin_ia32_gather3div4df:
1236 case X86::BI__builtin_ia32_gather3div4di:
1237 case X86::BI__builtin_ia32_gather3div4sf:
1238 case X86::BI__builtin_ia32_gather3div4si:
1239 case X86::BI__builtin_ia32_gather3div8sf:
1240 case X86::BI__builtin_ia32_gather3div8si:
1241 case X86::BI__builtin_ia32_gather3siv2df:
1242 case X86::BI__builtin_ia32_gather3siv2di:
1243 case X86::BI__builtin_ia32_gather3siv4df:
1244 case X86::BI__builtin_ia32_gather3siv4di:
1245 case X86::BI__builtin_ia32_gather3siv4sf:
1246 case X86::BI__builtin_ia32_gather3siv4si:
1247 case X86::BI__builtin_ia32_gather3siv8sf:
1248 case X86::BI__builtin_ia32_gather3siv8si:
1249 case X86::BI__builtin_ia32_gathersiv8df:
1250 case X86::BI__builtin_ia32_gathersiv16sf:
1251 case X86::BI__builtin_ia32_gatherdiv8df:
1252 case X86::BI__builtin_ia32_gatherdiv16sf:
1253 case X86::BI__builtin_ia32_gathersiv8di:
1254 case X86::BI__builtin_ia32_gathersiv16si:
1255 case X86::BI__builtin_ia32_gatherdiv8di:
1256 case X86::BI__builtin_ia32_gatherdiv16si: {
1257 StringRef intrinsicName;
1258 switch (builtinID) {
1259 default:
1260 llvm_unreachable("Unexpected builtin");
1261 case X86::BI__builtin_ia32_gather3div2df:
1262 intrinsicName = "x86.avx512.mask.gather3div2.df";
1263 break;
1264 case X86::BI__builtin_ia32_gather3div2di:
1265 intrinsicName = "x86.avx512.mask.gather3div2.di";
1266 break;
1267 case X86::BI__builtin_ia32_gather3div4df:
1268 intrinsicName = "x86.avx512.mask.gather3div4.df";
1269 break;
1270 case X86::BI__builtin_ia32_gather3div4di:
1271 intrinsicName = "x86.avx512.mask.gather3div4.di";
1272 break;
1273 case X86::BI__builtin_ia32_gather3div4sf:
1274 intrinsicName = "x86.avx512.mask.gather3div4.sf";
1275 break;
1276 case X86::BI__builtin_ia32_gather3div4si:
1277 intrinsicName = "x86.avx512.mask.gather3div4.si";
1278 break;
1279 case X86::BI__builtin_ia32_gather3div8sf:
1280 intrinsicName = "x86.avx512.mask.gather3div8.sf";
1281 break;
1282 case X86::BI__builtin_ia32_gather3div8si:
1283 intrinsicName = "x86.avx512.mask.gather3div8.si";
1284 break;
1285 case X86::BI__builtin_ia32_gather3siv2df:
1286 intrinsicName = "x86.avx512.mask.gather3siv2.df";
1287 break;
1288 case X86::BI__builtin_ia32_gather3siv2di:
1289 intrinsicName = "x86.avx512.mask.gather3siv2.di";
1290 break;
1291 case X86::BI__builtin_ia32_gather3siv4df:
1292 intrinsicName = "x86.avx512.mask.gather3siv4.df";
1293 break;
1294 case X86::BI__builtin_ia32_gather3siv4di:
1295 intrinsicName = "x86.avx512.mask.gather3siv4.di";
1296 break;
1297 case X86::BI__builtin_ia32_gather3siv4sf:
1298 intrinsicName = "x86.avx512.mask.gather3siv4.sf";
1299 break;
1300 case X86::BI__builtin_ia32_gather3siv4si:
1301 intrinsicName = "x86.avx512.mask.gather3siv4.si";
1302 break;
1303 case X86::BI__builtin_ia32_gather3siv8sf:
1304 intrinsicName = "x86.avx512.mask.gather3siv8.sf";
1305 break;
1306 case X86::BI__builtin_ia32_gather3siv8si:
1307 intrinsicName = "x86.avx512.mask.gather3siv8.si";
1308 break;
1309 case X86::BI__builtin_ia32_gathersiv8df:
1310 intrinsicName = "x86.avx512.mask.gather.dpd.512";
1311 break;
1312 case X86::BI__builtin_ia32_gathersiv16sf:
1313 intrinsicName = "x86.avx512.mask.gather.dps.512";
1314 break;
1315 case X86::BI__builtin_ia32_gatherdiv8df:
1316 intrinsicName = "x86.avx512.mask.gather.qpd.512";
1317 break;
1318 case X86::BI__builtin_ia32_gatherdiv16sf:
1319 intrinsicName = "x86.avx512.mask.gather.qps.512";
1320 break;
1321 case X86::BI__builtin_ia32_gathersiv8di:
1322 intrinsicName = "x86.avx512.mask.gather.dpq.512";
1323 break;
1324 case X86::BI__builtin_ia32_gathersiv16si:
1325 intrinsicName = "x86.avx512.mask.gather.dpi.512";
1326 break;
1327 case X86::BI__builtin_ia32_gatherdiv8di:
1328 intrinsicName = "x86.avx512.mask.gather.qpq.512";
1329 break;
1330 case X86::BI__builtin_ia32_gatherdiv16si:
1331 intrinsicName = "x86.avx512.mask.gather.qpi.512";
1332 break;
1333 }
1334
1335 mlir::Location loc = getLoc(expr->getExprLoc());
1336 unsigned minElts =
1337 std::min(cast<cir::VectorType>(ops[0].getType()).getSize(),
1338 cast<cir::VectorType>(ops[2].getType()).getSize());
1339 ops[3] = getMaskVecValue(builder, loc, ops[3], minElts);
1340 return builder.emitIntrinsicCallOp(loc, intrinsicName,
1341 convertType(expr->getType()), ops);
1342 }
1343 case X86::BI__builtin_ia32_scattersiv8df:
1344 case X86::BI__builtin_ia32_scattersiv16sf:
1345 case X86::BI__builtin_ia32_scatterdiv8df:
1346 case X86::BI__builtin_ia32_scatterdiv16sf:
1347 case X86::BI__builtin_ia32_scattersiv8di:
1348 case X86::BI__builtin_ia32_scattersiv16si:
1349 case X86::BI__builtin_ia32_scatterdiv8di:
1350 case X86::BI__builtin_ia32_scatterdiv16si:
1351 case X86::BI__builtin_ia32_scatterdiv2df:
1352 case X86::BI__builtin_ia32_scatterdiv2di:
1353 case X86::BI__builtin_ia32_scatterdiv4df:
1354 case X86::BI__builtin_ia32_scatterdiv4di:
1355 case X86::BI__builtin_ia32_scatterdiv4sf:
1356 case X86::BI__builtin_ia32_scatterdiv4si:
1357 case X86::BI__builtin_ia32_scatterdiv8sf:
1358 case X86::BI__builtin_ia32_scatterdiv8si:
1359 case X86::BI__builtin_ia32_scattersiv2df:
1360 case X86::BI__builtin_ia32_scattersiv2di:
1361 case X86::BI__builtin_ia32_scattersiv4df:
1362 case X86::BI__builtin_ia32_scattersiv4di:
1363 case X86::BI__builtin_ia32_scattersiv4sf:
1364 case X86::BI__builtin_ia32_scattersiv4si:
1365 case X86::BI__builtin_ia32_scattersiv8sf:
1366 case X86::BI__builtin_ia32_scattersiv8si: {
1367 llvm::StringRef intrinsicName;
1368 switch (builtinID) {
1369 default:
1370 llvm_unreachable("Unexpected builtin");
1371 case X86::BI__builtin_ia32_scattersiv8df:
1372 intrinsicName = "x86.avx512.mask.scatter.dpd.512";
1373 break;
1374 case X86::BI__builtin_ia32_scattersiv16sf:
1375 intrinsicName = "x86.avx512.mask.scatter.dps.512";
1376 break;
1377 case X86::BI__builtin_ia32_scatterdiv8df:
1378 intrinsicName = "x86.avx512.mask.scatter.qpd.512";
1379 break;
1380 case X86::BI__builtin_ia32_scatterdiv16sf:
1381 intrinsicName = "x86.avx512.mask.scatter.qps.512";
1382 break;
1383 case X86::BI__builtin_ia32_scattersiv8di:
1384 intrinsicName = "x86.avx512.mask.scatter.dpq.512";
1385 break;
1386 case X86::BI__builtin_ia32_scattersiv16si:
1387 intrinsicName = "x86.avx512.mask.scatter.dpi.512";
1388 break;
1389 case X86::BI__builtin_ia32_scatterdiv8di:
1390 intrinsicName = "x86.avx512.mask.scatter.qpq.512";
1391 break;
1392 case X86::BI__builtin_ia32_scatterdiv16si:
1393 intrinsicName = "x86.avx512.mask.scatter.qpi.512";
1394 break;
1395 case X86::BI__builtin_ia32_scatterdiv2df:
1396 intrinsicName = "x86.avx512.mask.scatterdiv2.df";
1397 break;
1398 case X86::BI__builtin_ia32_scatterdiv2di:
1399 intrinsicName = "x86.avx512.mask.scatterdiv2.di";
1400 break;
1401 case X86::BI__builtin_ia32_scatterdiv4df:
1402 intrinsicName = "x86.avx512.mask.scatterdiv4.df";
1403 break;
1404 case X86::BI__builtin_ia32_scatterdiv4di:
1405 intrinsicName = "x86.avx512.mask.scatterdiv4.di";
1406 break;
1407 case X86::BI__builtin_ia32_scatterdiv4sf:
1408 intrinsicName = "x86.avx512.mask.scatterdiv4.sf";
1409 break;
1410 case X86::BI__builtin_ia32_scatterdiv4si:
1411 intrinsicName = "x86.avx512.mask.scatterdiv4.si";
1412 break;
1413 case X86::BI__builtin_ia32_scatterdiv8sf:
1414 intrinsicName = "x86.avx512.mask.scatterdiv8.sf";
1415 break;
1416 case X86::BI__builtin_ia32_scatterdiv8si:
1417 intrinsicName = "x86.avx512.mask.scatterdiv8.si";
1418 break;
1419 case X86::BI__builtin_ia32_scattersiv2df:
1420 intrinsicName = "x86.avx512.mask.scattersiv2.df";
1421 break;
1422 case X86::BI__builtin_ia32_scattersiv2di:
1423 intrinsicName = "x86.avx512.mask.scattersiv2.di";
1424 break;
1425 case X86::BI__builtin_ia32_scattersiv4df:
1426 intrinsicName = "x86.avx512.mask.scattersiv4.df";
1427 break;
1428 case X86::BI__builtin_ia32_scattersiv4di:
1429 intrinsicName = "x86.avx512.mask.scattersiv4.di";
1430 break;
1431 case X86::BI__builtin_ia32_scattersiv4sf:
1432 intrinsicName = "x86.avx512.mask.scattersiv4.sf";
1433 break;
1434 case X86::BI__builtin_ia32_scattersiv4si:
1435 intrinsicName = "x86.avx512.mask.scattersiv4.si";
1436 break;
1437 case X86::BI__builtin_ia32_scattersiv8sf:
1438 intrinsicName = "x86.avx512.mask.scattersiv8.sf";
1439 break;
1440 case X86::BI__builtin_ia32_scattersiv8si:
1441 intrinsicName = "x86.avx512.mask.scattersiv8.si";
1442 break;
1443 }
1444
1445 mlir::Location loc = getLoc(expr->getExprLoc());
1446 unsigned minElts =
1447 std::min(cast<cir::VectorType>(ops[2].getType()).getSize(),
1448 cast<cir::VectorType>(ops[3].getType()).getSize());
1449 ops[1] = getMaskVecValue(builder, loc, ops[1], minElts);
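// The scatter mask (ops[1]) is trimmed to the smaller of the index and
// value vector lengths, since only that many elements are written.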
1450
1451 return builder.emitIntrinsicCallOp(loc, intrinsicName,
1452 convertType(expr->getType()), ops);
1453 }
1454 case X86::BI__builtin_ia32_vextractf128_pd256:
1455 case X86::BI__builtin_ia32_vextractf128_ps256:
1456 case X86::BI__builtin_ia32_vextractf128_si256:
1457 case X86::BI__builtin_ia32_extract128i256:
1458 case X86::BI__builtin_ia32_extractf64x4_mask:
1459 case X86::BI__builtin_ia32_extractf32x4_mask:
1460 case X86::BI__builtin_ia32_extracti64x4_mask:
1461 case X86::BI__builtin_ia32_extracti32x4_mask:
1462 case X86::BI__builtin_ia32_extractf32x8_mask:
1463 case X86::BI__builtin_ia32_extracti32x8_mask:
1464 case X86::BI__builtin_ia32_extractf32x4_256_mask:
1465 case X86::BI__builtin_ia32_extracti32x4_256_mask:
1466 case X86::BI__builtin_ia32_extractf64x2_256_mask:
1467 case X86::BI__builtin_ia32_extracti64x2_256_mask:
1468 case X86::BI__builtin_ia32_extractf64x2_512_mask:
1469 case X86::BI__builtin_ia32_extracti64x2_512_mask: {
1470 mlir::Location loc = getLoc(expr->getExprLoc());
1471 cir::VectorType dstTy = cast<cir::VectorType>(convertType(expr->getType()));
1472 unsigned numElts = dstTy.getSize();
1473 unsigned srcNumElts = cast<cir::VectorType>(ops[0].getType()).getSize();
1474 unsigned subVectors = srcNumElts / numElts;
1475 assert(llvm::isPowerOf2_32(subVectors) && "Expected power of 2 subvectors");
1476 unsigned index =
1477 ops[1].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue();
1478
1479 index &= subVectors - 1; // Remove any extra bits.
1480 index *= numElts;
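// For example, extractf64x4 extracts 4 of 8 elements (subVectors == 2):
// an immediate of 1 masks to index 1 and scales to 4, selecting source
// elements [4..7].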
1481
1482 int64_t indices[16];
1483 std::iota(indices, indices + numElts, index);
1484
1485 mlir::Value poison =
1486 builder.getConstant(loc, cir::PoisonAttr::get(ops[0].getType()));
1487 mlir::Value res = builder.createVecShuffle(loc, ops[0], poison,
1488 ArrayRef(indices, numElts));
1489 if (ops.size() == 4)
1490 res = emitX86Select(builder, loc, ops[3], res, ops[2]);
1491
1492 return res;
1493 }
1494 case X86::BI__builtin_ia32_vinsertf128_pd256:
1495 case X86::BI__builtin_ia32_vinsertf128_ps256:
1496 case X86::BI__builtin_ia32_vinsertf128_si256:
1497 case X86::BI__builtin_ia32_insert128i256:
1498 case X86::BI__builtin_ia32_insertf64x4:
1499 case X86::BI__builtin_ia32_insertf32x4:
1500 case X86::BI__builtin_ia32_inserti64x4:
1501 case X86::BI__builtin_ia32_inserti32x4:
1502 case X86::BI__builtin_ia32_insertf32x8:
1503 case X86::BI__builtin_ia32_inserti32x8:
1504 case X86::BI__builtin_ia32_insertf32x4_256:
1505 case X86::BI__builtin_ia32_inserti32x4_256:
1506 case X86::BI__builtin_ia32_insertf64x2_256:
1507 case X86::BI__builtin_ia32_inserti64x2_256:
1508 case X86::BI__builtin_ia32_insertf64x2_512:
1509 case X86::BI__builtin_ia32_inserti64x2_512: {
1510 unsigned dstNumElts = cast<cir::VectorType>(ops[0].getType()).getSize();
1511 unsigned srcNumElts = cast<cir::VectorType>(ops[1].getType()).getSize();
1512 unsigned subVectors = dstNumElts / srcNumElts;
1513 assert(llvm::isPowerOf2_32(subVectors) && "Expected power of 2 subvectors");
1514 assert(dstNumElts <= 16);
1515
1516 uint64_t index = getZExtIntValueFromConstOp(ops[2]);
1517 index &= subVectors - 1; // Remove any extra bits.
1518 index *= srcNumElts;
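// For example, vinsertf128 into a 256-bit destination (dstNumElts == 8,
// srcNumElts == 4): imm bit 0 places the subvector at elements [0..3] or
// [4..7].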
1519
1520 llvm::SmallVector<int64_t, 16> mask(dstNumElts);
1521 for (unsigned i = 0; i != dstNumElts; ++i)
1522 mask[i] = (i >= srcNumElts) ? srcNumElts + (i % srcNumElts) : i;
1523
1524 mlir::Value op1 =
1525 builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[1], mask);
1526
1527 for (unsigned i = 0; i != dstNumElts; ++i) {
1528 if (i >= index && i < (index + srcNumElts))
1529 mask[i] = (i - index) + dstNumElts;
1530 else
1531 mask[i] = i;
1532 }
1533
1534 return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], op1,
1535 mask);
1536 }
1537 case X86::BI__builtin_ia32_pmovqd512_mask:
1538 case X86::BI__builtin_ia32_pmovwb512_mask: {
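// Truncate each element of ops[0] to the narrower element type, then use
// the mask in ops[2] to select between the truncated result and ops[1].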
1539 mlir::Value res =
1540 builder.createIntCast(ops[0], cast<cir::VectorType>(ops[1].getType()));
1541 return emitX86Select(builder, getLoc(expr->getExprLoc()), ops[2], res,
1542 ops[1]);
1543 }
1544 case X86::BI__builtin_ia32_pblendw128:
1545 case X86::BI__builtin_ia32_blendpd:
1546 case X86::BI__builtin_ia32_blendps:
1547 case X86::BI__builtin_ia32_blendpd256:
1548 case X86::BI__builtin_ia32_blendps256:
1549 case X86::BI__builtin_ia32_pblendw256:
1550 case X86::BI__builtin_ia32_pblendd128:
1551 case X86::BI__builtin_ia32_pblendd256: {
1552 uint32_t imm = getZExtIntValueFromConstOp(ops[2]);
1553 unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
1554
1555 llvm::SmallVector<mlir::Attribute, 16> indices;
1556 // If there are more than 8 elements, the 8-bit immediate is reused for
1557 // the upper elements, so make sure we handle that.
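// For example, pblendw256 has 16 word elements but an 8-bit immediate:
// bit (i % 8) controls both element i and element i + 8, so imm == 0x03
// takes elements 0, 1, 8 and 9 from ops[1] and the rest from ops[0].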
1558 mlir::Type i32Ty = builder.getSInt32Ty();
1559 for (unsigned i = 0; i != numElts; ++i)
1560 indices.push_back(
1561 cir::IntAttr::get(i32Ty, ((imm >> (i % 8)) & 0x1) ? numElts + i : i));
1562
1563 return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], ops[1],
1564 indices);
1565 }
1566 case X86::BI__builtin_ia32_pshuflw:
1567 case X86::BI__builtin_ia32_pshuflw256:
1568 case X86::BI__builtin_ia32_pshuflw512:
1569 return emitPshufWord(builder, ops[0], ops[1], getLoc(expr->getExprLoc()),
1570 true);
1571 case X86::BI__builtin_ia32_pshufhw:
1572 case X86::BI__builtin_ia32_pshufhw256:
1573 case X86::BI__builtin_ia32_pshufhw512:
1574 return emitPshufWord(builder, ops[0], ops[1], getLoc(expr->getExprLoc()),
1575 false);
1576 case X86::BI__builtin_ia32_pshufd:
1577 case X86::BI__builtin_ia32_pshufd256:
1578 case X86::BI__builtin_ia32_pshufd512:
1579 case X86::BI__builtin_ia32_vpermilpd:
1580 case X86::BI__builtin_ia32_vpermilps:
1581 case X86::BI__builtin_ia32_vpermilpd256:
1582 case X86::BI__builtin_ia32_vpermilps256:
1583 case X86::BI__builtin_ia32_vpermilpd512:
1584 case X86::BI__builtin_ia32_vpermilps512: {
1585 const uint32_t imm = getSExtIntValueFromConstOp(ops[1]);
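// For example, pshufd with imm == 0x1B (2-bit fields 3, 2, 1, 0) reverses
// the four dwords within each 128-bit lane.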
1586
1587 llvm::SmallVector<int64_t, 16> mask;
1588 computeFullLaneShuffleMask(*this, ops[0], imm, false, mask);
1589
1590 return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], mask);
1591 }
1592 case X86::BI__builtin_ia32_shufpd:
1593 case X86::BI__builtin_ia32_shufpd256:
1594 case X86::BI__builtin_ia32_shufpd512:
1595 case X86::BI__builtin_ia32_shufps:
1596 case X86::BI__builtin_ia32_shufps256:
1597 case X86::BI__builtin_ia32_shufps512: {
1598 const uint32_t imm = getZExtIntValueFromConstOp(ops[2]);
1599
1600 llvm::SmallVector<int64_t, 16> mask;
1601 computeFullLaneShuffleMask(*this, ops[0], imm, true, mask);
1602
1603 return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], ops[1],
1604 mask);
1605 }
1606 case X86::BI__builtin_ia32_permdi256:
1607 case X86::BI__builtin_ia32_permdf256:
1608 case X86::BI__builtin_ia32_permdi512:
1609 case X86::BI__builtin_ia32_permdf512: {
1610 unsigned imm =
1611 ops[1].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue();
1612 unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
1613
1614 // These intrinsics operate on 256-bit lanes of four 64-bit elements.
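// For example, imm == 0x1B decodes to the fields 3, 2, 1, 0, reversing
// the four 64-bit elements of each lane.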
1615 int64_t indices[8];
1616
1617 for (unsigned l = 0; l != numElts; l += 4)
1618 for (unsigned i = 0; i != 4; ++i)
1619 indices[l + i] = l + ((imm >> (2 * i)) & 0x3);
1620
1621 return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0],
1622 ArrayRef(indices, numElts));
1623 }
1624 case X86::BI__builtin_ia32_palignr128:
1625 case X86::BI__builtin_ia32_palignr256:
1626 case X86::BI__builtin_ia32_palignr512: {
1627 uint32_t shiftVal = getZExtIntValueFromConstOp(ops[2]) & 0xff;
1628
1629 unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
1630 assert(numElts % 16 == 0);
1631
1632 // If palignr is shifting the pair of vectors more than the size of two
1633 // lanes, emit zero.
1634 if (shiftVal >= 32)
1635 return builder.getNullValue(convertType(expr->getType()),
1636 getLoc(expr->getExprLoc()));
1637
1638 // If palignr is shifting the pair of input vectors more than one lane,
1639 // but less than two lanes, convert to shifting in zeroes.
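// For example, a 128-bit palignr with shiftVal == 20 becomes a 4-byte
// shift that pulls zeroes in from the (now null) second vector.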
1640 if (shiftVal > 16) {
1641 shiftVal -= 16;
1642 ops[1] = ops[0];
1643 ops[0] =
1644 builder.getNullValue(ops[0].getType(), getLoc(expr->getExprLoc()));
1645 }
1646
1647 int64_t indices[64];
1648 // palignr operates on 128-bit lanes, so handle each lane separately.
1649 for (unsigned l = 0; l != numElts; l += 16) {
1650 for (unsigned i = 0; i != 16; ++i) {
1651 uint32_t idx = shiftVal + i;
1652 if (idx >= 16)
1653 idx += numElts - 16; // End of lane, switch operand.
1654 indices[l + i] = l + idx;
1655 }
1656 }
1657
1658 return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[1], ops[0],
1659 ArrayRef(indices, numElts));
1660 }
1661 case X86::BI__builtin_ia32_alignd128:
1662 case X86::BI__builtin_ia32_alignd256:
1663 case X86::BI__builtin_ia32_alignd512:
1664 case X86::BI__builtin_ia32_alignq128:
1665 case X86::BI__builtin_ia32_alignq256:
1666 case X86::BI__builtin_ia32_alignq512: {
1667 unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
1668 unsigned shiftVal =
1669 ops[2].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() &
1670 0xff;
1671
1672 // Mask the shift amount to the width of the vector.
1673 shiftVal &= numElts - 1;
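// Conceptually, valign treats the two sources as one double-width vector
// and takes numElts consecutive elements starting at shiftVal.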
1674
1675 llvm::SmallVector<mlir::Attribute, 16> indices;
1676 mlir::Type i32Ty = builder.getSInt32Ty();
1677 for (unsigned i = 0; i != numElts; ++i)
1678 indices.push_back(cir::IntAttr::get(i32Ty, i + shiftVal));
1679
1680 return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], ops[1],
1681 indices);
1682 }
1683 case X86::BI__builtin_ia32_shuf_f32x4_256:
1684 case X86::BI__builtin_ia32_shuf_f64x2_256:
1685 case X86::BI__builtin_ia32_shuf_i32x4_256:
1686 case X86::BI__builtin_ia32_shuf_i64x2_256:
1687 case X86::BI__builtin_ia32_shuf_f32x4:
1688 case X86::BI__builtin_ia32_shuf_f64x2:
1689 case X86::BI__builtin_ia32_shuf_i32x4:
1690 case X86::BI__builtin_ia32_shuf_i64x2: {
1691 mlir::Value src1 = ops[0];
1692 mlir::Value src2 = ops[1];
1693
1694 unsigned imm =
1695 ops[2].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue();
1696
1697 unsigned numElems = cast<cir::VectorType>(src1.getType()).getSize();
1698 unsigned totalBits = getContext().getTypeSize(expr->getArg(0)->getType());
1699 unsigned numLanes = totalBits == 512 ? 4 : 2;
1700 unsigned numElemsPerLane = numElems / numLanes;
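// Each destination lane consumes log2(numLanes) bits of the immediate;
// e.g. for the 512-bit forms, imm == 0x4E yields the lanes
// {src1[2], src1[3], src2[0], src2[1]}.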
1701
1702 llvm::SmallVector<mlir::Attribute, 16> indices;
1703 mlir::Type i32Ty = builder.getSInt32Ty();
1704
1705 for (unsigned l = 0; l != numElems; l += numElemsPerLane) {
1706 unsigned index = (imm % numLanes) * numElemsPerLane;
1707 imm /= numLanes;
1708 if (l >= (numElems / 2))
1709 index += numElems;
1710 for (unsigned i = 0; i != numElemsPerLane; ++i) {
1711 indices.push_back(cir::IntAttr::get(i32Ty, index + i));
1712 }
1713 }
1714
1715 return builder.createVecShuffle(getLoc(expr->getExprLoc()), src1, src2,
1716 indices);
1717 }
1718 case X86::BI__builtin_ia32_vperm2f128_pd256:
1719 case X86::BI__builtin_ia32_vperm2f128_ps256:
1720 case X86::BI__builtin_ia32_vperm2f128_si256:
1721 case X86::BI__builtin_ia32_permti256:
1722 case X86::BI__builtin_ia32_pslldqi128_byteshift:
1723 case X86::BI__builtin_ia32_pslldqi256_byteshift:
1724 case X86::BI__builtin_ia32_pslldqi512_byteshift:
1725 case X86::BI__builtin_ia32_psrldqi128_byteshift:
1726 case X86::BI__builtin_ia32_psrldqi256_byteshift:
1727 case X86::BI__builtin_ia32_psrldqi512_byteshift:
1728 cgm.errorNYI(expr->getSourceRange(),
1729 std::string("unimplemented X86 builtin call: ") +
1730 getContext().BuiltinInfo.getName(builtinID));
1731 return mlir::Value{};
1732 case X86::BI__builtin_ia32_kshiftliqi:
1733 case X86::BI__builtin_ia32_kshiftlihi:
1734 case X86::BI__builtin_ia32_kshiftlisi:
1735 case X86::BI__builtin_ia32_kshiftlidi: {
1736 mlir::Location loc = getLoc(expr->getExprLoc());
1737 unsigned shiftVal =
1738 ops[1].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() &
1739 0xff;
1740 unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth();
1741
1742 if (shiftVal >= numElems)
1743 return builder.getNullValue(ops[0].getType(), loc);
1744
1745 mlir::Value in = getMaskVecValue(builder, loc, ops[0], numElems);
1746
1747 llvm::SmallVector<mlir::Attribute, 64> indices;
1748 mlir::Type i32Ty = builder.getSInt32Ty();
1749 for (auto i : llvm::seq<unsigned>(0, numElems))
1750 indices.push_back(cir::IntAttr::get(i32Ty, numElems + i - shiftVal));
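// This shuffles the concatenation {zero, in}: index numElems + i - shiftVal
// selects in[i - shiftVal] once i >= shiftVal and zero below that, which
// is a left shift of the mask bits.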
1751
1752 mlir::Value zero = builder.getNullValue(in.getType(), loc);
1753 mlir::Value sv = builder.createVecShuffle(loc, zero, in, indices);
1754 return builder.createBitcast(sv, ops[0].getType());
1755 }
1756 case X86::BI__builtin_ia32_kshiftriqi:
1757 case X86::BI__builtin_ia32_kshiftrihi:
1758 case X86::BI__builtin_ia32_kshiftrisi:
1759 case X86::BI__builtin_ia32_kshiftridi: {
1760 mlir::Location loc = getLoc(expr->getExprLoc());
1761 unsigned shiftVal =
1762 ops[1].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() &
1763 0xff;
1764 unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth();
1765
1766 if (shiftVal >= numElems)
1767 return builder.getNullValue(ops[0].getType(), loc);
1768
1769 mlir::Value in = getMaskVecValue(builder, loc, ops[0], numElems);
1770
1771 llvm::SmallVector<mlir::Attribute, 64> indices;
1772 mlir::Type i32Ty = builder.getSInt32Ty();
1773 for (auto i : llvm::seq<unsigned>(0, numElems))
1774 indices.push_back(cir::IntAttr::get(i32Ty, i + shiftVal));
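// The mirror image of kshiftli: result[i] = in[i + shiftVal], with
// indices past the end of `in` reading zeroes from the second operand.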
1775
1776 mlir::Value zero = builder.getNullValue(in.getType(), loc);
1777 mlir::Value sv = builder.createVecShuffle(loc, in, zero, indices);
1778 return builder.createBitcast(sv, ops[0].getType());
1779 }
1780 case X86::BI__builtin_ia32_vprotbi:
1781 case X86::BI__builtin_ia32_vprotwi:
1782 case X86::BI__builtin_ia32_vprotdi:
1783 case X86::BI__builtin_ia32_vprotqi:
1784 case X86::BI__builtin_ia32_prold128:
1785 case X86::BI__builtin_ia32_prold256:
1786 case X86::BI__builtin_ia32_prold512:
1787 case X86::BI__builtin_ia32_prolq128:
1788 case X86::BI__builtin_ia32_prolq256:
1789 case X86::BI__builtin_ia32_prolq512:
1790 return emitX86FunnelShift(builder, getLoc(expr->getExprLoc()), ops[0],
1791 ops[0], ops[1], false);
1792 case X86::BI__builtin_ia32_prord128:
1793 case X86::BI__builtin_ia32_prord256:
1794 case X86::BI__builtin_ia32_prord512:
1795 case X86::BI__builtin_ia32_prorq128:
1796 case X86::BI__builtin_ia32_prorq256:
1797 case X86::BI__builtin_ia32_prorq512:
1798 return emitX86FunnelShift(builder, getLoc(expr->getExprLoc()), ops[0],
1799 ops[0], ops[1], true);
1800 case X86::BI__builtin_ia32_selectb_128:
1801 case X86::BI__builtin_ia32_selectb_256:
1802 case X86::BI__builtin_ia32_selectb_512:
1803 case X86::BI__builtin_ia32_selectw_128:
1804 case X86::BI__builtin_ia32_selectw_256:
1805 case X86::BI__builtin_ia32_selectw_512:
1806 case X86::BI__builtin_ia32_selectd_128:
1807 case X86::BI__builtin_ia32_selectd_256:
1808 case X86::BI__builtin_ia32_selectd_512:
1809 case X86::BI__builtin_ia32_selectq_128:
1810 case X86::BI__builtin_ia32_selectq_256:
1811 case X86::BI__builtin_ia32_selectq_512:
1812 case X86::BI__builtin_ia32_selectph_128:
1813 case X86::BI__builtin_ia32_selectph_256:
1814 case X86::BI__builtin_ia32_selectph_512:
1815 case X86::BI__builtin_ia32_selectpbf_128:
1816 case X86::BI__builtin_ia32_selectpbf_256:
1817 case X86::BI__builtin_ia32_selectpbf_512:
1818 case X86::BI__builtin_ia32_selectps_128:
1819 case X86::BI__builtin_ia32_selectps_256:
1820 case X86::BI__builtin_ia32_selectps_512:
1821 case X86::BI__builtin_ia32_selectpd_128:
1822 case X86::BI__builtin_ia32_selectpd_256:
1823 case X86::BI__builtin_ia32_selectpd_512:
1824 return emitX86Select(builder, getLoc(expr->getExprLoc()), ops[0], ops[1],
1825 ops[2]);
1826 case X86::BI__builtin_ia32_selectsh_128:
1827 case X86::BI__builtin_ia32_selectsbf_128:
1828 case X86::BI__builtin_ia32_selectss_128:
1829 case X86::BI__builtin_ia32_selectsd_128: {
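// Scalar selects only affect element 0: extract it from both sources,
// select on the low mask bit, and reinsert the result into ops[1].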
1830 mlir::Location loc = getLoc(expr->getExprLoc());
1831 mlir::Value scalar1 =
1832 builder.createExtractElement(loc, ops[1], uint64_t(0));
1833 mlir::Value scalar2 =
1834 builder.createExtractElement(loc, ops[2], uint64_t(0));
1835 mlir::Value result =
1836 emitX86ScalarSelect(builder, loc, ops[0], scalar1, scalar2);
1837 return builder.createInsertElement(loc, ops[1], result, uint64_t(0));
1838 }
1839 case X86::BI__builtin_ia32_cmpb128_mask:
1840 case X86::BI__builtin_ia32_cmpb256_mask:
1841 case X86::BI__builtin_ia32_cmpb512_mask:
1842 case X86::BI__builtin_ia32_cmpw128_mask:
1843 case X86::BI__builtin_ia32_cmpw256_mask:
1844 case X86::BI__builtin_ia32_cmpw512_mask:
1845 case X86::BI__builtin_ia32_cmpd128_mask:
1846 case X86::BI__builtin_ia32_cmpd256_mask:
1847 case X86::BI__builtin_ia32_cmpd512_mask:
1848 case X86::BI__builtin_ia32_cmpq128_mask:
1849 case X86::BI__builtin_ia32_cmpq256_mask:
1850 case X86::BI__builtin_ia32_cmpq512_mask:
1851 case X86::BI__builtin_ia32_ucmpb128_mask:
1852 case X86::BI__builtin_ia32_ucmpb256_mask:
1853 case X86::BI__builtin_ia32_ucmpb512_mask:
1854 case X86::BI__builtin_ia32_ucmpw128_mask:
1855 case X86::BI__builtin_ia32_ucmpw256_mask:
1856 case X86::BI__builtin_ia32_ucmpw512_mask:
1857 case X86::BI__builtin_ia32_ucmpd128_mask:
1858 case X86::BI__builtin_ia32_ucmpd256_mask:
1859 case X86::BI__builtin_ia32_ucmpd512_mask:
1860 case X86::BI__builtin_ia32_ucmpq128_mask:
1861 case X86::BI__builtin_ia32_ucmpq256_mask:
1862 case X86::BI__builtin_ia32_ucmpq512_mask:
1863 cgm.errorNYI(expr->getSourceRange(),
1864 std::string("unimplemented X86 builtin call: ") +
1865 getContext().BuiltinInfo.getName(builtinID));
1866 return mlir::Value{};
1867 case X86::BI__builtin_ia32_vpcomb:
1868 case X86::BI__builtin_ia32_vpcomw:
1869 case X86::BI__builtin_ia32_vpcomd:
1870 case X86::BI__builtin_ia32_vpcomq:
1871 return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, true);
1872 case X86::BI__builtin_ia32_vpcomub:
1873 case X86::BI__builtin_ia32_vpcomuw:
1874 case X86::BI__builtin_ia32_vpcomud:
1875 case X86::BI__builtin_ia32_vpcomuq:
1876 return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, false);
1877 case X86::BI__builtin_ia32_kortestcqi:
1878 case X86::BI__builtin_ia32_kortestchi:
1879 case X86::BI__builtin_ia32_kortestcsi:
1880 case X86::BI__builtin_ia32_kortestcdi: {
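// kortestc is true when the OR of the two masks is all ones, so compare
// the combined mask against an all-ones constant.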
1881 mlir::Location loc = getLoc(expr->getExprLoc());
1882 cir::IntType ty = cast<cir::IntType>(ops[0].getType());
1883 mlir::Value allOnesOp =
1884 builder.getConstAPInt(loc, ty, APInt::getAllOnes(ty.getWidth()));
1885 mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops);
1886 mlir::Value cmp =
1887 cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allOnesOp);
1888 return builder.createCast(cir::CastKind::bool_to_int, cmp,
1889 cgm.convertType(expr->getType()));
1890 }
1891 case X86::BI__builtin_ia32_kortestzqi:
1892 case X86::BI__builtin_ia32_kortestzhi:
1893 case X86::BI__builtin_ia32_kortestzsi:
1894 case X86::BI__builtin_ia32_kortestzdi: {
1895 mlir::Location loc = getLoc(expr->getExprLoc());
1896 cir::IntType ty = cast<cir::IntType>(ops[0].getType());
1897 mlir::Value allZerosOp = builder.getNullValue(ty, loc).getResult();
1898 mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops);
1899 mlir::Value cmp =
1900 cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allZerosOp);
1901 return builder.createCast(cir::CastKind::bool_to_int, cmp,
1902 cgm.convertType(expr->getType()));
1903 }
1904 case X86::BI__builtin_ia32_ktestcqi:
1905 return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
1906 "x86.avx512.ktestc.b", ops);
1907 case X86::BI__builtin_ia32_ktestzqi:
1908 return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
1909 "x86.avx512.ktestz.b", ops);
1910 case X86::BI__builtin_ia32_ktestchi:
1911 return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
1912 "x86.avx512.ktestc.w", ops);
1913 case X86::BI__builtin_ia32_ktestzhi:
1914 return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
1915 "x86.avx512.ktestz.w", ops);
1916 case X86::BI__builtin_ia32_ktestcsi:
1917 return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
1918 "x86.avx512.ktestc.d", ops);
1919 case X86::BI__builtin_ia32_ktestzsi:
1920 return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
1921 "x86.avx512.ktestz.d", ops);
1922 case X86::BI__builtin_ia32_ktestcdi:
1923 return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
1924 "x86.avx512.ktestc.q", ops);
1925 case X86::BI__builtin_ia32_ktestzdi:
1926 return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
1927 "x86.avx512.ktestz.q", ops);
1928 case X86::BI__builtin_ia32_kaddqi:
1929 return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
1930 "x86.avx512.kadd.b", ops);
1931 case X86::BI__builtin_ia32_kaddhi:
1932 return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
1933 "x86.avx512.kadd.w", ops);
1934 case X86::BI__builtin_ia32_kaddsi:
1935 return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
1936 "x86.avx512.kadd.d", ops);
1937 case X86::BI__builtin_ia32_kadddi:
1938 return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
1939 "x86.avx512.kadd.q", ops);
1940 case X86::BI__builtin_ia32_kandqi:
1941 case X86::BI__builtin_ia32_kandhi:
1942 case X86::BI__builtin_ia32_kandsi:
1943 case X86::BI__builtin_ia32_kanddi:
1944 return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
1945 cir::BinOpKind::And, ops);
1946 case X86::BI__builtin_ia32_kandnqi:
1947 case X86::BI__builtin_ia32_kandnhi:
1948 case X86::BI__builtin_ia32_kandnsi:
1949 case X86::BI__builtin_ia32_kandndi:
1950 return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
1951 cir::BinOpKind::And, ops, true);
1952 case X86::BI__builtin_ia32_korqi:
1953 case X86::BI__builtin_ia32_korhi:
1954 case X86::BI__builtin_ia32_korsi:
1955 case X86::BI__builtin_ia32_kordi:
1956 return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
1957 cir::BinOpKind::Or, ops);
1958 case X86::BI__builtin_ia32_kxnorqi:
1959 case X86::BI__builtin_ia32_kxnorhi:
1960 case X86::BI__builtin_ia32_kxnorsi:
1961 case X86::BI__builtin_ia32_kxnordi:
1962 return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
1963 cir::BinOpKind::Xor, ops, true);
1964 case X86::BI__builtin_ia32_kxorqi:
1965 case X86::BI__builtin_ia32_kxorhi:
1966 case X86::BI__builtin_ia32_kxorsi:
1967 case X86::BI__builtin_ia32_kxordi:
1968 return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
1969 cir::BinOpKind::Xor, ops);
1970 case X86::BI__builtin_ia32_knotqi:
1971 case X86::BI__builtin_ia32_knothi:
1972 case X86::BI__builtin_ia32_knotsi:
1973 case X86::BI__builtin_ia32_knotdi: {
1974 cir::IntType intTy = cast<cir::IntType>(ops[0].getType());
1975 unsigned numElts = intTy.getWidth();
1976 mlir::Value resVec =
1977 getMaskVecValue(builder, getLoc(expr->getExprLoc()), ops[0], numElts);
1978 return builder.createBitcast(builder.createNot(resVec), ops[0].getType());
1979 }
1980 case X86::BI__builtin_ia32_kmovb:
1981 case X86::BI__builtin_ia32_kmovw:
1982 case X86::BI__builtin_ia32_kmovd:
1983 case X86::BI__builtin_ia32_kmovq: {
1984 // Bitcast to vXi1 type and then back to integer. This gets the mask
1985 // register type into the IR, but might be optimized out depending on
1986 // what's around it.
1987 cir::IntType intTy = cast<cir::IntType>(ops[0].getType());
1988 unsigned numElts = intTy.getWidth();
1989 mlir::Value resVec =
1990 getMaskVecValue(builder, getLoc(expr->getExprLoc()), ops[0], numElts);
1991 return builder.createBitcast(resVec, ops[0].getType());
1992 }
1993 case X86::BI__builtin_ia32_sqrtsh_round_mask:
1994 case X86::BI__builtin_ia32_sqrtsd_round_mask:
1995 case X86::BI__builtin_ia32_sqrtss_round_mask:
1996 cgm.errorNYI(expr->getSourceRange(),
1997 std::string("unimplemented X86 builtin call: ") +
1998 getContext().BuiltinInfo.getName(builtinID));
1999 return mlir::Value{};
2000 case X86::BI__builtin_ia32_sqrtph512:
2001 case X86::BI__builtin_ia32_sqrtps512:
2002 case X86::BI__builtin_ia32_sqrtpd512: {
2003 mlir::Location loc = getLoc(expr->getExprLoc());
2004 mlir::Value arg = ops[0];
2005 return cir::SqrtOp::create(builder, loc, arg.getType(), arg).getResult();
2006 }
2007 case X86::BI__builtin_ia32_pmuludq128:
2008 case X86::BI__builtin_ia32_pmuludq256:
2009 case X86::BI__builtin_ia32_pmuludq512: {
2010 unsigned opTypePrimitiveSizeInBits =
2011 cgm.getDataLayout().getTypeSizeInBits(ops[0].getType());
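// pmuludq multiplies the low 32 bits of each 64-bit element pair as
// unsigned values; the pmuldq cases below do the same with sign extension.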
2012 return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned*/ false,
2013 ops, opTypePrimitiveSizeInBits);
2014 }
2015 case X86::BI__builtin_ia32_pmuldq128:
2016 case X86::BI__builtin_ia32_pmuldq256:
2017 case X86::BI__builtin_ia32_pmuldq512: {
2018 unsigned opTypePrimitiveSizeInBits =
2019 cgm.getDataLayout().getTypeSizeInBits(ops[0].getType());
2020 return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned*/ true,
2021 ops, opTypePrimitiveSizeInBits);
2022 }
2023 case X86::BI__builtin_ia32_pternlogd512_mask:
2024 case X86::BI__builtin_ia32_pternlogq512_mask:
2025 case X86::BI__builtin_ia32_pternlogd128_mask:
2026 case X86::BI__builtin_ia32_pternlogd256_mask:
2027 case X86::BI__builtin_ia32_pternlogq128_mask:
2028 case X86::BI__builtin_ia32_pternlogq256_mask:
2029 case X86::BI__builtin_ia32_pternlogd512_maskz:
2030 case X86::BI__builtin_ia32_pternlogq512_maskz:
2031 case X86::BI__builtin_ia32_pternlogd128_maskz:
2032 case X86::BI__builtin_ia32_pternlogd256_maskz:
2033 case X86::BI__builtin_ia32_pternlogq128_maskz:
2034 case X86::BI__builtin_ia32_pternlogq256_maskz:
2035 case X86::BI__builtin_ia32_vpshldd128:
2036 case X86::BI__builtin_ia32_vpshldd256:
2037 case X86::BI__builtin_ia32_vpshldd512:
2038 case X86::BI__builtin_ia32_vpshldq128:
2039 case X86::BI__builtin_ia32_vpshldq256:
2040 case X86::BI__builtin_ia32_vpshldq512:
2041 case X86::BI__builtin_ia32_vpshldw128:
2042 case X86::BI__builtin_ia32_vpshldw256:
2043 case X86::BI__builtin_ia32_vpshldw512:
2044 case X86::BI__builtin_ia32_vpshrdd128:
2045 case X86::BI__builtin_ia32_vpshrdd256:
2046 case X86::BI__builtin_ia32_vpshrdd512:
2047 case X86::BI__builtin_ia32_vpshrdq128:
2048 case X86::BI__builtin_ia32_vpshrdq256:
2049 case X86::BI__builtin_ia32_vpshrdq512:
2050 case X86::BI__builtin_ia32_vpshrdw128:
2051 case X86::BI__builtin_ia32_vpshrdw256:
2052 case X86::BI__builtin_ia32_vpshrdw512:
2053 cgm.errorNYI(expr->getSourceRange(),
2054 std::string("unimplemented X86 builtin call: ") +
2055 getContext().BuiltinInfo.getName(builtinID));
2056 return mlir::Value{};
2057 case X86::BI__builtin_ia32_reduce_fadd_pd512:
2058 case X86::BI__builtin_ia32_reduce_fadd_ps512:
2059 case X86::BI__builtin_ia32_reduce_fadd_ph512:
2060 case X86::BI__builtin_ia32_reduce_fadd_ph256:
2061 case X86::BI__builtin_ia32_reduce_fadd_ph128: {
2062 assert(!cir::MissingFeatures::fastMathFlags());
2063 return builder.emitIntrinsicCallOp(getLoc(expr->getExprLoc()),
2064 "vector.reduce.fadd", ops[0].getType(),
2065 mlir::ValueRange{ops[0], ops[1]});
2066 }
2067 case X86::BI__builtin_ia32_reduce_fmul_pd512:
2068 case X86::BI__builtin_ia32_reduce_fmul_ps512:
2069 case X86::BI__builtin_ia32_reduce_fmul_ph512:
2070 case X86::BI__builtin_ia32_reduce_fmul_ph256:
2071 case X86::BI__builtin_ia32_reduce_fmul_ph128: {
2072 assert(!cir::MissingFeatures::fastMathFlags());
2073 return builder.emitIntrinsicCallOp(getLoc(expr->getExprLoc()),
2074 "vector.reduce.fmul", ops[0].getType(),
2075 mlir::ValueRange{ops[0], ops[1]});
2076 }
2077 case X86::BI__builtin_ia32_reduce_fmax_pd512:
2078 case X86::BI__builtin_ia32_reduce_fmax_ps512:
2079 case X86::BI__builtin_ia32_reduce_fmax_ph512:
2080 case X86::BI__builtin_ia32_reduce_fmax_ph256:
2081 case X86::BI__builtin_ia32_reduce_fmax_ph128: {
2082 assert(!cir::MissingFeatures::fastMathFlags());
2083 cir::VectorType vecTy = cast<cir::VectorType>(ops[0].getType());
2084 return builder.emitIntrinsicCallOp(
2085 getLoc(expr->getExprLoc()), "vector.reduce.fmax",
2086 vecTy.getElementType(), mlir::ValueRange{ops[0]});
2087 }
2088 case X86::BI__builtin_ia32_reduce_fmin_pd512:
2089 case X86::BI__builtin_ia32_reduce_fmin_ps512:
2090 case X86::BI__builtin_ia32_reduce_fmin_ph512:
2091 case X86::BI__builtin_ia32_reduce_fmin_ph256:
2092 case X86::BI__builtin_ia32_reduce_fmin_ph128: {
2093 assert(!cir::MissingFeatures::fastMathFlags());
2094 cir::VectorType vecTy = cast<cir::VectorType>(ops[0].getType());
2095 return builder.emitIntrinsicCallOp(
2096 getLoc(expr->getExprLoc()), "vector.reduce.fmin",
2097 vecTy.getElementType(), mlir::ValueRange{ops[0]});
2098 }
2099 case X86::BI__builtin_ia32_rdrand16_step:
2100 case X86::BI__builtin_ia32_rdrand32_step:
2101 case X86::BI__builtin_ia32_rdrand64_step:
2102 case X86::BI__builtin_ia32_rdseed16_step:
2103 case X86::BI__builtin_ia32_rdseed32_step:
2104 case X86::BI__builtin_ia32_rdseed64_step: {
2105 llvm::StringRef intrinsicName;
2106 switch (builtinID) {
2107 default:
2108 llvm_unreachable("Unsupported intrinsic!");
2109 case X86::BI__builtin_ia32_rdrand16_step:
2110 intrinsicName = "x86.rdrand.16";
2111 break;
2112 case X86::BI__builtin_ia32_rdrand32_step:
2113 intrinsicName = "x86.rdrand.32";
2114 break;
2115 case X86::BI__builtin_ia32_rdrand64_step:
2116 intrinsicName = "x86.rdrand.64";
2117 break;
2118 case X86::BI__builtin_ia32_rdseed16_step:
2119 intrinsicName = "x86.rdseed.16";
2120 break;
2121 case X86::BI__builtin_ia32_rdseed32_step:
2122 intrinsicName = "x86.rdseed.32";
2123 break;
2124 case X86::BI__builtin_ia32_rdseed64_step:
2125 intrinsicName = "x86.rdseed.64";
2126 break;
2127 }
2128
2129 mlir::Location loc = getLoc(expr->getExprLoc());
2130 mlir::Type randTy = cast<cir::PointerType>(ops[0].getType()).getPointee();
2131 llvm::SmallVector<mlir::Type, 2> resultTypes = {randTy,
2132 builder.getUInt32Ty()};
2133 cir::RecordType resRecord =
2134 cir::RecordType::get(&getMLIRContext(), resultTypes, false, false,
2135 cir::RecordType::RecordKind::Struct);
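// The intrinsic returns {random value, success flag}: member 0 is stored
// through the out-pointer and member 1 (1 on success, 0 on failure) is
// the value of the builtin.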
2136
2137 mlir::Value call =
2138 builder.emitIntrinsicCallOp(loc, intrinsicName, resRecord);
2139 mlir::Value rand =
2140 cir::ExtractMemberOp::create(builder, loc, randTy, call, 0);
2141 builder.CIRBaseBuilderTy::createStore(loc, rand, ops[0]);
2142
2143 return cir::ExtractMemberOp::create(builder, loc, builder.getUInt32Ty(),
2144 call, 1);
2145 }
2146 case X86::BI__builtin_ia32_addcarryx_u32:
2147 case X86::BI__builtin_ia32_addcarryx_u64:
2148 case X86::BI__builtin_ia32_subborrow_u32:
2149 case X86::BI__builtin_ia32_subborrow_u64:
2150 cgm.errorNYI(expr->getSourceRange(),
2151 std::string("unimplemented X86 builtin call: ") +
2152 getContext().BuiltinInfo.getName(builtinID));
2153 return mlir::Value{};
2154 case X86::BI__builtin_ia32_fpclassps128_mask:
2155 case X86::BI__builtin_ia32_fpclassps256_mask:
2156 case X86::BI__builtin_ia32_fpclassps512_mask:
2157 case X86::BI__builtin_ia32_vfpclassbf16128_mask:
2158 case X86::BI__builtin_ia32_vfpclassbf16256_mask:
2159 case X86::BI__builtin_ia32_vfpclassbf16512_mask:
2160 case X86::BI__builtin_ia32_fpclassph128_mask:
2161 case X86::BI__builtin_ia32_fpclassph256_mask:
2162 case X86::BI__builtin_ia32_fpclassph512_mask:
2163 case X86::BI__builtin_ia32_fpclasspd128_mask:
2164 case X86::BI__builtin_ia32_fpclasspd256_mask:
2165 case X86::BI__builtin_ia32_fpclasspd512_mask:
2166 return emitX86Fpclass(builder, getLoc(expr->getExprLoc()), builtinID, ops);
2167 case X86::BI__builtin_ia32_vp2intersect_q_512:
2168 case X86::BI__builtin_ia32_vp2intersect_q_256:
2169 case X86::BI__builtin_ia32_vp2intersect_q_128:
2170 case X86::BI__builtin_ia32_vp2intersect_d_512:
2171 case X86::BI__builtin_ia32_vp2intersect_d_256:
2172 case X86::BI__builtin_ia32_vp2intersect_d_128: {
2173 unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
2174 mlir::Location loc = getLoc(expr->getExprLoc());
2175 StringRef intrinsicName;
2176
2177 switch (builtinID) {
2178 default:
2179 llvm_unreachable("Unexpected builtin");
2180 case X86::BI__builtin_ia32_vp2intersect_q_512:
2181 intrinsicName = "x86.avx512.vp2intersect.q.512";
2182 break;
2183 case X86::BI__builtin_ia32_vp2intersect_q_256:
2184 intrinsicName = "x86.avx512.vp2intersect.q.256";
2185 break;
2186 case X86::BI__builtin_ia32_vp2intersect_q_128:
2187 intrinsicName = "x86.avx512.vp2intersect.q.128";
2188 break;
2189 case X86::BI__builtin_ia32_vp2intersect_d_512:
2190 intrinsicName = "x86.avx512.vp2intersect.d.512";
2191 break;
2192 case X86::BI__builtin_ia32_vp2intersect_d_256:
2193 intrinsicName = "x86.avx512.vp2intersect.d.256";
2194 break;
2195 case X86::BI__builtin_ia32_vp2intersect_d_128:
2196 intrinsicName = "x86.avx512.vp2intersect.d.128";
2197 break;
2198 }
2199
2200 auto resVector = cir::VectorType::get(builder.getBoolTy(), numElts);
2201
2202 cir::RecordType resRecord =
2203 cir::RecordType::get(&getMLIRContext(), {resVector, resVector}, false,
2204 false, cir::RecordType::RecordKind::Struct);
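// vp2intersect produces two mask vectors at once; each is widened with
// emitX86MaskedCompareResult and stored through its out-pointer (ops[2]
// and ops[3]), so the builtin itself yields no value.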
2205
2206 mlir::Value call = builder.emitIntrinsicCallOp(
2207 getLoc(expr->getExprLoc()), intrinsicName, resRecord,
2208 mlir::ValueRange{ops[0], ops[1]});
2209 mlir::Value result =
2210 cir::ExtractMemberOp::create(builder, loc, resVector, call, 0);
2211 result = emitX86MaskedCompareResult(builder, result, numElts, nullptr, loc);
2212 Address addr = Address(
2213 ops[2], clang::CharUnits::fromQuantity(std::max(1U, numElts / 8)));
2214 builder.createStore(loc, result, addr);
2215
2216 result = cir::ExtractMemberOp::create(builder, loc, resVector, call, 1);
2217 result = emitX86MaskedCompareResult(builder, result, numElts, nullptr, loc);
2218 addr = Address(ops[3],
2219 clang::CharUnits::fromQuantity(std::max(1U, numElts / 8)));
2220 builder.createStore(loc, result, addr);
2221 return mlir::Value{};
2222 }
2223 case X86::BI__builtin_ia32_vpmultishiftqb128:
2224 case X86::BI__builtin_ia32_vpmultishiftqb256:
2225 case X86::BI__builtin_ia32_vpmultishiftqb512:
2226 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
2227 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
2228 case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
2229 case X86::BI__builtin_ia32_cmpeqps:
2230 case X86::BI__builtin_ia32_cmpeqpd:
2231 case X86::BI__builtin_ia32_cmpltps:
2232 case X86::BI__builtin_ia32_cmpltpd:
2233 case X86::BI__builtin_ia32_cmpleps:
2234 case X86::BI__builtin_ia32_cmplepd:
2235 case X86::BI__builtin_ia32_cmpunordps:
2236 case X86::BI__builtin_ia32_cmpunordpd:
2237 case X86::BI__builtin_ia32_cmpneqps:
2238 case X86::BI__builtin_ia32_cmpneqpd:
2239 cgm.errorNYI(expr->getSourceRange(),
2240 std::string("unimplemented X86 builtin call: ") +
2241 getContext().BuiltinInfo.getName(builtinID));
2242 return mlir::Value{};
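// cmpnltps/cmpnltpd are unordered comparisons (UGE), so emit the ordered
// inverse (OLT) and invert the result.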
2243 case X86::BI__builtin_ia32_cmpnltps:
2244 case X86::BI__builtin_ia32_cmpnltpd:
2245 return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()),
2246 cir::CmpOpKind::lt, /*shouldInvert=*/true);
2247 case X86::BI__builtin_ia32_cmpnleps:
2248 case X86::BI__builtin_ia32_cmpnlepd:
2249 return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()),
2250 cir::CmpOpKind::le, /*shouldInvert=*/true);
2251 case X86::BI__builtin_ia32_cmpordps:
2252 case X86::BI__builtin_ia32_cmpordpd:
2253 case X86::BI__builtin_ia32_cmpph128_mask:
2254 case X86::BI__builtin_ia32_cmpph256_mask:
2255 case X86::BI__builtin_ia32_cmpph512_mask:
2256 case X86::BI__builtin_ia32_cmpps128_mask:
2257 case X86::BI__builtin_ia32_cmpps256_mask:
2258 case X86::BI__builtin_ia32_cmpps512_mask:
2259 case X86::BI__builtin_ia32_cmppd128_mask:
2260 case X86::BI__builtin_ia32_cmppd256_mask:
2261 case X86::BI__builtin_ia32_cmppd512_mask:
2262 case X86::BI__builtin_ia32_vcmpbf16512_mask:
2263 case X86::BI__builtin_ia32_vcmpbf16256_mask:
2264 case X86::BI__builtin_ia32_vcmpbf16128_mask:
2265 case X86::BI__builtin_ia32_cmpps:
2266 case X86::BI__builtin_ia32_cmpps256:
2267 case X86::BI__builtin_ia32_cmppd:
2268 case X86::BI__builtin_ia32_cmppd256:
2269 case X86::BI__builtin_ia32_cmpeqss:
2270 case X86::BI__builtin_ia32_cmpltss:
2271 case X86::BI__builtin_ia32_cmpless:
2272 case X86::BI__builtin_ia32_cmpunordss:
2273 case X86::BI__builtin_ia32_cmpneqss:
2274 case X86::BI__builtin_ia32_cmpnltss:
2275 case X86::BI__builtin_ia32_cmpnless:
2276 case X86::BI__builtin_ia32_cmpordss:
2277 case X86::BI__builtin_ia32_cmpeqsd:
2278 case X86::BI__builtin_ia32_cmpltsd:
2279 case X86::BI__builtin_ia32_cmplesd:
2280 case X86::BI__builtin_ia32_cmpunordsd:
2281 case X86::BI__builtin_ia32_cmpneqsd:
2282 case X86::BI__builtin_ia32_cmpnltsd:
2283 case X86::BI__builtin_ia32_cmpnlesd:
2284 case X86::BI__builtin_ia32_cmpordsd:
2285 cgm.errorNYI(expr->getSourceRange(),
2286 std::string("unimplemented X86 builtin call: ") +
2287 getContext().BuiltinInfo.getName(builtinID));
2288 return mlir::Value{};
2289 case X86::BI__builtin_ia32_vcvtph2ps_mask:
2290 case X86::BI__builtin_ia32_vcvtph2ps256_mask:
2291 case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
2292 mlir::Location loc = getLoc(expr->getExprLoc());
2293 return emitX86CvtF16ToFloatExpr(builder, loc, ops,
2294 convertType(expr->getType()));
2295 }
2296 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
2297 mlir::Location loc = getLoc(expr->getExprLoc());
2298 cir::VectorType resTy = cast<cir::VectorType>(convertType(expr->getType()));
2299
2300 cir::VectorType inputTy = cast<cir::VectorType>(ops[0].getType());
2301 unsigned numElts = inputTy.getSize();
2302
2303 mlir::Value mask = getMaskVecValue(builder, loc, ops[2], numElts);
2304
2305 llvm::SmallVector<mlir::Value, 3> args;
2306 args.push_back(ops[0]);
2307 args.push_back(ops[1]);
2308 args.push_back(mask);
2309
2310 return builder.emitIntrinsicCallOp(
2311 loc, "x86.avx512bf16.mask.cvtneps2bf16.128", resTy, args);
2312 }
2313 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
2314 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
2315 mlir::Location loc = getLoc(expr->getExprLoc());
2316 cir::VectorType resTy = cast<cir::VectorType>(convertType(expr->getType()));
2317 StringRef intrinsicName;
2318 if (builtinID == X86::BI__builtin_ia32_cvtneps2bf16_256_mask) {
2319 intrinsicName = "x86.avx512bf16.cvtneps2bf16.256";
2320 } else {
2321 assert(builtinID == X86::BI__builtin_ia32_cvtneps2bf16_512_mask);
2322 intrinsicName = "x86.avx512bf16.cvtneps2bf16.512";
2323 }
2324
2325 mlir::Value res = builder.emitIntrinsicCallOp(loc, intrinsicName, resTy,
2326 mlir::ValueRange{ops[0]});
2327
2328 return emitX86Select(builder, loc, ops[2], res, ops[1]);
2329 }
2330 case X86::BI__cpuid:
2331 case X86::BI__cpuidex: {
2332 mlir::Location loc = getLoc(expr->getExprLoc());
2333 mlir::Value subFuncId = builtinID == X86::BI__cpuidex
2334 ? ops[2]
2335 : builder.getConstInt(loc, sInt32Ty, 0);
2336 cir::CpuIdOp::create(builder, loc, /*cpuInfo=*/ops[0],
2337 /*functionId=*/ops[1], /*subFunctionId=*/subFuncId);
2338 return mlir::Value{};
2339 }
2340 case X86::BI__emul:
2341 case X86::BI__emulu:
2342 case X86::BI__mulh:
2343 case X86::BI__umulh:
2344 case X86::BI_mul128:
2345 case X86::BI_umul128: {
2346 cgm.errorNYI(expr->getSourceRange(),
2347 std::string("unimplemented X86 builtin call: ") +
2348 getContext().BuiltinInfo.getName(builtinID));
2349 return mlir::Value{};
2350 }
2351 case X86::BI__faststorefence: {
2352 cir::AtomicFenceOp::create(
2353 builder, getLoc(expr->getExprLoc()),
2354 cir::MemOrder::SequentiallyConsistent,
2355 cir::SyncScopeKindAttr::get(&getMLIRContext(),
2356 cir::SyncScopeKind::System));
2357 return mlir::Value{};
2358 }
2359 case X86::BI__shiftleft128:
2360 case X86::BI__shiftright128: {
2361 // Flip low/high ops and zero-extend amount to matching type.
2362 // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
2363 // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
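// For amounts in [1, 63], fshl(high, low, amt) is
// (high << amt) | (low >> (64 - amt)), i.e. the high half of the
// 128-bit shift.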
2364 std::swap(ops[0], ops[1]);
2365
2366 // Zero-extend the shift amount to i64 if needed.
2367 auto amtTy = mlir::cast<cir::IntType>(ops[2].getType());
2368 cir::IntType i64Ty = builder.getUInt64Ty();
2369
2370 if (amtTy != i64Ty)
2371 ops[2] = builder.createIntCast(ops[2], i64Ty);
2372
2373 const StringRef intrinsicName =
2374 (builtinID == X86::BI__shiftleft128) ? "fshl" : "fshr";
2375 return builder.emitIntrinsicCallOp(
2376 getLoc(expr->getExprLoc()), intrinsicName, i64Ty,
2377 mlir::ValueRange{ops[0], ops[1], ops[2]});
2378 }
2379 case X86::BI_ReadWriteBarrier:
2380 case X86::BI_ReadBarrier:
2381 case X86::BI_WriteBarrier: {
2382 cir::AtomicFenceOp::create(
2383 builder, getLoc(expr->getExprLoc()),
2384 cir::MemOrder::SequentiallyConsistent,
2385 cir::SyncScopeKindAttr::get(&getMLIRContext(),
2386 cir::SyncScopeKind::SingleThread));
2387 return mlir::Value{};
2388 }
2389 case X86::BI_AddressOfReturnAddress: {
2390 mlir::Location loc = getLoc(expr->getExprLoc());
2391 mlir::Value addr =
2392 cir::AddrOfReturnAddrOp::create(builder, loc, allocaInt8PtrTy);
2393 return builder.createCast(loc, cir::CastKind::bitcast, addr, voidPtrTy);
2394 }
2395 case X86::BI__stosb:
2396 case X86::BI__ud2:
2397 case X86::BI__int2c:
2398 case X86::BI__readfsbyte:
2399 case X86::BI__readfsword:
2400 case X86::BI__readfsdword:
2401 case X86::BI__readfsqword:
2402 case X86::BI__readgsbyte:
2403 case X86::BI__readgsword:
2404 case X86::BI__readgsdword:
2405 case X86::BI__readgsqword:
2406 case X86::BI__builtin_ia32_encodekey128_u32:
2407 case X86::BI__builtin_ia32_encodekey256_u32: {
2408 cgm.errorNYI(expr->getSourceRange(),
2409 std::string("unimplemented X86 builtin call: ") +
2410 getContext().BuiltinInfo.getName(builtinID));
2411 return mlir::Value{};
2412 }
2413 case X86::BI__builtin_ia32_aesenc128kl_u8:
2414 case X86::BI__builtin_ia32_aesdec128kl_u8:
2415 case X86::BI__builtin_ia32_aesenc256kl_u8:
2416 case X86::BI__builtin_ia32_aesdec256kl_u8: {
2417 llvm::StringRef intrinsicName;
2418 switch (builtinID) {
2419 default:
2420 llvm_unreachable("Unexpected builtin");
2421 case X86::BI__builtin_ia32_aesenc128kl_u8:
2422 intrinsicName = "x86.aesenc128kl";
2423 break;
2424 case X86::BI__builtin_ia32_aesdec128kl_u8:
2425 intrinsicName = "x86.aesdec128kl";
2426 break;
2427 case X86::BI__builtin_ia32_aesenc256kl_u8:
2428 intrinsicName = "x86.aesenc256kl";
2429 break;
2430 case X86::BI__builtin_ia32_aesdec256kl_u8:
2431 intrinsicName = "x86.aesdec256kl";
2432 break;
2433 }
2434
2435 return emitX86Aes(builder, getLoc(expr->getExprLoc()), intrinsicName,
2436 convertType(expr->getType()), ops);
2437 }
2438 case X86::BI__builtin_ia32_aesencwide128kl_u8:
2439 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
2440 case X86::BI__builtin_ia32_aesencwide256kl_u8:
2441 case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
2442 llvm::StringRef intrinsicName;
2443 switch (builtinID) {
2444 default:
2445 llvm_unreachable("Unexpected builtin");
2446 case X86::BI__builtin_ia32_aesencwide128kl_u8:
2447 intrinsicName = "x86.aesencwide128kl";
2448 break;
2449 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
2450 intrinsicName = "x86.aesdecwide128kl";
2451 break;
2452 case X86::BI__builtin_ia32_aesencwide256kl_u8:
2453 intrinsicName = "x86.aesencwide256kl";
2454 break;
2455 case X86::BI__builtin_ia32_aesdecwide256kl_u8:
2456 intrinsicName = "x86.aesdecwide256kl";
2457 break;
2458 }
2459
2460 return emitX86Aeswide(builder, getLoc(expr->getExprLoc()), intrinsicName,
2461 convertType(expr->getType()), ops);
2462 }
2463 case X86::BI__builtin_ia32_vfcmaddcph512_mask:
2464 case X86::BI__builtin_ia32_vfmaddcph512_mask:
2465 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
2466 case X86::BI__builtin_ia32_vfmaddcsh_round_mask:
2467 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
2468 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3:
2469 case X86::BI__builtin_ia32_prefetchi:
2470 cgm.errorNYI(expr->getSourceRange(),
2471 std::string("unimplemented X86 builtin call: ") +
2472 getContext().BuiltinInfo.getName(builtinID));
2473 return mlir::Value{};
2474 }
2475}