clang 23.0.0git
CIRGenBuiltinAArch64.cpp
Go to the documentation of this file.
1//===---- CIRGenBuiltinAArch64.cpp - Emit CIR for AArch64 builtins --------===//
2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3// See https://llvm.org/LICENSE.txt for license information.
4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5//
6//===----------------------------------------------------------------------===//
7//
8// This contains code to emit ARM64 Builtin calls as CIR or a function call
9// to be later resolved.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CIRGenBuilder.h"
14#include "CIRGenFunction.h"
19
20// TODO(cir): once all builtins are covered, decide whether we still
21// need to use LLVM intrinsics or if there's a better approach to follow. Right
22// now the intrinsics are reused to make it convenient to encode all thousands
23// of them and pass them down to LLVM lowering.
24#include "llvm/IR/Intrinsics.h"
25#include "llvm/IR/IntrinsicsAArch64.h"
26
27#include "mlir/IR/BuiltinTypes.h"
28#include "mlir/IR/Value.h"
31
32using namespace clang;
33using namespace clang::CIRGen;
34using namespace llvm;
35using namespace clang::aarch64;
36
37// Generate vscale * scalingFactor
38static mlir::Value genVscaleTimesFactor(mlir::Location loc,
39 CIRGenBuilderTy builder,
40 mlir::Type cirTy,
41 int32_t scalingFactor) {
42 mlir::Value vscale = builder.emitIntrinsicCallOp(loc, "vscale", cirTy);
43 return builder.createNUWAMul(loc, vscale,
44 builder.getUInt64(scalingFactor, loc));
45}
46
47#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
48 {#NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
49 TypeModifier}
50
51#define SVEMAP2(NameBase, TypeModifier) \
52 {#NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier}
54#define GET_SVE_LLVM_INTRINSIC_MAP
55#include "clang/Basic/arm_sve_builtin_cg.inc"
56#undef GET_SVE_LLVM_INTRINSIC_MAP
57};
58
62
63// Check if builtin `builtinID` is present in `intrinsicMap`. If so, return
64// the corresponding info struct; otherwise return nullptr.
65static const ARMVectorIntrinsicInfo *
67 unsigned builtinID, bool &mapProvenSorted) {
68
69#ifndef NDEBUG
70 if (!mapProvenSorted) {
71 assert(llvm::is_sorted(intrinsicMap));
72 mapProvenSorted = true;
73 }
74#endif
75
76 const ARMVectorIntrinsicInfo *info =
77 llvm::lower_bound(intrinsicMap, builtinID);
78
79 if (info != intrinsicMap.end() && info->BuiltinID == builtinID)
80 return info;
81
82 return nullptr;
83}
84
85//===----------------------------------------------------------------------===//
86// Generic helpers
87//===----------------------------------------------------------------------===//
88// Emit an intrinsic where all operands are of the same type as the result.
89// Depending on mode, this may be a constrained floating-point intrinsic.
90static mlir::Value
92 StringRef intrName, mlir::Type retTy,
95
96 return builder.emitIntrinsicCallOp(loc, intrName, retTy, ops);
97}
98
99static llvm::StringRef getLLVMIntrNameNoPrefix(llvm::Intrinsic::ID intrID) {
100 llvm::StringRef llvmIntrName = llvm::Intrinsic::getBaseName(intrID);
101 assert(llvmIntrName.starts_with("llvm.") && "Not an LLVM intrinsic!");
102 return llvmIntrName.drop_front(/*strlen("llvm.")=*/5);
103}
104
105//===----------------------------------------------------------------------===//
106// NEON helpers
107//===----------------------------------------------------------------------===//
108/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
109/// argument that specifies the vector type. The additional argument is meant
110/// for Sema checking (see `CheckNeonBuiltinFunctionCall`) and this function
111/// should be kept consistent with the logic in Sema.
112/// TODO: Make this return false for SISD builtins.
113/// TODO(cir): Share this with ARM.cpp
114static bool hasExtraNeonArgument(unsigned builtinID) {
115 // Required by the headers included below, but not in this particular
116 // function.
117 [[maybe_unused]] int PtrArgNum = -1;
118 [[maybe_unused]] bool HasConstPtr = false;
119
120 // The mask encodes the type. We don't care about the actual value. Instead,
121 // we just check whether its been set.
122 uint64_t mask = 0;
123 switch (builtinID) {
124#define GET_NEON_OVERLOAD_CHECK
125#include "clang/Basic/arm_fp16.inc"
126#include "clang/Basic/arm_neon.inc"
127#undef GET_NEON_OVERLOAD_CHECK
128 // Non-neon builtins for controling VFP that take extra argument for
129 // discriminating the type.
130 case ARM::BI__builtin_arm_vcvtr_f:
131 case ARM::BI__builtin_arm_vcvtr_d:
132 mask = 1;
133 }
134 switch (builtinID) {
135 default:
136 break;
137 }
138
139 return mask != 0;
140}
141
// Map NEON type flags to a floating-point CIR vector type.
//
// The switch on `intTypeFlags.getEltType()` returns a vector of `cgf.fP16Ty`,
// `cgf.floatTy` or `cgf.doubleTy` elements with 4, 2 or 1 lanes respectively;
// the lane count is doubled when `intTypeFlags.isQuad()` is set. Any other
// element type reaches `llvm_unreachable`.
//
// NOTE(review): the `case` labels that select each return are not present in
// this listing - confirm them against the upstream file.
142static cir::VectorType getFloatNeonType(CIRGenFunction &cgf,
143 NeonTypeFlags intTypeFlags) {
// `isQuad` is used as a shift amount below: 0 keeps the base lane count,
// 1 doubles it.
144 int isQuad = intTypeFlags.isQuad();
145 switch (intTypeFlags.getEltType()) {
147 return cir::VectorType::get(cgf.fP16Ty, (4 << isQuad));
149 return cir::VectorType::get(cgf.floatTy, (2 << isQuad));
151 return cir::VectorType::get(cgf.doubleTy, (1 << isQuad));
152 default:
153 llvm_unreachable("Type can't be converted to floating-point!");
154 }
155}
156
157static int64_t getIntValueFromConstOp(mlir::Value val) {
158 return val.getDefiningOp<cir::ConstantOp>().getIntValue().getSExtValue();
159}
160
161/// Build a constant shift amount vector of `vecTy` to shift a vector
162/// Here `shiftVal` is a constant integer that will be broadcast into a
163/// a const vector of `vecTy` which is the return value of this function
164/// If `neg` is true, the shift amount is negated before splatting (used
165/// when encoding a right shift as a left shift by a negative amount for
166/// intrinsics like aarch64.neon.{s,u}rshl).
167static mlir::Value emitNeonShiftVector(CIRGenBuilderTy &builder,
168 mlir::Value shiftVal,
169 cir::VectorType vecTy,
170 mlir::Location loc, bool neg) {
171 if (neg) {
172 int64_t shiftAmt = -getIntValueFromConstOp(shiftVal);
173 shiftVal = builder.getConstantInt(loc, vecTy.getElementType(), shiftAmt);
174 }
175 mlir::Type eltTy = vecTy.getElementType();
176 if (shiftVal.getType() != eltTy) {
177 shiftVal = builder.createIntCast(shiftVal, eltTy);
178 }
179 return cir::VecSplatOp::create(builder, loc, vecTy, shiftVal);
180}
181
182// TODO(cir): Remove `cgm` from the list of arguments once all NYI(s) are gone.
183template <typename Operation>
184static mlir::Value
188 std::optional<llvm::StringRef> intrinsicName,
189 mlir::Type funcResTy, mlir::Location loc,
190 bool isConstrainedFPIntrinsic = false, unsigned shift = 0,
191 bool rightshift = false) {
192 // TODO(cir): Consider removing the following unreachable when we have
193 // emitConstrainedFPCall feature implemented
195 if (isConstrainedFPIntrinsic)
196 cgm.errorNYI(loc, std::string("constrained FP intrinsic"));
197
198 for (unsigned j = 0; j < argTypes.size(); ++j) {
199 if (isConstrainedFPIntrinsic) {
201 }
202 if (shift > 0 && shift == j) {
203 args[j] = emitNeonShiftVector(builder, args[j],
204 mlir::cast<cir::VectorType>(argTypes[j]),
205 loc, rightshift);
206 } else {
207 args[j] = builder.createBitcast(args[j], argTypes[j]);
208 }
209 }
210 if (isConstrainedFPIntrinsic) {
212 return nullptr;
213 }
214 if constexpr (std::is_same_v<Operation, cir::LLVMIntrinsicCallOp>) {
215 return Operation::create(builder, loc,
216 builder.getStringAttr(intrinsicName.value()),
217 funcResTy, args)
218 .getResult();
219 } else {
220 return Operation::create(builder, loc, funcResTy, args).getResult();
221 }
222}
223
224// TODO(cir): Remove `cgm` from the list of arguments once all NYI(s) are gone.
// Thin wrapper that forwards `cgm`, `builder`, the moved `argTypes`, `args`,
// `intrinsicName`, `funcResTy`, `loc` and the three optional flags
// (`isConstrainedFPIntrinsic`, `shift`, `rightshift`) to another emitter.
//
// NOTE(review): the parameter lines declaring `argTypes`/`args` and the line
// naming the callee are not present in this listing - confirm them against
// the upstream file.
225static mlir::Value emitNeonCall(CIRGenModule &cgm, CIRGenBuilderTy &builder,
228 llvm::StringRef intrinsicName,
229 mlir::Type funcResTy, mlir::Location loc,
230 bool isConstrainedFPIntrinsic = false,
231 unsigned shift = 0, bool rightshift = false) {
233 cgm, builder, std::move(argTypes), args, intrinsicName, funcResTy, loc,
234 isConstrainedFPIntrinsic, shift, rightshift);
235}
236
237// Computes the input vector type for a NEON pairwise widening operation (e.g.
238// vpaddl/vpadal). Given a result vector type, it derives the corresponding
239// input type by halving the element bit width and doubling the number of lanes,
240// while setting the signedness based on usgn.
241static cir::VectorType getNeonPairwiseWidenInputType(cir::VectorType resType,
242 bool usgn) {
243 mlir::Type elemTy = resType.getElementType();
244 uint64_t resLanes = resType.getSize();
245 auto intTy = mlir::dyn_cast<cir::IntType>(elemTy);
246 assert(intTy && "vpaddl result type must be an integer vector");
247
248 unsigned resWidth = intTy.getWidth();
249 assert((resWidth == 16 || resWidth == 32 || resWidth == 64) &&
250 "unexpected vpaddl result element width");
251
252 unsigned argWidth = resWidth / 2;
253 unsigned argLanes = resLanes * 2;
254 cir::VectorType result = cir::VectorType::get(
255 cir::IntType::get(resType.getContext(), argWidth, /* is_signed */ !usgn),
256 argLanes);
257 return result;
258}
259
260// Derive the LLVM intrinsic's per-operand argument types and its result
261// type for use when emitting the intrinsic call.
262//
263// `modifier` is the TypeModifier bitmask from `ARMVectorIntrinsicInfo`
264// (callers pass `info.TypeModifier`; see AArch64CodeGenUtils.h). It encodes
265// how the intrinsic's argument and return types relate to the builtin's
266// scalar types. For SISD builtins the key flags are:
267// - VectorizeArgTypes: wrap each arg type into a fixed-width vector
268// - Use64BitVectors / Use128BitVectors: choose the vector width
269// (when neither is set the vector has 1 element)
270// - AddRetType / VectorizeRetType: analogous flags for the return type
271//
272// ARM.cpp lets LLVM resolve the intrinsic's signature (via
273// `CGM.getIntrinsic`) and then walks the resolved Function* formal
274// parameter types. CIR has no LLVMContext here, so we derive the same
275// argument/result types directly from the Clang operand types.
// Returns {result type, per-operand argument types}.
// NOTE(review): the signature lines naming the function and declaring `cgf`,
// `modifier` and `ops`, and the declaration of `argTypes`, are not present in
// this listing - confirm them against the upstream file.
276static std::pair<mlir::Type, llvm::SmallVector<mlir::Type>>
278 mlir::Type arg0Ty, mlir::Type resultTy,
// Vector width in bits; 0 means "no width flag set", in which case
// `wrapAsVector` produces a single-element vector.
280 int vectorSize = 0;
281 if (modifier & Use64BitVectors)
282 vectorSize = 64;
283 else if (modifier & Use128BitVectors)
284 vectorSize = 128;
285
// Wrap scalar `ty` into a vector with `vectorSize / sizeof-in-bits(ty)`
// lanes, or one lane when `vectorSize` is 0.
286 auto wrapAsVector = [&](mlir::Type ty) -> cir::VectorType {
287 unsigned bits = cgf.cgm.getDataLayout().getTypeSizeInBits(ty);
288 unsigned elts = vectorSize ? vectorSize / bits : 1;
289 return cir::VectorType::get(ty, elts);
290 };
291
292 // Determine the vectorized data type.
// `vecArgTy` stays null unless VectorizeArgTypes is set; later code tests it
// for that.
293 cir::VectorType vecArgTy;
294 if (modifier & VectorizeArgTypes)
295 vecArgTy = wrapAsVector(arg0Ty);
296
297 // Determine the intrinsic result type: `VectorizeRetType` returns a
298 // vector; otherwise, if data args are vectorized and `AddRetType` is
299 // unset, use a vector return with the same shape as those args.
// NOTE(review): both branches wrap `resultTy` (not `arg0Ty`), so the result
// has the same lane count as the args only when the two scalar types have the
// same bit width.
300 mlir::Type funcResTy = resultTy;
301 if (modifier & VectorizeRetType)
302 funcResTy = wrapAsVector(resultTy);
303 else if (vecArgTy && !(modifier & AddRetType))
304 funcResTy = wrapAsVector(resultTy);
305
306 // When VectorizeArgTypes is set, wrap every operand that has the same
307 // scalar type as arg0 into a vector. This covers intrinsics with multiple
308 // data operands of the same type (e.g. vsri takes two data operands,
309 // both of which must be wrapped into the same vector type).
311 argTypes.reserve(ops.size());
312 for (mlir::Value op : ops) {
// Operands whose type differs from `arg0Ty` keep their own type.
313 if ((modifier & VectorizeArgTypes) && vecArgTy && op.getType() == arg0Ty)
314 argTypes.push_back(vecArgTy);
315 else
316 argTypes.push_back(op.getType());
317 }
318
319 return {funcResTy, std::move(argTypes)};
320}
321
323 CIRGenFunction &cgf, const ARMVectorIntrinsicInfo &info,
325 assert(info.LLVMIntrinsic && "Generic code assumes a valid intrinsic");
326
327 switch (info.BuiltinID) {
328 case NEON::BI__builtin_neon_vcled_s64:
329 case NEON::BI__builtin_neon_vcled_u64:
330 case NEON::BI__builtin_neon_vcles_f32:
331 case NEON::BI__builtin_neon_vcled_f64:
332 case NEON::BI__builtin_neon_vcltd_s64:
333 case NEON::BI__builtin_neon_vcltd_u64:
334 case NEON::BI__builtin_neon_vclts_f32:
335 case NEON::BI__builtin_neon_vcltd_f64:
336 case NEON::BI__builtin_neon_vcales_f32:
337 case NEON::BI__builtin_neon_vcaled_f64:
338 case NEON::BI__builtin_neon_vcalts_f32:
339 case NEON::BI__builtin_neon_vcaltd_f64:
340 cgf.cgm.errorNYI(expr->getSourceRange(),
341 std::string("unimplemented AArch64 builtin call: ") +
343 break;
344 }
345
346 llvm::StringRef llvmIntrName = getLLVMIntrNameNoPrefix(
347 static_cast<llvm::Intrinsic::ID>(info.LLVMIntrinsic));
348 mlir::Location loc = cgf.getLoc(expr->getExprLoc());
349
350 // The switch stmt is intended to help catch NYI cases and will be removed
351 // once the CIR implementation is complete. Avoid adding specialized
352 // code in cases - that should only be required for a handful of examples.
353 switch (info.BuiltinID) {
354 default:
355 cgf.cgm.errorNYI(expr->getSourceRange(),
356 std::string("unimplemented AArch64 builtin call: ") +
358 break;
359 case NEON::BI__builtin_neon_vminv_s8:
360 case NEON::BI__builtin_neon_vminvq_s8:
361 case NEON::BI__builtin_neon_vminv_s16:
362 case NEON::BI__builtin_neon_vminvq_s16:
363 case NEON::BI__builtin_neon_vminv_s32:
364 case NEON::BI__builtin_neon_vminvq_s32:
365 case NEON::BI__builtin_neon_vminv_u8:
366 case NEON::BI__builtin_neon_vminvq_u8:
367 case NEON::BI__builtin_neon_vminv_u16:
368 case NEON::BI__builtin_neon_vminvq_u16:
369 case NEON::BI__builtin_neon_vminv_u32:
370 case NEON::BI__builtin_neon_vminvq_u32:
371 case NEON::BI__builtin_neon_vminv_f32:
372 case NEON::BI__builtin_neon_vminvq_f32:
373 case NEON::BI__builtin_neon_vminvq_f64:
374 case NEON::BI__builtin_neon_vminnmv_f32:
375 case NEON::BI__builtin_neon_vminnmvq_f32:
376 case NEON::BI__builtin_neon_vminnmvq_f64:
377 case NEON::BI__builtin_neon_vabdd_f64:
378 case NEON::BI__builtin_neon_vabds_f32:
379 case NEON::BI__builtin_neon_vshld_s64:
380 case NEON::BI__builtin_neon_vshld_u64:
381 case NEON::BI__builtin_neon_vpmins_f32:
382 case NEON::BI__builtin_neon_vpminqd_f64:
383 case NEON::BI__builtin_neon_vpminnms_f32:
384 case NEON::BI__builtin_neon_vpminnmqd_f64:
385 case NEON::BI__builtin_neon_vcvts_n_f32_s32:
386 case NEON::BI__builtin_neon_vcvts_n_f32_u32:
387 case NEON::BI__builtin_neon_vcvts_n_s32_f32:
388 case NEON::BI__builtin_neon_vcvts_n_u32_f32:
389 case NEON::BI__builtin_neon_vcvtd_n_f64_s64:
390 case NEON::BI__builtin_neon_vcvtd_n_f64_u64:
391 case NEON::BI__builtin_neon_vcvtd_n_s64_f64:
392 case NEON::BI__builtin_neon_vcvtd_n_u64_f64:
393 case NEON::BI__builtin_neon_vaddlv_s32:
394 case NEON::BI__builtin_neon_vaddlv_u32:
395 case NEON::BI__builtin_neon_vaddlvq_s32:
396 case NEON::BI__builtin_neon_vaddlvq_u32:
397 case NEON::BI__builtin_neon_vaddv_s8:
398 case NEON::BI__builtin_neon_vaddv_s16:
399 case NEON::BI__builtin_neon_vaddv_s32:
400 case NEON::BI__builtin_neon_vaddv_u8:
401 case NEON::BI__builtin_neon_vaddv_u16:
402 case NEON::BI__builtin_neon_vaddv_u32:
403 case NEON::BI__builtin_neon_vaddv_f32:
404 case NEON::BI__builtin_neon_vaddvq_s8:
405 case NEON::BI__builtin_neon_vaddvq_s16:
406 case NEON::BI__builtin_neon_vaddvq_s32:
407 case NEON::BI__builtin_neon_vaddvq_s64:
408 case NEON::BI__builtin_neon_vaddvq_u8:
409 case NEON::BI__builtin_neon_vaddvq_u16:
410 case NEON::BI__builtin_neon_vaddvq_u32:
411 case NEON::BI__builtin_neon_vaddvq_u64:
412 case NEON::BI__builtin_neon_vaddvq_f32:
413 case NEON::BI__builtin_neon_vaddvq_f64:
414 case NEON::BI__builtin_neon_vabdh_f16:
415 case NEON::BI__builtin_neon_vrecpeh_f16:
416 case NEON::BI__builtin_neon_vrecpxh_f16:
417 case NEON::BI__builtin_neon_vrsqrteh_f16:
418 case NEON::BI__builtin_neon_vrsqrtsh_f16:
419 case NEON::BI__builtin_neon_vmaxv_s8:
420 case NEON::BI__builtin_neon_vmaxvq_s8:
421 case NEON::BI__builtin_neon_vmaxv_s16:
422 case NEON::BI__builtin_neon_vmaxvq_s16:
423 case NEON::BI__builtin_neon_vmaxv_s32:
424 case NEON::BI__builtin_neon_vmaxvq_s32:
425 case NEON::BI__builtin_neon_vmaxv_u8:
426 case NEON::BI__builtin_neon_vmaxvq_u8:
427 case NEON::BI__builtin_neon_vmaxv_u16:
428 case NEON::BI__builtin_neon_vmaxvq_u16:
429 case NEON::BI__builtin_neon_vmaxv_u32:
430 case NEON::BI__builtin_neon_vmaxvq_u32:
431 case NEON::BI__builtin_neon_vmaxv_f32:
432 case NEON::BI__builtin_neon_vmaxvq_f32:
433 case NEON::BI__builtin_neon_vmaxvq_f64:
434 case NEON::BI__builtin_neon_vqrshrund_n_s64:
435 case NEON::BI__builtin_neon_vqrshrnd_n_s64:
436 case NEON::BI__builtin_neon_vqrshrnd_n_u64:
437 case NEON::BI__builtin_neon_vmaxnmv_f32:
438 case NEON::BI__builtin_neon_vmaxnmvq_f32:
439 case NEON::BI__builtin_neon_vmaxnmvq_f64:
440 case NEON::BI__builtin_neon_vsrid_n_s64:
441 case NEON::BI__builtin_neon_vsrid_n_u64:
442 case NEON::BI__builtin_neon_vslid_n_s64:
443 case NEON::BI__builtin_neon_vslid_n_u64:
444 case NEON::BI__builtin_neon_vpmaxs_f32:
445 case NEON::BI__builtin_neon_vpmaxqd_f64:
446 case NEON::BI__builtin_neon_vpmaxnms_f32:
447 case NEON::BI__builtin_neon_vpmaxnmqd_f64:
448 break;
449 }
450
451 CIRGenBuilderTy &builder = cgf.getBuilder();
452 mlir::Type arg0Ty = cgf.convertType(expr->getArg(0)->getType());
453 mlir::Type resultTy = cgf.convertType(expr->getType());
454
455 // Derive per-operand argument types and the result type from the
456 // TypeModifier flags. `emitNeonCall` takes care of per-operand
457 // bitcasts to `argTypes`.
458 auto [funcResTy, argTypes] = deriveNeonSISDIntrinsicOperandTypes(
459 cgf, info.TypeModifier, arg0Ty, resultTy, ops);
460
461 return emitNeonCall(cgf.cgm, builder, std::move(argTypes), ops, llvmIntrName,
462 funcResTy, loc);
463}
464
465//===----------------------------------------------------------------------===//
466// Emit-helpers
467//===----------------------------------------------------------------------===//
// Compare `src` against zero using comparison kind `kind`.
//
// Vector `src`: it is bitcast to `retTy` and compared lane-wise against a
// null value of that type with `createVecCompare`.
// Scalar `src`: the boolean compare result is cast to a 1-bit signed integer
// and then integral-cast to `retTy`.
//
// NOTE(review): the first signature line (function name and leading
// parameters) is not present in this listing - confirm it against the
// upstream file.
468static mlir::Value
470 mlir::Location loc, mlir::Value src,
471 mlir::Type retTy, const cir::CmpOpKind kind) {
472
473 bool scalarCmp = !isa<cir::VectorType>(src.getType());
474 if (!scalarCmp) {
475 assert(!cast<cir::VectorType>(retTy).getIsScalable() &&
476 "This is only intended for fixed-width vectors");
477 // Vector types are cast to i8 vectors. Recover original type.
478 src = builder.createBitcast(src, retTy);
479 }
480
// The zero operand has the (possibly bitcast) type of `src`.
481 mlir::Value zero = builder.getNullValue(src.getType(), loc);
482
483 if (!scalarCmp)
484 return builder.createVecCompare(loc, kind, src, zero);
485
486 // For scalars, cast !cir.bool to !cir.int<s, 1> so that the compare
487 // result is sign- rather than zero-extended when casting to the output
488 // retType.
489 mlir::Value cmp = builder.createCast(
490 loc, cir::CastKind::bool_to_int,
491 builder.createCompare(loc, kind, src, zero), builder.getSIntNTy(1));
492
493 return builder.createCast(loc, cir::CastKind::integral, cmp, retTy);
494}
495
496// TODO(cir): Remove `loc` from the list of arguments once all NYIs are gone.
// Map NEON type flags to the corresponding CIR vector type.
//
// For each element type the lane count is `base << isQuad` (base 8, 4, 2 or 1
// depending on the element), or exactly 1 when `v1Ty` is true. Integer
// element types pick the unsigned or signed CIR type from
// `typeFlags.isUnsigned()`. `allowBFloatArgsAndRet` and `hasLegalHalfType`
// select between a bFloat16Ty / fP16Ty element and a uInt16Ty fallback.
//
// NOTE(review): the `case` labels selecting each return are not present in
// this listing - confirm them against the upstream file.
497static cir::VectorType getNeonType(CIRGenFunction *cgf, NeonTypeFlags typeFlags,
498 mlir::Location loc,
499 bool hasLegalHalfType = true,
500 bool v1Ty = false,
501 bool allowBFloatArgsAndRet = true) {
// `isQuad` is used as a shift amount: 1 doubles the lane count.
502 int isQuad = typeFlags.isQuad();
503 switch (typeFlags.getEltType()) {
506 return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt8Ty
507 : cgf->sInt8Ty,
508 v1Ty ? 1 : (8 << isQuad));
510 return cir::VectorType::get(cgf->uInt8Ty, v1Ty ? 1 : (8 << isQuad));
513 return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt16Ty
514 : cgf->sInt16Ty,
515 v1Ty ? 1 : (4 << isQuad));
517 if (allowBFloatArgsAndRet)
518 return cir::VectorType::get(cgf->getCIRGenModule().bFloat16Ty,
519 v1Ty ? 1 : (4 << isQuad));
520 return cir::VectorType::get(cgf->uInt16Ty, v1Ty ? 1 : (4 << isQuad));
522 if (hasLegalHalfType)
523 return cir::VectorType::get(cgf->getCIRGenModule().fP16Ty,
524 v1Ty ? 1 : (4 << isQuad));
525 return cir::VectorType::get(cgf->uInt16Ty, v1Ty ? 1 : (4 << isQuad));
527 return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt32Ty
528 : cgf->sInt32Ty,
529 v1Ty ? 1 : (2 << isQuad));
532 return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt64Ty
533 : cgf->sInt64Ty,
534 v1Ty ? 1 : (1 << isQuad));
536 // FIXME: i128 and f128 are not fully supported in Clang and LLVM.
537 // Much of the i128 and f128 API is missing,
538 // so v16i8 is used to represent poly128 and gets pattern matched.
// After reporting NYI, control deliberately falls through to the next return.
539 cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Poly128"));
540 [[fallthrough]];
542 return cir::VectorType::get(cgf->getCIRGenModule().floatTy,
543 v1Ty ? 1 : (2 << isQuad));
545 return cir::VectorType::get(cgf->getCIRGenModule().doubleTy,
546 v1Ty ? 1 : (1 << isQuad));
547 }
548 llvm_unreachable("Unknown vector element type!");
549}
550
551static mlir::Value emitNeonSplat(CIRGenBuilderTy &builder, mlir::Location loc,
552 mlir::Value v, mlir::Value lane,
553 unsigned int resEltCnt) {
554 assert(isa<cir::ConstantOp>(lane.getDefiningOp()) &&
555 "lane number is not a constant!");
556 int64_t laneCst = getIntValueFromConstOp(lane);
557 llvm::SmallVector<int64_t, 4> shuffleMask(resEltCnt, laneCst);
558 return builder.createVecShuffle(loc, v, shuffleMask);
559}
560
561/// Flip the signedness of `vecTy`'s element type, keeping the width and
562/// number of lanes the same. Used when a NEON intrinsic takes a shift
563/// amount vector that must be signed (e.g. aarch64.neon.urshl takes a
564/// signed amount even though the data vector is unsigned).
565static cir::VectorType getSignChangedVectorType(CIRGenBuilderTy &builder,
566 cir::VectorType vecTy) {
567 auto elemTy = mlir::cast<cir::IntType>(vecTy.getElementType());
568 elemTy = elemTy.isSigned() ? builder.getUIntNTy(elemTy.getWidth())
569 : builder.getSIntNTy(elemTy.getWidth());
570 return cir::VectorType::get(elemTy, vecTy.getSize());
571}
572
573static mlir::Value emitCommonNeonShift(CIRGenBuilderTy &builder,
574 mlir::Location loc,
575 cir::VectorType resTy,
576 mlir::Value shifTgt,
577 mlir::Value shiftAmt, bool shiftLeft) {
578 shiftAmt = emitNeonShiftVector(builder, shiftAmt, resTy, loc, /*neg=*/false);
579 return cir::ShiftOp::create(builder, loc, resTy,
580 builder.createBitcast(shifTgt, resTy), shiftAmt,
581 shiftLeft);
582}
583
584// Right-shift a vector by a constant.
585static mlir::Value emitNeonRShiftImm(CIRGenFunction &cgf, mlir::Value shiftVec,
586 mlir::Value shiftVal,
587 cir::VectorType vecTy, bool usgn,
588 mlir::Location loc) {
589 CIRGenBuilderTy &builder = cgf.getBuilder();
590 int64_t shiftAmt = getIntValueFromConstOp(shiftVal);
591 int eltSize =
592 cgf.cgm.getDataLayout().getTypeSizeInBits(vecTy.getElementType());
593
594 shiftVec = builder.createBitcast(shiftVec, vecTy);
595 // lshr/ashr are undefined when the shift amount is equal to the vector
596 // element size.
597 if (shiftAmt == eltSize) {
598 if (usgn) {
599 // Right-shifting an unsigned value by its size yields 0.
600 return builder.getZero(loc, vecTy);
601 }
602 // Right-shifting a signed value by its size is equivalent
603 // to a shift of size-1.
604 --shiftAmt;
605 shiftVal = builder.getConstInt(loc, vecTy.getElementType(), shiftAmt);
606 }
607 return emitCommonNeonShift(builder, loc, vecTy, shiftVec, shiftVal,
608 /*shiftLeft=*/false);
609}
610
611static cir::VectorType getIntVecFromVecTy(CIRGenBuilderTy &builder,
612 cir::VectorType vecTy) {
613 if (!cir::isAnyFloatingPointType(vecTy.getElementType()))
614 return vecTy;
615
616 if (mlir::isa<cir::SingleType>(vecTy.getElementType()))
617 return cir::VectorType::get(builder.getSInt32Ty(), vecTy.getSize());
618 if (mlir::isa<cir::DoubleType>(vecTy.getElementType()))
619 return cir::VectorType::get(builder.getSInt64Ty(), vecTy.getSize());
620 llvm_unreachable(
621 "Unsupported element type in getVecOfIntTypeWithSameEltWidth");
622}
623
624static mlir::Value emitCommonNeonBuiltinExpr(
625 CIRGenFunction &cgf, unsigned builtinID, unsigned llvmIntrinsic,
626 unsigned altLLVMIntrinsic, const char *nameHint, unsigned modifier,
628 mlir::Location loc = cgf.getLoc(expr->getExprLoc());
629 clang::ASTContext &ctx = cgf.getContext();
630
631 // Extract the trailing immediate argument that encodes the type discriminator
632 // for this overloaded intrinsic.
633 // TODO: Move to the parent code that takes care of argument processing.
634 const clang::Expr *arg = expr->getArg(expr->getNumArgs() - 1);
635 std::optional<llvm::APSInt> neonTypeConst = arg->getIntegerConstantExpr(ctx);
636 if (!neonTypeConst)
637 return nullptr;
638
639 // Determine the type of this overloaded NEON intrinsic.
640 NeonTypeFlags neonType(neonTypeConst->getZExtValue());
641 const bool isUnsigned = neonType.isUnsigned();
642 const bool hasLegalHalfType = cgf.getTarget().hasFastHalfType();
643 const bool usgn = neonType.isUnsigned();
644
645 // The value of allowBFloatArgsAndRet is true for AArch64, but it should
646 // come from ABI info.
647 // TODO(cir): Use ABInfo to extract this information
648 const bool allowBFloatArgsAndRet = cgf.getTarget().hasFastHalfType();
649 // FIXME
650 // getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
651
652 cir::VectorType vTy = getNeonType(&cgf, neonType, loc, hasLegalHalfType,
653 false, allowBFloatArgsAndRet);
654 cir::VectorType ty = vTy;
655 if (!ty)
656 return nullptr;
657
658 switch (builtinID) {
659 case NEON::BI__builtin_neon_splat_lane_v:
660 case NEON::BI__builtin_neon_splat_laneq_v:
661 case NEON::BI__builtin_neon_splatq_lane_v:
662 case NEON::BI__builtin_neon_splatq_laneq_v: {
663 uint64_t numElements = vTy.getSize();
664 if (builtinID == NEON::BI__builtin_neon_splatq_lane_v)
665 numElements *= 2;
666 if (builtinID == NEON::BI__builtin_neon_splat_laneq_v)
667 numElements /= 2;
668 ops[0] = cgf.getBuilder().createBitcast(loc, ops[0], vTy);
669 return emitNeonSplat(cgf.getBuilder(), loc, ops[0], ops[1], numElements);
670 }
671 case NEON::BI__builtin_neon_vpadd_v:
672 case NEON::BI__builtin_neon_vpaddq_v:
673 case NEON::BI__builtin_neon_vabs_v:
674 case NEON::BI__builtin_neon_vabsq_v:
675 cgf.cgm.errorNYI(expr->getSourceRange(),
676 std::string("unimplemented AArch64 builtin call: ") +
677 ctx.BuiltinInfo.getName(builtinID));
678 return mlir::Value{};
679 case NEON::BI__builtin_neon_vadd_v:
680 case NEON::BI__builtin_neon_vaddq_v: {
681 unsigned numBytes = (builtinID == NEON::BI__builtin_neon_vaddq_v) ? 16 : 8;
682 cir::VectorType byteTy =
683 cir::VectorType::get(cgf.getBuilder().getUInt8Ty(), numBytes);
684 ops[0] = cgf.getBuilder().createBitcast(ops[0], byteTy);
685 ops[1] = cgf.getBuilder().createBitcast(ops[1], byteTy);
686 mlir::Value result = cgf.getBuilder().createXor(loc, ops[0], ops[1]);
687 return cgf.getBuilder().createBitcast(result, ty);
688 }
689 case NEON::BI__builtin_neon_vaddhn_v:
690 case NEON::BI__builtin_neon_vcale_v:
691 case NEON::BI__builtin_neon_vcaleq_v:
692 case NEON::BI__builtin_neon_vcalt_v:
693 case NEON::BI__builtin_neon_vcaltq_v:
694 case NEON::BI__builtin_neon_vcage_v:
695 case NEON::BI__builtin_neon_vcageq_v:
696 case NEON::BI__builtin_neon_vcagt_v:
697 case NEON::BI__builtin_neon_vcagtq_v:
698 cgf.cgm.errorNYI(expr->getSourceRange(),
699 std::string("unimplemented AArch64 builtin call: ") +
700 ctx.BuiltinInfo.getName(builtinID));
701 return mlir::Value{};
702 case NEON::BI__builtin_neon_vceqz_v:
703 case NEON::BI__builtin_neon_vceqzq_v:
704 return emitAArch64CompareBuiltinExpr(cgf, cgf.getBuilder(), loc, ops[0],
705 vTy, cir::CmpOpKind::eq);
706 case NEON::BI__builtin_neon_vcgez_v:
707 case NEON::BI__builtin_neon_vcgezq_v:
708 case NEON::BI__builtin_neon_vclez_v:
709 case NEON::BI__builtin_neon_vclezq_v:
710 case NEON::BI__builtin_neon_vcgtz_v:
711 case NEON::BI__builtin_neon_vcgtzq_v:
712 case NEON::BI__builtin_neon_vcltz_v:
713 case NEON::BI__builtin_neon_vcltzq_v:
714 case NEON::BI__builtin_neon_vclz_v:
715 case NEON::BI__builtin_neon_vclzq_v:
716 case NEON::BI__builtin_neon_vcvt_f32_v:
717 case NEON::BI__builtin_neon_vcvtq_f32_v:
718 case NEON::BI__builtin_neon_vcvt_f16_s16:
719 case NEON::BI__builtin_neon_vcvt_f16_u16:
720 case NEON::BI__builtin_neon_vcvtq_f16_s16:
721 case NEON::BI__builtin_neon_vcvtq_f16_u16:
722 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
723 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
724 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
725 case NEON::BI__builtin_neon_vcvtq_n_f16_u16:
726 cgf.cgm.errorNYI(expr->getSourceRange(),
727 std::string("unimplemented AArch64 builtin call: ") +
728 ctx.BuiltinInfo.getName(builtinID));
729 return mlir::Value{};
730 case NEON::BI__builtin_neon_vcvt_n_f32_v:
731 case NEON::BI__builtin_neon_vcvt_n_f64_v:
732 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
733 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
734 // The constant argument to an _n_ intrinsic always is Int32Ty.
735 mlir::Type cstIntTy = cgf.sInt32Ty;
736 llvm::StringRef llvmIntrName =
737 getLLVMIntrNameNoPrefix(static_cast<llvm::Intrinsic::ID>(
738 usgn ? llvmIntrinsic : altLLVMIntrinsic));
739 return emitNeonCall(cgf.getCIRGenModule(), cgf.getBuilder(),
740 /*argTypes=*/{vTy, cstIntTy}, ops, llvmIntrName,
741 /*funcResTy=*/getFloatNeonType(cgf, neonType), loc);
742 }
743 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
744 case NEON::BI__builtin_neon_vcvt_n_s32_v:
745 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
746 case NEON::BI__builtin_neon_vcvt_n_u32_v:
747 case NEON::BI__builtin_neon_vcvt_n_s64_v:
748 case NEON::BI__builtin_neon_vcvt_n_u64_v:
749 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
750 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
751 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
752 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
753 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
754 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
755 // The constant argument to an _n_ intrinsic always is Int32Ty.
756 mlir::Type cstIntTy = cgf.sInt32Ty;
757 llvm::StringRef llvmIntrName = getLLVMIntrNameNoPrefix(
758 static_cast<llvm::Intrinsic::ID>(llvmIntrinsic));
759 return emitNeonCall(
760 cgf.getCIRGenModule(), cgf.getBuilder(),
761 /*argTypes=*/{getFloatNeonType(cgf, neonType), cstIntTy}, ops,
762 llvmIntrName,
763 /*funcResTy=*/vTy, loc);
764 }
765 case NEON::BI__builtin_neon_vcvt_s32_v:
766 case NEON::BI__builtin_neon_vcvt_u32_v:
767 case NEON::BI__builtin_neon_vcvt_s64_v:
768 case NEON::BI__builtin_neon_vcvt_u64_v:
769 case NEON::BI__builtin_neon_vcvt_s16_f16:
770 case NEON::BI__builtin_neon_vcvt_u16_f16:
771 case NEON::BI__builtin_neon_vcvtq_s32_v:
772 case NEON::BI__builtin_neon_vcvtq_u32_v:
773 case NEON::BI__builtin_neon_vcvtq_s64_v:
774 case NEON::BI__builtin_neon_vcvtq_u64_v:
775 case NEON::BI__builtin_neon_vcvtq_s16_f16:
776 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
777 auto ty = getFloatNeonType(cgf, neonType);
778 // Undo the bitcast inserted by intrinsics that expand to this builtin
779 // (e.g. vcvt_u32_f32).
780 // TODO: While the bitcasts eventually cancel each other out, we should
781 // avoid them altogether.
782 ops[0] =
783 cgf.getBuilder().createCast(loc, cir::CastKind::bitcast, ops[0], ty);
785 // AArch64: use fptosi.sat/fptoui.sat unless under strict FP.
786 llvm::StringRef llvmIntrName = usgn ? "fptoui.sat" : "fptosi.sat";
787 return emitNeonCall(cgf.getCIRGenModule(), cgf.getBuilder(),
788 /*argTypes=*/{ty}, ops, llvmIntrName, vTy, loc);
789 }
790 case NEON::BI__builtin_neon_vcvta_s16_f16:
791 case NEON::BI__builtin_neon_vcvta_s32_v:
792 case NEON::BI__builtin_neon_vcvta_s64_v:
793 case NEON::BI__builtin_neon_vcvta_u16_f16:
794 case NEON::BI__builtin_neon_vcvta_u32_v:
795 case NEON::BI__builtin_neon_vcvta_u64_v:
796 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
797 case NEON::BI__builtin_neon_vcvtaq_s32_v:
798 case NEON::BI__builtin_neon_vcvtaq_s64_v:
799 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
800 case NEON::BI__builtin_neon_vcvtaq_u32_v:
801 case NEON::BI__builtin_neon_vcvtaq_u64_v:
802 case NEON::BI__builtin_neon_vcvtn_s16_f16:
803 case NEON::BI__builtin_neon_vcvtn_s32_v:
804 case NEON::BI__builtin_neon_vcvtn_s64_v:
805 case NEON::BI__builtin_neon_vcvtn_u16_f16:
806 case NEON::BI__builtin_neon_vcvtn_u32_v:
807 case NEON::BI__builtin_neon_vcvtn_u64_v:
808 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
809 case NEON::BI__builtin_neon_vcvtnq_s32_v:
810 case NEON::BI__builtin_neon_vcvtnq_s64_v:
811 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
812 case NEON::BI__builtin_neon_vcvtnq_u32_v:
813 case NEON::BI__builtin_neon_vcvtnq_u64_v:
814 case NEON::BI__builtin_neon_vcvtp_s16_f16:
815 case NEON::BI__builtin_neon_vcvtp_s32_v:
816 case NEON::BI__builtin_neon_vcvtp_s64_v:
817 case NEON::BI__builtin_neon_vcvtp_u16_f16:
818 case NEON::BI__builtin_neon_vcvtp_u32_v:
819 case NEON::BI__builtin_neon_vcvtp_u64_v:
820 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
821 case NEON::BI__builtin_neon_vcvtpq_s32_v:
822 case NEON::BI__builtin_neon_vcvtpq_s64_v:
823 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
824 case NEON::BI__builtin_neon_vcvtpq_u32_v:
825 case NEON::BI__builtin_neon_vcvtpq_u64_v:
826 case NEON::BI__builtin_neon_vcvtm_s16_f16:
827 case NEON::BI__builtin_neon_vcvtm_s32_v:
828 case NEON::BI__builtin_neon_vcvtm_s64_v:
829 case NEON::BI__builtin_neon_vcvtm_u16_f16:
830 case NEON::BI__builtin_neon_vcvtm_u32_v:
831 case NEON::BI__builtin_neon_vcvtm_u64_v:
832 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
833 case NEON::BI__builtin_neon_vcvtmq_s32_v:
834 case NEON::BI__builtin_neon_vcvtmq_s64_v:
835 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
836 case NEON::BI__builtin_neon_vcvtmq_u32_v:
837 case NEON::BI__builtin_neon_vcvtmq_u64_v:
838 case NEON::BI__builtin_neon_vcvtx_f32_v:
839 case NEON::BI__builtin_neon_vext_v:
840 case NEON::BI__builtin_neon_vextq_v:
841 case NEON::BI__builtin_neon_vfma_v:
842 cgf.cgm.errorNYI(expr->getSourceRange(),
843 std::string("unimplemented AArch64 builtin call: ") +
844 ctx.BuiltinInfo.getName(builtinID));
845 return mlir::Value{};
846 case NEON::BI__builtin_neon_vfmaq_v: {
847 // NEON intrinsic: vfmaq(accumulator, multiplicand1, multiplicand2)
848 // LLVM intrinsic: fma(multiplicand1, multiplicand2, accumulator)
849 // Reorder arguments to match LLVM fma signature
850 mlir::Value op0 = cgf.getBuilder().createBitcast(ops[0], ty);
851 mlir::Value op1 = cgf.getBuilder().createBitcast(ops[1], ty);
852 mlir::Value op2 = cgf.getBuilder().createBitcast(ops[2], ty);
853 llvm::SmallVector<mlir::Value> fmaOps = {op1, op2, op0};
854 return emitCallMaybeConstrainedBuiltin(cgf.getBuilder(), loc, "fma", ty,
855 fmaOps);
856 }
857 case NEON::BI__builtin_neon_vld1_v:
858 case NEON::BI__builtin_neon_vld1q_v:
859 case NEON::BI__builtin_neon_vld1_x2_v:
860 case NEON::BI__builtin_neon_vld1q_x2_v:
861 case NEON::BI__builtin_neon_vld1_x3_v:
862 case NEON::BI__builtin_neon_vld1q_x3_v:
863 case NEON::BI__builtin_neon_vld1_x4_v:
864 case NEON::BI__builtin_neon_vld1q_x4_v:
865 case NEON::BI__builtin_neon_vld2_v:
866 case NEON::BI__builtin_neon_vld2q_v:
867 case NEON::BI__builtin_neon_vld3_v:
868 case NEON::BI__builtin_neon_vld3q_v:
869 case NEON::BI__builtin_neon_vld4_v:
870 case NEON::BI__builtin_neon_vld4q_v:
871 case NEON::BI__builtin_neon_vld2_dup_v:
872 case NEON::BI__builtin_neon_vld2q_dup_v:
873 case NEON::BI__builtin_neon_vld3_dup_v:
874 case NEON::BI__builtin_neon_vld3q_dup_v:
875 case NEON::BI__builtin_neon_vld4_dup_v:
876 case NEON::BI__builtin_neon_vld4q_dup_v:
877 case NEON::BI__builtin_neon_vld1_dup_v:
878 case NEON::BI__builtin_neon_vld1q_dup_v:
879 case NEON::BI__builtin_neon_vld2_lane_v:
880 case NEON::BI__builtin_neon_vld2q_lane_v:
881 case NEON::BI__builtin_neon_vld3_lane_v:
882 case NEON::BI__builtin_neon_vld3q_lane_v:
883 case NEON::BI__builtin_neon_vld4_lane_v:
884 case NEON::BI__builtin_neon_vld4q_lane_v:
885 case NEON::BI__builtin_neon_vmovl_v:
886 case NEON::BI__builtin_neon_vmovn_v:
887 case NEON::BI__builtin_neon_vmull_v:
888 case NEON::BI__builtin_neon_vpadal_v:
889 case NEON::BI__builtin_neon_vpadalq_v:
890 cgf.cgm.errorNYI(expr->getSourceRange(),
891 std::string("Reached code-path for ARM builtin call ") +
892 ctx.BuiltinInfo.getName(builtinID) +
893 "(ARM builtins are not supported ATM)");
894 return mlir::Value{};
895 case NEON::BI__builtin_neon_vpaddl_v:
896 case NEON::BI__builtin_neon_vpaddlq_v: {
897 llvm::StringRef llvmIntrName =
898 getLLVMIntrNameNoPrefix(static_cast<llvm::Intrinsic::ID>(
899 usgn ? llvmIntrinsic : altLLVMIntrinsic));
900 return emitNeonCall(cgf.getCIRGenModule(), cgf.getBuilder(),
901 /*argTypes=*/{getNeonPairwiseWidenInputType(vTy, usgn)},
902 ops, llvmIntrName,
903 /*funcResTy=*/vTy, loc);
904 }
905 case NEON::BI__builtin_neon_vqdmlal_v:
906 case NEON::BI__builtin_neon_vqdmlsl_v:
907 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
908 case NEON::BI__builtin_neon_vqdmulh_lane_v:
909 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
910 case NEON::BI__builtin_neon_vqrdmulh_lane_v:
911 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
912 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
913 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
914 case NEON::BI__builtin_neon_vqrdmulh_laneq_v:
915 case NEON::BI__builtin_neon_vqshl_n_v:
916 case NEON::BI__builtin_neon_vqshlq_n_v:
917 case NEON::BI__builtin_neon_vqshlu_n_v:
918 case NEON::BI__builtin_neon_vqshluq_n_v:
919 case NEON::BI__builtin_neon_vrecpe_v:
920 case NEON::BI__builtin_neon_vrecpeq_v:
921 case NEON::BI__builtin_neon_vrsqrte_v:
922 case NEON::BI__builtin_neon_vrsqrteq_v:
923 case NEON::BI__builtin_neon_vrndi_v:
924 case NEON::BI__builtin_neon_vrndiq_v:
925 cgf.cgm.errorNYI(expr->getSourceRange(),
926 std::string("unimplemented AArch64 builtin call: ") +
927 ctx.BuiltinInfo.getName(builtinID));
928 return mlir::Value{};
929 case NEON::BI__builtin_neon_vrshr_n_v:
930 case NEON::BI__builtin_neon_vrshrq_n_v: {
931 llvm::StringRef intrName =
932 usgn ? "aarch64.neon.urshl" : "aarch64.neon.srshl";
933 return emitNeonCall(
934 cgf.cgm, cgf.getBuilder(),
935 {ty, usgn ? getSignChangedVectorType(cgf.getBuilder(), ty) : ty}, ops,
936 intrName, ty, loc, /*isConstrainedFPIntrinsic=*/false,
937 /*shift=*/1,
938 /*rightshift=*/true);
939 }
940 case NEON::BI__builtin_neon_vsha512hq_u64:
941 case NEON::BI__builtin_neon_vsha512h2q_u64:
942 case NEON::BI__builtin_neon_vsha512su0q_u64:
943 case NEON::BI__builtin_neon_vsha512su1q_u64:
944 cgf.cgm.errorNYI(expr->getSourceRange(),
945 std::string("unimplemented AArch64 builtin call: ") +
946 ctx.BuiltinInfo.getName(builtinID));
947 return mlir::Value{};
948 case NEON::BI__builtin_neon_vshl_n_v:
949 case NEON::BI__builtin_neon_vshlq_n_v:
950 return emitCommonNeonShift(cgf.getBuilder(), loc, vTy, ops[0], ops[1],
951 /*shiftLeft=*/true);
952 case NEON::BI__builtin_neon_vshll_n_v:
953 case NEON::BI__builtin_neon_vshrn_n_v:
954 cgf.cgm.errorNYI(expr->getSourceRange(),
955 std::string("unimplemented AArch64 builtin call: ") +
956 ctx.BuiltinInfo.getName(builtinID));
957 return mlir::Value{};
958 case NEON::BI__builtin_neon_vshr_n_v:
959 case NEON::BI__builtin_neon_vshrq_n_v:
960 return emitNeonRShiftImm(cgf, ops[0], ops[1], vTy, isUnsigned, loc);
961 case NEON::BI__builtin_neon_vst1_v:
962 case NEON::BI__builtin_neon_vst1q_v:
963 case NEON::BI__builtin_neon_vst2_v:
964 case NEON::BI__builtin_neon_vst2q_v:
965 case NEON::BI__builtin_neon_vst3_v:
966 case NEON::BI__builtin_neon_vst3q_v:
967 case NEON::BI__builtin_neon_vst4_v:
968 case NEON::BI__builtin_neon_vst4q_v:
969 case NEON::BI__builtin_neon_vst2_lane_v:
970 case NEON::BI__builtin_neon_vst2q_lane_v:
971 case NEON::BI__builtin_neon_vst3_lane_v:
972 case NEON::BI__builtin_neon_vst3q_lane_v:
973 case NEON::BI__builtin_neon_vst4_lane_v:
974 case NEON::BI__builtin_neon_vst4q_lane_v:
975 case NEON::BI__builtin_neon_vsm3partw1q_u32:
976 case NEON::BI__builtin_neon_vsm3partw2q_u32:
977 case NEON::BI__builtin_neon_vsm3ss1q_u32:
978 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
979 case NEON::BI__builtin_neon_vsm4eq_u32:
980 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
981 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
982 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
983 case NEON::BI__builtin_neon_vsm3tt2bq_u32:
984 case NEON::BI__builtin_neon_vst1_x2_v:
985 case NEON::BI__builtin_neon_vst1q_x2_v:
986 case NEON::BI__builtin_neon_vst1_x3_v:
987 case NEON::BI__builtin_neon_vst1q_x3_v:
988 case NEON::BI__builtin_neon_vst1_x4_v:
989 case NEON::BI__builtin_neon_vst1q_x4_v:
990 case NEON::BI__builtin_neon_vsubhn_v:
991 case NEON::BI__builtin_neon_vtrn_v:
992 case NEON::BI__builtin_neon_vtrnq_v:
993 case NEON::BI__builtin_neon_vtst_v:
994 case NEON::BI__builtin_neon_vtstq_v:
995 case NEON::BI__builtin_neon_vuzp_v:
996 case NEON::BI__builtin_neon_vuzpq_v:
997 case NEON::BI__builtin_neon_vxarq_u64:
998 case NEON::BI__builtin_neon_vzip_v:
999 case NEON::BI__builtin_neon_vzipq_v:
1000 case NEON::BI__builtin_neon_vdot_s32:
1001 case NEON::BI__builtin_neon_vdot_u32:
1002 case NEON::BI__builtin_neon_vdotq_s32:
1003 case NEON::BI__builtin_neon_vdotq_u32:
1004 case NEON::BI__builtin_neon_vfmlal_low_f16:
1005 case NEON::BI__builtin_neon_vfmlalq_low_f16:
1006 case NEON::BI__builtin_neon_vfmlsl_low_f16:
1007 case NEON::BI__builtin_neon_vfmlslq_low_f16:
1008 case NEON::BI__builtin_neon_vfmlal_high_f16:
1009 case NEON::BI__builtin_neon_vfmlalq_high_f16:
1010 case NEON::BI__builtin_neon_vfmlsl_high_f16:
1011 case NEON::BI__builtin_neon_vfmlslq_high_f16:
1012 case NEON::BI__builtin_neon_vmmlaq_s32:
1013 case NEON::BI__builtin_neon_vmmlaq_u32:
1014 cgf.cgm.errorNYI(expr->getSourceRange(),
1015 std::string("unimplemented AArch64 builtin call: ") +
1016 ctx.BuiltinInfo.getName(builtinID));
1017 return mlir::Value{};
1018 case NEON::BI__builtin_neon_vmul_v:
1019 case NEON::BI__builtin_neon_vmulq_v:
1020 return cgf.getBuilder().emitIntrinsicCallOp(loc, "aarch64.neon.pmul", vTy,
1021 ops);
1022 case NEON::BI__builtin_neon_vusmmlaq_s32:
1023 case NEON::BI__builtin_neon_vusdot_s32:
1024 case NEON::BI__builtin_neon_vusdotq_s32:
1025 case NEON::BI__builtin_neon_vbfdot_f32:
1026 case NEON::BI__builtin_neon_vbfdotq_f32:
1027 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32:
1028 cgf.cgm.errorNYI(expr->getSourceRange(),
1029 std::string("unimplemented AArch64 builtin call: ") +
1030 ctx.BuiltinInfo.getName(builtinID));
1031 return mlir::Value{};
1032 }
1033
1034 // The switch stmt is intended to help catch NYI cases and will be removed
1035 // once the CIR implementation is complete. Avoid adding specialized
1036 // code in cases - that should only be required for a handful of examples.
1037 switch (builtinID) {
1038 default:
1039 cgf.cgm.errorNYI(expr->getSourceRange(),
1040 std::string("unimplemented AArch64 builtin call: ") +
1041 cgf.getContext().BuiltinInfo.getName(builtinID));
1042 break;
1043 case NEON::BI__builtin_neon_vshl_v:
1044 case NEON::BI__builtin_neon_vshlq_v: {
1045 llvm::StringRef llvmIntrName =
1046 getLLVMIntrNameNoPrefix(static_cast<llvm::Intrinsic::ID>(
1047 usgn ? llvmIntrinsic : altLLVMIntrinsic));
1048
1049 mlir::Value result =
1051 /*argTypes=*/{vTy, vTy}, ops, llvmIntrName,
1052 /*funcResTy=*/vTy, loc);
1053 mlir::Type resultType = cgf.convertType(expr->getType());
1054 return cgf.getBuilder().createBitcast(result, resultType);
1055 }
1056 }
1057
1058 // NYI
1059 return nullptr;
1060}
1061
1063 unsigned builtinID, const CallExpr *expr, SmallVectorImpl<mlir::Value> &ops,
1064 SVETypeFlags typeFlags) {
1065 // Find out if any arguments are required to be integer constant expressions.
1066 unsigned iceArguments = 0;
1068 getContext().GetBuiltinType(builtinID, error, &iceArguments);
1069 assert(error == ASTContext::GE_None && "Should not codegen an error");
1070
1071 for (unsigned i = 0, e = expr->getNumArgs(); i != e; i++) {
1072 bool isIce = iceArguments & (1 << i);
1073 mlir::Value arg = emitScalarExpr(expr->getArg(i));
1074
1075 if (isIce) {
1076 cgm.errorNYI(expr->getSourceRange(),
1077 std::string("unimplemented AArch64 builtin call: ") +
1078 getContext().BuiltinInfo.getName(builtinID));
1079 }
1080
1081 // FIXME: Handle types like svint16x2_t, which are currently incorrectly
1082 // converted to i32. These should be treated as structs and unpacked.
1083
1084 ops.push_back(arg);
1085 }
1086 return true;
1087}
1088
1089// Reinterpret the input predicate so that it can be used to correctly isolate
1090// the elements of the specified datatype.
1091mlir::Value CIRGenFunction::emitSVEPredicateCast(mlir::Value pred,
1092 unsigned minNumElts,
1093 mlir::Location loc) {
1094
1095 // TODO: Handle "aarch64.svcount" once we get round to supporting SME.
1096
1097 auto retTy = cir::VectorType::get(builder.getUIntNTy(1), minNumElts,
1098 /*is_scalable=*/true);
1099 if (pred.getType() == retTy)
1100 return pred;
1101
1102 llvm::Intrinsic::ID intID;
1103 switch (minNumElts) {
1104 default:
1105 llvm_unreachable("unsupported element count!");
1106 case 1:
1107 case 2:
1108 case 4:
1109 case 8:
1110 intID = Intrinsic::aarch64_sve_convert_from_svbool;
1111 break;
1112 case 16:
1113 intID = Intrinsic::aarch64_sve_convert_to_svbool;
1114 break;
1115 }
1116
1117 llvm::StringRef llvmIntrName = getLLVMIntrNameNoPrefix(intID);
1118 auto call = builder.emitIntrinsicCallOp(loc, llvmIntrName, retTy,
1119 mlir::ValueRange{pred});
1120 assert(call.getType() == retTy && "Unexpected return type!");
1121 return call;
1122}
1123
1124//===----------------------------------------------------------------------===//
1125// SVE helpers
1126//===----------------------------------------------------------------------===//
1127// Get the minimum number of elements in an SVE vector for the given element
1128// type. The actual number of elements in the vector would be an integer (power
1129// of two) multiple of this value.
1131 switch (sveType) {
1132 default:
1133 llvm_unreachable("Invalid SVETypeFlag!");
1134
1135 case SVETypeFlags::EltTyInt8:
1136 return 16;
1137 case SVETypeFlags::EltTyInt16:
1138 return 8;
1139 case SVETypeFlags::EltTyInt32:
1140 return 4;
1141 case SVETypeFlags::EltTyInt64:
1142 return 2;
1143
1144 case SVETypeFlags::EltTyMFloat8:
1145 return 16;
1146 case SVETypeFlags::EltTyFloat16:
1147 case SVETypeFlags::EltTyBFloat16:
1148 return 8;
1149 case SVETypeFlags::EltTyFloat32:
1150 return 4;
1151 case SVETypeFlags::EltTyFloat64:
1152 return 2;
1153
1154 case SVETypeFlags::EltTyBool8:
1155 return 16;
1156 case SVETypeFlags::EltTyBool16:
1157 return 8;
1158 case SVETypeFlags::EltTyBool32:
1159 return 4;
1160 case SVETypeFlags::EltTyBool64:
1161 return 2;
1162 }
1163}
1164
1165// TODO(cir): Share with OGCG
1166constexpr unsigned sveBitsPerBlock = 128;
1167
1168static cir::VectorType getSVEVectorForElementType(CIRGenModule &cgm,
1169 mlir::Type eltTy) {
1170 unsigned numElts =
1172 return cir::VectorType::get(eltTy, numElts, /*is_scalable=*/true);
1173}
1174
1175//===----------------------------------------------------------------------===//
1176// SVE helpers
1177//===----------------------------------------------------------------------===//
1178std::optional<mlir::Value>
1180 const CallExpr *expr) {
1181 mlir::Type ty = convertType(expr->getType());
1182
1183 if (builtinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
1184 builtinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
1185 cgm.errorNYI(expr->getSourceRange(),
1186 std::string("unimplemented AArch64 builtin call: ") +
1187 getContext().BuiltinInfo.getName(builtinID));
1188 return mlir::Value{};
1189 }
1190
1192
1193 auto *builtinIntrInfo = findARMVectorIntrinsicInMap(
1195
1196 // The operands of the builtin call
1198
1199 SVETypeFlags typeFlags(builtinIntrInfo->TypeModifier);
1201 typeFlags))
1202 return mlir::Value{};
1203
1204 if (typeFlags.isLoad() || typeFlags.isStore() || typeFlags.isGatherLoad() ||
1205 typeFlags.isScatterStore() || typeFlags.isPrefetch() ||
1206 typeFlags.isGatherPrefetch() || typeFlags.isStructLoad() ||
1207 typeFlags.isStructStore() || typeFlags.isTupleSet() ||
1208 typeFlags.isTupleGet() || typeFlags.isTupleCreate() ||
1209 typeFlags.isUndef())
1210 cgm.errorNYI(expr->getSourceRange(),
1211 std::string("unimplemented AArch64 builtin call: ") +
1212 getContext().BuiltinInfo.getName(builtinID));
1213
1214 mlir::Location loc = getLoc(expr->getExprLoc());
1215
1216 // Handle built-ins for which there is a corresponding LLVM Intrinsic.
1217 // -------------------------------------------------------------------
1218 if (builtinIntrInfo->LLVMIntrinsic != 0) {
1219 // Emit set FPMR for intrinsics that require it.
1220 if (typeFlags.setsFPMR())
1221 cgm.errorNYI(expr->getSourceRange(),
1222 std::string("unimplemented AArch64 builtin call: ") +
1223 getContext().BuiltinInfo.getName(builtinID));
1224
1225 // Zero-ing predication
1226 if (typeFlags.getMergeType() == SVETypeFlags::MergeZeroExp) {
1227 auto null = builder.getNullValue(convertType(expr->getType()),
1228 getLoc(expr->getExprLoc()));
1229 ops.insert(ops.begin(), null);
1230 }
1231
1232 if (typeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
1233 ops.insert(ops.begin(),
1234 builder.getConstant(loc, cir::UndefAttr::get(ty)));
1235
1236 // Some ACLE builtins leave out the argument to specify the predicate
1237 // pattern, which is expected to be expanded to an SV_ALL pattern.
1238 if (typeFlags.isAppendSVALL())
1239 cgm.errorNYI(expr->getSourceRange(),
1240 std::string("unimplemented AArch64 builtin call: ") +
1241 getContext().BuiltinInfo.getName(builtinID));
1242 if (typeFlags.isInsertOp1SVALL())
1243 cgm.errorNYI(expr->getSourceRange(),
1244 std::string("unimplemented AArch64 builtin call: ") +
1245 getContext().BuiltinInfo.getName(builtinID));
1246
1247 // Predicates must match the main datatype.
1248 for (mlir::Value &op : ops)
1249 if (auto predTy = dyn_cast<cir::VectorType>(op.getType()))
1250 if (auto cirInt = dyn_cast<cir::IntType>(predTy.getElementType()))
1251 if (cirInt.getWidth() == 1)
1253 op, getSVEMinEltCount(typeFlags.getEltType()), loc);
1254
1255 // Splat scalar operand to vector (intrinsics with _n infix)
1256 if (typeFlags.hasSplatOperand()) {
1257 unsigned opNo = typeFlags.getSplatOperand();
1258 ops[opNo] = cir::VecSplatOp::create(
1259 builder, loc, getSVEVectorForElementType(cgm, ops[opNo].getType()),
1260 ops[opNo]);
1261 }
1262
1263 if (typeFlags.isReverseCompare())
1264 cgm.errorNYI(expr->getSourceRange(),
1265 std::string("unimplemented AArch64 builtin call: ") +
1266 getContext().BuiltinInfo.getName(builtinID));
1267 if (typeFlags.isReverseUSDOT())
1268 cgm.errorNYI(expr->getSourceRange(),
1269 std::string("unimplemented AArch64 builtin call: ") +
1270 getContext().BuiltinInfo.getName(builtinID));
1271 if (typeFlags.isReverseMergeAnyBinOp() &&
1272 typeFlags.getMergeType() == SVETypeFlags::MergeAny)
1273 cgm.errorNYI(expr->getSourceRange(),
1274 std::string("unimplemented AArch64 builtin call: ") +
1275 getContext().BuiltinInfo.getName(builtinID));
1276 if (typeFlags.isReverseMergeAnyAccOp() &&
1277 typeFlags.getMergeType() == SVETypeFlags::MergeAny)
1278 cgm.errorNYI(expr->getSourceRange(),
1279 std::string("unimplemented AArch64 builtin call: ") +
1280 getContext().BuiltinInfo.getName(builtinID));
1281
1282 // Predicated intrinsics with _z suffix.
1283 if (typeFlags.getMergeType() == SVETypeFlags::MergeZero) {
1284 cgm.errorNYI(expr->getSourceRange(),
1285 std::string("unimplemented AArch64 builtin call: ") +
1286 getContext().BuiltinInfo.getName(builtinID));
1287 }
1288
1289 llvm::StringRef llvmIntrName = getLLVMIntrNameNoPrefix(
1290 static_cast<llvm::Intrinsic::ID>(builtinIntrInfo->LLVMIntrinsic));
1291 auto retTy = convertType(expr->getType());
1292
1293 auto call = builder.emitIntrinsicCallOp(loc, llvmIntrName, retTy,
1294 mlir::ValueRange{ops});
1295 if (call.getType() == retTy)
1296 return call;
1297
1298 // Predicate results must be converted to svbool_t.
1299 if (isa<mlir::VectorType>(retTy) &&
1300 cast<mlir::VectorType>(retTy).isScalable())
1301 cgm.errorNYI(expr->getSourceRange(),
1302 std::string("unimplemented AArch64 builtin call: ") +
1303 getContext().BuiltinInfo.getName(builtinID));
1304 // TODO Handle struct types, e.g. svint8x2_t (update the converter first).
1305
1306 llvm_unreachable("unsupported element count!");
1307 }
1308
1309 // Handle the remaining built-ins.
1310 // -------------------------------
1311 switch (builtinID) {
1312 default:
1313 return std::nullopt;
1314
1315 case SVE::BI__builtin_sve_svreinterpret_b:
1316 case SVE::BI__builtin_sve_svreinterpret_c:
1317 case SVE::BI__builtin_sve_svpsel_lane_b8:
1318 case SVE::BI__builtin_sve_svpsel_lane_b16:
1319 case SVE::BI__builtin_sve_svpsel_lane_b32:
1320 case SVE::BI__builtin_sve_svpsel_lane_b64:
1321 case SVE::BI__builtin_sve_svpsel_lane_c8:
1322 case SVE::BI__builtin_sve_svpsel_lane_c16:
1323 case SVE::BI__builtin_sve_svpsel_lane_c32:
1324 case SVE::BI__builtin_sve_svpsel_lane_c64:
1325 case SVE::BI__builtin_sve_svmov_b_z:
1326 case SVE::BI__builtin_sve_svnot_b_z:
1327 case SVE::BI__builtin_sve_svmovlb_u16:
1328 case SVE::BI__builtin_sve_svmovlb_u32:
1329 case SVE::BI__builtin_sve_svmovlb_u64:
1330 case SVE::BI__builtin_sve_svmovlb_s16:
1331 case SVE::BI__builtin_sve_svmovlb_s32:
1332 case SVE::BI__builtin_sve_svmovlb_s64:
1333 case SVE::BI__builtin_sve_svmovlt_u16:
1334 case SVE::BI__builtin_sve_svmovlt_u32:
1335 case SVE::BI__builtin_sve_svmovlt_u64:
1336 case SVE::BI__builtin_sve_svmovlt_s16:
1337 case SVE::BI__builtin_sve_svmovlt_s32:
1338 case SVE::BI__builtin_sve_svmovlt_s64:
1339 case SVE::BI__builtin_sve_svpmullt_u16:
1340 case SVE::BI__builtin_sve_svpmullt_u64:
1341 case SVE::BI__builtin_sve_svpmullt_n_u16:
1342 case SVE::BI__builtin_sve_svpmullt_n_u64:
1343 case SVE::BI__builtin_sve_svpmullb_u16:
1344 case SVE::BI__builtin_sve_svpmullb_u64:
1345 case SVE::BI__builtin_sve_svpmullb_n_u16:
1346 case SVE::BI__builtin_sve_svpmullb_n_u64:
1347
1348 case SVE::BI__builtin_sve_svdup_n_b8:
1349 case SVE::BI__builtin_sve_svdup_n_b16:
1350 case SVE::BI__builtin_sve_svdup_n_b32:
1351 case SVE::BI__builtin_sve_svdup_n_b64:
1352
1353 case SVE::BI__builtin_sve_svdupq_n_b8:
1354 case SVE::BI__builtin_sve_svdupq_n_b16:
1355 case SVE::BI__builtin_sve_svdupq_n_b32:
1356 case SVE::BI__builtin_sve_svdupq_n_b64:
1357 case SVE::BI__builtin_sve_svdupq_n_u8:
1358 case SVE::BI__builtin_sve_svdupq_n_s8:
1359 case SVE::BI__builtin_sve_svdupq_n_u64:
1360 case SVE::BI__builtin_sve_svdupq_n_f64:
1361 case SVE::BI__builtin_sve_svdupq_n_s64:
1362 case SVE::BI__builtin_sve_svdupq_n_u16:
1363 case SVE::BI__builtin_sve_svdupq_n_f16:
1364 case SVE::BI__builtin_sve_svdupq_n_bf16:
1365 case SVE::BI__builtin_sve_svdupq_n_s16:
1366 case SVE::BI__builtin_sve_svdupq_n_u32:
1367 case SVE::BI__builtin_sve_svdupq_n_f32:
1368 case SVE::BI__builtin_sve_svdupq_n_s32:
1369 case SVE::BI__builtin_sve_svpfalse_b:
1370 case SVE::BI__builtin_sve_svpfalse_c:
1371 cgm.errorNYI(expr->getSourceRange(),
1372 std::string("unimplemented AArch64 builtin call: ") +
1373 getContext().BuiltinInfo.getName(builtinID));
1374 return mlir::Value{};
1375
1376 case SVE::BI__builtin_sve_svlen_u8:
1377 case SVE::BI__builtin_sve_svlen_s8:
1378 return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 16);
1379
1380 case SVE::BI__builtin_sve_svlen_u16:
1381 case SVE::BI__builtin_sve_svlen_s16:
1382 case SVE::BI__builtin_sve_svlen_f16:
1383 case SVE::BI__builtin_sve_svlen_bf16:
1384 return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 8);
1385
1386 case SVE::BI__builtin_sve_svlen_u32:
1387 case SVE::BI__builtin_sve_svlen_s32:
1388 case SVE::BI__builtin_sve_svlen_f32:
1389 return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 4);
1390
1391 case SVE::BI__builtin_sve_svlen_u64:
1392 case SVE::BI__builtin_sve_svlen_s64:
1393 case SVE::BI__builtin_sve_svlen_f64:
1394 return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 2);
1395
1396 case SVE::BI__builtin_sve_svtbl2_u8:
1397 case SVE::BI__builtin_sve_svtbl2_s8:
1398 case SVE::BI__builtin_sve_svtbl2_u16:
1399 case SVE::BI__builtin_sve_svtbl2_s16:
1400 case SVE::BI__builtin_sve_svtbl2_u32:
1401 case SVE::BI__builtin_sve_svtbl2_s32:
1402 case SVE::BI__builtin_sve_svtbl2_u64:
1403 case SVE::BI__builtin_sve_svtbl2_s64:
1404 case SVE::BI__builtin_sve_svtbl2_f16:
1405 case SVE::BI__builtin_sve_svtbl2_bf16:
1406 case SVE::BI__builtin_sve_svtbl2_f32:
1407 case SVE::BI__builtin_sve_svtbl2_f64:
1408 case SVE::BI__builtin_sve_svset_neonq_s8:
1409 case SVE::BI__builtin_sve_svset_neonq_s16:
1410 case SVE::BI__builtin_sve_svset_neonq_s32:
1411 case SVE::BI__builtin_sve_svset_neonq_s64:
1412 case SVE::BI__builtin_sve_svset_neonq_u8:
1413 case SVE::BI__builtin_sve_svset_neonq_u16:
1414 case SVE::BI__builtin_sve_svset_neonq_u32:
1415 case SVE::BI__builtin_sve_svset_neonq_u64:
1416 case SVE::BI__builtin_sve_svset_neonq_f16:
1417 case SVE::BI__builtin_sve_svset_neonq_f32:
1418 case SVE::BI__builtin_sve_svset_neonq_f64:
1419 case SVE::BI__builtin_sve_svset_neonq_bf16:
1420 case SVE::BI__builtin_sve_svget_neonq_s8:
1421 case SVE::BI__builtin_sve_svget_neonq_s16:
1422 case SVE::BI__builtin_sve_svget_neonq_s32:
1423 case SVE::BI__builtin_sve_svget_neonq_s64:
1424 case SVE::BI__builtin_sve_svget_neonq_u8:
1425 case SVE::BI__builtin_sve_svget_neonq_u16:
1426 case SVE::BI__builtin_sve_svget_neonq_u32:
1427 case SVE::BI__builtin_sve_svget_neonq_u64:
1428 case SVE::BI__builtin_sve_svget_neonq_f16:
1429 case SVE::BI__builtin_sve_svget_neonq_f32:
1430 case SVE::BI__builtin_sve_svget_neonq_f64:
1431 case SVE::BI__builtin_sve_svget_neonq_bf16:
1432 case SVE::BI__builtin_sve_svdup_neonq_s8:
1433 case SVE::BI__builtin_sve_svdup_neonq_s16:
1434 case SVE::BI__builtin_sve_svdup_neonq_s32:
1435 case SVE::BI__builtin_sve_svdup_neonq_s64:
1436 case SVE::BI__builtin_sve_svdup_neonq_u8:
1437 case SVE::BI__builtin_sve_svdup_neonq_u16:
1438 case SVE::BI__builtin_sve_svdup_neonq_u32:
1439 case SVE::BI__builtin_sve_svdup_neonq_u64:
1440 case SVE::BI__builtin_sve_svdup_neonq_f16:
1441 case SVE::BI__builtin_sve_svdup_neonq_f32:
1442 case SVE::BI__builtin_sve_svdup_neonq_f64:
1443 case SVE::BI__builtin_sve_svdup_neonq_bf16:
1444 cgm.errorNYI(expr->getSourceRange(),
1445 std::string("unimplemented AArch64 builtin call: ") +
1446 getContext().BuiltinInfo.getName(builtinID));
1447 return mlir::Value{};
1448 }
1449
1450 // Unreachable: All cases in the switch above return.
1451}
1452
1453std::optional<mlir::Value>
1455 const CallExpr *expr) {
1457
1458 cgm.errorNYI(expr->getSourceRange(),
1459 std::string("unimplemented AArch64 builtin call: ") +
1460 getContext().BuiltinInfo.getName(builtinID));
1461 return mlir::Value{};
1462}
1463
1464// Some intrinsics are equivalent for codegen.
1465static const std::pair<unsigned, unsigned> neonEquivalentIntrinsicMap[] = {
1466 {
1467 NEON::BI__builtin_neon_splat_lane_bf16,
1468 NEON::BI__builtin_neon_splat_lane_v,
1469 },
1470 {
1471 NEON::BI__builtin_neon_splat_laneq_bf16,
1472 NEON::BI__builtin_neon_splat_laneq_v,
1473 },
1474 {
1475 NEON::BI__builtin_neon_splatq_lane_bf16,
1476 NEON::BI__builtin_neon_splatq_lane_v,
1477 },
1478 {
1479 NEON::BI__builtin_neon_splatq_laneq_bf16,
1480 NEON::BI__builtin_neon_splatq_laneq_v,
1481 },
1482 {
1483 NEON::BI__builtin_neon_vabd_f16,
1484 NEON::BI__builtin_neon_vabd_v,
1485 },
1486 {
1487 NEON::BI__builtin_neon_vabdq_f16,
1488 NEON::BI__builtin_neon_vabdq_v,
1489 },
1490 {
1491 NEON::BI__builtin_neon_vabs_f16,
1492 NEON::BI__builtin_neon_vabs_v,
1493 },
1494 {
1495 NEON::BI__builtin_neon_vabsq_f16,
1496 NEON::BI__builtin_neon_vabsq_v,
1497 },
1498 {
1499 NEON::BI__builtin_neon_vcage_f16,
1500 NEON::BI__builtin_neon_vcage_v,
1501 },
1502 {
1503 NEON::BI__builtin_neon_vcageq_f16,
1504 NEON::BI__builtin_neon_vcageq_v,
1505 },
1506 {
1507 NEON::BI__builtin_neon_vcagt_f16,
1508 NEON::BI__builtin_neon_vcagt_v,
1509 },
1510 {
1511 NEON::BI__builtin_neon_vcagtq_f16,
1512 NEON::BI__builtin_neon_vcagtq_v,
1513 },
1514 {
1515 NEON::BI__builtin_neon_vcale_f16,
1516 NEON::BI__builtin_neon_vcale_v,
1517 },
1518 {
1519 NEON::BI__builtin_neon_vcaleq_f16,
1520 NEON::BI__builtin_neon_vcaleq_v,
1521 },
1522 {
1523 NEON::BI__builtin_neon_vcalt_f16,
1524 NEON::BI__builtin_neon_vcalt_v,
1525 },
1526 {
1527 NEON::BI__builtin_neon_vcaltq_f16,
1528 NEON::BI__builtin_neon_vcaltq_v,
1529 },
1530 {
1531 NEON::BI__builtin_neon_vceqz_f16,
1532 NEON::BI__builtin_neon_vceqz_v,
1533 },
1534 {
1535 NEON::BI__builtin_neon_vceqzq_f16,
1536 NEON::BI__builtin_neon_vceqzq_v,
1537 },
1538 {
1539 NEON::BI__builtin_neon_vcgez_f16,
1540 NEON::BI__builtin_neon_vcgez_v,
1541 },
1542 {
1543 NEON::BI__builtin_neon_vcgezq_f16,
1544 NEON::BI__builtin_neon_vcgezq_v,
1545 },
1546 {
1547 NEON::BI__builtin_neon_vcgtz_f16,
1548 NEON::BI__builtin_neon_vcgtz_v,
1549 },
1550 {
1551 NEON::BI__builtin_neon_vcgtzq_f16,
1552 NEON::BI__builtin_neon_vcgtzq_v,
1553 },
1554 {
1555 NEON::BI__builtin_neon_vclez_f16,
1556 NEON::BI__builtin_neon_vclez_v,
1557 },
1558 {
1559 NEON::BI__builtin_neon_vclezq_f16,
1560 NEON::BI__builtin_neon_vclezq_v,
1561 },
1562 {
1563 NEON::BI__builtin_neon_vcltz_f16,
1564 NEON::BI__builtin_neon_vcltz_v,
1565 },
1566 {
1567 NEON::BI__builtin_neon_vcltzq_f16,
1568 NEON::BI__builtin_neon_vcltzq_v,
1569 },
1570 {
1571 NEON::BI__builtin_neon_vfma_f16,
1572 NEON::BI__builtin_neon_vfma_v,
1573 },
1574 {
1575 NEON::BI__builtin_neon_vfma_lane_f16,
1576 NEON::BI__builtin_neon_vfma_lane_v,
1577 },
1578 {
1579 NEON::BI__builtin_neon_vfma_laneq_f16,
1580 NEON::BI__builtin_neon_vfma_laneq_v,
1581 },
1582 {
1583 NEON::BI__builtin_neon_vfmaq_f16,
1584 NEON::BI__builtin_neon_vfmaq_v,
1585 },
1586 {
1587 NEON::BI__builtin_neon_vfmaq_lane_f16,
1588 NEON::BI__builtin_neon_vfmaq_lane_v,
1589 },
1590 {
1591 NEON::BI__builtin_neon_vfmaq_laneq_f16,
1592 NEON::BI__builtin_neon_vfmaq_laneq_v,
1593 },
1594 {NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v},
1595 {NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v},
1596 {NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v},
1597 {NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v},
1598 {NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v},
1599 {NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v},
1600 {NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v},
1601 {NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v},
1602 {NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v},
1603 {NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v},
1604 {NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v},
1605 {NEON::BI__builtin_neon_vld1q_lane_bf16,
1606 NEON::BI__builtin_neon_vld1q_lane_v},
1607 {NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v},
1608 {NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v},
1609 {NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v},
1610 {NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v},
1611 {NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v},
1612 {NEON::BI__builtin_neon_vld2q_lane_bf16,
1613 NEON::BI__builtin_neon_vld2q_lane_v},
1614 {NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v},
1615 {NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v},
1616 {NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v},
1617 {NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v},
1618 {NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v},
1619 {NEON::BI__builtin_neon_vld3q_lane_bf16,
1620 NEON::BI__builtin_neon_vld3q_lane_v},
1621 {NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v},
1622 {NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v},
1623 {NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v},
1624 {NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v},
1625 {NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v},
1626 {NEON::BI__builtin_neon_vld4q_lane_bf16,
1627 NEON::BI__builtin_neon_vld4q_lane_v},
1628 {
1629 NEON::BI__builtin_neon_vmax_f16,
1630 NEON::BI__builtin_neon_vmax_v,
1631 },
1632 {
1633 NEON::BI__builtin_neon_vmaxnm_f16,
1634 NEON::BI__builtin_neon_vmaxnm_v,
1635 },
1636 {
1637 NEON::BI__builtin_neon_vmaxnmq_f16,
1638 NEON::BI__builtin_neon_vmaxnmq_v,
1639 },
1640 {
1641 NEON::BI__builtin_neon_vmaxq_f16,
1642 NEON::BI__builtin_neon_vmaxq_v,
1643 },
1644 {
1645 NEON::BI__builtin_neon_vmin_f16,
1646 NEON::BI__builtin_neon_vmin_v,
1647 },
1648 {
1649 NEON::BI__builtin_neon_vminnm_f16,
1650 NEON::BI__builtin_neon_vminnm_v,
1651 },
1652 {
1653 NEON::BI__builtin_neon_vminnmq_f16,
1654 NEON::BI__builtin_neon_vminnmq_v,
1655 },
1656 {
1657 NEON::BI__builtin_neon_vminq_f16,
1658 NEON::BI__builtin_neon_vminq_v,
1659 },
1660 {
1661 NEON::BI__builtin_neon_vmulx_f16,
1662 NEON::BI__builtin_neon_vmulx_v,
1663 },
1664 {
1665 NEON::BI__builtin_neon_vmulxq_f16,
1666 NEON::BI__builtin_neon_vmulxq_v,
1667 },
1668 {
1669 NEON::BI__builtin_neon_vpadd_f16,
1670 NEON::BI__builtin_neon_vpadd_v,
1671 },
1672 {
1673 NEON::BI__builtin_neon_vpaddq_f16,
1674 NEON::BI__builtin_neon_vpaddq_v,
1675 },
1676 {
1677 NEON::BI__builtin_neon_vpmax_f16,
1678 NEON::BI__builtin_neon_vpmax_v,
1679 },
1680 {
1681 NEON::BI__builtin_neon_vpmaxnm_f16,
1682 NEON::BI__builtin_neon_vpmaxnm_v,
1683 },
1684 {
1685 NEON::BI__builtin_neon_vpmaxnmq_f16,
1686 NEON::BI__builtin_neon_vpmaxnmq_v,
1687 },
1688 {
1689 NEON::BI__builtin_neon_vpmaxq_f16,
1690 NEON::BI__builtin_neon_vpmaxq_v,
1691 },
1692 {
1693 NEON::BI__builtin_neon_vpmin_f16,
1694 NEON::BI__builtin_neon_vpmin_v,
1695 },
1696 {
1697 NEON::BI__builtin_neon_vpminnm_f16,
1698 NEON::BI__builtin_neon_vpminnm_v,
1699 },
1700 {
1701 NEON::BI__builtin_neon_vpminnmq_f16,
1702 NEON::BI__builtin_neon_vpminnmq_v,
1703 },
1704 {
1705 NEON::BI__builtin_neon_vpminq_f16,
1706 NEON::BI__builtin_neon_vpminq_v,
1707 },
1708 {
1709 NEON::BI__builtin_neon_vrecpe_f16,
1710 NEON::BI__builtin_neon_vrecpe_v,
1711 },
1712 {
1713 NEON::BI__builtin_neon_vrecpeq_f16,
1714 NEON::BI__builtin_neon_vrecpeq_v,
1715 },
1716 {
1717 NEON::BI__builtin_neon_vrecps_f16,
1718 NEON::BI__builtin_neon_vrecps_v,
1719 },
1720 {
1721 NEON::BI__builtin_neon_vrecpsq_f16,
1722 NEON::BI__builtin_neon_vrecpsq_v,
1723 },
1724 {
1725 NEON::BI__builtin_neon_vrnd_f16,
1726 NEON::BI__builtin_neon_vrnd_v,
1727 },
1728 {
1729 NEON::BI__builtin_neon_vrnda_f16,
1730 NEON::BI__builtin_neon_vrnda_v,
1731 },
1732 {
1733 NEON::BI__builtin_neon_vrndaq_f16,
1734 NEON::BI__builtin_neon_vrndaq_v,
1735 },
1736 {
1737 NEON::BI__builtin_neon_vrndi_f16,
1738 NEON::BI__builtin_neon_vrndi_v,
1739 },
1740 {
1741 NEON::BI__builtin_neon_vrndiq_f16,
1742 NEON::BI__builtin_neon_vrndiq_v,
1743 },
1744 {
1745 NEON::BI__builtin_neon_vrndm_f16,
1746 NEON::BI__builtin_neon_vrndm_v,
1747 },
1748 {
1749 NEON::BI__builtin_neon_vrndmq_f16,
1750 NEON::BI__builtin_neon_vrndmq_v,
1751 },
1752 {
1753 NEON::BI__builtin_neon_vrndn_f16,
1754 NEON::BI__builtin_neon_vrndn_v,
1755 },
1756 {
1757 NEON::BI__builtin_neon_vrndnq_f16,
1758 NEON::BI__builtin_neon_vrndnq_v,
1759 },
1760 {
1761 NEON::BI__builtin_neon_vrndp_f16,
1762 NEON::BI__builtin_neon_vrndp_v,
1763 },
1764 {
1765 NEON::BI__builtin_neon_vrndpq_f16,
1766 NEON::BI__builtin_neon_vrndpq_v,
1767 },
1768 {
1769 NEON::BI__builtin_neon_vrndq_f16,
1770 NEON::BI__builtin_neon_vrndq_v,
1771 },
1772 {
1773 NEON::BI__builtin_neon_vrndx_f16,
1774 NEON::BI__builtin_neon_vrndx_v,
1775 },
1776 {
1777 NEON::BI__builtin_neon_vrndxq_f16,
1778 NEON::BI__builtin_neon_vrndxq_v,
1779 },
1780 {
1781 NEON::BI__builtin_neon_vrsqrte_f16,
1782 NEON::BI__builtin_neon_vrsqrte_v,
1783 },
1784 {
1785 NEON::BI__builtin_neon_vrsqrteq_f16,
1786 NEON::BI__builtin_neon_vrsqrteq_v,
1787 },
1788 {
1789 NEON::BI__builtin_neon_vrsqrts_f16,
1790 NEON::BI__builtin_neon_vrsqrts_v,
1791 },
1792 {
1793 NEON::BI__builtin_neon_vrsqrtsq_f16,
1794 NEON::BI__builtin_neon_vrsqrtsq_v,
1795 },
1796 {
1797 NEON::BI__builtin_neon_vsqrt_f16,
1798 NEON::BI__builtin_neon_vsqrt_v,
1799 },
1800 {
1801 NEON::BI__builtin_neon_vsqrtq_f16,
1802 NEON::BI__builtin_neon_vsqrtq_v,
1803 },
1804 {NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v},
1805 {NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v},
1806 {NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v},
1807 {NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v},
1808 {NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v},
1809 {NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v},
1810 {NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v},
1811 {NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v},
1812 {NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v},
1813 {NEON::BI__builtin_neon_vst1q_lane_bf16,
1814 NEON::BI__builtin_neon_vst1q_lane_v},
1815 {NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v},
1816 {NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v},
1817 {NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v},
1818 {NEON::BI__builtin_neon_vst2q_lane_bf16,
1819 NEON::BI__builtin_neon_vst2q_lane_v},
1820 {NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v},
1821 {NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v},
1822 {NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v},
1823 {NEON::BI__builtin_neon_vst3q_lane_bf16,
1824 NEON::BI__builtin_neon_vst3q_lane_v},
1825 {NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v},
1826 {NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v},
1827 {NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v},
1828 {NEON::BI__builtin_neon_vst4q_lane_bf16,
1829 NEON::BI__builtin_neon_vst4q_lane_v},
1830 // The mangling rules cause us to have one ID for each type for
1831 // vldap1(q)_lane and vstl1(q)_lane, but codegen is equivalent for all of
1832 // them. Choose an arbitrary one to be handled as the canonical variation.
1833 {NEON::BI__builtin_neon_vldap1_lane_u64,
1834 NEON::BI__builtin_neon_vldap1_lane_s64},
1835 {NEON::BI__builtin_neon_vldap1_lane_f64,
1836 NEON::BI__builtin_neon_vldap1_lane_s64},
1837 {NEON::BI__builtin_neon_vldap1_lane_p64,
1838 NEON::BI__builtin_neon_vldap1_lane_s64},
1839 {NEON::BI__builtin_neon_vldap1q_lane_u64,
1840 NEON::BI__builtin_neon_vldap1q_lane_s64},
1841 {NEON::BI__builtin_neon_vldap1q_lane_f64,
1842 NEON::BI__builtin_neon_vldap1q_lane_s64},
1843 {NEON::BI__builtin_neon_vldap1q_lane_p64,
1844 NEON::BI__builtin_neon_vldap1q_lane_s64},
1845 {NEON::BI__builtin_neon_vstl1_lane_u64,
1846 NEON::BI__builtin_neon_vstl1_lane_s64},
1847 {NEON::BI__builtin_neon_vstl1_lane_f64,
1848 NEON::BI__builtin_neon_vstl1_lane_s64},
1849 {NEON::BI__builtin_neon_vstl1_lane_p64,
1850 NEON::BI__builtin_neon_vstl1_lane_s64},
1851 {NEON::BI__builtin_neon_vstl1q_lane_u64,
1852 NEON::BI__builtin_neon_vstl1q_lane_s64},
1853 {NEON::BI__builtin_neon_vstl1q_lane_f64,
1854 NEON::BI__builtin_neon_vstl1q_lane_s64},
1855 {NEON::BI__builtin_neon_vstl1q_lane_p64,
1856 NEON::BI__builtin_neon_vstl1q_lane_s64},
1857};
1858
1859std::optional<mlir::Value>
1862 llvm::Triple::ArchType arch) {
1863 if (builtinID >= clang::AArch64::FirstSVEBuiltin &&
1864 builtinID <= clang::AArch64::LastSVEBuiltin)
1865 return emitAArch64SVEBuiltinExpr(builtinID, expr);
1866
1867 if (builtinID >= clang::AArch64::FirstSMEBuiltin &&
1868 builtinID <= clang::AArch64::LastSMEBuiltin)
1869 return emitAArch64SMEBuiltinExpr(builtinID, expr);
1870
1871 if (builtinID == Builtin::BI__builtin_cpu_supports) {
1872 cgm.errorNYI(expr->getSourceRange(),
1873 std::string("unimplemented AArch64 builtin call: ") +
1874 getContext().BuiltinInfo.getName(builtinID));
1875 return mlir::Value{};
1876 }
1877
1878 switch (builtinID) {
1879 default:
1880 break;
1881 case clang::AArch64::BI__builtin_arm_nop:
1882 case clang::AArch64::BI__builtin_arm_yield:
1883 case clang::AArch64::BI__yield:
1884 case clang::AArch64::BI__builtin_arm_wfe:
1885 case clang::AArch64::BI__wfe:
1886 case clang::AArch64::BI__builtin_arm_wfi:
1887 case clang::AArch64::BI__wfi:
1888 case clang::AArch64::BI__builtin_arm_sev:
1889 case clang::AArch64::BI__sev:
1890 case clang::AArch64::BI__builtin_arm_sevl:
1891 case clang::AArch64::BI__sevl:
1892 cgm.errorNYI(expr->getSourceRange(),
1893 std::string("unimplemented AArch64 builtin call: ") +
1894 getContext().BuiltinInfo.getName(builtinID));
1895 return mlir::Value{};
1896 }
1897
1898 if (builtinID == clang::AArch64::BI__builtin_arm_trap) {
1899 cgm.errorNYI(expr->getSourceRange(),
1900 std::string("unimplemented AArch64 builtin call: ") +
1901 getContext().BuiltinInfo.getName(builtinID));
1902 return mlir::Value{};
1903 }
1904
1905 if (builtinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
1906 cgm.errorNYI(expr->getSourceRange(),
1907 std::string("unimplemented AArch64 builtin call: ") +
1908 getContext().BuiltinInfo.getName(builtinID));
1909 return mlir::Value{};
1910 }
1911
1912 if (builtinID == clang::AArch64::BI__builtin_arm_rbit) {
1913 cgm.errorNYI(expr->getSourceRange(),
1914 std::string("unimplemented AArch64 builtin call: ") +
1915 getContext().BuiltinInfo.getName(builtinID));
1916 return mlir::Value{};
1917 }
1918 if (builtinID == clang::AArch64::BI__builtin_arm_rbit64) {
1919 cgm.errorNYI(expr->getSourceRange(),
1920 std::string("unimplemented AArch64 builtin call: ") +
1921 getContext().BuiltinInfo.getName(builtinID));
1922 return mlir::Value{};
1923 }
1924
1925 if (builtinID == clang::AArch64::BI__builtin_arm_clz ||
1926 builtinID == clang::AArch64::BI__builtin_arm_clz64) {
1927 cgm.errorNYI(expr->getSourceRange(),
1928 std::string("unimplemented AArch64 builtin call: ") +
1929 getContext().BuiltinInfo.getName(builtinID));
1930 return mlir::Value{};
1931 }
1932
1933 if (builtinID == clang::AArch64::BI__builtin_arm_cls) {
1934 cgm.errorNYI(expr->getSourceRange(),
1935 std::string("unimplemented AArch64 builtin call: ") +
1936 getContext().BuiltinInfo.getName(builtinID));
1937 return mlir::Value{};
1938 }
1939 if (builtinID == clang::AArch64::BI__builtin_arm_cls64) {
1940 cgm.errorNYI(expr->getSourceRange(),
1941 std::string("unimplemented AArch64 builtin call: ") +
1942 getContext().BuiltinInfo.getName(builtinID));
1943 return mlir::Value{};
1944 }
1945
1946 if (builtinID == clang::AArch64::BI__builtin_arm_rint32zf ||
1947 builtinID == clang::AArch64::BI__builtin_arm_rint32z) {
1948 cgm.errorNYI(expr->getSourceRange(),
1949 std::string("unimplemented AArch64 builtin call: ") +
1950 getContext().BuiltinInfo.getName(builtinID));
1951 return mlir::Value{};
1952 }
1953
1954 if (builtinID == clang::AArch64::BI__builtin_arm_rint64zf ||
1955 builtinID == clang::AArch64::BI__builtin_arm_rint64z) {
1956 cgm.errorNYI(expr->getSourceRange(),
1957 std::string("unimplemented AArch64 builtin call: ") +
1958 getContext().BuiltinInfo.getName(builtinID));
1959 return mlir::Value{};
1960 }
1961
1962 if (builtinID == clang::AArch64::BI__builtin_arm_rint32xf ||
1963 builtinID == clang::AArch64::BI__builtin_arm_rint32x) {
1964 cgm.errorNYI(expr->getSourceRange(),
1965 std::string("unimplemented AArch64 builtin call: ") +
1966 getContext().BuiltinInfo.getName(builtinID));
1967 return mlir::Value{};
1968 }
1969
1970 if (builtinID == clang::AArch64::BI__builtin_arm_rint64xf ||
1971 builtinID == clang::AArch64::BI__builtin_arm_rint64x) {
1972 cgm.errorNYI(expr->getSourceRange(),
1973 std::string("unimplemented AArch64 builtin call: ") +
1974 getContext().BuiltinInfo.getName(builtinID));
1975 return mlir::Value{};
1976 }
1977
1978 if (builtinID == clang::AArch64::BI__builtin_arm_jcvt) {
1979 cgm.errorNYI(expr->getSourceRange(),
1980 std::string("unimplemented AArch64 builtin call: ") +
1981 getContext().BuiltinInfo.getName(builtinID));
1982 return mlir::Value{};
1983 }
1984
1985 if (builtinID == clang::AArch64::BI__builtin_arm_ld64b ||
1986 builtinID == clang::AArch64::BI__builtin_arm_st64b ||
1987 builtinID == clang::AArch64::BI__builtin_arm_st64bv ||
1988 builtinID == clang::AArch64::BI__builtin_arm_st64bv0) {
1989 cgm.errorNYI(expr->getSourceRange(),
1990 std::string("unimplemented AArch64 builtin call: ") +
1991 getContext().BuiltinInfo.getName(builtinID));
1992 return mlir::Value{};
1993 }
1994
1995 if (builtinID == clang::AArch64::BI__builtin_arm_rndr ||
1996 builtinID == clang::AArch64::BI__builtin_arm_rndrrs) {
1997 cgm.errorNYI(expr->getSourceRange(),
1998 std::string("unimplemented AArch64 builtin call: ") +
1999 getContext().BuiltinInfo.getName(builtinID));
2000 return mlir::Value{};
2001 }
2002
2003 if (builtinID == clang::AArch64::BI__clear_cache) {
2004 cgm.errorNYI(expr->getSourceRange(),
2005 std::string("unimplemented AArch64 builtin call: ") +
2006 getContext().BuiltinInfo.getName(builtinID));
2007 return mlir::Value{};
2008 }
2009
2010 if ((builtinID == clang::AArch64::BI__builtin_arm_ldrex ||
2011 builtinID == clang::AArch64::BI__builtin_arm_ldaex) &&
2012 getContext().getTypeSize(expr->getType()) == 128) {
2013 cgm.errorNYI(expr->getSourceRange(),
2014 std::string("unimplemented AArch64 builtin call: ") +
2015 getContext().BuiltinInfo.getName(builtinID));
2016 return mlir::Value{};
2017 }
2018 if (builtinID == clang::AArch64::BI__builtin_arm_ldrex ||
2019 builtinID == clang::AArch64::BI__builtin_arm_ldaex) {
2020 cgm.errorNYI(expr->getSourceRange(),
2021 std::string("unimplemented AArch64 builtin call: ") +
2022 getContext().BuiltinInfo.getName(builtinID));
2023 return mlir::Value{};
2024 }
2025
2026 if ((builtinID == clang::AArch64::BI__builtin_arm_strex ||
2027 builtinID == clang::AArch64::BI__builtin_arm_stlex) &&
2028 getContext().getTypeSize(expr->getArg(0)->getType()) == 128) {
2029 cgm.errorNYI(expr->getSourceRange(),
2030 std::string("unimplemented AArch64 builtin call: ") +
2031 getContext().BuiltinInfo.getName(builtinID));
2032 return mlir::Value{};
2033 }
2034
2035 if (builtinID == clang::AArch64::BI__builtin_arm_strex ||
2036 builtinID == clang::AArch64::BI__builtin_arm_stlex) {
2037 cgm.errorNYI(expr->getSourceRange(),
2038 std::string("unimplemented AArch64 builtin call: ") +
2039 getContext().BuiltinInfo.getName(builtinID));
2040 return mlir::Value{};
2041 }
2042
2043 if (builtinID == clang::AArch64::BI__getReg ||
2044 builtinID == clang::AArch64::BI__setReg ||
2045 builtinID == clang::AArch64::BI__getRegFp ||
2046 builtinID == clang::AArch64::BI__setRegFp) {
2047 cgm.errorNYI(expr->getSourceRange(),
2048 std::string("unimplemented AArch64 builtin call: ") +
2049 getContext().BuiltinInfo.getName(builtinID));
2050 return mlir::Value{};
2051 }
2052
2053 if (builtinID == clang::AArch64::BI__break) {
2054 cgm.errorNYI(expr->getSourceRange(),
2055 std::string("unimplemented AArch64 builtin call: ") +
2056 getContext().BuiltinInfo.getName(builtinID));
2057 return mlir::Value{};
2058 }
2059
2060 if (builtinID == clang::AArch64::BI__builtin_arm_clrex) {
2061 cgm.errorNYI(expr->getSourceRange(),
2062 std::string("unimplemented AArch64 builtin call: ") +
2063 getContext().BuiltinInfo.getName(builtinID));
2064 return mlir::Value{};
2065 }
2066
2067 if (builtinID == clang::AArch64::BI_ReadWriteBarrier) {
2068 cgm.errorNYI(expr->getSourceRange(),
2069 std::string("unimplemented AArch64 builtin call: ") +
2070 getContext().BuiltinInfo.getName(builtinID));
2071 return mlir::Value{};
2072 }
2073
2074 // CRC32
2075 Intrinsic::ID crcIntrinsicID = Intrinsic::not_intrinsic;
2076 switch (builtinID) {
2077 case clang::AArch64::BI__builtin_arm_crc32b:
2078 crcIntrinsicID = Intrinsic::aarch64_crc32b;
2079 break;
2080 case clang::AArch64::BI__builtin_arm_crc32cb:
2081 crcIntrinsicID = Intrinsic::aarch64_crc32cb;
2082 break;
2083 case clang::AArch64::BI__builtin_arm_crc32h:
2084 crcIntrinsicID = Intrinsic::aarch64_crc32h;
2085 break;
2086 case clang::AArch64::BI__builtin_arm_crc32ch:
2087 crcIntrinsicID = Intrinsic::aarch64_crc32ch;
2088 break;
2089 case clang::AArch64::BI__builtin_arm_crc32w:
2090 crcIntrinsicID = Intrinsic::aarch64_crc32w;
2091 break;
2092 case clang::AArch64::BI__builtin_arm_crc32cw:
2093 crcIntrinsicID = Intrinsic::aarch64_crc32cw;
2094 break;
2095 case clang::AArch64::BI__builtin_arm_crc32d:
2096 crcIntrinsicID = Intrinsic::aarch64_crc32x;
2097 break;
2098 case clang::AArch64::BI__builtin_arm_crc32cd:
2099 crcIntrinsicID = Intrinsic::aarch64_crc32cx;
2100 break;
2101 }
2102
2103 if (crcIntrinsicID != Intrinsic::not_intrinsic) {
2104 cgm.errorNYI(expr->getSourceRange(),
2105 std::string("unimplemented AArch64 builtin call: ") +
2106 getContext().BuiltinInfo.getName(builtinID));
2107 return mlir::Value{};
2108 }
2109
2110 // Memory Operations (MOPS)
2111 if (builtinID == AArch64::BI__builtin_arm_mops_memset_tag) {
2112 cgm.errorNYI(expr->getSourceRange(),
2113 std::string("unimplemented AArch64 builtin call: ") +
2114 getContext().BuiltinInfo.getName(builtinID));
2115 return mlir::Value{};
2116 }
2117
2118 // Memory Tagging Extensions (MTE) Intrinsics
2119 Intrinsic::ID mteIntrinsicID = Intrinsic::not_intrinsic;
2120 switch (builtinID) {
2121 case clang::AArch64::BI__builtin_arm_irg:
2122 mteIntrinsicID = Intrinsic::aarch64_irg;
2123 break;
2124 case clang::AArch64::BI__builtin_arm_addg:
2125 mteIntrinsicID = Intrinsic::aarch64_addg;
2126 break;
2127 case clang::AArch64::BI__builtin_arm_gmi:
2128 mteIntrinsicID = Intrinsic::aarch64_gmi;
2129 break;
2130 case clang::AArch64::BI__builtin_arm_ldg:
2131 mteIntrinsicID = Intrinsic::aarch64_ldg;
2132 break;
2133 case clang::AArch64::BI__builtin_arm_stg:
2134 mteIntrinsicID = Intrinsic::aarch64_stg;
2135 break;
2136 case clang::AArch64::BI__builtin_arm_subp:
2137 mteIntrinsicID = Intrinsic::aarch64_subp;
2138 break;
2139 }
2140
2141 if (mteIntrinsicID != Intrinsic::not_intrinsic) {
2142 cgm.errorNYI(expr->getSourceRange(),
2143 std::string("unimplemented AArch64 builtin call: ") +
2144 getContext().BuiltinInfo.getName(builtinID));
2145 return mlir::Value{};
2146 }
2147
2148 if (builtinID == clang::AArch64::BI__builtin_arm_rsr ||
2149 builtinID == clang::AArch64::BI__builtin_arm_rsr64 ||
2150 builtinID == clang::AArch64::BI__builtin_arm_rsr128 ||
2151 builtinID == clang::AArch64::BI__builtin_arm_rsrp ||
2152 builtinID == clang::AArch64::BI__builtin_arm_wsr ||
2153 builtinID == clang::AArch64::BI__builtin_arm_wsr64 ||
2154 builtinID == clang::AArch64::BI__builtin_arm_wsr128 ||
2155 builtinID == clang::AArch64::BI__builtin_arm_wsrp) {
2156 cgm.errorNYI(expr->getSourceRange(),
2157 std::string("unimplemented AArch64 builtin call: ") +
2158 getContext().BuiltinInfo.getName(builtinID));
2159 return mlir::Value{};
2160 }
2161
2162 if (builtinID == clang::AArch64::BI_ReadStatusReg ||
2163 builtinID == clang::AArch64::BI_WriteStatusReg ||
2164 builtinID == clang::AArch64::BI__sys) {
2165 cgm.errorNYI(expr->getSourceRange(),
2166 std::string("unimplemented AArch64 builtin call: ") +
2167 getContext().BuiltinInfo.getName(builtinID));
2168 return mlir::Value{};
2169 }
2170
2171 if (builtinID == clang::AArch64::BI_AddressOfReturnAddress) {
2172 cgm.errorNYI(expr->getSourceRange(),
2173 std::string("unimplemented AArch64 builtin call: ") +
2174 getContext().BuiltinInfo.getName(builtinID));
2175 return mlir::Value{};
2176 }
2177
2178 if (builtinID == clang::AArch64::BI__builtin_sponentry) {
2179 cgm.errorNYI(expr->getSourceRange(),
2180 std::string("unimplemented AArch64 builtin call: ") +
2181 getContext().BuiltinInfo.getName(builtinID));
2182 return mlir::Value{};
2183 }
2184
2185 if (builtinID == clang::AArch64::BI__mulh ||
2186 builtinID == clang::AArch64::BI__umulh) {
2187 cgm.errorNYI(expr->getSourceRange(),
2188 std::string("unimplemented AArch64 builtin call: ") +
2189 getContext().BuiltinInfo.getName(builtinID));
2190 return mlir::Value{};
2191 }
2192
2193 if (builtinID == AArch64::BI__writex18byte ||
2194 builtinID == AArch64::BI__writex18word ||
2195 builtinID == AArch64::BI__writex18dword ||
2196 builtinID == AArch64::BI__writex18qword) {
2197 cgm.errorNYI(expr->getSourceRange(),
2198 std::string("unimplemented AArch64 builtin call: ") +
2199 getContext().BuiltinInfo.getName(builtinID));
2200 return mlir::Value{};
2201 }
2202
2203 if (builtinID == AArch64::BI__readx18byte ||
2204 builtinID == AArch64::BI__readx18word ||
2205 builtinID == AArch64::BI__readx18dword ||
2206 builtinID == AArch64::BI__readx18qword) {
2207 cgm.errorNYI(expr->getSourceRange(),
2208 std::string("unimplemented AArch64 builtin call: ") +
2209 getContext().BuiltinInfo.getName(builtinID));
2210 return mlir::Value{};
2211 }
2212
2213 if (builtinID == AArch64::BI__addx18byte ||
2214 builtinID == AArch64::BI__addx18word ||
2215 builtinID == AArch64::BI__addx18dword ||
2216 builtinID == AArch64::BI__addx18qword ||
2217 builtinID == AArch64::BI__incx18byte ||
2218 builtinID == AArch64::BI__incx18word ||
2219 builtinID == AArch64::BI__incx18dword ||
2220 builtinID == AArch64::BI__incx18qword) {
2221 cgm.errorNYI(expr->getSourceRange(),
2222 std::string("unimplemented AArch64 builtin call: ") +
2223 getContext().BuiltinInfo.getName(builtinID));
2224 return mlir::Value{};
2225 }
2226
2227 if (builtinID == AArch64::BI_CopyDoubleFromInt64 ||
2228 builtinID == AArch64::BI_CopyFloatFromInt32 ||
2229 builtinID == AArch64::BI_CopyInt32FromFloat ||
2230 builtinID == AArch64::BI_CopyInt64FromDouble) {
2231 cgm.errorNYI(expr->getSourceRange(),
2232 std::string("unimplemented AArch64 builtin call: ") +
2233 getContext().BuiltinInfo.getName(builtinID));
2234 return mlir::Value{};
2235 }
2236
2237 if (builtinID == AArch64::BI_CountLeadingOnes ||
2238 builtinID == AArch64::BI_CountLeadingOnes64 ||
2239 builtinID == AArch64::BI_CountLeadingZeros ||
2240 builtinID == AArch64::BI_CountLeadingZeros64) {
2241 cgm.errorNYI(expr->getSourceRange(),
2242 std::string("unimplemented AArch64 builtin call: ") +
2243 getContext().BuiltinInfo.getName(builtinID));
2244 return mlir::Value{};
2245 }
2246
2247 if (builtinID == AArch64::BI_CountLeadingSigns ||
2248 builtinID == AArch64::BI_CountLeadingSigns64) {
2249 cgm.errorNYI(expr->getSourceRange(),
2250 std::string("unimplemented AArch64 builtin call: ") +
2251 getContext().BuiltinInfo.getName(builtinID));
2252 return mlir::Value{};
2253 }
2254
2255 if (builtinID == AArch64::BI_CountOneBits ||
2256 builtinID == AArch64::BI_CountOneBits64 ||
2257 builtinID == AArch64::BI_CountTrailingZeros ||
2258 builtinID == AArch64::BI_CountTrailingZeros64) {
2259 cgm.errorNYI(expr->getSourceRange(),
2260 std::string("unimplemented AArch64 builtin call: ") +
2261 getContext().BuiltinInfo.getName(builtinID));
2262 return mlir::Value{};
2263 }
2264
2265 if (builtinID == AArch64::BI__prefetch ||
2266 builtinID == AArch64::BI__prefetch2) {
2267 cgm.errorNYI(expr->getSourceRange(),
2268 std::string("unimplemented AArch64 builtin call: ") +
2269 getContext().BuiltinInfo.getName(builtinID));
2270 return mlir::Value{};
2271 }
2272
2273 if (builtinID == AArch64::BI__hlt) {
2274 cgm.errorNYI(expr->getSourceRange(),
2275 std::string("unimplemented AArch64 builtin call: ") +
2276 getContext().BuiltinInfo.getName(builtinID));
2277 return mlir::Value{};
2278 }
2279
2280 if (builtinID == NEON::BI__builtin_neon_vcvth_bf16_f32) {
2281 cgm.errorNYI(expr->getSourceRange(),
2282 std::string("unimplemented AArch64 builtin call: ") +
2283 getContext().BuiltinInfo.getName(builtinID));
2284 return mlir::Value{};
2285 }
2286
2287 // Handle MSVC intrinsics before argument evaluation to prevent double
2288 // evaluation.
2290
2291 // Some intrinsics are equivalent; if they are, use the base intrinsic ID.
2292 auto it = llvm::find_if(neonEquivalentIntrinsicMap, [builtinID](auto &p) {
2293 return p.first == builtinID;
2294 });
2295 if (it != end(neonEquivalentIntrinsicMap))
2296 builtinID = it->second;
2297
2298 // Find out if any arguments are required to be integer constant
2299 // expressions.
2301 unsigned iceArguments = 0;
2303 getContext().GetBuiltinType(builtinID, error, &iceArguments);
2304 assert(error == ASTContext::GE_None && "Should not codegen an error");
2306
2307 // Skip extra arguments used to discriminate vector types and that are
2308 // intended for Sema checking.
2309 bool hasExtraArg = hasExtraNeonArgument(builtinID);
2310 unsigned numArgs = expr->getNumArgs() - (hasExtraArg ? 1 : 0);
2311 for (unsigned i = 0, e = numArgs; i != e; i++) {
2312 if (i == 0) {
2313 switch (builtinID) {
2314 case NEON::BI__builtin_neon_vld1_v:
2315 case NEON::BI__builtin_neon_vld1q_v:
2316 case NEON::BI__builtin_neon_vld1_dup_v:
2317 case NEON::BI__builtin_neon_vld1q_dup_v:
2318 case NEON::BI__builtin_neon_vld1_lane_v:
2319 case NEON::BI__builtin_neon_vld1q_lane_v:
2320 case NEON::BI__builtin_neon_vst1_v:
2321 case NEON::BI__builtin_neon_vst1q_v:
2322 case NEON::BI__builtin_neon_vst1_lane_v:
2323 case NEON::BI__builtin_neon_vst1q_lane_v:
2324 case NEON::BI__builtin_neon_vldap1_lane_s64:
2325 case NEON::BI__builtin_neon_vldap1q_lane_s64:
2326 case NEON::BI__builtin_neon_vstl1_lane_s64:
2327 case NEON::BI__builtin_neon_vstl1q_lane_s64:
2328 // Get the alignment for the argument in addition to the value;
2329 // we'll use it later.
2330 cgm.errorNYI(
2331 expr->getSourceRange(),
2332 std::string("unimplemented AArch64 builtin argument handling ") +
2333 getContext().BuiltinInfo.getName(builtinID));
2334 }
2335 }
2336 ops.push_back(
2337 emitScalarOrConstFoldImmArg(iceArguments, i, expr->getArg(i)));
2338 }
2339
2342 if (builtin)
2343 return emitCommonNeonSISDBuiltinExpr(*this, *builtin, ops, expr);
2344
2345 // Not all intrinsics handled by the common case work for AArch64 yet, so only
2346 // defer to common code if it's been added to our special map.
2348
2350
2351 const Expr *arg = expr->getArg(expr->getNumArgs() - 1);
2353 // A trailing constant integer is used for discriminating overloaded builtin
2354 // calls. Use it to determine the type of this overloaded NEON intrinsic.
2355 if (std::optional<llvm::APSInt> result =
2356 arg->getIntegerConstantExpr(getContext()))
2357 type = NeonTypeFlags(result->getZExtValue());
2358
2359 bool usgn = type.isUnsigned();
2360
2361 mlir::Location loc = getLoc(expr->getExprLoc());
2362
2363 // Not all intrinsics handled by the common case work for AArch64 yet, so only
2364 // defer to common code if it's been added to our special map.
2367 if (builtin)
2369 *this, builtin->BuiltinID, builtin->LLVMIntrinsic,
2370 builtin->AltLLVMIntrinsic, builtin->NameHint, builtin->TypeModifier,
2371 expr, ops);
2372
2373 // Handle non-overloaded intrinsics first.
2374 switch (builtinID) {
2375 default:
2376 break;
2377 case NEON::BI__builtin_neon_vabsh_f16: {
2378 return cir::FAbsOp::create(builder, loc, ops);
2379 }
2380 case NEON::BI__builtin_neon_vaddq_p128: {
2381 cir::VectorType byteTy = cir::VectorType::get(builder.getUInt8Ty(), 16);
2382 ops[0] = builder.createBitcast(ops[0], byteTy);
2383 ops[1] = builder.createBitcast(ops[1], byteTy);
2384 mlir::Value result = builder.createXor(loc, ops[0], ops[1]);
2385 return builder.createBitcast(result, convertType(expr->getType()));
2386 }
2387 case NEON::BI__builtin_neon_vldrq_p128:
2388 case NEON::BI__builtin_neon_vstrq_p128:
2389 case NEON::BI__builtin_neon_vcvts_f32_u32:
2390 case NEON::BI__builtin_neon_vcvtd_f64_u64:
2391 case NEON::BI__builtin_neon_vcvts_f32_s32:
2392 case NEON::BI__builtin_neon_vcvtd_f64_s64:
2393 case NEON::BI__builtin_neon_vcvth_f16_u16:
2394 case NEON::BI__builtin_neon_vcvth_f16_u32:
2395 case NEON::BI__builtin_neon_vcvth_f16_u64:
2396 case NEON::BI__builtin_neon_vcvth_f16_s16:
2397 case NEON::BI__builtin_neon_vcvth_f16_s32:
2398 case NEON::BI__builtin_neon_vcvth_f16_s64:
2399 case NEON::BI__builtin_neon_vcvtah_u16_f16:
2400 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
2401 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
2402 case NEON::BI__builtin_neon_vcvtph_u16_f16:
2403 case NEON::BI__builtin_neon_vcvth_u16_f16:
2404 case NEON::BI__builtin_neon_vcvtah_s16_f16:
2405 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
2406 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
2407 case NEON::BI__builtin_neon_vcvtph_s16_f16:
2408 case NEON::BI__builtin_neon_vcvth_s16_f16:
2409 case NEON::BI__builtin_neon_vcaleh_f16:
2410 case NEON::BI__builtin_neon_vcalth_f16:
2411 case NEON::BI__builtin_neon_vcageh_f16:
2412 case NEON::BI__builtin_neon_vcagth_f16:
2413 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
2414 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
2415 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
2416 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
2417 case NEON::BI__builtin_neon_vpaddd_s64:
2418 case NEON::BI__builtin_neon_vpaddd_f64:
2419 case NEON::BI__builtin_neon_vpadds_f32:
2420 cgm.errorNYI(expr->getSourceRange(),
2421 std::string("unimplemented AArch64 builtin call: ") +
2422 getContext().BuiltinInfo.getName(builtinID));
2423 return mlir::Value{};
2424 case NEON::BI__builtin_neon_vceqzd_s64:
2425 case NEON::BI__builtin_neon_vceqzd_f64:
2426 case NEON::BI__builtin_neon_vceqzs_f32:
2427 case NEON::BI__builtin_neon_vceqzh_f16:
2429 *this, builder, loc, ops[0],
2430 convertType(expr->getCallReturnType(getContext())), cir::CmpOpKind::eq);
2431 case NEON::BI__builtin_neon_vcgezd_s64:
2432 case NEON::BI__builtin_neon_vcgezd_f64:
2433 case NEON::BI__builtin_neon_vcgezs_f32:
2434 case NEON::BI__builtin_neon_vcgezh_f16:
2435 case NEON::BI__builtin_neon_vclezd_s64:
2436 case NEON::BI__builtin_neon_vclezd_f64:
2437 case NEON::BI__builtin_neon_vclezs_f32:
2438 case NEON::BI__builtin_neon_vclezh_f16:
2439 case NEON::BI__builtin_neon_vcgtzd_s64:
2440 case NEON::BI__builtin_neon_vcgtzd_f64:
2441 case NEON::BI__builtin_neon_vcgtzs_f32:
2442 case NEON::BI__builtin_neon_vcgtzh_f16:
2443 case NEON::BI__builtin_neon_vcltzd_s64:
2444 case NEON::BI__builtin_neon_vcltzd_f64:
2445 case NEON::BI__builtin_neon_vcltzs_f32:
2446 case NEON::BI__builtin_neon_vcltzh_f16:
2447 case NEON::BI__builtin_neon_vceqzd_u64: {
2449 *this, builder, loc, ops[0],
2450 convertType(expr->getCallReturnType(getContext())), cir::CmpOpKind::eq);
2451 }
2452 case NEON::BI__builtin_neon_vceqd_f64:
2453 case NEON::BI__builtin_neon_vcled_f64:
2454 case NEON::BI__builtin_neon_vcltd_f64:
2455 case NEON::BI__builtin_neon_vcged_f64:
2456 case NEON::BI__builtin_neon_vcgtd_f64:
2457 case NEON::BI__builtin_neon_vceqs_f32:
2458 case NEON::BI__builtin_neon_vcles_f32:
2459 case NEON::BI__builtin_neon_vclts_f32:
2460 case NEON::BI__builtin_neon_vcges_f32:
2461 case NEON::BI__builtin_neon_vcgts_f32:
2462 case NEON::BI__builtin_neon_vceqh_f16:
2463 case NEON::BI__builtin_neon_vcleh_f16:
2464 case NEON::BI__builtin_neon_vclth_f16:
2465 case NEON::BI__builtin_neon_vcgeh_f16:
2466 case NEON::BI__builtin_neon_vcgth_f16:
2467 case NEON::BI__builtin_neon_vceqd_s64:
2468 case NEON::BI__builtin_neon_vceqd_u64:
2469 case NEON::BI__builtin_neon_vcgtd_s64:
2470 case NEON::BI__builtin_neon_vcgtd_u64:
2471 case NEON::BI__builtin_neon_vcltd_s64:
2472 case NEON::BI__builtin_neon_vcltd_u64:
2473 case NEON::BI__builtin_neon_vcged_u64:
2474 case NEON::BI__builtin_neon_vcged_s64:
2475 case NEON::BI__builtin_neon_vcled_u64:
2476 case NEON::BI__builtin_neon_vcled_s64:
2477 cgm.errorNYI(expr->getSourceRange(),
2478 std::string("unimplemented AArch64 builtin call: ") +
2479 getContext().BuiltinInfo.getName(builtinID));
2480 return mlir::Value{};
2481 case NEON::BI__builtin_neon_vnegd_s64: {
2482 return builder.createNeg(loc, ops[0]);
2483 }
2484 case NEON::BI__builtin_neon_vnegh_f16: {
2485 return builder.createFNeg(loc, ops[0]);
2486 }
2487 case NEON::BI__builtin_neon_vtstd_s64:
2488 case NEON::BI__builtin_neon_vtstd_u64:
2489 case NEON::BI__builtin_neon_vset_lane_i8:
2490 case NEON::BI__builtin_neon_vset_lane_i16:
2491 case NEON::BI__builtin_neon_vset_lane_i32:
2492 case NEON::BI__builtin_neon_vset_lane_i64:
2493 case NEON::BI__builtin_neon_vset_lane_bf16:
2494 case NEON::BI__builtin_neon_vset_lane_f32:
2495 case NEON::BI__builtin_neon_vsetq_lane_i8:
2496 case NEON::BI__builtin_neon_vsetq_lane_i16:
2497 case NEON::BI__builtin_neon_vsetq_lane_i32:
2498 case NEON::BI__builtin_neon_vsetq_lane_i64:
2499 case NEON::BI__builtin_neon_vsetq_lane_bf16:
2500 case NEON::BI__builtin_neon_vsetq_lane_f32:
2501 case NEON::BI__builtin_neon_vset_lane_f64:
2502 case NEON::BI__builtin_neon_vset_lane_mf8:
2503 case NEON::BI__builtin_neon_vsetq_lane_mf8:
2504 case NEON::BI__builtin_neon_vsetq_lane_f64:
2505 cgm.errorNYI(expr->getSourceRange(),
2506 std::string("unimplemented AArch64 builtin call: ") +
2507 getContext().BuiltinInfo.getName(builtinID));
2508 return mlir::Value{};
2509
2510 case NEON::BI__builtin_neon_vget_lane_i8:
2511 case NEON::BI__builtin_neon_vdupb_lane_i8:
2512 case NEON::BI__builtin_neon_vgetq_lane_i8:
2513 case NEON::BI__builtin_neon_vdupb_laneq_i8:
2514 case NEON::BI__builtin_neon_vget_lane_mf8:
2515 case NEON::BI__builtin_neon_vdupb_lane_mf8:
2516 case NEON::BI__builtin_neon_vgetq_lane_mf8:
2517 case NEON::BI__builtin_neon_vdupb_laneq_mf8:
2518 case NEON::BI__builtin_neon_vget_lane_i16:
2519 case NEON::BI__builtin_neon_vduph_lane_i16:
2520 case NEON::BI__builtin_neon_vgetq_lane_i16:
2521 case NEON::BI__builtin_neon_vduph_laneq_i16:
2522 case NEON::BI__builtin_neon_vget_lane_i32:
2523 case NEON::BI__builtin_neon_vdups_lane_i32:
2524 case NEON::BI__builtin_neon_vdups_lane_f32:
2525 case NEON::BI__builtin_neon_vgetq_lane_i32:
2526 case NEON::BI__builtin_neon_vdups_laneq_i32:
2527 case NEON::BI__builtin_neon_vget_lane_i64:
2528 case NEON::BI__builtin_neon_vdupd_lane_i64:
2529 case NEON::BI__builtin_neon_vdupd_lane_f64:
2530 case NEON::BI__builtin_neon_vgetq_lane_i64:
2531 case NEON::BI__builtin_neon_vdupd_laneq_i64:
2532 case NEON::BI__builtin_neon_vget_lane_f32:
2533 case NEON::BI__builtin_neon_vget_lane_f64:
2534 case NEON::BI__builtin_neon_vgetq_lane_f32:
2535 case NEON::BI__builtin_neon_vdups_laneq_f32:
2536 case NEON::BI__builtin_neon_vgetq_lane_f64:
2537 case NEON::BI__builtin_neon_vdupd_laneq_f64:
2538 return cir::VecExtractOp::create(builder, loc, ops[0],
2539 emitScalarExpr(expr->getArg(1)));
2540 case NEON::BI__builtin_neon_vaddh_f16:
2541 return builder.createFAdd(loc, ops[0], ops[1]);
2542 case NEON::BI__builtin_neon_vsubh_f16:
2543 return builder.createFSub(loc, ops[0], ops[1]);
2544 case NEON::BI__builtin_neon_vmulh_f16:
2545 return builder.createFMul(loc, ops[0], ops[1]);
2546 case NEON::BI__builtin_neon_vdivh_f16:
2547 return builder.createFDiv(loc, ops[0], ops[1]);
2548 case NEON::BI__builtin_neon_vfmah_f16:
2549 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
2550 std::rotate(ops.begin(), ops.begin() + 1, ops.end());
2551 return emitCallMaybeConstrainedBuiltin(builder, loc, "fma",
2552 convertType(expr->getType()), ops);
2553 break;
2554 case NEON::BI__builtin_neon_vfmsh_f16:
2555 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
2556 std::rotate(ops.begin(), ops.begin() + 1, ops.end());
2557 ops[0] = builder.createFNeg(loc, ops[0]);
2558 return emitCallMaybeConstrainedBuiltin(builder, loc, "fma",
2559 convertType(expr->getType()), ops);
2560 case NEON::BI__builtin_neon_vaddd_s64:
2561 case NEON::BI__builtin_neon_vaddd_u64:
2562 return builder.createAdd(loc, ops[0], ops[1]);
2563 case NEON::BI__builtin_neon_vsubd_s64:
2564 case NEON::BI__builtin_neon_vsubd_u64:
2565 case NEON::BI__builtin_neon_vqdmlalh_s16:
2566 case NEON::BI__builtin_neon_vqdmlslh_s16:
2567 cgm.errorNYI(expr->getSourceRange(),
2568 std::string("unimplemented AArch64 builtin call: ") +
2569 getContext().BuiltinInfo.getName(builtinID));
2570 return mlir::Value{};
2571 case NEON::BI__builtin_neon_vqshlud_n_s64: {
2572 cir::IntType int64Type = builder.getSInt64Ty();
2573 ops[1] = builder.getSInt64(getZExtIntValueFromConstOp(ops[1]), loc);
2574 return emitNeonCall(cgm, builder, {int64Type, int64Type}, ops,
2575 "aarch64.neon.sqshlu", convertType(expr->getType()),
2576 loc);
2577 }
2578 case NEON::BI__builtin_neon_vqshld_n_u64:
2579 case NEON::BI__builtin_neon_vqshld_n_s64: {
2580 cir::IntType int64Type = builtinID == NEON::BI__builtin_neon_vqshld_n_u64
2581 ? builder.getUInt64Ty()
2582 : builder.getSInt64Ty();
2583 llvm::StringRef intrinsicName =
2584 builtinID == NEON::BI__builtin_neon_vqshld_n_u64 ? "aarch64.neon.uqshl"
2585 : "aarch64.neon.sqshl";
2586 ops[1] = builder.getSInt64(getZExtIntValueFromConstOp(ops[1]), loc);
2587 return emitNeonCall(cgm, builder, {int64Type, int64Type}, ops,
2588 intrinsicName, convertType(expr->getType()), loc);
2589 }
2590 case NEON::BI__builtin_neon_vrshrd_n_u64:
2591 case NEON::BI__builtin_neon_vrshrd_n_s64: {
2592 llvm::StringRef intrName = builtinID == NEON::BI__builtin_neon_vrshrd_n_s64
2593 ? "aarch64.neon.srshl"
2594 : "aarch64.neon.urshl";
2595 cir::IntType int64Ty = builtinID == NEON::BI__builtin_neon_vqshld_n_u64
2596 ? builder.getUInt64Ty()
2597 : builder.getSInt64Ty();
2598 int64_t sv = -cast<cir::IntAttr>(
2599 cast<cir::ConstantOp>(ops[1].getDefiningOp()).getValue())
2600 .getSInt();
2601 ops[1] = builder.getSInt64(sv, loc);
2602 return emitNeonCall(cgm, builder, {int64Ty, builder.getSInt64Ty()}, ops,
2603 intrName, int64Ty, loc);
2604 }
2605 case NEON::BI__builtin_neon_vrsrad_n_u64:
2606 case NEON::BI__builtin_neon_vrsrad_n_s64: {
2607 cir::IntType int64Type = builtinID == NEON::BI__builtin_neon_vrsrad_n_u64
2608 ? builder.getUInt64Ty()
2609 : builder.getSInt64Ty();
2610 ops[2] = builder.createNeg(loc, ops[2]);
2611 const StringRef intrName = builtinID == NEON::BI__builtin_neon_vrsrad_n_u64
2612 ? "aarch64.neon.urshl"
2613 : "aarch64.neon.srshl";
2614
2616 ops[1], builder.createIntCast(ops[2], builder.getSInt64Ty())};
2617 ops[1] = builder.emitIntrinsicCallOp(loc, intrName, int64Type, args);
2618 return builder.createAdd(loc, ops[0],
2619 builder.createBitcast(ops[1], int64Type));
2620 }
2621 case NEON::BI__builtin_neon_vshld_n_s64:
2622 case NEON::BI__builtin_neon_vshld_n_u64: {
2623 auto loc = getLoc(expr->getExprLoc());
2624 std::optional<llvm::APSInt> amt =
2625 expr->getArg(1)->getIntegerConstantExpr(getContext());
2626 assert(amt && "Expected argument to be a constant");
2627 return builder.createShiftLeft(loc, ops[0], amt->getZExtValue());
2628 }
2629 case NEON::BI__builtin_neon_vshrd_n_s64: {
2630 std::optional<llvm::APSInt> amt =
2631 expr->getArg(1)->getIntegerConstantExpr(getContext());
2632 assert(amt && "Expected argument to be a constant");
2633 return builder.createShiftRight(
2634 loc, ops[0], std::min(static_cast<uint64_t>(63), amt->getZExtValue()));
2635 }
2636 case NEON::BI__builtin_neon_vshrd_n_u64: {
2637 std::optional<llvm::APSInt> amt =
2638 expr->getArg(1)->getIntegerConstantExpr(getContext());
2639 assert(amt && "Expected argument to be a constant");
2640 uint64_t shiftAmt = amt->getZExtValue();
2641 // Right-shifting an unsigned value by its size yields 0.
2642 if (shiftAmt == 64)
2643 return builder.getConstInt(loc, builder.getUInt64Ty(), 0);
2644 return builder.createShiftRight(loc, ops[0], shiftAmt);
2645 }
2646 case NEON::BI__builtin_neon_vsrad_n_s64: {
2647 std::optional<llvm::APSInt> amt =
2648 expr->getArg(2)->getIntegerConstantExpr(getContext());
2649 assert(amt && "Expected argument to be a constant");
2650 uint64_t shiftAmt =
2651 std::min(static_cast<uint64_t>(63), amt->getZExtValue());
2652 mlir::Value shifted =
2653 builder.createShiftRight(loc, ops[1], static_cast<unsigned>(shiftAmt));
2654 return builder.createAdd(loc, ops[0], shifted);
2655 }
2656 case NEON::BI__builtin_neon_vsrad_n_u64: {
2657 std::optional<llvm::APSInt> amt =
2658 expr->getArg(2)->getIntegerConstantExpr(getContext());
2659 assert(amt && "Expected argument to be a constant");
2660 uint64_t shiftAmt = amt->getZExtValue();
2661 // Right-shifting an unsigned value by its size yields 0, so a + 0 = a.
2662 if (shiftAmt == 64)
2663 return ops[0];
2664 mlir::Value shifted =
2665 builder.createShiftRight(loc, ops[1], static_cast<unsigned>(shiftAmt));
2666 return builder.createAdd(loc, ops[0], shifted);
2667 }
2668 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
2669 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
2670 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
2671 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16:
2672 case NEON::BI__builtin_neon_vqdmlals_s32:
2673 case NEON::BI__builtin_neon_vqdmlsls_s32:
2674 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
2675 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
2676 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
2677 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
2678 cgm.errorNYI(expr->getSourceRange(),
2679 std::string("unimplemented AArch64 builtin call: ") +
2680 getContext().BuiltinInfo.getName(builtinID));
2681 return mlir::Value{};
2682 }
2683 case NEON::BI__builtin_neon_vget_lane_bf16:
2684 case NEON::BI__builtin_neon_vduph_lane_bf16:
2685 case NEON::BI__builtin_neon_vduph_lane_f16:
2686 case NEON::BI__builtin_neon_vgetq_lane_bf16:
2687 case NEON::BI__builtin_neon_vduph_laneq_bf16:
2688 case NEON::BI__builtin_neon_vduph_laneq_f16: {
2689 return cir::VecExtractOp::create(builder, loc, ops[0], ops[1]);
2690 }
2691 case NEON::BI__builtin_neon_vcvt_bf16_f32:
2692 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32:
2693 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32:
2694 case clang::AArch64::BI_InterlockedAdd:
2695 case clang::AArch64::BI_InterlockedAdd_acq:
2696 case clang::AArch64::BI_InterlockedAdd_rel:
2697 case clang::AArch64::BI_InterlockedAdd_nf:
2698 case clang::AArch64::BI_InterlockedAdd64:
2699 case clang::AArch64::BI_InterlockedAdd64_acq:
2700 case clang::AArch64::BI_InterlockedAdd64_rel:
2701 case clang::AArch64::BI_InterlockedAdd64_nf:
2702 cgm.errorNYI(expr->getSourceRange(),
2703 std::string("unimplemented AArch64 builtin call: ") +
2704 getContext().BuiltinInfo.getName(builtinID));
2705 return mlir::Value{};
2706 }
2707
2708 cir::VectorType ty = getNeonType(this, type, loc);
2709 if (!ty)
2710 return nullptr;
2711
2712 llvm::StringRef intrName;
2713
2714 switch (builtinID) {
2715 default:
2716 return std::nullopt;
2717 case NEON::BI__builtin_neon_vbsl_v:
2718 case NEON::BI__builtin_neon_vbslq_v: {
2719
2720 cir::VectorType bitTy = getIntVecFromVecTy(builder, ty);
2721 ops[0] = builder.createBitcast(ops[0], bitTy);
2722 ops[1] = builder.createBitcast(ops[1], bitTy);
2723 ops[2] = builder.createBitcast(ops[2], bitTy);
2724
2725 ops[1] = builder.createAnd(loc, ops[0], ops[1]);
2726 ops[2] = builder.createAnd(loc, builder.createNot(ops[0]), ops[2]);
2727 ops[0] = builder.createOr(loc, ops[1], ops[2]);
2728 return builder.createBitcast(ops[0], ty);
2729 }
2730 case NEON::BI__builtin_neon_vfma_lane_v:
2731 cgm.errorNYI(expr->getSourceRange(),
2732 std::string("unimplemented AArch64 builtin call: ") +
2733 getContext().BuiltinInfo.getName(builtinID));
2734 return mlir::Value{};
2735 case NEON::BI__builtin_neon_vfmaq_lane_v: {
2736 mlir::Value addend = builder.createBitcast(ops[0], ty);
2737 mlir::Value multiplicand = builder.createBitcast(ops[1], ty);
2738 // The lane source operand is the non-quad vector, so it has half as many
2739 // lanes as the quad result vector.
2740 cir::VectorType sourceTy =
2741 cir::VectorType::get(ty.getElementType(), ty.getSize() / 2);
2742 mlir::Value laneSource = builder.createBitcast(ops[2], sourceTy);
2743 laneSource = emitNeonSplat(builder, loc, laneSource, ops[3], ty.getSize());
2744
2745 llvm::SmallVector<mlir::Value> fmaOps = {multiplicand, laneSource, addend};
2746 return emitCallMaybeConstrainedBuiltin(builder, loc, "fma", ty, fmaOps);
2747 }
2748 case NEON::BI__builtin_neon_vfma_laneq_v: {
2749 // v1f64 fma should be mapped to Neon scalar f64 fma.
2750 if (ty.getElementType() == cgm.doubleTy) {
2751 mlir::Value addend = builder.createBitcast(ops[0], cgm.doubleTy);
2752 mlir::Value multiplicand = builder.createBitcast(ops[1], cgm.doubleTy);
2753 // The laneq source operand is float64x2_t, so the source vector has two
2754 // double lanes.
2755 cir::VectorType sourceTy = cir::VectorType::get(cgm.doubleTy, 2);
2756 mlir::Value laneSource = builder.createBitcast(ops[2], sourceTy);
2757 laneSource = builder.createExtractElement(
2758 loc, laneSource,
2759 static_cast<uint64_t>(getIntValueFromConstOp(ops[3])));
2760
2761 llvm::SmallVector<mlir::Value> fmaOps = {multiplicand, laneSource,
2762 addend};
2763 return builder.createBitcast(
2764 emitCallMaybeConstrainedBuiltin(builder, loc, "fma", cgm.doubleTy,
2765 fmaOps),
2766 ty);
2767 }
2768
2769 mlir::Value addend = builder.createBitcast(ops[0], ty);
2770 mlir::Value multiplicand = builder.createBitcast(ops[1], ty);
2771 // The laneq source operand is the quad vector, so it has twice as many
2772 // lanes as the non-quad result vector.
2773 cir::VectorType sourceTy =
2774 cir::VectorType::get(ty.getElementType(), ty.getSize() * 2);
2775 mlir::Value laneSource = builder.createBitcast(ops[2], sourceTy);
2776 laneSource = emitNeonSplat(builder, loc, laneSource, ops[3], ty.getSize());
2777
2778 llvm::SmallVector<mlir::Value> fmaOps = {laneSource, multiplicand, addend};
2779 return emitCallMaybeConstrainedBuiltin(builder, loc, "fma", ty, fmaOps);
2780 }
2781 case NEON::BI__builtin_neon_vfmaq_laneq_v:
2782 case NEON::BI__builtin_neon_vfmah_lane_f16:
2783 case NEON::BI__builtin_neon_vfmas_lane_f32:
2784 case NEON::BI__builtin_neon_vfmah_laneq_f16:
2785 case NEON::BI__builtin_neon_vfmas_laneq_f32:
2786 case NEON::BI__builtin_neon_vfmad_lane_f64:
2787 case NEON::BI__builtin_neon_vfmad_laneq_f64:
2788 cgm.errorNYI(expr->getSourceRange(),
2789 std::string("unimplemented AArch64 builtin call: ") +
2790 getContext().BuiltinInfo.getName(builtinID));
2791 return mlir::Value{};
2792 case NEON::BI__builtin_neon_vmull_v: {
2793 intrName = usgn ? "aarch64.neon.umull" : "aarch64.neon.smull";
2794 if (type.isPoly())
2795 intrName = "aarch64.neon.pmull";
2796 cir::VectorType argTy = builder.getExtendedOrTruncatedElementVectorType(
2797 ty, /*isExtended*/ false, !usgn);
2798 return emitNeonCall(cgm, builder, {argTy, argTy}, ops, intrName, ty, loc);
2799 }
2800 case NEON::BI__builtin_neon_vmax_v:
2801 case NEON::BI__builtin_neon_vmaxq_v:
2802 intrName = usgn ? "aarch64.neon.umax" : "aarch64.neon.smax";
2803 if (cir::isFPOrVectorOfFPType(ty))
2804 intrName = "aarch64.neon.fmax";
2805 return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc);
2806 case NEON::BI__builtin_neon_vmaxh_f16:
2807 cgm.errorNYI(expr->getSourceRange(),
2808 std::string("unimplemented AArch64 builtin call: ") +
2809 getContext().BuiltinInfo.getName(builtinID));
2810 return mlir::Value{};
2811 case NEON::BI__builtin_neon_vmin_v:
2812 case NEON::BI__builtin_neon_vminq_v:
2813 intrName = usgn ? "aarch64.neon.umin" : "aarch64.neon.smin";
2814 if (cir::isFPOrVectorOfFPType(ty))
2815 intrName = "aarch64.neon.fmin";
2816 return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc);
2817 case NEON::BI__builtin_neon_vminh_f16:
2818 cgm.errorNYI(expr->getSourceRange(),
2819 std::string("unimplemented AArch64 builtin call: ") +
2820 getContext().BuiltinInfo.getName(builtinID));
2821 return mlir::Value{};
2822 case NEON::BI__builtin_neon_vabd_v:
2823 case NEON::BI__builtin_neon_vabdq_v:
2824 intrName = usgn ? "aarch64.neon.uabd" : "aarch64.neon.sabd";
2825 if (cir::isFPOrVectorOfFPType(ty))
2826 intrName = "aarch64.neon.fabd";
2827 return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc);
2828 case NEON::BI__builtin_neon_vpadal_v:
2829 case NEON::BI__builtin_neon_vpadalq_v: {
2830 intrName = usgn ? "aarch64.neon.uaddlp" : "aarch64.neon.saddlp";
2831 llvm::SmallVector<mlir::Value> inputs{ops[1]};
2832 mlir::Value pairwiseSum =
2833 emitNeonCall(cgm, builder, {getNeonPairwiseWidenInputType(ty, usgn)},
2834 inputs, intrName, ty, loc);
2835 mlir::Value accumValue = builder.createBitcast(loc, ops[0], ty);
2836 return cir::AddOp::create(builder, loc, ty, pairwiseSum, accumValue);
2837 }
2838 case NEON::BI__builtin_neon_vpmin_v:
2839 case NEON::BI__builtin_neon_vpminq_v:
2840 intrName = usgn ? "aarch64.neon.uminp" : "aarch64.neon.sminp";
2841 if (cir::isFPOrVectorOfFPType(ty))
2842 intrName = "aarch64.neon.fminp";
2843 return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc);
2844 case NEON::BI__builtin_neon_vpmax_v:
2845 case NEON::BI__builtin_neon_vpmaxq_v:
2846 intrName = usgn ? "aarch64.neon.umaxp" : "aarch64.neon.smaxp";
2847 if (cir::isFPOrVectorOfFPType(ty))
2848 intrName = "aarch64.neon.fmaxp";
2849 return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc);
2850 case NEON::BI__builtin_neon_vminnm_v:
2851 case NEON::BI__builtin_neon_vminnmq_v:
2852 intrName = "aarch64.neon.fminnm";
2853 return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc);
2854 case NEON::BI__builtin_neon_vminnmh_f16:
2855 cgm.errorNYI(expr->getSourceRange(),
2856 std::string("unimplemented AArch64 builtin call: ") +
2857 getContext().BuiltinInfo.getName(builtinID));
2858 return mlir::Value{};
2859 case NEON::BI__builtin_neon_vmaxnm_v:
2860 case NEON::BI__builtin_neon_vmaxnmq_v:
2861 intrName = "aarch64.neon.fmaxnm";
2862 return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc);
2863 case NEON::BI__builtin_neon_vmaxnmh_f16:
2864 case NEON::BI__builtin_neon_vrecpss_f32:
2865 case NEON::BI__builtin_neon_vrecpsd_f64:
2866 case NEON::BI__builtin_neon_vrecpsh_f16:
2867 case NEON::BI__builtin_neon_vqshrun_n_v:
2868 cgm.errorNYI(expr->getSourceRange(),
2869 std::string("unimplemented AArch64 builtin call: ") +
2870 getContext().BuiltinInfo.getName(builtinID));
2871 return mlir::Value{};
2872 case NEON::BI__builtin_neon_vqrshrun_n_v: {
2873 cir::VectorType argTy = builder.getExtendedOrTruncatedElementVectorType(
2874 ty, /*isExtended=*/true, /*isSigned=*/true);
2875 return emitNeonCall(cgm, builder, {argTy, sInt32Ty}, ops,
2876 "aarch64.neon.sqrshrun", ty, loc);
2877 }
2878 case NEON::BI__builtin_neon_vqshrn_n_v:
2879 cgm.errorNYI(expr->getSourceRange(),
2880 std::string("unimplemented AArch64 builtin call: ") +
2881 getContext().BuiltinInfo.getName(builtinID));
2882 return mlir::Value{};
2883 case NEON::BI__builtin_neon_vrshrn_n_v:
2884 cgm.errorNYI(expr->getSourceRange(),
2885 std::string("unimplemented AArch64 builtin call: ") +
2886 getContext().BuiltinInfo.getName(builtinID));
2887 return mlir::Value{};
2888 case NEON::BI__builtin_neon_vqrshrn_n_v: {
2889 cir::VectorType argTy = builder.getExtendedOrTruncatedElementVectorType(
2890 ty, /*isExtended=*/true, /*isSigned=*/!usgn);
2891 llvm::StringRef intrName =
2892 usgn ? "aarch64.neon.uqrshrn" : "aarch64.neon.sqrshrn";
2893 return emitNeonCall(cgm, builder, {argTy, sInt32Ty}, ops, intrName, ty,
2894 loc);
2895 }
2896 case NEON::BI__builtin_neon_vrndah_f16:
2897 case NEON::BI__builtin_neon_vrnda_v:
2898 case NEON::BI__builtin_neon_vrndaq_v:
2899 case NEON::BI__builtin_neon_vrndih_f16:
2900 case NEON::BI__builtin_neon_vrndmh_f16:
2901 case NEON::BI__builtin_neon_vrndm_v:
2902 case NEON::BI__builtin_neon_vrndmq_v:
2903 case NEON::BI__builtin_neon_vrndnh_f16:
2904 case NEON::BI__builtin_neon_vrndn_v:
2905 case NEON::BI__builtin_neon_vrndnq_v:
2906 case NEON::BI__builtin_neon_vrndns_f32:
2907 case NEON::BI__builtin_neon_vrndph_f16:
2908 case NEON::BI__builtin_neon_vrndp_v:
2909 case NEON::BI__builtin_neon_vrndpq_v:
2910 case NEON::BI__builtin_neon_vrndxh_f16:
2911 case NEON::BI__builtin_neon_vrndx_v:
2912 case NEON::BI__builtin_neon_vrndxq_v:
2913 case NEON::BI__builtin_neon_vrndh_f16:
2914 case NEON::BI__builtin_neon_vrnd32x_f32:
2915 case NEON::BI__builtin_neon_vrnd32xq_f32:
2916 case NEON::BI__builtin_neon_vrnd32x_f64:
2917 case NEON::BI__builtin_neon_vrnd32xq_f64:
2918 case NEON::BI__builtin_neon_vrnd32z_f32:
2919 case NEON::BI__builtin_neon_vrnd32zq_f32:
2920 case NEON::BI__builtin_neon_vrnd32z_f64:
2921 case NEON::BI__builtin_neon_vrnd32zq_f64:
2922 case NEON::BI__builtin_neon_vrnd64x_f32:
2923 case NEON::BI__builtin_neon_vrnd64xq_f32:
2924 case NEON::BI__builtin_neon_vrnd64x_f64:
2925 case NEON::BI__builtin_neon_vrnd64xq_f64:
2926 case NEON::BI__builtin_neon_vrnd64z_f32:
2927 case NEON::BI__builtin_neon_vrnd64zq_f32:
2928 case NEON::BI__builtin_neon_vrnd64z_f64:
2929 case NEON::BI__builtin_neon_vrnd64zq_f64:
2930 case NEON::BI__builtin_neon_vrnd_v:
2931 case NEON::BI__builtin_neon_vrndq_v:
2932 cgm.errorNYI(expr->getSourceRange(),
2933 std::string("unimplemented AArch64 builtin call: ") +
2934 getContext().BuiltinInfo.getName(builtinID));
2935 return mlir::Value{};
2936 case NEON::BI__builtin_neon_vcvt_f64_v:
2937 case NEON::BI__builtin_neon_vcvtq_f64_v:
2938 ops[0] = builder.createBitcast(ops[0], ty);
2939 ty = getNeonType(
2940 this, NeonTypeFlags(NeonTypeFlags::Float64, false, type.isQuad()), loc);
2941 return builder.createCast(loc, cir::CastKind::int_to_float, ops[0], ty);
2942 case NEON::BI__builtin_neon_vcvt_f64_f32:
2943 case NEON::BI__builtin_neon_vcvt_f32_f64:
2944 case NEON::BI__builtin_neon_vcvt_s32_v:
2945 case NEON::BI__builtin_neon_vcvt_u32_v:
2946 case NEON::BI__builtin_neon_vcvt_s64_v:
2947 case NEON::BI__builtin_neon_vcvt_u64_v:
2948 case NEON::BI__builtin_neon_vcvt_s16_f16:
2949 case NEON::BI__builtin_neon_vcvt_u16_f16:
2950 case NEON::BI__builtin_neon_vcvtq_s32_v:
2951 case NEON::BI__builtin_neon_vcvtq_u32_v:
2952 case NEON::BI__builtin_neon_vcvtq_s64_v:
2953 case NEON::BI__builtin_neon_vcvtq_u64_v:
2954 case NEON::BI__builtin_neon_vcvtq_s16_f16:
2955 case NEON::BI__builtin_neon_vcvtq_u16_f16:
2956 case NEON::BI__builtin_neon_vcvta_s16_f16:
2957 case NEON::BI__builtin_neon_vcvta_u16_f16:
2958 case NEON::BI__builtin_neon_vcvta_s32_v:
2959 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
2960 case NEON::BI__builtin_neon_vcvtaq_s32_v:
2961 case NEON::BI__builtin_neon_vcvta_u32_v:
2962 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
2963 case NEON::BI__builtin_neon_vcvtaq_u32_v:
2964 case NEON::BI__builtin_neon_vcvta_s64_v:
2965 case NEON::BI__builtin_neon_vcvtaq_s64_v:
2966 case NEON::BI__builtin_neon_vcvta_u64_v:
2967 case NEON::BI__builtin_neon_vcvtaq_u64_v:
2968 case NEON::BI__builtin_neon_vcvtm_s16_f16:
2969 case NEON::BI__builtin_neon_vcvtm_s32_v:
2970 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
2971 case NEON::BI__builtin_neon_vcvtmq_s32_v:
2972 case NEON::BI__builtin_neon_vcvtm_u16_f16:
2973 case NEON::BI__builtin_neon_vcvtm_u32_v:
2974 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
2975 case NEON::BI__builtin_neon_vcvtmq_u32_v:
2976 case NEON::BI__builtin_neon_vcvtm_s64_v:
2977 case NEON::BI__builtin_neon_vcvtmq_s64_v:
2978 case NEON::BI__builtin_neon_vcvtm_u64_v:
2979 case NEON::BI__builtin_neon_vcvtmq_u64_v:
2980 case NEON::BI__builtin_neon_vcvtn_s16_f16:
2981 case NEON::BI__builtin_neon_vcvtn_s32_v:
2982 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
2983 case NEON::BI__builtin_neon_vcvtnq_s32_v:
2984 case NEON::BI__builtin_neon_vcvtn_u16_f16:
2985 case NEON::BI__builtin_neon_vcvtn_u32_v:
2986 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
2987 case NEON::BI__builtin_neon_vcvtnq_u32_v:
2988 case NEON::BI__builtin_neon_vcvtn_s64_v:
2989 case NEON::BI__builtin_neon_vcvtnq_s64_v:
2990 case NEON::BI__builtin_neon_vcvtn_u64_v:
2991 case NEON::BI__builtin_neon_vcvtnq_u64_v:
2992 case NEON::BI__builtin_neon_vcvtp_s16_f16:
2993 case NEON::BI__builtin_neon_vcvtp_s32_v:
2994 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
2995 case NEON::BI__builtin_neon_vcvtpq_s32_v:
2996 case NEON::BI__builtin_neon_vcvtp_u16_f16:
2997 case NEON::BI__builtin_neon_vcvtp_u32_v:
2998 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
2999 case NEON::BI__builtin_neon_vcvtpq_u32_v:
3000 case NEON::BI__builtin_neon_vcvtp_s64_v:
3001 case NEON::BI__builtin_neon_vcvtpq_s64_v:
3002 case NEON::BI__builtin_neon_vcvtp_u64_v:
3003 case NEON::BI__builtin_neon_vcvtpq_u64_v:
3004 case NEON::BI__builtin_neon_vmulx_v:
3005 case NEON::BI__builtin_neon_vmulxq_v:
3006 case NEON::BI__builtin_neon_vmulxh_lane_f16:
3007 case NEON::BI__builtin_neon_vmulxh_laneq_f16:
3008 case NEON::BI__builtin_neon_vmul_lane_v:
3009 case NEON::BI__builtin_neon_vmul_laneq_v:
3010 case NEON::BI__builtin_neon_vpmaxnm_v:
3011 case NEON::BI__builtin_neon_vpmaxnmq_v:
3012 cgm.errorNYI(expr->getSourceRange(),
3013 std::string("unimplemented AArch64 builtin call: ") +
3014 getContext().BuiltinInfo.getName(builtinID));
3015 return mlir::Value{};
3016 case NEON::BI__builtin_neon_vpminnm_v:
3017 case NEON::BI__builtin_neon_vpminnmq_v:
3018 intrName = "aarch64.neon.fminnmp";
3019 return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc);
3020 case NEON::BI__builtin_neon_vsqrth_f16:
3021 cgm.errorNYI(expr->getSourceRange(),
3022 std::string("unimplemented AArch64 builtin call: ") +
3023 getContext().BuiltinInfo.getName(builtinID));
3024 return mlir::Value{};
3025 case NEON::BI__builtin_neon_vsqrt_v:
3026 case NEON::BI__builtin_neon_vsqrtq_v:
3028 return emitNeonCall(cgm, builder, {ty}, ops, "sqrt", ty, loc);
3029 case NEON::BI__builtin_neon_vrbit_v:
3030 case NEON::BI__builtin_neon_vrbitq_v:
3031 case NEON::BI__builtin_neon_vmaxv_f16:
3032 case NEON::BI__builtin_neon_vmaxvq_f16:
3033 case NEON::BI__builtin_neon_vminv_f16:
3034 case NEON::BI__builtin_neon_vminvq_f16:
3035 case NEON::BI__builtin_neon_vmaxnmv_f16:
3036 case NEON::BI__builtin_neon_vmaxnmvq_f16:
3037 case NEON::BI__builtin_neon_vminnmv_f16:
3038 case NEON::BI__builtin_neon_vminnmvq_f16:
3039 case NEON::BI__builtin_neon_vmul_n_f64:
3040 cgm.errorNYI(expr->getSourceRange(),
3041 std::string("unimplemented AArch64 builtin call: ") +
3042 getContext().BuiltinInfo.getName(builtinID));
3043 return mlir::Value{};
3044 case NEON::BI__builtin_neon_vaddlv_u8:
3045 case NEON::BI__builtin_neon_vaddlvq_u8:
3046 case NEON::BI__builtin_neon_vaddlv_u16:
3047 case NEON::BI__builtin_neon_vaddlvq_u16:
3048 case NEON::BI__builtin_neon_vaddlv_s8:
3049 case NEON::BI__builtin_neon_vaddlvq_s8:
3050 case NEON::BI__builtin_neon_vaddlv_s16:
3051 case NEON::BI__builtin_neon_vaddlvq_s16: {
3052 mlir::Type argTy = convertType(expr->getArg(0)->getType());
3053 mlir::Type userRetTy = convertType(expr->getType());
3054 auto eltTy = mlir::cast<cir::IntType>(
3055 mlir::cast<cir::VectorType>(argTy).getElementType());
3056 bool isUnsigned = !eltTy.isSigned();
3057 // These builtins only use 8 and 16-bit element vectors; the intrinsic
3058 // always produces i32. The C result is i32 for 16-bit elements, but i16
3059 // for 8-bit elements, so we emit at i32 and narrow only in that case.
3060 bool needsTrunc = eltTy.getWidth() == 8;
3061 intrName = isUnsigned ? "aarch64.neon.uaddlv" : "aarch64.neon.saddlv";
3062 mlir::Type intrRetTy = userRetTy;
3063 if (needsTrunc)
3064 intrRetTy = isUnsigned ? builder.getUInt32Ty() : builder.getSInt32Ty();
3065 mlir::Value result =
3066 emitNeonCall(cgm, builder, {argTy}, ops, intrName, intrRetTy, loc);
3067 if (needsTrunc)
3068 result = builder.createIntCast(result, userRetTy);
3069 return result;
3070 }
3071 case NEON::BI__builtin_neon_vsri_n_v:
3072 case NEON::BI__builtin_neon_vsriq_n_v: {
3074 ops[0], ops[1], builder.createIntCast(ops[2], builder.getUInt32Ty())};
3075 return emitNeonCall(cgm, builder, {ty, ty, builder.getUInt32Ty()}, vsriArgs,
3076 "aarch64.neon.vsri", ty, loc);
3077 }
3078 case NEON::BI__builtin_neon_vsli_n_v:
3079 case NEON::BI__builtin_neon_vsliq_n_v: {
3080
3081 intrName = "aarch64.neon.vsli";
3082
3083 llvm::SmallVector<mlir::Type> argTypes = {ty, ty, ops[2].getType()};
3084
3085 return emitNeonCall(cgm, builder, argTypes, ops, intrName, ty, loc,
3086 /*isConstrainedFPIntrinsic=*/false,
3087 /*shift=*/0, /*rightshift=*/false);
3088 }
3089 case NEON::BI__builtin_neon_vsra_n_v:
3090 case NEON::BI__builtin_neon_vsraq_n_v: {
3091 ops[0] = builder.createBitcast(ops[0], ty);
3092 ops[1] = emitNeonRShiftImm(*this, ops[1], ops[2], ty, usgn, loc);
3093 return builder.createAdd(loc, ops[0], ops[1]);
3094 }
3095 case NEON::BI__builtin_neon_vrsra_n_v:
3096 case NEON::BI__builtin_neon_vrsraq_n_v: {
3097 intrName = usgn ? "aarch64.neon.urshl" : "aarch64.neon.srshl";
3098 // The llvm intrinsic is expecting negative shift amount for right shift.
3099 // Thus we have to make shift amount vec type to be signed.
3100 cir::VectorType shiftAmtVecTy =
3101 usgn ? getSignChangedVectorType(builder, ty) : ty;
3102 llvm::SmallVector<mlir::Value, 2> tmpOps = {ops[1], ops[2]};
3103 mlir::Value tmp = emitNeonCall(cgm, builder, {ty, shiftAmtVecTy}, tmpOps,
3104 intrName, ty, loc,
3105 /*isConstrainedFPIntrinsic=*/false,
3106 /*shift=*/1, /*rightshift=*/true);
3107 ops[0] = builder.createBitcast(ops[0], ty);
3108 return builder.createAdd(loc, ops[0], tmp);
3109 }
3110 case NEON::BI__builtin_neon_vld1_v:
3111 case NEON::BI__builtin_neon_vld1q_v:
3112 case NEON::BI__builtin_neon_vst1_v:
3113 case NEON::BI__builtin_neon_vst1q_v:
3114 case NEON::BI__builtin_neon_vld1_lane_v:
3115 case NEON::BI__builtin_neon_vld1q_lane_v:
3116 case NEON::BI__builtin_neon_vldap1_lane_s64:
3117 case NEON::BI__builtin_neon_vldap1q_lane_s64:
3118 case NEON::BI__builtin_neon_vld1_dup_v:
3119 case NEON::BI__builtin_neon_vld1q_dup_v:
3120 case NEON::BI__builtin_neon_vst1_lane_v:
3121 case NEON::BI__builtin_neon_vst1q_lane_v:
3122 case NEON::BI__builtin_neon_vstl1_lane_s64:
3123 case NEON::BI__builtin_neon_vstl1q_lane_s64:
3124 case NEON::BI__builtin_neon_vld2_v:
3125 case NEON::BI__builtin_neon_vld2q_v:
3126 case NEON::BI__builtin_neon_vld3_v:
3127 case NEON::BI__builtin_neon_vld3q_v:
3128 case NEON::BI__builtin_neon_vld4_v:
3129 case NEON::BI__builtin_neon_vld4q_v:
3130 case NEON::BI__builtin_neon_vld2_dup_v:
3131 case NEON::BI__builtin_neon_vld2q_dup_v:
3132 case NEON::BI__builtin_neon_vld3_dup_v:
3133 case NEON::BI__builtin_neon_vld3q_dup_v:
3134 case NEON::BI__builtin_neon_vld4_dup_v:
3135 case NEON::BI__builtin_neon_vld4q_dup_v:
3136 case NEON::BI__builtin_neon_vld2_lane_v:
3137 case NEON::BI__builtin_neon_vld2q_lane_v:
3138 case NEON::BI__builtin_neon_vld3_lane_v:
3139 case NEON::BI__builtin_neon_vld3q_lane_v:
3140 case NEON::BI__builtin_neon_vld4_lane_v:
3141 case NEON::BI__builtin_neon_vld4q_lane_v:
3142 case NEON::BI__builtin_neon_vst2_v:
3143 case NEON::BI__builtin_neon_vst2q_v:
3144 case NEON::BI__builtin_neon_vst2_lane_v:
3145 case NEON::BI__builtin_neon_vst2q_lane_v:
3146 case NEON::BI__builtin_neon_vst3_v:
3147 case NEON::BI__builtin_neon_vst3q_v:
3148 case NEON::BI__builtin_neon_vst3_lane_v:
3149 case NEON::BI__builtin_neon_vst3q_lane_v:
3150 case NEON::BI__builtin_neon_vst4_v:
3151 case NEON::BI__builtin_neon_vst4q_v:
3152 case NEON::BI__builtin_neon_vst4_lane_v:
3153 case NEON::BI__builtin_neon_vst4q_lane_v:
3154 cgm.errorNYI(expr->getSourceRange(),
3155 std::string("unimplemented AArch64 builtin call: ") +
3156 getContext().BuiltinInfo.getName(builtinID));
3157 return mlir::Value{};
3158 case NEON::BI__builtin_neon_vtrn_v:
3159 case NEON::BI__builtin_neon_vtrnq_v: {
3160 ops[1] = builder.createBitcast(ops[1], ty);
3161 ops[2] = builder.createBitcast(ops[2], ty);
3162 // Adding a bitcast here as Ops[0] might be a void pointer.
3163 mlir::Value baseAddr =
3164 builder.createBitcast(ops[0], builder.getPointerTo(ty));
3165 mlir::Value sv;
3166
3167 for (unsigned vi = 0; vi != 2; ++vi) {
3169 for (unsigned i = 0, e = ty.getSize(); i != e; i += 2) {
3170 indices.push_back(i + vi);
3171 indices.push_back(i + e + vi);
3172 }
3173 cir::ConstantOp idx = builder.getConstInt(loc, builder.getSInt32Ty(), vi);
3174 mlir::Value addr = builder.createPtrStride(loc, baseAddr, idx);
3175 sv = builder.createVecShuffle(loc, ops[1], ops[2], indices);
3176 (void)builder.CIRBaseBuilderTy::createStore(loc, sv, addr);
3177 }
3178 return sv;
3179 }
3180 case NEON::BI__builtin_neon_vuzp_v:
3181 case NEON::BI__builtin_neon_vuzpq_v: {
3182 ops[1] = builder.createBitcast(ops[1], ty);
3183 ops[2] = builder.createBitcast(ops[2], ty);
3184 // Adding a bitcast here as Ops[0] might be a void pointer.
3185 mlir::Value baseAddr =
3186 builder.createBitcast(ops[0], builder.getPointerTo(ty));
3187 mlir::Value sv;
3188 for (unsigned vi = 0; vi != 2; ++vi) {
3190 for (unsigned i = 0, e = ty.getSize(); i != e; ++i) {
3191 indices.push_back(2 * i + vi);
3192 }
3193 cir::ConstantOp idx = builder.getConstInt(loc, builder.getSInt32Ty(), vi);
3194 mlir::Value addr = builder.createPtrStride(loc, baseAddr, idx);
3195 sv = builder.createVecShuffle(loc, ops[1], ops[2], indices);
3196 (void)builder.CIRBaseBuilderTy::createStore(loc, sv, addr);
3197 }
3198 return sv;
3199 }
3200 case NEON::BI__builtin_neon_vzip_v:
3201 case NEON::BI__builtin_neon_vzipq_v: {
3202 ops[1] = builder.createBitcast(ops[1], ty);
3203 ops[2] = builder.createBitcast(ops[2], ty);
3204 // Adding a bitcast here as Ops[0] might be a void pointer.
3205 mlir::Value baseAddr =
3206 builder.createBitcast(ops[0], builder.getPointerTo(ty));
3207 mlir::Value sv;
3208 for (unsigned vi = 0; vi != 2; ++vi) {
3210 for (unsigned i = 0, e = ty.getSize(); i != e; i += 2) {
3211 indices.push_back((i + vi * e) >> 1);
3212 indices.push_back(((i + vi * e) >> 1) + e);
3213 }
3214 cir::ConstantOp idx = builder.getConstInt(loc, builder.getSInt32Ty(), vi);
3215 mlir::Value addr = builder.createPtrStride(loc, baseAddr, idx);
3216 sv = builder.createVecShuffle(loc, ops[1], ops[2], indices);
3217 (void)builder.CIRBaseBuilderTy::createStore(loc, sv, addr);
3218 }
3219 return sv;
3220 }
3221 case NEON::BI__builtin_neon_vqtbl1q_v:
3222 case NEON::BI__builtin_neon_vqtbl2q_v:
3223 case NEON::BI__builtin_neon_vqtbl3q_v:
3224 case NEON::BI__builtin_neon_vqtbl4q_v:
3225 case NEON::BI__builtin_neon_vqtbx1q_v:
3226 case NEON::BI__builtin_neon_vqtbx2q_v:
3227 case NEON::BI__builtin_neon_vqtbx3q_v:
3228 case NEON::BI__builtin_neon_vqtbx4q_v:
3229 case NEON::BI__builtin_neon_vsqadd_v:
3230 case NEON::BI__builtin_neon_vsqaddq_v:
3231 case NEON::BI__builtin_neon_vuqadd_v:
3232 case NEON::BI__builtin_neon_vuqaddq_v:
3233 case NEON::BI__builtin_neon_vluti2_laneq_mf8:
3234 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
3235 case NEON::BI__builtin_neon_vluti2_laneq_f16:
3236 case NEON::BI__builtin_neon_vluti2_laneq_p16:
3237 case NEON::BI__builtin_neon_vluti2_laneq_p8:
3238 case NEON::BI__builtin_neon_vluti2_laneq_s16:
3239 case NEON::BI__builtin_neon_vluti2_laneq_s8:
3240 case NEON::BI__builtin_neon_vluti2_laneq_u16:
3241 case NEON::BI__builtin_neon_vluti2_laneq_u8:
3242 case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
3243 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
3244 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
3245 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
3246 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
3247 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
3248 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
3249 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
3250 case NEON::BI__builtin_neon_vluti2q_laneq_u8:
3251 case NEON::BI__builtin_neon_vluti2_lane_mf8:
3252 case NEON::BI__builtin_neon_vluti2_lane_bf16:
3253 case NEON::BI__builtin_neon_vluti2_lane_f16:
3254 case NEON::BI__builtin_neon_vluti2_lane_p16:
3255 case NEON::BI__builtin_neon_vluti2_lane_p8:
3256 case NEON::BI__builtin_neon_vluti2_lane_s16:
3257 case NEON::BI__builtin_neon_vluti2_lane_s8:
3258 case NEON::BI__builtin_neon_vluti2_lane_u16:
3259 case NEON::BI__builtin_neon_vluti2_lane_u8:
3260 case NEON::BI__builtin_neon_vluti2q_lane_mf8:
3261 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
3262 case NEON::BI__builtin_neon_vluti2q_lane_f16:
3263 case NEON::BI__builtin_neon_vluti2q_lane_p16:
3264 case NEON::BI__builtin_neon_vluti2q_lane_p8:
3265 case NEON::BI__builtin_neon_vluti2q_lane_s16:
3266 case NEON::BI__builtin_neon_vluti2q_lane_s8:
3267 case NEON::BI__builtin_neon_vluti2q_lane_u16:
3268 case NEON::BI__builtin_neon_vluti2q_lane_u8:
3269 case NEON::BI__builtin_neon_vluti4q_lane_mf8:
3270 case NEON::BI__builtin_neon_vluti4q_lane_p8:
3271 case NEON::BI__builtin_neon_vluti4q_lane_s8:
3272 case NEON::BI__builtin_neon_vluti4q_lane_u8:
3273 case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
3274 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
3275 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
3276 case NEON::BI__builtin_neon_vluti4q_laneq_u8:
3277 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
3278 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
3279 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
3280 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
3281 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2:
3282 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
3283 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
3284 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
3285 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
3286 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2:
3287 case NEON::BI__builtin_neon_vmmlaq_f16_mf8_fpm:
3288 case NEON::BI__builtin_neon_vmmlaq_f32_mf8_fpm:
3289 case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
3290 case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
3291 case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
3292 case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
3293 case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
3294 case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
3295 case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
3296 case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
3297 case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
3298 case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
3299 case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
3300 case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
3301 case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
3302 case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
3303 case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
3304 case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm:
3305 case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
3306 case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
3307 case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
3308 case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
3309 case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
3310 case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
3311 case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
3312 case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
3313 case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
3314 case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
3315 case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
3316 case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
3317 case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
3318 case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
3319 case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
3320 case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
3321 case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
3322 case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
3323 case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
3324 case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
3325 case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
3326 case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
3327 case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
3328 case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
3329 case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
3330 case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
3331 case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
3332 case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
3333 case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
3334 case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
3335 case NEON::BI__builtin_neon_vamin_f16:
3336 case NEON::BI__builtin_neon_vaminq_f16:
3337 case NEON::BI__builtin_neon_vamin_f32:
3338 case NEON::BI__builtin_neon_vaminq_f32:
3339 case NEON::BI__builtin_neon_vaminq_f64:
3340 case NEON::BI__builtin_neon_vamax_f16:
3341 case NEON::BI__builtin_neon_vamaxq_f16:
3342 case NEON::BI__builtin_neon_vamax_f32:
3343 case NEON::BI__builtin_neon_vamaxq_f32:
3344 case NEON::BI__builtin_neon_vamaxq_f64:
3345 case NEON::BI__builtin_neon_vscale_f16:
3346 case NEON::BI__builtin_neon_vscaleq_f16:
3347 case NEON::BI__builtin_neon_vscale_f32:
3348 case NEON::BI__builtin_neon_vscaleq_f32:
3349 case NEON::BI__builtin_neon_vscaleq_f64:
3350 cgm.errorNYI(expr->getSourceRange(),
3351 std::string("unimplemented AArch64 builtin call: ") +
3352 getContext().BuiltinInfo.getName(builtinID));
3353 return mlir::Value{};
3354 }
3355
3356 // Unreachable: All cases in the switch above return.
3357}
Utilities used for generating code for AArch64 that are shared between the classic and ClangIR code-g...
static bool isUnsigned(SValBuilder &SVB, NonLoc Value)
Defines enum values for all the target-independent builtin functions.
static mlir::Value emitCommonNeonSISDBuiltinExpr(CIRGenFunction &cgf, const ARMVectorIntrinsicInfo &info, llvm::SmallVectorImpl< mlir::Value > &ops, const CallExpr *expr)
static bool hasExtraNeonArgument(unsigned builtinID)
Return true if builtinID is an overloaded Neon intrinsic with an extra argument that specifies the ve...
static bool aarch64SVEIntrinsicsProvenSorted
static const std::pair< unsigned, unsigned > neonEquivalentIntrinsicMap[]
static std::pair< mlir::Type, llvm::SmallVector< mlir::Type > > deriveNeonSISDIntrinsicOperandTypes(CIRGenFunction &cgf, unsigned modifier, mlir::Type arg0Ty, mlir::Type resultTy, llvm::ArrayRef< mlir::Value > ops)
static mlir::Value emitNeonSplat(CIRGenBuilderTy &builder, mlir::Location loc, mlir::Value v, mlir::Value lane, unsigned int resEltCnt)
static mlir::Value emitNeonCallToOp(CIRGenModule &cgm, CIRGenBuilderTy &builder, llvm::SmallVector< mlir::Type > argTypes, llvm::SmallVectorImpl< mlir::Value > &args, std::optional< llvm::StringRef > intrinsicName, mlir::Type funcResTy, mlir::Location loc, bool isConstrainedFPIntrinsic=false, unsigned shift=0, bool rightshift=false)
static const ARMVectorIntrinsicInfo aarch64SVEIntrinsicMap[]
static cir::VectorType getSVEVectorForElementType(CIRGenModule &cgm, mlir::Type eltTy)
static unsigned getSVEMinEltCount(clang::SVETypeFlags::EltType sveType)
static mlir::Value genVscaleTimesFactor(mlir::Location loc, CIRGenBuilderTy builder, mlir::Type cirTy, int32_t scalingFactor)
static cir::VectorType getFloatNeonType(CIRGenFunction &cgf, NeonTypeFlags intTypeFlags)
static llvm::StringRef getLLVMIntrNameNoPrefix(llvm::Intrinsic::ID intrID)
static int64_t getIntValueFromConstOp(mlir::Value val)
static mlir::Value emitCallMaybeConstrainedBuiltin(CIRGenBuilderTy &builder, mlir::Location loc, StringRef intrName, mlir::Type retTy, llvm::SmallVector< mlir::Value > &ops)
static mlir::Value emitCommonNeonBuiltinExpr(CIRGenFunction &cgf, unsigned builtinID, unsigned llvmIntrinsic, unsigned altLLVMIntrinsic, const char *nameHint, unsigned modifier, const CallExpr *expr, llvm::SmallVectorImpl< mlir::Value > &ops)
static mlir::Value emitNeonCall(CIRGenModule &cgm, CIRGenBuilderTy &builder, llvm::SmallVector< mlir::Type > argTypes, llvm::SmallVectorImpl< mlir::Value > &args, llvm::StringRef intrinsicName, mlir::Type funcResTy, mlir::Location loc, bool isConstrainedFPIntrinsic=false, unsigned shift=0, bool rightshift=false)
static cir::VectorType getSignChangedVectorType(CIRGenBuilderTy &builder, cir::VectorType vecTy)
Flip the signedness of vecTy's element type, keeping the width and number of lanes the same.
static cir::VectorType getNeonPairwiseWidenInputType(cir::VectorType resType, bool usgn)
static const ARMVectorIntrinsicInfo * findARMVectorIntrinsicInMap(ArrayRef< ARMVectorIntrinsicInfo > intrinsicMap, unsigned builtinID, bool &mapProvenSorted)
static mlir::Value emitCommonNeonShift(CIRGenBuilderTy &builder, mlir::Location loc, cir::VectorType resTy, mlir::Value shifTgt, mlir::Value shiftAmt, bool shiftLeft)
static bool aarch64SIMDIntrinsicsProvenSorted
static cir::VectorType getIntVecFromVecTy(CIRGenBuilderTy &builder, cir::VectorType vecTy)
constexpr unsigned sveBitsPerBlock
static mlir::Value emitNeonShiftVector(CIRGenBuilderTy &builder, mlir::Value shiftVal, cir::VectorType vecTy, mlir::Location loc, bool neg)
Build a constant shift amount vector of vecTy to shift a vector. Here shiftVal is a constant integer t...
static mlir::Value emitAArch64CompareBuiltinExpr(CIRGenFunction &cgf, CIRGenBuilderTy &builder, mlir::Location loc, mlir::Value src, mlir::Type retTy, const cir::CmpOpKind kind)
static mlir::Value emitNeonRShiftImm(CIRGenFunction &cgf, mlir::Value shiftVec, mlir::Value shiftVal, cir::VectorType vecTy, bool usgn, mlir::Location loc)
static cir::VectorType getNeonType(CIRGenFunction *cgf, NeonTypeFlags typeFlags, mlir::Location loc, bool hasLegalHalfType=true, bool v1Ty=false, bool allowBFloatArgsAndRet=true)
static bool aarch64SISDIntrinsicsProvenSorted
TokenType getType() const
Returns the token's type, e.g.
collection of selector sets, each with an associated kind and an ordered collection of selectors. A selector has a kind
Enumerates target-specific builtins in their own namespaces within namespace clang.
cir::ConstantOp getNullValue(mlir::Type ty, mlir::Location loc)
mlir::Value createCast(mlir::Location loc, cir::CastKind kind, mlir::Value src, mlir::Type newTy)
mlir::Value createNUWAMul(mlir::Location loc, mlir::Value lhs, mlir::Value rhs)
cir::VecCmpOp createVecCompare(mlir::Location loc, cir::CmpOpKind kind, mlir::Value lhs, mlir::Value rhs)
mlir::Value createIntCast(mlir::Value src, mlir::Type newTy)
mlir::Value createBitcast(mlir::Value src, mlir::Type newTy)
cir::CmpOp createCompare(mlir::Location loc, cir::CmpOpKind kind, mlir::Value lhs, mlir::Value rhs)
cir::ConstantOp getConstantInt(mlir::Location loc, mlir::Type ty, int64_t value)
mlir::Value createXor(mlir::Location loc, mlir::Value lhs, mlir::Value rhs)
llvm::TypeSize getTypeSizeInBits(mlir::Type ty) const
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:223
Builtin::Context & BuiltinInfo
Definition ASTContext.h:804
QualType GetBuiltinType(unsigned ID, GetBuiltinTypeError &Error, unsigned *IntegerConstantArgs=nullptr) const
Return the type for the specified builtin.
@ GE_None
No error.
std::string getName(unsigned ID) const
Return the identifier name for the specified builtin, e.g.
Definition Builtins.cpp:80
cir::ConstantOp getUInt64(uint64_t c, mlir::Location loc)
mlir::Value emitIntrinsicCallOp(mlir::Location loc, const llvm::StringRef str, const mlir::Type &resTy, Operands &&...op)
cir::IntType getSIntNTy(int n)
cir::VecShuffleOp createVecShuffle(mlir::Location loc, mlir::Value vec1, mlir::Value vec2, llvm::ArrayRef< mlir::Attribute > maskAttrs)
cir::ConstantOp getZero(mlir::Location loc, mlir::Type ty)
cir::ConstantOp getConstInt(mlir::Location loc, llvm::APSInt intVal)
cir::IntType getUIntNTy(int n)
mlir::Type convertType(clang::QualType t)
const TargetInfo & getTarget() const
mlir::Location getLoc(clang::SourceLocation srcLoc)
Helpers to convert Clang's SourceLocation to a MLIR Location.
static int64_t getZExtIntValueFromConstOp(mlir::Value val)
Get zero-extended integer from a mlir::Value that is an int constant or a constant op.
mlir::Value emitSVEPredicateCast(mlir::Value pred, unsigned minNumElts, mlir::Location loc)
bool getAArch64SVEProcessedOperands(unsigned builtinID, const CallExpr *expr, SmallVectorImpl< mlir::Value > &ops, clang::SVETypeFlags typeFlags)
Address returnValue
The temporary alloca to hold the return value.
std::optional< mlir::Value > emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, ReturnValueSlot returnValue, llvm::Triple::ArchType arch)
std::optional< mlir::Value > emitAArch64SMEBuiltinExpr(unsigned builtinID, const CallExpr *expr)
mlir::Value emitScalarExpr(const clang::Expr *e, bool ignoreResultAssign=false)
Emit the computation of the specified expression of scalar type.
CIRGenBuilderTy & getBuilder()
clang::ASTContext & getContext() const
std::optional< mlir::Value > emitAArch64SVEBuiltinExpr(unsigned builtinID, const CallExpr *expr)
mlir::Value emitScalarOrConstFoldImmArg(unsigned iceArguments, unsigned idx, const Expr *argExpr)
This class organizes the cross-function state that is used while generating CIR code.
DiagnosticBuilder errorNYI(SourceLocation, llvm::StringRef)
Helpers to emit "not yet implemented" error diagnostics.
const cir::CIRDataLayout getDataLayout() const
Contains the address where the return value of a function can be stored, and whether the address is v...
Definition CIRGenCall.h:260
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition Expr.h:2946
This represents one expression.
Definition Expr.h:112
Flags to identify the types for overloaded Neon builtins.
EltType getEltType() const
Flags to identify the types for overloaded SVE builtins.
bool isReverseUSDOT() const
bool isGatherLoad() const
EltType getEltType() const
bool isPrefetch() const
bool isTupleSet() const
bool isReverseMergeAnyAccOp() const
bool isTupleGet() const
bool isInsertOp1SVALL() const
bool isAppendSVALL() const
bool isReverseMergeAnyBinOp() const
bool isStructStore() const
bool isTupleCreate() const
bool isGatherPrefetch() const
bool hasSplatOperand() const
MergeType getMergeType() const
bool isStructLoad() const
unsigned getSplatOperand() const
bool isScatterStore() const
bool isReverseCompare() const
virtual bool hasFastHalfType() const
Determine whether the target has fast native support for operations on half types.
Definition TargetInfo.h:712
const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[]
const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[]
const internal::VariadicAllOfMatcher< Type > type
Matches Types in the clang AST.
const internal::VariadicDynCastAllOfMatcher< Stmt, Expr > expr
Matches expressions.
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
Definition Address.h:330
U cast(CodeGen::Address addr)
Definition Address.h:327
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 int32_t
static bool msvcBuiltins()
static bool handleBuiltinICEArguments()
static bool aarch64SIMDIntrinsics()
static bool aarch64SVEIntrinsics()
static bool emitConstrainedFPCall()
static bool aarch64SMEIntrinsics()
static bool aarch64TblBuiltinExpr()