clang 23.0.0git
CIRGenBuiltinAArch64.cpp
Go to the documentation of this file.
1//===---- CIRGenBuiltinAArch64.cpp - Emit CIR for AArch64 builtins --------===//
2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3// See https://llvm.org/LICENSE.txt for license information.
4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5//
6//===----------------------------------------------------------------------===//
7//
8// This contains code to emit ARM64 Builtin calls as CIR or a function call
9// to be later resolved.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CIRGenBuilder.h"
14#include "CIRGenFunction.h"
19
20// TODO(cir): once all builtins are covered, decide whether we still
21// need to use LLVM intrinsics or if there's a better approach to follow. Right
22// now the intrinsics are reused to make it convenient to encode all thousands
23// of them and passing down to LLVM lowering.
24#include "llvm/IR/Intrinsics.h"
25#include "llvm/IR/IntrinsicsAArch64.h"
26
27#include "mlir/IR/BuiltinTypes.h"
28#include "mlir/IR/Value.h"
31
32using namespace clang;
33using namespace clang::CIRGen;
34using namespace llvm;
35using namespace clang::aarch64;
36
37// Generate vscale * scalingFactor
38static mlir::Value genVscaleTimesFactor(mlir::Location loc,
39 CIRGenBuilderTy builder,
40 mlir::Type cirTy,
41 int32_t scalingFactor) {
42 mlir::Value vscale = builder.emitIntrinsicCallOp(loc, "vscale", cirTy);
43 return builder.createNUWAMul(loc, vscale,
44 builder.getUInt64(scalingFactor, loc));
45}
46
// Expands to one brace-initialized table entry for the SVE builtin
// `__builtin_sve_<NameBase>`: the stringized name, the SVE builtin ID,
// `Intrinsic::<LLVMIntrinsic>`, a literal 0, and `TypeModifier`.
// NOTE(review): presumably the literal 0 fills an alternate-intrinsic slot of
// the entry struct -- confirm against the struct definition.
47#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
48 {#NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
49 TypeModifier}
50
// Expands to one brace-initialized table entry for the SVE builtin
// `__builtin_sve_<NameBase>` that names no LLVM intrinsic: both fields
// between the builtin ID and `TypeModifier` are the literal 0.
51#define SVEMAP2(NameBase, TypeModifier) \
52 {#NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier}
54#define GET_SVE_LLVM_INTRINSIC_MAP
55#include "clang/Basic/arm_sve_builtin_cg.inc"
56#undef GET_SVE_LLVM_INTRINSIC_MAP
57};
58
62
63// Check if Builtin `builtinID` is present in `intrinsicMap`. If yes, returns
64// the corresponding info struct.
//
// Returns a pointer to the entry whose `BuiltinID` equals `builtinID`, or
// nullptr when there is no such entry. The lookup uses `llvm::lower_bound`,
// so `intrinsicMap` has to be sorted; in builds without NDEBUG this is
// asserted the first time the function runs with `mapProvenSorted == false`,
// after which the flag is set to true so the check is not repeated.
65static const ARMVectorIntrinsicInfo *
67 unsigned builtinID, bool &mapProvenSorted) {
68
69#ifndef NDEBUG
70 if (!mapProvenSorted) {
71 assert(llvm::is_sorted(intrinsicMap));
72 mapProvenSorted = true;
73 }
74#endif
75
// First entry that does not compare less than `builtinID`.
76 const ARMVectorIntrinsicInfo *info =
77 llvm::lower_bound(intrinsicMap, builtinID);
78
// lower_bound may land on a different builtin (or the end); require an exact
// match before handing the entry back.
79 if (info != intrinsicMap.end() && info->BuiltinID == builtinID)
80 return info;
81
82 return nullptr;
83}
84
85//===----------------------------------------------------------------------===//
86// Generic helpers
87//===----------------------------------------------------------------------===//
88// Emit an intrinsic where all operands are of the same type as the result.
89// Depending on mode, this may be a constrained floating-point intrinsic.
//
// The statement visible here emits a plain intrinsic call named `intrName`
// with result type `retTy` and operands `ops`, and returns its value.
// NOTE(review): no constrained-FP path is visible in this listing -- confirm
// against the complete definition.
90static mlir::Value
92 StringRef intrName, mlir::Type retTy,
95
96 return builder.emitIntrinsicCallOp(loc, intrName, retTy, ops);
97}
98
99static llvm::StringRef getLLVMIntrNameNoPrefix(llvm::Intrinsic::ID intrID) {
100 llvm::StringRef llvmIntrName = llvm::Intrinsic::getBaseName(intrID);
101 assert(llvmIntrName.starts_with("llvm.") && "Not an LLVM intrinsic!");
102 return llvmIntrName.drop_front(/*strlen("llvm.")=*/5);
103}
104
105//===----------------------------------------------------------------------===//
106// NEON helpers
107//===----------------------------------------------------------------------===//
108/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
109/// argument that specifies the vector type. The additional argument is meant
110/// for Sema checking (see `CheckNeonBuiltinFunctionCall`) and this function
111/// should be kept consistent with the logic in Sema.
112/// TODO: Make this return false for SISD builtins.
113/// TODO(cir): Share this with ARM.cpp
114static bool hasExtraNeonArgument(unsigned builtinID) {
115 // Required by the headers included below, but not in this particular
116 // function.
117 [[maybe_unused]] int PtrArgNum = -1;
118 [[maybe_unused]] bool HasConstPtr = false;
119
120 // The mask encodes the type. We don't care about the actual value. Instead,
121 // we just check whether it's been set.
122 uint64_t mask = 0;
123 switch (builtinID) {
// The case labels for this switch come from the two .inc files included
// below, with GET_NEON_OVERLOAD_CHECK defined around the includes.
124#define GET_NEON_OVERLOAD_CHECK
125#include "clang/Basic/arm_fp16.inc"
126#include "clang/Basic/arm_neon.inc"
127#undef GET_NEON_OVERLOAD_CHECK
128 // Non-neon builtins for controlling VFP that take an extra argument for
129 // discriminating the type.
130 case ARM::BI__builtin_arm_vcvtr_f:
131 case ARM::BI__builtin_arm_vcvtr_d:
132 mask = 1;
133 }
// This second switch has only a `default: break;` and therefore does not
// affect the result.
134 switch (builtinID) {
135 default:
136 break;
137 }
138
139 return mask != 0;
140}
141
// Map the element type in `intTypeFlags` to a floating-point CIR vector
// type. The visible returns build vectors of fP16Ty, floatTy and doubleTy
// with base lane counts 4, 2 and 1; each count is shifted left by `isQuad`.
// Any element type not handled by a case hits llvm_unreachable.
// NOTE(review): the `case` labels are missing from this listing, so which
// element type selects which return cannot be confirmed here.
142static cir::VectorType getFloatNeonType(CIRGenFunction &cgf,
143 NeonTypeFlags intTypeFlags) {
// Used as a shift amount below -- presumably 0 or 1, so quad vectors get
// twice the lanes; confirm against NeonTypeFlags::isQuad.
144 int isQuad = intTypeFlags.isQuad();
145 switch (intTypeFlags.getEltType()) {
147 return cir::VectorType::get(cgf.fP16Ty, (4 << isQuad));
149 return cir::VectorType::get(cgf.floatTy, (2 << isQuad));
151 return cir::VectorType::get(cgf.doubleTy, (1 << isQuad));
152 default:
153 llvm_unreachable("Type can't be converted to floating-point!");
154 }
155}
156
157static int64_t getIntValueFromConstOp(mlir::Value val) {
158 return val.getDefiningOp<cir::ConstantOp>().getIntValue().getSExtValue();
159}
160
161/// Build a constant shift amount vector of `vecTy` to shift a vector
162/// Here `shiftVal` is a constant integer that will be broadcast into a
163/// a const vector of `vecTy` which is the return value of this function
164/// If `neg` is true, the shift amount is negated before splatting (used
165/// when encoding a right shift as a left shift by a negative amount for
166/// intrinsics like aarch64.neon.{s,u}rshl).
167static mlir::Value emitNeonShiftVector(CIRGenBuilderTy &builder,
168 mlir::Value shiftVal,
169 cir::VectorType vecTy,
170 mlir::Location loc, bool neg) {
171 if (neg) {
172 int64_t shiftAmt = -getIntValueFromConstOp(shiftVal);
173 shiftVal = builder.getConstantInt(loc, vecTy.getElementType(), shiftAmt);
174 }
175 mlir::Type eltTy = vecTy.getElementType();
176 if (shiftVal.getType() != eltTy) {
177 shiftVal = builder.createIntCast(shiftVal, eltTy);
178 }
179 return cir::VecSplatOp::create(builder, loc, vecTy, shiftVal);
180}
181
182// TODO(cir): Remove `cgm` from the list of arguments once all NYI(s) are gone.
//
// Prepare the operands in `args` and create an `Operation` from them.
// For each index j below `argTypes.size()`: if `shift` is non-zero and equals
// j, `args[j]` is replaced by a splatted shift vector (negated when
// `rightshift` is set); otherwise `args[j]` is bitcast to `argTypes[j]`.
// Because of the `shift > 0` test, operand 0 is never treated as the shift
// operand. `intrinsicName` is only read when `Operation` is
// `cir::LLVMIntrinsicCallOp`. When `isConstrainedFPIntrinsic` is set an NYI
// error is reported and nullptr is returned after the operand loop.
183template <typename Operation>
184static mlir::Value
188 std::optional<llvm::StringRef> intrinsicName,
189 mlir::Type funcResTy, mlir::Location loc,
190 bool isConstrainedFPIntrinsic = false, unsigned shift = 0,
191 bool rightshift = false) {
192 // TODO(cir): Consider removing the following unreachable when we have
193 // emitConstrainedFPCall feature implemented
195 if (isConstrainedFPIntrinsic)
196 cgm.errorNYI(loc, std::string("constrained FP intrinsic"));
197
198 for (unsigned j = 0; j < argTypes.size(); ++j) {
199 if (isConstrainedFPIntrinsic) {
201 }
202 if (shift > 0 && shift == j) {
203 args[j] = emitNeonShiftVector(builder, args[j],
204 mlir::cast<cir::VectorType>(argTypes[j]),
205 loc, rightshift);
206 } else {
207 args[j] = builder.createBitcast(args[j], argTypes[j]);
208 }
209 }
210 if (isConstrainedFPIntrinsic) {
212 return nullptr;
213 }
// The LLVM-intrinsic op additionally takes the intrinsic name as a string
// attribute; every other op kind is built from result type and operands only.
214 if constexpr (std::is_same_v<Operation, cir::LLVMIntrinsicCallOp>) {
215 return Operation::create(builder, loc,
216 builder.getStringAttr(intrinsicName.value()),
217 funcResTy, args)
218 .getResult();
219 } else {
220 return Operation::create(builder, loc, funcResTy, args).getResult();
221 }
222}
223
224// TODO(cir): Remove `cgm` from the list of arguments once all NYI(s) are gone.
//
// Forwards `cgm`, `builder`, the moved `argTypes`, `args`, `intrinsicName`,
// `funcResTy`, `loc`, `isConstrainedFPIntrinsic`, `shift` and `rightshift`
// unchanged to another helper.
// NOTE(review): the line naming the callee is missing from this listing --
// presumably the templated NEON call helper instantiated for
// cir::LLVMIntrinsicCallOp; confirm against the complete file.
225static mlir::Value emitNeonCall(CIRGenModule &cgm, CIRGenBuilderTy &builder,
228 llvm::StringRef intrinsicName,
229 mlir::Type funcResTy, mlir::Location loc,
230 bool isConstrainedFPIntrinsic = false,
231 unsigned shift = 0, bool rightshift = false) {
233 cgm, builder, std::move(argTypes), args, intrinsicName, funcResTy, loc,
234 isConstrainedFPIntrinsic, shift, rightshift);
235}
236
237// Computes the input vector type for a NEON pairwise widening operation (e.g.
238// vpaddl/vpadal). Given a result vector type, it derives the corresponding
239// input type by halving the element bit width and doubling the number of lanes,
240// while setting the signedness based on usgn.
241static cir::VectorType getNeonPairwiseWidenInputType(cir::VectorType resType,
242 bool usgn) {
243 mlir::Type elemTy = resType.getElementType();
244 uint64_t resLanes = resType.getSize();
245 auto intTy = mlir::dyn_cast<cir::IntType>(elemTy);
246 assert(intTy && "vpaddl result type must be an integer vector");
247
248 unsigned resWidth = intTy.getWidth();
249 assert((resWidth == 16 || resWidth == 32 || resWidth == 64) &&
250 "unexpected vpaddl result element width");
251
252 unsigned argWidth = resWidth / 2;
253 unsigned argLanes = resLanes * 2;
254 cir::VectorType result = cir::VectorType::get(
255 cir::IntType::get(resType.getContext(), argWidth, /* is_signed */ !usgn),
256 argLanes);
257 return result;
258}
259
260// Derive the LLVM intrinsic's per-operand argument types and its result
261// type for use when emitting the intrinsic call.
262//
263// `modifier` is the TypeModifier bitmask from `ARMVectorIntrinsicInfo`
264// (callers pass `info.TypeModifier`; see AArch64CodeGenUtils.h). It encodes
265// how the intrinsic's argument and return types relate to the builtin's
266// scalar types. For SISD builtins the key flags are:
267// - VectorizeArgTypes: wrap each arg type into a fixed-width vector
268// - Use64BitVectors / Use128BitVectors: choose the vector width
269// (when neither is set the vector has 1 element)
270// - AddRetType / VectorizeRetType: analogous flags for the return type
271//
272// ARM.cpp lets LLVM resolve the intrinsic's signature (via
273// `CGM.getIntrinsic`) and then walks the resolved Function* formal
274// parameter types. CIR has no LLVMContext here, so we derive the same
275// argument/result types directly from the Clang operand types.
//
// Returns the pair {intrinsic result type, one argument type per entry of
// `ops`}.
276static std::pair<mlir::Type, llvm::SmallVector<mlir::Type>>
278 mlir::Type arg0Ty, mlir::Type resultTy,
// Requested vector width in bits; 0 means "single-element vector".
280 int vectorSize = 0;
281 if (modifier & Use64BitVectors)
282 vectorSize = 64;
283 else if (modifier & Use128BitVectors)
284 vectorSize = 128;
285
// Wrap a scalar type into a vector holding vectorSize / sizeof-in-bits(ty)
// lanes, or exactly one lane when no width flag was set.
286 auto wrapAsVector = [&](mlir::Type ty) -> cir::VectorType {
287 unsigned bits = cgf.cgm.getDataLayout().getTypeSizeInBits(ty);
288 unsigned elts = vectorSize ? vectorSize / bits : 1;
289 return cir::VectorType::get(ty, elts);
290 };
291
292 // Determine the vectorized data type.
293 cir::VectorType vecArgTy;
294 if (modifier & VectorizeArgTypes)
295 vecArgTy = wrapAsVector(arg0Ty);
296
297 // Determine the intrinsic result type: `VectorizeRetType` returns a
298 // vector; otherwise, if data args are vectorized and `AddRetType` is
299 // unset, use a vector return with the same shape as those args.
// NOTE(review): both branches below wrap `resultTy`, not `arg0Ty`; the lane
// count only matches the vectorized args when both scalar types have the
// same bit width -- confirm this is the intended meaning of "same shape".
300 mlir::Type funcResTy = resultTy;
301 if (modifier & VectorizeRetType)
302 funcResTy = wrapAsVector(resultTy);
303 else if (vecArgTy && !(modifier & AddRetType))
304 funcResTy = wrapAsVector(resultTy);
305
306 // When VectorizeArgTypes is set, wrap every operand that has the same
307 // scalar type as arg0 into a vector. This covers intrinsics with multiple
308 // data operands of the same type (e.g. vsri takes two data operands,
309 // both of which must be wrapped into the same vector type).
311 argTypes.reserve(ops.size());
312 for (mlir::Value op : ops) {
313 if ((modifier & VectorizeArgTypes) && vecArgTy && op.getType() == arg0Ty)
314 argTypes.push_back(vecArgTy);
315 else
// Operands of any other type keep their own type unchanged.
316 argTypes.push_back(op.getType());
317 }
318
319 return {funcResTy, std::move(argTypes)};
320}
321
323 CIRGenFunction &cgf, const ARMVectorIntrinsicInfo &info,
325 assert(info.LLVMIntrinsic && "Generic code assumes a valid intrinsic");
326
// Builtins listed in this first switch are reported as not yet implemented.
// The `break` only leaves the switch: no early return is visible here, so
// execution continues with the code below.
327 switch (info.BuiltinID) {
328 case NEON::BI__builtin_neon_vcled_s64:
329 case NEON::BI__builtin_neon_vcled_u64:
330 case NEON::BI__builtin_neon_vcles_f32:
331 case NEON::BI__builtin_neon_vcled_f64:
332 case NEON::BI__builtin_neon_vcltd_s64:
333 case NEON::BI__builtin_neon_vcltd_u64:
334 case NEON::BI__builtin_neon_vclts_f32:
335 case NEON::BI__builtin_neon_vcltd_f64:
336 case NEON::BI__builtin_neon_vcales_f32:
337 case NEON::BI__builtin_neon_vcaled_f64:
338 case NEON::BI__builtin_neon_vcalts_f32:
339 case NEON::BI__builtin_neon_vcaltd_f64:
340 cgf.cgm.errorNYI(expr->getSourceRange(),
341 std::string("unimplemented AArch64 builtin call: ") +
343 break;
344 }
345
// Intrinsic name without the "llvm." prefix, as expected by emitNeonCall.
346 llvm::StringRef llvmIntrName = getLLVMIntrNameNoPrefix(
347 static_cast<llvm::Intrinsic::ID>(info.LLVMIntrinsic));
348 mlir::Location loc = cgf.getLoc(expr->getExprLoc());
349
350 // The switch stmt is intended to help catch NYI cases and will be removed
351 // once the CIR implementation is complete. Avoid adding specialized
352 // code in cases - that should only be required for a handful of examples.
// Builtins with a case label below take the generic path silently; every
// other builtin ID hits `default` and is reported as NYI before the generic
// path runs.
353 switch (info.BuiltinID) {
354 default:
355 cgf.cgm.errorNYI(expr->getSourceRange(),
356 std::string("unimplemented AArch64 builtin call: ") +
358 break;
359 case NEON::BI__builtin_neon_vminv_s8:
360 case NEON::BI__builtin_neon_vminvq_s8:
361 case NEON::BI__builtin_neon_vminv_s16:
362 case NEON::BI__builtin_neon_vminvq_s16:
363 case NEON::BI__builtin_neon_vminv_s32:
364 case NEON::BI__builtin_neon_vminvq_s32:
365 case NEON::BI__builtin_neon_vminv_u8:
366 case NEON::BI__builtin_neon_vminvq_u8:
367 case NEON::BI__builtin_neon_vminv_u16:
368 case NEON::BI__builtin_neon_vminvq_u16:
369 case NEON::BI__builtin_neon_vminv_u32:
370 case NEON::BI__builtin_neon_vminvq_u32:
371 case NEON::BI__builtin_neon_vminv_f32:
372 case NEON::BI__builtin_neon_vminvq_f32:
373 case NEON::BI__builtin_neon_vminvq_f64:
374 case NEON::BI__builtin_neon_vminnmv_f32:
375 case NEON::BI__builtin_neon_vminnmvq_f32:
376 case NEON::BI__builtin_neon_vminnmvq_f64:
377 case NEON::BI__builtin_neon_vabdd_f64:
378 case NEON::BI__builtin_neon_vabds_f32:
379 case NEON::BI__builtin_neon_vshld_s64:
380 case NEON::BI__builtin_neon_vshld_u64:
381 case NEON::BI__builtin_neon_vpmins_f32:
382 case NEON::BI__builtin_neon_vpminqd_f64:
383 case NEON::BI__builtin_neon_vpminnms_f32:
384 case NEON::BI__builtin_neon_vpminnmqd_f64:
385 case NEON::BI__builtin_neon_vcvts_n_f32_s32:
386 case NEON::BI__builtin_neon_vcvts_n_f32_u32:
387 case NEON::BI__builtin_neon_vcvts_n_s32_f32:
388 case NEON::BI__builtin_neon_vcvts_n_u32_f32:
389 case NEON::BI__builtin_neon_vcvtd_n_f64_s64:
390 case NEON::BI__builtin_neon_vcvtd_n_f64_u64:
391 case NEON::BI__builtin_neon_vcvtd_n_s64_f64:
392 case NEON::BI__builtin_neon_vcvtd_n_u64_f64:
393 case NEON::BI__builtin_neon_vaddlv_s32:
394 case NEON::BI__builtin_neon_vaddlv_u32:
395 case NEON::BI__builtin_neon_vaddlvq_s32:
396 case NEON::BI__builtin_neon_vaddlvq_u32:
397 case NEON::BI__builtin_neon_vaddv_s8:
398 case NEON::BI__builtin_neon_vaddv_s16:
399 case NEON::BI__builtin_neon_vaddv_s32:
400 case NEON::BI__builtin_neon_vaddv_u8:
401 case NEON::BI__builtin_neon_vaddv_u16:
402 case NEON::BI__builtin_neon_vaddv_u32:
403 case NEON::BI__builtin_neon_vaddv_f32:
404 case NEON::BI__builtin_neon_vaddvq_s8:
405 case NEON::BI__builtin_neon_vaddvq_s16:
406 case NEON::BI__builtin_neon_vaddvq_s32:
407 case NEON::BI__builtin_neon_vaddvq_s64:
408 case NEON::BI__builtin_neon_vaddvq_u8:
409 case NEON::BI__builtin_neon_vaddvq_u16:
410 case NEON::BI__builtin_neon_vaddvq_u32:
411 case NEON::BI__builtin_neon_vaddvq_u64:
412 case NEON::BI__builtin_neon_vaddvq_f32:
413 case NEON::BI__builtin_neon_vaddvq_f64:
414 case NEON::BI__builtin_neon_vabdh_f16:
415 case NEON::BI__builtin_neon_vrecpeh_f16:
416 case NEON::BI__builtin_neon_vrecpxh_f16:
417 case NEON::BI__builtin_neon_vrsqrteh_f16:
418 case NEON::BI__builtin_neon_vrsqrtsh_f16:
419 case NEON::BI__builtin_neon_vmaxv_s8:
420 case NEON::BI__builtin_neon_vmaxvq_s8:
421 case NEON::BI__builtin_neon_vmaxv_s16:
422 case NEON::BI__builtin_neon_vmaxvq_s16:
423 case NEON::BI__builtin_neon_vmaxv_s32:
424 case NEON::BI__builtin_neon_vmaxvq_s32:
425 case NEON::BI__builtin_neon_vmaxv_u8:
426 case NEON::BI__builtin_neon_vmaxvq_u8:
427 case NEON::BI__builtin_neon_vmaxv_u16:
428 case NEON::BI__builtin_neon_vmaxvq_u16:
429 case NEON::BI__builtin_neon_vmaxv_u32:
430 case NEON::BI__builtin_neon_vmaxvq_u32:
431 case NEON::BI__builtin_neon_vmaxv_f32:
432 case NEON::BI__builtin_neon_vmaxvq_f32:
433 case NEON::BI__builtin_neon_vmaxvq_f64:
434 case NEON::BI__builtin_neon_vqrshrund_n_s64:
435 case NEON::BI__builtin_neon_vqrshrnd_n_s64:
436 case NEON::BI__builtin_neon_vqrshrnd_n_u64:
437 case NEON::BI__builtin_neon_vsrid_n_s64:
438 case NEON::BI__builtin_neon_vsrid_n_u64:
439 break;
440 }
441
// The scalar types driving type derivation come from the call's first
// argument and from the call expression itself.
442 CIRGenBuilderTy &builder = cgf.getBuilder();
443 mlir::Type arg0Ty = cgf.convertType(expr->getArg(0)->getType());
444 mlir::Type resultTy = cgf.convertType(expr->getType());
445
446 // Derive per-operand argument types and the result type from the
447 // TypeModifier flags. `emitNeonCall` takes care of per-operand
448 // bitcasts to `argTypes`.
449 auto [funcResTy, argTypes] = deriveNeonSISDIntrinsicOperandTypes(
450 cgf, info.TypeModifier, arg0Ty, resultTy, ops);
451
452 return emitNeonCall(cgf.cgm, builder, std::move(argTypes), ops, llvmIntrName,
453 funcResTy, loc);
454}
455
456//===----------------------------------------------------------------------===//
457// Emit-helpers
458//===----------------------------------------------------------------------===//
// Emit a comparison of `src` against zero using comparison kind `kind`.
//
// Vector `src`: the value is bitcast to `retTy` and a vector compare against
// the null value of that type is returned. `retTy` must then be a
// fixed-width (non-scalable) vector type; this is asserted.
// Scalar `src`: the compare result is cast to a signed 1-bit integer and then
// integral-cast to `retTy`.
459static mlir::Value
461 mlir::Location loc, mlir::Value src,
462 mlir::Type retTy, const cir::CmpOpKind kind) {
463
464 bool scalarCmp = !isa<cir::VectorType>(src.getType());
465 if (!scalarCmp) {
466 assert(!cast<cir::VectorType>(retTy).getIsScalable() &&
467 "This is only intended for fixed-width vectors");
468 // Vector types are cast to i8 vectors. Recover original type.
469 src = builder.createBitcast(src, retTy);
470 }
471
// Zero of the (possibly re-typed) source, used as the right-hand operand.
472 mlir::Value zero = builder.getNullValue(src.getType(), loc);
473
474 if (!scalarCmp)
475 return builder.createVecCompare(loc, kind, src, zero);
476
477 // For scalars, cast !cir.bool to !cir.int<s, 1> so that the compare
478 // result is sign- rather than zero-extended when casting to the output
479 // retType.
480 mlir::Value cmp = builder.createCast(
481 loc, cir::CastKind::bool_to_int,
482 builder.createCompare(loc, kind, src, zero), builder.getSIntNTy(1));
483
484 return builder.createCast(loc, cir::CastKind::integral, cmp, retTy);
485}
486
487// TODO(cir): Remove `loc` from the list of arguments once all NYIs are gone.
//
// Map the element type and flags in `typeFlags` to a CIR vector type.
// Each return builds a vector whose lane count is 1 when `v1Ty` is set and
// otherwise a per-element-type base count shifted left by `isQuad`. Integer
// element types pick the unsigned or signed CIR type from
// `typeFlags.isUnsigned()`. `allowBFloatArgsAndRet` and `hasLegalHalfType`
// each select between a floating-point element type and uInt16Ty in the
// branches that test them. `loc` is only used for the Poly128 NYI report.
// NOTE(review): the `case` labels are missing from this listing, so which
// element type selects which return cannot be confirmed here.
488static cir::VectorType getNeonType(CIRGenFunction *cgf, NeonTypeFlags typeFlags,
489 mlir::Location loc,
490 bool hasLegalHalfType = true,
491 bool v1Ty = false,
492 bool allowBFloatArgsAndRet = true) {
493 int isQuad = typeFlags.isQuad();
494 switch (typeFlags.getEltType()) {
497 return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt8Ty
498 : cgf->sInt8Ty,
499 v1Ty ? 1 : (8 << isQuad));
501 return cir::VectorType::get(cgf->uInt8Ty, v1Ty ? 1 : (8 << isQuad));
504 return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt16Ty
505 : cgf->sInt16Ty,
506 v1Ty ? 1 : (4 << isQuad));
508 if (allowBFloatArgsAndRet)
509 return cir::VectorType::get(cgf->getCIRGenModule().bFloat16Ty,
510 v1Ty ? 1 : (4 << isQuad));
511 return cir::VectorType::get(cgf->uInt16Ty, v1Ty ? 1 : (4 << isQuad));
513 if (hasLegalHalfType)
514 return cir::VectorType::get(cgf->getCIRGenModule().fP16Ty,
515 v1Ty ? 1 : (4 << isQuad));
516 return cir::VectorType::get(cgf->uInt16Ty, v1Ty ? 1 : (4 << isQuad));
518 return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt32Ty
519 : cgf->sInt32Ty,
520 v1Ty ? 1 : (2 << isQuad));
523 return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt64Ty
524 : cgf->sInt64Ty,
525 v1Ty ? 1 : (1 << isQuad));
527 // FIXME: i128 and f128 are not fully supported in Clang and LLVM.
528 // A lot of the i128 and f128 API is missing,
529 // so we use v16i8 to represent poly128 and get pattern matched.
530 cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Poly128"));
531 [[fallthrough]];
533 return cir::VectorType::get(cgf->getCIRGenModule().floatTy,
534 v1Ty ? 1 : (2 << isQuad));
536 return cir::VectorType::get(cgf->getCIRGenModule().doubleTy,
537 v1Ty ? 1 : (1 << isQuad));
538 }
539 llvm_unreachable("Unknown vector element type!");
540}
541
542static mlir::Value emitNeonSplat(CIRGenBuilderTy &builder, mlir::Location loc,
543 mlir::Value v, mlir::Value lane,
544 unsigned int resEltCnt) {
545 assert(isa<cir::ConstantOp>(lane.getDefiningOp()) &&
546 "lane number is not a constant!");
547 int64_t laneCst = getIntValueFromConstOp(lane);
548 llvm::SmallVector<int64_t, 4> shuffleMask(resEltCnt, laneCst);
549 return builder.createVecShuffle(loc, v, shuffleMask);
550}
551
552/// Flip the signedness of `vecTy`'s element type, keeping the width and
553/// number of lanes the same. Used when a NEON intrinsic takes a shift
554/// amount vector that must be signed (e.g. aarch64.neon.urshl takes a
555/// signed amount even though the data vector is unsigned).
556static cir::VectorType getSignChangedVectorType(CIRGenBuilderTy &builder,
557 cir::VectorType vecTy) {
558 auto elemTy = mlir::cast<cir::IntType>(vecTy.getElementType());
559 elemTy = elemTy.isSigned() ? builder.getUIntNTy(elemTy.getWidth())
560 : builder.getSIntNTy(elemTy.getWidth());
561 return cir::VectorType::get(elemTy, vecTy.getSize());
562}
563
564static mlir::Value emitCommonNeonShift(CIRGenBuilderTy &builder,
565 mlir::Location loc,
566 cir::VectorType resTy,
567 mlir::Value shifTgt,
568 mlir::Value shiftAmt, bool shiftLeft) {
569 shiftAmt = emitNeonShiftVector(builder, shiftAmt, resTy, loc, /*neg=*/false);
570 return cir::ShiftOp::create(builder, loc, resTy,
571 builder.createBitcast(shifTgt, resTy), shiftAmt,
572 shiftLeft);
573}
574
575// Right-shift a vector by a constant.
576static mlir::Value emitNeonRShiftImm(CIRGenFunction &cgf, mlir::Value shiftVec,
577 mlir::Value shiftVal,
578 cir::VectorType vecTy, bool usgn,
579 mlir::Location loc) {
580 CIRGenBuilderTy &builder = cgf.getBuilder();
581 int64_t shiftAmt = getIntValueFromConstOp(shiftVal);
582 int eltSize =
583 cgf.cgm.getDataLayout().getTypeSizeInBits(vecTy.getElementType());
584
585 shiftVec = builder.createBitcast(shiftVec, vecTy);
586 // lshr/ashr are undefined when the shift amount is equal to the vector
587 // element size.
588 if (shiftAmt == eltSize) {
589 if (usgn) {
590 // Right-shifting an unsigned value by its size yields 0.
591 return builder.getZero(loc, vecTy);
592 }
593 // Right-shifting a signed value by its size is equivalent
594 // to a shift of size-1.
595 --shiftAmt;
596 shiftVal = builder.getConstInt(loc, vecTy.getElementType(), shiftAmt);
597 }
598 return emitCommonNeonShift(builder, loc, vecTy, shiftVec, shiftVal,
599 /*shiftLeft=*/false);
600}
601
602static cir::VectorType getIntVecFromVecTy(CIRGenBuilderTy &builder,
603 cir::VectorType vecTy) {
604 if (!cir::isAnyFloatingPointType(vecTy.getElementType()))
605 return vecTy;
606
607 if (mlir::isa<cir::SingleType>(vecTy.getElementType()))
608 return cir::VectorType::get(builder.getSInt32Ty(), vecTy.getSize());
609 if (mlir::isa<cir::DoubleType>(vecTy.getElementType()))
610 return cir::VectorType::get(builder.getSInt64Ty(), vecTy.getSize());
611 llvm_unreachable(
612 "Unsupported element type in getVecOfIntTypeWithSameEltWidth");
613}
614
615static mlir::Value emitCommonNeonBuiltinExpr(
616 CIRGenFunction &cgf, unsigned builtinID, unsigned llvmIntrinsic,
617 unsigned altLLVMIntrinsic, const char *nameHint, unsigned modifier,
619 mlir::Location loc = cgf.getLoc(expr->getExprLoc());
620 clang::ASTContext &ctx = cgf.getContext();
621
622 // Extract the trailing immediate argument that encodes the type discriminator
623 // for this overloaded intrinsic.
624 // TODO: Move to the parent code that takes care of argument processing.
625 const clang::Expr *arg = expr->getArg(expr->getNumArgs() - 1);
626 std::optional<llvm::APSInt> neonTypeConst = arg->getIntegerConstantExpr(ctx);
627 if (!neonTypeConst)
628 return nullptr;
629
630 // Determine the type of this overloaded NEON intrinsic.
631 NeonTypeFlags neonType(neonTypeConst->getZExtValue());
632 const bool isUnsigned = neonType.isUnsigned();
633 const bool hasLegalHalfType = cgf.getTarget().hasFastHalfType();
634 const bool usgn = neonType.isUnsigned();
635
636 // The value of allowBFloatArgsAndRet is true for AArch64, but it should
637 // come from ABI info.
638 // TODO(cir): Use ABInfo to extract this information
639 const bool allowBFloatArgsAndRet = cgf.getTarget().hasFastHalfType();
640 // FIXME
641 // getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
642
643 cir::VectorType vTy = getNeonType(&cgf, neonType, loc, hasLegalHalfType,
644 false, allowBFloatArgsAndRet);
645 cir::VectorType ty = vTy;
646 if (!ty)
647 return nullptr;
648
649 switch (builtinID) {
650 case NEON::BI__builtin_neon_splat_lane_v:
651 case NEON::BI__builtin_neon_splat_laneq_v:
652 case NEON::BI__builtin_neon_splatq_lane_v:
653 case NEON::BI__builtin_neon_splatq_laneq_v: {
654 uint64_t numElements = vTy.getSize();
655 if (builtinID == NEON::BI__builtin_neon_splatq_lane_v)
656 numElements *= 2;
657 if (builtinID == NEON::BI__builtin_neon_splat_laneq_v)
658 numElements /= 2;
659 ops[0] = cgf.getBuilder().createBitcast(loc, ops[0], vTy);
660 return emitNeonSplat(cgf.getBuilder(), loc, ops[0], ops[1], numElements);
661 }
662 case NEON::BI__builtin_neon_vpadd_v:
663 case NEON::BI__builtin_neon_vpaddq_v:
664 case NEON::BI__builtin_neon_vabs_v:
665 case NEON::BI__builtin_neon_vabsq_v:
666 case NEON::BI__builtin_neon_vadd_v:
667 case NEON::BI__builtin_neon_vaddq_v:
668 case NEON::BI__builtin_neon_vaddhn_v:
669 case NEON::BI__builtin_neon_vcale_v:
670 case NEON::BI__builtin_neon_vcaleq_v:
671 case NEON::BI__builtin_neon_vcalt_v:
672 case NEON::BI__builtin_neon_vcaltq_v:
673 case NEON::BI__builtin_neon_vcage_v:
674 case NEON::BI__builtin_neon_vcageq_v:
675 case NEON::BI__builtin_neon_vcagt_v:
676 case NEON::BI__builtin_neon_vcagtq_v:
677 cgf.cgm.errorNYI(expr->getSourceRange(),
678 std::string("unimplemented AArch64 builtin call: ") +
679 ctx.BuiltinInfo.getName(builtinID));
680 return mlir::Value{};
681 case NEON::BI__builtin_neon_vceqz_v:
682 case NEON::BI__builtin_neon_vceqzq_v:
683 return emitAArch64CompareBuiltinExpr(cgf, cgf.getBuilder(), loc, ops[0],
684 vTy, cir::CmpOpKind::eq);
685 case NEON::BI__builtin_neon_vcgez_v:
686 case NEON::BI__builtin_neon_vcgezq_v:
687 case NEON::BI__builtin_neon_vclez_v:
688 case NEON::BI__builtin_neon_vclezq_v:
689 case NEON::BI__builtin_neon_vcgtz_v:
690 case NEON::BI__builtin_neon_vcgtzq_v:
691 case NEON::BI__builtin_neon_vcltz_v:
692 case NEON::BI__builtin_neon_vcltzq_v:
693 case NEON::BI__builtin_neon_vclz_v:
694 case NEON::BI__builtin_neon_vclzq_v:
695 case NEON::BI__builtin_neon_vcvt_f32_v:
696 case NEON::BI__builtin_neon_vcvtq_f32_v:
697 case NEON::BI__builtin_neon_vcvt_f16_s16:
698 case NEON::BI__builtin_neon_vcvt_f16_u16:
699 case NEON::BI__builtin_neon_vcvtq_f16_s16:
700 case NEON::BI__builtin_neon_vcvtq_f16_u16:
701 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
702 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
703 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
704 case NEON::BI__builtin_neon_vcvtq_n_f16_u16:
705 cgf.cgm.errorNYI(expr->getSourceRange(),
706 std::string("unimplemented AArch64 builtin call: ") +
707 ctx.BuiltinInfo.getName(builtinID));
708 return mlir::Value{};
709 case NEON::BI__builtin_neon_vcvt_n_f32_v:
710 case NEON::BI__builtin_neon_vcvt_n_f64_v:
711 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
712 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
713 // The constant argument to an _n_ intrinsic always is Int32Ty.
714 mlir::Type cstIntTy = cgf.sInt32Ty;
715 llvm::StringRef llvmIntrName =
716 getLLVMIntrNameNoPrefix(static_cast<llvm::Intrinsic::ID>(
717 usgn ? llvmIntrinsic : altLLVMIntrinsic));
718 return emitNeonCall(cgf.getCIRGenModule(), cgf.getBuilder(),
719 /*argTypes=*/{vTy, cstIntTy}, ops, llvmIntrName,
720 /*funcResTy=*/getFloatNeonType(cgf, neonType), loc);
721 }
722 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
723 case NEON::BI__builtin_neon_vcvt_n_s32_v:
724 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
725 case NEON::BI__builtin_neon_vcvt_n_u32_v:
726 case NEON::BI__builtin_neon_vcvt_n_s64_v:
727 case NEON::BI__builtin_neon_vcvt_n_u64_v:
728 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
729 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
730 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
731 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
732 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
733 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
734 // The constant argument to an _n_ intrinsic always is Int32Ty.
735 mlir::Type cstIntTy = cgf.sInt32Ty;
736 llvm::StringRef llvmIntrName = getLLVMIntrNameNoPrefix(
737 static_cast<llvm::Intrinsic::ID>(llvmIntrinsic));
738 return emitNeonCall(
739 cgf.getCIRGenModule(), cgf.getBuilder(),
740 /*argTypes=*/{getFloatNeonType(cgf, neonType), cstIntTy}, ops,
741 llvmIntrName,
742 /*funcResTy=*/vTy, loc);
743 }
744 case NEON::BI__builtin_neon_vcvt_s32_v:
745 case NEON::BI__builtin_neon_vcvt_u32_v:
746 case NEON::BI__builtin_neon_vcvt_s64_v:
747 case NEON::BI__builtin_neon_vcvt_u64_v:
748 case NEON::BI__builtin_neon_vcvt_s16_f16:
749 case NEON::BI__builtin_neon_vcvt_u16_f16:
750 case NEON::BI__builtin_neon_vcvtq_s32_v:
751 case NEON::BI__builtin_neon_vcvtq_u32_v:
752 case NEON::BI__builtin_neon_vcvtq_s64_v:
753 case NEON::BI__builtin_neon_vcvtq_u64_v:
754 case NEON::BI__builtin_neon_vcvtq_s16_f16:
755 case NEON::BI__builtin_neon_vcvtq_u16_f16:
756 case NEON::BI__builtin_neon_vcvta_s16_f16:
757 case NEON::BI__builtin_neon_vcvta_s32_v:
758 case NEON::BI__builtin_neon_vcvta_s64_v:
759 case NEON::BI__builtin_neon_vcvta_u16_f16:
760 case NEON::BI__builtin_neon_vcvta_u32_v:
761 case NEON::BI__builtin_neon_vcvta_u64_v:
762 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
763 case NEON::BI__builtin_neon_vcvtaq_s32_v:
764 case NEON::BI__builtin_neon_vcvtaq_s64_v:
765 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
766 case NEON::BI__builtin_neon_vcvtaq_u32_v:
767 case NEON::BI__builtin_neon_vcvtaq_u64_v:
768 case NEON::BI__builtin_neon_vcvtn_s16_f16:
769 case NEON::BI__builtin_neon_vcvtn_s32_v:
770 case NEON::BI__builtin_neon_vcvtn_s64_v:
771 case NEON::BI__builtin_neon_vcvtn_u16_f16:
772 case NEON::BI__builtin_neon_vcvtn_u32_v:
773 case NEON::BI__builtin_neon_vcvtn_u64_v:
774 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
775 case NEON::BI__builtin_neon_vcvtnq_s32_v:
776 case NEON::BI__builtin_neon_vcvtnq_s64_v:
777 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
778 case NEON::BI__builtin_neon_vcvtnq_u32_v:
779 case NEON::BI__builtin_neon_vcvtnq_u64_v:
780 case NEON::BI__builtin_neon_vcvtp_s16_f16:
781 case NEON::BI__builtin_neon_vcvtp_s32_v:
782 case NEON::BI__builtin_neon_vcvtp_s64_v:
783 case NEON::BI__builtin_neon_vcvtp_u16_f16:
784 case NEON::BI__builtin_neon_vcvtp_u32_v:
785 case NEON::BI__builtin_neon_vcvtp_u64_v:
786 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
787 case NEON::BI__builtin_neon_vcvtpq_s32_v:
788 case NEON::BI__builtin_neon_vcvtpq_s64_v:
789 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
790 case NEON::BI__builtin_neon_vcvtpq_u32_v:
791 case NEON::BI__builtin_neon_vcvtpq_u64_v:
792 case NEON::BI__builtin_neon_vcvtm_s16_f16:
793 case NEON::BI__builtin_neon_vcvtm_s32_v:
794 case NEON::BI__builtin_neon_vcvtm_s64_v:
795 case NEON::BI__builtin_neon_vcvtm_u16_f16:
796 case NEON::BI__builtin_neon_vcvtm_u32_v:
797 case NEON::BI__builtin_neon_vcvtm_u64_v:
798 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
799 case NEON::BI__builtin_neon_vcvtmq_s32_v:
800 case NEON::BI__builtin_neon_vcvtmq_s64_v:
801 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
802 case NEON::BI__builtin_neon_vcvtmq_u32_v:
803 case NEON::BI__builtin_neon_vcvtmq_u64_v:
804 case NEON::BI__builtin_neon_vcvtx_f32_v:
805 case NEON::BI__builtin_neon_vext_v:
806 case NEON::BI__builtin_neon_vextq_v:
807 case NEON::BI__builtin_neon_vfma_v:
808 cgf.cgm.errorNYI(expr->getSourceRange(),
809 std::string("unimplemented AArch64 builtin call: ") +
810 ctx.BuiltinInfo.getName(builtinID));
811 return mlir::Value{};
812 case NEON::BI__builtin_neon_vfmaq_v: {
813 // NEON intrinsic: vfmaq(accumulator, multiplicand1, multiplicand2)
814 // LLVM intrinsic: fma(multiplicand1, multiplicand2, accumulator)
815 // Reorder arguments to match LLVM fma signature
816 mlir::Value op0 = cgf.getBuilder().createBitcast(ops[0], ty);
817 mlir::Value op1 = cgf.getBuilder().createBitcast(ops[1], ty);
818 mlir::Value op2 = cgf.getBuilder().createBitcast(ops[2], ty);
819 llvm::SmallVector<mlir::Value> fmaOps = {op1, op2, op0};
820 return emitCallMaybeConstrainedBuiltin(cgf.getBuilder(), loc, "fma", ty,
821 fmaOps);
822 }
823 case NEON::BI__builtin_neon_vld1_v:
824 case NEON::BI__builtin_neon_vld1q_v:
825 case NEON::BI__builtin_neon_vld1_x2_v:
826 case NEON::BI__builtin_neon_vld1q_x2_v:
827 case NEON::BI__builtin_neon_vld1_x3_v:
828 case NEON::BI__builtin_neon_vld1q_x3_v:
829 case NEON::BI__builtin_neon_vld1_x4_v:
830 case NEON::BI__builtin_neon_vld1q_x4_v:
831 case NEON::BI__builtin_neon_vld2_v:
832 case NEON::BI__builtin_neon_vld2q_v:
833 case NEON::BI__builtin_neon_vld3_v:
834 case NEON::BI__builtin_neon_vld3q_v:
835 case NEON::BI__builtin_neon_vld4_v:
836 case NEON::BI__builtin_neon_vld4q_v:
837 case NEON::BI__builtin_neon_vld2_dup_v:
838 case NEON::BI__builtin_neon_vld2q_dup_v:
839 case NEON::BI__builtin_neon_vld3_dup_v:
840 case NEON::BI__builtin_neon_vld3q_dup_v:
841 case NEON::BI__builtin_neon_vld4_dup_v:
842 case NEON::BI__builtin_neon_vld4q_dup_v:
843 case NEON::BI__builtin_neon_vld1_dup_v:
844 case NEON::BI__builtin_neon_vld1q_dup_v:
845 case NEON::BI__builtin_neon_vld2_lane_v:
846 case NEON::BI__builtin_neon_vld2q_lane_v:
847 case NEON::BI__builtin_neon_vld3_lane_v:
848 case NEON::BI__builtin_neon_vld3q_lane_v:
849 case NEON::BI__builtin_neon_vld4_lane_v:
850 case NEON::BI__builtin_neon_vld4q_lane_v:
851 case NEON::BI__builtin_neon_vmovl_v:
852 case NEON::BI__builtin_neon_vmovn_v:
853 case NEON::BI__builtin_neon_vmull_v:
854 case NEON::BI__builtin_neon_vpadal_v:
855 case NEON::BI__builtin_neon_vpadalq_v:
856 cgf.cgm.errorNYI(expr->getSourceRange(),
857 std::string("Reached code-path for ARM builtin call ") +
858 ctx.BuiltinInfo.getName(builtinID) +
859 "(ARM builtins are not supported ATM)");
860 return mlir::Value{};
861 case NEON::BI__builtin_neon_vpaddl_v:
862 case NEON::BI__builtin_neon_vpaddlq_v: {
863 llvm::StringRef llvmIntrName =
864 getLLVMIntrNameNoPrefix(static_cast<llvm::Intrinsic::ID>(
865 usgn ? llvmIntrinsic : altLLVMIntrinsic));
866 return emitNeonCall(cgf.getCIRGenModule(), cgf.getBuilder(),
867 /*argTypes=*/{getNeonPairwiseWidenInputType(vTy, usgn)},
868 ops, llvmIntrName,
869 /*funcResTy=*/vTy, loc);
870 }
871 case NEON::BI__builtin_neon_vqdmlal_v:
872 case NEON::BI__builtin_neon_vqdmlsl_v:
873 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
874 case NEON::BI__builtin_neon_vqdmulh_lane_v:
875 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
876 case NEON::BI__builtin_neon_vqrdmulh_lane_v:
877 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
878 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
879 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
880 case NEON::BI__builtin_neon_vqrdmulh_laneq_v:
881 case NEON::BI__builtin_neon_vqshl_n_v:
882 case NEON::BI__builtin_neon_vqshlq_n_v:
883 case NEON::BI__builtin_neon_vqshlu_n_v:
884 case NEON::BI__builtin_neon_vqshluq_n_v:
885 case NEON::BI__builtin_neon_vrecpe_v:
886 case NEON::BI__builtin_neon_vrecpeq_v:
887 case NEON::BI__builtin_neon_vrsqrte_v:
888 case NEON::BI__builtin_neon_vrsqrteq_v:
889 case NEON::BI__builtin_neon_vrndi_v:
890 case NEON::BI__builtin_neon_vrndiq_v:
891 cgf.cgm.errorNYI(expr->getSourceRange(),
892 std::string("unimplemented AArch64 builtin call: ") +
893 ctx.BuiltinInfo.getName(builtinID));
894 return mlir::Value{};
895 case NEON::BI__builtin_neon_vrshr_n_v:
896 case NEON::BI__builtin_neon_vrshrq_n_v: {
897 llvm::StringRef intrName =
898 usgn ? "aarch64.neon.urshl" : "aarch64.neon.srshl";
899 return emitNeonCall(
900 cgf.cgm, cgf.getBuilder(),
901 {ty, usgn ? getSignChangedVectorType(cgf.getBuilder(), ty) : ty}, ops,
902 intrName, ty, loc, /*isConstrainedFPIntrinsic=*/false,
903 /*shift=*/1,
904 /*rightshift=*/true);
905 }
906 case NEON::BI__builtin_neon_vsha512hq_u64:
907 case NEON::BI__builtin_neon_vsha512h2q_u64:
908 case NEON::BI__builtin_neon_vsha512su0q_u64:
909 case NEON::BI__builtin_neon_vsha512su1q_u64:
910 cgf.cgm.errorNYI(expr->getSourceRange(),
911 std::string("unimplemented AArch64 builtin call: ") +
912 ctx.BuiltinInfo.getName(builtinID));
913 return mlir::Value{};
914 case NEON::BI__builtin_neon_vshl_n_v:
915 case NEON::BI__builtin_neon_vshlq_n_v:
916 return emitCommonNeonShift(cgf.getBuilder(), loc, vTy, ops[0], ops[1],
917 /*shiftLeft=*/true);
918 case NEON::BI__builtin_neon_vshll_n_v:
919 case NEON::BI__builtin_neon_vshrn_n_v:
920 cgf.cgm.errorNYI(expr->getSourceRange(),
921 std::string("unimplemented AArch64 builtin call: ") +
922 ctx.BuiltinInfo.getName(builtinID));
923 return mlir::Value{};
924 case NEON::BI__builtin_neon_vshr_n_v:
925 case NEON::BI__builtin_neon_vshrq_n_v:
926 return emitNeonRShiftImm(cgf, ops[0], ops[1], vTy, isUnsigned, loc);
927 case NEON::BI__builtin_neon_vst1_v:
928 case NEON::BI__builtin_neon_vst1q_v:
929 case NEON::BI__builtin_neon_vst2_v:
930 case NEON::BI__builtin_neon_vst2q_v:
931 case NEON::BI__builtin_neon_vst3_v:
932 case NEON::BI__builtin_neon_vst3q_v:
933 case NEON::BI__builtin_neon_vst4_v:
934 case NEON::BI__builtin_neon_vst4q_v:
935 case NEON::BI__builtin_neon_vst2_lane_v:
936 case NEON::BI__builtin_neon_vst2q_lane_v:
937 case NEON::BI__builtin_neon_vst3_lane_v:
938 case NEON::BI__builtin_neon_vst3q_lane_v:
939 case NEON::BI__builtin_neon_vst4_lane_v:
940 case NEON::BI__builtin_neon_vst4q_lane_v:
941 case NEON::BI__builtin_neon_vsm3partw1q_u32:
942 case NEON::BI__builtin_neon_vsm3partw2q_u32:
943 case NEON::BI__builtin_neon_vsm3ss1q_u32:
944 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
945 case NEON::BI__builtin_neon_vsm4eq_u32:
946 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
947 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
948 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
949 case NEON::BI__builtin_neon_vsm3tt2bq_u32:
950 case NEON::BI__builtin_neon_vst1_x2_v:
951 case NEON::BI__builtin_neon_vst1q_x2_v:
952 case NEON::BI__builtin_neon_vst1_x3_v:
953 case NEON::BI__builtin_neon_vst1q_x3_v:
954 case NEON::BI__builtin_neon_vst1_x4_v:
955 case NEON::BI__builtin_neon_vst1q_x4_v:
956 case NEON::BI__builtin_neon_vsubhn_v:
957 case NEON::BI__builtin_neon_vtrn_v:
958 case NEON::BI__builtin_neon_vtrnq_v:
959 case NEON::BI__builtin_neon_vtst_v:
960 case NEON::BI__builtin_neon_vtstq_v:
961 case NEON::BI__builtin_neon_vuzp_v:
962 case NEON::BI__builtin_neon_vuzpq_v:
963 case NEON::BI__builtin_neon_vxarq_u64:
964 case NEON::BI__builtin_neon_vzip_v:
965 case NEON::BI__builtin_neon_vzipq_v:
966 case NEON::BI__builtin_neon_vdot_s32:
967 case NEON::BI__builtin_neon_vdot_u32:
968 case NEON::BI__builtin_neon_vdotq_s32:
969 case NEON::BI__builtin_neon_vdotq_u32:
970 case NEON::BI__builtin_neon_vfmlal_low_f16:
971 case NEON::BI__builtin_neon_vfmlalq_low_f16:
972 case NEON::BI__builtin_neon_vfmlsl_low_f16:
973 case NEON::BI__builtin_neon_vfmlslq_low_f16:
974 case NEON::BI__builtin_neon_vfmlal_high_f16:
975 case NEON::BI__builtin_neon_vfmlalq_high_f16:
976 case NEON::BI__builtin_neon_vfmlsl_high_f16:
977 case NEON::BI__builtin_neon_vfmlslq_high_f16:
978 case NEON::BI__builtin_neon_vmmlaq_s32:
979 case NEON::BI__builtin_neon_vmmlaq_u32:
980 cgf.cgm.errorNYI(expr->getSourceRange(),
981 std::string("unimplemented AArch64 builtin call: ") +
982 ctx.BuiltinInfo.getName(builtinID));
983 return mlir::Value{};
984 case NEON::BI__builtin_neon_vmul_v:
985 case NEON::BI__builtin_neon_vmulq_v:
986 return cgf.getBuilder().emitIntrinsicCallOp(loc, "aarch64.neon.pmul", vTy,
987 ops);
988 case NEON::BI__builtin_neon_vusmmlaq_s32:
989 case NEON::BI__builtin_neon_vusdot_s32:
990 case NEON::BI__builtin_neon_vusdotq_s32:
991 case NEON::BI__builtin_neon_vbfdot_f32:
992 case NEON::BI__builtin_neon_vbfdotq_f32:
993 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32:
994 cgf.cgm.errorNYI(expr->getSourceRange(),
995 std::string("unimplemented AArch64 builtin call: ") +
996 ctx.BuiltinInfo.getName(builtinID));
997 return mlir::Value{};
998 }
999
1000 // The switch stmt is intended to help catch NYI cases and will be removed
1001 // once the CIR implementation is complete. Avoid adding specialized
1002 // code in cases - that should only be required for a handful of examples.
1003 switch (builtinID) {
1004 default:
1005 cgf.cgm.errorNYI(expr->getSourceRange(),
1006 std::string("unimplemented AArch64 builtin call: ") +
1007 cgf.getContext().BuiltinInfo.getName(builtinID));
1008 break;
1009 case NEON::BI__builtin_neon_vshl_v:
1010 case NEON::BI__builtin_neon_vshlq_v: {
1011 llvm::StringRef llvmIntrName =
1012 getLLVMIntrNameNoPrefix(static_cast<llvm::Intrinsic::ID>(
1013 usgn ? llvmIntrinsic : altLLVMIntrinsic));
1014
1015 mlir::Value result =
1017 /*argTypes=*/{vTy, vTy}, ops, llvmIntrName,
1018 /*funcResTy=*/vTy, loc);
1019 mlir::Type resultType = cgf.convertType(expr->getType());
1020 return cgf.getBuilder().createBitcast(result, resultType);
1021 }
1022 }
1023
1024 // NYI
1025 return nullptr;
1026}
1027
1029 unsigned builtinID, const CallExpr *expr, SmallVectorImpl<mlir::Value> &ops,
1030 SVETypeFlags typeFlags) {
// Emits every argument of `expr` as a scalar value and appends it to `ops`,
// in argument order. The visible body always returns true.
// NOTE(review): `typeFlags` is not referenced in the visible body — confirm
// whether it is reserved for later use.
1031 // Find out if any arguments are required to be integer constant expressions.
1032 unsigned iceArguments = 0;
1034 getContext().GetBuiltinType(builtinID, error, &iceArguments);
1035 assert(error == ASTContext::GE_None && "Should not codegen an error");
1036
1037 for (unsigned i = 0, e = expr->getNumArgs(); i != e; i++) {
// Bit i of iceArguments is tested to decide whether argument i is one of the
// integer-constant-expression arguments.
1038 bool isIce = iceArguments & (1 << i);
1039 mlir::Value arg = emitScalarExpr(expr->getArg(i));
1040
// Constant-expression arguments only trigger an NYI diagnostic here; there is
// no early exit, so the scalar value emitted above is still pushed below.
1041 if (isIce) {
1042 cgm.errorNYI(expr->getSourceRange(),
1043 std::string("unimplemented AArch64 builtin call: ") +
1044 getContext().BuiltinInfo.getName(builtinID));
1045 }
1046
1047 // FIXME: Handle types like svint16x2_t, which are currently incorrectly
1048 // converted to i32. These should be treated as structs and unpacked.
1049
1050 ops.push_back(arg);
1051 }
1052 return true;
1053}
1054
1055// Reinterpret the input predicate so that it can be used to correctly isolate
1056// the elements of the specified datatype.
1057mlir::Value CIRGenFunction::emitSVEPredicateCast(mlir::Value pred,
1058 unsigned minNumElts,
1059 mlir::Location loc) {
1060
1061 // TODO: Handle "aarch64.svcount" once we get round to supporting SME.
1062
1063 auto retTy = cir::VectorType::get(builder.getUIntNTy(1), minNumElts,
1064 /*is_scalable=*/true);
1065 if (pred.getType() == retTy)
1066 return pred;
1067
1068 llvm::Intrinsic::ID intID;
1069 switch (minNumElts) {
1070 default:
1071 llvm_unreachable("unsupported element count!");
1072 case 1:
1073 case 2:
1074 case 4:
1075 case 8:
1076 intID = Intrinsic::aarch64_sve_convert_from_svbool;
1077 break;
1078 case 16:
1079 intID = Intrinsic::aarch64_sve_convert_to_svbool;
1080 break;
1081 }
1082
1083 llvm::StringRef llvmIntrName = getLLVMIntrNameNoPrefix(intID);
1084 auto call = builder.emitIntrinsicCallOp(loc, llvmIntrName, retTy,
1085 mlir::ValueRange{pred});
1086 assert(call.getType() == retTy && "Unexpected return type!");
1087 return call;
1088}
1089
1090//===----------------------------------------------------------------------===//
1091// SVE helpers
1092//===----------------------------------------------------------------------===//
1093// Get the minimum number of elements in an SVE vector for the given element
1094// type. The actual number of elements in the vector would be an integer (power
1095// of two) multiple of this value.
// Element-type values that have no case below reach llvm_unreachable.
1097 switch (sveType) {
1098 default:
1099 llvm_unreachable("Invalid SVETypeFlag!");
1100
// Integer element types.
1101 case SVETypeFlags::EltTyInt8:
1102 return 16;
1103 case SVETypeFlags::EltTyInt16:
1104 return 8;
1105 case SVETypeFlags::EltTyInt32:
1106 return 4;
1107 case SVETypeFlags::EltTyInt64:
1108 return 2;
1109
// Floating-point element types (Float16 and BFloat16 share one count).
1110 case SVETypeFlags::EltTyMFloat8:
1111 return 16;
1112 case SVETypeFlags::EltTyFloat16:
1113 case SVETypeFlags::EltTyBFloat16:
1114 return 8;
1115 case SVETypeFlags::EltTyFloat32:
1116 return 4;
1117 case SVETypeFlags::EltTyFloat64:
1118 return 2;
1119
// Predicate (bool) element types.
1120 case SVETypeFlags::EltTyBool8:
1121 return 16;
1122 case SVETypeFlags::EltTyBool16:
1123 return 8;
1124 case SVETypeFlags::EltTyBool32:
1125 return 4;
1126 case SVETypeFlags::EltTyBool64:
1127 return 2;
1128 }
1129}
1130
1131// TODO(cir): Share with OGCG
// NOTE(review): presumably the size in bits of one SVE vector block, i.e. the
// unit that is scaled by vscale — confirm against the OGCG definition.
1132constexpr unsigned sveBitsPerBlock = 128;
1133
// Returns the scalable cir::VectorType whose element type is `eltTy` and whose
// minimum lane count is `numElts`.
// NOTE(review): `cgm` is presumably used to compute `numElts` from the size of
// `eltTy` — confirm against the full definition.
1134static cir::VectorType getSVEVectorForElementType(CIRGenModule &cgm,
1135 mlir::Type eltTy) {
1136 unsigned numElts =
1138 return cir::VectorType::get(eltTy, numElts, /*is_scalable=*/true);
1139}
1140
1141//===----------------------------------------------------------------------===//
1142// SVE helpers
1143//===----------------------------------------------------------------------===//
// Emits CIR for the SVE builtin call `expr`, identified by `builtinID`.
//
// Outcomes visible in this function:
//   * an empty mlir::Value, after an NYI diagnostic, for builtins that are
//     not implemented yet;
//   * the result of the intrinsic call when the builtin's table entry names
//     an LLVM intrinsic and the call's type matches the converted return
//     type;
//   * vscale multiplied by a per-element-size factor for the svlen builtins;
//   * std::nullopt for builtin IDs that the final switch does not list.
1144std::optional<mlir::Value>
1146 const CallExpr *expr) {
1147 mlir::Type ty = convertType(expr->getType());
1148
// The whole reinterpret range of builtin IDs is rejected up front.
1149 if (builtinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
1150 builtinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
1151 cgm.errorNYI(expr->getSourceRange(),
1152 std::string("unimplemented AArch64 builtin call: ") +
1153 getContext().BuiltinInfo.getName(builtinID));
1154 return mlir::Value{};
1155 }
1156
1158
1159 auto *builtinIntrInfo = findARMVectorIntrinsicInMap(
1161
1162 // The operands of the builtin call
1164
1165 SVETypeFlags typeFlags(builtinIntrInfo->TypeModifier);
1167 typeFlags))
1168 return mlir::Value{};
1169
// These builtin categories are not implemented yet. Only a diagnostic is
// reported: there is no early return, so emission continues below.
// NOTE(review): confirm that continuing after the diagnostic is intended.
1170 if (typeFlags.isLoad() || typeFlags.isStore() || typeFlags.isGatherLoad() ||
1171 typeFlags.isScatterStore() || typeFlags.isPrefetch() ||
1172 typeFlags.isGatherPrefetch() || typeFlags.isStructLoad() ||
1173 typeFlags.isStructStore() || typeFlags.isTupleSet() ||
1174 typeFlags.isTupleGet() || typeFlags.isTupleCreate() ||
1175 typeFlags.isUndef())
1176 cgm.errorNYI(expr->getSourceRange(),
1177 std::string("unimplemented AArch64 builtin call: ") +
1178 getContext().BuiltinInfo.getName(builtinID));
1179
1180 mlir::Location loc = getLoc(expr->getExprLoc());
1181
1182 // Handle built-ins for which there is a corresponding LLVM Intrinsic.
1183 // -------------------------------------------------------------------
// The NYI checks inside this branch report a diagnostic but do not return;
// the intrinsic call at the end of the branch is still emitted afterwards.
1184 if (builtinIntrInfo->LLVMIntrinsic != 0) {
1185 // Emit set FPMR for intrinsics that require it.
1186 if (typeFlags.setsFPMR())
1187 cgm.errorNYI(expr->getSourceRange(),
1188 std::string("unimplemented AArch64 builtin call: ") +
1189 getContext().BuiltinInfo.getName(builtinID));
1190
1191 // Zero-ing predication
// A null value of the call's result type is prepended as the first operand.
1192 if (typeFlags.getMergeType() == SVETypeFlags::MergeZeroExp) {
1193 auto null = builder.getNullValue(convertType(expr->getType()),
1194 getLoc(expr->getExprLoc()));
1195 ops.insert(ops.begin(), null);
1196 }
1197
// For MergeAnyExp an undef value of the result type is prepended instead.
1198 if (typeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
1199 ops.insert(ops.begin(),
1200 builder.getConstant(loc, cir::UndefAttr::get(ty)));
1201
1202 // Some ACLE builtins leave out the argument to specify the predicate
1203 // pattern, which is expected to be expanded to an SV_ALL pattern.
1204 if (typeFlags.isAppendSVALL())
1205 cgm.errorNYI(expr->getSourceRange(),
1206 std::string("unimplemented AArch64 builtin call: ") +
1207 getContext().BuiltinInfo.getName(builtinID));
1208 if (typeFlags.isInsertOp1SVALL())
1209 cgm.errorNYI(expr->getSourceRange(),
1210 std::string("unimplemented AArch64 builtin call: ") +
1211 getContext().BuiltinInfo.getName(builtinID));
1212
1213 // Predicates must match the main datatype.
// Only operands that are vectors of 1-bit integers are treated as predicates.
1214 for (mlir::Value &op : ops)
1215 if (auto predTy = dyn_cast<cir::VectorType>(op.getType()))
1216 if (auto cirInt = dyn_cast<cir::IntType>(predTy.getElementType()))
1217 if (cirInt.getWidth() == 1)
1219 op, getSVEMinEltCount(typeFlags.getEltType()), loc);
1220
1221 // Splat scalar operand to vector (intrinsics with _n infix)
1222 if (typeFlags.hasSplatOperand()) {
1223 unsigned opNo = typeFlags.getSplatOperand();
1224 ops[opNo] = cir::VecSplatOp::create(
1225 builder, loc, getSVEVectorForElementType(cgm, ops[opNo].getType()),
1226 ops[opNo]);
1227 }
1228
1229 if (typeFlags.isReverseCompare())
1230 cgm.errorNYI(expr->getSourceRange(),
1231 std::string("unimplemented AArch64 builtin call: ") +
1232 getContext().BuiltinInfo.getName(builtinID));
1233 if (typeFlags.isReverseUSDOT())
1234 cgm.errorNYI(expr->getSourceRange(),
1235 std::string("unimplemented AArch64 builtin call: ") +
1236 getContext().BuiltinInfo.getName(builtinID));
1237 if (typeFlags.isReverseMergeAnyBinOp() &&
1238 typeFlags.getMergeType() == SVETypeFlags::MergeAny)
1239 cgm.errorNYI(expr->getSourceRange(),
1240 std::string("unimplemented AArch64 builtin call: ") +
1241 getContext().BuiltinInfo.getName(builtinID));
1242 if (typeFlags.isReverseMergeAnyAccOp() &&
1243 typeFlags.getMergeType() == SVETypeFlags::MergeAny)
1244 cgm.errorNYI(expr->getSourceRange(),
1245 std::string("unimplemented AArch64 builtin call: ") +
1246 getContext().BuiltinInfo.getName(builtinID));
1247
1248 // Predicated intrinsics with _z suffix.
1249 if (typeFlags.getMergeType() == SVETypeFlags::MergeZero) {
1250 cgm.errorNYI(expr->getSourceRange(),
1251 std::string("unimplemented AArch64 builtin call: ") +
1252 getContext().BuiltinInfo.getName(builtinID));
1253 }
1254
// The intrinsic is called by name (obtained from its ID via
// getLLVMIntrNameNoPrefix) with the prepared operands.
1255 llvm::StringRef llvmIntrName = getLLVMIntrNameNoPrefix(
1256 static_cast<llvm::Intrinsic::ID>(builtinIntrInfo->LLVMIntrinsic));
1257 auto retTy = convertType(expr->getType());
1258
1259 auto call = builder.emitIntrinsicCallOp(loc, llvmIntrName, retTy,
1260 mlir::ValueRange{ops});
1261 if (call.getType() == retTy)
1262 return call;
1263
1264 // Predicate results must be converted to svbool_t.
// NOTE(review): this tests for mlir::VectorType, whereas the vector types
// built elsewhere in this file are cir::VectorType — confirm which type
// convertType() yields here.
1265 if (isa<mlir::VectorType>(retTy) &&
1266 cast<mlir::VectorType>(retTy).isScalable())
1267 cgm.errorNYI(expr->getSourceRange(),
1268 std::string("unimplemented AArch64 builtin call: ") +
1269 getContext().BuiltinInfo.getName(builtinID));
1270 // TODO Handle struct types, e.g. svint8x2_t (update the converter first).
1271
// NOTE(review): reached whenever the call's type differs from retTy; the
// message text mentions an element count — confirm the wording is intended.
1272 llvm_unreachable("unsupported element count!");
1273 }
1274
1275 // Handle the remaining built-ins.
1276 // -------------------------------
1277 switch (builtinID) {
// Builtin IDs not listed below are reported to the caller as std::nullopt.
1278 default:
1279 return std::nullopt;
1280
1281 case SVE::BI__builtin_sve_svreinterpret_b:
1282 case SVE::BI__builtin_sve_svreinterpret_c:
1283 case SVE::BI__builtin_sve_svpsel_lane_b8:
1284 case SVE::BI__builtin_sve_svpsel_lane_b16:
1285 case SVE::BI__builtin_sve_svpsel_lane_b32:
1286 case SVE::BI__builtin_sve_svpsel_lane_b64:
1287 case SVE::BI__builtin_sve_svpsel_lane_c8:
1288 case SVE::BI__builtin_sve_svpsel_lane_c16:
1289 case SVE::BI__builtin_sve_svpsel_lane_c32:
1290 case SVE::BI__builtin_sve_svpsel_lane_c64:
1291 case SVE::BI__builtin_sve_svmov_b_z:
1292 case SVE::BI__builtin_sve_svnot_b_z:
1293 case SVE::BI__builtin_sve_svmovlb_u16:
1294 case SVE::BI__builtin_sve_svmovlb_u32:
1295 case SVE::BI__builtin_sve_svmovlb_u64:
1296 case SVE::BI__builtin_sve_svmovlb_s16:
1297 case SVE::BI__builtin_sve_svmovlb_s32:
1298 case SVE::BI__builtin_sve_svmovlb_s64:
1299 case SVE::BI__builtin_sve_svmovlt_u16:
1300 case SVE::BI__builtin_sve_svmovlt_u32:
1301 case SVE::BI__builtin_sve_svmovlt_u64:
1302 case SVE::BI__builtin_sve_svmovlt_s16:
1303 case SVE::BI__builtin_sve_svmovlt_s32:
1304 case SVE::BI__builtin_sve_svmovlt_s64:
1305 case SVE::BI__builtin_sve_svpmullt_u16:
1306 case SVE::BI__builtin_sve_svpmullt_u64:
1307 case SVE::BI__builtin_sve_svpmullt_n_u16:
1308 case SVE::BI__builtin_sve_svpmullt_n_u64:
1309 case SVE::BI__builtin_sve_svpmullb_u16:
1310 case SVE::BI__builtin_sve_svpmullb_u64:
1311 case SVE::BI__builtin_sve_svpmullb_n_u16:
1312 case SVE::BI__builtin_sve_svpmullb_n_u64:
1313
1314 case SVE::BI__builtin_sve_svdup_n_b8:
1315 case SVE::BI__builtin_sve_svdup_n_b16:
1316 case SVE::BI__builtin_sve_svdup_n_b32:
1317 case SVE::BI__builtin_sve_svdup_n_b64:
1318
1319 case SVE::BI__builtin_sve_svdupq_n_b8:
1320 case SVE::BI__builtin_sve_svdupq_n_b16:
1321 case SVE::BI__builtin_sve_svdupq_n_b32:
1322 case SVE::BI__builtin_sve_svdupq_n_b64:
1323 case SVE::BI__builtin_sve_svdupq_n_u8:
1324 case SVE::BI__builtin_sve_svdupq_n_s8:
1325 case SVE::BI__builtin_sve_svdupq_n_u64:
1326 case SVE::BI__builtin_sve_svdupq_n_f64:
1327 case SVE::BI__builtin_sve_svdupq_n_s64:
1328 case SVE::BI__builtin_sve_svdupq_n_u16:
1329 case SVE::BI__builtin_sve_svdupq_n_f16:
1330 case SVE::BI__builtin_sve_svdupq_n_bf16:
1331 case SVE::BI__builtin_sve_svdupq_n_s16:
1332 case SVE::BI__builtin_sve_svdupq_n_u32:
1333 case SVE::BI__builtin_sve_svdupq_n_f32:
1334 case SVE::BI__builtin_sve_svdupq_n_s32:
1335 case SVE::BI__builtin_sve_svpfalse_b:
1336 case SVE::BI__builtin_sve_svpfalse_c:
1337 cgm.errorNYI(expr->getSourceRange(),
1338 std::string("unimplemented AArch64 builtin call: ") +
1339 getContext().BuiltinInfo.getName(builtinID));
1340 return mlir::Value{};
1341
// svlen builtins: the result is vscale multiplied by a constant factor that
// depends on the element-size suffix (8-bit: 16, 16-bit: 8, 32-bit: 4,
// 64-bit: 2).
1342 case SVE::BI__builtin_sve_svlen_u8:
1343 case SVE::BI__builtin_sve_svlen_s8:
1344 return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 16);
1345
1346 case SVE::BI__builtin_sve_svlen_u16:
1347 case SVE::BI__builtin_sve_svlen_s16:
1348 case SVE::BI__builtin_sve_svlen_f16:
1349 case SVE::BI__builtin_sve_svlen_bf16:
1350 return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 8);
1351
1352 case SVE::BI__builtin_sve_svlen_u32:
1353 case SVE::BI__builtin_sve_svlen_s32:
1354 case SVE::BI__builtin_sve_svlen_f32:
1355 return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 4);
1356
1357 case SVE::BI__builtin_sve_svlen_u64:
1358 case SVE::BI__builtin_sve_svlen_s64:
1359 case SVE::BI__builtin_sve_svlen_f64:
1360 return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 2);
1361
// Table-lookup and NEON-interop builtins: not implemented yet.
1362 case SVE::BI__builtin_sve_svtbl2_u8:
1363 case SVE::BI__builtin_sve_svtbl2_s8:
1364 case SVE::BI__builtin_sve_svtbl2_u16:
1365 case SVE::BI__builtin_sve_svtbl2_s16:
1366 case SVE::BI__builtin_sve_svtbl2_u32:
1367 case SVE::BI__builtin_sve_svtbl2_s32:
1368 case SVE::BI__builtin_sve_svtbl2_u64:
1369 case SVE::BI__builtin_sve_svtbl2_s64:
1370 case SVE::BI__builtin_sve_svtbl2_f16:
1371 case SVE::BI__builtin_sve_svtbl2_bf16:
1372 case SVE::BI__builtin_sve_svtbl2_f32:
1373 case SVE::BI__builtin_sve_svtbl2_f64:
1374 case SVE::BI__builtin_sve_svset_neonq_s8:
1375 case SVE::BI__builtin_sve_svset_neonq_s16:
1376 case SVE::BI__builtin_sve_svset_neonq_s32:
1377 case SVE::BI__builtin_sve_svset_neonq_s64:
1378 case SVE::BI__builtin_sve_svset_neonq_u8:
1379 case SVE::BI__builtin_sve_svset_neonq_u16:
1380 case SVE::BI__builtin_sve_svset_neonq_u32:
1381 case SVE::BI__builtin_sve_svset_neonq_u64:
1382 case SVE::BI__builtin_sve_svset_neonq_f16:
1383 case SVE::BI__builtin_sve_svset_neonq_f32:
1384 case SVE::BI__builtin_sve_svset_neonq_f64:
1385 case SVE::BI__builtin_sve_svset_neonq_bf16:
1386 case SVE::BI__builtin_sve_svget_neonq_s8:
1387 case SVE::BI__builtin_sve_svget_neonq_s16:
1388 case SVE::BI__builtin_sve_svget_neonq_s32:
1389 case SVE::BI__builtin_sve_svget_neonq_s64:
1390 case SVE::BI__builtin_sve_svget_neonq_u8:
1391 case SVE::BI__builtin_sve_svget_neonq_u16:
1392 case SVE::BI__builtin_sve_svget_neonq_u32:
1393 case SVE::BI__builtin_sve_svget_neonq_u64:
1394 case SVE::BI__builtin_sve_svget_neonq_f16:
1395 case SVE::BI__builtin_sve_svget_neonq_f32:
1396 case SVE::BI__builtin_sve_svget_neonq_f64:
1397 case SVE::BI__builtin_sve_svget_neonq_bf16:
1398 case SVE::BI__builtin_sve_svdup_neonq_s8:
1399 case SVE::BI__builtin_sve_svdup_neonq_s16:
1400 case SVE::BI__builtin_sve_svdup_neonq_s32:
1401 case SVE::BI__builtin_sve_svdup_neonq_s64:
1402 case SVE::BI__builtin_sve_svdup_neonq_u8:
1403 case SVE::BI__builtin_sve_svdup_neonq_u16:
1404 case SVE::BI__builtin_sve_svdup_neonq_u32:
1405 case SVE::BI__builtin_sve_svdup_neonq_u64:
1406 case SVE::BI__builtin_sve_svdup_neonq_f16:
1407 case SVE::BI__builtin_sve_svdup_neonq_f32:
1408 case SVE::BI__builtin_sve_svdup_neonq_f64:
1409 case SVE::BI__builtin_sve_svdup_neonq_bf16:
1410 cgm.errorNYI(expr->getSourceRange(),
1411 std::string("unimplemented AArch64 builtin call: ") +
1412 getContext().BuiltinInfo.getName(builtinID));
1413 return mlir::Value{};
1414 }
1415
1416 // Unreachable: All cases in the switch above return.
1417}
1418
// Placeholder: every call that reaches this function is reported as not yet
// implemented (the diagnostic names the builtin) and an empty mlir::Value is
// returned.
1419std::optional<mlir::Value>
1421 const CallExpr *expr) {
1423
1424 cgm.errorNYI(expr->getSourceRange(),
1425 std::string("unimplemented AArch64 builtin call: ") +
1426 getContext().BuiltinInfo.getName(builtinID));
1427 return mlir::Value{};
1428}
1429
1430// Some intrinsics are equivalent for codegen.
1431static const std::pair<unsigned, unsigned> neonEquivalentIntrinsicMap[] = {
1432 {
1433 NEON::BI__builtin_neon_splat_lane_bf16,
1434 NEON::BI__builtin_neon_splat_lane_v,
1435 },
1436 {
1437 NEON::BI__builtin_neon_splat_laneq_bf16,
1438 NEON::BI__builtin_neon_splat_laneq_v,
1439 },
1440 {
1441 NEON::BI__builtin_neon_splatq_lane_bf16,
1442 NEON::BI__builtin_neon_splatq_lane_v,
1443 },
1444 {
1445 NEON::BI__builtin_neon_splatq_laneq_bf16,
1446 NEON::BI__builtin_neon_splatq_laneq_v,
1447 },
1448 {
1449 NEON::BI__builtin_neon_vabd_f16,
1450 NEON::BI__builtin_neon_vabd_v,
1451 },
1452 {
1453 NEON::BI__builtin_neon_vabdq_f16,
1454 NEON::BI__builtin_neon_vabdq_v,
1455 },
1456 {
1457 NEON::BI__builtin_neon_vabs_f16,
1458 NEON::BI__builtin_neon_vabs_v,
1459 },
1460 {
1461 NEON::BI__builtin_neon_vabsq_f16,
1462 NEON::BI__builtin_neon_vabsq_v,
1463 },
1464 {
1465 NEON::BI__builtin_neon_vcage_f16,
1466 NEON::BI__builtin_neon_vcage_v,
1467 },
1468 {
1469 NEON::BI__builtin_neon_vcageq_f16,
1470 NEON::BI__builtin_neon_vcageq_v,
1471 },
1472 {
1473 NEON::BI__builtin_neon_vcagt_f16,
1474 NEON::BI__builtin_neon_vcagt_v,
1475 },
1476 {
1477 NEON::BI__builtin_neon_vcagtq_f16,
1478 NEON::BI__builtin_neon_vcagtq_v,
1479 },
1480 {
1481 NEON::BI__builtin_neon_vcale_f16,
1482 NEON::BI__builtin_neon_vcale_v,
1483 },
1484 {
1485 NEON::BI__builtin_neon_vcaleq_f16,
1486 NEON::BI__builtin_neon_vcaleq_v,
1487 },
1488 {
1489 NEON::BI__builtin_neon_vcalt_f16,
1490 NEON::BI__builtin_neon_vcalt_v,
1491 },
1492 {
1493 NEON::BI__builtin_neon_vcaltq_f16,
1494 NEON::BI__builtin_neon_vcaltq_v,
1495 },
1496 {
1497 NEON::BI__builtin_neon_vceqz_f16,
1498 NEON::BI__builtin_neon_vceqz_v,
1499 },
1500 {
1501 NEON::BI__builtin_neon_vceqzq_f16,
1502 NEON::BI__builtin_neon_vceqzq_v,
1503 },
1504 {
1505 NEON::BI__builtin_neon_vcgez_f16,
1506 NEON::BI__builtin_neon_vcgez_v,
1507 },
1508 {
1509 NEON::BI__builtin_neon_vcgezq_f16,
1510 NEON::BI__builtin_neon_vcgezq_v,
1511 },
1512 {
1513 NEON::BI__builtin_neon_vcgtz_f16,
1514 NEON::BI__builtin_neon_vcgtz_v,
1515 },
1516 {
1517 NEON::BI__builtin_neon_vcgtzq_f16,
1518 NEON::BI__builtin_neon_vcgtzq_v,
1519 },
1520 {
1521 NEON::BI__builtin_neon_vclez_f16,
1522 NEON::BI__builtin_neon_vclez_v,
1523 },
1524 {
1525 NEON::BI__builtin_neon_vclezq_f16,
1526 NEON::BI__builtin_neon_vclezq_v,
1527 },
1528 {
1529 NEON::BI__builtin_neon_vcltz_f16,
1530 NEON::BI__builtin_neon_vcltz_v,
1531 },
1532 {
1533 NEON::BI__builtin_neon_vcltzq_f16,
1534 NEON::BI__builtin_neon_vcltzq_v,
1535 },
1536 {
1537 NEON::BI__builtin_neon_vfma_f16,
1538 NEON::BI__builtin_neon_vfma_v,
1539 },
1540 {
1541 NEON::BI__builtin_neon_vfma_lane_f16,
1542 NEON::BI__builtin_neon_vfma_lane_v,
1543 },
1544 {
1545 NEON::BI__builtin_neon_vfma_laneq_f16,
1546 NEON::BI__builtin_neon_vfma_laneq_v,
1547 },
1548 {
1549 NEON::BI__builtin_neon_vfmaq_f16,
1550 NEON::BI__builtin_neon_vfmaq_v,
1551 },
1552 {
1553 NEON::BI__builtin_neon_vfmaq_lane_f16,
1554 NEON::BI__builtin_neon_vfmaq_lane_v,
1555 },
1556 {
1557 NEON::BI__builtin_neon_vfmaq_laneq_f16,
1558 NEON::BI__builtin_neon_vfmaq_laneq_v,
1559 },
1560 {NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v},
1561 {NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v},
1562 {NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v},
1563 {NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v},
1564 {NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v},
1565 {NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v},
1566 {NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v},
1567 {NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v},
1568 {NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v},
1569 {NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v},
1570 {NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v},
1571 {NEON::BI__builtin_neon_vld1q_lane_bf16,
1572 NEON::BI__builtin_neon_vld1q_lane_v},
1573 {NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v},
1574 {NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v},
1575 {NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v},
1576 {NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v},
1577 {NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v},
1578 {NEON::BI__builtin_neon_vld2q_lane_bf16,
1579 NEON::BI__builtin_neon_vld2q_lane_v},
1580 {NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v},
1581 {NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v},
1582 {NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v},
1583 {NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v},
1584 {NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v},
1585 {NEON::BI__builtin_neon_vld3q_lane_bf16,
1586 NEON::BI__builtin_neon_vld3q_lane_v},
1587 {NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v},
1588 {NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v},
1589 {NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v},
1590 {NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v},
1591 {NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v},
1592 {NEON::BI__builtin_neon_vld4q_lane_bf16,
1593 NEON::BI__builtin_neon_vld4q_lane_v},
1594 {
1595 NEON::BI__builtin_neon_vmax_f16,
1596 NEON::BI__builtin_neon_vmax_v,
1597 },
1598 {
1599 NEON::BI__builtin_neon_vmaxnm_f16,
1600 NEON::BI__builtin_neon_vmaxnm_v,
1601 },
1602 {
1603 NEON::BI__builtin_neon_vmaxnmq_f16,
1604 NEON::BI__builtin_neon_vmaxnmq_v,
1605 },
1606 {
1607 NEON::BI__builtin_neon_vmaxq_f16,
1608 NEON::BI__builtin_neon_vmaxq_v,
1609 },
1610 {
1611 NEON::BI__builtin_neon_vmin_f16,
1612 NEON::BI__builtin_neon_vmin_v,
1613 },
1614 {
1615 NEON::BI__builtin_neon_vminnm_f16,
1616 NEON::BI__builtin_neon_vminnm_v,
1617 },
1618 {
1619 NEON::BI__builtin_neon_vminnmq_f16,
1620 NEON::BI__builtin_neon_vminnmq_v,
1621 },
1622 {
1623 NEON::BI__builtin_neon_vminq_f16,
1624 NEON::BI__builtin_neon_vminq_v,
1625 },
1626 {
1627 NEON::BI__builtin_neon_vmulx_f16,
1628 NEON::BI__builtin_neon_vmulx_v,
1629 },
1630 {
1631 NEON::BI__builtin_neon_vmulxq_f16,
1632 NEON::BI__builtin_neon_vmulxq_v,
1633 },
1634 {
1635 NEON::BI__builtin_neon_vpadd_f16,
1636 NEON::BI__builtin_neon_vpadd_v,
1637 },
1638 {
1639 NEON::BI__builtin_neon_vpaddq_f16,
1640 NEON::BI__builtin_neon_vpaddq_v,
1641 },
1642 {
1643 NEON::BI__builtin_neon_vpmax_f16,
1644 NEON::BI__builtin_neon_vpmax_v,
1645 },
1646 {
1647 NEON::BI__builtin_neon_vpmaxnm_f16,
1648 NEON::BI__builtin_neon_vpmaxnm_v,
1649 },
1650 {
1651 NEON::BI__builtin_neon_vpmaxnmq_f16,
1652 NEON::BI__builtin_neon_vpmaxnmq_v,
1653 },
1654 {
1655 NEON::BI__builtin_neon_vpmaxq_f16,
1656 NEON::BI__builtin_neon_vpmaxq_v,
1657 },
1658 {
1659 NEON::BI__builtin_neon_vpmin_f16,
1660 NEON::BI__builtin_neon_vpmin_v,
1661 },
1662 {
1663 NEON::BI__builtin_neon_vpminnm_f16,
1664 NEON::BI__builtin_neon_vpminnm_v,
1665 },
1666 {
1667 NEON::BI__builtin_neon_vpminnmq_f16,
1668 NEON::BI__builtin_neon_vpminnmq_v,
1669 },
1670 {
1671 NEON::BI__builtin_neon_vpminq_f16,
1672 NEON::BI__builtin_neon_vpminq_v,
1673 },
1674 {
1675 NEON::BI__builtin_neon_vrecpe_f16,
1676 NEON::BI__builtin_neon_vrecpe_v,
1677 },
1678 {
1679 NEON::BI__builtin_neon_vrecpeq_f16,
1680 NEON::BI__builtin_neon_vrecpeq_v,
1681 },
1682 {
1683 NEON::BI__builtin_neon_vrecps_f16,
1684 NEON::BI__builtin_neon_vrecps_v,
1685 },
1686 {
1687 NEON::BI__builtin_neon_vrecpsq_f16,
1688 NEON::BI__builtin_neon_vrecpsq_v,
1689 },
1690 {
1691 NEON::BI__builtin_neon_vrnd_f16,
1692 NEON::BI__builtin_neon_vrnd_v,
1693 },
1694 {
1695 NEON::BI__builtin_neon_vrnda_f16,
1696 NEON::BI__builtin_neon_vrnda_v,
1697 },
1698 {
1699 NEON::BI__builtin_neon_vrndaq_f16,
1700 NEON::BI__builtin_neon_vrndaq_v,
1701 },
1702 {
1703 NEON::BI__builtin_neon_vrndi_f16,
1704 NEON::BI__builtin_neon_vrndi_v,
1705 },
1706 {
1707 NEON::BI__builtin_neon_vrndiq_f16,
1708 NEON::BI__builtin_neon_vrndiq_v,
1709 },
1710 {
1711 NEON::BI__builtin_neon_vrndm_f16,
1712 NEON::BI__builtin_neon_vrndm_v,
1713 },
1714 {
1715 NEON::BI__builtin_neon_vrndmq_f16,
1716 NEON::BI__builtin_neon_vrndmq_v,
1717 },
1718 {
1719 NEON::BI__builtin_neon_vrndn_f16,
1720 NEON::BI__builtin_neon_vrndn_v,
1721 },
1722 {
1723 NEON::BI__builtin_neon_vrndnq_f16,
1724 NEON::BI__builtin_neon_vrndnq_v,
1725 },
1726 {
1727 NEON::BI__builtin_neon_vrndp_f16,
1728 NEON::BI__builtin_neon_vrndp_v,
1729 },
1730 {
1731 NEON::BI__builtin_neon_vrndpq_f16,
1732 NEON::BI__builtin_neon_vrndpq_v,
1733 },
1734 {
1735 NEON::BI__builtin_neon_vrndq_f16,
1736 NEON::BI__builtin_neon_vrndq_v,
1737 },
1738 {
1739 NEON::BI__builtin_neon_vrndx_f16,
1740 NEON::BI__builtin_neon_vrndx_v,
1741 },
1742 {
1743 NEON::BI__builtin_neon_vrndxq_f16,
1744 NEON::BI__builtin_neon_vrndxq_v,
1745 },
1746 {
1747 NEON::BI__builtin_neon_vrsqrte_f16,
1748 NEON::BI__builtin_neon_vrsqrte_v,
1749 },
1750 {
1751 NEON::BI__builtin_neon_vrsqrteq_f16,
1752 NEON::BI__builtin_neon_vrsqrteq_v,
1753 },
1754 {
1755 NEON::BI__builtin_neon_vrsqrts_f16,
1756 NEON::BI__builtin_neon_vrsqrts_v,
1757 },
1758 {
1759 NEON::BI__builtin_neon_vrsqrtsq_f16,
1760 NEON::BI__builtin_neon_vrsqrtsq_v,
1761 },
1762 {
1763 NEON::BI__builtin_neon_vsqrt_f16,
1764 NEON::BI__builtin_neon_vsqrt_v,
1765 },
1766 {
1767 NEON::BI__builtin_neon_vsqrtq_f16,
1768 NEON::BI__builtin_neon_vsqrtq_v,
1769 },
1770 {NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v},
1771 {NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v},
1772 {NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v},
1773 {NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v},
1774 {NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v},
1775 {NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v},
1776 {NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v},
1777 {NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v},
1778 {NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v},
1779 {NEON::BI__builtin_neon_vst1q_lane_bf16,
1780 NEON::BI__builtin_neon_vst1q_lane_v},
1781 {NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v},
1782 {NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v},
1783 {NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v},
1784 {NEON::BI__builtin_neon_vst2q_lane_bf16,
1785 NEON::BI__builtin_neon_vst2q_lane_v},
1786 {NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v},
1787 {NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v},
1788 {NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v},
1789 {NEON::BI__builtin_neon_vst3q_lane_bf16,
1790 NEON::BI__builtin_neon_vst3q_lane_v},
1791 {NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v},
1792 {NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v},
1793 {NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v},
1794 {NEON::BI__builtin_neon_vst4q_lane_bf16,
1795 NEON::BI__builtin_neon_vst4q_lane_v},
1796 // The mangling rules cause us to have one ID for each type for
1797 // vldap1(q)_lane and vstl1(q)_lane, but codegen is equivalent for all of
1798 // them. Choose an arbitrary one to be handled as the canonical variation.
1799 {NEON::BI__builtin_neon_vldap1_lane_u64,
1800 NEON::BI__builtin_neon_vldap1_lane_s64},
1801 {NEON::BI__builtin_neon_vldap1_lane_f64,
1802 NEON::BI__builtin_neon_vldap1_lane_s64},
1803 {NEON::BI__builtin_neon_vldap1_lane_p64,
1804 NEON::BI__builtin_neon_vldap1_lane_s64},
1805 {NEON::BI__builtin_neon_vldap1q_lane_u64,
1806 NEON::BI__builtin_neon_vldap1q_lane_s64},
1807 {NEON::BI__builtin_neon_vldap1q_lane_f64,
1808 NEON::BI__builtin_neon_vldap1q_lane_s64},
1809 {NEON::BI__builtin_neon_vldap1q_lane_p64,
1810 NEON::BI__builtin_neon_vldap1q_lane_s64},
1811 {NEON::BI__builtin_neon_vstl1_lane_u64,
1812 NEON::BI__builtin_neon_vstl1_lane_s64},
1813 {NEON::BI__builtin_neon_vstl1_lane_f64,
1814 NEON::BI__builtin_neon_vstl1_lane_s64},
1815 {NEON::BI__builtin_neon_vstl1_lane_p64,
1816 NEON::BI__builtin_neon_vstl1_lane_s64},
1817 {NEON::BI__builtin_neon_vstl1q_lane_u64,
1818 NEON::BI__builtin_neon_vstl1q_lane_s64},
1819 {NEON::BI__builtin_neon_vstl1q_lane_f64,
1820 NEON::BI__builtin_neon_vstl1q_lane_s64},
1821 {NEON::BI__builtin_neon_vstl1q_lane_p64,
1822 NEON::BI__builtin_neon_vstl1q_lane_s64},
1823};
1824
1825std::optional<mlir::Value>
1828 llvm::Triple::ArchType arch) {
1829 if (builtinID >= clang::AArch64::FirstSVEBuiltin &&
1830 builtinID <= clang::AArch64::LastSVEBuiltin)
1831 return emitAArch64SVEBuiltinExpr(builtinID, expr);
1832
1833 if (builtinID >= clang::AArch64::FirstSMEBuiltin &&
1834 builtinID <= clang::AArch64::LastSMEBuiltin)
1835 return emitAArch64SMEBuiltinExpr(builtinID, expr);
1836
1837 if (builtinID == Builtin::BI__builtin_cpu_supports) {
1838 cgm.errorNYI(expr->getSourceRange(),
1839 std::string("unimplemented AArch64 builtin call: ") +
1840 getContext().BuiltinInfo.getName(builtinID));
1841 return mlir::Value{};
1842 }
1843
1844 switch (builtinID) {
1845 default:
1846 break;
1847 case clang::AArch64::BI__builtin_arm_nop:
1848 case clang::AArch64::BI__builtin_arm_yield:
1849 case clang::AArch64::BI__yield:
1850 case clang::AArch64::BI__builtin_arm_wfe:
1851 case clang::AArch64::BI__wfe:
1852 case clang::AArch64::BI__builtin_arm_wfi:
1853 case clang::AArch64::BI__wfi:
1854 case clang::AArch64::BI__builtin_arm_sev:
1855 case clang::AArch64::BI__sev:
1856 case clang::AArch64::BI__builtin_arm_sevl:
1857 case clang::AArch64::BI__sevl:
1858 cgm.errorNYI(expr->getSourceRange(),
1859 std::string("unimplemented AArch64 builtin call: ") +
1860 getContext().BuiltinInfo.getName(builtinID));
1861 return mlir::Value{};
1862 }
1863
1864 if (builtinID == clang::AArch64::BI__builtin_arm_trap) {
1865 cgm.errorNYI(expr->getSourceRange(),
1866 std::string("unimplemented AArch64 builtin call: ") +
1867 getContext().BuiltinInfo.getName(builtinID));
1868 return mlir::Value{};
1869 }
1870
1871 if (builtinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
1872 cgm.errorNYI(expr->getSourceRange(),
1873 std::string("unimplemented AArch64 builtin call: ") +
1874 getContext().BuiltinInfo.getName(builtinID));
1875 return mlir::Value{};
1876 }
1877
1878 if (builtinID == clang::AArch64::BI__builtin_arm_rbit) {
1879 cgm.errorNYI(expr->getSourceRange(),
1880 std::string("unimplemented AArch64 builtin call: ") +
1881 getContext().BuiltinInfo.getName(builtinID));
1882 return mlir::Value{};
1883 }
1884 if (builtinID == clang::AArch64::BI__builtin_arm_rbit64) {
1885 cgm.errorNYI(expr->getSourceRange(),
1886 std::string("unimplemented AArch64 builtin call: ") +
1887 getContext().BuiltinInfo.getName(builtinID));
1888 return mlir::Value{};
1889 }
1890
1891 if (builtinID == clang::AArch64::BI__builtin_arm_clz ||
1892 builtinID == clang::AArch64::BI__builtin_arm_clz64) {
1893 cgm.errorNYI(expr->getSourceRange(),
1894 std::string("unimplemented AArch64 builtin call: ") +
1895 getContext().BuiltinInfo.getName(builtinID));
1896 return mlir::Value{};
1897 }
1898
1899 if (builtinID == clang::AArch64::BI__builtin_arm_cls) {
1900 cgm.errorNYI(expr->getSourceRange(),
1901 std::string("unimplemented AArch64 builtin call: ") +
1902 getContext().BuiltinInfo.getName(builtinID));
1903 return mlir::Value{};
1904 }
1905 if (builtinID == clang::AArch64::BI__builtin_arm_cls64) {
1906 cgm.errorNYI(expr->getSourceRange(),
1907 std::string("unimplemented AArch64 builtin call: ") +
1908 getContext().BuiltinInfo.getName(builtinID));
1909 return mlir::Value{};
1910 }
1911
1912 if (builtinID == clang::AArch64::BI__builtin_arm_rint32zf ||
1913 builtinID == clang::AArch64::BI__builtin_arm_rint32z) {
1914 cgm.errorNYI(expr->getSourceRange(),
1915 std::string("unimplemented AArch64 builtin call: ") +
1916 getContext().BuiltinInfo.getName(builtinID));
1917 return mlir::Value{};
1918 }
1919
1920 if (builtinID == clang::AArch64::BI__builtin_arm_rint64zf ||
1921 builtinID == clang::AArch64::BI__builtin_arm_rint64z) {
1922 cgm.errorNYI(expr->getSourceRange(),
1923 std::string("unimplemented AArch64 builtin call: ") +
1924 getContext().BuiltinInfo.getName(builtinID));
1925 return mlir::Value{};
1926 }
1927
1928 if (builtinID == clang::AArch64::BI__builtin_arm_rint32xf ||
1929 builtinID == clang::AArch64::BI__builtin_arm_rint32x) {
1930 cgm.errorNYI(expr->getSourceRange(),
1931 std::string("unimplemented AArch64 builtin call: ") +
1932 getContext().BuiltinInfo.getName(builtinID));
1933 return mlir::Value{};
1934 }
1935
1936 if (builtinID == clang::AArch64::BI__builtin_arm_rint64xf ||
1937 builtinID == clang::AArch64::BI__builtin_arm_rint64x) {
1938 cgm.errorNYI(expr->getSourceRange(),
1939 std::string("unimplemented AArch64 builtin call: ") +
1940 getContext().BuiltinInfo.getName(builtinID));
1941 return mlir::Value{};
1942 }
1943
1944 if (builtinID == clang::AArch64::BI__builtin_arm_jcvt) {
1945 cgm.errorNYI(expr->getSourceRange(),
1946 std::string("unimplemented AArch64 builtin call: ") +
1947 getContext().BuiltinInfo.getName(builtinID));
1948 return mlir::Value{};
1949 }
1950
1951 if (builtinID == clang::AArch64::BI__builtin_arm_ld64b ||
1952 builtinID == clang::AArch64::BI__builtin_arm_st64b ||
1953 builtinID == clang::AArch64::BI__builtin_arm_st64bv ||
1954 builtinID == clang::AArch64::BI__builtin_arm_st64bv0) {
1955 cgm.errorNYI(expr->getSourceRange(),
1956 std::string("unimplemented AArch64 builtin call: ") +
1957 getContext().BuiltinInfo.getName(builtinID));
1958 return mlir::Value{};
1959 }
1960
1961 if (builtinID == clang::AArch64::BI__builtin_arm_rndr ||
1962 builtinID == clang::AArch64::BI__builtin_arm_rndrrs) {
1963 cgm.errorNYI(expr->getSourceRange(),
1964 std::string("unimplemented AArch64 builtin call: ") +
1965 getContext().BuiltinInfo.getName(builtinID));
1966 return mlir::Value{};
1967 }
1968
1969 if (builtinID == clang::AArch64::BI__clear_cache) {
1970 cgm.errorNYI(expr->getSourceRange(),
1971 std::string("unimplemented AArch64 builtin call: ") +
1972 getContext().BuiltinInfo.getName(builtinID));
1973 return mlir::Value{};
1974 }
1975
1976 if ((builtinID == clang::AArch64::BI__builtin_arm_ldrex ||
1977 builtinID == clang::AArch64::BI__builtin_arm_ldaex) &&
1978 getContext().getTypeSize(expr->getType()) == 128) {
1979 cgm.errorNYI(expr->getSourceRange(),
1980 std::string("unimplemented AArch64 builtin call: ") +
1981 getContext().BuiltinInfo.getName(builtinID));
1982 return mlir::Value{};
1983 }
1984 if (builtinID == clang::AArch64::BI__builtin_arm_ldrex ||
1985 builtinID == clang::AArch64::BI__builtin_arm_ldaex) {
1986 cgm.errorNYI(expr->getSourceRange(),
1987 std::string("unimplemented AArch64 builtin call: ") +
1988 getContext().BuiltinInfo.getName(builtinID));
1989 return mlir::Value{};
1990 }
1991
1992 if ((builtinID == clang::AArch64::BI__builtin_arm_strex ||
1993 builtinID == clang::AArch64::BI__builtin_arm_stlex) &&
1994 getContext().getTypeSize(expr->getArg(0)->getType()) == 128) {
1995 cgm.errorNYI(expr->getSourceRange(),
1996 std::string("unimplemented AArch64 builtin call: ") +
1997 getContext().BuiltinInfo.getName(builtinID));
1998 return mlir::Value{};
1999 }
2000
2001 if (builtinID == clang::AArch64::BI__builtin_arm_strex ||
2002 builtinID == clang::AArch64::BI__builtin_arm_stlex) {
2003 cgm.errorNYI(expr->getSourceRange(),
2004 std::string("unimplemented AArch64 builtin call: ") +
2005 getContext().BuiltinInfo.getName(builtinID));
2006 return mlir::Value{};
2007 }
2008
2009 if (builtinID == clang::AArch64::BI__getReg) {
2010 cgm.errorNYI(expr->getSourceRange(),
2011 std::string("unimplemented AArch64 builtin call: ") +
2012 getContext().BuiltinInfo.getName(builtinID));
2013 return mlir::Value{};
2014 }
2015
2016 if (builtinID == clang::AArch64::BI__break) {
2017 cgm.errorNYI(expr->getSourceRange(),
2018 std::string("unimplemented AArch64 builtin call: ") +
2019 getContext().BuiltinInfo.getName(builtinID));
2020 return mlir::Value{};
2021 }
2022
2023 if (builtinID == clang::AArch64::BI__builtin_arm_clrex) {
2024 cgm.errorNYI(expr->getSourceRange(),
2025 std::string("unimplemented AArch64 builtin call: ") +
2026 getContext().BuiltinInfo.getName(builtinID));
2027 return mlir::Value{};
2028 }
2029
2030 if (builtinID == clang::AArch64::BI_ReadWriteBarrier) {
2031 cgm.errorNYI(expr->getSourceRange(),
2032 std::string("unimplemented AArch64 builtin call: ") +
2033 getContext().BuiltinInfo.getName(builtinID));
2034 return mlir::Value{};
2035 }
2036
2037 // CRC32
2038 Intrinsic::ID crcIntrinsicID = Intrinsic::not_intrinsic;
2039 switch (builtinID) {
2040 case clang::AArch64::BI__builtin_arm_crc32b:
2041 crcIntrinsicID = Intrinsic::aarch64_crc32b;
2042 break;
2043 case clang::AArch64::BI__builtin_arm_crc32cb:
2044 crcIntrinsicID = Intrinsic::aarch64_crc32cb;
2045 break;
2046 case clang::AArch64::BI__builtin_arm_crc32h:
2047 crcIntrinsicID = Intrinsic::aarch64_crc32h;
2048 break;
2049 case clang::AArch64::BI__builtin_arm_crc32ch:
2050 crcIntrinsicID = Intrinsic::aarch64_crc32ch;
2051 break;
2052 case clang::AArch64::BI__builtin_arm_crc32w:
2053 crcIntrinsicID = Intrinsic::aarch64_crc32w;
2054 break;
2055 case clang::AArch64::BI__builtin_arm_crc32cw:
2056 crcIntrinsicID = Intrinsic::aarch64_crc32cw;
2057 break;
2058 case clang::AArch64::BI__builtin_arm_crc32d:
2059 crcIntrinsicID = Intrinsic::aarch64_crc32x;
2060 break;
2061 case clang::AArch64::BI__builtin_arm_crc32cd:
2062 crcIntrinsicID = Intrinsic::aarch64_crc32cx;
2063 break;
2064 }
2065
2066 if (crcIntrinsicID != Intrinsic::not_intrinsic) {
2067 cgm.errorNYI(expr->getSourceRange(),
2068 std::string("unimplemented AArch64 builtin call: ") +
2069 getContext().BuiltinInfo.getName(builtinID));
2070 return mlir::Value{};
2071 }
2072
2073 // Memory Operations (MOPS)
2074 if (builtinID == AArch64::BI__builtin_arm_mops_memset_tag) {
2075 cgm.errorNYI(expr->getSourceRange(),
2076 std::string("unimplemented AArch64 builtin call: ") +
2077 getContext().BuiltinInfo.getName(builtinID));
2078 return mlir::Value{};
2079 }
2080
2081 // Memory Tagging Extensions (MTE) Intrinsics
2082 Intrinsic::ID mteIntrinsicID = Intrinsic::not_intrinsic;
2083 switch (builtinID) {
2084 case clang::AArch64::BI__builtin_arm_irg:
2085 mteIntrinsicID = Intrinsic::aarch64_irg;
2086 break;
2087 case clang::AArch64::BI__builtin_arm_addg:
2088 mteIntrinsicID = Intrinsic::aarch64_addg;
2089 break;
2090 case clang::AArch64::BI__builtin_arm_gmi:
2091 mteIntrinsicID = Intrinsic::aarch64_gmi;
2092 break;
2093 case clang::AArch64::BI__builtin_arm_ldg:
2094 mteIntrinsicID = Intrinsic::aarch64_ldg;
2095 break;
2096 case clang::AArch64::BI__builtin_arm_stg:
2097 mteIntrinsicID = Intrinsic::aarch64_stg;
2098 break;
2099 case clang::AArch64::BI__builtin_arm_subp:
2100 mteIntrinsicID = Intrinsic::aarch64_subp;
2101 break;
2102 }
2103
2104 if (mteIntrinsicID != Intrinsic::not_intrinsic) {
2105 cgm.errorNYI(expr->getSourceRange(),
2106 std::string("unimplemented AArch64 builtin call: ") +
2107 getContext().BuiltinInfo.getName(builtinID));
2108 return mlir::Value{};
2109 }
2110
2111 if (builtinID == clang::AArch64::BI__builtin_arm_rsr ||
2112 builtinID == clang::AArch64::BI__builtin_arm_rsr64 ||
2113 builtinID == clang::AArch64::BI__builtin_arm_rsr128 ||
2114 builtinID == clang::AArch64::BI__builtin_arm_rsrp ||
2115 builtinID == clang::AArch64::BI__builtin_arm_wsr ||
2116 builtinID == clang::AArch64::BI__builtin_arm_wsr64 ||
2117 builtinID == clang::AArch64::BI__builtin_arm_wsr128 ||
2118 builtinID == clang::AArch64::BI__builtin_arm_wsrp) {
2119 cgm.errorNYI(expr->getSourceRange(),
2120 std::string("unimplemented AArch64 builtin call: ") +
2121 getContext().BuiltinInfo.getName(builtinID));
2122 return mlir::Value{};
2123 }
2124
2125 if (builtinID == clang::AArch64::BI_ReadStatusReg ||
2126 builtinID == clang::AArch64::BI_WriteStatusReg ||
2127 builtinID == clang::AArch64::BI__sys) {
2128 cgm.errorNYI(expr->getSourceRange(),
2129 std::string("unimplemented AArch64 builtin call: ") +
2130 getContext().BuiltinInfo.getName(builtinID));
2131 return mlir::Value{};
2132 }
2133
2134 if (builtinID == clang::AArch64::BI_AddressOfReturnAddress) {
2135 cgm.errorNYI(expr->getSourceRange(),
2136 std::string("unimplemented AArch64 builtin call: ") +
2137 getContext().BuiltinInfo.getName(builtinID));
2138 return mlir::Value{};
2139 }
2140
2141 if (builtinID == clang::AArch64::BI__builtin_sponentry) {
2142 cgm.errorNYI(expr->getSourceRange(),
2143 std::string("unimplemented AArch64 builtin call: ") +
2144 getContext().BuiltinInfo.getName(builtinID));
2145 return mlir::Value{};
2146 }
2147
2148 if (builtinID == clang::AArch64::BI__mulh ||
2149 builtinID == clang::AArch64::BI__umulh) {
2150 cgm.errorNYI(expr->getSourceRange(),
2151 std::string("unimplemented AArch64 builtin call: ") +
2152 getContext().BuiltinInfo.getName(builtinID));
2153 return mlir::Value{};
2154 }
2155
2156 if (builtinID == AArch64::BI__writex18byte ||
2157 builtinID == AArch64::BI__writex18word ||
2158 builtinID == AArch64::BI__writex18dword ||
2159 builtinID == AArch64::BI__writex18qword) {
2160 cgm.errorNYI(expr->getSourceRange(),
2161 std::string("unimplemented AArch64 builtin call: ") +
2162 getContext().BuiltinInfo.getName(builtinID));
2163 return mlir::Value{};
2164 }
2165
2166 if (builtinID == AArch64::BI__readx18byte ||
2167 builtinID == AArch64::BI__readx18word ||
2168 builtinID == AArch64::BI__readx18dword ||
2169 builtinID == AArch64::BI__readx18qword) {
2170 cgm.errorNYI(expr->getSourceRange(),
2171 std::string("unimplemented AArch64 builtin call: ") +
2172 getContext().BuiltinInfo.getName(builtinID));
2173 return mlir::Value{};
2174 }
2175
2176 if (builtinID == AArch64::BI__addx18byte ||
2177 builtinID == AArch64::BI__addx18word ||
2178 builtinID == AArch64::BI__addx18dword ||
2179 builtinID == AArch64::BI__addx18qword ||
2180 builtinID == AArch64::BI__incx18byte ||
2181 builtinID == AArch64::BI__incx18word ||
2182 builtinID == AArch64::BI__incx18dword ||
2183 builtinID == AArch64::BI__incx18qword) {
2184 cgm.errorNYI(expr->getSourceRange(),
2185 std::string("unimplemented AArch64 builtin call: ") +
2186 getContext().BuiltinInfo.getName(builtinID));
2187 return mlir::Value{};
2188 }
2189
2190 if (builtinID == AArch64::BI_CopyDoubleFromInt64 ||
2191 builtinID == AArch64::BI_CopyFloatFromInt32 ||
2192 builtinID == AArch64::BI_CopyInt32FromFloat ||
2193 builtinID == AArch64::BI_CopyInt64FromDouble) {
2194 cgm.errorNYI(expr->getSourceRange(),
2195 std::string("unimplemented AArch64 builtin call: ") +
2196 getContext().BuiltinInfo.getName(builtinID));
2197 return mlir::Value{};
2198 }
2199
2200 if (builtinID == AArch64::BI_CountLeadingOnes ||
2201 builtinID == AArch64::BI_CountLeadingOnes64 ||
2202 builtinID == AArch64::BI_CountLeadingZeros ||
2203 builtinID == AArch64::BI_CountLeadingZeros64) {
2204 cgm.errorNYI(expr->getSourceRange(),
2205 std::string("unimplemented AArch64 builtin call: ") +
2206 getContext().BuiltinInfo.getName(builtinID));
2207 return mlir::Value{};
2208 }
2209
2210 if (builtinID == AArch64::BI_CountLeadingSigns ||
2211 builtinID == AArch64::BI_CountLeadingSigns64) {
2212 cgm.errorNYI(expr->getSourceRange(),
2213 std::string("unimplemented AArch64 builtin call: ") +
2214 getContext().BuiltinInfo.getName(builtinID));
2215 return mlir::Value{};
2216 }
2217
2218 if (builtinID == AArch64::BI_CountOneBits ||
2219 builtinID == AArch64::BI_CountOneBits64) {
2220 cgm.errorNYI(expr->getSourceRange(),
2221 std::string("unimplemented AArch64 builtin call: ") +
2222 getContext().BuiltinInfo.getName(builtinID));
2223 return mlir::Value{};
2224 }
2225
2226 if (builtinID == AArch64::BI__prefetch) {
2227 cgm.errorNYI(expr->getSourceRange(),
2228 std::string("unimplemented AArch64 builtin call: ") +
2229 getContext().BuiltinInfo.getName(builtinID));
2230 return mlir::Value{};
2231 }
2232
2233 if (builtinID == AArch64::BI__hlt) {
2234 cgm.errorNYI(expr->getSourceRange(),
2235 std::string("unimplemented AArch64 builtin call: ") +
2236 getContext().BuiltinInfo.getName(builtinID));
2237 return mlir::Value{};
2238 }
2239
2240 if (builtinID == NEON::BI__builtin_neon_vcvth_bf16_f32) {
2241 cgm.errorNYI(expr->getSourceRange(),
2242 std::string("unimplemented AArch64 builtin call: ") +
2243 getContext().BuiltinInfo.getName(builtinID));
2244 return mlir::Value{};
2245 }
2246
2247 // Handle MSVC intrinsics before argument evaluation to prevent double
2248 // evaluation.
2250
2251 // Some intrinsics are equivalent; if so, use the base intrinsic ID.
2252 auto it = llvm::find_if(neonEquivalentIntrinsicMap, [builtinID](auto &p) {
2253 return p.first == builtinID;
2254 });
2255 if (it != end(neonEquivalentIntrinsicMap))
2256 builtinID = it->second;
2257
2258 // Find out if any arguments are required to be integer constant
2259 // expressions.
2261 unsigned iceArguments = 0;
2263 getContext().GetBuiltinType(builtinID, error, &iceArguments);
2264 assert(error == ASTContext::GE_None && "Should not codegen an error");
2266
2267 // Skip extra arguments used to discriminate vector types and that are
2268 // intended for Sema checking.
2269 bool hasExtraArg = hasExtraNeonArgument(builtinID);
2270 unsigned numArgs = expr->getNumArgs() - (hasExtraArg ? 1 : 0);
2271 for (unsigned i = 0, e = numArgs; i != e; i++) {
2272 if (i == 0) {
2273 switch (builtinID) {
2274 case NEON::BI__builtin_neon_vld1_v:
2275 case NEON::BI__builtin_neon_vld1q_v:
2276 case NEON::BI__builtin_neon_vld1_dup_v:
2277 case NEON::BI__builtin_neon_vld1q_dup_v:
2278 case NEON::BI__builtin_neon_vld1_lane_v:
2279 case NEON::BI__builtin_neon_vld1q_lane_v:
2280 case NEON::BI__builtin_neon_vst1_v:
2281 case NEON::BI__builtin_neon_vst1q_v:
2282 case NEON::BI__builtin_neon_vst1_lane_v:
2283 case NEON::BI__builtin_neon_vst1q_lane_v:
2284 case NEON::BI__builtin_neon_vldap1_lane_s64:
2285 case NEON::BI__builtin_neon_vldap1q_lane_s64:
2286 case NEON::BI__builtin_neon_vstl1_lane_s64:
2287 case NEON::BI__builtin_neon_vstl1q_lane_s64:
2288 // Get the alignment for the argument in addition to the value;
2289 // we'll use it later.
2290 cgm.errorNYI(
2291 expr->getSourceRange(),
2292 std::string("unimplemented AArch64 builtin argument handling ") +
2293 getContext().BuiltinInfo.getName(builtinID));
2294 }
2295 }
2296 ops.push_back(
2297 emitScalarOrConstFoldImmArg(iceArguments, i, expr->getArg(i)));
2298 }
2299
2302 if (builtin)
2303 return emitCommonNeonSISDBuiltinExpr(*this, *builtin, ops, expr);
2304
2305 // Not all intrinsics handled by the common case work for AArch64 yet, so only
2306 // defer to common code if it's been added to our special map.
2308
2310
2311 const Expr *arg = expr->getArg(expr->getNumArgs() - 1);
2313 // A trailing constant integer is used for discriminating overloaded builtin
2314 // calls. Use it to determine the type of this overloaded NEON intrinsic.
2315 if (std::optional<llvm::APSInt> result =
2316 arg->getIntegerConstantExpr(getContext()))
2317 type = NeonTypeFlags(result->getZExtValue());
2318
2319 bool usgn = type.isUnsigned();
2320
2321 mlir::Location loc = getLoc(expr->getExprLoc());
2322
2323 // Not all intrinsics handled by the common case work for AArch64 yet, so only
2324 // defer to common code if it's been added to our special map.
2327 if (builtin)
2329 *this, builtin->BuiltinID, builtin->LLVMIntrinsic,
2330 builtin->AltLLVMIntrinsic, builtin->NameHint, builtin->TypeModifier,
2331 expr, ops);
2332
2333 // Handle non-overloaded intrinsics first.
2334 switch (builtinID) {
2335 default:
2336 break;
2337 case NEON::BI__builtin_neon_vabsh_f16: {
2338 return cir::FAbsOp::create(builder, loc, ops);
2339 }
2340 case NEON::BI__builtin_neon_vaddq_p128:
2341 case NEON::BI__builtin_neon_vldrq_p128:
2342 case NEON::BI__builtin_neon_vstrq_p128:
2343 case NEON::BI__builtin_neon_vcvts_f32_u32:
2344 case NEON::BI__builtin_neon_vcvtd_f64_u64:
2345 case NEON::BI__builtin_neon_vcvts_f32_s32:
2346 case NEON::BI__builtin_neon_vcvtd_f64_s64:
2347 case NEON::BI__builtin_neon_vcvth_f16_u16:
2348 case NEON::BI__builtin_neon_vcvth_f16_u32:
2349 case NEON::BI__builtin_neon_vcvth_f16_u64:
2350 case NEON::BI__builtin_neon_vcvth_f16_s16:
2351 case NEON::BI__builtin_neon_vcvth_f16_s32:
2352 case NEON::BI__builtin_neon_vcvth_f16_s64:
2353 case NEON::BI__builtin_neon_vcvtah_u16_f16:
2354 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
2355 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
2356 case NEON::BI__builtin_neon_vcvtph_u16_f16:
2357 case NEON::BI__builtin_neon_vcvth_u16_f16:
2358 case NEON::BI__builtin_neon_vcvtah_s16_f16:
2359 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
2360 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
2361 case NEON::BI__builtin_neon_vcvtph_s16_f16:
2362 case NEON::BI__builtin_neon_vcvth_s16_f16:
2363 case NEON::BI__builtin_neon_vcaleh_f16:
2364 case NEON::BI__builtin_neon_vcalth_f16:
2365 case NEON::BI__builtin_neon_vcageh_f16:
2366 case NEON::BI__builtin_neon_vcagth_f16:
2367 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
2368 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
2369 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
2370 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
2371 case NEON::BI__builtin_neon_vpaddd_s64:
2372 case NEON::BI__builtin_neon_vpaddd_f64:
2373 case NEON::BI__builtin_neon_vpadds_f32:
2374 cgm.errorNYI(expr->getSourceRange(),
2375 std::string("unimplemented AArch64 builtin call: ") +
2376 getContext().BuiltinInfo.getName(builtinID));
2377 return mlir::Value{};
2378 case NEON::BI__builtin_neon_vceqzd_s64:
2379 case NEON::BI__builtin_neon_vceqzd_f64:
2380 case NEON::BI__builtin_neon_vceqzs_f32:
2381 case NEON::BI__builtin_neon_vceqzh_f16:
2383 *this, builder, loc, ops[0],
2384 convertType(expr->getCallReturnType(getContext())), cir::CmpOpKind::eq);
2385 case NEON::BI__builtin_neon_vcgezd_s64:
2386 case NEON::BI__builtin_neon_vcgezd_f64:
2387 case NEON::BI__builtin_neon_vcgezs_f32:
2388 case NEON::BI__builtin_neon_vcgezh_f16:
2389 case NEON::BI__builtin_neon_vclezd_s64:
2390 case NEON::BI__builtin_neon_vclezd_f64:
2391 case NEON::BI__builtin_neon_vclezs_f32:
2392 case NEON::BI__builtin_neon_vclezh_f16:
2393 case NEON::BI__builtin_neon_vcgtzd_s64:
2394 case NEON::BI__builtin_neon_vcgtzd_f64:
2395 case NEON::BI__builtin_neon_vcgtzs_f32:
2396 case NEON::BI__builtin_neon_vcgtzh_f16:
2397 case NEON::BI__builtin_neon_vcltzd_s64:
2398 case NEON::BI__builtin_neon_vcltzd_f64:
2399 case NEON::BI__builtin_neon_vcltzs_f32:
2400 case NEON::BI__builtin_neon_vcltzh_f16:
2401 case NEON::BI__builtin_neon_vceqzd_u64: {
2403 *this, builder, loc, ops[0],
2404 convertType(expr->getCallReturnType(getContext())), cir::CmpOpKind::eq);
2405 }
2406 case NEON::BI__builtin_neon_vceqd_f64:
2407 case NEON::BI__builtin_neon_vcled_f64:
2408 case NEON::BI__builtin_neon_vcltd_f64:
2409 case NEON::BI__builtin_neon_vcged_f64:
2410 case NEON::BI__builtin_neon_vcgtd_f64:
2411 case NEON::BI__builtin_neon_vceqs_f32:
2412 case NEON::BI__builtin_neon_vcles_f32:
2413 case NEON::BI__builtin_neon_vclts_f32:
2414 case NEON::BI__builtin_neon_vcges_f32:
2415 case NEON::BI__builtin_neon_vcgts_f32:
2416 case NEON::BI__builtin_neon_vceqh_f16:
2417 case NEON::BI__builtin_neon_vcleh_f16:
2418 case NEON::BI__builtin_neon_vclth_f16:
2419 case NEON::BI__builtin_neon_vcgeh_f16:
2420 case NEON::BI__builtin_neon_vcgth_f16:
2421 case NEON::BI__builtin_neon_vceqd_s64:
2422 case NEON::BI__builtin_neon_vceqd_u64:
2423 case NEON::BI__builtin_neon_vcgtd_s64:
2424 case NEON::BI__builtin_neon_vcgtd_u64:
2425 case NEON::BI__builtin_neon_vcltd_s64:
2426 case NEON::BI__builtin_neon_vcltd_u64:
2427 case NEON::BI__builtin_neon_vcged_u64:
2428 case NEON::BI__builtin_neon_vcged_s64:
2429 case NEON::BI__builtin_neon_vcled_u64:
2430 case NEON::BI__builtin_neon_vcled_s64:
2431 cgm.errorNYI(expr->getSourceRange(),
2432 std::string("unimplemented AArch64 builtin call: ") +
2433 getContext().BuiltinInfo.getName(builtinID));
2434 return mlir::Value{};
2435 case NEON::BI__builtin_neon_vnegd_s64: {
2436 return builder.createNeg(ops[0]);
2437 }
2438 case NEON::BI__builtin_neon_vnegh_f16: {
2439 return builder.createFNeg(ops[0]);
2440 }
2441 case NEON::BI__builtin_neon_vtstd_s64:
2442 case NEON::BI__builtin_neon_vtstd_u64:
2443 case NEON::BI__builtin_neon_vset_lane_i8:
2444 case NEON::BI__builtin_neon_vset_lane_i16:
2445 case NEON::BI__builtin_neon_vset_lane_i32:
2446 case NEON::BI__builtin_neon_vset_lane_i64:
2447 case NEON::BI__builtin_neon_vset_lane_bf16:
2448 case NEON::BI__builtin_neon_vset_lane_f32:
2449 case NEON::BI__builtin_neon_vsetq_lane_i8:
2450 case NEON::BI__builtin_neon_vsetq_lane_i16:
2451 case NEON::BI__builtin_neon_vsetq_lane_i32:
2452 case NEON::BI__builtin_neon_vsetq_lane_i64:
2453 case NEON::BI__builtin_neon_vsetq_lane_bf16:
2454 case NEON::BI__builtin_neon_vsetq_lane_f32:
2455 case NEON::BI__builtin_neon_vset_lane_f64:
2456 case NEON::BI__builtin_neon_vset_lane_mf8:
2457 case NEON::BI__builtin_neon_vsetq_lane_mf8:
2458 case NEON::BI__builtin_neon_vsetq_lane_f64:
2459 cgm.errorNYI(expr->getSourceRange(),
2460 std::string("unimplemented AArch64 builtin call: ") +
2461 getContext().BuiltinInfo.getName(builtinID));
2462 return mlir::Value{};
2463
2464 case NEON::BI__builtin_neon_vget_lane_i8:
2465 case NEON::BI__builtin_neon_vdupb_lane_i8:
2466 case NEON::BI__builtin_neon_vgetq_lane_i8:
2467 case NEON::BI__builtin_neon_vdupb_laneq_i8:
2468 case NEON::BI__builtin_neon_vget_lane_mf8:
2469 case NEON::BI__builtin_neon_vdupb_lane_mf8:
2470 case NEON::BI__builtin_neon_vgetq_lane_mf8:
2471 case NEON::BI__builtin_neon_vdupb_laneq_mf8:
2472 case NEON::BI__builtin_neon_vget_lane_i16:
2473 case NEON::BI__builtin_neon_vduph_lane_i16:
2474 case NEON::BI__builtin_neon_vgetq_lane_i16:
2475 case NEON::BI__builtin_neon_vduph_laneq_i16:
2476 case NEON::BI__builtin_neon_vget_lane_i32:
2477 case NEON::BI__builtin_neon_vdups_lane_i32:
2478 case NEON::BI__builtin_neon_vdups_lane_f32:
2479 case NEON::BI__builtin_neon_vgetq_lane_i32:
2480 case NEON::BI__builtin_neon_vdups_laneq_i32:
2481 case NEON::BI__builtin_neon_vget_lane_i64:
2482 case NEON::BI__builtin_neon_vdupd_lane_i64:
2483 case NEON::BI__builtin_neon_vdupd_lane_f64:
2484 case NEON::BI__builtin_neon_vgetq_lane_i64:
2485 case NEON::BI__builtin_neon_vdupd_laneq_i64:
2486 case NEON::BI__builtin_neon_vget_lane_f32:
2487 case NEON::BI__builtin_neon_vget_lane_f64:
2488 case NEON::BI__builtin_neon_vgetq_lane_f32:
2489 case NEON::BI__builtin_neon_vdups_laneq_f32:
2490 case NEON::BI__builtin_neon_vgetq_lane_f64:
2491 case NEON::BI__builtin_neon_vdupd_laneq_f64:
2492 return cir::VecExtractOp::create(builder, loc, ops[0],
2493 emitScalarExpr(expr->getArg(1)));
2494 case NEON::BI__builtin_neon_vaddh_f16:
2495 return builder.createFAdd(loc, ops[0], ops[1]);
2496 case NEON::BI__builtin_neon_vsubh_f16:
2497 return builder.createFSub(loc, ops[0], ops[1]);
2498 case NEON::BI__builtin_neon_vmulh_f16:
2499 return builder.createFMul(loc, ops[0], ops[1]);
2500 case NEON::BI__builtin_neon_vdivh_f16:
2501 return builder.createFDiv(loc, ops[0], ops[1]);
2502 case NEON::BI__builtin_neon_vfmah_f16:
2503 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
2504 std::rotate(ops.begin(), ops.begin() + 1, ops.end());
2505 return emitCallMaybeConstrainedBuiltin(builder, loc, "fma",
2506 convertType(expr->getType()), ops);
2507 break;
2508 case NEON::BI__builtin_neon_vfmsh_f16:
2509 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
2510 std::rotate(ops.begin(), ops.begin() + 1, ops.end());
2511 ops[0] = builder.createFNeg(ops[0]);
2512 return emitCallMaybeConstrainedBuiltin(builder, loc, "fma",
2513 convertType(expr->getType()), ops);
2514 case NEON::BI__builtin_neon_vaddd_s64:
2515 case NEON::BI__builtin_neon_vaddd_u64:
2516 case NEON::BI__builtin_neon_vsubd_s64:
2517 case NEON::BI__builtin_neon_vsubd_u64:
2518 case NEON::BI__builtin_neon_vqdmlalh_s16:
2519 case NEON::BI__builtin_neon_vqdmlslh_s16:
2520 cgm.errorNYI(expr->getSourceRange(),
2521 std::string("unimplemented AArch64 builtin call: ") +
2522 getContext().BuiltinInfo.getName(builtinID));
2523 return mlir::Value{};
2524 case NEON::BI__builtin_neon_vqshlud_n_s64: {
2525 cir::IntType int64Type = builder.getSInt64Ty();
2526 ops[1] = builder.getSInt64(getZExtIntValueFromConstOp(ops[1]), loc);
2527 return emitNeonCall(cgm, builder, {int64Type, int64Type}, ops,
2528 "aarch64.neon.sqshlu", convertType(expr->getType()),
2529 loc);
2530 }
2531 case NEON::BI__builtin_neon_vqshld_n_u64:
2532 case NEON::BI__builtin_neon_vqshld_n_s64: {
2533 cir::IntType int64Type = builtinID == NEON::BI__builtin_neon_vqshld_n_u64
2534 ? builder.getUInt64Ty()
2535 : builder.getSInt64Ty();
2536 llvm::StringRef intrinsicName =
2537 builtinID == NEON::BI__builtin_neon_vqshld_n_u64 ? "aarch64.neon.uqshl"
2538 : "aarch64.neon.sqshl";
2539 ops[1] = builder.getSInt64(getZExtIntValueFromConstOp(ops[1]), loc);
2540 return emitNeonCall(cgm, builder, {int64Type, int64Type}, ops,
2541 intrinsicName, convertType(expr->getType()), loc);
2542 }
2543 case NEON::BI__builtin_neon_vrshrd_n_u64:
2544 case NEON::BI__builtin_neon_vrshrd_n_s64: {
2545 llvm::StringRef intrName = builtinID == NEON::BI__builtin_neon_vrshrd_n_s64
2546 ? "aarch64.neon.srshl"
2547 : "aarch64.neon.urshl";
2548 cir::IntType int64Ty = builtinID == NEON::BI__builtin_neon_vqshld_n_u64
2549 ? builder.getUInt64Ty()
2550 : builder.getSInt64Ty();
2551 int64_t sv = -cast<cir::IntAttr>(
2552 cast<cir::ConstantOp>(ops[1].getDefiningOp()).getValue())
2553 .getSInt();
2554 ops[1] = builder.getSInt64(sv, loc);
2555 return emitNeonCall(cgm, builder, {int64Ty, builder.getSInt64Ty()}, ops,
2556 intrName, int64Ty, loc);
2557 }
2558 case NEON::BI__builtin_neon_vrsrad_n_u64:
2559 case NEON::BI__builtin_neon_vrsrad_n_s64: {
2560 cir::IntType int64Type = builtinID == NEON::BI__builtin_neon_vrsrad_n_u64
2561 ? builder.getUInt64Ty()
2562 : builder.getSInt64Ty();
2563 ops[2] = builder.createNeg(ops[2]);
2564 const StringRef intrName = builtinID == NEON::BI__builtin_neon_vrsrad_n_u64
2565 ? "aarch64.neon.urshl"
2566 : "aarch64.neon.srshl";
2567
2569 ops[1], builder.createIntCast(ops[2], builder.getSInt64Ty())};
2570 ops[1] = builder.emitIntrinsicCallOp(loc, intrName, int64Type, args);
2571 return builder.createAdd(loc, ops[0],
2572 builder.createBitcast(ops[1], int64Type));
2573 }
2574 case NEON::BI__builtin_neon_vshld_n_s64:
2575 case NEON::BI__builtin_neon_vshld_n_u64: {
2576 auto loc = getLoc(expr->getExprLoc());
2577 std::optional<llvm::APSInt> amt =
2578 expr->getArg(1)->getIntegerConstantExpr(getContext());
2579 assert(amt && "Expected argument to be a constant");
2580 return builder.createShiftLeft(loc, ops[0], amt->getZExtValue());
2581 }
2582 case NEON::BI__builtin_neon_vshrd_n_s64: {
2583 std::optional<llvm::APSInt> amt =
2584 expr->getArg(1)->getIntegerConstantExpr(getContext());
2585 assert(amt && "Expected argument to be a constant");
2586 return builder.createShiftRight(
2587 loc, ops[0], std::min(static_cast<uint64_t>(63), amt->getZExtValue()));
2588 }
2589 case NEON::BI__builtin_neon_vshrd_n_u64: {
2590 std::optional<llvm::APSInt> amt =
2591 expr->getArg(1)->getIntegerConstantExpr(getContext());
2592 assert(amt && "Expected argument to be a constant");
2593 uint64_t shiftAmt = amt->getZExtValue();
2594 // Right-shifting an unsigned value by its size yields 0.
2595 if (shiftAmt == 64)
2596 return builder.getConstInt(loc, builder.getUInt64Ty(), 0);
2597 return builder.createShiftRight(loc, ops[0], shiftAmt);
2598 }
2599 case NEON::BI__builtin_neon_vsrad_n_s64:
2600 case NEON::BI__builtin_neon_vsrad_n_u64:
2601 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
2602 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
2603 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
2604 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16:
2605 case NEON::BI__builtin_neon_vqdmlals_s32:
2606 case NEON::BI__builtin_neon_vqdmlsls_s32:
2607 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
2608 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
2609 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
2610 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
2611 cgm.errorNYI(expr->getSourceRange(),
2612 std::string("unimplemented AArch64 builtin call: ") +
2613 getContext().BuiltinInfo.getName(builtinID));
2614 return mlir::Value{};
2615 }
2616 case NEON::BI__builtin_neon_vget_lane_bf16:
2617 case NEON::BI__builtin_neon_vduph_lane_bf16:
2618 case NEON::BI__builtin_neon_vduph_lane_f16:
2619 case NEON::BI__builtin_neon_vgetq_lane_bf16:
2620 case NEON::BI__builtin_neon_vduph_laneq_bf16:
2621 case NEON::BI__builtin_neon_vduph_laneq_f16: {
2622 return cir::VecExtractOp::create(builder, loc, ops[0], ops[1]);
2623 }
2624 case NEON::BI__builtin_neon_vcvt_bf16_f32:
2625 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32:
2626 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32:
2627 case clang::AArch64::BI_InterlockedAdd:
2628 case clang::AArch64::BI_InterlockedAdd_acq:
2629 case clang::AArch64::BI_InterlockedAdd_rel:
2630 case clang::AArch64::BI_InterlockedAdd_nf:
2631 case clang::AArch64::BI_InterlockedAdd64:
2632 case clang::AArch64::BI_InterlockedAdd64_acq:
2633 case clang::AArch64::BI_InterlockedAdd64_rel:
2634 case clang::AArch64::BI_InterlockedAdd64_nf:
2635 cgm.errorNYI(expr->getSourceRange(),
2636 std::string("unimplemented AArch64 builtin call: ") +
2637 getContext().BuiltinInfo.getName(builtinID));
2638 return mlir::Value{};
2639 }
2640
2641 cir::VectorType ty = getNeonType(this, type, loc);
2642 if (!ty)
2643 return nullptr;
2644
2645 llvm::StringRef intrName;
2646
2647 switch (builtinID) {
2648 default:
2649 return std::nullopt;
2650 case NEON::BI__builtin_neon_vbsl_v:
2651 case NEON::BI__builtin_neon_vbslq_v: {
2652
2653 cir::VectorType bitTy = getIntVecFromVecTy(builder, ty);
2654 ops[0] = builder.createBitcast(ops[0], bitTy);
2655 ops[1] = builder.createBitcast(ops[1], bitTy);
2656 ops[2] = builder.createBitcast(ops[2], bitTy);
2657
2658 ops[1] = builder.createAnd(loc, ops[0], ops[1]);
2659 ops[2] = builder.createAnd(loc, builder.createNot(ops[0]), ops[2]);
2660 ops[0] = builder.createOr(loc, ops[1], ops[2]);
2661 return builder.createBitcast(ops[0], ty);
2662 }
2663 case NEON::BI__builtin_neon_vfma_lane_v:
2664 case NEON::BI__builtin_neon_vfmaq_lane_v:
2665 case NEON::BI__builtin_neon_vfma_laneq_v:
2666 case NEON::BI__builtin_neon_vfmaq_laneq_v:
2667 case NEON::BI__builtin_neon_vfmah_lane_f16:
2668 case NEON::BI__builtin_neon_vfmas_lane_f32:
2669 case NEON::BI__builtin_neon_vfmah_laneq_f16:
2670 case NEON::BI__builtin_neon_vfmas_laneq_f32:
2671 case NEON::BI__builtin_neon_vfmad_lane_f64:
2672 case NEON::BI__builtin_neon_vfmad_laneq_f64:
2673 cgm.errorNYI(expr->getSourceRange(),
2674 std::string("unimplemented AArch64 builtin call: ") +
2675 getContext().BuiltinInfo.getName(builtinID));
2676 return mlir::Value{};
2677 case NEON::BI__builtin_neon_vmull_v: {
2678 intrName = usgn ? "aarch64.neon.umull" : "aarch64.neon.smull";
2679 if (type.isPoly())
2680 intrName = "aarch64.neon.pmull";
2681 cir::VectorType argTy = builder.getExtendedOrTruncatedElementVectorType(
2682 ty, /*isExtended*/ false, !usgn);
2683 return emitNeonCall(cgm, builder, {argTy, argTy}, ops, intrName, ty, loc);
2684 }
2685 case NEON::BI__builtin_neon_vmax_v:
2686 case NEON::BI__builtin_neon_vmaxq_v:
2687 intrName = usgn ? "aarch64.neon.umax" : "aarch64.neon.smax";
2688 if (cir::isFPOrVectorOfFPType(ty))
2689 intrName = "aarch64.neon.fmax";
2690 return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc);
2691 case NEON::BI__builtin_neon_vmaxh_f16:
2692 cgm.errorNYI(expr->getSourceRange(),
2693 std::string("unimplemented AArch64 builtin call: ") +
2694 getContext().BuiltinInfo.getName(builtinID));
2695 return mlir::Value{};
2696 case NEON::BI__builtin_neon_vmin_v:
2697 case NEON::BI__builtin_neon_vminq_v:
2698 intrName = usgn ? "aarch64.neon.umin" : "aarch64.neon.smin";
2699 if (cir::isFPOrVectorOfFPType(ty))
2700 intrName = "aarch64.neon.fmin";
2701 return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc);
2702 case NEON::BI__builtin_neon_vminh_f16:
2703 cgm.errorNYI(expr->getSourceRange(),
2704 std::string("unimplemented AArch64 builtin call: ") +
2705 getContext().BuiltinInfo.getName(builtinID));
2706 return mlir::Value{};
2707 case NEON::BI__builtin_neon_vabd_v:
2708 case NEON::BI__builtin_neon_vabdq_v:
2709 intrName = usgn ? "aarch64.neon.uabd" : "aarch64.neon.sabd";
2710 if (cir::isFPOrVectorOfFPType(ty))
2711 intrName = "aarch64.neon.fabd";
2712 return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc);
2713 case NEON::BI__builtin_neon_vpadal_v:
2714 case NEON::BI__builtin_neon_vpadalq_v: {
2715 intrName = usgn ? "aarch64.neon.uaddlp" : "aarch64.neon.saddlp";
2716 llvm::SmallVector<mlir::Value> inputs{ops[1]};
2717 mlir::Value pairwiseSum =
2718 emitNeonCall(cgm, builder, {getNeonPairwiseWidenInputType(ty, usgn)},
2719 inputs, intrName, ty, loc);
2720 mlir::Value accumValue = builder.createBitcast(loc, ops[0], ty);
2721 return cir::AddOp::create(builder, loc, ty, pairwiseSum, accumValue);
2722 }
2723 case NEON::BI__builtin_neon_vpmin_v:
2724 case NEON::BI__builtin_neon_vpminq_v:
2725 intrName = usgn ? "aarch64.neon.uminp" : "aarch64.neon.sminp";
2726 if (cir::isFPOrVectorOfFPType(ty))
2727 intrName = "aarch64.neon.fminp";
2728 return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc);
2729 case NEON::BI__builtin_neon_vpmax_v:
2730 case NEON::BI__builtin_neon_vpmaxq_v:
2731 cgm.errorNYI(expr->getSourceRange(),
2732 std::string("unimplemented AArch64 builtin call: ") +
2733 getContext().BuiltinInfo.getName(builtinID));
2734 return mlir::Value{};
2735 case NEON::BI__builtin_neon_vminnm_v:
2736 case NEON::BI__builtin_neon_vminnmq_v:
2737 intrName = "aarch64.neon.fminnm";
2738 return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc);
2739 case NEON::BI__builtin_neon_vminnmh_f16:
2740 cgm.errorNYI(expr->getSourceRange(),
2741 std::string("unimplemented AArch64 builtin call: ") +
2742 getContext().BuiltinInfo.getName(builtinID));
2743 return mlir::Value{};
2744 case NEON::BI__builtin_neon_vmaxnm_v:
2745 case NEON::BI__builtin_neon_vmaxnmq_v:
2746 intrName = "aarch64.neon.fmaxnm";
2747 return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc);
2748 case NEON::BI__builtin_neon_vmaxnmh_f16:
2749 case NEON::BI__builtin_neon_vrecpss_f32:
2750 case NEON::BI__builtin_neon_vrecpsd_f64:
2751 case NEON::BI__builtin_neon_vrecpsh_f16:
2752 case NEON::BI__builtin_neon_vqshrun_n_v:
2753 cgm.errorNYI(expr->getSourceRange(),
2754 std::string("unimplemented AArch64 builtin call: ") +
2755 getContext().BuiltinInfo.getName(builtinID));
2756 return mlir::Value{};
2757 case NEON::BI__builtin_neon_vqrshrun_n_v: {
2758 cir::VectorType argTy = builder.getExtendedOrTruncatedElementVectorType(
2759 ty, /*isExtended=*/true, /*isSigned=*/true);
2760 return emitNeonCall(cgm, builder, {argTy, sInt32Ty}, ops,
2761 "aarch64.neon.sqrshrun", ty, loc);
2762 }
2763 case NEON::BI__builtin_neon_vqshrn_n_v:
2764 cgm.errorNYI(expr->getSourceRange(),
2765 std::string("unimplemented AArch64 builtin call: ") +
2766 getContext().BuiltinInfo.getName(builtinID));
2767 return mlir::Value{};
2768 case NEON::BI__builtin_neon_vrshrn_n_v:
2769 cgm.errorNYI(expr->getSourceRange(),
2770 std::string("unimplemented AArch64 builtin call: ") +
2771 getContext().BuiltinInfo.getName(builtinID));
2772 return mlir::Value{};
2773 case NEON::BI__builtin_neon_vqrshrn_n_v: {
2774 cir::VectorType argTy = builder.getExtendedOrTruncatedElementVectorType(
2775 ty, /*isExtended=*/true, /*isSigned=*/!usgn);
2776 llvm::StringRef intrName =
2777 usgn ? "aarch64.neon.uqrshrn" : "aarch64.neon.sqrshrn";
2778 return emitNeonCall(cgm, builder, {argTy, sInt32Ty}, ops, intrName, ty,
2779 loc);
2780 }
2781 case NEON::BI__builtin_neon_vrndah_f16:
2782 case NEON::BI__builtin_neon_vrnda_v:
2783 case NEON::BI__builtin_neon_vrndaq_v:
2784 case NEON::BI__builtin_neon_vrndih_f16:
2785 case NEON::BI__builtin_neon_vrndmh_f16:
2786 case NEON::BI__builtin_neon_vrndm_v:
2787 case NEON::BI__builtin_neon_vrndmq_v:
2788 case NEON::BI__builtin_neon_vrndnh_f16:
2789 case NEON::BI__builtin_neon_vrndn_v:
2790 case NEON::BI__builtin_neon_vrndnq_v:
2791 case NEON::BI__builtin_neon_vrndns_f32:
2792 case NEON::BI__builtin_neon_vrndph_f16:
2793 case NEON::BI__builtin_neon_vrndp_v:
2794 case NEON::BI__builtin_neon_vrndpq_v:
2795 case NEON::BI__builtin_neon_vrndxh_f16:
2796 case NEON::BI__builtin_neon_vrndx_v:
2797 case NEON::BI__builtin_neon_vrndxq_v:
2798 case NEON::BI__builtin_neon_vrndh_f16:
2799 case NEON::BI__builtin_neon_vrnd32x_f32:
2800 case NEON::BI__builtin_neon_vrnd32xq_f32:
2801 case NEON::BI__builtin_neon_vrnd32x_f64:
2802 case NEON::BI__builtin_neon_vrnd32xq_f64:
2803 case NEON::BI__builtin_neon_vrnd32z_f32:
2804 case NEON::BI__builtin_neon_vrnd32zq_f32:
2805 case NEON::BI__builtin_neon_vrnd32z_f64:
2806 case NEON::BI__builtin_neon_vrnd32zq_f64:
2807 case NEON::BI__builtin_neon_vrnd64x_f32:
2808 case NEON::BI__builtin_neon_vrnd64xq_f32:
2809 case NEON::BI__builtin_neon_vrnd64x_f64:
2810 case NEON::BI__builtin_neon_vrnd64xq_f64:
2811 case NEON::BI__builtin_neon_vrnd64z_f32:
2812 case NEON::BI__builtin_neon_vrnd64zq_f32:
2813 case NEON::BI__builtin_neon_vrnd64z_f64:
2814 case NEON::BI__builtin_neon_vrnd64zq_f64:
2815 case NEON::BI__builtin_neon_vrnd_v:
2816 case NEON::BI__builtin_neon_vrndq_v:
2817 cgm.errorNYI(expr->getSourceRange(),
2818 std::string("unimplemented AArch64 builtin call: ") +
2819 getContext().BuiltinInfo.getName(builtinID));
2820 return mlir::Value{};
2821 case NEON::BI__builtin_neon_vcvt_f64_v:
2822 case NEON::BI__builtin_neon_vcvtq_f64_v:
2823 ops[0] = builder.createBitcast(ops[0], ty);
2824 ty = getNeonType(
2825 this, NeonTypeFlags(NeonTypeFlags::Float64, false, type.isQuad()), loc);
2826 return builder.createCast(loc, cir::CastKind::int_to_float, ops[0], ty);
2827 case NEON::BI__builtin_neon_vcvt_f64_f32:
2828 case NEON::BI__builtin_neon_vcvt_f32_f64:
2829 case NEON::BI__builtin_neon_vcvt_s32_v:
2830 case NEON::BI__builtin_neon_vcvt_u32_v:
2831 case NEON::BI__builtin_neon_vcvt_s64_v:
2832 case NEON::BI__builtin_neon_vcvt_u64_v:
2833 case NEON::BI__builtin_neon_vcvt_s16_f16:
2834 case NEON::BI__builtin_neon_vcvt_u16_f16:
2835 case NEON::BI__builtin_neon_vcvtq_s32_v:
2836 case NEON::BI__builtin_neon_vcvtq_u32_v:
2837 case NEON::BI__builtin_neon_vcvtq_s64_v:
2838 case NEON::BI__builtin_neon_vcvtq_u64_v:
2839 case NEON::BI__builtin_neon_vcvtq_s16_f16:
2840 case NEON::BI__builtin_neon_vcvtq_u16_f16:
2841 case NEON::BI__builtin_neon_vcvta_s16_f16:
2842 case NEON::BI__builtin_neon_vcvta_u16_f16:
2843 case NEON::BI__builtin_neon_vcvta_s32_v:
2844 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
2845 case NEON::BI__builtin_neon_vcvtaq_s32_v:
2846 case NEON::BI__builtin_neon_vcvta_u32_v:
2847 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
2848 case NEON::BI__builtin_neon_vcvtaq_u32_v:
2849 case NEON::BI__builtin_neon_vcvta_s64_v:
2850 case NEON::BI__builtin_neon_vcvtaq_s64_v:
2851 case NEON::BI__builtin_neon_vcvta_u64_v:
2852 case NEON::BI__builtin_neon_vcvtaq_u64_v:
2853 case NEON::BI__builtin_neon_vcvtm_s16_f16:
2854 case NEON::BI__builtin_neon_vcvtm_s32_v:
2855 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
2856 case NEON::BI__builtin_neon_vcvtmq_s32_v:
2857 case NEON::BI__builtin_neon_vcvtm_u16_f16:
2858 case NEON::BI__builtin_neon_vcvtm_u32_v:
2859 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
2860 case NEON::BI__builtin_neon_vcvtmq_u32_v:
2861 case NEON::BI__builtin_neon_vcvtm_s64_v:
2862 case NEON::BI__builtin_neon_vcvtmq_s64_v:
2863 case NEON::BI__builtin_neon_vcvtm_u64_v:
2864 case NEON::BI__builtin_neon_vcvtmq_u64_v:
2865 case NEON::BI__builtin_neon_vcvtn_s16_f16:
2866 case NEON::BI__builtin_neon_vcvtn_s32_v:
2867 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
2868 case NEON::BI__builtin_neon_vcvtnq_s32_v:
2869 case NEON::BI__builtin_neon_vcvtn_u16_f16:
2870 case NEON::BI__builtin_neon_vcvtn_u32_v:
2871 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
2872 case NEON::BI__builtin_neon_vcvtnq_u32_v:
2873 case NEON::BI__builtin_neon_vcvtn_s64_v:
2874 case NEON::BI__builtin_neon_vcvtnq_s64_v:
2875 case NEON::BI__builtin_neon_vcvtn_u64_v:
2876 case NEON::BI__builtin_neon_vcvtnq_u64_v:
2877 case NEON::BI__builtin_neon_vcvtp_s16_f16:
2878 case NEON::BI__builtin_neon_vcvtp_s32_v:
2879 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
2880 case NEON::BI__builtin_neon_vcvtpq_s32_v:
2881 case NEON::BI__builtin_neon_vcvtp_u16_f16:
2882 case NEON::BI__builtin_neon_vcvtp_u32_v:
2883 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
2884 case NEON::BI__builtin_neon_vcvtpq_u32_v:
2885 case NEON::BI__builtin_neon_vcvtp_s64_v:
2886 case NEON::BI__builtin_neon_vcvtpq_s64_v:
2887 case NEON::BI__builtin_neon_vcvtp_u64_v:
2888 case NEON::BI__builtin_neon_vcvtpq_u64_v:
2889 case NEON::BI__builtin_neon_vmulx_v:
2890 case NEON::BI__builtin_neon_vmulxq_v:
2891 case NEON::BI__builtin_neon_vmulxh_lane_f16:
2892 case NEON::BI__builtin_neon_vmulxh_laneq_f16:
2893 case NEON::BI__builtin_neon_vmul_lane_v:
2894 case NEON::BI__builtin_neon_vmul_laneq_v:
2895 case NEON::BI__builtin_neon_vpmaxnm_v:
2896 case NEON::BI__builtin_neon_vpmaxnmq_v:
2897 cgm.errorNYI(expr->getSourceRange(),
2898 std::string("unimplemented AArch64 builtin call: ") +
2899 getContext().BuiltinInfo.getName(builtinID));
2900 return mlir::Value{};
2901 case NEON::BI__builtin_neon_vpminnm_v:
2902 case NEON::BI__builtin_neon_vpminnmq_v:
2903 intrName = "aarch64.neon.fminnmp";
2904 return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc);
2905 case NEON::BI__builtin_neon_vsqrth_f16:
2906 cgm.errorNYI(expr->getSourceRange(),
2907 std::string("unimplemented AArch64 builtin call: ") +
2908 getContext().BuiltinInfo.getName(builtinID));
2909 return mlir::Value{};
2910 case NEON::BI__builtin_neon_vsqrt_v:
2911 case NEON::BI__builtin_neon_vsqrtq_v:
2913 return emitNeonCall(cgm, builder, {ty}, ops, "sqrt", ty, loc);
2914 case NEON::BI__builtin_neon_vrbit_v:
2915 case NEON::BI__builtin_neon_vrbitq_v:
2916 case NEON::BI__builtin_neon_vmaxv_f16:
2917 case NEON::BI__builtin_neon_vmaxvq_f16:
2918 case NEON::BI__builtin_neon_vminv_f16:
2919 case NEON::BI__builtin_neon_vminvq_f16:
2920 case NEON::BI__builtin_neon_vmaxnmv_f16:
2921 case NEON::BI__builtin_neon_vmaxnmvq_f16:
2922 case NEON::BI__builtin_neon_vminnmv_f16:
2923 case NEON::BI__builtin_neon_vminnmvq_f16:
2924 case NEON::BI__builtin_neon_vmul_n_f64:
2925 cgm.errorNYI(expr->getSourceRange(),
2926 std::string("unimplemented AArch64 builtin call: ") +
2927 getContext().BuiltinInfo.getName(builtinID));
2928 return mlir::Value{};
2929 case NEON::BI__builtin_neon_vaddlv_u8:
2930 case NEON::BI__builtin_neon_vaddlvq_u8:
2931 case NEON::BI__builtin_neon_vaddlv_u16:
2932 case NEON::BI__builtin_neon_vaddlvq_u16:
2933 case NEON::BI__builtin_neon_vaddlv_s8:
2934 case NEON::BI__builtin_neon_vaddlvq_s8:
2935 case NEON::BI__builtin_neon_vaddlv_s16:
2936 case NEON::BI__builtin_neon_vaddlvq_s16: {
2937 mlir::Type argTy = convertType(expr->getArg(0)->getType());
2938 mlir::Type userRetTy = convertType(expr->getType());
2939 auto eltTy = mlir::cast<cir::IntType>(
2940 mlir::cast<cir::VectorType>(argTy).getElementType());
2941 bool isUnsigned = !eltTy.isSigned();
2942 // These builtins only use 8 and 16-bit element vectors; the intrinsic
2943 // always produces i32. The C result is i32 for 16-bit elements, but i16
2944 // for 8-bit elements, so we emit at i32 and narrow only in that case.
2945 bool needsTrunc = eltTy.getWidth() == 8;
2946 intrName = isUnsigned ? "aarch64.neon.uaddlv" : "aarch64.neon.saddlv";
2947 mlir::Type intrRetTy = userRetTy;
2948 if (needsTrunc)
2949 intrRetTy = isUnsigned ? builder.getUInt32Ty() : builder.getSInt32Ty();
2950 mlir::Value result =
2951 emitNeonCall(cgm, builder, {argTy}, ops, intrName, intrRetTy, loc);
2952 if (needsTrunc)
2953 result = builder.createIntCast(result, userRetTy);
2954 return result;
2955 }
2956 case NEON::BI__builtin_neon_vsri_n_v:
2957 case NEON::BI__builtin_neon_vsriq_n_v: {
2959 ops[0], ops[1], builder.createIntCast(ops[2], builder.getUInt32Ty())};
2960 return emitNeonCall(cgm, builder, {ty, ty, builder.getUInt32Ty()}, vsriArgs,
2961 "aarch64.neon.vsri", ty, loc);
2962 }
2963 case NEON::BI__builtin_neon_vsli_n_v:
2964 case NEON::BI__builtin_neon_vsliq_n_v: {
2965
2966 intrName = "aarch64.neon.vsli";
2967
2968 llvm::SmallVector<mlir::Type> argTypes = {ty, ty, ops[2].getType()};
2969
2970 return emitNeonCall(cgm, builder, argTypes, ops, intrName, ty, loc,
2971 /*isConstrainedFPIntrinsic=*/false,
2972 /*shift=*/0, /*rightshift=*/false);
2973 }
2974 case NEON::BI__builtin_neon_vsra_n_v:
2975 case NEON::BI__builtin_neon_vsraq_n_v:
2976 cgm.errorNYI(expr->getSourceRange(),
2977 std::string("unimplemented AArch64 builtin call: ") +
2978 getContext().BuiltinInfo.getName(builtinID));
2979 return mlir::Value{};
2980 case NEON::BI__builtin_neon_vrsra_n_v:
2981 case NEON::BI__builtin_neon_vrsraq_n_v: {
2982 intrName = usgn ? "aarch64.neon.urshl" : "aarch64.neon.srshl";
2983 // The llvm intrinsic is expecting negative shift amount for right shift.
2984 // Thus we have to make shift amount vec type to be signed.
2985 cir::VectorType shiftAmtVecTy =
2986 usgn ? getSignChangedVectorType(builder, ty) : ty;
2987 llvm::SmallVector<mlir::Value, 2> tmpOps = {ops[1], ops[2]};
2988 mlir::Value tmp = emitNeonCall(cgm, builder, {ty, shiftAmtVecTy}, tmpOps,
2989 intrName, ty, loc,
2990 /*isConstrainedFPIntrinsic=*/false,
2991 /*shift=*/1, /*rightshift=*/true);
2992 ops[0] = builder.createBitcast(ops[0], ty);
2993 return builder.createAdd(loc, ops[0], tmp);
2994 }
2995 case NEON::BI__builtin_neon_vld1_v:
2996 case NEON::BI__builtin_neon_vld1q_v:
2997 case NEON::BI__builtin_neon_vst1_v:
2998 case NEON::BI__builtin_neon_vst1q_v:
2999 case NEON::BI__builtin_neon_vld1_lane_v:
3000 case NEON::BI__builtin_neon_vld1q_lane_v:
3001 case NEON::BI__builtin_neon_vldap1_lane_s64:
3002 case NEON::BI__builtin_neon_vldap1q_lane_s64:
3003 case NEON::BI__builtin_neon_vld1_dup_v:
3004 case NEON::BI__builtin_neon_vld1q_dup_v:
3005 case NEON::BI__builtin_neon_vst1_lane_v:
3006 case NEON::BI__builtin_neon_vst1q_lane_v:
3007 case NEON::BI__builtin_neon_vstl1_lane_s64:
3008 case NEON::BI__builtin_neon_vstl1q_lane_s64:
3009 case NEON::BI__builtin_neon_vld2_v:
3010 case NEON::BI__builtin_neon_vld2q_v:
3011 case NEON::BI__builtin_neon_vld3_v:
3012 case NEON::BI__builtin_neon_vld3q_v:
3013 case NEON::BI__builtin_neon_vld4_v:
3014 case NEON::BI__builtin_neon_vld4q_v:
3015 case NEON::BI__builtin_neon_vld2_dup_v:
3016 case NEON::BI__builtin_neon_vld2q_dup_v:
3017 case NEON::BI__builtin_neon_vld3_dup_v:
3018 case NEON::BI__builtin_neon_vld3q_dup_v:
3019 case NEON::BI__builtin_neon_vld4_dup_v:
3020 case NEON::BI__builtin_neon_vld4q_dup_v:
3021 case NEON::BI__builtin_neon_vld2_lane_v:
3022 case NEON::BI__builtin_neon_vld2q_lane_v:
3023 case NEON::BI__builtin_neon_vld3_lane_v:
3024 case NEON::BI__builtin_neon_vld3q_lane_v:
3025 case NEON::BI__builtin_neon_vld4_lane_v:
3026 case NEON::BI__builtin_neon_vld4q_lane_v:
3027 case NEON::BI__builtin_neon_vst2_v:
3028 case NEON::BI__builtin_neon_vst2q_v:
3029 case NEON::BI__builtin_neon_vst2_lane_v:
3030 case NEON::BI__builtin_neon_vst2q_lane_v:
3031 case NEON::BI__builtin_neon_vst3_v:
3032 case NEON::BI__builtin_neon_vst3q_v:
3033 case NEON::BI__builtin_neon_vst3_lane_v:
3034 case NEON::BI__builtin_neon_vst3q_lane_v:
3035 case NEON::BI__builtin_neon_vst4_v:
3036 case NEON::BI__builtin_neon_vst4q_v:
3037 case NEON::BI__builtin_neon_vst4_lane_v:
3038 case NEON::BI__builtin_neon_vst4q_lane_v:
3039 cgm.errorNYI(expr->getSourceRange(),
3040 std::string("unimplemented AArch64 builtin call: ") +
3041 getContext().BuiltinInfo.getName(builtinID));
3042 return mlir::Value{};
3043 case NEON::BI__builtin_neon_vtrn_v:
3044 case NEON::BI__builtin_neon_vtrnq_v: {
3045 ops[1] = builder.createBitcast(ops[1], ty);
3046 ops[2] = builder.createBitcast(ops[2], ty);
3047 // Adding a bitcast here as Ops[0] might be a void pointer.
3048 mlir::Value baseAddr =
3049 builder.createBitcast(ops[0], builder.getPointerTo(ty));
3050 mlir::Value sv;
3051
3052 for (unsigned vi = 0; vi != 2; ++vi) {
3054 for (unsigned i = 0, e = ty.getSize(); i != e; i += 2) {
3055 indices.push_back(i + vi);
3056 indices.push_back(i + e + vi);
3057 }
3058 cir::ConstantOp idx = builder.getConstInt(loc, builder.getSInt32Ty(), vi);
3059 mlir::Value addr = builder.createPtrStride(loc, baseAddr, idx);
3060 sv = builder.createVecShuffle(loc, ops[1], ops[2], indices);
3061 (void)builder.CIRBaseBuilderTy::createStore(loc, sv, addr);
3062 }
3063 return sv;
3064 }
3065 case NEON::BI__builtin_neon_vuzp_v:
3066 case NEON::BI__builtin_neon_vuzpq_v: {
3067 ops[1] = builder.createBitcast(ops[1], ty);
3068 ops[2] = builder.createBitcast(ops[2], ty);
3069 // Adding a bitcast here as Ops[0] might be a void pointer.
3070 mlir::Value baseAddr =
3071 builder.createBitcast(ops[0], builder.getPointerTo(ty));
3072 mlir::Value sv;
3073 for (unsigned vi = 0; vi != 2; ++vi) {
3075 for (unsigned i = 0, e = ty.getSize(); i != e; ++i) {
3076 indices.push_back(2 * i + vi);
3077 }
3078 cir::ConstantOp idx = builder.getConstInt(loc, builder.getSInt32Ty(), vi);
3079 mlir::Value addr = builder.createPtrStride(loc, baseAddr, idx);
3080 sv = builder.createVecShuffle(loc, ops[1], ops[2], indices);
3081 (void)builder.CIRBaseBuilderTy::createStore(loc, sv, addr);
3082 }
3083 return sv;
3084 }
3085 case NEON::BI__builtin_neon_vzip_v:
3086 case NEON::BI__builtin_neon_vzipq_v: {
3087 ops[1] = builder.createBitcast(ops[1], ty);
3088 ops[2] = builder.createBitcast(ops[2], ty);
3089 // Adding a bitcast here as Ops[0] might be a void pointer.
3090 mlir::Value baseAddr =
3091 builder.createBitcast(ops[0], builder.getPointerTo(ty));
3092 mlir::Value sv;
3093 for (unsigned vi = 0; vi != 2; ++vi) {
3095 for (unsigned i = 0, e = ty.getSize(); i != e; i += 2) {
3096 indices.push_back((i + vi * e) >> 1);
3097 indices.push_back(((i + vi * e) >> 1) + e);
3098 }
3099 cir::ConstantOp idx = builder.getConstInt(loc, builder.getSInt32Ty(), vi);
3100 mlir::Value addr = builder.createPtrStride(loc, baseAddr, idx);
3101 sv = builder.createVecShuffle(loc, ops[1], ops[2], indices);
3102 (void)builder.CIRBaseBuilderTy::createStore(loc, sv, addr);
3103 }
3104 return sv;
3105 }
3106 case NEON::BI__builtin_neon_vqtbl1q_v:
3107 case NEON::BI__builtin_neon_vqtbl2q_v:
3108 case NEON::BI__builtin_neon_vqtbl3q_v:
3109 case NEON::BI__builtin_neon_vqtbl4q_v:
3110 case NEON::BI__builtin_neon_vqtbx1q_v:
3111 case NEON::BI__builtin_neon_vqtbx2q_v:
3112 case NEON::BI__builtin_neon_vqtbx3q_v:
3113 case NEON::BI__builtin_neon_vqtbx4q_v:
3114 case NEON::BI__builtin_neon_vsqadd_v:
3115 case NEON::BI__builtin_neon_vsqaddq_v:
3116 case NEON::BI__builtin_neon_vuqadd_v:
3117 case NEON::BI__builtin_neon_vuqaddq_v:
3118 case NEON::BI__builtin_neon_vluti2_laneq_mf8:
3119 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
3120 case NEON::BI__builtin_neon_vluti2_laneq_f16:
3121 case NEON::BI__builtin_neon_vluti2_laneq_p16:
3122 case NEON::BI__builtin_neon_vluti2_laneq_p8:
3123 case NEON::BI__builtin_neon_vluti2_laneq_s16:
3124 case NEON::BI__builtin_neon_vluti2_laneq_s8:
3125 case NEON::BI__builtin_neon_vluti2_laneq_u16:
3126 case NEON::BI__builtin_neon_vluti2_laneq_u8:
3127 case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
3128 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
3129 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
3130 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
3131 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
3132 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
3133 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
3134 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
3135 case NEON::BI__builtin_neon_vluti2q_laneq_u8:
3136 case NEON::BI__builtin_neon_vluti2_lane_mf8:
3137 case NEON::BI__builtin_neon_vluti2_lane_bf16:
3138 case NEON::BI__builtin_neon_vluti2_lane_f16:
3139 case NEON::BI__builtin_neon_vluti2_lane_p16:
3140 case NEON::BI__builtin_neon_vluti2_lane_p8:
3141 case NEON::BI__builtin_neon_vluti2_lane_s16:
3142 case NEON::BI__builtin_neon_vluti2_lane_s8:
3143 case NEON::BI__builtin_neon_vluti2_lane_u16:
3144 case NEON::BI__builtin_neon_vluti2_lane_u8:
3145 case NEON::BI__builtin_neon_vluti2q_lane_mf8:
3146 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
3147 case NEON::BI__builtin_neon_vluti2q_lane_f16:
3148 case NEON::BI__builtin_neon_vluti2q_lane_p16:
3149 case NEON::BI__builtin_neon_vluti2q_lane_p8:
3150 case NEON::BI__builtin_neon_vluti2q_lane_s16:
3151 case NEON::BI__builtin_neon_vluti2q_lane_s8:
3152 case NEON::BI__builtin_neon_vluti2q_lane_u16:
3153 case NEON::BI__builtin_neon_vluti2q_lane_u8:
3154 case NEON::BI__builtin_neon_vluti4q_lane_mf8:
3155 case NEON::BI__builtin_neon_vluti4q_lane_p8:
3156 case NEON::BI__builtin_neon_vluti4q_lane_s8:
3157 case NEON::BI__builtin_neon_vluti4q_lane_u8:
3158 case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
3159 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
3160 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
3161 case NEON::BI__builtin_neon_vluti4q_laneq_u8:
3162 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
3163 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
3164 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
3165 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
3166 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2:
3167 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
3168 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
3169 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
3170 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
3171 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2:
3172 case NEON::BI__builtin_neon_vmmlaq_f16_mf8_fpm:
3173 case NEON::BI__builtin_neon_vmmlaq_f32_mf8_fpm:
3174 case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
3175 case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
3176 case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
3177 case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
3178 case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
3179 case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
3180 case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
3181 case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
3182 case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
3183 case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
3184 case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
3185 case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
3186 case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
3187 case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
3188 case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
3189 case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm:
3190 case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
3191 case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
3192 case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
3193 case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
3194 case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
3195 case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
3196 case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
3197 case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
3198 case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
3199 case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
3200 case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
3201 case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
3202 case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
3203 case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
3204 case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
3205 case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
3206 case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
3207 case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
3208 case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
3209 case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
3210 case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
3211 case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
3212 case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
3213 case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
3214 case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
3215 case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
3216 case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
3217 case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
3218 case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
3219 case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
3220 case NEON::BI__builtin_neon_vamin_f16:
3221 case NEON::BI__builtin_neon_vaminq_f16:
3222 case NEON::BI__builtin_neon_vamin_f32:
3223 case NEON::BI__builtin_neon_vaminq_f32:
3224 case NEON::BI__builtin_neon_vaminq_f64:
3225 case NEON::BI__builtin_neon_vamax_f16:
3226 case NEON::BI__builtin_neon_vamaxq_f16:
3227 case NEON::BI__builtin_neon_vamax_f32:
3228 case NEON::BI__builtin_neon_vamaxq_f32:
3229 case NEON::BI__builtin_neon_vamaxq_f64:
3230 case NEON::BI__builtin_neon_vscale_f16:
3231 case NEON::BI__builtin_neon_vscaleq_f16:
3232 case NEON::BI__builtin_neon_vscale_f32:
3233 case NEON::BI__builtin_neon_vscaleq_f32:
3234 case NEON::BI__builtin_neon_vscaleq_f64:
3235 cgm.errorNYI(expr->getSourceRange(),
3236 std::string("unimplemented AArch64 builtin call: ") +
3237 getContext().BuiltinInfo.getName(builtinID));
3238 return mlir::Value{};
3239 }
3240
3241 // Unreachable: All cases in the switch above return.
3242}
Utilities used for generating code for AArch64 that are shared between the classic and ClangIR code-gen.
static bool isUnsigned(SValBuilder &SVB, NonLoc Value)
Defines enum values for all the target-independent builtin functions.
static mlir::Value emitCommonNeonSISDBuiltinExpr(CIRGenFunction &cgf, const ARMVectorIntrinsicInfo &info, llvm::SmallVectorImpl< mlir::Value > &ops, const CallExpr *expr)
static bool hasExtraNeonArgument(unsigned builtinID)
Return true if BuiltinID is an overloaded Neon intrinsic with an extra argument that specifies the vector type.
static bool aarch64SVEIntrinsicsProvenSorted
static const std::pair< unsigned, unsigned > neonEquivalentIntrinsicMap[]
static std::pair< mlir::Type, llvm::SmallVector< mlir::Type > > deriveNeonSISDIntrinsicOperandTypes(CIRGenFunction &cgf, unsigned modifier, mlir::Type arg0Ty, mlir::Type resultTy, llvm::ArrayRef< mlir::Value > ops)
static mlir::Value emitNeonSplat(CIRGenBuilderTy &builder, mlir::Location loc, mlir::Value v, mlir::Value lane, unsigned int resEltCnt)
static mlir::Value emitNeonCallToOp(CIRGenModule &cgm, CIRGenBuilderTy &builder, llvm::SmallVector< mlir::Type > argTypes, llvm::SmallVectorImpl< mlir::Value > &args, std::optional< llvm::StringRef > intrinsicName, mlir::Type funcResTy, mlir::Location loc, bool isConstrainedFPIntrinsic=false, unsigned shift=0, bool rightshift=false)
static const ARMVectorIntrinsicInfo aarch64SVEIntrinsicMap[]
static cir::VectorType getSVEVectorForElementType(CIRGenModule &cgm, mlir::Type eltTy)
static unsigned getSVEMinEltCount(clang::SVETypeFlags::EltType sveType)
static mlir::Value genVscaleTimesFactor(mlir::Location loc, CIRGenBuilderTy builder, mlir::Type cirTy, int32_t scalingFactor)
static cir::VectorType getFloatNeonType(CIRGenFunction &cgf, NeonTypeFlags intTypeFlags)
static llvm::StringRef getLLVMIntrNameNoPrefix(llvm::Intrinsic::ID intrID)
static int64_t getIntValueFromConstOp(mlir::Value val)
static mlir::Value emitCallMaybeConstrainedBuiltin(CIRGenBuilderTy &builder, mlir::Location loc, StringRef intrName, mlir::Type retTy, llvm::SmallVector< mlir::Value > &ops)
static mlir::Value emitCommonNeonBuiltinExpr(CIRGenFunction &cgf, unsigned builtinID, unsigned llvmIntrinsic, unsigned altLLVMIntrinsic, const char *nameHint, unsigned modifier, const CallExpr *expr, llvm::SmallVectorImpl< mlir::Value > &ops)
static mlir::Value emitNeonCall(CIRGenModule &cgm, CIRGenBuilderTy &builder, llvm::SmallVector< mlir::Type > argTypes, llvm::SmallVectorImpl< mlir::Value > &args, llvm::StringRef intrinsicName, mlir::Type funcResTy, mlir::Location loc, bool isConstrainedFPIntrinsic=false, unsigned shift=0, bool rightshift=false)
static cir::VectorType getSignChangedVectorType(CIRGenBuilderTy &builder, cir::VectorType vecTy)
Flip the signedness of vecTy's element type, keeping the width and number of lanes the same.
static cir::VectorType getNeonPairwiseWidenInputType(cir::VectorType resType, bool usgn)
static const ARMVectorIntrinsicInfo * findARMVectorIntrinsicInMap(ArrayRef< ARMVectorIntrinsicInfo > intrinsicMap, unsigned builtinID, bool &mapProvenSorted)
static mlir::Value emitCommonNeonShift(CIRGenBuilderTy &builder, mlir::Location loc, cir::VectorType resTy, mlir::Value shifTgt, mlir::Value shiftAmt, bool shiftLeft)
static bool aarch64SIMDIntrinsicsProvenSorted
static cir::VectorType getIntVecFromVecTy(CIRGenBuilderTy &builder, cir::VectorType vecTy)
constexpr unsigned sveBitsPerBlock
static mlir::Value emitNeonShiftVector(CIRGenBuilderTy &builder, mlir::Value shiftVal, cir::VectorType vecTy, mlir::Location loc, bool neg)
Build a constant shift amount vector of vecTy to shift a vector. Here shiftVal is a constant integer t...
static mlir::Value emitAArch64CompareBuiltinExpr(CIRGenFunction &cgf, CIRGenBuilderTy &builder, mlir::Location loc, mlir::Value src, mlir::Type retTy, const cir::CmpOpKind kind)
static mlir::Value emitNeonRShiftImm(CIRGenFunction &cgf, mlir::Value shiftVec, mlir::Value shiftVal, cir::VectorType vecTy, bool usgn, mlir::Location loc)
static cir::VectorType getNeonType(CIRGenFunction *cgf, NeonTypeFlags typeFlags, mlir::Location loc, bool hasLegalHalfType=true, bool v1Ty=false, bool allowBFloatArgsAndRet=true)
static bool aarch64SISDIntrinsicsProvenSorted
TokenType getType() const
Returns the token's type, e.g. whether "<" is a template opener or binary operator.
collection of selectors, each with an associated kind and an ordered collection of selectors. A selector has a kind
Enumerates target-specific builtins in their own namespaces within namespace clang.
cir::ConstantOp getNullValue(mlir::Type ty, mlir::Location loc)
mlir::Value createCast(mlir::Location loc, cir::CastKind kind, mlir::Value src, mlir::Type newTy)
mlir::Value createNUWAMul(mlir::Location loc, mlir::Value lhs, mlir::Value rhs)
cir::VecCmpOp createVecCompare(mlir::Location loc, cir::CmpOpKind kind, mlir::Value lhs, mlir::Value rhs)
mlir::Value createIntCast(mlir::Value src, mlir::Type newTy)
mlir::Value createBitcast(mlir::Value src, mlir::Type newTy)
cir::CmpOp createCompare(mlir::Location loc, cir::CmpOpKind kind, mlir::Value lhs, mlir::Value rhs)
cir::ConstantOp getConstantInt(mlir::Location loc, mlir::Type ty, int64_t value)
llvm::TypeSize getTypeSizeInBits(mlir::Type ty) const
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic analysis of a file.
Definition ASTContext.h:229
Builtin::Context & BuiltinInfo
Definition ASTContext.h:809
QualType GetBuiltinType(unsigned ID, GetBuiltinTypeError &Error, unsigned *IntegerConstantArgs=nullptr) const
Return the type for the specified builtin.
@ GE_None
No error.
std::string getName(unsigned ID) const
Return the identifier name for the specified builtin, e.g. "__builtin_abs".
Definition Builtins.cpp:80
cir::ConstantOp getUInt64(uint64_t c, mlir::Location loc)
mlir::Value emitIntrinsicCallOp(mlir::Location loc, const llvm::StringRef str, const mlir::Type &resTy, Operands &&...op)
cir::IntType getSIntNTy(int n)
cir::VecShuffleOp createVecShuffle(mlir::Location loc, mlir::Value vec1, mlir::Value vec2, llvm::ArrayRef< mlir::Attribute > maskAttrs)
cir::ConstantOp getZero(mlir::Location loc, mlir::Type ty)
cir::ConstantOp getConstInt(mlir::Location loc, llvm::APSInt intVal)
cir::IntType getUIntNTy(int n)
mlir::Type convertType(clang::QualType t)
const TargetInfo & getTarget() const
mlir::Location getLoc(clang::SourceLocation srcLoc)
Helpers to convert Clang's SourceLocation to a MLIR Location.
static int64_t getZExtIntValueFromConstOp(mlir::Value val)
Get zero-extended integer from a mlir::Value that is an int constant or a constant op.
mlir::Value emitSVEPredicateCast(mlir::Value pred, unsigned minNumElts, mlir::Location loc)
bool getAArch64SVEProcessedOperands(unsigned builtinID, const CallExpr *expr, SmallVectorImpl< mlir::Value > &ops, clang::SVETypeFlags typeFlags)
Address returnValue
The temporary alloca to hold the return value.
std::optional< mlir::Value > emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, ReturnValueSlot returnValue, llvm::Triple::ArchType arch)
std::optional< mlir::Value > emitAArch64SMEBuiltinExpr(unsigned builtinID, const CallExpr *expr)
mlir::Value emitScalarExpr(const clang::Expr *e, bool ignoreResultAssign=false)
Emit the computation of the specified expression of scalar type.
CIRGenBuilderTy & getBuilder()
clang::ASTContext & getContext() const
std::optional< mlir::Value > emitAArch64SVEBuiltinExpr(unsigned builtinID, const CallExpr *expr)
mlir::Value emitScalarOrConstFoldImmArg(unsigned iceArguments, unsigned idx, const Expr *argExpr)
This class organizes the cross-function state that is used while generating CIR code.
DiagnosticBuilder errorNYI(SourceLocation, llvm::StringRef)
Helpers to emit "not yet implemented" error diagnostics.
const cir::CIRDataLayout getDataLayout() const
Contains the address where the return value of a function can be stored, and whether the address is volatile or not.
Definition CIRGenCall.h:260
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition Expr.h:2946
This represents one expression.
Definition Expr.h:112
Flags to identify the types for overloaded Neon builtins.
EltType getEltType() const
Flags to identify the types for overloaded SVE builtins.
bool isReverseUSDOT() const
bool isGatherLoad() const
EltType getEltType() const
bool isPrefetch() const
bool isTupleSet() const
bool isReverseMergeAnyAccOp() const
bool isTupleGet() const
bool isInsertOp1SVALL() const
bool isAppendSVALL() const
bool isReverseMergeAnyBinOp() const
bool isStructStore() const
bool isTupleCreate() const
bool isGatherPrefetch() const
bool hasSplatOperand() const
MergeType getMergeType() const
bool isStructLoad() const
unsigned getSplatOperand() const
bool isScatterStore() const
bool isReverseCompare() const
virtual bool hasFastHalfType() const
Determine whether the target has fast native support for operations on half types.
Definition TargetInfo.h:712
const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[]
const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[]
const internal::VariadicAllOfMatcher< Type > type
Matches Types in the clang AST.
const internal::VariadicDynCastAllOfMatcher< Stmt, Expr > expr
Matches expressions.
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
Definition Address.h:330
U cast(CodeGen::Address addr)
Definition Address.h:327
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
static bool msvcBuiltins()
static bool handleBuiltinICEArguments()
static bool aarch64SIMDIntrinsics()
static bool aarch64SVEIntrinsics()
static bool emitConstrainedFPCall()
static bool aarch64SMEIntrinsics()
static bool aarch64TblBuiltinExpr()