ARM.cpp
1//===---------- ARM.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ABIInfo.h"
14#include "CGBuiltin.h"
15#include "CGDebugInfo.h"
16#include "TargetInfo.h"
17#include "clang/Basic/TargetBuiltins.h"
18#include "llvm/IR/InlineAsm.h"
19#include "llvm/IR/IntrinsicsAArch64.h"
20#include "llvm/IR/IntrinsicsARM.h"
21#include "llvm/IR/IntrinsicsBPF.h"
22#include "llvm/TargetParser/AArch64TargetParser.h"
23
24#include <numeric>
25
26using namespace clang;
27using namespace CodeGen;
28using namespace llvm;
29
30static std::optional<CodeGenFunction::MSVCIntrin>
31translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
32 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
33 switch (BuiltinID) {
34 default:
35 return std::nullopt;
36 case clang::AArch64::BI_BitScanForward:
37 case clang::AArch64::BI_BitScanForward64:
38 return MSVCIntrin::_BitScanForward;
39 case clang::AArch64::BI_BitScanReverse:
40 case clang::AArch64::BI_BitScanReverse64:
41 return MSVCIntrin::_BitScanReverse;
42 case clang::AArch64::BI_InterlockedAnd64:
43 return MSVCIntrin::_InterlockedAnd;
44 case clang::AArch64::BI_InterlockedExchange64:
45 return MSVCIntrin::_InterlockedExchange;
46 case clang::AArch64::BI_InterlockedExchangeAdd64:
47 return MSVCIntrin::_InterlockedExchangeAdd;
48 case clang::AArch64::BI_InterlockedExchangeSub64:
49 return MSVCIntrin::_InterlockedExchangeSub;
50 case clang::AArch64::BI_InterlockedOr64:
51 return MSVCIntrin::_InterlockedOr;
52 case clang::AArch64::BI_InterlockedXor64:
53 return MSVCIntrin::_InterlockedXor;
54 case clang::AArch64::BI_InterlockedDecrement64:
55 return MSVCIntrin::_InterlockedDecrement;
56 case clang::AArch64::BI_InterlockedIncrement64:
57 return MSVCIntrin::_InterlockedIncrement;
58 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
59 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
60 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
61 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
62 return MSVCIntrin::_InterlockedExchangeAdd_acq;
63 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
64 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
65 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
66 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
67 return MSVCIntrin::_InterlockedExchangeAdd_rel;
68 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
69 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
70 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
71 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
72 return MSVCIntrin::_InterlockedExchangeAdd_nf;
73 case clang::AArch64::BI_InterlockedExchange8_acq:
74 case clang::AArch64::BI_InterlockedExchange16_acq:
75 case clang::AArch64::BI_InterlockedExchange_acq:
76 case clang::AArch64::BI_InterlockedExchange64_acq:
77 case clang::AArch64::BI_InterlockedExchangePointer_acq:
78 return MSVCIntrin::_InterlockedExchange_acq;
79 case clang::AArch64::BI_InterlockedExchange8_rel:
80 case clang::AArch64::BI_InterlockedExchange16_rel:
81 case clang::AArch64::BI_InterlockedExchange_rel:
82 case clang::AArch64::BI_InterlockedExchange64_rel:
83 case clang::AArch64::BI_InterlockedExchangePointer_rel:
84 return MSVCIntrin::_InterlockedExchange_rel;
85 case clang::AArch64::BI_InterlockedExchange8_nf:
86 case clang::AArch64::BI_InterlockedExchange16_nf:
87 case clang::AArch64::BI_InterlockedExchange_nf:
88 case clang::AArch64::BI_InterlockedExchange64_nf:
89 case clang::AArch64::BI_InterlockedExchangePointer_nf:
90 return MSVCIntrin::_InterlockedExchange_nf;
91 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
92 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
93 case clang::AArch64::BI_InterlockedCompareExchange_acq:
94 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
95 case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
96 return MSVCIntrin::_InterlockedCompareExchange_acq;
97 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
98 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
99 case clang::AArch64::BI_InterlockedCompareExchange_rel:
100 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
101 case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
102 return MSVCIntrin::_InterlockedCompareExchange_rel;
103 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
104 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
105 case clang::AArch64::BI_InterlockedCompareExchange_nf:
106 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
107 return MSVCIntrin::_InterlockedCompareExchange_nf;
108 case clang::AArch64::BI_InterlockedCompareExchange128:
109 return MSVCIntrin::_InterlockedCompareExchange128;
110 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
111 return MSVCIntrin::_InterlockedCompareExchange128_acq;
112 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
113 return MSVCIntrin::_InterlockedCompareExchange128_nf;
114 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
115 return MSVCIntrin::_InterlockedCompareExchange128_rel;
116 case clang::AArch64::BI_InterlockedOr8_acq:
117 case clang::AArch64::BI_InterlockedOr16_acq:
118 case clang::AArch64::BI_InterlockedOr_acq:
119 case clang::AArch64::BI_InterlockedOr64_acq:
120 return MSVCIntrin::_InterlockedOr_acq;
121 case clang::AArch64::BI_InterlockedOr8_rel:
122 case clang::AArch64::BI_InterlockedOr16_rel:
123 case clang::AArch64::BI_InterlockedOr_rel:
124 case clang::AArch64::BI_InterlockedOr64_rel:
125 return MSVCIntrin::_InterlockedOr_rel;
126 case clang::AArch64::BI_InterlockedOr8_nf:
127 case clang::AArch64::BI_InterlockedOr16_nf:
128 case clang::AArch64::BI_InterlockedOr_nf:
129 case clang::AArch64::BI_InterlockedOr64_nf:
130 return MSVCIntrin::_InterlockedOr_nf;
131 case clang::AArch64::BI_InterlockedXor8_acq:
132 case clang::AArch64::BI_InterlockedXor16_acq:
133 case clang::AArch64::BI_InterlockedXor_acq:
134 case clang::AArch64::BI_InterlockedXor64_acq:
135 return MSVCIntrin::_InterlockedXor_acq;
136 case clang::AArch64::BI_InterlockedXor8_rel:
137 case clang::AArch64::BI_InterlockedXor16_rel:
138 case clang::AArch64::BI_InterlockedXor_rel:
139 case clang::AArch64::BI_InterlockedXor64_rel:
140 return MSVCIntrin::_InterlockedXor_rel;
141 case clang::AArch64::BI_InterlockedXor8_nf:
142 case clang::AArch64::BI_InterlockedXor16_nf:
143 case clang::AArch64::BI_InterlockedXor_nf:
144 case clang::AArch64::BI_InterlockedXor64_nf:
145 return MSVCIntrin::_InterlockedXor_nf;
146 case clang::AArch64::BI_InterlockedAnd8_acq:
147 case clang::AArch64::BI_InterlockedAnd16_acq:
148 case clang::AArch64::BI_InterlockedAnd_acq:
149 case clang::AArch64::BI_InterlockedAnd64_acq:
150 return MSVCIntrin::_InterlockedAnd_acq;
151 case clang::AArch64::BI_InterlockedAnd8_rel:
152 case clang::AArch64::BI_InterlockedAnd16_rel:
153 case clang::AArch64::BI_InterlockedAnd_rel:
154 case clang::AArch64::BI_InterlockedAnd64_rel:
155 return MSVCIntrin::_InterlockedAnd_rel;
156 case clang::AArch64::BI_InterlockedAnd8_nf:
157 case clang::AArch64::BI_InterlockedAnd16_nf:
158 case clang::AArch64::BI_InterlockedAnd_nf:
159 case clang::AArch64::BI_InterlockedAnd64_nf:
160 return MSVCIntrin::_InterlockedAnd_nf;
161 case clang::AArch64::BI_InterlockedIncrement16_acq:
162 case clang::AArch64::BI_InterlockedIncrement_acq:
163 case clang::AArch64::BI_InterlockedIncrement64_acq:
164 return MSVCIntrin::_InterlockedIncrement_acq;
165 case clang::AArch64::BI_InterlockedIncrement16_rel:
166 case clang::AArch64::BI_InterlockedIncrement_rel:
167 case clang::AArch64::BI_InterlockedIncrement64_rel:
168 return MSVCIntrin::_InterlockedIncrement_rel;
169 case clang::AArch64::BI_InterlockedIncrement16_nf:
170 case clang::AArch64::BI_InterlockedIncrement_nf:
171 case clang::AArch64::BI_InterlockedIncrement64_nf:
172 return MSVCIntrin::_InterlockedIncrement_nf;
173 case clang::AArch64::BI_InterlockedDecrement16_acq:
174 case clang::AArch64::BI_InterlockedDecrement_acq:
175 case clang::AArch64::BI_InterlockedDecrement64_acq:
176 return MSVCIntrin::_InterlockedDecrement_acq;
177 case clang::AArch64::BI_InterlockedDecrement16_rel:
178 case clang::AArch64::BI_InterlockedDecrement_rel:
179 case clang::AArch64::BI_InterlockedDecrement64_rel:
180 return MSVCIntrin::_InterlockedDecrement_rel;
181 case clang::AArch64::BI_InterlockedDecrement16_nf:
182 case clang::AArch64::BI_InterlockedDecrement_nf:
183 case clang::AArch64::BI_InterlockedDecrement64_nf:
184 return MSVCIntrin::_InterlockedDecrement_nf;
185 }
186 llvm_unreachable("must return from switch");
187}
188
189static std::optional<CodeGenFunction::MSVCIntrin>
190translateArmToMsvcIntrin(unsigned BuiltinID) {
191 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
192 switch (BuiltinID) {
193 default:
194 return std::nullopt;
195 case clang::ARM::BI_BitScanForward:
196 case clang::ARM::BI_BitScanForward64:
197 return MSVCIntrin::_BitScanForward;
198 case clang::ARM::BI_BitScanReverse:
199 case clang::ARM::BI_BitScanReverse64:
200 return MSVCIntrin::_BitScanReverse;
201 case clang::ARM::BI_InterlockedAnd64:
202 return MSVCIntrin::_InterlockedAnd;
203 case clang::ARM::BI_InterlockedExchange64:
204 return MSVCIntrin::_InterlockedExchange;
205 case clang::ARM::BI_InterlockedExchangeAdd64:
206 return MSVCIntrin::_InterlockedExchangeAdd;
207 case clang::ARM::BI_InterlockedExchangeSub64:
208 return MSVCIntrin::_InterlockedExchangeSub;
209 case clang::ARM::BI_InterlockedOr64:
210 return MSVCIntrin::_InterlockedOr;
211 case clang::ARM::BI_InterlockedXor64:
212 return MSVCIntrin::_InterlockedXor;
213 case clang::ARM::BI_InterlockedDecrement64:
214 return MSVCIntrin::_InterlockedDecrement;
215 case clang::ARM::BI_InterlockedIncrement64:
216 return MSVCIntrin::_InterlockedIncrement;
217 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
218 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
219 case clang::ARM::BI_InterlockedExchangeAdd_acq:
220 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
221 return MSVCIntrin::_InterlockedExchangeAdd_acq;
222 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
223 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
224 case clang::ARM::BI_InterlockedExchangeAdd_rel:
225 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
226 return MSVCIntrin::_InterlockedExchangeAdd_rel;
227 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
228 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
229 case clang::ARM::BI_InterlockedExchangeAdd_nf:
230 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
231 return MSVCIntrin::_InterlockedExchangeAdd_nf;
232 case clang::ARM::BI_InterlockedExchange8_acq:
233 case clang::ARM::BI_InterlockedExchange16_acq:
234 case clang::ARM::BI_InterlockedExchange_acq:
235 case clang::ARM::BI_InterlockedExchange64_acq:
236 case clang::ARM::BI_InterlockedExchangePointer_acq:
237 return MSVCIntrin::_InterlockedExchange_acq;
238 case clang::ARM::BI_InterlockedExchange8_rel:
239 case clang::ARM::BI_InterlockedExchange16_rel:
240 case clang::ARM::BI_InterlockedExchange_rel:
241 case clang::ARM::BI_InterlockedExchange64_rel:
242 case clang::ARM::BI_InterlockedExchangePointer_rel:
243 return MSVCIntrin::_InterlockedExchange_rel;
244 case clang::ARM::BI_InterlockedExchange8_nf:
245 case clang::ARM::BI_InterlockedExchange16_nf:
246 case clang::ARM::BI_InterlockedExchange_nf:
247 case clang::ARM::BI_InterlockedExchange64_nf:
248 case clang::ARM::BI_InterlockedExchangePointer_nf:
249 return MSVCIntrin::_InterlockedExchange_nf;
250 case clang::ARM::BI_InterlockedCompareExchange8_acq:
251 case clang::ARM::BI_InterlockedCompareExchange16_acq:
252 case clang::ARM::BI_InterlockedCompareExchange_acq:
253 case clang::ARM::BI_InterlockedCompareExchange64_acq:
254 case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
255 return MSVCIntrin::_InterlockedCompareExchange_acq;
256 case clang::ARM::BI_InterlockedCompareExchange8_rel:
257 case clang::ARM::BI_InterlockedCompareExchange16_rel:
258 case clang::ARM::BI_InterlockedCompareExchange_rel:
259 case clang::ARM::BI_InterlockedCompareExchange64_rel:
260 case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
261 return MSVCIntrin::_InterlockedCompareExchange_rel;
262 case clang::ARM::BI_InterlockedCompareExchange8_nf:
263 case clang::ARM::BI_InterlockedCompareExchange16_nf:
264 case clang::ARM::BI_InterlockedCompareExchange_nf:
265 case clang::ARM::BI_InterlockedCompareExchange64_nf:
266 return MSVCIntrin::_InterlockedCompareExchange_nf;
267 case clang::ARM::BI_InterlockedOr8_acq:
268 case clang::ARM::BI_InterlockedOr16_acq:
269 case clang::ARM::BI_InterlockedOr_acq:
270 case clang::ARM::BI_InterlockedOr64_acq:
271 return MSVCIntrin::_InterlockedOr_acq;
272 case clang::ARM::BI_InterlockedOr8_rel:
273 case clang::ARM::BI_InterlockedOr16_rel:
274 case clang::ARM::BI_InterlockedOr_rel:
275 case clang::ARM::BI_InterlockedOr64_rel:
276 return MSVCIntrin::_InterlockedOr_rel;
277 case clang::ARM::BI_InterlockedOr8_nf:
278 case clang::ARM::BI_InterlockedOr16_nf:
279 case clang::ARM::BI_InterlockedOr_nf:
280 case clang::ARM::BI_InterlockedOr64_nf:
281 return MSVCIntrin::_InterlockedOr_nf;
282 case clang::ARM::BI_InterlockedXor8_acq:
283 case clang::ARM::BI_InterlockedXor16_acq:
284 case clang::ARM::BI_InterlockedXor_acq:
285 case clang::ARM::BI_InterlockedXor64_acq:
286 return MSVCIntrin::_InterlockedXor_acq;
287 case clang::ARM::BI_InterlockedXor8_rel:
288 case clang::ARM::BI_InterlockedXor16_rel:
289 case clang::ARM::BI_InterlockedXor_rel:
290 case clang::ARM::BI_InterlockedXor64_rel:
291 return MSVCIntrin::_InterlockedXor_rel;
292 case clang::ARM::BI_InterlockedXor8_nf:
293 case clang::ARM::BI_InterlockedXor16_nf:
294 case clang::ARM::BI_InterlockedXor_nf:
295 case clang::ARM::BI_InterlockedXor64_nf:
296 return MSVCIntrin::_InterlockedXor_nf;
297 case clang::ARM::BI_InterlockedAnd8_acq:
298 case clang::ARM::BI_InterlockedAnd16_acq:
299 case clang::ARM::BI_InterlockedAnd_acq:
300 case clang::ARM::BI_InterlockedAnd64_acq:
301 return MSVCIntrin::_InterlockedAnd_acq;
302 case clang::ARM::BI_InterlockedAnd8_rel:
303 case clang::ARM::BI_InterlockedAnd16_rel:
304 case clang::ARM::BI_InterlockedAnd_rel:
305 case clang::ARM::BI_InterlockedAnd64_rel:
306 return MSVCIntrin::_InterlockedAnd_rel;
307 case clang::ARM::BI_InterlockedAnd8_nf:
308 case clang::ARM::BI_InterlockedAnd16_nf:
309 case clang::ARM::BI_InterlockedAnd_nf:
310 case clang::ARM::BI_InterlockedAnd64_nf:
311 return MSVCIntrin::_InterlockedAnd_nf;
312 case clang::ARM::BI_InterlockedIncrement16_acq:
313 case clang::ARM::BI_InterlockedIncrement_acq:
314 case clang::ARM::BI_InterlockedIncrement64_acq:
315 return MSVCIntrin::_InterlockedIncrement_acq;
316 case clang::ARM::BI_InterlockedIncrement16_rel:
317 case clang::ARM::BI_InterlockedIncrement_rel:
318 case clang::ARM::BI_InterlockedIncrement64_rel:
319 return MSVCIntrin::_InterlockedIncrement_rel;
320 case clang::ARM::BI_InterlockedIncrement16_nf:
321 case clang::ARM::BI_InterlockedIncrement_nf:
322 case clang::ARM::BI_InterlockedIncrement64_nf:
323 return MSVCIntrin::_InterlockedIncrement_nf;
324 case clang::ARM::BI_InterlockedDecrement16_acq:
325 case clang::ARM::BI_InterlockedDecrement_acq:
326 case clang::ARM::BI_InterlockedDecrement64_acq:
327 return MSVCIntrin::_InterlockedDecrement_acq;
328 case clang::ARM::BI_InterlockedDecrement16_rel:
329 case clang::ARM::BI_InterlockedDecrement_rel:
330 case clang::ARM::BI_InterlockedDecrement64_rel:
331 return MSVCIntrin::_InterlockedDecrement_rel;
332 case clang::ARM::BI_InterlockedDecrement16_nf:
333 case clang::ARM::BI_InterlockedDecrement_nf:
334 case clang::ARM::BI_InterlockedDecrement64_nf:
335 return MSVCIntrin::_InterlockedDecrement_nf;
336 }
337 llvm_unreachable("must return from switch");
338}
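// Illustrative usage (a sketch of the assumed caller, which is not part of
// this excerpt): the ARM and AArch64 builtin emitters consult these tables
// first and defer to the shared MSVC lowering when a mapping exists, e.g.
//
//   if (std::optional<CodeGenFunction::MSVCIntrin> MsvcIntId =
//           translateArmToMsvcIntrin(BuiltinID))
//     return EmitMSVCBuiltinExpr(*MsvcIntId, E);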
339
340// Emit an intrinsic where all operands are of the same type as the result.
341// Depending on mode, this may be a constrained floating-point intrinsic.
342static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
343                                                unsigned IntrinsicID,
344 unsigned ConstrainedIntrinsicID,
345 llvm::Type *Ty,
346 ArrayRef<Value *> Args) {
347 Function *F;
348 if (CGF.Builder.getIsFPConstrained())
349 F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
350 else
351 F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
352
353 if (CGF.Builder.getIsFPConstrained())
354 return CGF.Builder.CreateConstrainedFPCall(F, Args);
355 else
356 return CGF.Builder.CreateCall(F, Args);
357}
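// Example (sketch): emitting a same-typed unary operation such as sqrt. Under
// strict FP-exception semantics the builder is FP-constrained and the
// constrained variant is selected; otherwise the plain intrinsic is used.
//
//   Value *R = emitCallMaybeConstrainedFPBuiltin(
//       CGF, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt,
//       Op->getType(), {Op});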
358
359static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
360 NeonTypeFlags TypeFlags,
361 bool HasFastHalfType = true,
362 bool V1Ty = false,
363 bool AllowBFloatArgsAndRet = true) {
364 int IsQuad = TypeFlags.isQuad();
365  switch (TypeFlags.getEltType()) {
366  case NeonTypeFlags::Int8:
367  case NeonTypeFlags::Poly8:
368  case NeonTypeFlags::MFloat8:
369    return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
370  case NeonTypeFlags::Int16:
371  case NeonTypeFlags::Poly16:
372    return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
373  case NeonTypeFlags::BFloat16:
374    if (AllowBFloatArgsAndRet)
375      return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
376    else
377      return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
378  case NeonTypeFlags::Float16:
379    if (HasFastHalfType)
380      return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
381    else
382      return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
383  case NeonTypeFlags::Int32:
384    return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
385  case NeonTypeFlags::Int64:
386  case NeonTypeFlags::Poly64:
387    return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
388  case NeonTypeFlags::Poly128:
389    // FIXME: i128 and f128 are not fully supported in Clang and LLVM yet;
390    // much of the i128/f128 API is missing, so we represent poly128 as
391    // v16i8 and rely on pattern matching.
392    return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
393  case NeonTypeFlags::Float32:
394    return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
395  case NeonTypeFlags::Float64:
396    return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
397  }
398 llvm_unreachable("Unknown vector element type!");
399}
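// Example (sketch, with an assumed CodeGenFunction &CGF in scope): an Int32
// element type with the quad bit set yields <4 x i32>, without it <2 x i32>;
// V1Ty collapses the result to a one-element vector for the scalar paths.
//
//   llvm::FixedVectorType *VTy = GetNeonType(
//       &CGF, NeonTypeFlags(NeonTypeFlags::Int32, /*IsUnsigned=*/false,
//                           /*IsQuad=*/true)); // <4 x i32>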
400
401static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
402 NeonTypeFlags IntTypeFlags) {
403 int IsQuad = IntTypeFlags.isQuad();
404  switch (IntTypeFlags.getEltType()) {
405  case NeonTypeFlags::Int16:
406    return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
407  case NeonTypeFlags::Int32:
408    return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
409  case NeonTypeFlags::Int64:
410    return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
411 default:
412 llvm_unreachable("Type can't be converted to floating-point!");
413 }
414}
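// Example (sketch): the integer flags of a fixed-point conversion builtin are
// mapped to the floating-point vector with the same lane count, e.g.
//
//   GetFloatNeonType(&CGF, NeonTypeFlags(NeonTypeFlags::Int32,
//                                        /*IsUnsigned=*/false,
//                                        /*IsQuad=*/true)); // <4 x float>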
415
416Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
417                                      const ElementCount &Count) {
418 Value *SV = llvm::ConstantVector::getSplat(Count, C);
419 return Builder.CreateShuffleVector(V, V, SV, "lane");
420}
421
422Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
423  ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
424 return EmitNeonSplat(V, C, EC);
425}
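// Example (sketch): splatting lane 1 of a <4 x i32> value. The constant lane
// index becomes a uniform shuffle mask, so the emitted IR is roughly
//   shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// (assuming LaneIdx holds the constant 1):
//
//   Value *Dup = CGF.EmitNeonSplat(V, cast<Constant>(LaneIdx));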
426
427Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
428                                     const char *name,
429 unsigned shift, bool rightshift) {
430 unsigned j = 0;
431 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
432 ai != ae; ++ai, ++j) {
433 if (F->isConstrainedFPIntrinsic())
434 if (ai->getType()->isMetadataTy())
435 continue;
436 if (shift > 0 && shift == j)
437 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
438 else
439 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
440 }
441
442 if (F->isConstrainedFPIntrinsic())
443 return Builder.CreateConstrainedFPCall(F, Ops, name);
444 else
445 return Builder.CreateCall(F, Ops, name);
446}
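// Typical use (sketch): the NEON emitters fetch the (possibly overloaded)
// intrinsic and let EmitNeonCall bitcast every operand to the parameter type
// the intrinsic declaration expects, e.g.
//
//   Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vpadd, Ty);
//   return EmitNeonCall(F, Ops, "vpadd");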
447
448llvm::Value *CodeGenFunction::EmitFP8NeonCall(unsigned IID,
449                                              ArrayRef<llvm::Type *> Tys,
450                                              SmallVectorImpl<llvm::Value *> &Ops,
451                                              const CallExpr *E, const char *name) {
452 llvm::Value *FPM =
453 EmitScalarOrConstFoldImmArg(/* ICEArguments */ 0, E->getNumArgs() - 1, E);
454 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr), FPM);
455 return EmitNeonCall(CGM.getIntrinsic(IID, Tys), Ops, name);
456}
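// Sketch of the resulting IR for an FP8 builtin whose trailing call argument
// carries the FPMR value: the mode register is programmed first, then the
// requested intrinsic (shown here with a placeholder name and result type) is
// emitted on the remaining operands.
//
//   call void @llvm.aarch64.set.fpmr(i64 %fpm)
//   %r = call <4 x half> @llvm.aarch64.neon.<fp8-intrinsic>(...)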
457
458llvm::Value *CodeGenFunction::EmitFP8NeonFDOTCall(
459    unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
460 SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
461
462 const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
463 RetTy->getPrimitiveSizeInBits();
464 llvm::Type *Tys[] = {llvm::FixedVectorType::get(RetTy, ElemCount),
465 Ops[1]->getType()};
466 if (ExtendLaneArg) {
467 auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
468 Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
469 uint64_t(0));
470 }
471 return EmitFP8NeonCall(IID, Tys, Ops, E, name);
472}
473
474llvm::Value *CodeGenFunction::EmitFP8NeonFMLACall(
475    unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
476 SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
477
478 if (ExtendLaneArg) {
479 auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
480 Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
481 uint64_t(0));
482 }
483 const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
484 RetTy->getPrimitiveSizeInBits();
485 return EmitFP8NeonCall(IID, {llvm::FixedVectorType::get(RetTy, ElemCount)},
486 Ops, E, name);
487}
488
489Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
490                                            bool neg) {
491 int SV = cast<ConstantInt>(V)->getSExtValue();
492 return ConstantInt::get(Ty, neg ? -SV : SV);
493}
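// Example (sketch): a right shift by 3 on a <4 x i16> operand. The amount is
// negated for right shifts because the vshl-style shift intrinsics encode a
// right shift as a negative per-lane shift amount.
//
//   Value *Amt = EmitNeonShiftVector(Ops[1], Ty, /*neg=*/true);
//   // Amt is a <4 x i16> splat of -3 when Ops[1] is the constant 3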
494
495Value *CodeGenFunction::EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
496 llvm::Type *Ty1, bool Extract,
497                                           SmallVectorImpl<llvm::Value *> &Ops,
498                                           const CallExpr *E,
499 const char *name) {
500 llvm::Type *Tys[] = {Ty0, Ty1};
501 if (Extract) {
502 // Op[0] is mfloat8x16_t, but the intrinsic converts only the lower part of
503 // the vector.
504 Tys[1] = llvm::FixedVectorType::get(Int8Ty, 8);
505 Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], uint64_t(0));
506 }
507 return EmitFP8NeonCall(IID, Tys, Ops, E, name);
508}
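// Sketch: when Extract is set, only the low half of the 16-byte mfloat8
// operand participates in the conversion, so it is narrowed to <8 x i8>
// first, roughly:
//
//   %lo = call <8 x i8> @llvm.vector.extract.v8i8.v16i8(<16 x i8> %op, i64 0)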
509
510// Right-shift a vector by a constant.
511Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
512                                          llvm::Type *Ty, bool usgn,
513 const char *name) {
514 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
515
516 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
517 int EltSize = VTy->getScalarSizeInBits();
518
519 Vec = Builder.CreateBitCast(Vec, Ty);
520
521 // lshr/ashr are undefined when the shift amount is equal to the vector
522 // element size.
523 if (ShiftAmt == EltSize) {
524 if (usgn) {
525 // Right-shifting an unsigned value by its size yields 0.
526 return llvm::ConstantAggregateZero::get(VTy);
527 } else {
528 // Right-shifting a signed value by its size is equivalent
529 // to a shift of size-1.
530 --ShiftAmt;
531 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
532 }
533 }
534
535 Shift = EmitNeonShiftVector(Shift, Ty, false);
536 if (usgn)
537 return Builder.CreateLShr(Vec, Shift, name);
538 else
539 return Builder.CreateAShr(Vec, Shift, name);
540}
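// Example (sketch): for a <4 x i32> vector a right shift by 32 (the full
// element width) cannot be emitted directly, since lshr/ashr give a poison
// value at that amount; the unsigned form folds to zeroinitializer and the
// signed form is clamped to a shift by 31, which keeps the sign-replication
// semantics.
//
//   EmitNeonRShiftImm(Vec, Builder.getInt32(32), VTy, /*usgn=*/false, "vshr_n");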
541
542enum {
543  AddRetType = (1 << 0),
544  Add1ArgType = (1 << 1),
545  Add2ArgTypes = (1 << 2),
546
547  VectorizeRetType = (1 << 3),
548  VectorizeArgTypes = (1 << 4),
549
550  InventFloatType = (1 << 5),
551  UnsignedAlts = (1 << 6),
552
553  Use64BitVectors = (1 << 7),
554  Use128BitVectors = (1 << 8),
555
556  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
557  VectorRet = AddRetType | VectorizeRetType,
558  VectorRetGetArgs01 =
559      AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
560  FpCmpzModifiers =
561      AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
562};
563
564namespace {
565struct ARMVectorIntrinsicInfo {
566 const char *NameHint;
567 unsigned BuiltinID;
568 unsigned LLVMIntrinsic;
569 unsigned AltLLVMIntrinsic;
570  unsigned TypeModifier;
571
572 bool operator<(unsigned RHSBuiltinID) const {
573 return BuiltinID < RHSBuiltinID;
574 }
575 bool operator<(const ARMVectorIntrinsicInfo &TE) const {
576 return BuiltinID < TE.BuiltinID;
577 }
578};
579} // end anonymous namespace
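// The comparison operators above exist so the tables below can be searched by
// builtin ID with a binary search; a sketch of the assumed lookup (the actual
// helper is not part of this excerpt):
//
//   const ARMVectorIntrinsicInfo *Entry =
//       llvm::lower_bound(ARMSIMDIntrinsicMap, BuiltinID);
//   // only valid while each map stays sorted by BuiltinID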
580
581#define NEONMAP0(NameBase) \
582 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
583
584#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
585  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, \
586 Intrinsic::LLVMIntrinsic, 0, TypeModifier }
587
588#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
589  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, \
590 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
591 TypeModifier }
592
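// Example: NEONMAP1(vabs_v, arm_neon_vabs, 0) expands to the table entry
//
//   { "vabs_v", NEON::BI__builtin_neon_vabs_v, Intrinsic::arm_neon_vabs, 0, 0 }
//
// i.e. the clang builtin maps to a single LLVM intrinsic, with no alternate
// intrinsic and no type-modifier flags.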
593// clang-format off
594static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[] = {
595 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
596 NEONMAP0(splat_lane_v),
597 NEONMAP0(splat_laneq_v),
598 NEONMAP0(splatq_lane_v),
599 NEONMAP0(splatq_laneq_v),
600 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
601 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
602 NEONMAP1(vabs_v, arm_neon_vabs, 0),
603 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
604 NEONMAP0(vadd_v),
605 NEONMAP0(vaddhn_v),
606 NEONMAP0(vaddq_v),
607 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
608 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
609 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
610 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
611 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
612 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
613 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
614 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
615 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
616 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
617 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
618 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
619 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
620 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
621 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
622 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
623 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
624 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
625 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
626 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
627 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
628 NEONMAP1(vcage_v, arm_neon_vacge, 0),
629 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
630 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
631 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
632 NEONMAP1(vcale_v, arm_neon_vacge, 0),
633 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
634 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
635 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
636 NEONMAP0(vceqz_v),
637 NEONMAP0(vceqzq_v),
638 NEONMAP0(vcgez_v),
639 NEONMAP0(vcgezq_v),
640 NEONMAP0(vcgtz_v),
641 NEONMAP0(vcgtzq_v),
642 NEONMAP0(vclez_v),
643 NEONMAP0(vclezq_v),
644 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
645 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
646 NEONMAP0(vcltz_v),
647 NEONMAP0(vcltzq_v),
648 NEONMAP1(vclz_v, ctlz, Add1ArgType),
649 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
650 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
651 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
652 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
653 NEONMAP0(vcvt_f16_s16),
654 NEONMAP0(vcvt_f16_u16),
655 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
656 NEONMAP0(vcvt_f32_v),
657 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
658 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
659 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
660 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
661 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
662 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
663 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
664 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
665 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
666 NEONMAP0(vcvt_s16_f16),
667 NEONMAP0(vcvt_s32_v),
668 NEONMAP0(vcvt_s64_v),
669 NEONMAP0(vcvt_u16_f16),
670 NEONMAP0(vcvt_u32_v),
671 NEONMAP0(vcvt_u64_v),
672 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
673 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
674 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
675 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
676 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
677 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
678 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
679 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
680 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
681 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
682 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
683 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
684 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
685 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
686 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
687 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
688 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
689 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
690 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
691 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
692 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
693 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
694 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
695 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
696 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
697 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
698 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
699 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
700 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
701 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
702 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
703 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
704 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
705 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
706 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
707 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
708 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
709 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
710 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
711 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
712 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
713 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
714 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
715 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
716 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
717 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
718 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
719 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
720 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
721 NEONMAP0(vcvtq_f16_s16),
722 NEONMAP0(vcvtq_f16_u16),
723 NEONMAP0(vcvtq_f32_v),
724 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
725 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
726 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
727 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
728 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
729 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
730 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
731 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
732 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
733 NEONMAP0(vcvtq_s16_f16),
734 NEONMAP0(vcvtq_s32_v),
735 NEONMAP0(vcvtq_s64_v),
736 NEONMAP0(vcvtq_u16_f16),
737 NEONMAP0(vcvtq_u32_v),
738 NEONMAP0(vcvtq_u64_v),
739 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
740 NEONMAP1(vdot_u32, arm_neon_udot, 0),
741 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
742 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
743 NEONMAP0(vext_v),
744 NEONMAP0(vextq_v),
745 NEONMAP0(vfma_v),
746 NEONMAP0(vfmaq_v),
747 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
748 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
749 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
750 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
751 NEONMAP0(vld1_dup_v),
752 NEONMAP1(vld1_v, arm_neon_vld1, 0),
753 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
754 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
755 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
756 NEONMAP0(vld1q_dup_v),
757 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
758 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
759 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
760 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
761 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
762 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
763 NEONMAP1(vld2_v, arm_neon_vld2, 0),
764 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
765 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
766 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
767 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
768 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
769 NEONMAP1(vld3_v, arm_neon_vld3, 0),
770 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
771 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
772 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
773 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
774 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
775 NEONMAP1(vld4_v, arm_neon_vld4, 0),
776 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
777 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
778 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
779 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
780 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
781 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
782 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
783 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
784 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
785 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
786 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
787 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
788 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
789 NEONMAP0(vmovl_v),
790 NEONMAP0(vmovn_v),
791 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
792 NEONMAP0(vmull_v),
793 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
794 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
795 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
796 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
797 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
798 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
799 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
800 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
801 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
802 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
803 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
804 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
805 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
806 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
807 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
808 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
809 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
810 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
811 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
812 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
813 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
814 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
815 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
816 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
817 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
818 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
819 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
820 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
821 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
822 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
823 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
824 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
825 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
826 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
827 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
828 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
829 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
830 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
831 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
832 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
833 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
834 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
835 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
836 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
837 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
838 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
839 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
840 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
841 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
842 NEONMAP1(vrnd_v, trunc, Add1ArgType),
843 NEONMAP1(vrnda_v, round, Add1ArgType),
844 NEONMAP1(vrndaq_v, round, Add1ArgType),
845 NEONMAP0(vrndi_v),
846 NEONMAP0(vrndiq_v),
847 NEONMAP1(vrndm_v, floor, Add1ArgType),
848 NEONMAP1(vrndmq_v, floor, Add1ArgType),
849 NEONMAP1(vrndn_v, roundeven, Add1ArgType),
850 NEONMAP1(vrndnq_v, roundeven, Add1ArgType),
851 NEONMAP1(vrndp_v, ceil, Add1ArgType),
852 NEONMAP1(vrndpq_v, ceil, Add1ArgType),
853 NEONMAP1(vrndq_v, trunc, Add1ArgType),
854 NEONMAP1(vrndx_v, rint, Add1ArgType),
855 NEONMAP1(vrndxq_v, rint, Add1ArgType),
856 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
857 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
858 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
859 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
860 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
861 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
862 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
863 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
864 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
865 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
866 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
867 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
868 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
869 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
870 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
871 NEONMAP0(vshl_n_v),
872 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
873 NEONMAP0(vshll_n_v),
874 NEONMAP0(vshlq_n_v),
875 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
876 NEONMAP0(vshr_n_v),
877 NEONMAP0(vshrn_n_v),
878 NEONMAP0(vshrq_n_v),
879 NEONMAP1(vst1_v, arm_neon_vst1, 0),
880 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
881 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
882 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
883 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
884 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
885 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
886 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
887 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
888 NEONMAP1(vst2_v, arm_neon_vst2, 0),
889 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
890 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
891 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
892 NEONMAP1(vst3_v, arm_neon_vst3, 0),
893 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
894 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
895 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
896 NEONMAP1(vst4_v, arm_neon_vst4, 0),
897 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
898 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
899 NEONMAP0(vsubhn_v),
900 NEONMAP0(vtrn_v),
901 NEONMAP0(vtrnq_v),
902 NEONMAP0(vtst_v),
903 NEONMAP0(vtstq_v),
904 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
905 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
906 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
907 NEONMAP0(vuzp_v),
908 NEONMAP0(vuzpq_v),
909 NEONMAP0(vzip_v),
910 NEONMAP0(vzipq_v)
911};
912
913static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
914 NEONMAP0(splat_lane_v),
915 NEONMAP0(splat_laneq_v),
916 NEONMAP0(splatq_lane_v),
917 NEONMAP0(splatq_laneq_v),
918 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
919 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
920 NEONMAP0(vadd_v),
921 NEONMAP0(vaddhn_v),
922 NEONMAP0(vaddq_p128),
923 NEONMAP0(vaddq_v),
924 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
925 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
926 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
927 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
928 NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
929 NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
930 NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
931 NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
932 NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
933 NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
934 NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
935 NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
936 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
937 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
938 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
939 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
940 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
941 NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
942 NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
943 NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
944 NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
945 NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
946 NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
947 NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
948 NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
949 NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
950 NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
951 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
952 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
953 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
954 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
955 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
956 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
957 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
958 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
959 NEONMAP0(vceqz_v),
960 NEONMAP0(vceqzq_v),
961 NEONMAP0(vcgez_v),
962 NEONMAP0(vcgezq_v),
963 NEONMAP0(vcgtz_v),
964 NEONMAP0(vcgtzq_v),
965 NEONMAP0(vclez_v),
966 NEONMAP0(vclezq_v),
967 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
968 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
969 NEONMAP0(vcltz_v),
970 NEONMAP0(vcltzq_v),
971 NEONMAP1(vclz_v, ctlz, Add1ArgType),
972 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
973 NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
974 NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
975 NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
976 NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
977 NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
978 NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
979 NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
980 NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
981 NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
982 NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
983 NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
984 NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
985 NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
986 NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
987 NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
988 NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
989 NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
990 NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
991 NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
992 NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
993 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
994 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
995 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
996 NEONMAP0(vcvt_f16_s16),
997 NEONMAP0(vcvt_f16_u16),
998 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
999 NEONMAP0(vcvt_f32_v),
1000 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
1001 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
1002 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1003 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1004 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
1005 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
1006 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
1007 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
1008 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
1009 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
1010 NEONMAP0(vcvtq_f16_s16),
1011 NEONMAP0(vcvtq_f16_u16),
1012 NEONMAP0(vcvtq_f32_v),
1013 NEONMAP0(vcvtq_high_bf16_f32),
1014 NEONMAP0(vcvtq_low_bf16_f32),
1015 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
1016 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
1017 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1018 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1019 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
1020 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
1021 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
1022 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
1023 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
1024 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
1025 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
1026 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
1027 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
1028 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
1029 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
1030 NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1031 NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1032 NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1033 NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1034 NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1035 NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1036 NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1037 NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1038 NEONMAP0(vext_v),
1039 NEONMAP0(vextq_v),
1040 NEONMAP0(vfma_v),
1041 NEONMAP0(vfmaq_v),
1042 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
1043 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
1044 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
1045 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
1046 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
1047 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
1048 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
1049 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
1050 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
1051 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
1052 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
1053 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
1054 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
1055 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
1056 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
1057 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
1058 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
1059 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
1060 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
1061 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
1062 NEONMAP0(vmovl_v),
1063 NEONMAP0(vmovn_v),
1064 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
1065 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
1066 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
1067 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
1068 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
1069 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
1070 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
1071 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
1072 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
1073 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
1074 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
1075 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
1076 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
1077 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
1078 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
1079 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
1080 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
1081 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
1082 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
1083 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
1084 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
1085 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
1086 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
1087 NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
1088 NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
1089 NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
1090 NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
1091 NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
1092 NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
1093 NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
1094 NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
1095 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
1096 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
1097 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
1098 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
1099 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
1100 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
1101 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
1102 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
1103 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
1104 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
1105 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
1106 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
1107 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
1108 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
1109 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
1110 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
1111 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
1112 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
1113 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
1114 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
1115 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
1116 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
1117 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
1118 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
1119 NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
1120 NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
1121 NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
1122 NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
1123 NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
1124 NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
1125 NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
1126 NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
1127 NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
1128 NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
1129 NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
1130 NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
1131 NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
1132 NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
1133 NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
1134 NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
1135 NEONMAP0(vrndi_v),
1136 NEONMAP0(vrndiq_v),
1137 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
1138 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
1139 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
1140 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
1141 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
1142 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
1143 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
1144 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
1145 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
1146 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
1147 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
1148 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
1149 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
1150 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
1151 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
1152 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
1153 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
1154 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
1155 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
1156 NEONMAP0(vshl_n_v),
1157 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
1158 NEONMAP0(vshll_n_v),
1159 NEONMAP0(vshlq_n_v),
1160 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
1161 NEONMAP0(vshr_n_v),
1162 NEONMAP0(vshrn_n_v),
1163 NEONMAP0(vshrq_n_v),
1164 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
1165 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
1166 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
1167 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
1168 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
1169 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
1170 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
1171 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
1172 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
1173 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
1174 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
1175 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
1176 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
1177 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
1178 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
1179 NEONMAP0(vsubhn_v),
1180 NEONMAP0(vtst_v),
1181 NEONMAP0(vtstq_v),
1182 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
1183 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
1184 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
1185 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
1186};
1187
1188static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
1189 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
1190 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
1191 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
1192 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
1193 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
1194 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
1195 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
1196 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
1197 NEONMAP1(vaddv_s16, vector_reduce_add, Add1ArgType),
1198 NEONMAP1(vaddv_s32, vector_reduce_add, Add1ArgType),
1199 NEONMAP1(vaddv_s8, vector_reduce_add, Add1ArgType),
1200 NEONMAP1(vaddv_u16, vector_reduce_add, Add1ArgType),
1201 NEONMAP1(vaddv_u32, vector_reduce_add, Add1ArgType),
1202 NEONMAP1(vaddv_u8, vector_reduce_add, Add1ArgType),
1203 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
1204 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
1205 NEONMAP1(vaddvq_s16, vector_reduce_add, Add1ArgType),
1206 NEONMAP1(vaddvq_s32, vector_reduce_add, Add1ArgType),
1207 NEONMAP1(vaddvq_s64, vector_reduce_add, Add1ArgType),
1208 NEONMAP1(vaddvq_s8, vector_reduce_add, Add1ArgType),
1209 NEONMAP1(vaddvq_u16, vector_reduce_add, Add1ArgType),
1210 NEONMAP1(vaddvq_u32, vector_reduce_add, Add1ArgType),
1211 NEONMAP1(vaddvq_u64, vector_reduce_add, Add1ArgType),
1212 NEONMAP1(vaddvq_u8, vector_reduce_add, Add1ArgType),
1213 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
1214 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
1215 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
1216 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
1217 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
1218 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
1219 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
1220 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
1221 NEONMAP1(vcvtad_s32_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1222 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1223 NEONMAP1(vcvtad_u32_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1224 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1225 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1226 NEONMAP1(vcvtas_s64_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1227 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1228 NEONMAP1(vcvtas_u64_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1229 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1230 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1231 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1232 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1233 NEONMAP1(vcvtd_s32_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1234 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1235 NEONMAP1(vcvtd_u32_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1236 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1237 NEONMAP0(vcvth_bf16_f32),
1238 NEONMAP1(vcvtmd_s32_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1239 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1240 NEONMAP1(vcvtmd_u32_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1241 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1242 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1243 NEONMAP1(vcvtms_s64_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1244 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1245 NEONMAP1(vcvtms_u64_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1246 NEONMAP1(vcvtnd_s32_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1247 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1248 NEONMAP1(vcvtnd_u32_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1249 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1250 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1251 NEONMAP1(vcvtns_s64_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1252 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1253 NEONMAP1(vcvtns_u64_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1254 NEONMAP1(vcvtpd_s32_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1255 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1256 NEONMAP1(vcvtpd_u32_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1257 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1258 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1259 NEONMAP1(vcvtps_s64_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1260 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1261 NEONMAP1(vcvtps_u64_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1262 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1263 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1264 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1265 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1266 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1267 NEONMAP1(vcvts_s64_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1268 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1269 NEONMAP1(vcvts_u64_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1270 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
1271 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1272 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1273 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1274 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1275 NEONMAP1(vmaxv_s16, vector_reduce_smax, Add1ArgType),
1276 NEONMAP1(vmaxv_s32, vector_reduce_smax, Add1ArgType),
1277 NEONMAP1(vmaxv_s8, vector_reduce_smax, Add1ArgType),
1278 NEONMAP1(vmaxv_u16, vector_reduce_umax, Add1ArgType),
1279 NEONMAP1(vmaxv_u32, vector_reduce_umax, Add1ArgType),
1280 NEONMAP1(vmaxv_u8, vector_reduce_umax, Add1ArgType),
1281 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1282 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1283 NEONMAP1(vmaxvq_s16, vector_reduce_smax, Add1ArgType),
1284 NEONMAP1(vmaxvq_s32, vector_reduce_smax, Add1ArgType),
1285 NEONMAP1(vmaxvq_s8, vector_reduce_smax, Add1ArgType),
1286 NEONMAP1(vmaxvq_u16, vector_reduce_umax, Add1ArgType),
1287 NEONMAP1(vmaxvq_u32, vector_reduce_umax, Add1ArgType),
1288 NEONMAP1(vmaxvq_u8, vector_reduce_umax, Add1ArgType),
1289 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1290 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1291 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1292 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1293 NEONMAP1(vminv_s16, vector_reduce_smin, Add1ArgType),
1294 NEONMAP1(vminv_s32, vector_reduce_smin, Add1ArgType),
1295 NEONMAP1(vminv_s8, vector_reduce_smin, Add1ArgType),
1296 NEONMAP1(vminv_u16, vector_reduce_umin, Add1ArgType),
1297 NEONMAP1(vminv_u32, vector_reduce_umin, Add1ArgType),
1298 NEONMAP1(vminv_u8, vector_reduce_umin, Add1ArgType),
1299 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1300 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
1301 NEONMAP1(vminvq_s16, vector_reduce_smin, Add1ArgType),
1302 NEONMAP1(vminvq_s32, vector_reduce_smin, Add1ArgType),
1303 NEONMAP1(vminvq_s8, vector_reduce_smin, Add1ArgType),
1304 NEONMAP1(vminvq_u16, vector_reduce_umin, Add1ArgType),
1305 NEONMAP1(vminvq_u32, vector_reduce_umin, Add1ArgType),
1306 NEONMAP1(vminvq_u8, vector_reduce_umin, Add1ArgType),
1307 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
1308 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
1309 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
1310 NEONMAP1(vpaddd_s64, vector_reduce_add, Add1ArgType),
1311 NEONMAP1(vpaddd_u64, vector_reduce_add, Add1ArgType),
1312 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1313 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1314 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1315 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1316 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1317 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1318 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
1319 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1320 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
1321 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
1322 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
1323 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
1324 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
1325 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
1326 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
1327 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
1328 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
1329 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
1330 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
1331 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
1332 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
1333 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
1334 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
1335 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
1336 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
1337 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
1338 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
1339 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
1340 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
1341 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
1342 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
1343 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
1344 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
1345 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
1346 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
1347 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
1348 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
1349 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
1350 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
1351 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
1352 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
1353 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
1354 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
1355 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
1356 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
1357 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
1358 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
1359 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
1360 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
1361 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
1362 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
1363 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
1364 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
1365 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
1366 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
1367 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
1368 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
1369 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
1370 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
1371 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
1372 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1373 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1374 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1375 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1376 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
1377 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
1378 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1379 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1380 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1381 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1382 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
1383 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
1384 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
1385 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
1386 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
1387 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
1388 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
1389 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
1390 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
1391 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
1392 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
1393 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
1394 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
1395 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
1396 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
1397 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
1398 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
1399 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
1400 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
1401 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
1402 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
1403 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
1404 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
1405 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
1406 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
1407 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
1408 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
1409 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
1410 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
1411 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
1412 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
1413 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
1414 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
1415 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
1416 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
1417 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
1418 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
1419 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
1420 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
1421 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
1422 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
1423 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
1424 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
1425 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
1426 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
1427 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
1428 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
1429 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
1430 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
1431 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
1432 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
1433 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
1434 // FP16 scalar intrinsics go here.
1435 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
1436 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1437 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1438 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1439 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1440 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1441 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1442 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1443 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1444 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1445 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1446 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1447 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1448 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1449 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1450 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1451 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1452 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1453 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1454 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1455 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1456 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1457 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1458 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1459 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1460 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1461 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1462 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1463 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1464 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
1465 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
1466 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
1467 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
1468 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
1469};
1470// clang-format on
1471
1472// Some intrinsics are equivalent for codegen.
1473static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
1474 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
1475 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
1476 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
1477 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
1478 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
1479 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
1480 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
1481 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
1482 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
1483 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
1484 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
1485 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
1486 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
1487 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
1488 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
1489 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
1490 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
1491 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
1492 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
1493 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
1494 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
1495 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
1496 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
1497 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
1498 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
1499 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
1500 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
1501 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
1502 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
1503 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
1504 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
1505 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
1506 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
1507 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
1508 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
1509 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
1510 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
1511 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
1512 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
1513 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
1514 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
1515 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
1516 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
1517 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
1518 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
1519 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
1520 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
1521 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
1522 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
1523 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
1524 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
1525 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
1526 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
1527 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
1528 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
1529 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
1530 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
1531 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
1532 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
1533 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
1534 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
1535 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
1536 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
1537 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
1538 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
1539 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
1540 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
1541 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
1542 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
1543 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
1544 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
1545 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
1546 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
1547 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
1548 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
1549 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
1550 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
1551 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
1552 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
1553 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
1554 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
1555 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
1556 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
1557 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
1558 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
1559 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
1560 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
1561 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
1562 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
1563 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
1564 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
1565 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
1566 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
1567 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
1568 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
1569 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
1570 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
1571 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
1572 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
1573 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
1574 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
1575 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
1576 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
1577 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
1578 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
1579 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
1580 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
1581 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
1582 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
1583 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
1584 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
1585 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
1586 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
1587 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
1588 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
1589 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
1590 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
1591 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
1592 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
1593 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
1594 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
1595 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
1596 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
1597 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
1598 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
1599 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
1600 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
1601 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
1602 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
1603 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
1604 // arbitrary one to be handled as the canonical variation.
1605 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1606 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1607 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1608 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1609 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1610 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1611 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1612 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1613 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1614 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1615 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1616 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1617};
1618
1619#undef NEONMAP0
1620#undef NEONMAP1
1621#undef NEONMAP2
1622
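// Illustrative sketch (assumed by analogy with the SVEMAP1/SVEMAP2 macros
// defined just below): a NEONMAP1 entry such as
//   NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType)
// presumably expands to an ARMVectorIntrinsicInfo initializer of the shape
//   { "vqaddd_s64", NEON::BI__builtin_neon_vqaddd_s64,
//     Intrinsic::aarch64_neon_sqadd, /*AltLLVMIntrinsic=*/0, Add1ArgType }
// NEONMAP2 additionally supplies the alternate intrinsic (used by the
// UnsignedAlts selection further down), and NEONMAP0 records only the builtin.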
1623#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
1624 { \
1625 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
1626 TypeModifier \
1627 }
1628
1629#define SVEMAP2(NameBase, TypeModifier) \
1630 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
1631static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
1632#define GET_SVE_LLVM_INTRINSIC_MAP
1633#include "clang/Basic/arm_sve_builtin_cg.inc"
1634#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
1635#undef GET_SVE_LLVM_INTRINSIC_MAP
1636};
1637
1638#undef SVEMAP1
1639#undef SVEMAP2
1640
1641#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
1642 { \
1643 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
1644 TypeModifier \
1645 }
1646
1647#define SMEMAP2(NameBase, TypeModifier) \
1648 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
1649static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
1650#define GET_SME_LLVM_INTRINSIC_MAP
1651#include "clang/Basic/arm_sme_builtin_cg.inc"
1652#undef GET_SME_LLVM_INTRINSIC_MAP
1653};
1654
1655#undef SMEMAP1
1656#undef SMEMAP2
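// Note: each of the tables above is ordered by BuiltinID; the lookup helper
// below relies on that ordering for its binary search and, in asserts builds,
// verifies it once per table via the "proven sorted" flag it is handed.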
1657
1658static bool NEONSIMDIntrinsicsProvenSorted = false;
1659
1660static bool AArch64SIMDIntrinsicsProvenSorted = false;
1661static bool AArch64SISDIntrinsicsProvenSorted = false;
1662static bool AArch64SVEIntrinsicsProvenSorted = false;
1663static bool AArch64SMEIntrinsicsProvenSorted = false;
1664
1665static const ARMVectorIntrinsicInfo *
1666findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
1667 unsigned BuiltinID, bool &MapProvenSorted) {
1668
1669#ifndef NDEBUG
1670 if (!MapProvenSorted) {
1671 assert(llvm::is_sorted(IntrinsicMap));
1672 MapProvenSorted = true;
1673 }
1674#endif
1675
1676 const ARMVectorIntrinsicInfo *Builtin =
1677 llvm::lower_bound(IntrinsicMap, BuiltinID);
1678
1679 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
1680 return Builtin;
1681
1682 return nullptr;
1683}
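// Minimal usage sketch (caller shape assumed, not shown in this excerpt):
//   if (const ARMVectorIntrinsicInfo *Info = findARMVectorIntrinsicInMap(
//           AArch64SISDIntrinsicMap, BuiltinID,
//           AArch64SISDIntrinsicsProvenSorted))
//     return EmitCommonNeonSISDBuiltinExpr(*this, *Info, Ops, E);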
1684
1685Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
1686 unsigned Modifier,
1687 llvm::Type *ArgType,
1688 const CallExpr *E) {
1689 int VectorSize = 0;
1690 if (Modifier & Use64BitVectors)
1691 VectorSize = 64;
1692 else if (Modifier & Use128BitVectors)
1693 VectorSize = 128;
1694
1695 // Return type.
1696 SmallVector<llvm::Type *, 3> Tys;
1697 if (Modifier & AddRetType) {
1698 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
1699 if (Modifier & VectorizeRetType)
1700 Ty = llvm::FixedVectorType::get(
1701 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
1702
1703 Tys.push_back(Ty);
1704 }
1705
1706 // Arguments.
1707 if (Modifier & VectorizeArgTypes) {
1708 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
1709 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
1710 }
1711
1712 if (Modifier & (Add1ArgType | Add2ArgTypes))
1713 Tys.push_back(ArgType);
1714
1715 if (Modifier & Add2ArgTypes)
1716 Tys.push_back(ArgType);
1717
1718 if (Modifier & InventFloatType)
1719 Tys.push_back(FloatTy);
1720
1721 return CGM.getIntrinsic(IntrinsicID, Tys);
1722}
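// Worked example (illustrative): vcvtd_s64_f64 maps to aarch64_neon_fcvtzs
// with AddRetType | Add1ArgType, so Tys becomes { i64, double } and the
// overload requested from CGM is roughly
//   declare i64 @llvm.aarch64.neon.fcvtzs.i64.f64(double)
// With Use64BitVectors / Use128BitVectors the pushed types are instead widened
// to fixed vectors of the corresponding total width.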
1723
1724static Value *EmitCommonNeonSISDBuiltinExpr(
1725 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
1726 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
1727 unsigned BuiltinID = SISDInfo.BuiltinID;
1728 unsigned int Int = SISDInfo.LLVMIntrinsic;
1729 unsigned Modifier = SISDInfo.TypeModifier;
1730 const char *s = SISDInfo.NameHint;
1731
1732 switch (BuiltinID) {
1733 case NEON::BI__builtin_neon_vcled_s64:
1734 case NEON::BI__builtin_neon_vcled_u64:
1735 case NEON::BI__builtin_neon_vcles_f32:
1736 case NEON::BI__builtin_neon_vcled_f64:
1737 case NEON::BI__builtin_neon_vcltd_s64:
1738 case NEON::BI__builtin_neon_vcltd_u64:
1739 case NEON::BI__builtin_neon_vclts_f32:
1740 case NEON::BI__builtin_neon_vcltd_f64:
1741 case NEON::BI__builtin_neon_vcales_f32:
1742 case NEON::BI__builtin_neon_vcaled_f64:
1743 case NEON::BI__builtin_neon_vcalts_f32:
1744 case NEON::BI__builtin_neon_vcaltd_f64:
1745 // Only one direction of comparisons actually exists; cmle is actually a cmge
1746 // with swapped operands. The table gives us the right intrinsic but we
1747 // still need to do the swap.
1748 std::swap(Ops[0], Ops[1]);
1749 break;
1750 }
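  // For example, vcaled_f64(a, b) is mapped to aarch64_neon_facge by the table
  // above, and the swap just performed makes the emitted call facge(b, a),
  // which is the required |a| <= |b| comparison.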
1751
1752 assert(Int && "Generic code assumes a valid intrinsic");
1753
1754 // Determine the type(s) of this overloaded AArch64 intrinsic.
1755 const Expr *Arg = E->getArg(0);
1756 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
1757 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
1758
1759 int j = 0;
1760 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
1761 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
1762 ai != ae; ++ai, ++j) {
1763 llvm::Type *ArgTy = ai->getType();
1764 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
1765 ArgTy->getPrimitiveSizeInBits())
1766 continue;
1767
1768 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
1769 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
1770 // it before inserting.
1771 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
1772 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
1773 Ops[j] =
1774 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
1775 }
1776
1777 Value *Result = CGF.EmitNeonCall(F, Ops, s);
1778 llvm::Type *ResultType = CGF.ConvertType(E->getType());
1779 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
1780 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
1781 return CGF.Builder.CreateExtractElement(Result, C0);
1782
1783 return CGF.Builder.CreateBitCast(Result, ResultType, s);
1784}
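// Illustrative IR shape for the scalar wrapping above (assumed, not taken from
// a test): a Vectorize1ArgType | Use64BitVectors entry such as vqaddh_s16
// (aarch64_neon_sqadd) comes out roughly as
//   %v0 = insertelement <4 x i16> poison, i16 %a, i64 0
//   %v1 = insertelement <4 x i16> poison, i16 %b, i64 0
//   %r  = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %v0, <4 x i16> %v1)
//   %s  = extractelement <4 x i16> %r, i64 0
// since only lane 0 of the 64-bit vector operands carries the scalar value.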
1785
1786Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
1787 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
1788 const char *NameHint, unsigned Modifier, const CallExpr *E,
1789 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
1790 llvm::Triple::ArchType Arch) {
1791 // Get the last argument, which specifies the vector type.
1792 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
1793 std::optional<llvm::APSInt> NeonTypeConst =
1794 Arg->getIntegerConstantExpr(getContext());
1795 if (!NeonTypeConst)
1796 return nullptr;
1797
1798 // Determine the type of this overloaded NEON intrinsic.
1799 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
1800 const bool Usgn = Type.isUnsigned();
1801 const bool Quad = Type.isQuad();
1802 const bool Floating = Type.isFloatingPoint();
1803 const bool HasFastHalfType = getTarget().hasFastHalfType();
1804 const bool AllowBFloatArgsAndRet =
1805 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
1806
1807 llvm::FixedVectorType *VTy =
1808 GetNeonType(this, Type, HasFastHalfType, false, AllowBFloatArgsAndRet);
1809 llvm::Type *Ty = VTy;
1810 if (!Ty)
1811 return nullptr;
1812
1813 auto getAlignmentValue32 = [&](Address addr) -> Value* {
1814 return Builder.getInt32(addr.getAlignment().getQuantity());
1815 };
1816
1817 unsigned Int = LLVMIntrinsic;
1818 if ((Modifier & UnsignedAlts) && !Usgn)
1819 Int = AltLLVMIntrinsic;
1820
1821 switch (BuiltinID) {
1822 default: break;
1823 case NEON::BI__builtin_neon_splat_lane_v:
1824 case NEON::BI__builtin_neon_splat_laneq_v:
1825 case NEON::BI__builtin_neon_splatq_lane_v:
1826 case NEON::BI__builtin_neon_splatq_laneq_v: {
1827 auto NumElements = VTy->getElementCount();
1828 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
1829 NumElements = NumElements * 2;
1830 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
1831 NumElements = NumElements.divideCoefficientBy(2);
1832
1833 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
1834 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
1835 }
1836 case NEON::BI__builtin_neon_vpadd_v:
1837 case NEON::BI__builtin_neon_vpaddq_v:
1838 // We don't allow fp/int overloading of intrinsics.
1839 if (VTy->getElementType()->isFloatingPointTy() &&
1840 Int == Intrinsic::aarch64_neon_addp)
1841 Int = Intrinsic::aarch64_neon_faddp;
1842 break;
1843 case NEON::BI__builtin_neon_vabs_v:
1844 case NEON::BI__builtin_neon_vabsq_v:
1845 if (VTy->getElementType()->isFloatingPointTy())
1846 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
1847 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
1848 case NEON::BI__builtin_neon_vadd_v:
1849 case NEON::BI__builtin_neon_vaddq_v: {
1850 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
1851 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
1852 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
1853 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
1854 return Builder.CreateBitCast(Ops[0], Ty);
1855 }
1856 case NEON::BI__builtin_neon_vaddhn_v: {
1857 llvm::FixedVectorType *SrcTy =
1858 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1859
1860 // %sum = add <4 x i32> %lhs, %rhs
1861 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1862 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
1863 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
1864
1865 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
1866 Constant *ShiftAmt =
1867 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
1868 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
1869
1870 // %res = trunc <4 x i32> %high to <4 x i16>
1871 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
1872 }
1873 case NEON::BI__builtin_neon_vcale_v:
1874 case NEON::BI__builtin_neon_vcaleq_v:
1875 case NEON::BI__builtin_neon_vcalt_v:
1876 case NEON::BI__builtin_neon_vcaltq_v:
1877 std::swap(Ops[0], Ops[1]);
1878 [[fallthrough]];
1879 case NEON::BI__builtin_neon_vcage_v:
1880 case NEON::BI__builtin_neon_vcageq_v:
1881 case NEON::BI__builtin_neon_vcagt_v:
1882 case NEON::BI__builtin_neon_vcagtq_v: {
1883 llvm::Type *Ty;
1884 switch (VTy->getScalarSizeInBits()) {
1885 default: llvm_unreachable("unexpected type");
1886 case 32:
1887 Ty = FloatTy;
1888 break;
1889 case 64:
1890 Ty = DoubleTy;
1891 break;
1892 case 16:
1893 Ty = HalfTy;
1894 break;
1895 }
1896 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
1897 llvm::Type *Tys[] = { VTy, VecFlt };
1898 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1899 return EmitNeonCall(F, Ops, NameHint);
1900 }
1901 case NEON::BI__builtin_neon_vceqz_v:
1902 case NEON::BI__builtin_neon_vceqzq_v:
1903 return EmitAArch64CompareBuiltinExpr(
1904 Ops[0], Ty, Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ, "vceqz");
1905 case NEON::BI__builtin_neon_vcgez_v:
1906 case NEON::BI__builtin_neon_vcgezq_v:
1907 return EmitAArch64CompareBuiltinExpr(
1908 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
1909 "vcgez");
1910 case NEON::BI__builtin_neon_vclez_v:
1911 case NEON::BI__builtin_neon_vclezq_v:
1912 return EmitAArch64CompareBuiltinExpr(
1913 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
1914 "vclez");
1915 case NEON::BI__builtin_neon_vcgtz_v:
1916 case NEON::BI__builtin_neon_vcgtzq_v:
1917 return EmitAArch64CompareBuiltinExpr(
1918 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
1919 "vcgtz");
1920 case NEON::BI__builtin_neon_vcltz_v:
1921 case NEON::BI__builtin_neon_vcltzq_v:
1922 return EmitAArch64CompareBuiltinExpr(
1923 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
1924 "vcltz");
1925 case NEON::BI__builtin_neon_vclz_v:
1926 case NEON::BI__builtin_neon_vclzq_v:
1927 // We generate a target-independent intrinsic, which needs a second argument
1928 // for whether or not clz of zero is undefined; on ARM it isn't.
1929 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
1930 break;
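  // On ARM, then, the call emitted by the common code below is roughly
  //   call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %x, i1 false)
  // (shape illustrative; the vector type follows the builtin's NEON type).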
1931 case NEON::BI__builtin_neon_vcvt_f32_v:
1932 case NEON::BI__builtin_neon_vcvtq_f32_v:
1933 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1934 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
1935 HasFastHalfType);
1936 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1937 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1938 case NEON::BI__builtin_neon_vcvt_f16_s16:
1939 case NEON::BI__builtin_neon_vcvt_f16_u16:
1940 case NEON::BI__builtin_neon_vcvtq_f16_s16:
1941 case NEON::BI__builtin_neon_vcvtq_f16_u16:
1942 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1943 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
1944 HasFastHalfType);
1945 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1946 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1947 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
1948 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
1949 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
1950 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
1951 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
1952 Function *F = CGM.getIntrinsic(Int, Tys);
1953 return EmitNeonCall(F, Ops, "vcvt_n");
1954 }
1955 case NEON::BI__builtin_neon_vcvt_n_f32_v:
1956 case NEON::BI__builtin_neon_vcvt_n_f64_v:
1957 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
1958 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
1959 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
1960 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
1961 Function *F = CGM.getIntrinsic(Int, Tys);
1962 return EmitNeonCall(F, Ops, "vcvt_n");
1963 }
1964 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
1965 case NEON::BI__builtin_neon_vcvt_n_s32_v:
1966 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
1967 case NEON::BI__builtin_neon_vcvt_n_u32_v:
1968 case NEON::BI__builtin_neon_vcvt_n_s64_v:
1969 case NEON::BI__builtin_neon_vcvt_n_u64_v:
1970 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
1971 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
1972 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
1973 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
1974 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
1975 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
1976 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
1977 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1978 return EmitNeonCall(F, Ops, "vcvt_n");
1979 }
1980 case NEON::BI__builtin_neon_vcvt_s32_v:
1981 case NEON::BI__builtin_neon_vcvt_u32_v:
1982 case NEON::BI__builtin_neon_vcvt_s64_v:
1983 case NEON::BI__builtin_neon_vcvt_u64_v:
1984 case NEON::BI__builtin_neon_vcvt_s16_f16:
1985 case NEON::BI__builtin_neon_vcvt_u16_f16:
1986 case NEON::BI__builtin_neon_vcvtq_s32_v:
1987 case NEON::BI__builtin_neon_vcvtq_u32_v:
1988 case NEON::BI__builtin_neon_vcvtq_s64_v:
1989 case NEON::BI__builtin_neon_vcvtq_u64_v:
1990 case NEON::BI__builtin_neon_vcvtq_s16_f16:
1991 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
1992 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
1993 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
1994 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
1995 }
1996 case NEON::BI__builtin_neon_vcvta_s16_f16:
1997 case NEON::BI__builtin_neon_vcvta_s32_v:
1998 case NEON::BI__builtin_neon_vcvta_s64_v:
1999 case NEON::BI__builtin_neon_vcvta_u16_f16:
2000 case NEON::BI__builtin_neon_vcvta_u32_v:
2001 case NEON::BI__builtin_neon_vcvta_u64_v:
2002 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
2003 case NEON::BI__builtin_neon_vcvtaq_s32_v:
2004 case NEON::BI__builtin_neon_vcvtaq_s64_v:
2005 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
2006 case NEON::BI__builtin_neon_vcvtaq_u32_v:
2007 case NEON::BI__builtin_neon_vcvtaq_u64_v:
2008 case NEON::BI__builtin_neon_vcvtn_s16_f16:
2009 case NEON::BI__builtin_neon_vcvtn_s32_v:
2010 case NEON::BI__builtin_neon_vcvtn_s64_v:
2011 case NEON::BI__builtin_neon_vcvtn_u16_f16:
2012 case NEON::BI__builtin_neon_vcvtn_u32_v:
2013 case NEON::BI__builtin_neon_vcvtn_u64_v:
2014 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
2015 case NEON::BI__builtin_neon_vcvtnq_s32_v:
2016 case NEON::BI__builtin_neon_vcvtnq_s64_v:
2017 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
2018 case NEON::BI__builtin_neon_vcvtnq_u32_v:
2019 case NEON::BI__builtin_neon_vcvtnq_u64_v:
2020 case NEON::BI__builtin_neon_vcvtp_s16_f16:
2021 case NEON::BI__builtin_neon_vcvtp_s32_v:
2022 case NEON::BI__builtin_neon_vcvtp_s64_v:
2023 case NEON::BI__builtin_neon_vcvtp_u16_f16:
2024 case NEON::BI__builtin_neon_vcvtp_u32_v:
2025 case NEON::BI__builtin_neon_vcvtp_u64_v:
2026 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
2027 case NEON::BI__builtin_neon_vcvtpq_s32_v:
2028 case NEON::BI__builtin_neon_vcvtpq_s64_v:
2029 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
2030 case NEON::BI__builtin_neon_vcvtpq_u32_v:
2031 case NEON::BI__builtin_neon_vcvtpq_u64_v:
2032 case NEON::BI__builtin_neon_vcvtm_s16_f16:
2033 case NEON::BI__builtin_neon_vcvtm_s32_v:
2034 case NEON::BI__builtin_neon_vcvtm_s64_v:
2035 case NEON::BI__builtin_neon_vcvtm_u16_f16:
2036 case NEON::BI__builtin_neon_vcvtm_u32_v:
2037 case NEON::BI__builtin_neon_vcvtm_u64_v:
2038 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
2039 case NEON::BI__builtin_neon_vcvtmq_s32_v:
2040 case NEON::BI__builtin_neon_vcvtmq_s64_v:
2041 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
2042 case NEON::BI__builtin_neon_vcvtmq_u32_v:
2043 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
2044 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
2045 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
2046 }
2047 case NEON::BI__builtin_neon_vcvtx_f32_v: {
2048 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
2049 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
2050
2051 }
2052 case NEON::BI__builtin_neon_vext_v:
2053 case NEON::BI__builtin_neon_vextq_v: {
2054 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
2055 SmallVector<int, 16> Indices;
2056 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
2057 Indices.push_back(i+CV);
2058
2059 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2060 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2061 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
2062 }
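  // E.g. for a 4-element vector with CV == 1 the indices are {1, 2, 3, 4}: the
  // shuffle selects the tail of the first operand followed by the head of the
  // second, which is exactly the EXT concatenate-and-extract behaviour.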
2063 case NEON::BI__builtin_neon_vfma_v:
2064 case NEON::BI__builtin_neon_vfmaq_v: {
2065 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2066 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2067 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2068
2069 // The NEON intrinsic puts the accumulator first, unlike the LLVM fma.
2070 return emitCallMaybeConstrainedFPBuiltin(
2071 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
2072 {Ops[1], Ops[2], Ops[0]});
2073 }
2074 case NEON::BI__builtin_neon_vld1_v:
2075 case NEON::BI__builtin_neon_vld1q_v: {
2076 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2077 Ops.push_back(getAlignmentValue32(PtrOp0));
2078 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
2079 }
2080 case NEON::BI__builtin_neon_vld1_x2_v:
2081 case NEON::BI__builtin_neon_vld1q_x2_v:
2082 case NEON::BI__builtin_neon_vld1_x3_v:
2083 case NEON::BI__builtin_neon_vld1q_x3_v:
2084 case NEON::BI__builtin_neon_vld1_x4_v:
2085 case NEON::BI__builtin_neon_vld1q_x4_v: {
2086 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
2087 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
2088 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
2089 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
2090 }
2091 case NEON::BI__builtin_neon_vld2_v:
2092 case NEON::BI__builtin_neon_vld2q_v:
2093 case NEON::BI__builtin_neon_vld3_v:
2094 case NEON::BI__builtin_neon_vld3q_v:
2095 case NEON::BI__builtin_neon_vld4_v:
2096 case NEON::BI__builtin_neon_vld4q_v:
2097 case NEON::BI__builtin_neon_vld2_dup_v:
2098 case NEON::BI__builtin_neon_vld2q_dup_v:
2099 case NEON::BI__builtin_neon_vld3_dup_v:
2100 case NEON::BI__builtin_neon_vld3q_dup_v:
2101 case NEON::BI__builtin_neon_vld4_dup_v:
2102 case NEON::BI__builtin_neon_vld4q_dup_v: {
2103 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2104 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
2105 Value *Align = getAlignmentValue32(PtrOp1);
2106 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
2107 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
2108 }
2109 case NEON::BI__builtin_neon_vld1_dup_v:
2110 case NEON::BI__builtin_neon_vld1q_dup_v: {
2111 Value *V = PoisonValue::get(Ty);
2112 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
2113 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
2114 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
2115 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
2116 return EmitNeonSplat(Ops[0], CI);
2117 }
2118 case NEON::BI__builtin_neon_vld2_lane_v:
2119 case NEON::BI__builtin_neon_vld2q_lane_v:
2120 case NEON::BI__builtin_neon_vld3_lane_v:
2121 case NEON::BI__builtin_neon_vld3q_lane_v:
2122 case NEON::BI__builtin_neon_vld4_lane_v:
2123 case NEON::BI__builtin_neon_vld4q_lane_v: {
2124 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2125 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
2126 for (unsigned I = 2; I < Ops.size() - 1; ++I)
2127 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
2128 Ops.push_back(getAlignmentValue32(PtrOp1));
2129 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
2130 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
2131 }
2132 case NEON::BI__builtin_neon_vmovl_v: {
2133 llvm::FixedVectorType *DTy =
2134 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
2135 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
2136 if (Usgn)
2137 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
2138 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
2139 }
2140 case NEON::BI__builtin_neon_vmovn_v: {
2141 llvm::FixedVectorType *QTy =
2142 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2143 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
2144 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
2145 }
2146 case NEON::BI__builtin_neon_vmull_v:
2147 // FIXME: the integer vmull operations could be emitted in terms of pure
2148 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
2149 // hoisting the exts outside loops. Until global ISel comes along that can
2150 // see through such movement, this leads to bad CodeGen. So we need an
2151 // intrinsic for now.
2152 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
2153 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
2154 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
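  // (The pure-IR form alluded to above would look like, for <4 x i16> inputs:
  //    %l = sext <4 x i16> %a to <4 x i32>
  //    %r = sext <4 x i16> %b to <4 x i32>
  //    %p = mul <4 x i32> %l, %r
  //  with zext instead of sext for the unsigned variants.)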
2155 case NEON::BI__builtin_neon_vpadal_v:
2156 case NEON::BI__builtin_neon_vpadalq_v: {
2157 // The source operand type has twice as many elements of half the size.
2158 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2159 llvm::Type *EltTy =
2160 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
2161 auto *NarrowTy =
2162 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
2163 llvm::Type *Tys[2] = { Ty, NarrowTy };
2164 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
2165 }
2166 case NEON::BI__builtin_neon_vpaddl_v:
2167 case NEON::BI__builtin_neon_vpaddlq_v: {
2168 // The source operand type has twice as many elements of half the size.
2169 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2170 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
2171 auto *NarrowTy =
2172 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
2173 llvm::Type *Tys[2] = { Ty, NarrowTy };
2174 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
2175 }
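  // E.g. a <4 x i16> vpaddl result pairs up an <8 x i8> source, so Tys here is
  // { <4 x i16>, <8 x i8> }.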
2176 case NEON::BI__builtin_neon_vqdmlal_v:
2177 case NEON::BI__builtin_neon_vqdmlsl_v: {
2178 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
2179 Ops[1] =
2180 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
2181 Ops.resize(2);
2182 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
2183 }
2184 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
2185 case NEON::BI__builtin_neon_vqdmulh_lane_v:
2186 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
2187 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
2188 auto *RTy = cast<llvm::FixedVectorType>(Ty);
2189 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
2190 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
2191 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
2192 RTy->getNumElements() * 2);
2193 llvm::Type *Tys[2] = {
2194 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
2195 /*isQuad*/ false))};
2196 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
2197 }
2198 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
2199 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
2200 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
2201 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
2202 llvm::Type *Tys[2] = {
2203 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
2204 /*isQuad*/ true))};
2205 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
2206 }
2207 case NEON::BI__builtin_neon_vqshl_n_v:
2208 case NEON::BI__builtin_neon_vqshlq_n_v:
2209 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
2210 1, false);
2211 case NEON::BI__builtin_neon_vqshlu_n_v:
2212 case NEON::BI__builtin_neon_vqshluq_n_v:
2213 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
2214 1, false);
2215 case NEON::BI__builtin_neon_vrecpe_v:
2216 case NEON::BI__builtin_neon_vrecpeq_v:
2217 case NEON::BI__builtin_neon_vrsqrte_v:
2218 case NEON::BI__builtin_neon_vrsqrteq_v:
2219 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
2220 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
2221 case NEON::BI__builtin_neon_vrndi_v:
2222 case NEON::BI__builtin_neon_vrndiq_v:
2223 Int = Builder.getIsFPConstrained()
2224 ? Intrinsic::experimental_constrained_nearbyint
2225 : Intrinsic::nearbyint;
2226 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
2227 case NEON::BI__builtin_neon_vrshr_n_v:
2228 case NEON::BI__builtin_neon_vrshrq_n_v:
2229 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
2230 1, true);
2231 case NEON::BI__builtin_neon_vsha512hq_u64:
2232 case NEON::BI__builtin_neon_vsha512h2q_u64:
2233 case NEON::BI__builtin_neon_vsha512su0q_u64:
2234 case NEON::BI__builtin_neon_vsha512su1q_u64: {
2235 Function *F = CGM.getIntrinsic(Int);
2236 return EmitNeonCall(F, Ops, "");
2237 }
2238 case NEON::BI__builtin_neon_vshl_n_v:
2239 case NEON::BI__builtin_neon_vshlq_n_v:
2240 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
2241 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
2242 "vshl_n");
2243 case NEON::BI__builtin_neon_vshll_n_v: {
2244 llvm::FixedVectorType *SrcTy =
2245 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
2246 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2247 if (Usgn)
2248 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
2249 else
2250 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
2251 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
2252 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
2253 }
2254 case NEON::BI__builtin_neon_vshrn_n_v: {
2255 llvm::FixedVectorType *SrcTy =
2256 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2257 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2258 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
2259 if (Usgn)
2260 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
2261 else
2262 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
2263 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
2264 }
2265 case NEON::BI__builtin_neon_vshr_n_v:
2266 case NEON::BI__builtin_neon_vshrq_n_v:
2267 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
2268 case NEON::BI__builtin_neon_vst1_v:
2269 case NEON::BI__builtin_neon_vst1q_v:
2270 case NEON::BI__builtin_neon_vst2_v:
2271 case NEON::BI__builtin_neon_vst2q_v:
2272 case NEON::BI__builtin_neon_vst3_v:
2273 case NEON::BI__builtin_neon_vst3q_v:
2274 case NEON::BI__builtin_neon_vst4_v:
2275 case NEON::BI__builtin_neon_vst4q_v:
2276 case NEON::BI__builtin_neon_vst2_lane_v:
2277 case NEON::BI__builtin_neon_vst2q_lane_v:
2278 case NEON::BI__builtin_neon_vst3_lane_v:
2279 case NEON::BI__builtin_neon_vst3q_lane_v:
2280 case NEON::BI__builtin_neon_vst4_lane_v:
2281 case NEON::BI__builtin_neon_vst4q_lane_v: {
2282 llvm::Type *Tys[] = {Int8PtrTy, Ty};
2283 Ops.push_back(getAlignmentValue32(PtrOp0));
2284 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
2285 }
2286 case NEON::BI__builtin_neon_vsm3partw1q_u32:
2287 case NEON::BI__builtin_neon_vsm3partw2q_u32:
2288 case NEON::BI__builtin_neon_vsm3ss1q_u32:
2289 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
2290 case NEON::BI__builtin_neon_vsm4eq_u32: {
2291 Function *F = CGM.getIntrinsic(Int);
2292 return EmitNeonCall(F, Ops, "");
2293 }
2294 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
2295 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
2296 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
2297 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
2298 Function *F = CGM.getIntrinsic(Int);
2299 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
2300 return EmitNeonCall(F, Ops, "");
2301 }
2302 case NEON::BI__builtin_neon_vst1_x2_v:
2303 case NEON::BI__builtin_neon_vst1q_x2_v:
2304 case NEON::BI__builtin_neon_vst1_x3_v:
2305 case NEON::BI__builtin_neon_vst1q_x3_v:
2306 case NEON::BI__builtin_neon_vst1_x4_v:
2307 case NEON::BI__builtin_neon_vst1q_x4_v: {
2308    // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
2309    // in AArch64 it comes last. We may want to stick to one or the other.
2310 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
2311 Arch == llvm::Triple::aarch64_32) {
2312 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
2313 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
2314 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
2315 }
2316 llvm::Type *Tys[2] = {DefaultPtrTy, VTy};
2317 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
2318 }
2319 case NEON::BI__builtin_neon_vsubhn_v: {
2320 llvm::FixedVectorType *SrcTy =
2321 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2322
2323 // %sum = add <4 x i32> %lhs, %rhs
2324 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2325 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
2326 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
2327
2328 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
2329 Constant *ShiftAmt =
2330 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
2331 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
2332
2333 // %res = trunc <4 x i32> %high to <4 x i16>
2334 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
2335 }
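  // Illustrative example: at the C level this corresponds to, e.g.,
  // int16x4_t vsubhn_s32(int32x4_t a, int32x4_t b), where each result lane is
  // roughly (int16_t)((a[i] - b[i]) >> 16), i.e. the high half of the wide
  // difference, matching the sub/lshr/trunc sequence emitted above.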
2336 case NEON::BI__builtin_neon_vtrn_v:
2337 case NEON::BI__builtin_neon_vtrnq_v: {
2338 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2339 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2340 Value *SV = nullptr;
2341
2342 for (unsigned vi = 0; vi != 2; ++vi) {
2343 SmallVector<int, 16> Indices;
2344 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2345 Indices.push_back(i+vi);
2346 Indices.push_back(i+e+vi);
2347 }
2348 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
2349 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
2350 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
2351 }
2352 return SV;
2353 }
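  // Illustrative example: for a <4 x i32> input the two shuffles above use
  // masks <0, 4, 2, 6> (vi == 0) and <1, 5, 3, 7> (vi == 1), producing the two
  // transposed halves, each stored through a GEP off the result pointer in
  // Ops[0].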
2354 case NEON::BI__builtin_neon_vtst_v:
2355 case NEON::BI__builtin_neon_vtstq_v: {
2356 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2357 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2358 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
2359 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
2360 ConstantAggregateZero::get(Ty));
2361 return Builder.CreateSExt(Ops[0], Ty, "vtst");
2362 }
2363 case NEON::BI__builtin_neon_vuzp_v:
2364 case NEON::BI__builtin_neon_vuzpq_v: {
2365 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2366 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2367 Value *SV = nullptr;
2368
2369 for (unsigned vi = 0; vi != 2; ++vi) {
2370 SmallVector<int, 16> Indices;
2371 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
2372 Indices.push_back(2*i+vi);
2373
2374 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
2375 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
2376 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
2377 }
2378 return SV;
2379 }
2380 case NEON::BI__builtin_neon_vxarq_u64: {
2381 Function *F = CGM.getIntrinsic(Int);
2382 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
2383 return EmitNeonCall(F, Ops, "");
2384 }
2385 case NEON::BI__builtin_neon_vzip_v:
2386 case NEON::BI__builtin_neon_vzipq_v: {
2387 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2388 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2389 Value *SV = nullptr;
2390
2391 for (unsigned vi = 0; vi != 2; ++vi) {
2392 SmallVector<int, 16> Indices;
2393 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2394 Indices.push_back((i + vi*e) >> 1);
2395 Indices.push_back(((i + vi*e) >> 1)+e);
2396 }
2397 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
2398 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
2399 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
2400 }
2401 return SV;
2402 }
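  // Illustrative example: for a <4 x i32> input the interleaving masks above
  // come out as <0, 4, 1, 5> (vi == 0) and <2, 6, 3, 7> (vi == 1), i.e. the
  // low and high zipped halves of the two operands.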
2403 case NEON::BI__builtin_neon_vdot_s32:
2404 case NEON::BI__builtin_neon_vdot_u32:
2405 case NEON::BI__builtin_neon_vdotq_s32:
2406 case NEON::BI__builtin_neon_vdotq_u32: {
2407 auto *InputTy =
2408 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2409 llvm::Type *Tys[2] = { Ty, InputTy };
2410 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
2411 }
2412 case NEON::BI__builtin_neon_vfmlal_low_f16:
2413 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
2414 auto *InputTy =
2415 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2416 llvm::Type *Tys[2] = { Ty, InputTy };
2417 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
2418 }
2419 case NEON::BI__builtin_neon_vfmlsl_low_f16:
2420 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
2421 auto *InputTy =
2422 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2423 llvm::Type *Tys[2] = { Ty, InputTy };
2424 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
2425 }
2426 case NEON::BI__builtin_neon_vfmlal_high_f16:
2427 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
2428 auto *InputTy =
2429 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2430 llvm::Type *Tys[2] = { Ty, InputTy };
2431 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
2432 }
2433 case NEON::BI__builtin_neon_vfmlsl_high_f16:
2434 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
2435 auto *InputTy =
2436 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2437 llvm::Type *Tys[2] = { Ty, InputTy };
2438 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
2439 }
2440 case NEON::BI__builtin_neon_vmmlaq_s32:
2441 case NEON::BI__builtin_neon_vmmlaq_u32: {
2442 auto *InputTy =
2443 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2444 llvm::Type *Tys[2] = { Ty, InputTy };
2445 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
2446 }
2447 case NEON::BI__builtin_neon_vusmmlaq_s32: {
2448 auto *InputTy =
2449 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2450 llvm::Type *Tys[2] = { Ty, InputTy };
2451 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
2452 }
2453 case NEON::BI__builtin_neon_vusdot_s32:
2454 case NEON::BI__builtin_neon_vusdotq_s32: {
2455 auto *InputTy =
2456 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2457 llvm::Type *Tys[2] = { Ty, InputTy };
2458 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
2459 }
2460 case NEON::BI__builtin_neon_vbfdot_f32:
2461 case NEON::BI__builtin_neon_vbfdotq_f32: {
2462 llvm::Type *InputTy =
2463 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
2464 llvm::Type *Tys[2] = { Ty, InputTy };
2465 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
2466 }
2467 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
2468 llvm::Type *Tys[1] = { Ty };
2469 Function *F = CGM.getIntrinsic(Int, Tys);
2470 return EmitNeonCall(F, Ops, "vcvtfp2bf");
2471 }
2472
2473 }
2474
2475 assert(Int && "Expected valid intrinsic number");
2476
2477 // Determine the type(s) of this overloaded AArch64 intrinsic.
2478 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
2479
2480 Value *Result = EmitNeonCall(F, Ops, NameHint);
2481 llvm::Type *ResultType = ConvertType(E->getType());
2482  // Cast the AArch64 intrinsic's one-element vector result back to the
2483  // scalar type expected by the builtin.
2484 return Builder.CreateBitCast(Result, ResultType, NameHint);
2485}
2486
2487Value *
2488CodeGenFunction::EmitAArch64CompareBuiltinExpr(Value *Op, llvm::Type *Ty,
2489                                               const CmpInst::Predicate Pred,
2490                                               const Twine &Name) {
2491
2492 if (isa<FixedVectorType>(Ty)) {
2493 // Vector types are cast to i8 vectors. Recover original type.
2494 Op = Builder.CreateBitCast(Op, Ty);
2495 }
2496
2497 if (CmpInst::isFPPredicate(Pred)) {
2498 if (Pred == CmpInst::FCMP_OEQ)
2499 Op = Builder.CreateFCmp(Pred, Op, Constant::getNullValue(Op->getType()));
2500 else
2501 Op = Builder.CreateFCmpS(Pred, Op, Constant::getNullValue(Op->getType()));
2502 } else {
2503 Op = Builder.CreateICmp(Pred, Op, Constant::getNullValue(Op->getType()));
2504 }
2505
2506 llvm::Type *ResTy = Ty;
2507 if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
2508 ResTy = FixedVectorType::get(
2509 IntegerType::get(getLLVMContext(), VTy->getScalarSizeInBits()),
2510 VTy->getNumElements());
2511
2512 return Builder.CreateSExt(Op, ResTy, Name);
2513}
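// Illustrative example: a 64-bit compare-against-zero routed through this
// helper (for an equality predicate) is emitted roughly as
//   %cmp = icmp eq i64 %a, 0
//   %res = sext i1 %cmp to i64
// so each result is an all-ones or all-zeros mask rather than an i1.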
2514
2515static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
2516                                 Value *ExtOp, Value *IndexOp,
2517 llvm::Type *ResTy, unsigned IntID,
2518 const char *Name) {
2519  SmallVector<Value *, 2> TblOps;
2520  if (ExtOp)
2521 TblOps.push_back(ExtOp);
2522
2523  // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
2524 SmallVector<int, 16> Indices;
2525 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
2526 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
2527 Indices.push_back(2*i);
2528 Indices.push_back(2*i+1);
2529 }
2530
2531 int PairPos = 0, End = Ops.size() - 1;
2532 while (PairPos < End) {
2533 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
2534 Ops[PairPos+1], Indices,
2535 Name));
2536 PairPos += 2;
2537 }
2538
2539  // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
2540  // of the final 128-bit lookup table with zeros.
2541 if (PairPos == End) {
2542 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
2543 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
2544 ZeroTbl, Indices, Name));
2545 }
2546
2547 Function *TblF;
2548 TblOps.push_back(IndexOp);
2549 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
2550
2551 return CGF.EmitNeonCall(TblF, TblOps, Name);
2552}
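// Illustrative example: for a D-register vtbl3 (three 64-bit tables), the
// pairing loop above zips tables 0 and 1 into one 128-bit vector, and the
// leftover table 2 is paired with an all-zero vector, so the AArch64 TBL
// intrinsics always see full 128-bit table operands.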
2553
2554Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
2555 unsigned Value;
2556 switch (BuiltinID) {
2557 default:
2558 return nullptr;
2559 case clang::ARM::BI__builtin_arm_nop:
2560 Value = 0;
2561 break;
2562 case clang::ARM::BI__builtin_arm_yield:
2563 case clang::ARM::BI__yield:
2564 Value = 1;
2565 break;
2566 case clang::ARM::BI__builtin_arm_wfe:
2567 case clang::ARM::BI__wfe:
2568 Value = 2;
2569 break;
2570 case clang::ARM::BI__builtin_arm_wfi:
2571 case clang::ARM::BI__wfi:
2572 Value = 3;
2573 break;
2574 case clang::ARM::BI__builtin_arm_sev:
2575 case clang::ARM::BI__sev:
2576 Value = 4;
2577 break;
2578 case clang::ARM::BI__builtin_arm_sevl:
2579 case clang::ARM::BI__sevl:
2580 Value = 5;
2581 break;
2582 }
2583
2584 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
2585 llvm::ConstantInt::get(Int32Ty, Value));
2586}
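// Illustrative example: __builtin_arm_wfi() (or __wfi()) therefore lowers to
// roughly
//   call void @llvm.arm.hint(i32 3)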
2587
2588enum SpecialRegisterAccessKind {
2589  NormalRead,
2590  VolatileRead,
2591  Write,
2592};
2593
2594// Generates the IR for the read/write special register builtin.
2595// ValueType is the type of the value that is to be written or read,
2596// RegisterType is the type of the register being written to or read from.
2597static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
2598                                         const CallExpr *E,
2599 llvm::Type *RegisterType,
2600 llvm::Type *ValueType,
2601 SpecialRegisterAccessKind AccessKind,
2602 StringRef SysReg = "") {
2603  // The read and write register intrinsics only support 32-, 64- and 128-bit operations.
2604 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
2605 RegisterType->isIntegerTy(128)) &&
2606 "Unsupported size for register.");
2607
2608 CodeGen::CGBuilderTy &Builder = CGF.Builder;
2609 CodeGen::CodeGenModule &CGM = CGF.CGM;
2610 LLVMContext &Context = CGM.getLLVMContext();
2611
2612 if (SysReg.empty()) {
2613 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
2614 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
2615 }
2616
2617 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
2618 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
2619 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
2620
2621 llvm::Type *Types[] = { RegisterType };
2622
2623 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
2624 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
2625 && "Can't fit 64-bit value in 32-bit register");
2626
2627 if (AccessKind != Write) {
2628 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
2629 llvm::Function *F = CGM.getIntrinsic(
2630 AccessKind == VolatileRead ? Intrinsic::read_volatile_register
2631 : Intrinsic::read_register,
2632 Types);
2633 llvm::Value *Call = Builder.CreateCall(F, Metadata);
2634
2635 if (MixedTypes)
2636 // Read into 64 bit register and then truncate result to 32 bit.
2637 return Builder.CreateTrunc(Call, ValueType);
2638
2639 if (ValueType->isPointerTy())
2640 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
2641 return Builder.CreateIntToPtr(Call, ValueType);
2642
2643 return Call;
2644 }
2645
2646 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
2647 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
2648 if (MixedTypes) {
2649 // Extend 32 bit write value to 64 bit to pass to write.
2650 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
2651 return Builder.CreateCall(F, { Metadata, ArgValue });
2652 }
2653
2654 if (ValueType->isPointerTy()) {
2655 // Have VoidPtrTy ArgValue but want to return an i32/i64.
2656 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
2657 return Builder.CreateCall(F, { Metadata, ArgValue });
2658 }
2659
2660 return Builder.CreateCall(F, { Metadata, ArgValue });
2661}
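// Illustrative example: __builtin_arm_rsr("cpsr") is emitted roughly as
//   !0 = !{!"cpsr"}
//   %val = call i32 @llvm.read_volatile_register.i32(metadata !0)
// while the corresponding __builtin_arm_wsr forms call @llvm.write_register.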
2662
2663/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
2664/// argument that specifies the vector type.
2665static bool HasExtraNeonArgument(unsigned BuiltinID) {
2666 switch (BuiltinID) {
2667 default: break;
2668 case NEON::BI__builtin_neon_vget_lane_i8:
2669 case NEON::BI__builtin_neon_vget_lane_i16:
2670 case NEON::BI__builtin_neon_vget_lane_bf16:
2671 case NEON::BI__builtin_neon_vget_lane_i32:
2672 case NEON::BI__builtin_neon_vget_lane_i64:
2673 case NEON::BI__builtin_neon_vget_lane_mf8:
2674 case NEON::BI__builtin_neon_vget_lane_f32:
2675 case NEON::BI__builtin_neon_vgetq_lane_i8:
2676 case NEON::BI__builtin_neon_vgetq_lane_i16:
2677 case NEON::BI__builtin_neon_vgetq_lane_bf16:
2678 case NEON::BI__builtin_neon_vgetq_lane_i32:
2679 case NEON::BI__builtin_neon_vgetq_lane_i64:
2680 case NEON::BI__builtin_neon_vgetq_lane_mf8:
2681 case NEON::BI__builtin_neon_vgetq_lane_f32:
2682 case NEON::BI__builtin_neon_vduph_lane_bf16:
2683 case NEON::BI__builtin_neon_vduph_laneq_bf16:
2684 case NEON::BI__builtin_neon_vset_lane_i8:
2685 case NEON::BI__builtin_neon_vset_lane_mf8:
2686 case NEON::BI__builtin_neon_vset_lane_i16:
2687 case NEON::BI__builtin_neon_vset_lane_bf16:
2688 case NEON::BI__builtin_neon_vset_lane_i32:
2689 case NEON::BI__builtin_neon_vset_lane_i64:
2690 case NEON::BI__builtin_neon_vset_lane_f32:
2691 case NEON::BI__builtin_neon_vsetq_lane_i8:
2692 case NEON::BI__builtin_neon_vsetq_lane_mf8:
2693 case NEON::BI__builtin_neon_vsetq_lane_i16:
2694 case NEON::BI__builtin_neon_vsetq_lane_bf16:
2695 case NEON::BI__builtin_neon_vsetq_lane_i32:
2696 case NEON::BI__builtin_neon_vsetq_lane_i64:
2697 case NEON::BI__builtin_neon_vsetq_lane_f32:
2698 case NEON::BI__builtin_neon_vsha1h_u32:
2699 case NEON::BI__builtin_neon_vsha1cq_u32:
2700 case NEON::BI__builtin_neon_vsha1pq_u32:
2701 case NEON::BI__builtin_neon_vsha1mq_u32:
2702 case NEON::BI__builtin_neon_vcvth_bf16_f32:
2703 case clang::ARM::BI_MoveToCoprocessor:
2704 case clang::ARM::BI_MoveToCoprocessor2:
2705 return false;
2706 }
2707 return true;
2708}
2709
2710Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
2711                                           const CallExpr *E,
2712                                           ReturnValueSlot ReturnValue,
2713                                           llvm::Triple::ArchType Arch) {
2714 if (auto Hint = GetValueForARMHint(BuiltinID))
2715 return Hint;
2716
2717 if (BuiltinID == clang::ARM::BI__emit) {
2718 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
2719 llvm::FunctionType *FTy =
2720 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
2721
2723 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
2724 llvm_unreachable("Sema will ensure that the parameter is constant");
2725
2726 llvm::APSInt Value = Result.Val.getInt();
2727 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
2728
2729 llvm::InlineAsm *Emit =
2730 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
2731 /*hasSideEffects=*/true)
2732 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
2733 /*hasSideEffects=*/true);
2734
2735 return Builder.CreateCall(Emit);
2736 }
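  // Illustrative example: in Thumb mode __emit(0xbf00) becomes roughly the
  // inline-asm call
  //   call void asm sideeffect ".inst.n 0xBF00", ""()
  // i.e. the raw 16-bit encoding of a Thumb NOP is emitted verbatim.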
2737
2738 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
2739 Value *Option = EmitScalarExpr(E->getArg(0));
2740 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
2741 }
2742
2743 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
2744    Value *Address = EmitScalarExpr(E->getArg(0));
2745    Value *RW = EmitScalarExpr(E->getArg(1));
2746 Value *IsData = EmitScalarExpr(E->getArg(2));
2747
2748 // Locality is not supported on ARM target
2749 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
2750
2751 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
2752 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
2753 }
2754
2755 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
2756 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2757 return Builder.CreateCall(
2758 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
2759 }
2760
2761 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
2762 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
2763 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2764 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
2765 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
2766 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
2767 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
2768 return Res;
2769 }
2770
2771
2772 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
2773 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2774 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
2775 }
2776 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
2777 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2778 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
2779 "cls");
2780 }
2781
2782 if (BuiltinID == clang::ARM::BI__clear_cache) {
2783 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
2784 const FunctionDecl *FD = E->getDirectCallee();
2785 Value *Ops[2];
2786 for (unsigned i = 0; i < 2; i++)
2787 Ops[i] = EmitScalarExpr(E->getArg(i));
2788 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
2789 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
2790 StringRef Name = FD->getName();
2791 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
2792 }
2793
2794 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
2795 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
2796 Function *F;
2797
2798 switch (BuiltinID) {
2799 default: llvm_unreachable("unexpected builtin");
2800 case clang::ARM::BI__builtin_arm_mcrr:
2801 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
2802 break;
2803 case clang::ARM::BI__builtin_arm_mcrr2:
2804 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
2805 break;
2806 }
2807
2808    // The MCRR{2} instruction has 5 operands, but
2809    // the intrinsic has only 4 because Rt and Rt2
2810    // are represented as a single unsigned 64-bit
2811    // integer in the intrinsic definition, while
2812    // internally they are represented as two
2813    // 32-bit integers.
2814
2815 Value *Coproc = EmitScalarExpr(E->getArg(0));
2816 Value *Opc1 = EmitScalarExpr(E->getArg(1));
2817 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
2818 Value *CRm = EmitScalarExpr(E->getArg(3));
2819
2820 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
2821 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
2822 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
2823 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
2824
2825 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
2826 }
2827
2828 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
2829 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
2830 Function *F;
2831
2832 switch (BuiltinID) {
2833 default: llvm_unreachable("unexpected builtin");
2834 case clang::ARM::BI__builtin_arm_mrrc:
2835 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
2836 break;
2837 case clang::ARM::BI__builtin_arm_mrrc2:
2838 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
2839 break;
2840 }
2841
2842 Value *Coproc = EmitScalarExpr(E->getArg(0));
2843 Value *Opc1 = EmitScalarExpr(E->getArg(1));
2844 Value *CRm = EmitScalarExpr(E->getArg(2));
2845 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
2846
2847    // Returns an unsigned 64-bit integer, represented
2848    // as two 32-bit integers.
2849
2850 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
2851 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
2852 Rt = Builder.CreateZExt(Rt, Int64Ty);
2853 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
2854
2855 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
2856 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
2857 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
2858
2859 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
2860 }
2861
2862 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
2863 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2864 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
2865 getContext().getTypeSize(E->getType()) == 64) ||
2866 BuiltinID == clang::ARM::BI__ldrexd) {
2867 Function *F;
2868
2869 switch (BuiltinID) {
2870 default: llvm_unreachable("unexpected builtin");
2871 case clang::ARM::BI__builtin_arm_ldaex:
2872 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
2873 break;
2874 case clang::ARM::BI__builtin_arm_ldrexd:
2875 case clang::ARM::BI__builtin_arm_ldrex:
2876 case clang::ARM::BI__ldrexd:
2877 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
2878 break;
2879 }
2880
2881 Value *LdPtr = EmitScalarExpr(E->getArg(0));
2882 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
2883
2884 Value *Val0 = Builder.CreateExtractValue(Val, 1);
2885 Value *Val1 = Builder.CreateExtractValue(Val, 0);
2886 Val0 = Builder.CreateZExt(Val0, Int64Ty);
2887 Val1 = Builder.CreateZExt(Val1, Int64Ty);
2888
2889 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
2890 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
2891 Val = Builder.CreateOr(Val, Val1);
2892 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
2893 }
2894
2895 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2896 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
2897 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
2898
2899 QualType Ty = E->getType();
2900 llvm::Type *RealResTy = ConvertType(Ty);
2901 llvm::Type *IntTy =
2902 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
2903
2904 Function *F = CGM.getIntrinsic(
2905 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
2906 : Intrinsic::arm_ldrex,
2907 DefaultPtrTy);
2908 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
2909 Val->addParamAttr(
2910 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
2911
2912 if (RealResTy->isPointerTy())
2913 return Builder.CreateIntToPtr(Val, RealResTy);
2914 else {
2915 llvm::Type *IntResTy = llvm::IntegerType::get(
2916 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
2917 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
2918 RealResTy);
2919 }
2920 }
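  // Illustrative example: __builtin_arm_ldrex on an 'int *' argument is
  // emitted roughly as
  //   %val = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i32) %p)
  // where the elementtype attribute records the access width that the opaque
  // pointer type no longer carries.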
2921
2922 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
2923 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
2924 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
2925 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
2926 Function *F = CGM.getIntrinsic(
2927 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
2928 : Intrinsic::arm_strexd);
2929 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
2930
2931 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
2932 Value *Val = EmitScalarExpr(E->getArg(0));
2933 Builder.CreateStore(Val, Tmp);
2934
2935 Address LdPtr = Tmp.withElementType(STy);
2936 Val = Builder.CreateLoad(LdPtr);
2937
2938 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
2939 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
2940 Value *StPtr = EmitScalarExpr(E->getArg(1));
2941 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
2942 }
2943
2944 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
2945 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
2946 Value *StoreVal = EmitScalarExpr(E->getArg(0));
2947 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
2948
2949 QualType Ty = E->getArg(0)->getType();
2950 llvm::Type *StoreTy =
2951 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
2952
2953 if (StoreVal->getType()->isPointerTy())
2954 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
2955 else {
2956 llvm::Type *IntTy = llvm::IntegerType::get(
2957          getLLVMContext(),
2958          CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
2959 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
2960 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
2961 }
2962
2963 Function *F = CGM.getIntrinsic(
2964 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
2965 : Intrinsic::arm_strex,
2966 StoreAddr->getType());
2967
2968 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
2969 CI->addParamAttr(
2970 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
2971 return CI;
2972 }
2973
2974 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
2975 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
2976 return Builder.CreateCall(F);
2977 }
2978
2979 // CRC32
2980 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
2981 switch (BuiltinID) {
2982 case clang::ARM::BI__builtin_arm_crc32b:
2983 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
2984 case clang::ARM::BI__builtin_arm_crc32cb:
2985 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
2986 case clang::ARM::BI__builtin_arm_crc32h:
2987 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
2988 case clang::ARM::BI__builtin_arm_crc32ch:
2989 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
2990 case clang::ARM::BI__builtin_arm_crc32w:
2991 case clang::ARM::BI__builtin_arm_crc32d:
2992 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
2993 case clang::ARM::BI__builtin_arm_crc32cw:
2994 case clang::ARM::BI__builtin_arm_crc32cd:
2995 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
2996 }
2997
2998 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
2999 Value *Arg0 = EmitScalarExpr(E->getArg(0));
3000 Value *Arg1 = EmitScalarExpr(E->getArg(1));
3001
3002 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
3003 // intrinsics, hence we need different codegen for these cases.
3004 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
3005 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
3006 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
3007 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
3008 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
3009 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
3010
3011 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
3012 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
3013 return Builder.CreateCall(F, {Res, Arg1b});
3014 } else {
3015 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
3016
3017 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
3018 return Builder.CreateCall(F, {Arg0, Arg1});
3019 }
3020 }
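  // Illustrative example: __builtin_arm_crc32d(acc, x) is therefore expanded
  // roughly as crc32w(crc32w(acc, (uint32_t)x), (uint32_t)(x >> 32)), since
  // AArch32 has no 64-bit CRC32 instruction.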
3021
3022 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
3023 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
3024 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
3025 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
3026 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
3027 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
3028
3029 SpecialRegisterAccessKind AccessKind = Write;
3030 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
3031 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
3032 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
3033 AccessKind = VolatileRead;
3034
3035 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
3036 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
3037
3038 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
3039 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
3040
3041 llvm::Type *ValueType;
3042 llvm::Type *RegisterType;
3043 if (IsPointerBuiltin) {
3044 ValueType = VoidPtrTy;
3045      RegisterType = Int32Ty;
3046    } else if (Is64Bit) {
3047 ValueType = RegisterType = Int64Ty;
3048 } else {
3049 ValueType = RegisterType = Int32Ty;
3050 }
3051
3052 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
3053 AccessKind);
3054 }
3055
3056 if (BuiltinID == ARM::BI__builtin_sponentry) {
3057 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
3058 return Builder.CreateCall(F);
3059 }
3060
3061 // Handle MSVC intrinsics before argument evaluation to prevent double
3062 // evaluation.
3063 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
3064 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
3065
3066 // Deal with MVE builtins
3067 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
3068 return Result;
3069 // Handle CDE builtins
3070 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
3071 return Result;
3072
3073  // Some intrinsics are equivalent; if they are, use the base intrinsic ID.
3074 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
3075 return P.first == BuiltinID;
3076 });
3077 if (It != end(NEONEquivalentIntrinsicMap))
3078 BuiltinID = It->second;
3079
3080 // Find out if any arguments are required to be integer constant
3081 // expressions.
3082 unsigned ICEArguments = 0;
3083  ASTContext::GetBuiltinTypeError Error;
3084  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3085 assert(Error == ASTContext::GE_None && "Should not codegen an error");
3086
3087 auto getAlignmentValue32 = [&](Address addr) -> Value* {
3088 return Builder.getInt32(addr.getAlignment().getQuantity());
3089 };
3090
3091 Address PtrOp0 = Address::invalid();
3092 Address PtrOp1 = Address::invalid();
3093  SmallVector<Value*, 4> Ops;
3094  bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
3095 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
3096 for (unsigned i = 0, e = NumArgs; i != e; i++) {
3097 if (i == 0) {
3098 switch (BuiltinID) {
3099 case NEON::BI__builtin_neon_vld1_v:
3100 case NEON::BI__builtin_neon_vld1q_v:
3101 case NEON::BI__builtin_neon_vld1q_lane_v:
3102 case NEON::BI__builtin_neon_vld1_lane_v:
3103 case NEON::BI__builtin_neon_vld1_dup_v:
3104 case NEON::BI__builtin_neon_vld1q_dup_v:
3105 case NEON::BI__builtin_neon_vst1_v:
3106 case NEON::BI__builtin_neon_vst1q_v:
3107 case NEON::BI__builtin_neon_vst1q_lane_v:
3108 case NEON::BI__builtin_neon_vst1_lane_v:
3109 case NEON::BI__builtin_neon_vst2_v:
3110 case NEON::BI__builtin_neon_vst2q_v:
3111 case NEON::BI__builtin_neon_vst2_lane_v:
3112 case NEON::BI__builtin_neon_vst2q_lane_v:
3113 case NEON::BI__builtin_neon_vst3_v:
3114 case NEON::BI__builtin_neon_vst3q_v:
3115 case NEON::BI__builtin_neon_vst3_lane_v:
3116 case NEON::BI__builtin_neon_vst3q_lane_v:
3117 case NEON::BI__builtin_neon_vst4_v:
3118 case NEON::BI__builtin_neon_vst4q_v:
3119 case NEON::BI__builtin_neon_vst4_lane_v:
3120 case NEON::BI__builtin_neon_vst4q_lane_v:
3121 // Get the alignment for the argument in addition to the value;
3122 // we'll use it later.
3123 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
3124 Ops.push_back(PtrOp0.emitRawPointer(*this));
3125 continue;
3126 }
3127 }
3128 if (i == 1) {
3129 switch (BuiltinID) {
3130 case NEON::BI__builtin_neon_vld2_v:
3131 case NEON::BI__builtin_neon_vld2q_v:
3132 case NEON::BI__builtin_neon_vld3_v:
3133 case NEON::BI__builtin_neon_vld3q_v:
3134 case NEON::BI__builtin_neon_vld4_v:
3135 case NEON::BI__builtin_neon_vld4q_v:
3136 case NEON::BI__builtin_neon_vld2_lane_v:
3137 case NEON::BI__builtin_neon_vld2q_lane_v:
3138 case NEON::BI__builtin_neon_vld3_lane_v:
3139 case NEON::BI__builtin_neon_vld3q_lane_v:
3140 case NEON::BI__builtin_neon_vld4_lane_v:
3141 case NEON::BI__builtin_neon_vld4q_lane_v:
3142 case NEON::BI__builtin_neon_vld2_dup_v:
3143 case NEON::BI__builtin_neon_vld2q_dup_v:
3144 case NEON::BI__builtin_neon_vld3_dup_v:
3145 case NEON::BI__builtin_neon_vld3q_dup_v:
3146 case NEON::BI__builtin_neon_vld4_dup_v:
3147 case NEON::BI__builtin_neon_vld4q_dup_v:
3148 // Get the alignment for the argument in addition to the value;
3149 // we'll use it later.
3150 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
3151 Ops.push_back(PtrOp1.emitRawPointer(*this));
3152 continue;
3153 }
3154 }
3155
3156 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
3157 }
3158
3159 switch (BuiltinID) {
3160 default: break;
3161
3162 case NEON::BI__builtin_neon_vget_lane_i8:
3163 case NEON::BI__builtin_neon_vget_lane_i16:
3164 case NEON::BI__builtin_neon_vget_lane_i32:
3165 case NEON::BI__builtin_neon_vget_lane_i64:
3166 case NEON::BI__builtin_neon_vget_lane_bf16:
3167 case NEON::BI__builtin_neon_vget_lane_f32:
3168 case NEON::BI__builtin_neon_vgetq_lane_i8:
3169 case NEON::BI__builtin_neon_vgetq_lane_i16:
3170 case NEON::BI__builtin_neon_vgetq_lane_i32:
3171 case NEON::BI__builtin_neon_vgetq_lane_i64:
3172 case NEON::BI__builtin_neon_vgetq_lane_bf16:
3173 case NEON::BI__builtin_neon_vgetq_lane_f32:
3174 case NEON::BI__builtin_neon_vduph_lane_bf16:
3175 case NEON::BI__builtin_neon_vduph_laneq_bf16:
3176 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
3177
3178 case NEON::BI__builtin_neon_vrndns_f32: {
3179 Value *Arg = EmitScalarExpr(E->getArg(0));
3180 llvm::Type *Tys[] = {Arg->getType()};
3181 Function *F = CGM.getIntrinsic(Intrinsic::roundeven, Tys);
3182 return Builder.CreateCall(F, {Arg}, "vrndn"); }
3183
3184 case NEON::BI__builtin_neon_vset_lane_i8:
3185 case NEON::BI__builtin_neon_vset_lane_i16:
3186 case NEON::BI__builtin_neon_vset_lane_i32:
3187 case NEON::BI__builtin_neon_vset_lane_i64:
3188 case NEON::BI__builtin_neon_vset_lane_bf16:
3189 case NEON::BI__builtin_neon_vset_lane_f32:
3190 case NEON::BI__builtin_neon_vsetq_lane_i8:
3191 case NEON::BI__builtin_neon_vsetq_lane_i16:
3192 case NEON::BI__builtin_neon_vsetq_lane_i32:
3193 case NEON::BI__builtin_neon_vsetq_lane_i64:
3194 case NEON::BI__builtin_neon_vsetq_lane_bf16:
3195 case NEON::BI__builtin_neon_vsetq_lane_f32:
3196 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
3197
3198 case NEON::BI__builtin_neon_vsha1h_u32:
3199 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
3200 "vsha1h");
3201 case NEON::BI__builtin_neon_vsha1cq_u32:
3202 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
3203 "vsha1h");
3204 case NEON::BI__builtin_neon_vsha1pq_u32:
3205 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
3206 "vsha1h");
3207 case NEON::BI__builtin_neon_vsha1mq_u32:
3208 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
3209 "vsha1h");
3210
3211 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
3212 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
3213 "vcvtbfp2bf");
3214 }
3215
3216 // The ARM _MoveToCoprocessor builtins put the input register value as
3217 // the first argument, but the LLVM intrinsic expects it as the third one.
3218 case clang::ARM::BI_MoveToCoprocessor:
3219 case clang::ARM::BI_MoveToCoprocessor2: {
3220 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
3221 ? Intrinsic::arm_mcr
3222 : Intrinsic::arm_mcr2);
3223 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
3224 Ops[3], Ops[4], Ops[5]});
3225 }
3226 }
3227
3228 // Get the last argument, which specifies the vector type.
3229 assert(HasExtraArg);
3230 const Expr *Arg = E->getArg(E->getNumArgs()-1);
3231 std::optional<llvm::APSInt> Result =
3232      Arg->getIntegerConstantExpr(getContext());
3233  if (!Result)
3234 return nullptr;
3235
3236 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
3237 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
3238 // Determine the overloaded type of this builtin.
3239 llvm::Type *Ty;
3240 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
3241 Ty = FloatTy;
3242 else
3243 Ty = DoubleTy;
3244
3245 // Determine whether this is an unsigned conversion or not.
3246 bool usgn = Result->getZExtValue() == 1;
3247 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
3248
3249 // Call the appropriate intrinsic.
3250 Function *F = CGM.getIntrinsic(Int, Ty);
3251 return Builder.CreateCall(F, Ops, "vcvtr");
3252 }
3253
3254 // Determine the type of this overloaded NEON intrinsic.
3255 NeonTypeFlags Type = Result->getZExtValue();
3256 bool usgn = Type.isUnsigned();
3257 bool rightShift = false;
3258
3259 llvm::FixedVectorType *VTy =
3260 GetNeonType(this, Type, getTarget().hasFastHalfType(), false,
3261 getTarget().hasBFloat16Type());
3262 llvm::Type *Ty = VTy;
3263 if (!Ty)
3264 return nullptr;
3265
3266 // Many NEON builtins have identical semantics and uses in ARM and
3267 // AArch64. Emit these in a single function.
3268 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
3269 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
3270 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
3271 if (Builtin)
3272    return EmitCommonNeonBuiltinExpr(
3273        Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
3274 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
3275
3276 unsigned Int;
3277 switch (BuiltinID) {
3278 default: return nullptr;
3279 case NEON::BI__builtin_neon_vld1q_lane_v:
3280 // Handle 64-bit integer elements as a special case. Use shuffles of
3281 // one-element vectors to avoid poor code for i64 in the backend.
3282 if (VTy->getElementType()->isIntegerTy(64)) {
3283 // Extract the other lane.
3284 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3285 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
3286 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
3287 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
3288 // Load the value as a one-element vector.
3289 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
3290 llvm::Type *Tys[] = {Ty, Int8PtrTy};
3291 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
3292 Value *Align = getAlignmentValue32(PtrOp0);
3293 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
3294 // Combine them.
3295 int Indices[] = {1 - Lane, Lane};
3296 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
3297 }
3298 [[fallthrough]];
3299 case NEON::BI__builtin_neon_vld1_lane_v: {
3300 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3301 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
3302 Value *Ld = Builder.CreateLoad(PtrOp0);
3303 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
3304 }
3305 case NEON::BI__builtin_neon_vqrshrn_n_v:
3306 Int =
3307 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
3308 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
3309 1, true);
3310 case NEON::BI__builtin_neon_vqrshrun_n_v:
3311 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
3312 Ops, "vqrshrun_n", 1, true);
3313 case NEON::BI__builtin_neon_vqshrn_n_v:
3314 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
3315 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
3316 1, true);
3317 case NEON::BI__builtin_neon_vqshrun_n_v:
3318 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
3319 Ops, "vqshrun_n", 1, true);
3320 case NEON::BI__builtin_neon_vrecpe_v:
3321 case NEON::BI__builtin_neon_vrecpeq_v:
3322 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
3323 Ops, "vrecpe");
3324 case NEON::BI__builtin_neon_vrshrn_n_v:
3325 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
3326 Ops, "vrshrn_n", 1, true);
3327 case NEON::BI__builtin_neon_vrsra_n_v:
3328 case NEON::BI__builtin_neon_vrsraq_n_v:
3329 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3330 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3331 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
3332 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
3333 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
3334 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
3335 case NEON::BI__builtin_neon_vsri_n_v:
3336 case NEON::BI__builtin_neon_vsriq_n_v:
3337 rightShift = true;
3338 [[fallthrough]];
3339 case NEON::BI__builtin_neon_vsli_n_v:
3340 case NEON::BI__builtin_neon_vsliq_n_v:
3341 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
3342 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
3343 Ops, "vsli_n");
3344 case NEON::BI__builtin_neon_vsra_n_v:
3345 case NEON::BI__builtin_neon_vsraq_n_v:
3346 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3347 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
3348 return Builder.CreateAdd(Ops[0], Ops[1]);
3349 case NEON::BI__builtin_neon_vst1q_lane_v:
3350 // Handle 64-bit integer elements as a special case. Use a shuffle to get
3351 // a one-element vector and avoid poor code for i64 in the backend.
3352 if (VTy->getElementType()->isIntegerTy(64)) {
3353 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3354 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
3355 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
3356 Ops[2] = getAlignmentValue32(PtrOp0);
3357 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
3358 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
3359 Tys), Ops);
3360 }
3361 [[fallthrough]];
3362 case NEON::BI__builtin_neon_vst1_lane_v: {
3363 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3364 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
3365 return Builder.CreateStore(Ops[1],
3366 PtrOp0.withElementType(Ops[1]->getType()));
3367 }
3368 case NEON::BI__builtin_neon_vtbl1_v:
3369 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
3370 Ops, "vtbl1");
3371 case NEON::BI__builtin_neon_vtbl2_v:
3372 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
3373 Ops, "vtbl2");
3374 case NEON::BI__builtin_neon_vtbl3_v:
3375 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
3376 Ops, "vtbl3");
3377 case NEON::BI__builtin_neon_vtbl4_v:
3378 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
3379 Ops, "vtbl4");
3380 case NEON::BI__builtin_neon_vtbx1_v:
3381 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
3382 Ops, "vtbx1");
3383 case NEON::BI__builtin_neon_vtbx2_v:
3384 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
3385 Ops, "vtbx2");
3386 case NEON::BI__builtin_neon_vtbx3_v:
3387 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
3388 Ops, "vtbx3");
3389 case NEON::BI__builtin_neon_vtbx4_v:
3390 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
3391 Ops, "vtbx4");
3392 }
3393}
3394
3395template<typename Integer>
3396static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
3397  return E->getIntegerConstantExpr(Context)->getExtValue();
3398}
3399
3400static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
3401 llvm::Type *T, bool Unsigned) {
3402 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
3403 // which finds it convenient to specify signed/unsigned as a boolean flag.
3404 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
3405}
3406
3407static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
3408 uint32_t Shift, bool Unsigned) {
3409 // MVE helper function for integer shift right. This must handle signed vs
3410 // unsigned, and also deal specially with the case where the shift count is
3411 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
3412 // undefined behavior, but in MVE it's legal, so we must convert it to code
3413 // that is not undefined in IR.
3414 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
3415 ->getElementType()
3416 ->getPrimitiveSizeInBits();
3417 if (Shift == LaneBits) {
3418 // An unsigned shift of the full lane size always generates zero, so we can
3419 // simply emit a zero vector. A signed shift of the full lane size does the
3420 // same thing as shifting by one bit fewer.
3421 if (Unsigned)
3422 return llvm::Constant::getNullValue(V->getType());
3423 else
3424 --Shift;
3425 }
3426 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
3427}
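// Illustrative example: for an <8 x i16> operand, an unsigned shift right by
// 16 folds to an all-zero vector, while a signed shift right by 16 is emitted
// as 'ashr' by 15, which produces the same result as the MVE instruction.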
3428
3429static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
3430 // MVE-specific helper function for a vector splat, which infers the element
3431 // count of the output vector by knowing that MVE vectors are all 128 bits
3432 // wide.
3433 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
3434 return Builder.CreateVectorSplat(Elements, V);
3435}
3436
3437static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
3438 CodeGenFunction *CGF,
3439 llvm::Value *V,
3440 llvm::Type *DestType) {
3441 // Convert one MVE vector type into another by reinterpreting its in-register
3442 // format.
3443 //
3444  // On little-endian targets this is identical to a bitcast (which
3445  // reinterprets the memory format). On big-endian targets they're not
3446  // necessarily the same, because the register and memory formats map to
3447  // each other differently depending on the lane size.
3448 //
3449 // We generate a bitcast whenever we can (if we're little-endian, or if the
3450 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
3451 // that performs the different kind of reinterpretation.
3452 if (CGF->getTarget().isBigEndian() &&
3453 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
3454 return Builder.CreateCall(
3455 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
3456 {DestType, V->getType()}),
3457 V);
3458 } else {
3459 return Builder.CreateBitCast(V, DestType);
3460 }
3461}
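// Illustrative example: reinterpreting an <8 x i16> as a <4 x i32> is a plain
// bitcast on little-endian targets, but on big-endian targets (where the lane
// sizes differ) it becomes roughly
//   call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> %v)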
3462
3463static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
3464 // Make a shufflevector that extracts every other element of a vector (evens
3465 // or odds, as desired).
3466 SmallVector<int, 16> Indices;
3467 unsigned InputElements =
3468 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
3469 for (unsigned i = 0; i < InputElements; i += 2)
3470 Indices.push_back(i + Odd);
3471 return Builder.CreateShuffleVector(V, Indices);
3472}
3473
3474static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
3475 llvm::Value *V1) {
3476 // Make a shufflevector that interleaves two vectors element by element.
3477 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
3478 SmallVector<int, 16> Indices;
3479 unsigned InputElements =
3480 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
3481 for (unsigned i = 0; i < InputElements; i++) {
3482 Indices.push_back(i);
3483 Indices.push_back(i + InputElements);
3484 }
3485 return Builder.CreateShuffleVector(V0, V1, Indices);
3486}
3487
3488template<unsigned HighBit, unsigned OtherBits>
3489static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
3490 // MVE-specific helper function to make a vector splat of a constant such as
3491 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
3492 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
3493 unsigned LaneBits = T->getPrimitiveSizeInBits();
3494 uint32_t Value = HighBit << (LaneBits - 1);
3495 if (OtherBits)
3496 Value |= (1UL << (LaneBits - 1)) - 1;
3497 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
3498 return ARMMVEVectorSplat(Builder, Lane);
3499}
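// Illustrative example: with 16-bit lanes, ARMMVEConstantSplat<1, 0> splats
// 0x8000 (the INT16_MIN bit pattern), <0, 1> splats 0x7fff (INT16_MAX), and
// <1, 1> splats 0xffff (UINT16_MAX).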
3500
3501static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
3502 llvm::Value *V,
3503 unsigned ReverseWidth) {
3504 // MVE-specific helper function which reverses the elements of a
3505 // vector within every (ReverseWidth)-bit collection of lanes.
3506 SmallVector<int, 16> Indices;
3507 unsigned LaneSize = V->getType()->getScalarSizeInBits();
3508 unsigned Elements = 128 / LaneSize;
3509 unsigned Mask = ReverseWidth / LaneSize - 1;
3510 for (unsigned i = 0; i < Elements; i++)
3511 Indices.push_back(i ^ Mask);
3512 return Builder.CreateShuffleVector(V, Indices);
3513}
3514
3515static llvm::Value *ARMMVECreateSIToFP(CGBuilderTy &Builder,
3516 CodeGenFunction *CGF, llvm::Value *V,
3517 llvm::Type *Ty) {
3518 return Builder.CreateCall(
3519 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_fp_int, {Ty, V->getType()}),
3520 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 0)});
3521}
3522
3523static llvm::Value *ARMMVECreateUIToFP(CGBuilderTy &Builder,
3524 CodeGenFunction *CGF, llvm::Value *V,
3525 llvm::Type *Ty) {
3526 return Builder.CreateCall(
3527 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_fp_int, {Ty, V->getType()}),
3528 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)});
3529}
3530
3531static llvm::Value *ARMMVECreateFPToSI(CGBuilderTy &Builder,
3532 CodeGenFunction *CGF, llvm::Value *V,
3533 llvm::Type *Ty) {
3534 return Builder.CreateCall(
3535 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_int_fp, {Ty, V->getType()}),
3536 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 0)});
3537}
3538
3539static llvm::Value *ARMMVECreateFPToUI(CGBuilderTy &Builder,
3540 CodeGenFunction *CGF, llvm::Value *V,
3541 llvm::Type *Ty) {
3542 return Builder.CreateCall(
3543 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_int_fp, {Ty, V->getType()}),
3544 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)});
3545}
3546
3547Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
3548                                              const CallExpr *E,
3549                                              ReturnValueSlot ReturnValue,
3550                                              llvm::Triple::ArchType Arch) {
3551 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
3552 Intrinsic::ID IRIntr;
3553 unsigned NumVectors;
3554
3555 // Code autogenerated by Tablegen will handle all the simple builtins.
3556 switch (BuiltinID) {
3557 #include "clang/Basic/arm_mve_builtin_cg.inc"
3558
3559 // If we didn't match an MVE builtin id at all, go back to the
3560 // main EmitARMBuiltinExpr.
3561 default:
3562 return nullptr;
3563 }
3564
3565 // Anything that breaks from that switch is an MVE builtin that
3566 // needs handwritten code to generate.
3567
3568 switch (CustomCodeGenType) {
3569
3570 case CustomCodeGen::VLD24: {
3571    llvm::SmallVector<Value *, 4> Ops;
3572    llvm::SmallVector<llvm::Type *, 4> Tys;
3573
3574 auto MvecCType = E->getType();
3575 auto MvecLType = ConvertType(MvecCType);
3576 assert(MvecLType->isStructTy() &&
3577 "Return type for vld[24]q should be a struct");
3578 assert(MvecLType->getStructNumElements() == 1 &&
3579 "Return-type struct for vld[24]q should have one element");
3580 auto MvecLTypeInner = MvecLType->getStructElementType(0);
3581 assert(MvecLTypeInner->isArrayTy() &&
3582 "Return-type struct for vld[24]q should contain an array");
3583 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3584 "Array member of return-type struct vld[24]q has wrong length");
3585 auto VecLType = MvecLTypeInner->getArrayElementType();
3586
3587 Tys.push_back(VecLType);
3588
3589 auto Addr = E->getArg(0);
3590 Ops.push_back(EmitScalarExpr(Addr));
3591 Tys.push_back(ConvertType(Addr->getType()));
3592
3593 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
3594 Value *LoadResult = Builder.CreateCall(F, Ops);
3595 Value *MvecOut = PoisonValue::get(MvecLType);
3596 for (unsigned i = 0; i < NumVectors; ++i) {
3597 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
3598 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
3599 }
3600
3601 if (ReturnValue.isNull())
3602 return MvecOut;
3603 else
3604 return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
3605 }
3606
3607 case CustomCodeGen::VST24: {
3608    llvm::SmallVector<Value *, 4> Ops;
3609    llvm::SmallVector<llvm::Type *, 4> Tys;
3610
3611 auto Addr = E->getArg(0);
3612 Ops.push_back(EmitScalarExpr(Addr));
3613 Tys.push_back(ConvertType(Addr->getType()));
3614
3615 auto MvecCType = E->getArg(1)->getType();
3616 auto MvecLType = ConvertType(MvecCType);
3617 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
3618 assert(MvecLType->getStructNumElements() == 1 &&
3619 "Data-type struct for vst2q should have one element");
3620 auto MvecLTypeInner = MvecLType->getStructElementType(0);
3621 assert(MvecLTypeInner->isArrayTy() &&
3622 "Data-type struct for vst2q should contain an array");
3623 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3624           "Array member of data-type struct for vst[24]q has wrong length");
3625 auto VecLType = MvecLTypeInner->getArrayElementType();
3626
3627 Tys.push_back(VecLType);
3628
3629 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
3630 EmitAggExpr(E->getArg(1), MvecSlot);
3631 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
3632 for (unsigned i = 0; i < NumVectors; i++)
3633 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
3634
3635 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
3636 Value *ToReturn = nullptr;
3637 for (unsigned i = 0; i < NumVectors; i++) {
3638 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
3639 ToReturn = Builder.CreateCall(F, Ops);
3640 Ops.pop_back();
3641 }
3642 return ToReturn;
3643 }
3644 }
3645 llvm_unreachable("unknown custom codegen type.");
3646}
3647
3648Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
3649                                              const CallExpr *E,
3650                                              ReturnValueSlot ReturnValue,
3651                                              llvm::Triple::ArchType Arch) {
3652 switch (BuiltinID) {
3653 default:
3654 return nullptr;
3655#include "clang/Basic/arm_cde_builtin_cg.inc"
3656 }
3657}
3658
3659static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
3660 const CallExpr *E,
3661                                        SmallVectorImpl<Value *> &Ops,
3662                                        llvm::Triple::ArchType Arch) {
3663 unsigned int Int = 0;
3664 const char *s = nullptr;
3665
3666 switch (BuiltinID) {
3667 default:
3668 return nullptr;
3669 case NEON::BI__builtin_neon_vtbl1_v:
3670 case NEON::BI__builtin_neon_vqtbl1_v:
3671 case NEON::BI__builtin_neon_vqtbl1q_v:
3672 case NEON::BI__builtin_neon_vtbl2_v:
3673 case NEON::BI__builtin_neon_vqtbl2_v:
3674 case NEON::BI__builtin_neon_vqtbl2q_v:
3675 case NEON::BI__builtin_neon_vtbl3_v:
3676 case NEON::BI__builtin_neon_vqtbl3_v:
3677 case NEON::BI__builtin_neon_vqtbl3q_v:
3678 case NEON::BI__builtin_neon_vtbl4_v:
3679 case NEON::BI__builtin_neon_vqtbl4_v:
3680 case NEON::BI__builtin_neon_vqtbl4q_v:
3681 break;
3682 case NEON::BI__builtin_neon_vtbx1_v:
3683 case NEON::BI__builtin_neon_vqtbx1_v:
3684 case NEON::BI__builtin_neon_vqtbx1q_v:
3685 case NEON::BI__builtin_neon_vtbx2_v:
3686 case NEON::BI__builtin_neon_vqtbx2_v:
3687 case NEON::BI__builtin_neon_vqtbx2q_v:
3688 case NEON::BI__builtin_neon_vtbx3_v:
3689 case NEON::BI__builtin_neon_vqtbx3_v:
3690 case NEON::BI__builtin_neon_vqtbx3q_v:
3691 case NEON::BI__builtin_neon_vtbx4_v:
3692 case NEON::BI__builtin_neon_vqtbx4_v:
3693 case NEON::BI__builtin_neon_vqtbx4q_v:
3694 break;
3695 }
3696
3697 assert(E->getNumArgs() >= 3);
3698
3699 // Get the last argument, which specifies the vector type.
3700 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3701 std::optional<llvm::APSInt> Result =
3702      Arg->getIntegerConstantExpr(CGF.getContext());
3703  if (!Result)
3704 return nullptr;
3705
3706 // Determine the type of this overloaded NEON intrinsic.
3707 NeonTypeFlags Type = Result->getZExtValue();
3708 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
3709 if (!Ty)
3710 return nullptr;
3711
3712 CodeGen::CGBuilderTy &Builder = CGF.Builder;
3713
3714  // AArch64 scalar builtins are not overloaded; they do not have an extra
3715  // argument that specifies the vector type, so we need to handle each case.
3716 switch (BuiltinID) {
3717 case NEON::BI__builtin_neon_vtbl1_v: {
3718 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
3719 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3720 }
3721 case NEON::BI__builtin_neon_vtbl2_v: {
3722 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
3723 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3724 }
3725 case NEON::BI__builtin_neon_vtbl3_v: {
3726 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
3727 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3728 }
3729 case NEON::BI__builtin_neon_vtbl4_v: {
3730 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
3731 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3732 }
3733 case NEON::BI__builtin_neon_vtbx1_v: {
3734 Value *TblRes =
3735 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
3736 Intrinsic::aarch64_neon_tbl1, "vtbl1");
3737
3738 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
3739 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
3740 CmpRes = Builder.CreateSExt(CmpRes, Ty);
3741
3742 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
3743 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
3744 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
3745 }
3746 case NEON::BI__builtin_neon_vtbx2_v: {
3747 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
3748 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
3749 }
3750 case NEON::BI__builtin_neon_vtbx3_v: {
3751 Value *TblRes =
3752 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
3753 Intrinsic::aarch64_neon_tbl2, "vtbl2");
3754
3755 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
3756 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
3757 TwentyFourV);
3758 CmpRes = Builder.CreateSExt(CmpRes, Ty);
3759
3760 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
3761 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
3762 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
3763 }
3764 case NEON::BI__builtin_neon_vtbx4_v: {
3765 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
3766 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
3767 }
3768 case NEON::BI__builtin_neon_vqtbl1_v:
3769 case NEON::BI__builtin_neon_vqtbl1q_v:
3770 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
3771 case NEON::BI__builtin_neon_vqtbl2_v:
3772 case NEON::BI__builtin_neon_vqtbl2q_v:
3773 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
3774 case NEON::BI__builtin_neon_vqtbl3_v:
3775 case NEON::BI__builtin_neon_vqtbl3q_v:
3776 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
3777 case NEON::BI__builtin_neon_vqtbl4_v:
3778 case NEON::BI__builtin_neon_vqtbl4q_v:
3779 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
3780 case NEON::BI__builtin_neon_vqtbx1_v:
3781 case NEON::BI__builtin_neon_vqtbx1q_v:
3782 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
3783 case NEON::BI__builtin_neon_vqtbx2_v:
3784 case NEON::BI__builtin_neon_vqtbx2q_v:
3785 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
3786 case NEON::BI__builtin_neon_vqtbx3_v:
3787 case NEON::BI__builtin_neon_vqtbx3q_v:
3788 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
3789 case NEON::BI__builtin_neon_vqtbx4_v:
3790 case NEON::BI__builtin_neon_vqtbx4q_v:
3791 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
3792 }
3794
3795 if (!Int)
3796 return nullptr;
3797
3798 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
3799 return CGF.EmitNeonCall(F, Ops, s);
3800}
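// Editorial example (not part of the upstream source): a NEON call such as
//   uint8x16_t r = vqtbl1q_u8(tbl, idx);
// is handled by the vqtbl1q_v case above and lowers to a single
//   call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %tbl, <16 x i8> %idx)
// whereas the 64-bit vtbl3/vtbl4 and vtbx* forms are first packed into pairs
// of 128-bit vectors by packTBLDVectorList and, for vtbx1/vtbx3, merged with
// the fallback operand via the compare-and-mask sequence emitted above.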
3801
3802Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
3803 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
3804 Op = Builder.CreateBitCast(Op, Int16Ty);
3805 Value *V = PoisonValue::get(VTy);
3806 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3807 Op = Builder.CreateInsertElement(V, Op, CI);
3808 return Op;
3809}
3810
3811/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
3812/// access builtin. Only required if it can't be inferred from the base pointer
3813/// operand.
3814llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
3815 switch (TypeFlags.getMemEltType()) {
3816 case SVETypeFlags::MemEltTyDefault:
3817 return getEltType(TypeFlags);
3818 case SVETypeFlags::MemEltTyInt8:
3819 return Builder.getInt8Ty();
3820 case SVETypeFlags::MemEltTyInt16:
3821 return Builder.getInt16Ty();
3822 case SVETypeFlags::MemEltTyInt32:
3823 return Builder.getInt32Ty();
3824 case SVETypeFlags::MemEltTyInt64:
3825 return Builder.getInt64Ty();
3826 }
3827 llvm_unreachable("Unknown MemEltType");
3828}
3829
3830llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
3831 switch (TypeFlags.getEltType()) {
3832 default:
3833 llvm_unreachable("Invalid SVETypeFlag!");
3834
3835 case SVETypeFlags::EltTyMFloat8:
3836 case SVETypeFlags::EltTyInt8:
3837 return Builder.getInt8Ty();
3838 case SVETypeFlags::EltTyInt16:
3839 return Builder.getInt16Ty();
3840 case SVETypeFlags::EltTyInt32:
3841 return Builder.getInt32Ty();
3842 case SVETypeFlags::EltTyInt64:
3843 return Builder.getInt64Ty();
3844 case SVETypeFlags::EltTyInt128:
3845 return Builder.getInt128Ty();
3846
3847 case SVETypeFlags::EltTyFloat16:
3848 return Builder.getHalfTy();
3849 case SVETypeFlags::EltTyFloat32:
3850 return Builder.getFloatTy();
3851 case SVETypeFlags::EltTyFloat64:
3852 return Builder.getDoubleTy();
3853
3854 case SVETypeFlags::EltTyBFloat16:
3855 return Builder.getBFloatTy();
3856
3857 case SVETypeFlags::EltTyBool8:
3858 case SVETypeFlags::EltTyBool16:
3859 case SVETypeFlags::EltTyBool32:
3860 case SVETypeFlags::EltTyBool64:
3861 return Builder.getInt1Ty();
3862 }
3863}
3864
3865// Return the llvm predicate vector type corresponding to the specified element
3866// TypeFlags.
3867llvm::ScalableVectorType *
3868CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
3869 switch (TypeFlags.getEltType()) {
3870 default: llvm_unreachable("Unhandled SVETypeFlag!");
3871
3872 case SVETypeFlags::EltTyInt8:
3873 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3874 case SVETypeFlags::EltTyInt16:
3875 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3876 case SVETypeFlags::EltTyInt32:
3877 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3878 case SVETypeFlags::EltTyInt64:
3879 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3880
3881 case SVETypeFlags::EltTyBFloat16:
3882 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3883 case SVETypeFlags::EltTyFloat16:
3884 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3885 case SVETypeFlags::EltTyFloat32:
3886 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3887 case SVETypeFlags::EltTyFloat64:
3888 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3889
3890 case SVETypeFlags::EltTyBool8:
3891 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3892 case SVETypeFlags::EltTyBool16:
3893 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3894 case SVETypeFlags::EltTyBool32:
3895 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3896 case SVETypeFlags::EltTyBool64:
3897 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3898 }
3899}
3900
3901// Return the llvm vector type corresponding to the specified element TypeFlags.
3902llvm::ScalableVectorType *
3903CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
3904 switch (TypeFlags.getEltType()) {
3905 default:
3906 llvm_unreachable("Invalid SVETypeFlag!");
3907
3908 case SVETypeFlags::EltTyInt8:
3909 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
3910 case SVETypeFlags::EltTyInt16:
3911 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
3912 case SVETypeFlags::EltTyInt32:
3913 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
3914 case SVETypeFlags::EltTyInt64:
3915 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
3916
3917 case SVETypeFlags::EltTyMFloat8:
3918 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
3919 case SVETypeFlags::EltTyFloat16:
3920 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
3921 case SVETypeFlags::EltTyBFloat16:
3922 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
3923 case SVETypeFlags::EltTyFloat32:
3924 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
3925 case SVETypeFlags::EltTyFloat64:
3926 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
3927
3928 case SVETypeFlags::EltTyBool8:
3929 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3930 case SVETypeFlags::EltTyBool16:
3931 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3932 case SVETypeFlags::EltTyBool32:
3933 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3934 case SVETypeFlags::EltTyBool64:
3935 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3936 }
3937}
3938
3939llvm::Value *
3940CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
3941 Function *Ptrue =
3942 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
3943 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
3944}
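// Editorial note (illustrative, not from the upstream source): for 32-bit
// element flags this emits
//   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
// i.e. an all-true predicate built with the SV_ALL (31) pattern.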
3945
3946constexpr unsigned SVEBitsPerBlock = 128;
3947
3948static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
3949 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
3950 return llvm::ScalableVectorType::get(EltTy, NumElts);
3951}
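// Editorial example (not from the upstream source): with SVEBitsPerBlock = 128,
// an i32 element type maps to <vscale x 4 x i32> (128 / 32 = 4 elements per
// 128-bit granule), and an i64 element type maps to <vscale x 2 x i64>.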
3952
3953// Reinterpret the input predicate so that it can be used to correctly isolate
3954// the elements of the specified datatype.
3955Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
3956 llvm::ScalableVectorType *VTy) {
3957
3958 if (isa<TargetExtType>(Pred->getType()) &&
3959 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
3960 return Pred;
3961
3962 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
3963 if (Pred->getType() == RTy)
3964 return Pred;
3965
3966 unsigned IntID;
3967 llvm::Type *IntrinsicTy;
3968 switch (VTy->getMinNumElements()) {
3969 default:
3970 llvm_unreachable("unsupported element count!");
3971 case 1:
3972 case 2:
3973 case 4:
3974 case 8:
3975 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
3976 IntrinsicTy = RTy;
3977 break;
3978 case 16:
3979 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
3980 IntrinsicTy = Pred->getType();
3981 break;
3982 }
3983
3984 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
3985 Value *C = Builder.CreateCall(F, Pred);
3986 assert(C->getType() == RTy && "Unexpected return type!");
3987 return C;
3988}
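// Editorial example (not part of the upstream source): narrowing the ACLE
// svbool_t predicate for a 64-bit element operation emits
//   call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
// while the opposite direction uses @llvm.aarch64.sve.convert.to.svbool.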
3989
3990Value *CodeGenFunction::EmitSVEPredicateTupleCast(Value *PredTuple,
3991 llvm::StructType *Ty) {
3992 if (PredTuple->getType() == Ty)
3993 return PredTuple;
3994
3995 Value *Ret = llvm::PoisonValue::get(Ty);
3996 for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
3997 Value *Pred = Builder.CreateExtractValue(PredTuple, I);
3998 Pred = EmitSVEPredicateCast(
3999 Pred, cast<llvm::ScalableVectorType>(Ty->getTypeAtIndex(I)));
4000 Ret = Builder.CreateInsertValue(Ret, Pred, I);
4001 }
4002
4003 return Ret;
4004}
4005
4006Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
4007 SmallVectorImpl<Value *> &Ops,
4008 unsigned IntID) {
4009 auto *ResultTy = getSVEType(TypeFlags);
4010 auto *OverloadedTy =
4011 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
4012
4013 Function *F = nullptr;
4014 if (Ops[1]->getType()->isVectorTy())
4015 // This is the "vector base, scalar offset" case. In order to uniquely
4016 // map this built-in to an LLVM IR intrinsic, we need both the return type
4017 // and the type of the vector base.
4018 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
4019 else
4020 // This is the "scalar base, vector offset case". The type of the offset
4021 // is encoded in the name of the intrinsic. We only need to specify the
4022 // return type in order to uniquely map this built-in to an LLVM IR
4023 // intrinsic.
4024 F = CGM.getIntrinsic(IntID, OverloadedTy);
4025
4026 // At the ACLE level there's only one predicate type, svbool_t, which is
4027 // mapped to <n x 16 x i1>. However, this might be incompatible with the
4028 // actual type being loaded. For example, when loading doubles (i64) the
4029 // predicate should be <n x 2 x i1> instead. At the IR level the type of
4030 // the predicate and the data being loaded must match. Cast to the type
4031 // expected by the intrinsic. The intrinsic itself should be defined in
4032 // a way that enforces relations between parameter types.
4033 Ops[0] = EmitSVEPredicateCast(
4034 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
4035
4036 // Pass 0 when the offset is missing. This can only be applied when using
4037 // the "vector base" addressing mode for which ACLE allows no offset. The
4038 // corresponding LLVM IR always requires an offset.
4039 if (Ops.size() == 2) {
4040 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
4041 Ops.push_back(ConstantInt::get(Int64Ty, 0));
4042 }
4043
4044 // For "vector base, scalar index" scale the index so that it becomes a
4045 // scalar offset.
4046 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
4047 unsigned BytesPerElt =
4048 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
4049 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
4050 }
4051
4052 Value *Call = Builder.CreateCall(F, Ops);
4053
4054 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
4055 // other cases it's folded into a nop.
4056 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
4057 : Builder.CreateSExt(Call, ResultTy);
4058}
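// Editorial example (not part of the upstream source): for a "vector base,
// scalar index" gather of 64-bit elements such as svld1_gather_u64base_index_u64,
// the scalar index in Ops[2] is scaled to a byte offset above (shifted left by
// 3) and the call is emitted through an @llvm.aarch64.sve.ld1.gather.*
// intrinsic overloaded on both the result type and the type of the vector
// base, with the predicate already narrowed to <vscale x 2 x i1>.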
4059
4060Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
4061 SmallVectorImpl<Value *> &Ops,
4062 unsigned IntID) {
4063 auto *SrcDataTy = getSVEType(TypeFlags);
4064 auto *OverloadedTy =
4065 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
4066
4067 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
4068 // it's the first argument. Move it accordingly.
4069 Ops.insert(Ops.begin(), Ops.pop_back_val());
4070
4071 Function *F = nullptr;
4072 if (Ops[2]->getType()->isVectorTy())
4073 // This is the "vector base, scalar offset" case. In order to uniquely
4074 // map this built-in to an LLVM IR intrinsic, we need both the return type
4075 // and the type of the vector base.
4076 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
4077 else
4078 // This is the "scalar base, vector offset case". The type of the offset
4079 // is encoded in the name of the intrinsic. We only need to specify the
4080 // return type in order to uniquely map this built-in to an LLVM IR
4081 // intrinsic.
4082 F = CGM.getIntrinsic(IntID, OverloadedTy);
4083
4084 // Pass 0 when the offset is missing. This can only be applied when using
4085 // the "vector base" addressing mode for which ACLE allows no offset. The
4086 // corresponding LLVM IR always requires an offset.
4087 if (Ops.size() == 3) {
4088 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
4089 Ops.push_back(ConstantInt::get(Int64Ty, 0));
4090 }
4091
4092 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
4093 // folded into a nop.
4094 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
4095
4096 // At the ACLE level there's only one predicate type, svbool_t, which is
4097 // mapped to <n x 16 x i1>. However, this might be incompatible with the
4098 // actual type being stored. For example, when storing doubles (i64) the
4099 // predicate should be <n x 2 x i1> instead. At the IR level the type of
4100 // the predicate and the data being stored must match. Cast to the type
4101 // expected by the intrinsic. The intrinsic itself should be defined in
4102 // a way that enforces relations between parameter types.
4103 Ops[1] = EmitSVEPredicateCast(
4104 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
4105
4106 // For "vector base, scalar index" scale the index so that it becomes a
4107 // scalar offset.
4108 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
4109 unsigned BytesPerElt =
4110 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
4111 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
4112 }
4113
4114 return Builder.CreateCall(F, Ops);
4115}
4116
4117Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
4118 SmallVectorImpl<Value *> &Ops,
4119 unsigned IntID) {
4120 // The gather prefetches are overloaded on the vector input - this can either
4121 // be the vector of base addresses or vector of offsets.
4122 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
4123 if (!OverloadedTy)
4124 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
4125
4126 // Cast the predicate from svbool_t to the right number of elements.
4127 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
4128
4129 // vector + imm addressing modes
4130 if (Ops[1]->getType()->isVectorTy()) {
4131 if (Ops.size() == 3) {
4132 // Pass 0 for 'vector+imm' when the index is omitted.
4133 Ops.push_back(ConstantInt::get(Int64Ty, 0));
4134
4135 // The sv_prfop is the last operand in the builtin and IR intrinsic.
4136 std::swap(Ops[2], Ops[3]);
4137 } else {
4138 // Index needs to be passed as scaled offset.
4139 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
4140 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
4141 if (BytesPerElt > 1)
4142 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
4143 }
4144 }
4145
4146 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
4147 return Builder.CreateCall(F, Ops);
4148}
4149
4150Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
4151 SmallVectorImpl<Value *> &Ops,
4152 unsigned IntID) {
4153 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
4154 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
4155 Value *BasePtr = Ops[1];
4156
4157 // Does the load have an offset?
4158 if (Ops.size() > 2)
4159 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
4160
4161 Function *F = CGM.getIntrinsic(IntID, {VTy});
4162 return Builder.CreateCall(F, {Predicate, BasePtr});
4163}
4164
4165Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
4166 SmallVectorImpl<Value *> &Ops,
4167 unsigned IntID) {
4168 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
4169
4170 unsigned N;
4171 switch (IntID) {
4172 case Intrinsic::aarch64_sve_st2:
4173 case Intrinsic::aarch64_sve_st1_pn_x2:
4174 case Intrinsic::aarch64_sve_stnt1_pn_x2:
4175 case Intrinsic::aarch64_sve_st2q:
4176 N = 2;
4177 break;
4178 case Intrinsic::aarch64_sve_st3:
4179 case Intrinsic::aarch64_sve_st3q:
4180 N = 3;
4181 break;
4182 case Intrinsic::aarch64_sve_st4:
4183 case Intrinsic::aarch64_sve_st1_pn_x4:
4184 case Intrinsic::aarch64_sve_stnt1_pn_x4:
4185 case Intrinsic::aarch64_sve_st4q:
4186 N = 4;
4187 break;
4188 default:
4189 llvm_unreachable("unknown intrinsic!");
4190 }
4191
4192 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
4193 Value *BasePtr = Ops[1];
4194
4195 // Does the store have an offset?
4196 if (Ops.size() > (2 + N))
4197 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
4198
4199 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
4200 // need to break up the tuple vector.
4201 SmallVector<llvm::Value*, 5> Operands;
4202 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
4203 Operands.push_back(Ops[I]);
4204 Operands.append({Predicate, BasePtr});
4205 Function *F = CGM.getIntrinsic(IntID, { VTy });
4206
4207 return Builder.CreateCall(F, Operands);
4208}
4209
4210// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
4211// svpmullt_pair intrinsics, with the exception that their results are bitcast
4212// to a wider type.
4213Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
4214 SmallVectorImpl<Value *> &Ops,
4215 unsigned BuiltinID) {
4216 // Splat scalar operand to vector (intrinsics with _n infix)
4217 if (TypeFlags.hasSplatOperand()) {
4218 unsigned OpNo = TypeFlags.getSplatOperand();
4219 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
4220 }
4221
4222 // The pair-wise function has a narrower overloaded type.
4223 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
4224 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
4225
4226 // Now bitcast to the wider result type.
4227 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
4228 return EmitSVEReinterpret(Call, Ty);
4229}
4230
4231Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
4232 ArrayRef<Value *> Ops, unsigned BuiltinID) {
4233 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
4234 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
4235 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
4236}
4237
4238Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
4239 SmallVectorImpl<Value *> &Ops,
4240 unsigned BuiltinID) {
4241 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
4242 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
4243 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
4244
4245 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
4246 Value *BasePtr = Ops[1];
4247
4248 // Apply the index operand if it is not omitted.
4249 if (Ops.size() > 3)
4250 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
4251
4252 Value *PrfOp = Ops.back();
4253
4254 Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
4255 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
4256}
4257
4258Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
4259 llvm::Type *ReturnTy,
4260 SmallVectorImpl<Value *> &Ops,
4261 unsigned IntrinsicID,
4262 bool IsZExtReturn) {
4263 QualType LangPTy = E->getArg(1)->getType();
4264 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
4265 LangPTy->castAs<PointerType>()->getPointeeType());
4266
4267 // The Mfloat8 type is stored as a vector, so extra work is needed
4268 // to extract the scalar element type.
4269 if (MemEltTy->isVectorTy()) {
4270 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
4271 "Only <1 x i8> expected");
4272 MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
4273 }
4274
4275 // The vector type that is returned may be different from the
4276 // eventual type loaded from memory.
4277 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
4278 llvm::ScalableVectorType *MemoryTy = nullptr;
4279 llvm::ScalableVectorType *PredTy = nullptr;
4280 bool IsQuadLoad = false;
4281 switch (IntrinsicID) {
4282 case Intrinsic::aarch64_sve_ld1uwq:
4283 case Intrinsic::aarch64_sve_ld1udq:
4284 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
4285 PredTy = llvm::ScalableVectorType::get(
4286 llvm::Type::getInt1Ty(getLLVMContext()), 1);
4287 IsQuadLoad = true;
4288 break;
4289 default:
4290 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
4291 PredTy = MemoryTy;
4292 break;
4293 }
4294
4295 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
4296 Value *BasePtr = Ops[1];
4297
4298 // Does the load have an offset?
4299 if (Ops.size() > 2)
4300 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
4301
4302 Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
4303 auto *Load =
4304 cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
4305 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
4306 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
4307
4308 if (IsQuadLoad)
4309 return Load;
4310
4311 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
4312 : Builder.CreateSExt(Load, VectorTy);
4313}
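// Editorial example (not part of the upstream source): svld1ub_u32 loads a
// <vscale x 4 x i8> value through the underlying ld1 intrinsic and the
// IsZExtReturn path above zero-extends it to the <vscale x 4 x i32> result;
// the signed svld1sb_* variants take the sign-extending branch instead.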
4314
4315Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
4316 SmallVectorImpl<Value *> &Ops,
4317 unsigned IntrinsicID) {
4318 QualType LangPTy = E->getArg(1)->getType();
4319 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
4320 LangPTy->castAs<PointerType>()->getPointeeType());
4321
4322 // The Mfloat8 type is stored as a vector, so extra work is needed
4323 // to extract the scalar element type.
4324 if (MemEltTy->isVectorTy()) {
4325 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
4326 "Only <1 x i8> expected");
4327 MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
4328 }
4329
4330 // The vector type that is stored may be different from the
4331 // eventual type stored to memory.
4332 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
4333 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
4334
4335 auto PredTy = MemoryTy;
4336 auto AddrMemoryTy = MemoryTy;
4337 bool IsQuadStore = false;
4338
4339 switch (IntrinsicID) {
4340 case Intrinsic::aarch64_sve_st1wq:
4341 case Intrinsic::aarch64_sve_st1dq:
4342 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
4343 PredTy =
4344 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
4345 IsQuadStore = true;
4346 break;
4347 default:
4348 break;
4349 }
4350 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
4351 Value *BasePtr = Ops[1];
4352
4353 // Does the store have an offset?
4354 if (Ops.size() == 4)
4355 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
4356
4357 // Last value is always the data
4358 Value *Val =
4359 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
4360
4361 Function *F =
4362 CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
4363 auto *Store =
4364 cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
4365 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
4366 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
4367 return Store;
4368}
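// Editorial example (not part of the upstream source): svst1b_u32 truncates the
// <vscale x 4 x i32> data to <vscale x 4 x i8> via the CreateTrunc above before
// emitting the underlying st1 intrinsic, with the predicate narrowed to
// <vscale x 4 x i1> to match the stored element count.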
4369
4370Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
4371 SmallVectorImpl<Value *> &Ops,
4372 unsigned IntID) {
4373 Ops[2] = EmitSVEPredicateCast(
4374 Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
4375
4376 SmallVector<Value *> NewOps;
4377 NewOps.push_back(Ops[2]);
4378
4379 llvm::Value *BasePtr = Ops[3];
4380 llvm::Value *RealSlice = Ops[1];
4381 // If the intrinsic contains the vnum parameter, multiply it by the vector
4382 // size in bytes.
4383 if (Ops.size() == 5) {
4384 Function *StreamingVectorLength =
4385 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd);
4386 llvm::Value *StreamingVectorLengthCall =
4387 Builder.CreateMul(Builder.CreateCall(StreamingVectorLength),
4388 llvm::ConstantInt::get(Int64Ty, 8), "svl",
4389 /* HasNUW */ true, /* HasNSW */ true);
4390 llvm::Value *Mulvl =
4391 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
4392 // The type of the ptr parameter is void *, so use Int8Ty here.
4393 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
4394 RealSlice = Builder.CreateZExt(RealSlice, Int64Ty);
4395 RealSlice = Builder.CreateAdd(RealSlice, Ops[4]);
4396 RealSlice = Builder.CreateTrunc(RealSlice, Int32Ty);
4397 }
4398 NewOps.push_back(BasePtr);
4399 NewOps.push_back(Ops[0]);
4400 NewOps.push_back(RealSlice);
4401 Function *F = CGM.getIntrinsic(IntID);
4402 return Builder.CreateCall(F, NewOps);
4403}
4404
4405Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
4406 SmallVectorImpl<Value *> &Ops,
4407 unsigned IntID) {
4408 auto *VecTy = getSVEType(TypeFlags);
4409 Function *F = CGM.getIntrinsic(IntID, VecTy);
4410 if (TypeFlags.isReadZA())
4411 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
4412 else if (TypeFlags.isWriteZA())
4413 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
4414 return Builder.CreateCall(F, Ops);
4415}
4416
4417Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
4418 SmallVectorImpl<Value *> &Ops,
4419 unsigned IntID) {
4420 // The svzero_za() intrinsic zeroes the entire ZA array and has no parameters.
4421 if (Ops.size() == 0)
4422 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
4423 Function *F = CGM.getIntrinsic(IntID, {});
4424 return Builder.CreateCall(F, Ops);
4425}
4426
4427Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
4428 SmallVectorImpl<Value *> &Ops,
4429 unsigned IntID) {
4430 if (Ops.size() == 2)
4431 Ops.push_back(Builder.getInt32(0));
4432 else
4433 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
4434 Function *F = CGM.getIntrinsic(IntID, {});
4435 return Builder.CreateCall(F, Ops);
4436}
4437
4438// Limit the usage of scalable llvm IR generated by the ACLE by using the
4439// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
4440Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
4441 return Builder.CreateVectorSplat(
4442 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
4443}
4444
4445Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
4446 if (auto *Ty = Scalar->getType(); Ty->isVectorTy()) {
4447#ifndef NDEBUG
4448 auto *VecTy = cast<llvm::VectorType>(Ty);
4449 ElementCount EC = VecTy->getElementCount();
4450 assert(EC.isScalar() && VecTy->getElementType() == Int8Ty &&
4451 "Only <1 x i8> expected");
4452#endif
4453 Scalar = Builder.CreateExtractElement(Scalar, uint64_t(0));
4454 }
4455 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
4456}
4457
4458Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
4459 // FIXME: For big endian this needs an additional REV, or needs a separate
4460 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
4461 // instruction is defined as 'bitwise' equivalent from memory point of
4462 // view (when storing/reloading), whereas the svreinterpret builtin
4463 // implements bitwise equivalent cast from register point of view.
4464 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
4465
4466 if (auto *StructTy = dyn_cast<StructType>(Ty)) {
4467 Value *Tuple = llvm::PoisonValue::get(Ty);
4468
4469 for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
4470 Value *In = Builder.CreateExtractValue(Val, I);
4471 Value *Out = Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
4472 Tuple = Builder.CreateInsertValue(Tuple, Out, I);
4473 }
4474
4475 return Tuple;
4476 }
4477
4478 return Builder.CreateBitCast(Val, Ty);
4479}
4480
4481static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
4482 SmallVectorImpl<Value *> &Ops) {
4483 auto *SplatZero = Constant::getNullValue(Ty);
4484 Ops.insert(Ops.begin(), SplatZero);
4485}
4486
4487static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
4488 SmallVectorImpl<Value *> &Ops) {
4489 auto *SplatUndef = UndefValue::get(Ty);
4490 Ops.insert(Ops.begin(), SplatUndef);
4491}
4492
4493SmallVector<llvm::Type *, 2>
4494CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
4495 llvm::Type *ResultType,
4496 ArrayRef<Value *> Ops) {
4497 if (TypeFlags.isOverloadNone())
4498 return {};
4499
4500 llvm::Type *DefaultType = getSVEType(TypeFlags);
4501
4502 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
4503 return {DefaultType, Ops[1]->getType()};
4504
4505 if (TypeFlags.isOverloadWhileRW())
4506 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
4507
4508 if (TypeFlags.isOverloadFirstandLast())
4509 return {Ops[0]->getType(), Ops.back()->getType()};
4510
4511 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
4512 ResultType->isVectorTy())
4513 return {ResultType, Ops[1]->getType()};
4514
4515 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
4516 return {DefaultType};
4517}
4518
4519Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
4520 ArrayRef<Value *> Ops) {
4521 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
4522 "Expects TypeFlags.isTupleSet() or TypeFlags.isTupleGet()");
4523 unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();
4524
4525 if (TypeFlags.isTupleSet())
4526 return Builder.CreateInsertValue(Ops[0], Ops[2], Idx);
4527 return Builder.CreateExtractValue(Ops[0], Idx);
4528}
4529
4530Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
4531 llvm::Type *Ty,
4532 ArrayRef<Value *> Ops) {
4533 assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
4534
4535 Value *Tuple = llvm::PoisonValue::get(Ty);
4536 for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
4537 Tuple = Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
4538
4539 return Tuple;
4540}
4541
4542void CodeGenFunction::GetAArch64SVEProcessedOperands(
4543 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
4544 SVETypeFlags TypeFlags) {
4545 // Find out if any arguments are required to be integer constant expressions.
4546 unsigned ICEArguments = 0;
4547 ASTContext::GetBuiltinTypeError Error;
4548 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4549 assert(Error == ASTContext::GE_None && "Should not codegen an error");
4550
4551 // Tuple set/get only requires one insert/extract vector, which is
4552 // created by EmitSVETupleSetOrGet.
4553 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
4554
4555 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
4556 bool IsICE = ICEArguments & (1 << i);
4557 Value *Arg = EmitScalarExpr(E->getArg(i));
4558
4559 if (IsICE) {
4560 // If this is required to be a constant, constant fold it so that we know
4561 // that the generated intrinsic gets a ConstantInt.
4562 std::optional<llvm::APSInt> Result =
4563 E->getArg(i)->getIntegerConstantExpr(getContext());
4564 assert(Result && "Expected argument to be a constant");
4565
4566 // Immediates for SVE llvm intrinsics are always 32bit. We can safely
4567 // truncate because the immediate has been range checked and no valid
4568 // immediate requires more than a handful of bits.
4569 *Result = Result->extOrTrunc(32);
4570 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
4571 continue;
4572 }
4573
4574 if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
4575 for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
4576 Ops.push_back(Builder.CreateExtractValue(Arg, I));
4577
4578 continue;
4579 }
4580
4581 Ops.push_back(Arg);
4582 }
4583}
4584
4585Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
4586 const CallExpr *E) {
4587 llvm::Type *Ty = ConvertType(E->getType());
4588 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
4589 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
4590 Value *Val = EmitScalarExpr(E->getArg(0));
4591 return EmitSVEReinterpret(Val, Ty);
4592 }
4593
4594 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
4595 AArch64SVEIntrinsicsProvenSorted);
4596
4597 llvm::SmallVector<Value *, 4> Ops;
4598 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4599 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4600
4601 if (TypeFlags.isLoad())
4602 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
4603 TypeFlags.isZExtReturn());
4604 else if (TypeFlags.isStore())
4605 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
4606 else if (TypeFlags.isGatherLoad())
4607 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4608 else if (TypeFlags.isScatterStore())
4609 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4610 else if (TypeFlags.isPrefetch())
4611 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4612 else if (TypeFlags.isGatherPrefetch())
4613 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4614 else if (TypeFlags.isStructLoad())
4615 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4616 else if (TypeFlags.isStructStore())
4617 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4618 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
4619 return EmitSVETupleSetOrGet(TypeFlags, Ops);
4620 else if (TypeFlags.isTupleCreate())
4621 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
4622 else if (TypeFlags.isUndef())
4623 return UndefValue::get(Ty);
4624 else if (Builtin->LLVMIntrinsic != 0) {
4625 // Emit set FPMR for intrinsics that require it
4626 if (TypeFlags.setsFPMR())
4627 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
4628 Ops.pop_back_val());
4629 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
4630 InsertExplicitZeroOperand(Builder, Ty, Ops);
4631
4632 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
4633 InsertExplicitUndefOperand(Builder, Ty, Ops);
4634
4635 // Some ACLE builtins leave out the argument to specify the predicate
4636 // pattern, which is expected to be expanded to an SV_ALL pattern.
4637 if (TypeFlags.isAppendSVALL())
4638 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
4639 if (TypeFlags.isInsertOp1SVALL())
4640 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
4641
4642 // Predicates must match the main datatype.
4643 for (Value *&Op : Ops)
4644 if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
4645 if (PredTy->getElementType()->isIntegerTy(1))
4646 Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));
4647
4648 // Splat scalar operand to vector (intrinsics with _n infix)
4649 if (TypeFlags.hasSplatOperand()) {
4650 unsigned OpNo = TypeFlags.getSplatOperand();
4651 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
4652 }
4653
4654 if (TypeFlags.isReverseCompare())
4655 std::swap(Ops[1], Ops[2]);
4656 else if (TypeFlags.isReverseUSDOT())
4657 std::swap(Ops[1], Ops[2]);
4658 else if (TypeFlags.isReverseMergeAnyBinOp() &&
4659 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4660 std::swap(Ops[1], Ops[2]);
4661 else if (TypeFlags.isReverseMergeAnyAccOp() &&
4662 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4663 std::swap(Ops[1], Ops[3]);
4664
4665 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
4666 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
4667 llvm::Type *OpndTy = Ops[1]->getType();
4668 auto *SplatZero = Constant::getNullValue(OpndTy);
4669 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
4670 }
4671
4672 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
4673 getSVEOverloadTypes(TypeFlags, Ty, Ops));
4674 Value *Call = Builder.CreateCall(F, Ops);
4675
4676 if (Call->getType() == Ty)
4677 return Call;
4678
4679 // Predicate results must be converted to svbool_t.
4680 if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ty))
4681 return EmitSVEPredicateCast(Call, PredTy);
4682 if (auto PredTupleTy = dyn_cast<llvm::StructType>(Ty))
4683 return EmitSVEPredicateTupleCast(Call, PredTupleTy);
4684
4685 llvm_unreachable("unsupported element count!");
4686 }
4687
4688 switch (BuiltinID) {
4689 default:
4690 return nullptr;
4691
4692 case SVE::BI__builtin_sve_svreinterpret_b: {
4693 auto SVCountTy =
4694 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4695 Function *CastFromSVCountF =
4696 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4697 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
4698 }
4699 case SVE::BI__builtin_sve_svreinterpret_c: {
4700 auto SVCountTy =
4701 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4702 Function *CastToSVCountF =
4703 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4704 return Builder.CreateCall(CastToSVCountF, Ops[0]);
4705 }
4706
4707 case SVE::BI__builtin_sve_svpsel_lane_b8:
4708 case SVE::BI__builtin_sve_svpsel_lane_b16:
4709 case SVE::BI__builtin_sve_svpsel_lane_b32:
4710 case SVE::BI__builtin_sve_svpsel_lane_b64:
4711 case SVE::BI__builtin_sve_svpsel_lane_c8:
4712 case SVE::BI__builtin_sve_svpsel_lane_c16:
4713 case SVE::BI__builtin_sve_svpsel_lane_c32:
4714 case SVE::BI__builtin_sve_svpsel_lane_c64: {
4715 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
4716 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
4717 "aarch64.svcount")) &&
4718 "Unexpected TargetExtType");
4719 auto SVCountTy =
4720 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4721 Function *CastFromSVCountF =
4722 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4723 Function *CastToSVCountF =
4724 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4725
4726 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
4727 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
4728 llvm::Value *Ops0 =
4729 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
4730 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
4731 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
4732 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
4733 }
4734 case SVE::BI__builtin_sve_svmov_b_z: {
4735 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
4736 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4737 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4738 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
4739 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
4740 }
4741
4742 case SVE::BI__builtin_sve_svnot_b_z: {
4743 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
4744 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4745 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4746 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
4747 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
4748 }
4749
4750 case SVE::BI__builtin_sve_svmovlb_u16:
4751 case SVE::BI__builtin_sve_svmovlb_u32:
4752 case SVE::BI__builtin_sve_svmovlb_u64:
4753 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
4754
4755 case SVE::BI__builtin_sve_svmovlb_s16:
4756 case SVE::BI__builtin_sve_svmovlb_s32:
4757 case SVE::BI__builtin_sve_svmovlb_s64:
4758 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
4759
4760 case SVE::BI__builtin_sve_svmovlt_u16:
4761 case SVE::BI__builtin_sve_svmovlt_u32:
4762 case SVE::BI__builtin_sve_svmovlt_u64:
4763 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
4764
4765 case SVE::BI__builtin_sve_svmovlt_s16:
4766 case SVE::BI__builtin_sve_svmovlt_s32:
4767 case SVE::BI__builtin_sve_svmovlt_s64:
4768 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
4769
4770 case SVE::BI__builtin_sve_svpmullt_u16:
4771 case SVE::BI__builtin_sve_svpmullt_u64:
4772 case SVE::BI__builtin_sve_svpmullt_n_u16:
4773 case SVE::BI__builtin_sve_svpmullt_n_u64:
4774 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
4775
4776 case SVE::BI__builtin_sve_svpmullb_u16:
4777 case SVE::BI__builtin_sve_svpmullb_u64:
4778 case SVE::BI__builtin_sve_svpmullb_n_u16:
4779 case SVE::BI__builtin_sve_svpmullb_n_u64:
4780 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
4781
4782 case SVE::BI__builtin_sve_svdup_n_b8:
4783 case SVE::BI__builtin_sve_svdup_n_b16:
4784 case SVE::BI__builtin_sve_svdup_n_b32:
4785 case SVE::BI__builtin_sve_svdup_n_b64: {
4786 Value *CmpNE =
4787 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
4788 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
4789 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
4790 return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
4791 }
4792
4793 case SVE::BI__builtin_sve_svdupq_n_b8:
4794 case SVE::BI__builtin_sve_svdupq_n_b16:
4795 case SVE::BI__builtin_sve_svdupq_n_b32:
4796 case SVE::BI__builtin_sve_svdupq_n_b64:
4797 case SVE::BI__builtin_sve_svdupq_n_u8:
4798 case SVE::BI__builtin_sve_svdupq_n_s8:
4799 case SVE::BI__builtin_sve_svdupq_n_u64:
4800 case SVE::BI__builtin_sve_svdupq_n_f64:
4801 case SVE::BI__builtin_sve_svdupq_n_s64:
4802 case SVE::BI__builtin_sve_svdupq_n_u16:
4803 case SVE::BI__builtin_sve_svdupq_n_f16:
4804 case SVE::BI__builtin_sve_svdupq_n_bf16:
4805 case SVE::BI__builtin_sve_svdupq_n_s16:
4806 case SVE::BI__builtin_sve_svdupq_n_u32:
4807 case SVE::BI__builtin_sve_svdupq_n_f32:
4808 case SVE::BI__builtin_sve_svdupq_n_s32: {
4809 // These builtins are implemented by building a fixed-length vector from the
4810 // scalar operands and splatting it across the scalable vector with dupq_lane.
4811 unsigned NumOpnds = Ops.size();
4812
4813 bool IsBoolTy =
4814 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
4815
4816 // For svdupq_n_b* the element type is an integer of width 128/numelts,
4817 // so that the compare can use the width that is natural for the expected
4818 // number of predicate lanes.
4819 llvm::Type *EltTy = Ops[0]->getType();
4820 if (IsBoolTy)
4821 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
4822
4822
4823 SmallVector<llvm::Value *, 16> VecOps;
4824 for (unsigned I = 0; I < NumOpnds; ++I)
4825 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
4826 Value *Vec = BuildVector(VecOps);
4827
4828 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
4829 Value *InsertSubVec = Builder.CreateInsertVector(
4830 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, uint64_t(0));
4831
4832 Function *F =
4833 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
4834 Value *DupQLane =
4835 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
4836
4837 if (!IsBoolTy)
4838 return DupQLane;
4839
4840 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4841 Value *Pred = EmitSVEAllTruePred(TypeFlags);
4842
4843 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
4844 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
4845 : Intrinsic::aarch64_sve_cmpne_wide,
4846 OverloadedTy);
4847 Value *Call = Builder.CreateCall(
4848 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
4849 return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
4850 }
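// Editorial worked example (not part of the upstream source): svdupq_n_b8
// receives 16 boolean operands, so EltTy becomes i8 (128/16); the operands
// are zero-extended, packed into a <16 x i8>, splatted across the vector
// with dupq_lane, and then compared not-equal to zero under an all-true
// predicate to produce the final <vscale x 16 x i1> result.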
4851
4852 case SVE::BI__builtin_sve_svpfalse_b:
4853 return ConstantInt::getFalse(Ty);
4854
4855 case SVE::BI__builtin_sve_svpfalse_c: {
4856 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
4857 Function *CastToSVCountF =
4858 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
4859 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
4860 }
4861
4862 case SVE::BI__builtin_sve_svlen_bf16:
4863 case SVE::BI__builtin_sve_svlen_f16:
4864 case SVE::BI__builtin_sve_svlen_f32:
4865 case SVE::BI__builtin_sve_svlen_f64:
4866 case SVE::BI__builtin_sve_svlen_s8:
4867 case SVE::BI__builtin_sve_svlen_s16:
4868 case SVE::BI__builtin_sve_svlen_s32:
4869 case SVE::BI__builtin_sve_svlen_s64:
4870 case SVE::BI__builtin_sve_svlen_u8:
4871 case SVE::BI__builtin_sve_svlen_u16:
4872 case SVE::BI__builtin_sve_svlen_u32:
4873 case SVE::BI__builtin_sve_svlen_u64: {
4874 SVETypeFlags TF(Builtin->TypeModifier);
4875 return Builder.CreateElementCount(Ty, getSVEType(TF)->getElementCount());
4876 }
4877
4878 case SVE::BI__builtin_sve_svtbl2_u8:
4879 case SVE::BI__builtin_sve_svtbl2_s8:
4880 case SVE::BI__builtin_sve_svtbl2_u16:
4881 case SVE::BI__builtin_sve_svtbl2_s16:
4882 case SVE::BI__builtin_sve_svtbl2_u32:
4883 case SVE::BI__builtin_sve_svtbl2_s32:
4884 case SVE::BI__builtin_sve_svtbl2_u64:
4885 case SVE::BI__builtin_sve_svtbl2_s64:
4886 case SVE::BI__builtin_sve_svtbl2_f16:
4887 case SVE::BI__builtin_sve_svtbl2_bf16:
4888 case SVE::BI__builtin_sve_svtbl2_f32:
4889 case SVE::BI__builtin_sve_svtbl2_f64: {
4890 SVETypeFlags TF(Builtin->TypeModifier);
4891 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, getSVEType(TF));
4892 return Builder.CreateCall(F, Ops);
4893 }
4894
4895 case SVE::BI__builtin_sve_svset_neonq_s8:
4896 case SVE::BI__builtin_sve_svset_neonq_s16:
4897 case SVE::BI__builtin_sve_svset_neonq_s32:
4898 case SVE::BI__builtin_sve_svset_neonq_s64:
4899 case SVE::BI__builtin_sve_svset_neonq_u8:
4900 case SVE::BI__builtin_sve_svset_neonq_u16:
4901 case SVE::BI__builtin_sve_svset_neonq_u32:
4902 case SVE::BI__builtin_sve_svset_neonq_u64:
4903 case SVE::BI__builtin_sve_svset_neonq_f16:
4904 case SVE::BI__builtin_sve_svset_neonq_f32:
4905 case SVE::BI__builtin_sve_svset_neonq_f64:
4906 case SVE::BI__builtin_sve_svset_neonq_bf16: {
4907 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], uint64_t(0));
4908 }
4909
4910 case SVE::BI__builtin_sve_svget_neonq_s8:
4911 case SVE::BI__builtin_sve_svget_neonq_s16:
4912 case SVE::BI__builtin_sve_svget_neonq_s32:
4913 case SVE::BI__builtin_sve_svget_neonq_s64:
4914 case SVE::BI__builtin_sve_svget_neonq_u8:
4915 case SVE::BI__builtin_sve_svget_neonq_u16:
4916 case SVE::BI__builtin_sve_svget_neonq_u32:
4917 case SVE::BI__builtin_sve_svget_neonq_u64:
4918 case SVE::BI__builtin_sve_svget_neonq_f16:
4919 case SVE::BI__builtin_sve_svget_neonq_f32:
4920 case SVE::BI__builtin_sve_svget_neonq_f64:
4921 case SVE::BI__builtin_sve_svget_neonq_bf16: {
4922 return Builder.CreateExtractVector(Ty, Ops[0], uint64_t(0));
4923 }
4924
4925 case SVE::BI__builtin_sve_svdup_neonq_s8:
4926 case SVE::BI__builtin_sve_svdup_neonq_s16:
4927 case SVE::BI__builtin_sve_svdup_neonq_s32:
4928 case SVE::BI__builtin_sve_svdup_neonq_s64:
4929 case SVE::BI__builtin_sve_svdup_neonq_u8:
4930 case SVE::BI__builtin_sve_svdup_neonq_u16:
4931 case SVE::BI__builtin_sve_svdup_neonq_u32:
4932 case SVE::BI__builtin_sve_svdup_neonq_u64:
4933 case SVE::BI__builtin_sve_svdup_neonq_f16:
4934 case SVE::BI__builtin_sve_svdup_neonq_f32:
4935 case SVE::BI__builtin_sve_svdup_neonq_f64:
4936 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
4937 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
4938 uint64_t(0));
4939 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
4940 {Insert, Builder.getInt64(0)});
4941 }
4942 }
4943
4944 // Should not happen.
4945 return nullptr;
4946}
4947
4948static void swapCommutativeSMEOperands(unsigned BuiltinID,
4949 SmallVectorImpl<Value *> &Ops) {
4950 unsigned MultiVec;
4951 switch (BuiltinID) {
4952 default:
4953 return;
4954 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
4955 MultiVec = 1;
4956 break;
4957 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
4958 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
4959 MultiVec = 2;
4960 break;
4961 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
4962 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
4963 MultiVec = 4;
4964 break;
4965 }
4966
4967 if (MultiVec > 0)
4968 for (unsigned I = 0; I < MultiVec; ++I)
4969 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
4970}
4971
4972Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
4973 const CallExpr *E) {
4974 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
4975 AArch64SMEIntrinsicsProvenSorted);
4976
4977 llvm::SmallVector<Value *, 4> Ops;
4978 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4979 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4980
4981 if (TypeFlags.isLoad() || TypeFlags.isStore())
4982 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4983 else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
4984 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4985 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
4986 BuiltinID == SME::BI__builtin_sme_svzero_za)
4987 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4988 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
4989 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
4990 BuiltinID == SME::BI__builtin_sme_svldr_za ||
4991 BuiltinID == SME::BI__builtin_sme_svstr_za)
4992 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4993
4994 // Emit set FPMR for intrinsics that require it
4995 if (TypeFlags.setsFPMR())
4996 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
4997 Ops.pop_back_val());
4998 // Handle builtins which require their multi-vector operands to be swapped
4999 swapCommutativeSMEOperands(BuiltinID, Ops);
5000
5001 auto isCntsBuiltin = [&]() {
5002 switch (BuiltinID) {
5003 default:
5004 return 0;
5005 case SME::BI__builtin_sme_svcntsb:
5006 return 8;
5007 case SME::BI__builtin_sme_svcntsh:
5008 return 4;
5009 case SME::BI__builtin_sme_svcntsw:
5010 return 2;
5011 }
5012 };
5013
5014 if (auto Mul = isCntsBuiltin()) {
5015 llvm::Value *Cntd =
5016 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd));
5017 return Builder.CreateMul(Cntd, llvm::ConstantInt::get(Int64Ty, Mul),
5018 "mulsvl", /* HasNUW */ true, /* HasNSW */ true);
5019 }
5020
5021 // Should not happen!
5022 if (Builtin->LLVMIntrinsic == 0)
5023 return nullptr;
5024
5025 // Predicates must match the main datatype.
5026 for (Value *&Op : Ops)
5027 if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
5028 if (PredTy->getElementType()->isIntegerTy(1))
5029 Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));
5030
5031 Function *F =
5032 TypeFlags.isOverloadNone()
5033 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
5034 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
5035
5036 return Builder.CreateCall(F, Ops);
5037}
5038
5039/// Helper for the read/write/add/inc X18 builtins: read the X18 register and
5040/// return it as an i8 pointer.
5041static llvm::Value *readX18AsPtr(CodeGenFunction &CGF) {
5042 LLVMContext &Context = CGF.CGM.getLLVMContext();
5043 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
5044 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5045 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5046 llvm::Function *F =
5047 CGF.CGM.getIntrinsic(Intrinsic::read_register, {CGF.Int64Ty});
5048 llvm::Value *X18 = CGF.Builder.CreateCall(F, Metadata);
5049 return CGF.Builder.CreateIntToPtr(X18, CGF.Int8PtrTy);
5050}
5051
5052Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
5053 const CallExpr *E,
5054 llvm::Triple::ArchType Arch) {
5055 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
5056 BuiltinID <= clang::AArch64::LastSVEBuiltin)
5057 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
5058
5059 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
5060 BuiltinID <= clang::AArch64::LastSMEBuiltin)
5061 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
5062
5063 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
5064 return EmitAArch64CpuSupports(E);
5065
5066 unsigned HintID = static_cast<unsigned>(-1);
5067 switch (BuiltinID) {
5068 default: break;
5069 case clang::AArch64::BI__builtin_arm_nop:
5070 HintID = 0;
5071 break;
5072 case clang::AArch64::BI__builtin_arm_yield:
5073 case clang::AArch64::BI__yield:
5074 HintID = 1;
5075 break;
5076 case clang::AArch64::BI__builtin_arm_wfe:
5077 case clang::AArch64::BI__wfe:
5078 HintID = 2;
5079 break;
5080 case clang::AArch64::BI__builtin_arm_wfi:
5081 case clang::AArch64::BI__wfi:
5082 HintID = 3;
5083 break;
5084 case clang::AArch64::BI__builtin_arm_sev:
5085 case clang::AArch64::BI__sev:
5086 HintID = 4;
5087 break;
5088 case clang::AArch64::BI__builtin_arm_sevl:
5089 case clang::AArch64::BI__sevl:
5090 HintID = 5;
5091 break;
5092 }
5093
5094 if (HintID != static_cast<unsigned>(-1)) {
5095 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
5096 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
5097 }
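// Editorial example (not part of the upstream source): __builtin_arm_wfi (or
// __wfi) selects HintID 3 above and lowers to
//   call void @llvm.aarch64.hint(i32 3)
// which the backend emits as a WFI instruction.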
5098
5099 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
5100 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
5101 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5102 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
5103 }
5104
5105 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
5106 // Create call to __arm_sme_state and store the results to the two pointers.
5107 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
5108 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
5109 false),
5110 "__arm_sme_state"));
5111 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
5112 "aarch64_pstate_sm_compatible");
5113 CI->setAttributes(Attrs);
5114 CI->setCallingConv(
5115 llvm::CallingConv::
5116 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
5117 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
5118 EmitPointerWithAlignment(E->getArg(0)));
5119 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
5120 EmitPointerWithAlignment(E->getArg(1)));
5121 }
5122
5123 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
5124 assert((getContext().getTypeSize(E->getType()) == 32) &&
5125 "rbit of unusual size!");
5126 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5127 return Builder.CreateCall(
5128 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5129 }
5130 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
5131 assert((getContext().getTypeSize(E->getType()) == 64) &&
5132 "rbit of unusual size!");
5133 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5134 return Builder.CreateCall(
5135 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5136 }
5137
5138 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
5139 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
5140 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5141 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
5142 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
5143 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
5144 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
5145 return Res;
5146 }
5147
5148 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
5149 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5150 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
5151 "cls");
5152 }
5153 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
5154 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5155 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
5156 "cls");
5157 }
5158
5159 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
5160 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
5161 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5162 llvm::Type *Ty = Arg->getType();
5163 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
5164 Arg, "frint32z");
5165 }
5166
5167 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
5168 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
5169 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5170 llvm::Type *Ty = Arg->getType();
5171 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
5172 Arg, "frint64z");
5173 }
5174
5175 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
5176 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
5177 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5178 llvm::Type *Ty = Arg->getType();
5179 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
5180 Arg, "frint32x");
5181 }
5182
5183 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
5184 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
5185 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5186 llvm::Type *Ty = Arg->getType();
5187 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
5188 Arg, "frint64x");
5189 }
5190
5191 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
5192 assert((getContext().getTypeSize(E->getType()) == 32) &&
5193 "__jcvt of unusual size!");
5194 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5195 return Builder.CreateCall(
5196 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
5197 }
5198
5199 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
5200 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
5201 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
5202 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
5203 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
5204 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
5205
5206 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
5207 // Load from the address via an LLVM intrinsic, receiving a
5208 // tuple of 8 i64 words, and store each one to ValPtr.
5209 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
5210 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
5211 llvm::Value *ToRet;
5212 for (size_t i = 0; i < 8; i++) {
5213 llvm::Value *ValOffsetPtr =
5214 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
5215 Address Addr =
5216 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
5217 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
5218 }
5219 return ToRet;
5220 } else {
5221 // Load 8 i64 words from ValPtr, and store them to the address
5222 // via an LLVM intrinsic.
5223 SmallVector<llvm::Value *, 9> Args;
5224 Args.push_back(MemAddr);
5225 for (size_t i = 0; i < 8; i++) {
5226 llvm::Value *ValOffsetPtr =
5227 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
5228 Address Addr =
5229 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
5230 Args.push_back(Builder.CreateLoad(Addr));
5231 }
5232
5233 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
5234 ? Intrinsic::aarch64_st64b
5235 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
5236 ? Intrinsic::aarch64_st64bv
5237 : Intrinsic::aarch64_st64bv0);
5238 Function *F = CGM.getIntrinsic(Intr);
5239 return Builder.CreateCall(F, Args);
5240 }
5241 }
5242
5243 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
5244 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
5245
5246 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
5247 ? Intrinsic::aarch64_rndr
5248 : Intrinsic::aarch64_rndrrs);
5249 Function *F = CGM.getIntrinsic(Intr);
5250 llvm::Value *Val = Builder.CreateCall(F);
5251 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
5252 Value *Status = Builder.CreateExtractValue(Val, 1);
5253
5254 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
5255 Builder.CreateStore(RandomValue, MemAddress);
5256 Status = Builder.CreateZExt(Status, Int32Ty);
5257 return Status;
5258 }
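// Illustrative example (editorial addition, not from the upstream source):
//   uint64_t v;
//   int status = __builtin_arm_rndr(&v);
// calls @llvm.aarch64.rndr, which returns an {i64, i1} pair; the i64 random
// value is stored through the pointer argument and the i1 status flag is
// zero-extended to i32 and returned as the builtin's result.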
5259
5260 if (BuiltinID == clang::AArch64::BI__clear_cache) {
5261 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5262 const FunctionDecl *FD = E->getDirectCallee();
5263 Value *Ops[2];
5264 for (unsigned i = 0; i < 2; i++)
5265 Ops[i] = EmitScalarExpr(E->getArg(i));
5266 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5267 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5268 StringRef Name = FD->getName();
5269 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5270 }
5271
5272 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
5273 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
5274 getContext().getTypeSize(E->getType()) == 128) {
5275 Function *F =
5276 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
5277 ? Intrinsic::aarch64_ldaxp
5278 : Intrinsic::aarch64_ldxp);
5279
5280 Value *LdPtr = EmitScalarExpr(E->getArg(0));
5281 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
5282
5283 Value *Val0 = Builder.CreateExtractValue(Val, 1);
5284 Value *Val1 = Builder.CreateExtractValue(Val, 0);
5285 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5286 Val0 = Builder.CreateZExt(Val0, Int128Ty);
5287 Val1 = Builder.CreateZExt(Val1, Int128Ty);
5288
5289 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
5290 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5291 Val = Builder.CreateOr(Val, Val1);
5292 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5293 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
5294 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
5295 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5296
5297 QualType Ty = E->getType();
5298 llvm::Type *RealResTy = ConvertType(Ty);
5299 llvm::Type *IntTy =
5300 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
5301
5302 Function *F =
5303 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
5304 ? Intrinsic::aarch64_ldaxr
5305 : Intrinsic::aarch64_ldxr,
5306 DefaultPtrTy);
5307 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
5308 Val->addParamAttr(
5309 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
5310
5311 if (RealResTy->isPointerTy())
5312 return Builder.CreateIntToPtr(Val, RealResTy);
5313
5314 llvm::Type *IntResTy = llvm::IntegerType::get(
5315 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
5316 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
5317 RealResTy);
5318 }
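// Illustrative note (editorial addition, not from the upstream source): a
// 128-bit __builtin_arm_ldrex/__builtin_arm_ldaex uses the paired
// @llvm.aarch64.ldxp/@llvm.aarch64.ldaxp intrinsic; the two i64 halves are
// zero-extended to i128 and recombined with a shift and an or. Smaller widths
// use a single @llvm.aarch64.ldxr/@llvm.aarch64.ldaxr call carrying an
// elementtype attribute, and the result is truncated, bitcast, or converted
// back to a pointer to match the source-level type.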
5319
5320 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
5321 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
5322 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
5323 Function *F =
5324 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
5325 ? Intrinsic::aarch64_stlxp
5326 : Intrinsic::aarch64_stxp);
5327 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
5328
5329 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
5330 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
5331
5332 Tmp = Tmp.withElementType(STy);
5333 llvm::Value *Val = Builder.CreateLoad(Tmp);
5334
5335 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
5336 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
5337 Value *StPtr = EmitScalarExpr(E->getArg(1));
5338 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
5339 }
5340
5341 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
5342 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
5343 Value *StoreVal = EmitScalarExpr(E->getArg(0));
5344 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
5345
5346 QualType Ty = E->getArg(0)->getType();
5347 llvm::Type *StoreTy =
5348 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
5349
5350 if (StoreVal->getType()->isPointerTy())
5351 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
5352 else {
5353 llvm::Type *IntTy = llvm::IntegerType::get(
5354 getLLVMContext(),
5355 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
5356 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
5357 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
5358 }
5359
5360 Function *F =
5361 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
5362 ? Intrinsic::aarch64_stlxr
5363 : Intrinsic::aarch64_stxr,
5364 StoreAddr->getType());
5365 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
5366 CI->addParamAttr(
5367 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
5368 return CI;
5369 }
5370
5371 if (BuiltinID == clang::AArch64::BI__getReg) {
5372 Expr::EvalResult Result;
5373 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
5374 llvm_unreachable("Sema will ensure that the parameter is constant");
5375
5376 llvm::APSInt Value = Result.Val.getInt();
5377 LLVMContext &Context = CGM.getLLVMContext();
5378 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
5379
5380 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
5381 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5382 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5383
5384 llvm::Function *F =
5385 CGM.getIntrinsic(Intrinsic::read_register, {Int64Ty});
5386 return Builder.CreateCall(F, Metadata);
5387 }
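// Illustrative example (editorial addition, not from the upstream source):
//   unsigned long long fp = __getReg(29);
// becomes a call of @llvm.read_register.i64 with the metadata string "x29";
// register number 31 is special-cased to "sp".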
5388
5389 if (BuiltinID == clang::AArch64::BI__break) {
5390 Expr::EvalResult Result;
5391 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
5392 llvm_unreachable("Sema will ensure that the parameter is constant");
5393
5394 llvm::Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
5395 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
5396 }
5397
5398 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
5399 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
5400 return Builder.CreateCall(F);
5401 }
5402
5403 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
5404 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
5405 llvm::SyncScope::SingleThread);
5406
5407 // CRC32
5408 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5409 switch (BuiltinID) {
5410 case clang::AArch64::BI__builtin_arm_crc32b:
5411 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
5412 case clang::AArch64::BI__builtin_arm_crc32cb:
5413 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
5414 case clang::AArch64::BI__builtin_arm_crc32h:
5415 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
5416 case clang::AArch64::BI__builtin_arm_crc32ch:
5417 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
5418 case clang::AArch64::BI__builtin_arm_crc32w:
5419 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
5420 case clang::AArch64::BI__builtin_arm_crc32cw:
5421 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
5422 case clang::AArch64::BI__builtin_arm_crc32d:
5423 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
5424 case clang::AArch64::BI__builtin_arm_crc32cd:
5425 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
5426 }
5427
5428 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5429 Value *Arg0 = EmitScalarExpr(E->getArg(0));
5430 Value *Arg1 = EmitScalarExpr(E->getArg(1));
5431 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5432
5433 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
5434 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
5435
5436 return Builder.CreateCall(F, {Arg0, Arg1});
5437 }
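// Illustrative example (editorial addition, not from the upstream source):
//   uint32_t c = __builtin_arm_crc32b(acc, byte);
// zero-extends the data operand to the i32 expected by @llvm.aarch64.crc32b
// and returns the updated 32-bit accumulator; the *d variants take i64 data.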
5438
5439 // Memory Operations (MOPS)
5440 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
5441 Value *Dst = EmitScalarExpr(E->getArg(0));
5442 Value *Val = EmitScalarExpr(E->getArg(1));
5443 Value *Size = EmitScalarExpr(E->getArg(2));
5444 Val = Builder.CreateTrunc(Val, Int8Ty);
5445 Size = Builder.CreateIntCast(Size, Int64Ty, false);
5446 return Builder.CreateCall(
5447 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
5448 }
5449
5450 // Memory Tagging Extensions (MTE) Intrinsics
5451 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
5452 switch (BuiltinID) {
5453 case clang::AArch64::BI__builtin_arm_irg:
5454 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
5455 case clang::AArch64::BI__builtin_arm_addg:
5456 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
5457 case clang::AArch64::BI__builtin_arm_gmi:
5458 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
5459 case clang::AArch64::BI__builtin_arm_ldg:
5460 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
5461 case clang::AArch64::BI__builtin_arm_stg:
5462 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
5463 case clang::AArch64::BI__builtin_arm_subp:
5464 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
5465 }
5466
5467 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
5468 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
5469 Value *Pointer = EmitScalarExpr(E->getArg(0));
5470 Value *Mask = EmitScalarExpr(E->getArg(1));
5471
5472 Mask = Builder.CreateZExt(Mask, Int64Ty);
5473 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5474 {Pointer, Mask});
5475 }
5476 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
5477 Value *Pointer = EmitScalarExpr(E->getArg(0));
5478 Value *TagOffset = EmitScalarExpr(E->getArg(1));
5479
5480 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
5481 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5482 {Pointer, TagOffset});
5483 }
5484 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
5485 Value *Pointer = EmitScalarExpr(E->getArg(0));
5486 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
5487
5488 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
5489 return Builder.CreateCall(
5490 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
5491 }
5492 // Although it is possible to supply a different return
5493 // address (first arg) to this intrinsic, for now we set the
5494 // return address to be the same as the input address.
5495 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
5496 Value *TagAddress = EmitScalarExpr(E->getArg(0));
5497 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5498 {TagAddress, TagAddress});
5499 }
5500 // Although it is possible to supply a different tag (to set)
5501 // to this intrinsic (as first arg), for now we supply the tag
5502 // that is in the input address arg (the common use case).
5503 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
5504 Value *TagAddress = EmitScalarExpr(E->getArg(0));
5505 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5506 {TagAddress, TagAddress});
5507 }
5508 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
5509 Value *PointerA = EmitScalarExpr(E->getArg(0));
5510 Value *PointerB = EmitScalarExpr(E->getArg(1));
5511 return Builder.CreateCall(
5512 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
5513 }
5514 }
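// Illustrative note (editorial addition, not from the upstream source): the
// MTE builtins map almost one-to-one onto their intrinsics, e.g.
//   void *q = __builtin_arm_irg(p, mask);
// becomes @llvm.aarch64.irg(p, zext(mask)). __builtin_arm_ldg and
// __builtin_arm_stg pass the same pointer twice because, as noted above, the
// tag/return address is currently taken from the input address itself.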
5515
5516 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5517 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
5518 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5519 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
5520 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
5521 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
5522 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
5523 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
5524
5525 SpecialRegisterAccessKind AccessKind = Write;
5526 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5527 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
5528 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5529 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
5530 AccessKind = VolatileRead;
5531
5532 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
5533 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
5534
5535 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5536 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
5537
5538 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5539 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
5540
5541 llvm::Type *ValueType;
5542 llvm::Type *RegisterType = Int64Ty;
5543 if (Is32Bit) {
5544 ValueType = Int32Ty;
5545 } else if (Is128Bit) {
5546 llvm::Type *Int128Ty =
5547 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
5548 ValueType = Int128Ty;
5549 RegisterType = Int128Ty;
5550 } else if (IsPointerBuiltin) {
5551 ValueType = VoidPtrTy;
5552 } else {
5553 ValueType = Int64Ty;
5554 };
5555
5556 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
5557 AccessKind);
5558 }
5559
5560 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5561 BuiltinID == clang::AArch64::BI_WriteStatusReg ||
5562 BuiltinID == clang::AArch64::BI__sys) {
5563 LLVMContext &Context = CGM.getLLVMContext();
5564
5565 unsigned SysReg =
5566 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
5567
5568 std::string SysRegStr;
5569 unsigned SysRegOp0 = (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5570 BuiltinID == clang::AArch64::BI_WriteStatusReg)
5571 ? ((1 << 1) | ((SysReg >> 14) & 1))
5572 : 1;
5573 llvm::raw_string_ostream(SysRegStr)
5574 << SysRegOp0 << ":" << ((SysReg >> 11) & 7) << ":"
5575 << ((SysReg >> 7) & 15) << ":" << ((SysReg >> 3) & 15) << ":"
5576 << (SysReg & 7);
5577
5578 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
5579 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5580 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5581
5582 llvm::Type *RegisterType = Int64Ty;
5583 llvm::Type *Types[] = { RegisterType };
5584
5585 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
5586 llvm::Function *F = CGM.getIntrinsic(Intrinsic::read_register, Types);
5587
5588 return Builder.CreateCall(F, Metadata);
5589 }
5590
5591 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
5592 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
5593 llvm::Value *Result = Builder.CreateCall(F, {Metadata, ArgValue});
5594 if (BuiltinID == clang::AArch64::BI__sys) {
5595 // Return 0 for convenience, even though MSVC returns some other undefined
5596 // value.
5597 Result = ConstantInt::get(Builder.getInt32Ty(), 0);
5598 }
5599 return Result;
5600 }
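// Illustrative note (editorial addition, not from the upstream source): the
// constant system-register immediate is unpacked into the
// "op0:op1:CRn:CRm:op2" string form expected by @llvm.read_register and
// @llvm.write_register; for example, an encoding with op0=3, op1=3, CRn=13,
// CRm=0, op2=2 yields the metadata string "3:3:13:0:2". For __sys, op0 is
// hard-wired to 1 and the builtin itself evaluates to the constant 0.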
5601
5602 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
5603 llvm::Function *F =
5604 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
5605 return Builder.CreateCall(F);
5606 }
5607
5608 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
5609 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
5610 return Builder.CreateCall(F);
5611 }
5612
5613 if (BuiltinID == clang::AArch64::BI__mulh ||
5614 BuiltinID == clang::AArch64::BI__umulh) {
5615 llvm::Type *ResType = ConvertType(E->getType());
5616 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5617
5618 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
5619 Value *LHS =
5620 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
5621 Value *RHS =
5622 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
5623
5624 Value *MulResult, *HigherBits;
5625 if (IsSigned) {
5626 MulResult = Builder.CreateNSWMul(LHS, RHS);
5627 HigherBits = Builder.CreateAShr(MulResult, 64);
5628 } else {
5629 MulResult = Builder.CreateNUWMul(LHS, RHS);
5630 HigherBits = Builder.CreateLShr(MulResult, 64);
5631 }
5632 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
5633
5634 return HigherBits;
5635 }
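// Illustrative sketch (editorial addition, not from the upstream source): for
//   unsigned long long hi = __umulh(a, b);
// the lowering above is roughly
//   %wa = zext i64 %a to i128
//   %wb = zext i64 %b to i128
//   %m  = mul nuw i128 %wa, %wb
//   %hi = lshr i128 %m, 64
//   %r  = trunc i128 %hi to i64
// while the signed __mulh uses sext, a nsw multiply and an arithmetic shift.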
5636
5637 if (BuiltinID == AArch64::BI__writex18byte ||
5638 BuiltinID == AArch64::BI__writex18word ||
5639 BuiltinID == AArch64::BI__writex18dword ||
5640 BuiltinID == AArch64::BI__writex18qword) {
5641 // Process the args first
5642 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5643 Value *DataArg = EmitScalarExpr(E->getArg(1));
5644
5645 // Read x18 as i8*
5646 llvm::Value *X18 = readX18AsPtr(*this);
5647
5648 // Store val at x18 + offset
5649 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5650 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5651 StoreInst *Store =
5652 Builder.CreateAlignedStore(DataArg, Ptr, CharUnits::One());
5653 return Store;
5654 }
5655
5656 if (BuiltinID == AArch64::BI__readx18byte ||
5657 BuiltinID == AArch64::BI__readx18word ||
5658 BuiltinID == AArch64::BI__readx18dword ||
5659 BuiltinID == AArch64::BI__readx18qword) {
5660 // Process the args first
5661 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5662
5663 // Read x18 as i8*
5664 llvm::Value *X18 = readX18AsPtr(*this);
5665
5666 // Load x18 + offset
5667 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5668 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5669 llvm::Type *IntTy = ConvertType(E->getType());
5670 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
5671 return Load;
5672 }
5673
5674 if (BuiltinID == AArch64::BI__addx18byte ||
5675 BuiltinID == AArch64::BI__addx18word ||
5676 BuiltinID == AArch64::BI__addx18dword ||
5677 BuiltinID == AArch64::BI__addx18qword ||
5678 BuiltinID == AArch64::BI__incx18byte ||
5679 BuiltinID == AArch64::BI__incx18word ||
5680 BuiltinID == AArch64::BI__incx18dword ||
5681 BuiltinID == AArch64::BI__incx18qword) {
5682 llvm::Type *IntTy;
5683 bool isIncrement;
5684 switch (BuiltinID) {
5685 case AArch64::BI__incx18byte:
5686 IntTy = Int8Ty;
5687 isIncrement = true;
5688 break;
5689 case AArch64::BI__incx18word:
5690 IntTy = Int16Ty;
5691 isIncrement = true;
5692 break;
5693 case AArch64::BI__incx18dword:
5694 IntTy = Int32Ty;
5695 isIncrement = true;
5696 break;
5697 case AArch64::BI__incx18qword:
5698 IntTy = Int64Ty;
5699 isIncrement = true;
5700 break;
5701 default:
5702 IntTy = ConvertType(E->getArg(1)->getType());
5703 isIncrement = false;
5704 break;
5705 }
5706 // Process the args first
5707 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5708 Value *ValToAdd =
5709 isIncrement ? ConstantInt::get(IntTy, 1) : EmitScalarExpr(E->getArg(1));
5710
5711 // Read x18 as i8*
5712 llvm::Value *X18 = readX18AsPtr(*this);
5713
5714 // Load x18 + offset
5715 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5716 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5717 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
5718
5719 // Add values
5720 Value *AddResult = Builder.CreateAdd(Load, ValToAdd);
5721
5722 // Store val at x18 + offset
5723 StoreInst *Store =
5724 Builder.CreateAlignedStore(AddResult, Ptr, CharUnits::One());
5725 return Store;
5726 }
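// Illustrative note (editorial addition, not from the upstream source): the
// __addx18*/__incx18* family is a plain, non-atomic read-modify-write: the
// zero-extended offset is added to x18, the value at that byte address is
// loaded with alignment 1, the addend (or the constant 1 for the increment
// forms) is added, and the sum is stored back to the same address.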
5727
5728 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
5729 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
5730 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
5731 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
5732 Value *Arg = EmitScalarExpr(E->getArg(0));
5733 llvm::Type *RetTy = ConvertType(E->getType());
5734 return Builder.CreateBitCast(Arg, RetTy);
5735 }
5736
5737 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5738 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5739 BuiltinID == AArch64::BI_CountLeadingZeros ||
5740 BuiltinID == AArch64::BI_CountLeadingZeros64) {
5741 Value *Arg = EmitScalarExpr(E->getArg(0));
5742 llvm::Type *ArgType = Arg->getType();
5743
5744 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5745 BuiltinID == AArch64::BI_CountLeadingOnes64)
5746 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
5747
5748 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
5749 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
5750
5751 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5752 BuiltinID == AArch64::BI_CountLeadingZeros64)
5753 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5754 return Result;
5755 }
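// Illustrative example (editorial addition, not from the upstream source):
//   unsigned n = _CountLeadingOnes(x);
// is emitted as @llvm.ctlz.i32(~x, false), i.e. leading ones are counted as
// the leading zeros of the complement; the 64-bit variants additionally
// truncate the i64 count to the i32 return type of the MSVC-style prototype.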
5756
5757 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
5758 BuiltinID == AArch64::BI_CountLeadingSigns64) {
5759 Value *Arg = EmitScalarExpr(E->getArg(0));
5760
5761 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
5762 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
5763 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
5764
5765 Value *Result = Builder.CreateCall(F, Arg, "cls");
5766 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
5767 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5768 return Result;
5769 }
5770
5771 if (BuiltinID == AArch64::BI_CountOneBits ||
5772 BuiltinID == AArch64::BI_CountOneBits64) {
5773 Value *ArgValue = EmitScalarExpr(E->getArg(0));
5774 llvm::Type *ArgType = ArgValue->getType();
5775 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
5776
5777 Value *Result = Builder.CreateCall(F, ArgValue);
5778 if (BuiltinID == AArch64::BI_CountOneBits64)
5779 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5780 return Result;
5781 }
5782
5783 if (BuiltinID == AArch64::BI__prefetch) {
5784 Value *Address = EmitScalarExpr(E->getArg(0));
5785 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
5786 Value *Locality = ConstantInt::get(Int32Ty, 3);
5787 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
5788 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
5789 return Builder.CreateCall(F, {Address, RW, Locality, Data});
5790 }
5791
5792 if (BuiltinID == AArch64::BI__hlt) {
5793 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
5794 Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
5795
5796 // Return 0 for convenience, even though MSVC returns some other undefined
5797 // value.
5798 return ConstantInt::get(Builder.getInt32Ty(), 0);
5799 }
5800
5801 if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
5802 return Builder.CreateFPTrunc(
5803 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
5804 Builder.getFloatTy()),
5805 Builder.getBFloatTy());
5806
5807 // Handle MSVC intrinsics before argument evaluation to prevent double
5808 // evaluation.
5809 if (std::optional<MSVCIntrin> MsvcIntId =
5810 translateAarch64ToMsvcIntrin(BuiltinID))
5811 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
5812
5813 // Some intrinsics are equivalent - if they are, use the base intrinsic ID.
5814 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
5815 return P.first == BuiltinID;
5816 });
5817 if (It != end(NEONEquivalentIntrinsicMap))
5818 BuiltinID = It->second;
5819
5820 // Find out if any arguments are required to be integer constant
5821 // expressions.
5822 unsigned ICEArguments = 0;
5823 ASTContext::GetBuiltinTypeError Error;
5824 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5825 assert(Error == ASTContext::GE_None && "Should not codegen an error");
5826
5827 llvm::SmallVector<Value*, 4> Ops;
5828 Address PtrOp0 = Address::invalid();
5829 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
5830 if (i == 0) {
5831 switch (BuiltinID) {
5832 case NEON::BI__builtin_neon_vld1_v:
5833 case NEON::BI__builtin_neon_vld1q_v:
5834 case NEON::BI__builtin_neon_vld1_dup_v:
5835 case NEON::BI__builtin_neon_vld1q_dup_v:
5836 case NEON::BI__builtin_neon_vld1_lane_v:
5837 case NEON::BI__builtin_neon_vld1q_lane_v:
5838 case NEON::BI__builtin_neon_vst1_v:
5839 case NEON::BI__builtin_neon_vst1q_v:
5840 case NEON::BI__builtin_neon_vst1_lane_v:
5841 case NEON::BI__builtin_neon_vst1q_lane_v:
5842 case NEON::BI__builtin_neon_vldap1_lane_s64:
5843 case NEON::BI__builtin_neon_vldap1q_lane_s64:
5844 case NEON::BI__builtin_neon_vstl1_lane_s64:
5845 case NEON::BI__builtin_neon_vstl1q_lane_s64:
5846 // Get the alignment for the argument in addition to the value;
5847 // we'll use it later.
5848 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
5849 Ops.push_back(PtrOp0.emitRawPointer(*this));
5850 continue;
5851 }
5852 }
5853 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
5854 }
5855
5856 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
5857 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
5858 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5859
5860 if (Builtin) {
5861 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
5862 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
5863 assert(Result && "SISD intrinsic should have been handled");
5864 return Result;
5865 }
5866
5867 const Expr *Arg = E->getArg(E->getNumArgs()-1);
5868 NeonTypeFlags Type(0);
5869 if (std::optional<llvm::APSInt> Result =
5870 Arg->getIntegerConstantExpr(getContext()))
5871 // Determine the type of this overloaded NEON intrinsic.
5872 Type = NeonTypeFlags(Result->getZExtValue());
5873
5874 bool usgn = Type.isUnsigned();
5875 bool quad = Type.isQuad();
5876
5877 // Handle non-overloaded intrinsics first.
5878 switch (BuiltinID) {
5879 default: break;
5880 case NEON::BI__builtin_neon_vabsh_f16:
5881 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5882 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
5883 case NEON::BI__builtin_neon_vaddq_p128: {
5884 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
5885 Ops.push_back(EmitScalarExpr(E->getArg(1)));
5886 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5887 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5888 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
5889 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5890 return Builder.CreateBitCast(Ops[0], Int128Ty);
5891 }
5892 case NEON::BI__builtin_neon_vldrq_p128: {
5893 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5894 Value *Ptr = EmitScalarExpr(E->getArg(0));
5895 return Builder.CreateAlignedLoad(Int128Ty, Ptr,
5896 CharUnits::fromQuantity(16));
5897 }
5898 case NEON::BI__builtin_neon_vstrq_p128: {
5899 Value *Ptr = Ops[0];
5900 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
5901 }
5902 case NEON::BI__builtin_neon_vcvts_f32_u32:
5903 case NEON::BI__builtin_neon_vcvtd_f64_u64:
5904 usgn = true;
5905 [[fallthrough]];
5906 case NEON::BI__builtin_neon_vcvts_f32_s32:
5907 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5908 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5909 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5910 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5911 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5912 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5913 if (usgn)
5914 return Builder.CreateUIToFP(Ops[0], FTy);
5915 return Builder.CreateSIToFP(Ops[0], FTy);
5916 }
5917 case NEON::BI__builtin_neon_vcvth_f16_u16:
5918 case NEON::BI__builtin_neon_vcvth_f16_u32:
5919 case NEON::BI__builtin_neon_vcvth_f16_u64:
5920 usgn = true;
5921 [[fallthrough]];
5922 case NEON::BI__builtin_neon_vcvth_f16_s16:
5923 case NEON::BI__builtin_neon_vcvth_f16_s32:
5924 case NEON::BI__builtin_neon_vcvth_f16_s64: {
5925 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5926 llvm::Type *FTy = HalfTy;
5927 llvm::Type *InTy;
5928 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
5929 InTy = Int64Ty;
5930 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
5931 InTy = Int32Ty;
5932 else
5933 InTy = Int16Ty;
5934 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5935 if (usgn)
5936 return Builder.CreateUIToFP(Ops[0], FTy);
5937 return Builder.CreateSIToFP(Ops[0], FTy);
5938 }
5939 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5940 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5941 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5942 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5943 case NEON::BI__builtin_neon_vcvth_u16_f16:
5944 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5945 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5946 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5947 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5948 case NEON::BI__builtin_neon_vcvth_s16_f16: {
5949 unsigned Int;
5950 llvm::Type *InTy = Int16Ty;
5951 llvm::Type* FTy = HalfTy;
5952 llvm::Type *Tys[2] = {InTy, FTy};
5953 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5954 switch (BuiltinID) {
5955 default: llvm_unreachable("missing builtin ID in switch!");
5956 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5957 Int = Intrinsic::aarch64_neon_fcvtau; break;
5958 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5959 Int = Intrinsic::aarch64_neon_fcvtmu; break;
5960 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5961 Int = Intrinsic::aarch64_neon_fcvtnu; break;
5962 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5963 Int = Intrinsic::aarch64_neon_fcvtpu; break;
5964 case NEON::BI__builtin_neon_vcvth_u16_f16:
5965 Int = Intrinsic::aarch64_neon_fcvtzu; break;
5966 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5967 Int = Intrinsic::aarch64_neon_fcvtas; break;
5968 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5969 Int = Intrinsic::aarch64_neon_fcvtms; break;
5970 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5971 Int = Intrinsic::aarch64_neon_fcvtns; break;
5972 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5973 Int = Intrinsic::aarch64_neon_fcvtps; break;
5974 case NEON::BI__builtin_neon_vcvth_s16_f16:
5975 Int = Intrinsic::aarch64_neon_fcvtzs; break;
5976 }
5977 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
5978 }
5979 case NEON::BI__builtin_neon_vcaleh_f16:
5980 case NEON::BI__builtin_neon_vcalth_f16:
5981 case NEON::BI__builtin_neon_vcageh_f16:
5982 case NEON::BI__builtin_neon_vcagth_f16: {
5983 unsigned Int;
5984 llvm::Type* InTy = Int32Ty;
5985 llvm::Type* FTy = HalfTy;
5986 llvm::Type *Tys[2] = {InTy, FTy};
5987 Ops.push_back(EmitScalarExpr(E->getArg(1)));
5988 switch (BuiltinID) {
5989 default: llvm_unreachable("missing builtin ID in switch!");
5990 case NEON::BI__builtin_neon_vcageh_f16:
5991 Int = Intrinsic::aarch64_neon_facge; break;
5992 case NEON::BI__builtin_neon_vcagth_f16:
5993 Int = Intrinsic::aarch64_neon_facgt; break;
5994 case NEON::BI__builtin_neon_vcaleh_f16:
5995 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
5996 case NEON::BI__builtin_neon_vcalth_f16:
5997 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
5998 }
5999 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
6000 return Builder.CreateTrunc(Ops[0], Int16Ty);
6001 }
6002 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
6003 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
6004 unsigned Int;
6005 llvm::Type* InTy = Int32Ty;
6006 llvm::Type* FTy = HalfTy;
6007 llvm::Type *Tys[2] = {InTy, FTy};
6008 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6009 switch (BuiltinID) {
6010 default: llvm_unreachable("missing builtin ID in switch!");
6011 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
6012 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
6013 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
6014 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
6015 }
6016 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
6017 return Builder.CreateTrunc(Ops[0], Int16Ty);
6018 }
6019 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
6020 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
6021 unsigned Int;
6022 llvm::Type* FTy = HalfTy;
6023 llvm::Type* InTy = Int32Ty;
6024 llvm::Type *Tys[2] = {FTy, InTy};
6025 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6026 switch (BuiltinID) {
6027 default: llvm_unreachable("missing builtin ID in switch!");
6028 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
6029 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
6030 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
6031 break;
6032 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
6033 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
6034 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
6035 break;
6036 }
6037 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
6038 }
6039 case NEON::BI__builtin_neon_vpaddd_s64: {
6040 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
6041 Value *Vec = EmitScalarExpr(E->getArg(0));
6042 // The vector is v2i64, so make sure it's bitcast to that.
6043 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
6044 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
6045 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
6046 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
6047 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
6048 // Pairwise addition of a v2i64 into a scalar i64.
6049 return Builder.CreateAdd(Op0, Op1, "vpaddd");
6050 }
6051 case NEON::BI__builtin_neon_vpaddd_f64: {
6052 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
6053 Value *Vec = EmitScalarExpr(E->getArg(0));
6054 // The vector is v2f64, so make sure it's bitcast to that.
6055 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
6056 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
6057 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
6058 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
6059 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
6060 // Pairwise addition of a v2f64 into a scalar f64.
6061 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
6062 }
6063 case NEON::BI__builtin_neon_vpadds_f32: {
6064 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
6065 Value *Vec = EmitScalarExpr(E->getArg(0));
6066 // The vector is v2f32, so make sure it's bitcast to that.
6067 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
6068 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
6069 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
6070 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
6071 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
6072 // Pairwise addition of a v2f32 into a scalar f32.
6073 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
6074 }
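// Illustrative note (editorial addition, not from the upstream source): the
// scalar pairwise adds above need no target intrinsic at all; e.g.
// vpaddd_f64(v) is emitted as extractelement of lanes 0 and 1 of the v2f64
// operand followed by a single fadd (an integer add for vpaddd_s64).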
6075 case NEON::BI__builtin_neon_vceqzd_s64:
6076 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6077 return EmitAArch64CompareBuiltinExpr(
6078 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6079 ICmpInst::ICMP_EQ, "vceqz");
6080 case NEON::BI__builtin_neon_vceqzd_f64:
6081 case NEON::BI__builtin_neon_vceqzs_f32:
6082 case NEON::BI__builtin_neon_vceqzh_f16:
6083 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6084 return EmitAArch64CompareBuiltinExpr(
6085 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6086 ICmpInst::FCMP_OEQ, "vceqz");
6087 case NEON::BI__builtin_neon_vcgezd_s64:
6088 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6089 return EmitAArch64CompareBuiltinExpr(
6090 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6091 ICmpInst::ICMP_SGE, "vcgez");
6092 case NEON::BI__builtin_neon_vcgezd_f64:
6093 case NEON::BI__builtin_neon_vcgezs_f32:
6094 case NEON::BI__builtin_neon_vcgezh_f16:
6095 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6096 return EmitAArch64CompareBuiltinExpr(
6097 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6098 ICmpInst::FCMP_OGE, "vcgez");
6099 case NEON::BI__builtin_neon_vclezd_s64:
6100 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6101 return EmitAArch64CompareBuiltinExpr(
6102 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6103 ICmpInst::ICMP_SLE, "vclez");
6104 case NEON::BI__builtin_neon_vclezd_f64:
6105 case NEON::BI__builtin_neon_vclezs_f32:
6106 case NEON::BI__builtin_neon_vclezh_f16:
6107 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6108 return EmitAArch64CompareBuiltinExpr(
6109 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6110 ICmpInst::FCMP_OLE, "vclez");
6111 case NEON::BI__builtin_neon_vcgtzd_s64:
6112 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6113 return EmitAArch64CompareBuiltinExpr(
6114 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6115 ICmpInst::ICMP_SGT, "vcgtz");
6116 case NEON::BI__builtin_neon_vcgtzd_f64:
6117 case NEON::BI__builtin_neon_vcgtzs_f32:
6118 case NEON::BI__builtin_neon_vcgtzh_f16:
6119 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6120 return EmitAArch64CompareBuiltinExpr(
6121 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6122 ICmpInst::FCMP_OGT, "vcgtz");
6123 case NEON::BI__builtin_neon_vcltzd_s64:
6124 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6125 return EmitAArch64CompareBuiltinExpr(
6126 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6127 ICmpInst::ICMP_SLT, "vcltz");
6128
6129 case NEON::BI__builtin_neon_vcltzd_f64:
6130 case NEON::BI__builtin_neon_vcltzs_f32:
6131 case NEON::BI__builtin_neon_vcltzh_f16:
6132 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6133 return EmitAArch64CompareBuiltinExpr(
6134 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6135 ICmpInst::FCMP_OLT, "vcltz");
6136
6137 case NEON::BI__builtin_neon_vceqzd_u64: {
6138 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6139 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6140 Ops[0] =
6141 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
6142 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
6143 }
6144 case NEON::BI__builtin_neon_vceqd_f64:
6145 case NEON::BI__builtin_neon_vcled_f64:
6146 case NEON::BI__builtin_neon_vcltd_f64:
6147 case NEON::BI__builtin_neon_vcged_f64:
6148 case NEON::BI__builtin_neon_vcgtd_f64: {
6149 llvm::CmpInst::Predicate P;
6150 switch (BuiltinID) {
6151 default: llvm_unreachable("missing builtin ID in switch!");
6152 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
6153 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
6154 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
6155 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
6156 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
6157 }
6158 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6159 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6160 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6161 if (P == llvm::FCmpInst::FCMP_OEQ)
6162 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6163 else
6164 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
6165 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
6166 }
6167 case NEON::BI__builtin_neon_vceqs_f32:
6168 case NEON::BI__builtin_neon_vcles_f32:
6169 case NEON::BI__builtin_neon_vclts_f32:
6170 case NEON::BI__builtin_neon_vcges_f32:
6171 case NEON::BI__builtin_neon_vcgts_f32: {
6172 llvm::CmpInst::Predicate P;
6173 switch (BuiltinID) {
6174 default: llvm_unreachable("missing builtin ID in switch!");
6175 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
6176 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
6177 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
6178 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
6179 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
6180 }
6181 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6182 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
6183 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
6184 if (P == llvm::FCmpInst::FCMP_OEQ)
6185 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6186 else
6187 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
6188 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
6189 }
6190 case NEON::BI__builtin_neon_vceqh_f16:
6191 case NEON::BI__builtin_neon_vcleh_f16:
6192 case NEON::BI__builtin_neon_vclth_f16:
6193 case NEON::BI__builtin_neon_vcgeh_f16:
6194 case NEON::BI__builtin_neon_vcgth_f16: {
6195 llvm::CmpInst::Predicate P;
6196 switch (BuiltinID) {
6197 default: llvm_unreachable("missing builtin ID in switch!");
6198 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
6199 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
6200 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
6201 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
6202 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
6203 }
6204 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6205 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
6206 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
6207 if (P == llvm::FCmpInst::FCMP_OEQ)
6208 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6209 else
6210 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
6211 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
6212 }
6213 case NEON::BI__builtin_neon_vceqd_s64:
6214 case NEON::BI__builtin_neon_vceqd_u64:
6215 case NEON::BI__builtin_neon_vcgtd_s64:
6216 case NEON::BI__builtin_neon_vcgtd_u64:
6217 case NEON::BI__builtin_neon_vcltd_s64:
6218 case NEON::BI__builtin_neon_vcltd_u64:
6219 case NEON::BI__builtin_neon_vcged_u64:
6220 case NEON::BI__builtin_neon_vcged_s64:
6221 case NEON::BI__builtin_neon_vcled_u64:
6222 case NEON::BI__builtin_neon_vcled_s64: {
6223 llvm::CmpInst::Predicate P;
6224 switch (BuiltinID) {
6225 default: llvm_unreachable("missing builtin ID in switch!");
6226 case NEON::BI__builtin_neon_vceqd_s64:
6227 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
6228 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
6229 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
6230 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
6231 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
6232 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
6233 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
6234 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
6235 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
6236 }
6237 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6238 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6239 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6240 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
6241 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
6242 }
6243 case NEON::BI__builtin_neon_vtstd_s64:
6244 case NEON::BI__builtin_neon_vtstd_u64: {
6245 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6246 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6247 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6248 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
6249 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
6250 llvm::Constant::getNullValue(Int64Ty));
6251 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
6252 }
6253 case NEON::BI__builtin_neon_vset_lane_i8:
6254 case NEON::BI__builtin_neon_vset_lane_i16:
6255 case NEON::BI__builtin_neon_vset_lane_i32:
6256 case NEON::BI__builtin_neon_vset_lane_i64:
6257 case NEON::BI__builtin_neon_vset_lane_bf16:
6258 case NEON::BI__builtin_neon_vset_lane_f32:
6259 case NEON::BI__builtin_neon_vsetq_lane_i8:
6260 case NEON::BI__builtin_neon_vsetq_lane_i16:
6261 case NEON::BI__builtin_neon_vsetq_lane_i32:
6262 case NEON::BI__builtin_neon_vsetq_lane_i64:
6263 case NEON::BI__builtin_neon_vsetq_lane_bf16:
6264 case NEON::BI__builtin_neon_vsetq_lane_f32:
6265 Ops.push_back(EmitScalarExpr(E->getArg(2)));
6266 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6267 case NEON::BI__builtin_neon_vset_lane_f64:
6268 // The vector type needs a cast for the v1f64 variant.
6269 Ops[1] =
6270 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
6271 Ops.push_back(EmitScalarExpr(E->getArg(2)));
6272 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6273 case NEON::BI__builtin_neon_vset_lane_mf8:
6274 case NEON::BI__builtin_neon_vsetq_lane_mf8:
6275 Ops.push_back(EmitScalarExpr(E->getArg(2)));
6276 // The input vector type needs a cast to scalar type.
6277 Ops[0] =
6278 Builder.CreateBitCast(Ops[0], llvm::Type::getInt8Ty(getLLVMContext()));
6279 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6280 case NEON::BI__builtin_neon_vsetq_lane_f64:
6281 // The vector type needs a cast for the v2f64 variant.
6282 Ops[1] =
6283 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
6284 Ops.push_back(EmitScalarExpr(E->getArg(2)));
6285 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6286
6287 case NEON::BI__builtin_neon_vget_lane_i8:
6288 case NEON::BI__builtin_neon_vdupb_lane_i8:
6289 Ops[0] =
6290 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
6291 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6292 "vget_lane");
6293 case NEON::BI__builtin_neon_vgetq_lane_i8:
6294 case NEON::BI__builtin_neon_vdupb_laneq_i8:
6295 Ops[0] =
6296 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
6297 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6298 "vgetq_lane");
6299 case NEON::BI__builtin_neon_vget_lane_mf8:
6300 case NEON::BI__builtin_neon_vdupb_lane_mf8:
6301 case NEON::BI__builtin_neon_vgetq_lane_mf8:
6302 case NEON::BI__builtin_neon_vdupb_laneq_mf8:
6303 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6304 "vget_lane");
6305 case NEON::BI__builtin_neon_vget_lane_i16:
6306 case NEON::BI__builtin_neon_vduph_lane_i16:
6307 Ops[0] =
6308 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
6309 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6310 "vget_lane");
6311 case NEON::BI__builtin_neon_vgetq_lane_i16:
6312 case NEON::BI__builtin_neon_vduph_laneq_i16:
6313 Ops[0] =
6314 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
6315 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6316 "vgetq_lane");
6317 case NEON::BI__builtin_neon_vget_lane_i32:
6318 case NEON::BI__builtin_neon_vdups_lane_i32:
6319 Ops[0] =
6320 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
6321 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6322 "vget_lane");
6323 case NEON::BI__builtin_neon_vdups_lane_f32:
6324 Ops[0] =
6325 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
6326 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6327 "vdups_lane");
6328 case NEON::BI__builtin_neon_vgetq_lane_i32:
6329 case NEON::BI__builtin_neon_vdups_laneq_i32:
6330 Ops[0] =
6331 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
6332 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6333 "vgetq_lane");
6334 case NEON::BI__builtin_neon_vget_lane_i64:
6335 case NEON::BI__builtin_neon_vdupd_lane_i64:
6336 Ops[0] =
6337 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
6338 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6339 "vget_lane");
6340 case NEON::BI__builtin_neon_vdupd_lane_f64:
6341 Ops[0] =
6342 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
6343 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6344 "vdupd_lane");
6345 case NEON::BI__builtin_neon_vgetq_lane_i64:
6346 case NEON::BI__builtin_neon_vdupd_laneq_i64:
6347 Ops[0] =
6348 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
6349 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6350 "vgetq_lane");
6351 case NEON::BI__builtin_neon_vget_lane_f32:
6352 Ops[0] =
6353 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
6354 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6355 "vget_lane");
6356 case NEON::BI__builtin_neon_vget_lane_f64:
6357 Ops[0] =
6358 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
6359 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6360 "vget_lane");
6361 case NEON::BI__builtin_neon_vgetq_lane_f32:
6362 case NEON::BI__builtin_neon_vdups_laneq_f32:
6363 Ops[0] =
6364 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
6365 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6366 "vgetq_lane");
6367 case NEON::BI__builtin_neon_vgetq_lane_f64:
6368 case NEON::BI__builtin_neon_vdupd_laneq_f64:
6369 Ops[0] =
6370 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
6371 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6372 "vgetq_lane");
6373 case NEON::BI__builtin_neon_vaddh_f16:
6374 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6375 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
6376 case NEON::BI__builtin_neon_vsubh_f16:
6377 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6378 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
6379 case NEON::BI__builtin_neon_vmulh_f16:
6380 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6381 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
6382 case NEON::BI__builtin_neon_vdivh_f16:
6383 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6384 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
6385 case NEON::BI__builtin_neon_vfmah_f16:
6386 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
6387 return emitCallMaybeConstrainedFPBuiltin(
6388 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
6389 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
6390 case NEON::BI__builtin_neon_vfmsh_f16: {
6391 Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
6392
6393 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
6394 return emitCallMaybeConstrainedFPBuiltin(
6395 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
6396 {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
6397 }
6398 case NEON::BI__builtin_neon_vaddd_s64:
6399 case NEON::BI__builtin_neon_vaddd_u64:
6400 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
6401 case NEON::BI__builtin_neon_vsubd_s64:
6402 case NEON::BI__builtin_neon_vsubd_u64:
6403 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
6404 case NEON::BI__builtin_neon_vqdmlalh_s16:
6405 case NEON::BI__builtin_neon_vqdmlslh_s16: {
6406 SmallVector<Value *, 2> ProductOps;
6407 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
6408 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
6409 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
6410 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6411 ProductOps, "vqdmlXl");
6412 Constant *CI = ConstantInt::get(SizeTy, 0);
6413 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6414
6415 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
6416 ? Intrinsic::aarch64_neon_sqadd
6417 : Intrinsic::aarch64_neon_sqsub;
6418 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
6419 }
6420 case NEON::BI__builtin_neon_vqshlud_n_s64: {
6421 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6422 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
6423 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
6424 Ops, "vqshlu_n");
6425 }
6426 case NEON::BI__builtin_neon_vqshld_n_u64:
6427 case NEON::BI__builtin_neon_vqshld_n_s64: {
6428 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
6429 ? Intrinsic::aarch64_neon_uqshl
6430 : Intrinsic::aarch64_neon_sqshl;
6431 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6432 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
6433 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
6434 }
6435 case NEON::BI__builtin_neon_vrshrd_n_u64:
6436 case NEON::BI__builtin_neon_vrshrd_n_s64: {
6437 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
6438 ? Intrinsic::aarch64_neon_urshl
6439 : Intrinsic::aarch64_neon_srshl;
6440 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6441 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
6442 Ops[1] = ConstantInt::get(Int64Ty, -SV);
6443 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
6444 }
6445 case NEON::BI__builtin_neon_vrsrad_n_u64:
6446 case NEON::BI__builtin_neon_vrsrad_n_s64: {
6447 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
6448 ? Intrinsic::aarch64_neon_urshl
6449 : Intrinsic::aarch64_neon_srshl;
6450 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6451 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
6452 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
6453 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
6454 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
6455 }
6456 case NEON::BI__builtin_neon_vshld_n_s64:
6457 case NEON::BI__builtin_neon_vshld_n_u64: {
6458 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6459 return Builder.CreateShl(
6460 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
6461 }
6462 case NEON::BI__builtin_neon_vshrd_n_s64: {
6463 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6464 return Builder.CreateAShr(
6465 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6466 Amt->getZExtValue())),
6467 "shrd_n");
6468 }
6469 case NEON::BI__builtin_neon_vshrd_n_u64: {
6470 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6471 uint64_t ShiftAmt = Amt->getZExtValue();
6472 // Right-shifting an unsigned value by its size yields 0.
6473 if (ShiftAmt == 64)
6474 return ConstantInt::get(Int64Ty, 0);
6475 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
6476 "shrd_n");
6477 }
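// Illustrative note (editorial addition, not from the upstream source): a
// shift amount of 64 would make a plain lshr poison, so vshrd_n_u64(x, 64) is
// folded to the constant 0 here (and vsrad_n_u64 below simply returns the
// accumulator); the signed forms instead clamp the shift amount to 63.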
6478 case NEON::BI__builtin_neon_vsrad_n_s64: {
6479 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
6480 Ops[1] = Builder.CreateAShr(
6481 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6482 Amt->getZExtValue())),
6483 "shrd_n");
6484 return Builder.CreateAdd(Ops[0], Ops[1]);
6485 }
6486 case NEON::BI__builtin_neon_vsrad_n_u64: {
6487 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
6488 uint64_t ShiftAmt = Amt->getZExtValue();
6489 // Right-shifting an unsigned value by its size yields 0.
6490 // As Op + 0 = Op, return Ops[0] directly.
6491 if (ShiftAmt == 64)
6492 return Ops[0];
6493 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
6494 "shrd_n");
6495 return Builder.CreateAdd(Ops[0], Ops[1]);
6496 }
6497 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
6498 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
6499 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
6500 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
6501 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6502 "lane");
6503 SmallVector<Value *, 2> ProductOps;
6504 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
6505 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
6506 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
6507 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6508 ProductOps, "vqdmlXl");
6509 Constant *CI = ConstantInt::get(SizeTy, 0);
6510 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6511 Ops.pop_back();
6512
6513 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
6514 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
6515 ? Intrinsic::aarch64_neon_sqadd
6516 : Intrinsic::aarch64_neon_sqsub;
6517 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
6518 }
6519 case NEON::BI__builtin_neon_vqdmlals_s32:
6520 case NEON::BI__builtin_neon_vqdmlsls_s32: {
6521 SmallVector<Value *, 2> ProductOps;
6522 ProductOps.push_back(Ops[1]);
6523 ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
6524 Ops[1] =
6525 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6526 ProductOps, "vqdmlXl");
6527
6528 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
6529 ? Intrinsic::aarch64_neon_sqadd
6530 : Intrinsic::aarch64_neon_sqsub;
6531 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
6532 }
6533 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
6534 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
6535 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
6536 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
6537 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6538 "lane");
6539 SmallVector<Value *, 2> ProductOps;
6540 ProductOps.push_back(Ops[1]);
6541 ProductOps.push_back(Ops[2]);
6542 Ops[1] =
6543 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6544 ProductOps, "vqdmlXl");
6545 Ops.pop_back();
6546
6547 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
6548 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
6549 ? Intrinsic::aarch64_neon_sqadd
6550 : Intrinsic::aarch64_neon_sqsub;
6551 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
6552 }
6553 case NEON::BI__builtin_neon_vget_lane_bf16:
6554 case NEON::BI__builtin_neon_vduph_lane_bf16:
6555 case NEON::BI__builtin_neon_vduph_lane_f16: {
6556 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6557 "vget_lane");
6558 }
6559 case NEON::BI__builtin_neon_vgetq_lane_bf16:
6560 case NEON::BI__builtin_neon_vduph_laneq_bf16:
6561 case NEON::BI__builtin_neon_vduph_laneq_f16: {
6562 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6563 "vgetq_lane");
6564 }
6565 case NEON::BI__builtin_neon_vcvt_bf16_f32: {
6566 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6567 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6568 return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6569 }
6570 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
6571 SmallVector<int, 16> ConcatMask(8);
6572 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6573 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6574 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6575 llvm::Value *Trunc =
6576 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6577 return Builder.CreateShuffleVector(
6578 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
6579 }
6580 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
6581 SmallVector<int, 16> ConcatMask(8);
6582 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6583 SmallVector<int, 16> LoMask(4);
6584 std::iota(LoMask.begin(), LoMask.end(), 0);
6585 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6586 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6587 llvm::Type *V8BF16 = FixedVectorType::get(Builder.getBFloatTy(), 8);
6588 llvm::Value *Inactive = Builder.CreateShuffleVector(
6589 Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
6590 llvm::Value *Trunc =
6591 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
6592 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
6593 }
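// Note on the case above: LoMask selects the low four bf16 lanes of the
// existing destination (Ops[0]), and ConcatMask then concatenates them with
// the four newly truncated values from Ops[1], which end up in the high half.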
6594
6595 case clang::AArch64::BI_InterlockedAdd:
6596 case clang::AArch64::BI_InterlockedAdd_acq:
6597 case clang::AArch64::BI_InterlockedAdd_rel:
6598 case clang::AArch64::BI_InterlockedAdd_nf:
6599 case clang::AArch64::BI_InterlockedAdd64:
6600 case clang::AArch64::BI_InterlockedAdd64_acq:
6601 case clang::AArch64::BI_InterlockedAdd64_rel:
6602 case clang::AArch64::BI_InterlockedAdd64_nf: {
6603 Address DestAddr = CheckAtomicAlignment(*this, E);
6604 Value *Val = EmitScalarExpr(E->getArg(1));
6605 llvm::AtomicOrdering Ordering;
6606 switch (BuiltinID) {
6607 case clang::AArch64::BI_InterlockedAdd:
6608 case clang::AArch64::BI_InterlockedAdd64:
6609 Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
6610 break;
6611 case clang::AArch64::BI_InterlockedAdd_acq:
6612 case clang::AArch64::BI_InterlockedAdd64_acq:
6613 Ordering = llvm::AtomicOrdering::Acquire;
6614 break;
6615 case clang::AArch64::BI_InterlockedAdd_rel:
6616 case clang::AArch64::BI_InterlockedAdd64_rel:
6617 Ordering = llvm::AtomicOrdering::Release;
6618 break;
6619 case clang::AArch64::BI_InterlockedAdd_nf:
6620 case clang::AArch64::BI_InterlockedAdd64_nf:
6621 Ordering = llvm::AtomicOrdering::Monotonic;
6622 break;
6623 default:
6624 llvm_unreachable("missing builtin ID in switch!");
6625 }
6626 AtomicRMWInst *RMWI =
6627 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val, Ordering);
6628 return Builder.CreateAdd(RMWI, Val);
6629 }
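// Note on the case above: atomicrmw add returns the value the memory held
// before the addition, while the MSVC _InterlockedAdd family returns the
// value after the addition, hence the extra CreateAdd on the result.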
6630 }
6631
6632 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
6633 llvm::Type *Ty = VTy;
6634 if (!Ty)
6635 return nullptr;
6636
6637 // Not all intrinsics handled by the common case work for AArch64 yet, so only
6638 // defer to common code if it's been added to our special map.
6639 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
6640 AArch64SIMDIntrinsicsProvenSorted);
6641
6642 if (Builtin)
6643 return EmitCommonNeonBuiltinExpr(
6644 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
6645 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
6646 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
6647
6648 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
6649 return V;
6650
6651 unsigned Int;
6652 bool ExtractLow = false;
6653 bool ExtendLaneArg = false;
6654 switch (BuiltinID) {
6655 default: return nullptr;
6656 case NEON::BI__builtin_neon_vbsl_v:
6657 case NEON::BI__builtin_neon_vbslq_v: {
6658 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6659 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
6660 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
6661 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
6662
6663 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
6664 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
6665 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
6666 return Builder.CreateBitCast(Ops[0], Ty);
6667 }
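// The sequence above implements the NEON bitwise-select semantics
// vbsl(mask, a, b) == (mask & a) | (~mask & b) on the integer-cast vectors;
// for example, with mask = 0xFFFF0000, a = 0x12345678, b = 0x9ABCDEF0 in a
// 32-bit lane, the result lane is 0x1234DEF0.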
6668 case NEON::BI__builtin_neon_vfma_lane_v:
6669 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6670 // The ARM builtins (and instructions) have the addend as the first
6671 // operand, but the 'fma' intrinsics have it last. Swap it around here.
6672 Value *Addend = Ops[0];
6673 Value *Multiplicand = Ops[1];
6674 Value *LaneSource = Ops[2];
6675 Ops[0] = Multiplicand;
6676 Ops[1] = LaneSource;
6677 Ops[2] = Addend;
6678
6679 // Now adjust things to handle the lane access.
6680 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
6681 ? llvm::FixedVectorType::get(VTy->getElementType(),
6682 VTy->getNumElements() / 2)
6683 : VTy;
6684 llvm::Constant *cst = cast<Constant>(Ops[3]);
6685 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
6686 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
6687 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
6688
6689 Ops.pop_back();
6690 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
6691 : Intrinsic::fma;
6692 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
6693 }
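// Note on the case above: the lane index (Ops[3]) is splatted into a constant
// shuffle mask so the multiplicand vector is broadcast from that lane, and the
// accumulation is emitted as llvm.fma (or its constrained variant when strict
// floating point is in effect) with the addend moved to the last operand.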
6694 case NEON::BI__builtin_neon_vfma_laneq_v: {
6695 auto *VTy = cast<llvm::FixedVectorType>(Ty);
6696 // v1f64 fma should be mapped to Neon scalar f64 fma
6697 if (VTy && VTy->getElementType() == DoubleTy) {
6698 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6699 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6700 llvm::FixedVectorType *VTy =
6701 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
6702 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
6703 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6704 Value *Result;
6705 Result = emitCallMaybeConstrainedFPBuiltin(
6706 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
6707 DoubleTy, {Ops[1], Ops[2], Ops[0]});
6708 return Builder.CreateBitCast(Result, Ty);
6709 }
6710 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6711 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6712
6713 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
6714 VTy->getNumElements() * 2);
6715 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
6716 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
6717 cast<ConstantInt>(Ops[3]));
6718 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6719
6720 return emitCallMaybeConstrainedFPBuiltin(
6721 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6722 {Ops[2], Ops[1], Ops[0]});
6723 }
6724 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6725 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6726 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6727
6728 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6729 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6730 return emitCallMaybeConstrainedFPBuiltin(
6731 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6732 {Ops[2], Ops[1], Ops[0]});
6733 }
6734 case NEON::BI__builtin_neon_vfmah_lane_f16:
6735 case NEON::BI__builtin_neon_vfmas_lane_f32:
6736 case NEON::BI__builtin_neon_vfmah_laneq_f16:
6737 case NEON::BI__builtin_neon_vfmas_laneq_f32:
6738 case NEON::BI__builtin_neon_vfmad_lane_f64:
6739 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6740 Ops.push_back(EmitScalarExpr(E->getArg(3)));
6741 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6742 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6743 return emitCallMaybeConstrainedFPBuiltin(
6744 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6745 {Ops[1], Ops[2], Ops[0]});
6746 }
6747 case NEON::BI__builtin_neon_vmull_v:
6748 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6749 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6750 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6751 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6752 case NEON::BI__builtin_neon_vmax_v:
6753 case NEON::BI__builtin_neon_vmaxq_v:
6754 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6755 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6756 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6757 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6758 case NEON::BI__builtin_neon_vmaxh_f16: {
6759 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6760 Int = Intrinsic::aarch64_neon_fmax;
6761 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
6762 }
6763 case NEON::BI__builtin_neon_vmin_v:
6764 case NEON::BI__builtin_neon_vminq_v:
6765 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6766 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6767 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6768 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
6769 case NEON::BI__builtin_neon_vminh_f16: {
6770 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6771 Int = Intrinsic::aarch64_neon_fmin;
6772 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
6773 }
6774 case NEON::BI__builtin_neon_vabd_v:
6775 case NEON::BI__builtin_neon_vabdq_v:
6776 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6777 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6778 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6779 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
6780 case NEON::BI__builtin_neon_vpadal_v:
6781 case NEON::BI__builtin_neon_vpadalq_v: {
6782 unsigned ArgElts = VTy->getNumElements();
6783 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6784 unsigned BitWidth = EltTy->getBitWidth();
6785 auto *ArgTy = llvm::FixedVectorType::get(
6786 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
6787 llvm::Type* Tys[2] = { VTy, ArgTy };
6788 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6789 SmallVector<llvm::Value*, 1> TmpOps;
6790 TmpOps.push_back(Ops[1]);
6791 Function *F = CGM.getIntrinsic(Int, Tys);
6792 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
6793 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6794 return Builder.CreateAdd(tmp, addend);
6795 }
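// Note on the case above: vpadal (pairwise add-long and accumulate) is not a
// single intrinsic here; the pairwise widening add is emitted as
// aarch64.neon.[su]addlp on the narrow argument, and the accumulation is a
// plain vector add with the bitcast accumulator.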
6796 case NEON::BI__builtin_neon_vpmin_v:
6797 case NEON::BI__builtin_neon_vpminq_v:
6798 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6799 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6800 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6801 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
6802 case NEON::BI__builtin_neon_vpmax_v:
6803 case NEON::BI__builtin_neon_vpmaxq_v:
6804 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6805 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6806 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6807 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
6808 case NEON::BI__builtin_neon_vminnm_v:
6809 case NEON::BI__builtin_neon_vminnmq_v:
6810 Int = Intrinsic::aarch64_neon_fminnm;
6811 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
6812 case NEON::BI__builtin_neon_vminnmh_f16:
6813 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6814 Int = Intrinsic::aarch64_neon_fminnm;
6815 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
6816 case NEON::BI__builtin_neon_vmaxnm_v:
6817 case NEON::BI__builtin_neon_vmaxnmq_v:
6818 Int = Intrinsic::aarch64_neon_fmaxnm;
6819 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
6820 case NEON::BI__builtin_neon_vmaxnmh_f16:
6821 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6822 Int = Intrinsic::aarch64_neon_fmaxnm;
6823 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
6824 case NEON::BI__builtin_neon_vrecpss_f32: {
6825 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6826 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6827 Ops, "vrecps");
6828 }
6829 case NEON::BI__builtin_neon_vrecpsd_f64:
6830 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6831 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6832 Ops, "vrecps");
6833 case NEON::BI__builtin_neon_vrecpsh_f16:
6834 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6835 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
6836 Ops, "vrecps");
6837 case NEON::BI__builtin_neon_vqshrun_n_v:
6838 Int = Intrinsic::aarch64_neon_sqshrun;
6839 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
6840 case NEON::BI__builtin_neon_vqrshrun_n_v:
6841 Int = Intrinsic::aarch64_neon_sqrshrun;
6842 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
6843 case NEON::BI__builtin_neon_vqshrn_n_v:
6844 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6845 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
6846 case NEON::BI__builtin_neon_vrshrn_n_v:
6847 Int = Intrinsic::aarch64_neon_rshrn;
6848 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
6849 case NEON::BI__builtin_neon_vqrshrn_n_v:
6850 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6851 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
6852 case NEON::BI__builtin_neon_vrndah_f16: {
6853 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6854 Int = Builder.getIsFPConstrained()
6855 ? Intrinsic::experimental_constrained_round
6856 : Intrinsic::round;
6857 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
6858 }
6859 case NEON::BI__builtin_neon_vrnda_v:
6860 case NEON::BI__builtin_neon_vrndaq_v: {
6861 Int = Builder.getIsFPConstrained()
6862 ? Intrinsic::experimental_constrained_round
6863 : Intrinsic::round;
6864 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
6865 }
6866 case NEON::BI__builtin_neon_vrndih_f16: {
6867 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6868 Int = Builder.getIsFPConstrained()
6869 ? Intrinsic::experimental_constrained_nearbyint
6870 : Intrinsic::nearbyint;
6871 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
6872 }
6873 case NEON::BI__builtin_neon_vrndmh_f16: {
6874 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6875 Int = Builder.getIsFPConstrained()
6876 ? Intrinsic::experimental_constrained_floor
6877 : Intrinsic::floor;
6878 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
6879 }
6880 case NEON::BI__builtin_neon_vrndm_v:
6881 case NEON::BI__builtin_neon_vrndmq_v: {
6882 Int = Builder.getIsFPConstrained()
6883 ? Intrinsic::experimental_constrained_floor
6884 : Intrinsic::floor;
6885 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6886 }
6887 case NEON::BI__builtin_neon_vrndnh_f16: {
6888 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6889 Int = Builder.getIsFPConstrained()
6890 ? Intrinsic::experimental_constrained_roundeven
6891 : Intrinsic::roundeven;
6892 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
6893 }
6894 case NEON::BI__builtin_neon_vrndn_v:
6895 case NEON::BI__builtin_neon_vrndnq_v: {
6896 Int = Builder.getIsFPConstrained()
6897 ? Intrinsic::experimental_constrained_roundeven
6898 : Intrinsic::roundeven;
6899 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6900 }
6901 case NEON::BI__builtin_neon_vrndns_f32: {
6902 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6903 Int = Builder.getIsFPConstrained()
6904 ? Intrinsic::experimental_constrained_roundeven
6905 : Intrinsic::roundeven;
6906 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
6907 }
6908 case NEON::BI__builtin_neon_vrndph_f16: {
6909 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6910 Int = Builder.getIsFPConstrained()
6911 ? Intrinsic::experimental_constrained_ceil
6912 : Intrinsic::ceil;
6913 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
6914 }
6915 case NEON::BI__builtin_neon_vrndp_v:
6916 case NEON::BI__builtin_neon_vrndpq_v: {
6917 Int = Builder.getIsFPConstrained()
6918 ? Intrinsic::experimental_constrained_ceil
6919 : Intrinsic::ceil;
6920 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6921 }
6922 case NEON::BI__builtin_neon_vrndxh_f16: {
6923 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6924 Int = Builder.getIsFPConstrained()
6925 ? Intrinsic::experimental_constrained_rint
6926 : Intrinsic::rint;
6927 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
6928 }
6929 case NEON::BI__builtin_neon_vrndx_v:
6930 case NEON::BI__builtin_neon_vrndxq_v: {
6931 Int = Builder.getIsFPConstrained()
6932 ? Intrinsic::experimental_constrained_rint
6933 : Intrinsic::rint;
6934 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6935 }
6936 case NEON::BI__builtin_neon_vrndh_f16: {
6937 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6938 Int = Builder.getIsFPConstrained()
6939 ? Intrinsic::experimental_constrained_trunc
6940 : Intrinsic::trunc;
6941 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
6942 }
6943 case NEON::BI__builtin_neon_vrnd32x_f32:
6944 case NEON::BI__builtin_neon_vrnd32xq_f32:
6945 case NEON::BI__builtin_neon_vrnd32x_f64:
6946 case NEON::BI__builtin_neon_vrnd32xq_f64: {
6947 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6948 Int = Intrinsic::aarch64_neon_frint32x;
6949 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
6950 }
6951 case NEON::BI__builtin_neon_vrnd32z_f32:
6952 case NEON::BI__builtin_neon_vrnd32zq_f32:
6953 case NEON::BI__builtin_neon_vrnd32z_f64:
6954 case NEON::BI__builtin_neon_vrnd32zq_f64: {
6955 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6956 Int = Intrinsic::aarch64_neon_frint32z;
6957 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
6958 }
6959 case NEON::BI__builtin_neon_vrnd64x_f32:
6960 case NEON::BI__builtin_neon_vrnd64xq_f32:
6961 case NEON::BI__builtin_neon_vrnd64x_f64:
6962 case NEON::BI__builtin_neon_vrnd64xq_f64: {
6963 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6964 Int = Intrinsic::aarch64_neon_frint64x;
6965 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
6966 }
6967 case NEON::BI__builtin_neon_vrnd64z_f32:
6968 case NEON::BI__builtin_neon_vrnd64zq_f32:
6969 case NEON::BI__builtin_neon_vrnd64z_f64:
6970 case NEON::BI__builtin_neon_vrnd64zq_f64: {
6971 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6972 Int = Intrinsic::aarch64_neon_frint64z;
6973 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
6974 }
6975 case NEON::BI__builtin_neon_vrnd_v:
6976 case NEON::BI__builtin_neon_vrndq_v: {
6977 Int = Builder.getIsFPConstrained()
6978 ? Intrinsic::experimental_constrained_trunc
6979 : Intrinsic::trunc;
6980 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
6981 }
6982 case NEON::BI__builtin_neon_vcvt_f64_v:
6983 case NEON::BI__builtin_neon_vcvtq_f64_v:
6984 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6985 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6986 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6987 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6988 case NEON::BI__builtin_neon_vcvt_f64_f32: {
6989 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6990 "unexpected vcvt_f64_f32 builtin");
6991 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6992 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6993
6994 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
6995 }
6996 case NEON::BI__builtin_neon_vcvt_f32_f64: {
6997 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6998 "unexpected vcvt_f32_f64 builtin");
6999 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
7000 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
7001
7002 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
7003 }
7004 case NEON::BI__builtin_neon_vcvt_s32_v:
7005 case NEON::BI__builtin_neon_vcvt_u32_v:
7006 case NEON::BI__builtin_neon_vcvt_s64_v:
7007 case NEON::BI__builtin_neon_vcvt_u64_v:
7008 case NEON::BI__builtin_neon_vcvt_s16_f16:
7009 case NEON::BI__builtin_neon_vcvt_u16_f16:
7010 case NEON::BI__builtin_neon_vcvtq_s32_v:
7011 case NEON::BI__builtin_neon_vcvtq_u32_v:
7012 case NEON::BI__builtin_neon_vcvtq_s64_v:
7013 case NEON::BI__builtin_neon_vcvtq_u64_v:
7014 case NEON::BI__builtin_neon_vcvtq_s16_f16:
7015 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
7016 Int =
7017 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
7018 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
7019 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
7020 }
7021 case NEON::BI__builtin_neon_vcvta_s16_f16:
7022 case NEON::BI__builtin_neon_vcvta_u16_f16:
7023 case NEON::BI__builtin_neon_vcvta_s32_v:
7024 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
7025 case NEON::BI__builtin_neon_vcvtaq_s32_v:
7026 case NEON::BI__builtin_neon_vcvta_u32_v:
7027 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
7028 case NEON::BI__builtin_neon_vcvtaq_u32_v:
7029 case NEON::BI__builtin_neon_vcvta_s64_v:
7030 case NEON::BI__builtin_neon_vcvtaq_s64_v:
7031 case NEON::BI__builtin_neon_vcvta_u64_v:
7032 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
7033 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
7034 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7035 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
7036 }
7037 case NEON::BI__builtin_neon_vcvtm_s16_f16:
7038 case NEON::BI__builtin_neon_vcvtm_s32_v:
7039 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
7040 case NEON::BI__builtin_neon_vcvtmq_s32_v:
7041 case NEON::BI__builtin_neon_vcvtm_u16_f16:
7042 case NEON::BI__builtin_neon_vcvtm_u32_v:
7043 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
7044 case NEON::BI__builtin_neon_vcvtmq_u32_v:
7045 case NEON::BI__builtin_neon_vcvtm_s64_v:
7046 case NEON::BI__builtin_neon_vcvtmq_s64_v:
7047 case NEON::BI__builtin_neon_vcvtm_u64_v:
7048 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
7049 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
7050 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7051 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
7052 }
7053 case NEON::BI__builtin_neon_vcvtn_s16_f16:
7054 case NEON::BI__builtin_neon_vcvtn_s32_v:
7055 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
7056 case NEON::BI__builtin_neon_vcvtnq_s32_v:
7057 case NEON::BI__builtin_neon_vcvtn_u16_f16:
7058 case NEON::BI__builtin_neon_vcvtn_u32_v:
7059 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
7060 case NEON::BI__builtin_neon_vcvtnq_u32_v:
7061 case NEON::BI__builtin_neon_vcvtn_s64_v:
7062 case NEON::BI__builtin_neon_vcvtnq_s64_v:
7063 case NEON::BI__builtin_neon_vcvtn_u64_v:
7064 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
7065 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
7066 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7067 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
7068 }
7069 case NEON::BI__builtin_neon_vcvtp_s16_f16:
7070 case NEON::BI__builtin_neon_vcvtp_s32_v:
7071 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
7072 case NEON::BI__builtin_neon_vcvtpq_s32_v:
7073 case NEON::BI__builtin_neon_vcvtp_u16_f16:
7074 case NEON::BI__builtin_neon_vcvtp_u32_v:
7075 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
7076 case NEON::BI__builtin_neon_vcvtpq_u32_v:
7077 case NEON::BI__builtin_neon_vcvtp_s64_v:
7078 case NEON::BI__builtin_neon_vcvtpq_s64_v:
7079 case NEON::BI__builtin_neon_vcvtp_u64_v:
7080 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
7081 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
7082 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7083 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
7084 }
7085 case NEON::BI__builtin_neon_vmulx_v:
7086 case NEON::BI__builtin_neon_vmulxq_v: {
7087 Int = Intrinsic::aarch64_neon_fmulx;
7088 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
7089 }
7090 case NEON::BI__builtin_neon_vmulxh_lane_f16:
7091 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
7092 // vmulx_lane should be mapped to Neon scalar mulx after
7093 // extracting the scalar element
7094 Ops.push_back(EmitScalarExpr(E->getArg(2)));
7095 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
7096 Ops.pop_back();
7097 Int = Intrinsic::aarch64_neon_fmulx;
7098 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
7099 }
7100 case NEON::BI__builtin_neon_vmul_lane_v:
7101 case NEON::BI__builtin_neon_vmul_laneq_v: {
7102 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
7103 bool Quad = false;
7104 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
7105 Quad = true;
7106 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
7107 llvm::FixedVectorType *VTy =
7108 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
7109 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7110 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
7111 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
7112 return Builder.CreateBitCast(Result, Ty);
7113 }
7114 case NEON::BI__builtin_neon_vnegd_s64:
7115 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
7116 case NEON::BI__builtin_neon_vnegh_f16:
7117 return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
7118 case NEON::BI__builtin_neon_vpmaxnm_v:
7119 case NEON::BI__builtin_neon_vpmaxnmq_v: {
7120 Int = Intrinsic::aarch64_neon_fmaxnmp;
7121 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
7122 }
7123 case NEON::BI__builtin_neon_vpminnm_v:
7124 case NEON::BI__builtin_neon_vpminnmq_v: {
7125 Int = Intrinsic::aarch64_neon_fminnmp;
7126 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
7127 }
7128 case NEON::BI__builtin_neon_vsqrth_f16: {
7129 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7130 Int = Builder.getIsFPConstrained()
7131 ? Intrinsic::experimental_constrained_sqrt
7132 : Intrinsic::sqrt;
7133 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
7134 }
7135 case NEON::BI__builtin_neon_vsqrt_v:
7136 case NEON::BI__builtin_neon_vsqrtq_v: {
7137 Int = Builder.getIsFPConstrained()
7138 ? Intrinsic::experimental_constrained_sqrt
7139 : Intrinsic::sqrt;
7140 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7141 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
7142 }
7143 case NEON::BI__builtin_neon_vrbit_v:
7144 case NEON::BI__builtin_neon_vrbitq_v: {
7145 Int = Intrinsic::bitreverse;
7146 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
7147 }
7148 case NEON::BI__builtin_neon_vmaxv_f16: {
7149 Int = Intrinsic::aarch64_neon_fmaxv;
7150 Ty = HalfTy;
7151 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7152 llvm::Type *Tys[2] = { Ty, VTy };
7153 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7154 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7155 return Builder.CreateTrunc(Ops[0], HalfTy);
7156 }
7157 case NEON::BI__builtin_neon_vmaxvq_f16: {
7158 Int = Intrinsic::aarch64_neon_fmaxv;
7159 Ty = HalfTy;
7160 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7161 llvm::Type *Tys[2] = { Ty, VTy };
7162 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7163 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7164 return Builder.CreateTrunc(Ops[0], HalfTy);
7165 }
7166 case NEON::BI__builtin_neon_vminv_f16: {
7167 Int = Intrinsic::aarch64_neon_fminv;
7168 Ty = HalfTy;
7169 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7170 llvm::Type *Tys[2] = { Ty, VTy };
7171 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7172 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7173 return Builder.CreateTrunc(Ops[0], HalfTy);
7174 }
7175 case NEON::BI__builtin_neon_vminvq_f16: {
7176 Int = Intrinsic::aarch64_neon_fminv;
7177 Ty = HalfTy;
7178 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7179 llvm::Type *Tys[2] = { Ty, VTy };
7180 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7181 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7182 return Builder.CreateTrunc(Ops[0], HalfTy);
7183 }
7184 case NEON::BI__builtin_neon_vmaxnmv_f16: {
7185 Int = Intrinsic::aarch64_neon_fmaxnmv;
7186 Ty = HalfTy;
7187 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7188 llvm::Type *Tys[2] = { Ty, VTy };
7189 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7190 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
7191 return Builder.CreateTrunc(Ops[0], HalfTy);
7192 }
7193 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
7194 Int = Intrinsic::aarch64_neon_fmaxnmv;
7195 Ty = HalfTy;
7196 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7197 llvm::Type *Tys[2] = { Ty, VTy };
7198 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7199 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
7200 return Builder.CreateTrunc(Ops[0], HalfTy);
7201 }
7202 case NEON::BI__builtin_neon_vminnmv_f16: {
7203 Int = Intrinsic::aarch64_neon_fminnmv;
7204 Ty = HalfTy;
7205 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7206 llvm::Type *Tys[2] = { Ty, VTy };
7207 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7208 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
7209 return Builder.CreateTrunc(Ops[0], HalfTy);
7210 }
7211 case NEON::BI__builtin_neon_vminnmvq_f16: {
7212 Int = Intrinsic::aarch64_neon_fminnmv;
7213 Ty = HalfTy;
7214 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7215 llvm::Type *Tys[2] = { Ty, VTy };
7216 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7217 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
7218 return Builder.CreateTrunc(Ops[0], HalfTy);
7219 }
7220 case NEON::BI__builtin_neon_vmul_n_f64: {
7221 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
7222 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
7223 return Builder.CreateFMul(Ops[0], RHS);
7224 }
7225 case NEON::BI__builtin_neon_vaddlv_u8: {
7226 Int = Intrinsic::aarch64_neon_uaddlv;
7227 Ty = Int32Ty;
7228 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7229 llvm::Type *Tys[2] = { Ty, VTy };
7230 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7231 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7232 return Builder.CreateTrunc(Ops[0], Int16Ty);
7233 }
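// Note on the case above: the uaddlv reduction across eight u8 lanes is
// emitted with an i32 result type, which is then truncated to i16 to match
// the u16 return type of vaddlv_u8.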
7234 case NEON::BI__builtin_neon_vaddlv_u16: {
7235 Int = Intrinsic::aarch64_neon_uaddlv;
7236 Ty = Int32Ty;
7237 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7238 llvm::Type *Tys[2] = { Ty, VTy };
7239 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7240 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7241 }
7242 case NEON::BI__builtin_neon_vaddlvq_u8: {
7243 Int = Intrinsic::aarch64_neon_uaddlv;
7244 Ty = Int32Ty;
7245 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7246 llvm::Type *Tys[2] = { Ty, VTy };
7247 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7248 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7249 return Builder.CreateTrunc(Ops[0], Int16Ty);
7250 }
7251 case NEON::BI__builtin_neon_vaddlvq_u16: {
7252 Int = Intrinsic::aarch64_neon_uaddlv;
7253 Ty = Int32Ty;
7254 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7255 llvm::Type *Tys[2] = { Ty, VTy };
7256 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7257 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7258 }
7259 case NEON::BI__builtin_neon_vaddlv_s8: {
7260 Int = Intrinsic::aarch64_neon_saddlv;
7261 Ty = Int32Ty;
7262 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7263 llvm::Type *Tys[2] = { Ty, VTy };
7264 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7265 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7266 return Builder.CreateTrunc(Ops[0], Int16Ty);
7267 }
7268 case NEON::BI__builtin_neon_vaddlv_s16: {
7269 Int = Intrinsic::aarch64_neon_saddlv;
7270 Ty = Int32Ty;
7271 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7272 llvm::Type *Tys[2] = { Ty, VTy };
7273 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7274 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7275 }
7276 case NEON::BI__builtin_neon_vaddlvq_s8: {
7277 Int = Intrinsic::aarch64_neon_saddlv;
7278 Ty = Int32Ty;
7279 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7280 llvm::Type *Tys[2] = { Ty, VTy };
7281 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7282 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7283 return Builder.CreateTrunc(Ops[0], Int16Ty);
7284 }
7285 case NEON::BI__builtin_neon_vaddlvq_s16: {
7286 Int = Intrinsic::aarch64_neon_saddlv;
7287 Ty = Int32Ty;
7288 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7289 llvm::Type *Tys[2] = { Ty, VTy };
7290 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7291 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7292 }
7293 case NEON::BI__builtin_neon_vsri_n_v:
7294 case NEON::BI__builtin_neon_vsriq_n_v: {
7295 Int = Intrinsic::aarch64_neon_vsri;
7296 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
7297 return EmitNeonCall(Intrin, Ops, "vsri_n");
7298 }
7299 case NEON::BI__builtin_neon_vsli_n_v:
7300 case NEON::BI__builtin_neon_vsliq_n_v: {
7301 Int = Intrinsic::aarch64_neon_vsli;
7302 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
7303 return EmitNeonCall(Intrin, Ops, "vsli_n");
7304 }
7305 case NEON::BI__builtin_neon_vsra_n_v:
7306 case NEON::BI__builtin_neon_vsraq_n_v:
7307 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7308 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
7309 return Builder.CreateAdd(Ops[0], Ops[1]);
7310 case NEON::BI__builtin_neon_vrsra_n_v:
7311 case NEON::BI__builtin_neon_vrsraq_n_v: {
7312 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
7313 SmallVector<llvm::Value*,2> TmpOps;
7314 TmpOps.push_back(Ops[1]);
7315 TmpOps.push_back(Ops[2]);
7316 Function* F = CGM.getIntrinsic(Int, Ty);
7317 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
7318 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7319 return Builder.CreateAdd(Ops[0], tmp);
7320 }
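// Note on the case above: the rounding shift right by an immediate is emitted
// as the [su]rshl intrinsic with the shift amount negated (the trailing
// "1, true" arguments to EmitNeonCall splat and negate the shift operand),
// and the shifted value is then accumulated onto Ops[0] with an ordinary add.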
7321 case NEON::BI__builtin_neon_vld1_v:
7322 case NEON::BI__builtin_neon_vld1q_v: {
7323 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
7324 }
7325 case NEON::BI__builtin_neon_vst1_v:
7326 case NEON::BI__builtin_neon_vst1q_v:
7327 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7328 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
7329 case NEON::BI__builtin_neon_vld1_lane_v:
7330 case NEON::BI__builtin_neon_vld1q_lane_v: {
7331 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7332 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
7333 PtrOp0.getAlignment());
7334 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
7335 }
7336 case NEON::BI__builtin_neon_vldap1_lane_s64:
7337 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
7338 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7339 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
7340 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
7341 LI->setAtomic(llvm::AtomicOrdering::Acquire);
7342 Ops[0] = LI;
7343 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
7344 }
7345 case NEON::BI__builtin_neon_vld1_dup_v:
7346 case NEON::BI__builtin_neon_vld1q_dup_v: {
7347 Value *V = PoisonValue::get(Ty);
7348 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
7349 PtrOp0.getAlignment());
7350 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
7351 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
7352 return EmitNeonSplat(Ops[0], CI);
7353 }
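// Note on the case above: vld1_dup loads a single element, inserts it into
// lane 0 of a poison vector, and then EmitNeonSplat broadcasts that lane to
// every element of the result.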
7354 case NEON::BI__builtin_neon_vst1_lane_v:
7355 case NEON::BI__builtin_neon_vst1q_lane_v:
7356 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7357 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
7358 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
7359 case NEON::BI__builtin_neon_vstl1_lane_s64:
7360 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
7361 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7362 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
7363 llvm::StoreInst *SI =
7364 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
7365 SI->setAtomic(llvm::AtomicOrdering::Release);
7366 return SI;
7367 }
7368 case NEON::BI__builtin_neon_vld2_v:
7369 case NEON::BI__builtin_neon_vld2q_v: {
7370 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
7371 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
7372 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
7373 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7374 }
7375 case NEON::BI__builtin_neon_vld3_v:
7376 case NEON::BI__builtin_neon_vld3q_v: {
7377 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
7378 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
7379 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
7380 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7381 }
7382 case NEON::BI__builtin_neon_vld4_v:
7383 case NEON::BI__builtin_neon_vld4q_v: {
7384 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
7385 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
7386 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
7387 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7388 }
7389 case NEON::BI__builtin_neon_vld2_dup_v:
7390 case NEON::BI__builtin_neon_vld2q_dup_v: {
7391 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
7392 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
7393 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
7394 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7395 }
7396 case NEON::BI__builtin_neon_vld3_dup_v:
7397 case NEON::BI__builtin_neon_vld3q_dup_v: {
7398 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
7399 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
7400 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
7401 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7402 }
7403 case NEON::BI__builtin_neon_vld4_dup_v:
7404 case NEON::BI__builtin_neon_vld4q_dup_v: {
7405 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
7406 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
7407 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
7408 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7409 }
7410 case NEON::BI__builtin_neon_vld2_lane_v:
7411 case NEON::BI__builtin_neon_vld2q_lane_v: {
7412 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7413 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
7414 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7415 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7416 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7417 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7418 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
7419 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7420 }
7421 case NEON::BI__builtin_neon_vld3_lane_v:
7422 case NEON::BI__builtin_neon_vld3q_lane_v: {
7423 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7424 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
7425 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7426 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7427 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7428 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7429 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7430 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
7431 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7432 }
7433 case NEON::BI__builtin_neon_vld4_lane_v:
7434 case NEON::BI__builtin_neon_vld4q_lane_v: {
7435 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7436 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
7437 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7438 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7439 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7440 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7441 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
7442 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
7443 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
7444 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7445 }
7446 case NEON::BI__builtin_neon_vst2_v:
7447 case NEON::BI__builtin_neon_vst2q_v: {
7448 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7449 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
7450 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
7451 Ops, "");
7452 }
7453 case NEON::BI__builtin_neon_vst2_lane_v:
7454 case NEON::BI__builtin_neon_vst2q_lane_v: {
7455 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7456 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
7457 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7458 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
7459 Ops, "");
7460 }
7461 case NEON::BI__builtin_neon_vst3_v:
7462 case NEON::BI__builtin_neon_vst3q_v: {
7463 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7464 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7465 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
7466 Ops, "");
7467 }
7468 case NEON::BI__builtin_neon_vst3_lane_v:
7469 case NEON::BI__builtin_neon_vst3q_lane_v: {
7470 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7471 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7472 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7473 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
7474 Ops, "");
7475 }
7476 case NEON::BI__builtin_neon_vst4_v:
7477 case NEON::BI__builtin_neon_vst4q_v: {
7478 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7479 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7480 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
7481 Ops, "");
7482 }
7483 case NEON::BI__builtin_neon_vst4_lane_v:
7484 case NEON::BI__builtin_neon_vst4q_lane_v: {
7485 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7486 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7487 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
7488 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
7489 Ops, "");
7490 }
7491 case NEON::BI__builtin_neon_vtrn_v:
7492 case NEON::BI__builtin_neon_vtrnq_v: {
7493 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7494 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7495 Value *SV = nullptr;
7496
7497 for (unsigned vi = 0; vi != 2; ++vi) {
7498 SmallVector<int, 16> Indices;
7499 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7500 Indices.push_back(i+vi);
7501 Indices.push_back(i+e+vi);
7502 }
7503 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7504 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
7505 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7506 }
7507 return SV;
7508 }
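// Worked example for the transpose above with 4-element inputs (e == 4): the
// first pass (vi == 0) builds indices {0, 4, 2, 6} and the second (vi == 1)
// builds {1, 5, 3, 7}, indexing into the concatenation of Ops[1] and Ops[2];
// each shuffle result is stored to consecutive slots behind Ops[0].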
7509 case NEON::BI__builtin_neon_vuzp_v:
7510 case NEON::BI__builtin_neon_vuzpq_v: {
7511 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7512 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7513 Value *SV = nullptr;
7514
7515 for (unsigned vi = 0; vi != 2; ++vi) {
7516 SmallVector<int, 16> Indices;
7517 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7518 Indices.push_back(2*i+vi);
7519
7520 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7521 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
7522 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7523 }
7524 return SV;
7525 }
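// Worked example for the unzip above with 4-element inputs (e == 4): the two
// shuffles use indices {0, 2, 4, 6} (even lanes) and {1, 3, 5, 7} (odd lanes)
// of the concatenated operands.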
7526 case NEON::BI__builtin_neon_vzip_v:
7527 case NEON::BI__builtin_neon_vzipq_v: {
7528 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7529 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7530 Value *SV = nullptr;
7531
7532 for (unsigned vi = 0; vi != 2; ++vi) {
7533 SmallVector<int, 16> Indices;
7534 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7535 Indices.push_back((i + vi*e) >> 1);
7536 Indices.push_back(((i + vi*e) >> 1)+e);
7537 }
7538 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7539 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
7540 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7541 }
7542 return SV;
7543 }
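// Worked example for the zip above with 4-element inputs (e == 4): the first
// pass interleaves the low halves with indices {0, 4, 1, 5} and the second
// interleaves the high halves with {2, 6, 3, 7}.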
7544 case NEON::BI__builtin_neon_vqtbl1q_v: {
7545 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
7546 Ops, "vtbl1");
7547 }
7548 case NEON::BI__builtin_neon_vqtbl2q_v: {
7549 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
7550 Ops, "vtbl2");
7551 }
7552 case NEON::BI__builtin_neon_vqtbl3q_v: {
7553 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
7554 Ops, "vtbl3");
7555 }
7556 case NEON::BI__builtin_neon_vqtbl4q_v: {
7557 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
7558 Ops, "vtbl4");
7559 }
7560 case NEON::BI__builtin_neon_vqtbx1q_v: {
7561 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
7562 Ops, "vtbx1");
7563 }
7564 case NEON::BI__builtin_neon_vqtbx2q_v: {
7565 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
7566 Ops, "vtbx2");
7567 }
7568 case NEON::BI__builtin_neon_vqtbx3q_v: {
7569 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
7570 Ops, "vtbx3");
7571 }
7572 case NEON::BI__builtin_neon_vqtbx4q_v: {
7573 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
7574 Ops, "vtbx4");
7575 }
7576 case NEON::BI__builtin_neon_vsqadd_v:
7577 case NEON::BI__builtin_neon_vsqaddq_v: {
7578 Int = Intrinsic::aarch64_neon_usqadd;
7579 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
7580 }
7581 case NEON::BI__builtin_neon_vuqadd_v:
7582 case NEON::BI__builtin_neon_vuqaddq_v: {
7583 Int = Intrinsic::aarch64_neon_suqadd;
7584 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
7585 }
7586
7587 case NEON::BI__builtin_neon_vluti2_laneq_mf8:
7588 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
7589 case NEON::BI__builtin_neon_vluti2_laneq_f16:
7590 case NEON::BI__builtin_neon_vluti2_laneq_p16:
7591 case NEON::BI__builtin_neon_vluti2_laneq_p8:
7592 case NEON::BI__builtin_neon_vluti2_laneq_s16:
7593 case NEON::BI__builtin_neon_vluti2_laneq_s8:
7594 case NEON::BI__builtin_neon_vluti2_laneq_u16:
7595 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
7596 Int = Intrinsic::aarch64_neon_vluti2_laneq;
7597 llvm::Type *Tys[2];
7598 Tys[0] = Ty;
7599 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7600 /*isQuad*/ false));
7601 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
7602 }
7603 case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
7604 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
7605 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
7606 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
7607 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
7608 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
7609 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
7610 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
7611 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
7612 Int = Intrinsic::aarch64_neon_vluti2_laneq;
7613 llvm::Type *Tys[2];
7614 Tys[0] = Ty;
7615 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7616 /*isQuad*/ true));
7617 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
7618 }
7619 case NEON::BI__builtin_neon_vluti2_lane_mf8:
7620 case NEON::BI__builtin_neon_vluti2_lane_bf16:
7621 case NEON::BI__builtin_neon_vluti2_lane_f16:
7622 case NEON::BI__builtin_neon_vluti2_lane_p16:
7623 case NEON::BI__builtin_neon_vluti2_lane_p8:
7624 case NEON::BI__builtin_neon_vluti2_lane_s16:
7625 case NEON::BI__builtin_neon_vluti2_lane_s8:
7626 case NEON::BI__builtin_neon_vluti2_lane_u16:
7627 case NEON::BI__builtin_neon_vluti2_lane_u8: {
7628 Int = Intrinsic::aarch64_neon_vluti2_lane;
7629 llvm::Type *Tys[2];
7630 Tys[0] = Ty;
7631 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7632 /*isQuad*/ false));
7633 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
7634 }
7635 case NEON::BI__builtin_neon_vluti2q_lane_mf8:
7636 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
7637 case NEON::BI__builtin_neon_vluti2q_lane_f16:
7638 case NEON::BI__builtin_neon_vluti2q_lane_p16:
7639 case NEON::BI__builtin_neon_vluti2q_lane_p8:
7640 case NEON::BI__builtin_neon_vluti2q_lane_s16:
7641 case NEON::BI__builtin_neon_vluti2q_lane_s8:
7642 case NEON::BI__builtin_neon_vluti2q_lane_u16:
7643 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
7644 Int = Intrinsic::aarch64_neon_vluti2_lane;
7645 llvm::Type *Tys[2];
7646 Tys[0] = Ty;
7647 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7648 /*isQuad*/ true));
7649 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
7650 }
7651 case NEON::BI__builtin_neon_vluti4q_lane_mf8:
7652 case NEON::BI__builtin_neon_vluti4q_lane_p8:
7653 case NEON::BI__builtin_neon_vluti4q_lane_s8:
7654 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
7655 Int = Intrinsic::aarch64_neon_vluti4q_lane;
7656 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane");
7657 }
7658 case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
7659 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
7660 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
7661 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
7662 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
7663 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq");
7664 }
7665 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
7666 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
7667 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
7668 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
7669 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
7670 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
7671 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
7672 }
7673 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
7674 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
7675 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
7676 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
7677 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
7678 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
7679 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
7680 }
7681 case NEON::BI__builtin_neon_vmmlaq_f16_mf8_fpm:
7682 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla,
7683 {llvm::FixedVectorType::get(HalfTy, 8),
7684 llvm::FixedVectorType::get(Int8Ty, 16)},
7685 Ops, E, "fmmla");
7686 case NEON::BI__builtin_neon_vmmlaq_f32_mf8_fpm:
7687 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla,
7688 {llvm::FixedVectorType::get(FloatTy, 4),
7689 llvm::FixedVectorType::get(Int8Ty, 16)},
7690 Ops, E, "fmmla");
7691 case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
7692 ExtractLow = true;
7693 [[fallthrough]];
7694 case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
7695 case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
7696 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
7697 llvm::FixedVectorType::get(BFloatTy, 8),
7698 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7699 case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
7700 ExtractLow = true;
7701 [[fallthrough]];
7702 case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
7703 case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
7704 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
7705 llvm::FixedVectorType::get(BFloatTy, 8),
7706 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7707 case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
7708 ExtractLow = true;
7709 [[fallthrough]];
7710 case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
7711 case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
7712 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
7713 llvm::FixedVectorType::get(HalfTy, 8),
7714 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7715 case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
7716 ExtractLow = true;
7717 [[fallthrough]];
7718 case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
7719 case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
7720 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
7721 llvm::FixedVectorType::get(HalfTy, 8),
7722 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7723 case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
7724 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7725 llvm::FixedVectorType::get(Int8Ty, 8),
7726 Ops[0]->getType(), false, Ops, E, "vfcvtn");
7727 case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
7728 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7729 llvm::FixedVectorType::get(Int8Ty, 8),
7730 llvm::FixedVectorType::get(HalfTy, 4), false, Ops,
7731 E, "vfcvtn");
7732 case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
7733 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7734 llvm::FixedVectorType::get(Int8Ty, 16),
7735 llvm::FixedVectorType::get(HalfTy, 8), false, Ops,
7736 E, "vfcvtn");
7737 case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
7738 llvm::Type *Ty = llvm::FixedVectorType::get(Int8Ty, 16);
7739 Ops[0] = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
7740 uint64_t(0));
7741 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn2, Ty,
7742 Ops[1]->getType(), false, Ops, E, "vfcvtn2");
7743 }
7744
7745 case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
7746 case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
7747 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2, false, HalfTy,
7748 Ops, E, "fdot2");
7749 case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
7750 case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
7751 ExtendLaneArg = true;
7752 [[fallthrough]];
7753 case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
7754 case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
7755 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2_lane,
7756 ExtendLaneArg, HalfTy, Ops, E, "fdot2_lane");
7757 case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
7758 case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
7759 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4, false,
7760 FloatTy, Ops, E, "fdot4");
7761 case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
7762 case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
7763 ExtendLaneArg = true;
7764 [[fallthrough]];
7765 case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
7766 case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
7767 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4_lane,
7768 ExtendLaneArg, FloatTy, Ops, E, "fdot4_lane");
7769
7770 case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
7771 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalb,
7772 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7773 "vmlal");
7774 case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
7775 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalt,
7776 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7777 "vmlal");
7778 case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
7779 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbb,
7780 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7781 "vmlall");
7782 case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
7783 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbt,
7784 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7785 "vmlall");
7786 case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
7787 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltb,
7788 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7789 "vmlall");
7790 case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
7791 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltt,
7792 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7793 "vmlall");
7794 case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
7795 ExtendLaneArg = true;
7796 [[fallthrough]];
7797 case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
7798 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalb_lane,
7799 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7800 case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
7801 ExtendLaneArg = true;
7802 [[fallthrough]];
7803 case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
7804 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalt_lane,
7805 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7806 case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
7807 ExtendLaneArg = true;
7808 [[fallthrough]];
7809 case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
7810 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbb_lane,
7811 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7812 case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
7813 ExtendLaneArg = true;
7814 [[fallthrough]];
7815 case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
7816 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbt_lane,
7817 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7818 case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
7819 ExtendLaneArg = true;
7820 [[fallthrough]];
7821 case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
7822 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltb_lane,
7823 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7824 case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
7825 ExtendLaneArg = true;
7826 [[fallthrough]];
7827 case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
7828 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltt_lane,
7829 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7830 case NEON::BI__builtin_neon_vamin_f16:
7831 case NEON::BI__builtin_neon_vaminq_f16:
7832 case NEON::BI__builtin_neon_vamin_f32:
7833 case NEON::BI__builtin_neon_vaminq_f32:
7834 case NEON::BI__builtin_neon_vaminq_f64: {
7835 Int = Intrinsic::aarch64_neon_famin;
7836 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
7837 }
7838 case NEON::BI__builtin_neon_vamax_f16:
7839 case NEON::BI__builtin_neon_vamaxq_f16:
7840 case NEON::BI__builtin_neon_vamax_f32:
7841 case NEON::BI__builtin_neon_vamaxq_f32:
7842 case NEON::BI__builtin_neon_vamaxq_f64: {
7843 Int = Intrinsic::aarch64_neon_famax;
7844 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
7845 }
7846 case NEON::BI__builtin_neon_vscale_f16:
7847 case NEON::BI__builtin_neon_vscaleq_f16:
7848 case NEON::BI__builtin_neon_vscale_f32:
7849 case NEON::BI__builtin_neon_vscaleq_f32:
7850 case NEON::BI__builtin_neon_vscaleq_f64: {
7851 Int = Intrinsic::aarch64_neon_fp8_fscale;
7852 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fscale");
7853 }
7854 }
7855}
7856
7857 Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
7858 const CallExpr *E) {
7859 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
7860 BuiltinID == BPF::BI__builtin_btf_type_id ||
7861 BuiltinID == BPF::BI__builtin_preserve_type_info ||
7862 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
7863 "unexpected BPF builtin");
7864
7865 // A sequence number, injected into IR builtin functions, to
7866 // prevent CSE, given that the only difference between two such
7867 // functions may be the debuginfo metadata.
7868 static uint32_t BuiltinSeqNum;
7869
7870 switch (BuiltinID) {
7871 default:
7872 llvm_unreachable("Unexpected BPF builtin");
7873 case BPF::BI__builtin_preserve_field_info: {
7874 const Expr *Arg = E->getArg(0);
7875 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
7876
7877 if (!getDebugInfo()) {
7878 CGM.Error(E->getExprLoc(),
7879 "using __builtin_preserve_field_info() without -g");
7880 return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
7881 : EmitLValue(Arg).emitRawPointer(*this);
7882 }
7883
7884 // Enable underlying preserve_*_access_index() generation.
7885 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
7886 IsInPreservedAIRegion = true;
7887 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
7888 : EmitLValue(Arg).emitRawPointer(*this);
7889 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
7890
7891 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7892 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
7893
7894 // Build the IR for the preserve_field_info intrinsic.
7895 llvm::Function *FnGetFieldInfo = Intrinsic::getOrInsertDeclaration(
7896 &CGM.getModule(), Intrinsic::bpf_preserve_field_info,
7897 {FieldAddr->getType()});
7898 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
7899 }
7900 case BPF::BI__builtin_btf_type_id:
7901 case BPF::BI__builtin_preserve_type_info: {
7902 if (!getDebugInfo()) {
7903 CGM.Error(E->getExprLoc(), "using builtin function without -g");
7904 return nullptr;
7905 }
7906
7907 const Expr *Arg0 = E->getArg(0);
7908 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
7909 Arg0->getType(), Arg0->getExprLoc());
7910
7911 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7912 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
7913 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
7914
7915 llvm::Function *FnDecl;
7916 if (BuiltinID == BPF::BI__builtin_btf_type_id)
7917 FnDecl = Intrinsic::getOrInsertDeclaration(
7918 &CGM.getModule(), Intrinsic::bpf_btf_type_id, {});
7919 else
7920 FnDecl = Intrinsic::getOrInsertDeclaration(
7921 &CGM.getModule(), Intrinsic::bpf_preserve_type_info, {});
7922 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
7923 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
7924 return Fn;
7925 }
7926 case BPF::BI__builtin_preserve_enum_value: {
7927 if (!getDebugInfo()) {
7928 CGM.Error(E->getExprLoc(), "using builtin function without -g");
7929 return nullptr;
7930 }
7931
7932 const Expr *Arg0 = E->getArg(0);
7933 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
7934 Arg0->getType(), Arg0->getExprLoc());
7935
7936 // Find the enumerator: the argument has the form *(EnumType *)EnumConstant.
7937 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
7938 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
7939 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
7940 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
7941
7942 auto InitVal = Enumerator->getInitVal();
7943 std::string InitValStr;
7944 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
7945 InitValStr = std::to_string(InitVal.getSExtValue());
7946 else
7947 InitValStr = std::to_string(InitVal.getZExtValue());
7948 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
7949 Value *EnumStrVal = Builder.CreateGlobalString(EnumStr);
7950
7951 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7952 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
7953 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
7954
7955 llvm::Function *IntrinsicFn = Intrinsic::getOrInsertDeclaration(
7956 &CGM.getModule(), Intrinsic::bpf_preserve_enum_value, {});
7957 CallInst *Fn =
7958 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
7959 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
7960 return Fn;
7961 }
7962 }
7963}
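
A hedged usage sketch for the BPF path above: __builtin_preserve_field_info takes a field access plus a constant info-kind operand and, as the diagnostic above enforces, is only meaningful when debug info (-g) is enabled. The struct, field name, and kind value below are illustrative, not taken from ARM.cpp.

struct pkt {
  int len;
};

unsigned probe_field(struct pkt *p) {
  // The second operand must be an integer constant selecting the info kind;
  // the value 1 here is purely illustrative.
  return __builtin_preserve_field_info(p->len, 1);
}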
7964
7965llvm::Value *CodeGenFunction::
7966BuildVector(ArrayRef<llvm::Value *> Ops) {
7967 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
7968 "Not a power-of-two sized vector!");
7969 bool AllConstants = true;
7970 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
7971 AllConstants &= isa<Constant>(Ops[i]);
7972
7973 // If this is a constant vector, create a ConstantVector.
7974 if (AllConstants) {
7975 SmallVector<llvm::Constant *, 16> CstOps;
7976 for (llvm::Value *Op : Ops)
7977 CstOps.push_back(cast<Constant>(Op));
7978 return llvm::ConstantVector::get(CstOps);
7979 }
7980
7981 // Otherwise, insertelement the values to build the vector.
7982 Value *Result = llvm::PoisonValue::get(
7983 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
7984
7985 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7986 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
7987
7988 return Result;
7989}
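
A free-standing sketch (an assumed helper, not part of ARM.cpp) of the non-constant path in BuildVector above: the same poison-value-plus-insertelement chain, written directly against IRBuilder for a fixed two-element case.

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"

// Builds a two-element vector from two scalars of the same type, mirroring
// the insertelement loop in BuildVector.
static llvm::Value *buildPair(llvm::IRBuilderBase &B, llvm::Value *A,
                              llvm::Value *C) {
  auto *VTy = llvm::FixedVectorType::get(A->getType(), 2);
  llvm::Value *V = llvm::PoisonValue::get(VTy);
  V = B.CreateInsertElement(V, A, B.getInt64(0));
  V = B.CreateInsertElement(V, C, B.getInt64(1));
  return V;
}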
7990
7991Value *CodeGenFunction::EmitAArch64CpuInit() {
7992 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
7993 llvm::FunctionCallee Func =
7994 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
7995 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
7996 cast<llvm::GlobalValue>(Func.getCallee())
7997 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
7998 return Builder.CreateCall(Func);
7999}
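
EmitAArch64CpuInit above emits a bare call to the runtime initializer __init_cpu_features_resolver. A hedged source-level sketch follows; the assumption (not shown in this excerpt) is that __builtin_cpu_init routes through this helper on AArch64.

void warm_up_cpu_features(void) {
  // Expected to become the __init_cpu_features_resolver call emitted above
  // when targeting AArch64 (call-site mapping assumed).
  __builtin_cpu_init();
}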
8000
8001Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
8002 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
8003 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
8004 llvm::SmallVector<StringRef, 8> Features;
8005 ArgStr.split(Features, "+");
8006 for (auto &Feature : Features) {
8007 Feature = Feature.trim();
8008 if (!llvm::AArch64::parseFMVExtension(Feature))
8009 return Builder.getFalse();
8010 if (Feature != "default")
8011 Features.push_back(Feature);
8012 }
8013 return EmitAArch64CpuSupports(Features);
8014}
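
A hedged usage sketch of the string form handled above: the literal argument is split on '+' and every name must parse as an FMV extension, otherwise the whole check folds to false. The feature names below are examples only.

int pick_kernel(void) {
  // "sve2" and "bf16" are example FMV feature names; an unrecognized name
  // would make this condition constant-false per the parse check above.
  if (__builtin_cpu_supports("sve2+bf16"))
    return 1; /* optimized path */
  return 0;   /* portable fallback */
}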
8015
8016llvm::Value *
8017CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
8018 llvm::APInt FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
8019 Value *Result = Builder.getTrue();
8020 if (FeaturesMask != 0) {
8021 // Get the features bitmask from the structure defined in the runtime library:
8022 // struct {
8023 // unsigned long long features;
8024 // } __aarch64_cpu_features;
8025 llvm::Type *STy = llvm::StructType::get(Int64Ty);
8026 llvm::Constant *AArch64CPUFeatures =
8027 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
8028 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
8029 llvm::Value *CpuFeatures = Builder.CreateGEP(
8030 STy, AArch64CPUFeatures,
8031 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
8032 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
8033 CharUnits::fromQuantity(8));
8034 Value *Mask = Builder.getInt(FeaturesMask.trunc(64));
8035 Value *Bitset = Builder.CreateAnd(Features, Mask);
8036 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
8037 Result = Builder.CreateAnd(Result, Cmp);
8038 }
8039 return Result;
8040}
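
A source-level sketch of what the load/and/icmp sequence above tests: the struct declaration mirrors the comment in the code, the real object is provided by the runtime library, and the helper name and mask value are illustrative.

struct AArch64CpuFeatures {
  unsigned long long features; // bitmask written by the runtime resolver
};

// Provided by the runtime; declared here only for the sketch.
extern "C" AArch64CpuFeatures __aarch64_cpu_features;

static bool supportsAllFeatures(unsigned long long Mask) {
  // Mirrors the and + icmp-eq emitted above: every requested bit must be set.
  return (__aarch64_cpu_features.features & Mask) == Mask;
}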