#include "TargetInfo.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsBPF.h"
#include "llvm/TargetParser/AArch64TargetParser.h"
using namespace CodeGen;
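
// Map a Windows-compatible AArch64 builtin to the portable MSVCIntrin code;
// the shared MSVC builtin emitter then lowers it to the right atomic or
// bit-scan IR. Builtins without an MSVC mapping yield std::nullopt.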
static std::optional<CodeGenFunction::MSVCIntrin>
translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  switch (BuiltinID) {
  default:
    return std::nullopt;
  case clang::AArch64::BI_BitScanForward:
  case clang::AArch64::BI_BitScanForward64:
    return MSVCIntrin::_BitScanForward;
  case clang::AArch64::BI_BitScanReverse:
  case clang::AArch64::BI_BitScanReverse64:
    return MSVCIntrin::_BitScanReverse;
  case clang::AArch64::BI_InterlockedAnd64:
    return MSVCIntrin::_InterlockedAnd;
  case clang::AArch64::BI_InterlockedExchange64:
    return MSVCIntrin::_InterlockedExchange;
  case clang::AArch64::BI_InterlockedExchangeAdd64:
    return MSVCIntrin::_InterlockedExchangeAdd;
  case clang::AArch64::BI_InterlockedExchangeSub64:
    return MSVCIntrin::_InterlockedExchangeSub;
  case clang::AArch64::BI_InterlockedOr64:
    return MSVCIntrin::_InterlockedOr;
  case clang::AArch64::BI_InterlockedXor64:
    return MSVCIntrin::_InterlockedXor;
  case clang::AArch64::BI_InterlockedDecrement64:
    return MSVCIntrin::_InterlockedDecrement;
  case clang::AArch64::BI_InterlockedIncrement64:
    return MSVCIntrin::_InterlockedIncrement;
  case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
    return MSVCIntrin::_InterlockedExchangeAdd_acq;
  case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
    return MSVCIntrin::_InterlockedExchangeAdd_rel;
  case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
    return MSVCIntrin::_InterlockedExchangeAdd_nf;
  case clang::AArch64::BI_InterlockedExchange8_acq:
  case clang::AArch64::BI_InterlockedExchange16_acq:
  case clang::AArch64::BI_InterlockedExchange_acq:
  case clang::AArch64::BI_InterlockedExchange64_acq:
  case clang::AArch64::BI_InterlockedExchangePointer_acq:
    return MSVCIntrin::_InterlockedExchange_acq;
  case clang::AArch64::BI_InterlockedExchange8_rel:
  case clang::AArch64::BI_InterlockedExchange16_rel:
  case clang::AArch64::BI_InterlockedExchange_rel:
  case clang::AArch64::BI_InterlockedExchange64_rel:
  case clang::AArch64::BI_InterlockedExchangePointer_rel:
    return MSVCIntrin::_InterlockedExchange_rel;
  case clang::AArch64::BI_InterlockedExchange8_nf:
  case clang::AArch64::BI_InterlockedExchange16_nf:
  case clang::AArch64::BI_InterlockedExchange_nf:
  case clang::AArch64::BI_InterlockedExchange64_nf:
  case clang::AArch64::BI_InterlockedExchangePointer_nf:
    return MSVCIntrin::_InterlockedExchange_nf;
  case clang::AArch64::BI_InterlockedCompareExchange8_acq:
  case clang::AArch64::BI_InterlockedCompareExchange16_acq:
  case clang::AArch64::BI_InterlockedCompareExchange_acq:
  case clang::AArch64::BI_InterlockedCompareExchange64_acq:
  case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
    return MSVCIntrin::_InterlockedCompareExchange_acq;
  case clang::AArch64::BI_InterlockedCompareExchange8_rel:
  case clang::AArch64::BI_InterlockedCompareExchange16_rel:
  case clang::AArch64::BI_InterlockedCompareExchange_rel:
  case clang::AArch64::BI_InterlockedCompareExchange64_rel:
  case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
    return MSVCIntrin::_InterlockedCompareExchange_rel;
  case clang::AArch64::BI_InterlockedCompareExchange8_nf:
  case clang::AArch64::BI_InterlockedCompareExchange16_nf:
  case clang::AArch64::BI_InterlockedCompareExchange_nf:
  case clang::AArch64::BI_InterlockedCompareExchange64_nf:
    return MSVCIntrin::_InterlockedCompareExchange_nf;
  case clang::AArch64::BI_InterlockedCompareExchange128:
    return MSVCIntrin::_InterlockedCompareExchange128;
  case clang::AArch64::BI_InterlockedCompareExchange128_acq:
    return MSVCIntrin::_InterlockedCompareExchange128_acq;
  case clang::AArch64::BI_InterlockedCompareExchange128_nf:
    return MSVCIntrin::_InterlockedCompareExchange128_nf;
  case clang::AArch64::BI_InterlockedCompareExchange128_rel:
    return MSVCIntrin::_InterlockedCompareExchange128_rel;
  case clang::AArch64::BI_InterlockedOr8_acq:
  case clang::AArch64::BI_InterlockedOr16_acq:
  case clang::AArch64::BI_InterlockedOr_acq:
  case clang::AArch64::BI_InterlockedOr64_acq:
    return MSVCIntrin::_InterlockedOr_acq;
  case clang::AArch64::BI_InterlockedOr8_rel:
  case clang::AArch64::BI_InterlockedOr16_rel:
  case clang::AArch64::BI_InterlockedOr_rel:
  case clang::AArch64::BI_InterlockedOr64_rel:
    return MSVCIntrin::_InterlockedOr_rel;
  case clang::AArch64::BI_InterlockedOr8_nf:
  case clang::AArch64::BI_InterlockedOr16_nf:
  case clang::AArch64::BI_InterlockedOr_nf:
  case clang::AArch64::BI_InterlockedOr64_nf:
    return MSVCIntrin::_InterlockedOr_nf;
  case clang::AArch64::BI_InterlockedXor8_acq:
  case clang::AArch64::BI_InterlockedXor16_acq:
  case clang::AArch64::BI_InterlockedXor_acq:
  case clang::AArch64::BI_InterlockedXor64_acq:
    return MSVCIntrin::_InterlockedXor_acq;
  case clang::AArch64::BI_InterlockedXor8_rel:
  case clang::AArch64::BI_InterlockedXor16_rel:
  case clang::AArch64::BI_InterlockedXor_rel:
  case clang::AArch64::BI_InterlockedXor64_rel:
    return MSVCIntrin::_InterlockedXor_rel;
  case clang::AArch64::BI_InterlockedXor8_nf:
  case clang::AArch64::BI_InterlockedXor16_nf:
  case clang::AArch64::BI_InterlockedXor_nf:
  case clang::AArch64::BI_InterlockedXor64_nf:
    return MSVCIntrin::_InterlockedXor_nf;
  case clang::AArch64::BI_InterlockedAnd8_acq:
  case clang::AArch64::BI_InterlockedAnd16_acq:
  case clang::AArch64::BI_InterlockedAnd_acq:
  case clang::AArch64::BI_InterlockedAnd64_acq:
    return MSVCIntrin::_InterlockedAnd_acq;
  case clang::AArch64::BI_InterlockedAnd8_rel:
  case clang::AArch64::BI_InterlockedAnd16_rel:
  case clang::AArch64::BI_InterlockedAnd_rel:
  case clang::AArch64::BI_InterlockedAnd64_rel:
    return MSVCIntrin::_InterlockedAnd_rel;
  case clang::AArch64::BI_InterlockedAnd8_nf:
  case clang::AArch64::BI_InterlockedAnd16_nf:
  case clang::AArch64::BI_InterlockedAnd_nf:
  case clang::AArch64::BI_InterlockedAnd64_nf:
    return MSVCIntrin::_InterlockedAnd_nf;
  case clang::AArch64::BI_InterlockedIncrement16_acq:
  case clang::AArch64::BI_InterlockedIncrement_acq:
  case clang::AArch64::BI_InterlockedIncrement64_acq:
    return MSVCIntrin::_InterlockedIncrement_acq;
  case clang::AArch64::BI_InterlockedIncrement16_rel:
  case clang::AArch64::BI_InterlockedIncrement_rel:
  case clang::AArch64::BI_InterlockedIncrement64_rel:
    return MSVCIntrin::_InterlockedIncrement_rel;
  case clang::AArch64::BI_InterlockedIncrement16_nf:
  case clang::AArch64::BI_InterlockedIncrement_nf:
  case clang::AArch64::BI_InterlockedIncrement64_nf:
    return MSVCIntrin::_InterlockedIncrement_nf;
  case clang::AArch64::BI_InterlockedDecrement16_acq:
  case clang::AArch64::BI_InterlockedDecrement_acq:
  case clang::AArch64::BI_InterlockedDecrement64_acq:
    return MSVCIntrin::_InterlockedDecrement_acq;
  case clang::AArch64::BI_InterlockedDecrement16_rel:
  case clang::AArch64::BI_InterlockedDecrement_rel:
  case clang::AArch64::BI_InterlockedDecrement64_rel:
    return MSVCIntrin::_InterlockedDecrement_rel;
  case clang::AArch64::BI_InterlockedDecrement16_nf:
  case clang::AArch64::BI_InterlockedDecrement_nf:
  case clang::AArch64::BI_InterlockedDecrement64_nf:
    return MSVCIntrin::_InterlockedDecrement_nf;
  }
  llvm_unreachable("must return from switch");
}

static std::optional<CodeGenFunction::MSVCIntrin>
translateArmToMsvcIntrin(unsigned BuiltinID) {
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  switch (BuiltinID) {
  default:
    return std::nullopt;
  case clang::ARM::BI_BitScanForward:
  case clang::ARM::BI_BitScanForward64:
    return MSVCIntrin::_BitScanForward;
  case clang::ARM::BI_BitScanReverse:
  case clang::ARM::BI_BitScanReverse64:
    return MSVCIntrin::_BitScanReverse;
  case clang::ARM::BI_InterlockedAnd64:
    return MSVCIntrin::_InterlockedAnd;
  case clang::ARM::BI_InterlockedExchange64:
    return MSVCIntrin::_InterlockedExchange;
  case clang::ARM::BI_InterlockedExchangeAdd64:
    return MSVCIntrin::_InterlockedExchangeAdd;
  case clang::ARM::BI_InterlockedExchangeSub64:
    return MSVCIntrin::_InterlockedExchangeSub;
  case clang::ARM::BI_InterlockedOr64:
    return MSVCIntrin::_InterlockedOr;
  case clang::ARM::BI_InterlockedXor64:
    return MSVCIntrin::_InterlockedXor;
  case clang::ARM::BI_InterlockedDecrement64:
    return MSVCIntrin::_InterlockedDecrement;
  case clang::ARM::BI_InterlockedIncrement64:
    return MSVCIntrin::_InterlockedIncrement;
  case clang::ARM::BI_InterlockedExchangeAdd8_acq:
  case clang::ARM::BI_InterlockedExchangeAdd16_acq:
  case clang::ARM::BI_InterlockedExchangeAdd_acq:
  case clang::ARM::BI_InterlockedExchangeAdd64_acq:
    return MSVCIntrin::_InterlockedExchangeAdd_acq;
  case clang::ARM::BI_InterlockedExchangeAdd8_rel:
  case clang::ARM::BI_InterlockedExchangeAdd16_rel:
  case clang::ARM::BI_InterlockedExchangeAdd_rel:
  case clang::ARM::BI_InterlockedExchangeAdd64_rel:
    return MSVCIntrin::_InterlockedExchangeAdd_rel;
  case clang::ARM::BI_InterlockedExchangeAdd8_nf:
  case clang::ARM::BI_InterlockedExchangeAdd16_nf:
  case clang::ARM::BI_InterlockedExchangeAdd_nf:
  case clang::ARM::BI_InterlockedExchangeAdd64_nf:
    return MSVCIntrin::_InterlockedExchangeAdd_nf;
  case clang::ARM::BI_InterlockedExchange8_acq:
  case clang::ARM::BI_InterlockedExchange16_acq:
  case clang::ARM::BI_InterlockedExchange_acq:
  case clang::ARM::BI_InterlockedExchange64_acq:
  case clang::ARM::BI_InterlockedExchangePointer_acq:
    return MSVCIntrin::_InterlockedExchange_acq;
  case clang::ARM::BI_InterlockedExchange8_rel:
  case clang::ARM::BI_InterlockedExchange16_rel:
  case clang::ARM::BI_InterlockedExchange_rel:
  case clang::ARM::BI_InterlockedExchange64_rel:
  case clang::ARM::BI_InterlockedExchangePointer_rel:
    return MSVCIntrin::_InterlockedExchange_rel;
  case clang::ARM::BI_InterlockedExchange8_nf:
  case clang::ARM::BI_InterlockedExchange16_nf:
  case clang::ARM::BI_InterlockedExchange_nf:
  case clang::ARM::BI_InterlockedExchange64_nf:
  case clang::ARM::BI_InterlockedExchangePointer_nf:
    return MSVCIntrin::_InterlockedExchange_nf;
  case clang::ARM::BI_InterlockedCompareExchange8_acq:
  case clang::ARM::BI_InterlockedCompareExchange16_acq:
  case clang::ARM::BI_InterlockedCompareExchange_acq:
  case clang::ARM::BI_InterlockedCompareExchange64_acq:
  case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
    return MSVCIntrin::_InterlockedCompareExchange_acq;
  case clang::ARM::BI_InterlockedCompareExchange8_rel:
  case clang::ARM::BI_InterlockedCompareExchange16_rel:
  case clang::ARM::BI_InterlockedCompareExchange_rel:
  case clang::ARM::BI_InterlockedCompareExchange64_rel:
  case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
    return MSVCIntrin::_InterlockedCompareExchange_rel;
  case clang::ARM::BI_InterlockedCompareExchange8_nf:
  case clang::ARM::BI_InterlockedCompareExchange16_nf:
  case clang::ARM::BI_InterlockedCompareExchange_nf:
  case clang::ARM::BI_InterlockedCompareExchange64_nf:
    return MSVCIntrin::_InterlockedCompareExchange_nf;
  case clang::ARM::BI_InterlockedOr8_acq:
  case clang::ARM::BI_InterlockedOr16_acq:
  case clang::ARM::BI_InterlockedOr_acq:
  case clang::ARM::BI_InterlockedOr64_acq:
    return MSVCIntrin::_InterlockedOr_acq;
  case clang::ARM::BI_InterlockedOr8_rel:
  case clang::ARM::BI_InterlockedOr16_rel:
  case clang::ARM::BI_InterlockedOr_rel:
  case clang::ARM::BI_InterlockedOr64_rel:
    return MSVCIntrin::_InterlockedOr_rel;
  case clang::ARM::BI_InterlockedOr8_nf:
  case clang::ARM::BI_InterlockedOr16_nf:
  case clang::ARM::BI_InterlockedOr_nf:
  case clang::ARM::BI_InterlockedOr64_nf:
    return MSVCIntrin::_InterlockedOr_nf;
  case clang::ARM::BI_InterlockedXor8_acq:
  case clang::ARM::BI_InterlockedXor16_acq:
  case clang::ARM::BI_InterlockedXor_acq:
  case clang::ARM::BI_InterlockedXor64_acq:
    return MSVCIntrin::_InterlockedXor_acq;
  case clang::ARM::BI_InterlockedXor8_rel:
  case clang::ARM::BI_InterlockedXor16_rel:
  case clang::ARM::BI_InterlockedXor_rel:
  case clang::ARM::BI_InterlockedXor64_rel:
    return MSVCIntrin::_InterlockedXor_rel;
  case clang::ARM::BI_InterlockedXor8_nf:
  case clang::ARM::BI_InterlockedXor16_nf:
  case clang::ARM::BI_InterlockedXor_nf:
  case clang::ARM::BI_InterlockedXor64_nf:
    return MSVCIntrin::_InterlockedXor_nf;
  case clang::ARM::BI_InterlockedAnd8_acq:
  case clang::ARM::BI_InterlockedAnd16_acq:
  case clang::ARM::BI_InterlockedAnd_acq:
  case clang::ARM::BI_InterlockedAnd64_acq:
    return MSVCIntrin::_InterlockedAnd_acq;
  case clang::ARM::BI_InterlockedAnd8_rel:
  case clang::ARM::BI_InterlockedAnd16_rel:
  case clang::ARM::BI_InterlockedAnd_rel:
  case clang::ARM::BI_InterlockedAnd64_rel:
    return MSVCIntrin::_InterlockedAnd_rel;
  case clang::ARM::BI_InterlockedAnd8_nf:
  case clang::ARM::BI_InterlockedAnd16_nf:
  case clang::ARM::BI_InterlockedAnd_nf:
  case clang::ARM::BI_InterlockedAnd64_nf:
    return MSVCIntrin::_InterlockedAnd_nf;
  case clang::ARM::BI_InterlockedIncrement16_acq:
  case clang::ARM::BI_InterlockedIncrement_acq:
  case clang::ARM::BI_InterlockedIncrement64_acq:
    return MSVCIntrin::_InterlockedIncrement_acq;
  case clang::ARM::BI_InterlockedIncrement16_rel:
  case clang::ARM::BI_InterlockedIncrement_rel:
  case clang::ARM::BI_InterlockedIncrement64_rel:
    return MSVCIntrin::_InterlockedIncrement_rel;
  case clang::ARM::BI_InterlockedIncrement16_nf:
  case clang::ARM::BI_InterlockedIncrement_nf:
  case clang::ARM::BI_InterlockedIncrement64_nf:
    return MSVCIntrin::_InterlockedIncrement_nf;
  case clang::ARM::BI_InterlockedDecrement16_acq:
  case clang::ARM::BI_InterlockedDecrement_acq:
  case clang::ARM::BI_InterlockedDecrement64_acq:
    return MSVCIntrin::_InterlockedDecrement_acq;
  case clang::ARM::BI_InterlockedDecrement16_rel:
  case clang::ARM::BI_InterlockedDecrement_rel:
  case clang::ARM::BI_InterlockedDecrement64_rel:
    return MSVCIntrin::_InterlockedDecrement_rel;
  case clang::ARM::BI_InterlockedDecrement16_nf:
  case clang::ARM::BI_InterlockedDecrement_nf:
  case clang::ARM::BI_InterlockedDecrement64_nf:
    return MSVCIntrin::_InterlockedDecrement_nf;
  }
  llvm_unreachable("must return from switch");
}

static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                                unsigned IntrinsicID,
                                                unsigned ConstrainedIntrinsicID,
                                                llvm::Type *Ty,
                                                ArrayRef<Value *> Args) {
  Function *F;
  if (CGF.Builder.getIsFPConstrained())
    F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
  else
    F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);

  if (CGF.Builder.getIsFPConstrained())
    return CGF.Builder.CreateConstrainedFPCall(F, Args);
  else
    return CGF.Builder.CreateCall(F, Args);
}

static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
                                          NeonTypeFlags TypeFlags,
                                          bool HasFastHalfType = true,
                                          bool V1Ty = false,
                                          bool AllowBFloatArgsAndRet = true) {
  int IsQuad = TypeFlags.isQuad();
  switch (TypeFlags.getEltType()) {
  case NeonTypeFlags::Int8:
  case NeonTypeFlags::Poly8:
    return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
  case NeonTypeFlags::Int16:
  case NeonTypeFlags::Poly16:
    return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::BFloat16:
    if (AllowBFloatArgsAndRet)
      return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
    else
      return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::Float16:
    if (HasFastHalfType)
      return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
    else
      return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::Int32:
    return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
  case NeonTypeFlags::Int64:
  case NeonTypeFlags::Poly64:
    return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
  case NeonTypeFlags::Poly128:
    // Poly128 is represented as v16i8, since full i128/f128 support is still
    // missing in Clang and LLVM; the backend pattern-matches the v16i8 form.
    return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
  case NeonTypeFlags::Float32:
    return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
  case NeonTypeFlags::Float64:
    return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
  }
  llvm_unreachable("Unknown vector element type!");
}

static llvm::FixedVectorType *GetFloatNeonType(CodeGenFunction *CGF,
                                               NeonTypeFlags IntTypeFlags) {
  int IsQuad = IntTypeFlags.isQuad();
  switch (IntTypeFlags.getEltType()) {
  case NeonTypeFlags::Int16:
    return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
  case NeonTypeFlags::Int32:
    return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
  case NeonTypeFlags::Int64:
    return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
  default:
    llvm_unreachable("Type can't be converted to floating-point!");
  }
}

Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
                                      const ElementCount &Count) {
  Value *SV = llvm::ConstantVector::getSplat(Count, C);
  return Builder.CreateShuffleVector(V, V, SV, "lane");
}

Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
  ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
  return EmitNeonSplat(V, C, EC);
}

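// Common NEON call emission: bitcast each operand to the parameter type the
// intrinsic expects (or materialize a shift-amount vector for the operand at
// index `shift`), skipping the metadata operands of constrained FP
// intrinsics.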
Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value *> &Ops,
                                     const char *name,
                                     unsigned shift, bool rightshift) {
  unsigned j = 0;
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j) {
    if (F->isConstrainedFPIntrinsic())
      if (ai->getType()->isMetadataTy())
        continue;
    if (shift > 0 && shift == j)
      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
    else
      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
  }

  if (F->isConstrainedFPIntrinsic())
    return Builder.CreateConstrainedFPCall(F, Ops, name);
  else
    return Builder.CreateCall(F, Ops, name);
}

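// FP8 helpers: these wrap EmitFP8NeonCall, first widening the lane operand
// to a full 16-byte vector when the underlying intrinsic requires it.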
Value *CodeGenFunction::EmitFP8NeonFDOTCall(
    unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
    SmallVectorImpl<Value *> &Ops, const CallExpr *E, const char *name) {

  const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
                             RetTy->getPrimitiveSizeInBits();
  llvm::Type *Tys[] = {llvm::FixedVectorType::get(RetTy, ElemCount),
                       Ops[1]->getType()};
  if (ExtendLaneArg) {
    auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
    Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
                                        uint64_t(0));
  }
  return EmitFP8NeonCall(IID, Tys, Ops, E, name);
}

Value *CodeGenFunction::EmitFP8NeonFMLACall(
    unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
    SmallVectorImpl<Value *> &Ops, const CallExpr *E, const char *name) {

  if (ExtendLaneArg) {
    auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
    Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
                                        uint64_t(0));
  }
  const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
                             RetTy->getPrimitiveSizeInBits();
  return EmitFP8NeonCall(IID, {llvm::FixedVectorType::get(RetTy, ElemCount)},
                         Ops, E, name);
}

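// Build a constant splat vector holding the (possibly negated) shift amount
// for the NEON shift intrinsics.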
Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
                                            bool neg) {
  int SV = cast<ConstantInt>(V)->getSExtValue();
  return ConstantInt::get(Ty, neg ? -SV : SV);
}

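// FP8 conversion helper; the Extract flag narrows the input to its low
// 64 bits first, since the intrinsic only converts the lower half of the
// vector.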
Value *CodeGenFunction::EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
                                           llvm::Type *Ty1, bool Extract,
                                           SmallVectorImpl<llvm::Value *> &Ops,
                                           const CallExpr *E,
                                           const char *name) {
  llvm::Type *Tys[] = {Ty0, Ty1};
  if (Extract) {
    // The intrinsic converts only the lower part of the vector, so extract
    // a v8i8 before the call.
    Tys[1] = llvm::FixedVectorType::get(Int8Ty, 8);
    Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], uint64_t(0));
  }
  return EmitFP8NeonCall(IID, Tys, Ops, E, name);
}

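// Right-shift a vector by an immediate, working around lshr/ashr being
// undefined when the shift amount equals the element width.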
Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
                                          llvm::Type *Ty, bool usgn,
                                          const char *name) {
  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);

  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
  int EltSize = VTy->getScalarSizeInBits();

  Vec = Builder.CreateBitCast(Vec, Ty);

  // lshr/ashr are undefined when the shift amount is equal to the vector
  // element size.
  if (ShiftAmt == EltSize) {
    if (usgn) {
      // Right-shifting an unsigned value by its size yields 0.
      return llvm::ConstantAggregateZero::get(VTy);
    } else {
      // Right-shifting a signed value by its size is equivalent
      // to a shift of size-1.
      --ShiftAmt;
      Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
    }
  }

  if (usgn)
    return Builder.CreateLShr(Vec, Shift, name);
  else
    return Builder.CreateAShr(Vec, Shift, name);
}

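// One row of the builtin -> LLVM intrinsic tables below: an optional name
// hint, the builtin ID, up to two candidate intrinsics, and type-modifier
// flags. The operator< overloads let the sorted tables be binary-searched
// by builtin ID.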
struct ARMVectorIntrinsicInfo {
  const char *NameHint;
  unsigned BuiltinID;
  unsigned LLVMIntrinsic;
  unsigned AltLLVMIntrinsic;
  uint64_t TypeModifier;

  bool operator<(unsigned RHSBuiltinID) const {
    return BuiltinID < RHSBuiltinID;
  }
  bool operator<(const ARMVectorIntrinsicInfo &TE) const {
    return BuiltinID < TE.BuiltinID;
  }
};

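// Table-building macros: NEONMAP0 marks a builtin handled entirely by custom
// code, NEONMAP1 maps it to a single LLVM intrinsic, and NEONMAP2 records an
// alternate intrinsic as well (commonly the unsigned or integer variant).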
#define NEONMAP0(NameBase) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }

#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, \
      Intrinsic::LLVMIntrinsic, 0, TypeModifier }

#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, \
      Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
      TypeModifier }

static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[] = {
  NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
  NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
  NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
  NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
  NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
  NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
  NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
  NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
  NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
  NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
  NEONMAP1(vcage_v, arm_neon_vacge, 0),
  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcale_v, arm_neon_vacge, 0),
  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
  NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
  NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vdot_s32, arm_neon_sdot, 0),
  NEONMAP1(vdot_u32, arm_neon_udot, 0),
  NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
  NEONMAP1(vdotq_u32, arm_neon_udot, 0),
  NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
  NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
  NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
  NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
  NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
  NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
  NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
  NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
  NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
  NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
  NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
  NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
  NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
  NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
  NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
  NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
  NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
  NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
  NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
  NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
  NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
  NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
  NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
};

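// The AArch64 counterpart of the map above; crypto builtins lower to
// aarch64_crypto_* intrinsics and the absolute comparisons to
// aarch64_neon_fac*.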
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
  NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
  NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
  NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
  NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
  NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
  NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
  NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
  NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
  NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
  NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
  NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
  NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
  NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
  NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
  NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
  NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
  NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
  NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
  NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
  NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
  NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
  NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
  NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
  NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
  NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
  NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
  NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
  NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
  NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
  NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
  NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
  NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
  NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
  NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
  NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
  NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
  NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
  NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
  NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
  NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
  NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
  NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
  NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
  NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
  NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
  NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
  NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
  NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
  NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
  NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
  NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
  NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
  NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
  NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
  NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
  NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
  NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
  NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
  NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
  NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
  NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
  NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
  NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
  NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
  NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
};

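// Scalar (SISD) builtins get their own map; these operate on single
// elements rather than full vectors.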
static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
};

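// Some builtins are equivalent for codegen purposes; this table redirects
// one builtin ID to another before the intrinsic maps above are consulted.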
static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
  { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
  { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
  { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
  { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
  { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
  { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
  { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
  { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
  { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
  { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
  { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
  { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
  { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
  { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
  { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
  { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
  { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
  { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
  { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
  { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
  { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
  { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
  { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
  { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
  { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
  { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
  { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
  { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
  { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
  { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
  { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
  { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
  { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
  { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
  { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
  { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
  { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
  { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
  { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
  { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
  { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
  { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
  { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
  { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
  { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
  { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
  { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
  { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
  { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
  { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
  { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
  { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
  { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
  { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
  { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
  { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
  { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
  { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
  { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
  { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
  { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
  { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
  { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
  { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
  { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
  { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
  { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
  { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
  { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
  { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
  { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
  { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
  { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
  { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
  { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
  { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
  { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
  { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
  { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
  { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
  { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
  { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
  { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
  { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
  { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
  { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
  { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
  { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
  { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
  { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
  { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
  { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
  { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
  { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
  { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
  { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
  { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
  { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
  { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
  { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
  { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
  { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
  { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
  { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
  { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
  { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
  { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
  { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
  { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
  { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
  { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
  { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
  { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
  { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
  { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
  { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
  { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
  { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
  { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
  { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
  { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
  { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
  { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
  { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
  { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
  { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
  { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
  { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
  { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
  { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
  { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
  { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
  { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
  { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
  { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
  { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
  { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
  { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
  { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
  { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
};

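// The SVE map is generated from the TableGen description pulled in below;
// SVEMAP1/SVEMAP2 mirror the NEONMAP macros.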
#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier)                         \
  {                                                                            \
    #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0,   \
        TypeModifier                                                           \
  }

#define SVEMAP2(NameBase, TypeModifier)                                        \
  { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }

static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
#define GET_SVE_LLVM_INTRINSIC_MAP
#include "clang/Basic/arm_sve_builtin_cg.inc"
#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
#undef GET_SVE_LLVM_INTRINSIC_MAP
};

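// Likewise for SME, from its generated include.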
#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier)                         \
  {                                                                            \
    #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0,   \
        TypeModifier                                                           \
  }

#define SMEMAP2(NameBase, TypeModifier)                                        \
  { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }

static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
#define GET_SME_LLVM_INTRINSIC_MAP
#include "clang/Basic/arm_sme_builtin_cg.inc"
#undef GET_SME_LLVM_INTRINSIC_MAP
};

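// Binary-search one of the sorted tables above for a builtin ID, asserting
// (in debug builds, once per table) that the table really is sorted.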
static const ARMVectorIntrinsicInfo *
findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
                            unsigned BuiltinID, bool &MapProvenSorted) {
#ifndef NDEBUG
  if (!MapProvenSorted) {
    assert(llvm::is_sorted(IntrinsicMap));
    MapProvenSorted = true;
  }
#endif

  const ARMVectorIntrinsicInfo *Builtin =
      llvm::lower_bound(IntrinsicMap, BuiltinID);

  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
    return Builtin;

  return nullptr;
}

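// Build the overloaded intrinsic declaration a table entry describes: the
// TypeModifier flags say which of the return and argument types participate
// in overload resolution and whether they are widened to 64- or 128-bit
// vectors.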
static Function *LookupNeonLLVMIntrinsic(CodeGenFunction &CGF,
                                         unsigned Modifier, unsigned IntrinsicID,
                                         llvm::Type *ArgType,
                                         const CallExpr *E) {
  int VectorSize = 0;
  if (Modifier & Use64BitVectors)
    VectorSize = 64;
  else if (Modifier & Use128BitVectors)
    VectorSize = 128;

  // Return type.
  SmallVector<llvm::Type *, 3> Tys;
  if (Modifier & AddRetType) {
    llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType(CGF.getContext()));
    if (Modifier & VectorizeRetType)
      Ty = llvm::FixedVectorType::get(
          Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);

    Tys.push_back(Ty);
  }

  // Arguments.
  if (Modifier & VectorizeArgTypes) {
    int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
    ArgType = llvm::FixedVectorType::get(ArgType, Elts);
  }

  if (Modifier & (Add1ArgType | Add2ArgTypes))
    Tys.push_back(ArgType);

  if (Modifier & Add2ArgTypes)
    Tys.push_back(ArgType);

  if (Modifier & InventFloatType)
    Tys.push_back(CGF.FloatTy);

  return CGF.CGM.getIntrinsic(IntrinsicID, Tys);
}

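// Shared emission path for scalar (SISD) builtins: swap operands for the
// comparison directions that have no direct instruction, look up the
// intrinsic, and legalize scalar operands into one-element vectors.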
static Value *EmitCommonNeonSISDBuiltinExpr(
    CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
    SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
  unsigned BuiltinID = SISDInfo.BuiltinID;
  unsigned int Int = SISDInfo.LLVMIntrinsic;
  unsigned Modifier = SISDInfo.TypeModifier;
  const char *s = SISDInfo.NameHint;

  switch (BuiltinID) {
  case NEON::BI__builtin_neon_vcled_s64:
  case NEON::BI__builtin_neon_vcled_u64:
  case NEON::BI__builtin_neon_vcles_f32:
  case NEON::BI__builtin_neon_vcled_f64:
  case NEON::BI__builtin_neon_vcltd_s64:
  case NEON::BI__builtin_neon_vcltd_u64:
  case NEON::BI__builtin_neon_vclts_f32:
  case NEON::BI__builtin_neon_vcltd_f64:
  case NEON::BI__builtin_neon_vcales_f32:
  case NEON::BI__builtin_neon_vcaled_f64:
  case NEON::BI__builtin_neon_vcalts_f32:
  case NEON::BI__builtin_neon_vcaltd_f64:
    // Only one direction of comparisons actually exists: cmle is a cmge
    // with swapped operands.
    std::swap(Ops[0], Ops[1]);
    break;
  }

  assert(Int && "Generic code assumes a valid intrinsic");

  // Determine the type(s) of this overloaded AArch64 intrinsic.
  const Expr *Arg = E->getArg(0);
  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
  Function *F = LookupNeonLLVMIntrinsic(CGF, Modifier, Int, ArgTy, E);

  int j = 0;
  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j) {
    llvm::Type *ArgTy = ai->getType();
    if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
        ArgTy->getPrimitiveSizeInBits())
      continue;

    assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
    // The constant argument to an _n_ intrinsic always has Int32Ty, so
    // truncate it before inserting.
    Ops[j] = CGF.Builder.CreateTruncOrBitCast(
        Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
    Ops[j] =
        CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
  }

  Value *Result = CGF.EmitNeonCall(F, Ops, s);
  llvm::Type *ResultType = CGF.ConvertType(E->getType());
  if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
      Result->getType()->getPrimitiveSizeInBits().getFixedValue())
    return CGF.Builder.CreateExtractElement(Result, C0);

  return CGF.Builder.CreateBitCast(Result, ResultType, s);
}

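// The main shared NEON emission routine for ARM and AArch64. The trailing
// argument of every overloaded NEON builtin encodes the vector type; decode
// it, then either handle the builtin specially in the switch below or fall
// through to the generic table-driven emission.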
Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
    unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
    const char *NameHint, unsigned Modifier, const CallExpr *E,
    SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
    llvm::Triple::ArchType Arch) {
  // Get the last argument, which specifies the vector type.
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  std::optional<llvm::APSInt> NeonTypeConst =
      Arg->getIntegerConstantExpr(getContext());
  if (!NeonTypeConst)
    return nullptr;

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type(NeonTypeConst->getZExtValue());
  const bool Usgn = Type.isUnsigned();
  const bool Quad = Type.isQuad();
  const bool Floating = Type.isFloatingPoint();
  const bool HasFastHalfType = getTarget().hasFastHalfType();
  const bool AllowBFloatArgsAndRet =
      getTargetHooks().getABIInfo().allowBFloatArgsAndRet();

  llvm::FixedVectorType *VTy =
      GetNeonType(this, Type, HasFastHalfType, false, AllowBFloatArgsAndRet);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  auto getAlignmentValue32 = [&](Address addr) -> Value* {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

  unsigned Int = LLVMIntrinsic;
  if ((Modifier & UnsignedAlts) && !Usgn)
    Int = AltLLVMIntrinsic;

  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_splat_lane_v:
  case NEON::BI__builtin_neon_splat_laneq_v:
  case NEON::BI__builtin_neon_splatq_lane_v:
  case NEON::BI__builtin_neon_splatq_laneq_v: {
    auto NumElements = VTy->getElementCount();
    if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
      NumElements = NumElements * 2;
    if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
      NumElements = NumElements.divideCoefficientBy(2);

    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
  }
  case NEON::BI__builtin_neon_vpadd_v:
  case NEON::BI__builtin_neon_vpaddq_v:
    // We don't allow fp/int overloading of intrinsics.
    if (VTy->getElementType()->isFloatingPointTy() &&
        Int == Intrinsic::aarch64_neon_addp)
      Int = Intrinsic::aarch64_neon_faddp;
    break;
  case NEON::BI__builtin_neon_vabs_v:
  case NEON::BI__builtin_neon_vabsq_v:
    if (VTy->getElementType()->isFloatingPointTy())
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
  case NEON::BI__builtin_neon_vadd_v:
  case NEON::BI__builtin_neon_vaddq_v: {
    llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
    Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
    return Builder.CreateBitCast(Ops[0], Ty);
  }
  case NEON::BI__builtin_neon_vaddhn_v: {
    llvm::FixedVectorType *SrcTy =
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);

    // %sum = add <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");

    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
  }
  case NEON::BI__builtin_neon_vcale_v:
  case NEON::BI__builtin_neon_vcaleq_v:
  case NEON::BI__builtin_neon_vcalt_v:
  case NEON::BI__builtin_neon_vcaltq_v:
    std::swap(Ops[0], Ops[1]);
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcage_v:
  case NEON::BI__builtin_neon_vcageq_v:
  case NEON::BI__builtin_neon_vcagt_v:
  case NEON::BI__builtin_neon_vcagtq_v: {
    llvm::Type *Ty;
    switch (VTy->getScalarSizeInBits()) {
    default: llvm_unreachable("unexpected type");
    case 32:
      Ty = FloatTy;
      break;
    case 64:
      Ty = DoubleTy;
      break;
    case 16:
      Ty = HalfTy;
      break;
    }
    auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
    llvm::Type *Tys[] = { VTy, VecFlt };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vceqz_v:
  case NEON::BI__builtin_neon_vceqzq_v:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ, "vceqz");
  case NEON::BI__builtin_neon_vcgez_v:
  case NEON::BI__builtin_neon_vcgezq_v:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
        "vcgez");
  case NEON::BI__builtin_neon_vclez_v:
  case NEON::BI__builtin_neon_vclezq_v:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
        "vclez");
  case NEON::BI__builtin_neon_vcgtz_v:
  case NEON::BI__builtin_neon_vcgtzq_v:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
        "vcgtz");
  case NEON::BI__builtin_neon_vcltz_v:
  case NEON::BI__builtin_neon_vcltzq_v:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
        "vcltz");
  case NEON::BI__builtin_neon_vclz_v:
  case NEON::BI__builtin_neon_vclzq_v:
    // The generic ctlz intrinsic takes a second argument saying whether a
    // zero input is undefined; on ARM it is not.
    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
    break;
  case NEON::BI__builtin_neon_vcvt_f32_v:
  case NEON::BI__builtin_neon_vcvtq_f32_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
                     HasFastHalfType);
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_f16_s16:
  case NEON::BI__builtin_neon_vcvt_f16_u16:
  case NEON::BI__builtin_neon_vcvtq_f16_s16:
  case NEON::BI__builtin_neon_vcvtq_f16_u16:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
                     HasFastHalfType);
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_n_f16_s16:
  case NEON::BI__builtin_neon_vcvt_n_f16_u16:
  case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
  case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_f32_v:
  case NEON::BI__builtin_neon_vcvt_n_f64_v:
  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_s16_f16:
  case NEON::BI__builtin_neon_vcvt_n_s32_v:
  case NEON::BI__builtin_neon_vcvt_n_u16_f16:
  case NEON::BI__builtin_neon_vcvt_n_u32_v:
  case NEON::BI__builtin_neon_vcvt_n_s64_v:
  case NEON::BI__builtin_neon_vcvt_n_u64_v:
  case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
  case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvt_s16_f16:
  case NEON::BI__builtin_neon_vcvt_u16_f16:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s16_f16:
  case NEON::BI__builtin_neon_vcvtq_u16_f16: {
    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvta_s16_f16:
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvta_u16_f16:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_s16_f16:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_u16_f16:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v:
  case NEON::BI__builtin_neon_vcvtn_s16_f16:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u16_f16:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_s16_f16:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_u16_f16:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v:
  case NEON::BI__builtin_neon_vcvtp_s16_f16:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u16_f16:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_s16_f16:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_u16_f16:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v:
  case NEON::BI__builtin_neon_vcvtm_s16_f16:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u16_f16:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_s16_f16:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_u16_f16:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vcvtx_f32_v: {
    llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vext_v:
  case NEON::BI__builtin_neon_vextq_v: {
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
    SmallVector<int, 16> Indices;
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(i+CV);

    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
  }
  case NEON::BI__builtin_neon_vfma_v:
  case NEON::BI__builtin_neon_vfmaq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);

    // The NEON intrinsic puts the accumulator first, unlike the LLVM fma.
    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
        {Ops[1], Ops[2], Ops[0]});
  }
2028 case NEON::BI__builtin_neon_vld1_v:
2029 case NEON::BI__builtin_neon_vld1q_v: {
2031 Ops.push_back(getAlignmentValue32(PtrOp0));
2034 case NEON::BI__builtin_neon_vld1_x2_v:
2035 case NEON::BI__builtin_neon_vld1q_x2_v:
2036 case NEON::BI__builtin_neon_vld1_x3_v:
2037 case NEON::BI__builtin_neon_vld1q_x3_v:
2038 case NEON::BI__builtin_neon_vld1_x4_v:
2039 case NEON::BI__builtin_neon_vld1q_x4_v: {
2042 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
2045 case NEON::BI__builtin_neon_vld2_v:
2046 case NEON::BI__builtin_neon_vld2q_v:
2047 case NEON::BI__builtin_neon_vld3_v:
2048 case NEON::BI__builtin_neon_vld3q_v:
2049 case NEON::BI__builtin_neon_vld4_v:
2050 case NEON::BI__builtin_neon_vld4q_v:
2051 case NEON::BI__builtin_neon_vld2_dup_v:
2052 case NEON::BI__builtin_neon_vld2q_dup_v:
2053 case NEON::BI__builtin_neon_vld3_dup_v:
2054 case NEON::BI__builtin_neon_vld3q_dup_v:
2055 case NEON::BI__builtin_neon_vld4_dup_v:
2056 case NEON::BI__builtin_neon_vld4q_dup_v: {
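// The multi-vector loads (vld2/vld3/vld4 and their _dup/_lane forms)
// lower to arm.neon.vldN intrinsics that take the base pointer plus an
// explicit alignment operand and return a struct of N vectors.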
2059 Value *Align = getAlignmentValue32(PtrOp1);
2060 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
2063 case NEON::BI__builtin_neon_vld1_dup_v:
2064 case NEON::BI__builtin_neon_vld1q_dup_v: {
2065 Value *V = PoisonValue::get(Ty);
2068 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
2069 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
2072 case NEON::BI__builtin_neon_vld2_lane_v:
2073 case NEON::BI__builtin_neon_vld2q_lane_v:
2074 case NEON::BI__builtin_neon_vld3_lane_v:
2075 case NEON::BI__builtin_neon_vld3q_lane_v:
2076 case NEON::BI__builtin_neon_vld4_lane_v:
2077 case NEON::BI__builtin_neon_vld4q_lane_v: {
2080 for (unsigned I = 2; I < Ops.size() - 1; ++I)
2081   Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
2082 Ops.push_back(getAlignmentValue32(PtrOp1));
2086 case NEON::BI__builtin_neon_vmovl_v: {
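// vmovl widens each lane: bitcast the operand to the half-width source
// vector type, then zero- or sign-extend to the full-width result
// depending on the builtin's signedness.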
2087 llvm::FixedVectorType *DTy =
2088 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
2089 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
2090 if (Usgn)
2091   return Builder.CreateZExt(Ops[0], Ty, "vmovl");
2092 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
2094 case NEON::BI__builtin_neon_vmovn_v: {
2095 llvm::FixedVectorType *QTy =
2096 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2097 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
2098 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
2100 case NEON::BI__builtin_neon_vmull_v:
2106 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
2107 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
2109 case NEON::BI__builtin_neon_vpadal_v:
2110 case NEON::BI__builtin_neon_vpadalq_v: {
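// Pairwise widening add-accumulate: the source operand has twice as many
// elements at half the width, so the narrow type is built explicitly and
// the intrinsic is overloaded on {wide, narrow}.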
2112 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2116 auto *NarrowTy = llvm::FixedVectorType::get(llvm::IntegerType::get(getLLVMContext(), EltBits / 2), VTy->getNumElements() * 2);
2117 llvm::Type *Tys[2] = { Ty, NarrowTy };
2120 case NEON::BI__builtin_neon_vpaddl_v:
2121 case NEON::BI__builtin_neon_vpaddlq_v: {
2123 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2124 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
2126 auto *NarrowTy = llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
2127 llvm::Type *Tys[2] = { Ty, NarrowTy };
2130 case NEON::BI__builtin_neon_vqdmlal_v:
2131 case NEON::BI__builtin_neon_vqdmlsl_v: {
2138 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
2139 case NEON::BI__builtin_neon_vqdmulh_lane_v:
2140 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
2141 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
2142 auto *RTy = cast<llvm::FixedVectorType>(Ty);
2143 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
2144 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
2145 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
2146 RTy->getNumElements() * 2);
2147 llvm::Type *Tys[2] = {
2152 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
2153 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
2154 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
2155 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
2156 llvm::Type *Tys[2] = {
2161 case NEON::BI__builtin_neon_vqshl_n_v:
2162 case NEON::BI__builtin_neon_vqshlq_n_v:
2165 case NEON::BI__builtin_neon_vqshlu_n_v:
2166 case NEON::BI__builtin_neon_vqshluq_n_v:
2169 case NEON::BI__builtin_neon_vrecpe_v:
2170 case NEON::BI__builtin_neon_vrecpeq_v:
2171 case NEON::BI__builtin_neon_vrsqrte_v:
2172 case NEON::BI__builtin_neon_vrsqrteq_v:
2173 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
2175 case NEON::BI__builtin_neon_vrndi_v:
2176 case NEON::BI__builtin_neon_vrndiq_v:
2177 Int = Builder.getIsFPConstrained()
2178     ? Intrinsic::experimental_constrained_nearbyint
2179     : Intrinsic::nearbyint;
2181 case NEON::BI__builtin_neon_vrshr_n_v:
2182 case NEON::BI__builtin_neon_vrshrq_n_v:
2185 case NEON::BI__builtin_neon_vsha512hq_u64:
2186 case NEON::BI__builtin_neon_vsha512h2q_u64:
2187 case NEON::BI__builtin_neon_vsha512su0q_u64:
2188 case NEON::BI__builtin_neon_vsha512su1q_u64: {
2192 case NEON::BI__builtin_neon_vshl_n_v:
2193 case NEON::BI__builtin_neon_vshlq_n_v:
2195 return Builder.CreateShl(Builder.CreateBitCast(Ops[0], Ty), Ops[1],
2196                          "vshl_n");
2197 case NEON::BI__builtin_neon_vshll_n_v: {
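// vshll_n widens each lane first (zext/sext from the half-width source
// type) and then shifts left by the immediate, so the shift cannot lose
// bits in the destination lane.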
2198 llvm::FixedVectorType *SrcTy =
2199 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
2200 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2201 if (Usgn)
2202   Ops[0] = Builder.CreateZExt(Ops[0], VTy);
2203 else
2204   Ops[0] = Builder.CreateSExt(Ops[0], VTy);
2206 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
2208 case NEON::BI__builtin_neon_vshrn_n_v: {
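// vshrn_n works in the extended type: shift right (logical for unsigned,
// arithmetic for signed) and then truncate each lane back to half width.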
2209 llvm::FixedVectorType *SrcTy =
2210 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2211 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2213 if (Usgn)
2214   Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
2215 else
2216   Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
2217 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
2219 case NEON::BI__builtin_neon_vshr_n_v:
2220 case NEON::BI__builtin_neon_vshrq_n_v:
2222 case NEON::BI__builtin_neon_vst1_v:
2223 case NEON::BI__builtin_neon_vst1q_v:
2224 case NEON::BI__builtin_neon_vst2_v:
2225 case NEON::BI__builtin_neon_vst2q_v:
2226 case NEON::BI__builtin_neon_vst3_v:
2227 case NEON::BI__builtin_neon_vst3q_v:
2228 case NEON::BI__builtin_neon_vst4_v:
2229 case NEON::BI__builtin_neon_vst4q_v:
2230 case NEON::BI__builtin_neon_vst2_lane_v:
2231 case NEON::BI__builtin_neon_vst2q_lane_v:
2232 case NEON::BI__builtin_neon_vst3_lane_v:
2233 case NEON::BI__builtin_neon_vst3q_lane_v:
2234 case NEON::BI__builtin_neon_vst4_lane_v:
2235 case NEON::BI__builtin_neon_vst4q_lane_v: {
2237 Ops.push_back(getAlignmentValue32(PtrOp0));
2240 case NEON::BI__builtin_neon_vsm3partw1q_u32:
2241 case NEON::BI__builtin_neon_vsm3partw2q_u32:
2242 case NEON::BI__builtin_neon_vsm3ss1q_u32:
2243 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
2244 case NEON::BI__builtin_neon_vsm4eq_u32: {
2248 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
2249 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
2250 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
2251 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
2256 case NEON::BI__builtin_neon_vst1_x2_v:
2257 case NEON::BI__builtin_neon_vst1q_x2_v:
2258 case NEON::BI__builtin_neon_vst1_x3_v:
2259 case NEON::BI__builtin_neon_vst1q_x3_v:
2260 case NEON::BI__builtin_neon_vst1_x4_v:
2261 case NEON::BI__builtin_neon_vst1q_x4_v: {
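// AArch64 shares these builtins with ARM but its intrinsics expect the
// pointer operand first, so the operand list is rotated before the call.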
2264 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
2265     Arch == llvm::Triple::aarch64_32) {
2267   std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
2273 case NEON::BI__builtin_neon_vsubhn_v: {
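// vsubhn keeps the high half of each difference: subtract in the
// extended type, shift right by half the lane width, then truncate.
// For <4 x i32> inputs this produces a <4 x i16> of (a[i]-b[i]) >> 16.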
2274 llvm::FixedVectorType *SrcTy =
2275 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2278 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2279 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
2280 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
2283 Constant *ShiftAmt =
2284     ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
2285 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
2288 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
2290 case NEON::BI__builtin_neon_vtrn_v:
2291 case NEON::BI__builtin_neon_vtrnq_v: {
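// vtrn transposes 2x2 blocks of lanes: pass vi=0 gathers the even lanes
// of both inputs and vi=1 the odd lanes, so vtrn(<a0,a1,a2,a3>,
// <b0,b1,b2,b3>) produces <a0,b0,a2,b2> and <a1,b1,a3,b3>.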
2292 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2293 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2294 Value *SV = nullptr;
2296 for (unsigned vi = 0; vi != 2; ++vi) {
2298   for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2299     Indices.push_back(i+vi);
2300     Indices.push_back(i+e+vi);
2303   SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
2308 case NEON::BI__builtin_neon_vtst_v:
2309 case NEON::BI__builtin_neon_vtstq_v: {
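// vtst sets a lane to all-ones when (a & b) != 0 and to all-zeros
// otherwise: AND the inputs, compare against zero, then sign-extend the
// i1 result back to the lane type.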
2310 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2311 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2312 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
2313 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
2314                             ConstantAggregateZero::get(Ty));
2315 return Builder.CreateSExt(Ops[0], Ty, "vtst");
2317 case NEON::BI__builtin_neon_vuzp_v:
2318 case NEON::BI__builtin_neon_vuzpq_v: {
2319 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2320 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2321 Value *SV = nullptr;
2323 for (unsigned vi = 0; vi != 2; ++vi) {
2325   for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
2326     Indices.push_back(2*i+vi);
2329   SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
2334 case NEON::BI__builtin_neon_vxarq_u64: {
2339 case NEON::BI__builtin_neon_vzip_v:
2340 case NEON::BI__builtin_neon_vzipq_v: {
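// vzip interleaves the low (vi=0) or high (vi=1) halves of the inputs:
// vzip(<a0,a1,a2,a3>, <b0,b1,b2,b3>) gives <a0,b0,a1,b1> and <a2,b2,a3,b3>.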
2341 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2342 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2343 Value *SV = nullptr;
2345 for (unsigned vi = 0; vi != 2; ++vi) {
2347   for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2348     Indices.push_back((i + vi*e) >> 1);
2349     Indices.push_back(((i + vi*e) >> 1)+e);
2352   SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
2357 case NEON::BI__builtin_neon_vdot_s32:
2358 case NEON::BI__builtin_neon_vdot_u32:
2359 case NEON::BI__builtin_neon_vdotq_s32:
2360 case NEON::BI__builtin_neon_vdotq_u32: {
2362 llvm::Type *InputTy = llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2363 llvm::Type *Tys[2] = { Ty, InputTy };
2366 case NEON::BI__builtin_neon_vfmlal_low_f16:
2367 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
2369 llvm::Type *InputTy = llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2370 llvm::Type *Tys[2] = { Ty, InputTy };
2373 case NEON::BI__builtin_neon_vfmlsl_low_f16:
2374 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
2376 llvm::Type *InputTy = llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2377 llvm::Type *Tys[2] = { Ty, InputTy };
2380 case NEON::BI__builtin_neon_vfmlal_high_f16:
2381 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
2383 llvm::Type *InputTy = llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2384 llvm::Type *Tys[2] = { Ty, InputTy };
2387 case NEON::BI__builtin_neon_vfmlsl_high_f16:
2388 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
2390 llvm::Type *InputTy = llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2391 llvm::Type *Tys[2] = { Ty, InputTy };
2394 case NEON::BI__builtin_neon_vmmlaq_s32:
2395 case NEON::BI__builtin_neon_vmmlaq_u32: {
2397 llvm::Type *InputTy = llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2398 llvm::Type *Tys[2] = { Ty, InputTy };
2401 case NEON::BI__builtin_neon_vusmmlaq_s32: {
2403 llvm::Type *InputTy = llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2404 llvm::Type *Tys[2] = { Ty, InputTy };
2407 case NEON::BI__builtin_neon_vusdot_s32:
2408 case NEON::BI__builtin_neon_vusdotq_s32: {
2410 llvm::Type *InputTy = llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2411 llvm::Type *Tys[2] = { Ty, InputTy };
2414 case NEON::BI__builtin_neon_vbfdot_f32:
2415 case NEON::BI__builtin_neon_vbfdotq_f32: {
2416 llvm::Type *InputTy =
2417     llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
2418 llvm::Type *Tys[2] = { Ty, InputTy };
2421 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
2422 llvm::Type *Tys[1] = { Ty };
2429 assert(Int && "Expected valid intrinsic number");
2443 const CmpInst::Predicate Pred,
2444 const Twine &Name) {
2446 if (isa<FixedVectorType>(Ty)) {
2448 Op = Builder.CreateBitCast(Op, Ty);
2451 if (CmpInst::isFPPredicate(Pred)) {
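// Under strict FP the ordered-equal compare is emitted as a quiet FCmp
// (it must not trap on quiet NaNs), while the other FP predicates use
// the signaling form CreateFCmpS.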
2452 if (Pred == CmpInst::FCMP_OEQ)
2453   Op = Builder.CreateFCmp(Pred, Op, Constant::getNullValue(Op->getType()));
2454 else
2455   Op = Builder.CreateFCmpS(Pred, Op, Constant::getNullValue(Op->getType()));
2456 } else
2457   Op = Builder.CreateICmp(Pred, Op, Constant::getNullValue(Op->getType()));
2460 llvm::Type *ResTy = Ty;
2461 if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
2462   ResTy = FixedVectorType::get(
2464       VTy->getNumElements());
2466 return Builder.CreateSExt(Op, ResTy, Name);
2471 llvm::Type *ResTy, unsigned IntID,
2475 TblOps.push_back(ExtOp);
2479 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
2480 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
2481 Indices.push_back(2*i);
2482 Indices.push_back(2*i+1);
2485 int PairPos = 0, End = Ops.size() - 1;
2486 while (PairPos < End) {
2487 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
2488 Ops[PairPos+1], Indices,
2495 if (PairPos == End) {
2496 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
2497 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
2498 ZeroTbl, Indices, Name));
2502 TblOps.push_back(IndexOp);
2508 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
2510 switch (BuiltinID) {
2513 case clang::ARM::BI__builtin_arm_nop:
2516 case clang::ARM::BI__builtin_arm_yield:
2517 case clang::ARM::BI__yield:
2520 case clang::ARM::BI__builtin_arm_wfe:
2521 case clang::ARM::BI__wfe:
2524 case clang::ARM::BI__builtin_arm_wfi:
2525 case clang::ARM::BI__wfi:
2528 case clang::ARM::BI__builtin_arm_sev:
2529 case clang::ARM::BI__sev:
2532 case clang::ARM::BI__builtin_arm_sevl:
2533 case clang::ARM::BI__sevl:
2554 llvm::Type *ValueType,
2556 StringRef SysReg = "") {
2560 "Unsupported size for register.");
2566 if (SysReg.empty()) {
2568 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
2571 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
2572 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
2573 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
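// Named hardware registers are read and written through the generic
// llvm.read_register / llvm.write_register intrinsics, which identify
// the register with an MDString wrapped as MetadataAsValue rather than
// with a pointer operand.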
2577 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
2578 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
2579        && "Can't fit 64-bit value in 32-bit register");
2581 if (AccessKind != Write) {
2584     AccessKind == VolatileRead ? Intrinsic::read_volatile_register
2585                                : Intrinsic::read_register,
2587   llvm::Value *Call = Builder.CreateCall(F, Metadata);
2591     return Builder.CreateTrunc(Call, ValueType);
2593   if (ValueType->isPointerTy())
2595     return Builder.CreateIntToPtr(Call, ValueType);
2600 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
2605 return Builder.CreateCall(F, { Metadata, ArgValue });
2608 if (ValueType->isPointerTy()) {
2610 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
2611 return Builder.CreateCall(F, { Metadata, ArgValue });
2614 return Builder.CreateCall(F, { Metadata, ArgValue });
2620 switch (BuiltinID) {
2622 case NEON::BI__builtin_neon_vget_lane_i8:
2623 case NEON::BI__builtin_neon_vget_lane_i16:
2624 case NEON::BI__builtin_neon_vget_lane_bf16:
2625 case NEON::BI__builtin_neon_vget_lane_i32:
2626 case NEON::BI__builtin_neon_vget_lane_i64:
2627 case NEON::BI__builtin_neon_vget_lane_mf8:
2628 case NEON::BI__builtin_neon_vget_lane_f32:
2629 case NEON::BI__builtin_neon_vgetq_lane_i8:
2630 case NEON::BI__builtin_neon_vgetq_lane_i16:
2631 case NEON::BI__builtin_neon_vgetq_lane_bf16:
2632 case NEON::BI__builtin_neon_vgetq_lane_i32:
2633 case NEON::BI__builtin_neon_vgetq_lane_i64:
2634 case NEON::BI__builtin_neon_vgetq_lane_mf8:
2635 case NEON::BI__builtin_neon_vgetq_lane_f32:
2636 case NEON::BI__builtin_neon_vduph_lane_bf16:
2637 case NEON::BI__builtin_neon_vduph_laneq_bf16:
2638 case NEON::BI__builtin_neon_vset_lane_i8:
2639 case NEON::BI__builtin_neon_vset_lane_mf8:
2640 case NEON::BI__builtin_neon_vset_lane_i16:
2641 case NEON::BI__builtin_neon_vset_lane_bf16:
2642 case NEON::BI__builtin_neon_vset_lane_i32:
2643 case NEON::BI__builtin_neon_vset_lane_i64:
2644 case NEON::BI__builtin_neon_vset_lane_f32:
2645 case NEON::BI__builtin_neon_vsetq_lane_i8:
2646 case NEON::BI__builtin_neon_vsetq_lane_mf8:
2647 case NEON::BI__builtin_neon_vsetq_lane_i16:
2648 case NEON::BI__builtin_neon_vsetq_lane_bf16:
2649 case NEON::BI__builtin_neon_vsetq_lane_i32:
2650 case NEON::BI__builtin_neon_vsetq_lane_i64:
2651 case NEON::BI__builtin_neon_vsetq_lane_f32:
2652 case NEON::BI__builtin_neon_vsha1h_u32:
2653 case NEON::BI__builtin_neon_vsha1cq_u32:
2654 case NEON::BI__builtin_neon_vsha1pq_u32:
2655 case NEON::BI__builtin_neon_vsha1mq_u32:
2656 case NEON::BI__builtin_neon_vcvth_bf16_f32:
2657 case clang::ARM::BI_MoveToCoprocessor:
2658 case clang::ARM::BI_MoveToCoprocessor2:
2667 llvm::Triple::ArchType Arch) {
2668 if (auto Hint = GetValueForARMHint(BuiltinID))
2671 if (BuiltinID == clang::ARM::BI__emit) {
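// __emit encodes its constant argument directly as an instruction word
// via inline asm: a 16-bit .inst.n in Thumb mode, a 32-bit .inst
// otherwise.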
2673 llvm::FunctionType *FTy =
2674     llvm::FunctionType::get(VoidTy, false);
2678   llvm_unreachable("Sema will ensure that the parameter is constant");
2681 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
2683 llvm::InlineAsm *Emit =
2684     IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
2686             : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
2689 return Builder.CreateCall(Emit);
2692 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
2697 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
2709 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
2712     CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
2715 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
2716 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
2720 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
2726 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
2730 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
2736 if (BuiltinID == clang::ARM::BI__clear_cache) {
2737 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
2740 for (unsigned i = 0; i < 2; i++)
2743 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
2744 StringRef Name = FD->getName();
2748 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
2749 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
2752 switch (BuiltinID) {
2753 default: llvm_unreachable("unexpected builtin");
2754 case clang::ARM::BI__builtin_arm_mcrr:
2757 case clang::ARM::BI__builtin_arm_mcrr2:
2779 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
2782 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
2783 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
2786 switch (BuiltinID) {
2787 default: llvm_unreachable("unexpected builtin");
2788 case clang::ARM::BI__builtin_arm_mrrc:
2791 case clang::ARM::BI__builtin_arm_mrrc2:
2799 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
2809 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
2810 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
2811 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
2816 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
2817 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2818 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
2820 BuiltinID == clang::ARM::BI__ldrexd) {
2823 switch (BuiltinID) {
2824 default: llvm_unreachable("unexpected builtin");
2825 case clang::ARM::BI__builtin_arm_ldaex:
2828 case clang::ARM::BI__builtin_arm_ldrexd:
2829 case clang::ARM::BI__builtin_arm_ldrex:
2830 case clang::ARM::BI__ldrexd:
2844 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true);
2845 Val = Builder.CreateOr(Val, Val1);
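// ldrexd returns the doubleword in two 32-bit registers; the halves are
// spliced back together here as (Val0 << 32) | Val1.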
2849 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2850 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
2859 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
2860 : Intrinsic::arm_ldrex,
2862 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
2866 if (RealResTy->isPointerTy())
2867 return Builder.CreateIntToPtr(Val, RealResTy);
2869 llvm::Type *IntResTy = llvm::IntegerType::get(
2871 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
2876 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
2877 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
2878 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
2881 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
2882 : Intrinsic::arm_strexd);
2895 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
2898 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
2899 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
2904 llvm::Type *StoreTy =
2907 if (StoreVal->getType()->isPointerTy())
2910 llvm::Type *IntTy = llvm::IntegerType::get(
2918 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
2919 : Intrinsic::arm_strex,
2922 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
2924     1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
2928 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
2934 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
2935 switch (BuiltinID) {
2936 case clang::ARM::BI__builtin_arm_crc32b:
2937   CRCIntrinsicID = Intrinsic::arm_crc32b; break;
2938 case clang::ARM::BI__builtin_arm_crc32cb:
2939   CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
2940 case clang::ARM::BI__builtin_arm_crc32h:
2941   CRCIntrinsicID = Intrinsic::arm_crc32h; break;
2942 case clang::ARM::BI__builtin_arm_crc32ch:
2943   CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
2944 case clang::ARM::BI__builtin_arm_crc32w:
2945 case clang::ARM::BI__builtin_arm_crc32d:
2946   CRCIntrinsicID = Intrinsic::arm_crc32w; break;
2947 case clang::ARM::BI__builtin_arm_crc32cw:
2948 case clang::ARM::BI__builtin_arm_crc32cd:
2949   CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
2952 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
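// AArch32 has no CRC32 instruction that takes 64-bit data, so the
// crc32d/crc32cd builtins are emitted as two chained 32-bit CRC ops over
// the low and high halves of the argument.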
2958 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
2959 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
2967 return Builder.CreateCall(F, {Res, Arg1b});
2972 return Builder.CreateCall(F, {Arg0, Arg1});
2976 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
2977 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2978 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
2979 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
2980 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
2981 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
2984 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
2985 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2986 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
2989 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
2990 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
2992 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2993 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
2995 llvm::Type *ValueType;
2997 if (IsPointerBuiltin) {
3000 } else if (Is64Bit) {
3010 if (BuiltinID == ARM::BI__builtin_sponentry) {
3029 return P.first == BuiltinID;
3032 BuiltinID = It->second;
3036 unsigned ICEArguments = 0;
3041 auto getAlignmentValue32 = [&](Address addr) -> Value* {
3042   return Builder.getInt32(addr.getAlignment().getQuantity());
3049 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
3050 for (unsigned i = 0, e = NumArgs; i != e; i++) {
3052 switch (BuiltinID) {
3053 case NEON::BI__builtin_neon_vld1_v:
3054 case NEON::BI__builtin_neon_vld1q_v:
3055 case NEON::BI__builtin_neon_vld1q_lane_v:
3056 case NEON::BI__builtin_neon_vld1_lane_v:
3057 case NEON::BI__builtin_neon_vld1_dup_v:
3058 case NEON::BI__builtin_neon_vld1q_dup_v:
3059 case NEON::BI__builtin_neon_vst1_v:
3060 case NEON::BI__builtin_neon_vst1q_v:
3061 case NEON::BI__builtin_neon_vst1q_lane_v:
3062 case NEON::BI__builtin_neon_vst1_lane_v:
3063 case NEON::BI__builtin_neon_vst2_v:
3064 case NEON::BI__builtin_neon_vst2q_v:
3065 case NEON::BI__builtin_neon_vst2_lane_v:
3066 case NEON::BI__builtin_neon_vst2q_lane_v:
3067 case NEON::BI__builtin_neon_vst3_v:
3068 case NEON::BI__builtin_neon_vst3q_v:
3069 case NEON::BI__builtin_neon_vst3_lane_v:
3070 case NEON::BI__builtin_neon_vst3q_lane_v:
3071 case NEON::BI__builtin_neon_vst4_v:
3072 case NEON::BI__builtin_neon_vst4q_v:
3073 case NEON::BI__builtin_neon_vst4_lane_v:
3074 case NEON::BI__builtin_neon_vst4q_lane_v:
3083 switch (BuiltinID) {
3084 case NEON::BI__builtin_neon_vld2_v:
3085 case NEON::BI__builtin_neon_vld2q_v:
3086 case NEON::BI__builtin_neon_vld3_v:
3087 case NEON::BI__builtin_neon_vld3q_v:
3088 case NEON::BI__builtin_neon_vld4_v:
3089 case NEON::BI__builtin_neon_vld4q_v:
3090 case NEON::BI__builtin_neon_vld2_lane_v:
3091 case NEON::BI__builtin_neon_vld2q_lane_v:
3092 case NEON::BI__builtin_neon_vld3_lane_v:
3093 case NEON::BI__builtin_neon_vld3q_lane_v:
3094 case NEON::BI__builtin_neon_vld4_lane_v:
3095 case NEON::BI__builtin_neon_vld4q_lane_v:
3096 case NEON::BI__builtin_neon_vld2_dup_v:
3097 case NEON::BI__builtin_neon_vld2q_dup_v:
3098 case NEON::BI__builtin_neon_vld3_dup_v:
3099 case NEON::BI__builtin_neon_vld3q_dup_v:
3100 case NEON::BI__builtin_neon_vld4_dup_v:
3101 case NEON::BI__builtin_neon_vld4q_dup_v:
3113 switch (BuiltinID) {
3116 case NEON::BI__builtin_neon_vget_lane_i8:
3117 case NEON::BI__builtin_neon_vget_lane_i16:
3118 case NEON::BI__builtin_neon_vget_lane_i32:
3119 case NEON::BI__builtin_neon_vget_lane_i64:
3120 case NEON::BI__builtin_neon_vget_lane_bf16:
3121 case NEON::BI__builtin_neon_vget_lane_f32:
3122 case NEON::BI__builtin_neon_vgetq_lane_i8:
3123 case NEON::BI__builtin_neon_vgetq_lane_i16:
3124 case NEON::BI__builtin_neon_vgetq_lane_i32:
3125 case NEON::BI__builtin_neon_vgetq_lane_i64:
3126 case NEON::BI__builtin_neon_vgetq_lane_bf16:
3127 case NEON::BI__builtin_neon_vgetq_lane_f32:
3128 case NEON::BI__builtin_neon_vduph_lane_bf16:
3129 case NEON::BI__builtin_neon_vduph_laneq_bf16:
3130 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
3132 case NEON::BI__builtin_neon_vrndns_f32: {
3134 llvm::Type *Tys[] = {Arg->getType()};
3136 return Builder.CreateCall(F, {Arg}, "vrndn"); }
3138 case NEON::BI__builtin_neon_vset_lane_i8:
3139 case NEON::BI__builtin_neon_vset_lane_i16:
3140 case NEON::BI__builtin_neon_vset_lane_i32:
3141 case NEON::BI__builtin_neon_vset_lane_i64:
3142 case NEON::BI__builtin_neon_vset_lane_bf16:
3143 case NEON::BI__builtin_neon_vset_lane_f32:
3144 case NEON::BI__builtin_neon_vsetq_lane_i8:
3145 case NEON::BI__builtin_neon_vsetq_lane_i16:
3146 case NEON::BI__builtin_neon_vsetq_lane_i32:
3147 case NEON::BI__builtin_neon_vsetq_lane_i64:
3148 case NEON::BI__builtin_neon_vsetq_lane_bf16:
3149 case NEON::BI__builtin_neon_vsetq_lane_f32:
3150 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
3152 case NEON::BI__builtin_neon_vsha1h_u32:
3155 case NEON::BI__builtin_neon_vsha1cq_u32:
3158 case NEON::BI__builtin_neon_vsha1pq_u32:
3161 case NEON::BI__builtin_neon_vsha1mq_u32:
3165 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
3172 case clang::ARM::BI_MoveToCoprocessor:
3173 case clang::ARM::BI_MoveToCoprocessor2: {
3175 ? Intrinsic::arm_mcr
3176 : Intrinsic::arm_mcr2);
3177 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
3178 Ops[3], Ops[4], Ops[5]});
3183 assert(HasExtraArg);
3184 const Expr *Arg = E->getArg(E->getNumArgs()-1);
3185 std::optional<llvm::APSInt> Result =
3190 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
3191 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
3194 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
3200 bool usgn = Result->getZExtValue() == 1;
3201 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
3205 return Builder.CreateCall(F, Ops, "vcvtr");
3210 bool usgn = Type.isUnsigned();
3211 bool rightShift = false;
3213 llvm::FixedVectorType *VTy =
3216 llvm::Type *Ty = VTy;
3231 switch (BuiltinID) {
3232 default: return nullptr;
3233 case NEON::BI__builtin_neon_vld1q_lane_v:
3236 if (VTy->getElementType()->isIntegerTy(64)) {
3238 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3239 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
3240 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
3241 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
3243 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
3246 Value *Align = getAlignmentValue32(PtrOp0);
3249 int Indices[] = {1 - Lane, Lane};
3250 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
3253 case NEON::BI__builtin_neon_vld1_lane_v: {
3254 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3257 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
3259 case NEON::BI__builtin_neon_vqrshrn_n_v:
3261 Int = usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
3264 case NEON::BI__builtin_neon_vqrshrun_n_v:
3266     Ops, "vqrshrun_n", 1, true);
3267 case NEON::BI__builtin_neon_vqshrn_n_v:
3268 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
3271 case NEON::BI__builtin_neon_vqshrun_n_v:
3273     Ops, "vqshrun_n", 1, true);
3274 case NEON::BI__builtin_neon_vrecpe_v:
3275 case NEON::BI__builtin_neon_vrecpeq_v:
3278 case NEON::BI__builtin_neon_vrshrn_n_v:
3280     Ops, "vrshrn_n", 1, true);
3281 case NEON::BI__builtin_neon_vrsra_n_v:
3282 case NEON::BI__builtin_neon_vrsraq_n_v:
3283 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3284 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3286 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
3288 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
3289 case NEON::BI__builtin_neon_vsri_n_v:
3290 case NEON::BI__builtin_neon_vsriq_n_v:
3293 case NEON::BI__builtin_neon_vsli_n_v:
3294 case NEON::BI__builtin_neon_vsliq_n_v:
3298 case NEON::BI__builtin_neon_vsra_n_v:
3299 case NEON::BI__builtin_neon_vsraq_n_v:
3300 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3302 return Builder.CreateAdd(Ops[0], Ops[1]);
3303 case NEON::BI__builtin_neon_vst1q_lane_v:
3306 if (VTy->getElementType()->isIntegerTy(64)) {
3307 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3308 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
3309 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
3310 Ops[2] = getAlignmentValue32(PtrOp0);
3311 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
3316 case NEON::BI__builtin_neon_vst1_lane_v: {
3317 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3318 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
3322 case NEON::BI__builtin_neon_vtbl1_v:
3325 case NEON::BI__builtin_neon_vtbl2_v:
3328 case NEON::BI__builtin_neon_vtbl3_v:
3331 case NEON::BI__builtin_neon_vtbl4_v:
3334 case NEON::BI__builtin_neon_vtbx1_v:
3337 case NEON::BI__builtin_neon_vtbx2_v:
3340 case NEON::BI__builtin_neon_vtbx3_v:
3343 case NEON::BI__builtin_neon_vtbx4_v:
3349 template <typename Integer>
3358 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
3368 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
3370                         ->getPrimitiveSizeInBits();
3371 if (Shift == LaneBits) {
3376   return llvm::Constant::getNullValue(V->getType());
3380 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
3387 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
3388 return Builder.CreateVectorSplat(Elements, V);
3394 llvm::Type *DestType) {
3407 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
3408 return Builder.CreateCall(
3410 {DestType, V->getType()}),
3413 return Builder.CreateBitCast(V, DestType);
3421 unsigned InputElements =
3422     cast<llvm::FixedVectorType>(V->getType())->getNumElements();
3423 for (unsigned i = 0; i < InputElements; i += 2)
3424   Indices.push_back(i + Odd);
3425 return Builder.CreateShuffleVector(V, Indices);
3431 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
3433 unsigned InputElements =
3434     cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
3435 for (unsigned i = 0; i < InputElements; i++) {
3436 Indices.push_back(i);
3437 Indices.push_back(i + InputElements);
3439 return Builder.CreateShuffleVector(V0, V1, Indices);
3442 template <unsigned HighBit, unsigned OtherBits>
3446 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
3447 unsigned LaneBits = T->getPrimitiveSizeInBits();
3448 uint32_t Value = HighBit << (LaneBits - 1);
3450 Value |= (1UL << (LaneBits - 1)) - 1;
3451 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
3457 unsigned ReverseWidth) {
3461 unsigned LaneSize = V->getType()->getScalarSizeInBits();
3462 unsigned Elements = 128 / LaneSize;
3463 unsigned Mask = ReverseWidth / LaneSize - 1;
3464 for (unsigned i = 0; i < Elements; i++)
3465   Indices.push_back(i ^ Mask);
3466 return Builder.CreateShuffleVector(V, Indices);
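// Reversing W-bit chunks lane-by-lane is just a shuffle whose mask XORs
// each index with (ReverseWidth/LaneSize - 1): e.g. reversing 32-bit
// chunks of i8 lanes maps indices 0,1,2,3 -> 3,2,1,0 in every group.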
3472 llvm::Triple::ArchType Arch) {
3473 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
3474 Intrinsic::ID IRIntr;
3475 unsigned NumVectors;
3478 switch (BuiltinID) {
3479 #include "clang/Basic/arm_mve_builtin_cg.inc"
3490 switch (CustomCodeGenType) {
3492 case CustomCodeGen::VLD24: {
3498 assert(MvecLType->isStructTy() &&
3499 "Return type for vld[24]q should be a struct");
3500 assert(MvecLType->getStructNumElements() == 1 &&
3501 "Return-type struct for vld[24]q should have one element");
3502 auto MvecLTypeInner = MvecLType->getStructElementType(0);
3503 assert(MvecLTypeInner->isArrayTy() &&
3504 "Return-type struct for vld[24]q should contain an array");
3505 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3506 "Array member of return-type struct vld[24]q has wrong length");
3507 auto VecLType = MvecLTypeInner->getArrayElementType();
3509 Tys.push_back(VecLType);
3511 auto Addr = E->getArg(0);
3517 Value *MvecOut = PoisonValue::get(MvecLType);
3518 for (unsigned i = 0; i < NumVectors; ++i) {
3519   Value *Vec = Builder.CreateExtractValue(LoadResult, i);
3520   MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
3529 case CustomCodeGen::VST24: {
3533 auto Addr = E->getArg(0);
3537 auto MvecCType = E->getArg(1)->getType();
3539 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
3540 assert(MvecLType->getStructNumElements() == 1 &&
3541 "Data-type struct for vst2q should have one element");
3542 auto MvecLTypeInner = MvecLType->getStructElementType(0);
3543 assert(MvecLTypeInner->isArrayTy() &&
3544 "Data-type struct for vst2q should contain an array");
3545 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3546 "Array member of return-type struct vld[24]q has wrong length");
3547 auto VecLType = MvecLTypeInner->getArrayElementType();
3549 Tys.push_back(VecLType);
3554 for (unsigned i = 0; i < NumVectors; i++)
3555   Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
3558 Value *ToReturn = nullptr;
3559 for (unsigned i = 0; i < NumVectors; i++) {
3560   Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
3561   ToReturn = Builder.CreateCall(F, Ops);
3567 llvm_unreachable("unknown custom codegen type.");
3573 llvm::Triple::ArchType Arch) {
3574 switch (BuiltinID) {
3577#include "clang/Basic/arm_cde_builtin_cg.inc"
3584 llvm::Triple::ArchType Arch) {
3585 unsigned int Int = 0;
3586 const char *s = nullptr;
3588 switch (BuiltinID) {
3591 case NEON::BI__builtin_neon_vtbl1_v:
3592 case NEON::BI__builtin_neon_vqtbl1_v:
3593 case NEON::BI__builtin_neon_vqtbl1q_v:
3594 case NEON::BI__builtin_neon_vtbl2_v:
3595 case NEON::BI__builtin_neon_vqtbl2_v:
3596 case NEON::BI__builtin_neon_vqtbl2q_v:
3597 case NEON::BI__builtin_neon_vtbl3_v:
3598 case NEON::BI__builtin_neon_vqtbl3_v:
3599 case NEON::BI__builtin_neon_vqtbl3q_v:
3600 case NEON::BI__builtin_neon_vtbl4_v:
3601 case NEON::BI__builtin_neon_vqtbl4_v:
3602 case NEON::BI__builtin_neon_vqtbl4q_v:
3604 case NEON::BI__builtin_neon_vtbx1_v:
3605 case NEON::BI__builtin_neon_vqtbx1_v:
3606 case NEON::BI__builtin_neon_vqtbx1q_v:
3607 case NEON::BI__builtin_neon_vtbx2_v:
3608 case NEON::BI__builtin_neon_vqtbx2_v:
3609 case NEON::BI__builtin_neon_vqtbx2q_v:
3610 case NEON::BI__builtin_neon_vtbx3_v:
3611 case NEON::BI__builtin_neon_vqtbx3_v:
3612 case NEON::BI__builtin_neon_vqtbx3q_v:
3613 case NEON::BI__builtin_neon_vtbx4_v:
3614 case NEON::BI__builtin_neon_vqtbx4_v:
3615 case NEON::BI__builtin_neon_vqtbx4q_v:
3619 assert(E->getNumArgs() >= 3);
3622 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3623 std::optional<llvm::APSInt> Result =
3638 switch (BuiltinID) {
3639 case NEON::BI__builtin_neon_vtbl1_v: {
3641       Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3643 case NEON::BI__builtin_neon_vtbl2_v: {
3645       Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3647 case NEON::BI__builtin_neon_vtbl3_v: {
3649       Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3651 case NEON::BI__builtin_neon_vtbl4_v: {
3653       Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3655 case NEON::BI__builtin_neon_vtbx1_v: {
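// vtbx1 has no direct AArch64 equivalent: emit a one-register tbl1
// lookup, then for indices >= 8 (out of range for a single 64-bit
// table) select the corresponding lane of the fallback operand instead.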
3658       Intrinsic::aarch64_neon_tbl1, "vtbl1");
3660 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
3661 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
3662 CmpRes = Builder.CreateSExt(CmpRes, Ty);
3664 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
3665 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
3666 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
3668 case NEON::BI__builtin_neon_vtbx2_v: {
3670       Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
3672 case NEON::BI__builtin_neon_vtbx3_v: {
3675       Intrinsic::aarch64_neon_tbl2, "vtbl2");
3677 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
3678 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], TwentyFourV);
3680 CmpRes = Builder.CreateSExt(CmpRes, Ty);
3682 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
3683 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
3684 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
3686 case NEON::BI__builtin_neon_vtbx4_v: {
3688       Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
3690 case NEON::BI__builtin_neon_vqtbl1_v:
3691 case NEON::BI__builtin_neon_vqtbl1q_v:
3692   Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
3693 case NEON::BI__builtin_neon_vqtbl2_v:
3694 case NEON::BI__builtin_neon_vqtbl2q_v: {
3695   Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
3696 case NEON::BI__builtin_neon_vqtbl3_v:
3697 case NEON::BI__builtin_neon_vqtbl3q_v:
3698   Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
3699 case NEON::BI__builtin_neon_vqtbl4_v:
3700 case NEON::BI__builtin_neon_vqtbl4q_v:
3701   Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
3702 case NEON::BI__builtin_neon_vqtbx1_v:
3703 case NEON::BI__builtin_neon_vqtbx1q_v:
3704   Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
3705 case NEON::BI__builtin_neon_vqtbx2_v:
3706 case NEON::BI__builtin_neon_vqtbx2q_v:
3707   Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
3708 case NEON::BI__builtin_neon_vqtbx3_v:
3709 case NEON::BI__builtin_neon_vqtbx3q_v:
3710   Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
3711 case NEON::BI__builtin_neon_vqtbx4_v:
3712 case NEON::BI__builtin_neon_vqtbx4q_v:
3713   Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
3725 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
3727 Value *V = PoisonValue::get(VTy);
3728 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3729 Op = Builder.CreateInsertElement(V, Op, CI);
3738 case SVETypeFlags::MemEltTyDefault:
3740 case SVETypeFlags::MemEltTyInt8:
3742 case SVETypeFlags::MemEltTyInt16:
3744 case SVETypeFlags::MemEltTyInt32:
3746 case SVETypeFlags::MemEltTyInt64:
3749 llvm_unreachable("Unknown MemEltType");
3755 llvm_unreachable("Invalid SVETypeFlag!");
3757 case SVETypeFlags::EltTyMFloat8:
3758 case SVETypeFlags::EltTyInt8:
3760 case SVETypeFlags::EltTyInt16:
3762 case SVETypeFlags::EltTyInt32:
3764 case SVETypeFlags::EltTyInt64:
3766 case SVETypeFlags::EltTyInt128:
3769 case SVETypeFlags::EltTyFloat16:
3771 case SVETypeFlags::EltTyFloat32:
3773 case SVETypeFlags::EltTyFloat64:
3776 case SVETypeFlags::EltTyBFloat16:
3779 case SVETypeFlags::EltTyBool8:
3780 case SVETypeFlags::EltTyBool16:
3781 case SVETypeFlags::EltTyBool32:
3782 case SVETypeFlags::EltTyBool64:
3789 llvm::ScalableVectorType *
3792 default: llvm_unreachable("Unhandled SVETypeFlag!");
3794 case SVETypeFlags::EltTyInt8:
3795   return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3796 case SVETypeFlags::EltTyInt16:
3797   return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3798 case SVETypeFlags::EltTyInt32:
3799   return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3800 case SVETypeFlags::EltTyInt64:
3801   return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3803 case SVETypeFlags::EltTyBFloat16:
3804   return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3805 case SVETypeFlags::EltTyFloat16:
3806   return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3807 case SVETypeFlags::EltTyFloat32:
3808   return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3809 case SVETypeFlags::EltTyFloat64:
3810   return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3812 case SVETypeFlags::EltTyBool8:
3813   return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3814 case SVETypeFlags::EltTyBool16:
3815   return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3816 case SVETypeFlags::EltTyBool32:
3817   return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3818 case SVETypeFlags::EltTyBool64:
3819   return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
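// SVE predicate vectors always use i1 elements; only the element count
// changes, one predicate bit per data lane of the corresponding vector.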
3824 llvm::ScalableVectorType *
3828 llvm_unreachable("Invalid SVETypeFlag!");
3830 case SVETypeFlags::EltTyInt8:
3831   return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
3832 case SVETypeFlags::EltTyInt16:
3833   return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
3834 case SVETypeFlags::EltTyInt32:
3835   return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
3836 case SVETypeFlags::EltTyInt64:
3837   return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
3839 case SVETypeFlags::EltTyMFloat8:
3840   return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
3841 case SVETypeFlags::EltTyFloat16:
3842   return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
3843 case SVETypeFlags::EltTyBFloat16:
3844   return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
3845 case SVETypeFlags::EltTyFloat32:
3846   return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
3847 case SVETypeFlags::EltTyFloat64:
3848   return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
3850 case SVETypeFlags::EltTyBool8:
3851   return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3852 case SVETypeFlags::EltTyBool16:
3853   return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3854 case SVETypeFlags::EltTyBool32:
3855   return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3856 case SVETypeFlags::EltTyBool64:
3857   return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3872 return llvm::ScalableVectorType::get(EltTy, NumElts);
3878 llvm::ScalableVectorType *VTy) {
3880 if (isa<TargetExtType>(Pred->getType()) &&
3881     cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
3884 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
3889 llvm::Type *IntrinsicTy;
3890 switch (VTy->getMinNumElements()) {
3892   llvm_unreachable("unsupported element count!");
3897   IntID = Intrinsic::aarch64_sve_convert_from_svbool;
3901   IntID = Intrinsic::aarch64_sve_convert_to_svbool;
3902   IntrinsicTy = Pred->getType();
3908 assert(C->getType() == RTy && "Unexpected return type!");
3913 llvm::StructType *Ty) {
3914 if (PredTuple->getType() == Ty)
3917 Value *Ret = llvm::PoisonValue::get(Ty);
3918 for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
3919   Value *Pred = Builder.CreateExtractValue(PredTuple, I);
3921       Pred, cast<llvm::ScalableVectorType>(Ty->getTypeAtIndex(I)));
3922   Ret = Builder.CreateInsertValue(Ret, Pred, I);
3932 auto *OverloadedTy =
3936 if (Ops[1]->getType()->isVectorTy())
3956 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
3961 if (Ops.size() == 2) {
3962   assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
3963   Ops.push_back(ConstantInt::get(Int64Ty, 0));
3968 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
3969   unsigned BytesPerElt =
3970       OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
3971   Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
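// For element-indexed gathers the index vector is scaled to byte units
// by shifting left by log2 of the element size in bytes.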
3986 auto *OverloadedTy =
3991 Ops.insert(Ops.begin(), Ops.pop_back_val());
3994 if (Ops[2]->getType()->isVectorTy())
4009 if (Ops.size() == 3) {
4010   assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
4011   Ops.push_back(ConstantInt::get(Int64Ty, 0));
4016 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
4026     Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
4030 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
4031   unsigned BytesPerElt =
4032       OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
4033   Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
4036 return Builder.CreateCall(F, Ops);
4044 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
4046 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
4052 if (Ops[1]->getType()->isVectorTy()) {
4053 if (Ops.size() == 3) {
4055     Ops.push_back(ConstantInt::get(Int64Ty, 0));
4058   std::swap(Ops[2], Ops[3]);
4062 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
4063 if (BytesPerElt > 1)
4064   Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
4069 return Builder.CreateCall(F, Ops);
4075 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
4077 Value *BasePtr = Ops[1];
4084 return Builder.CreateCall(F, {Predicate, BasePtr});
4090 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
4094 case Intrinsic::aarch64_sve_st2:
4095 case Intrinsic::aarch64_sve_st1_pn_x2:
4096 case Intrinsic::aarch64_sve_stnt1_pn_x2:
4097 case Intrinsic::aarch64_sve_st2q:
4100 case Intrinsic::aarch64_sve_st3:
4101 case Intrinsic::aarch64_sve_st3q:
4104 case Intrinsic::aarch64_sve_st4:
4105 case Intrinsic::aarch64_sve_st1_pn_x4:
4106 case Intrinsic::aarch64_sve_stnt1_pn_x4:
4107 case Intrinsic::aarch64_sve_st4q:
4111 llvm_unreachable("unknown intrinsic!");
4115 Value *BasePtr = Ops[1];
4118 if (Ops.size() > (2 + N))
4124 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
4125   Operands.push_back(Ops[I]);
4126 Operands.append({Predicate, BasePtr});
4129 return Builder.CreateCall(F, Operands);
4137 unsigned BuiltinID) {
4149 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
4155 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
4162 unsigned BuiltinID) {
4165 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
4168 Value *BasePtr = Ops[1];
4174 Value *PrfOp = Ops.back();
4177 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
4181 llvm::Type *ReturnTy,
4183 unsigned IntrinsicID,
4184 bool IsZExtReturn) {
4191 if (MemEltTy->isVectorTy()) {
4192 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
4193        "Only <1 x i8> expected");
4194 MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
4199 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
4200 llvm::ScalableVectorType *MemoryTy = nullptr;
4201 llvm::ScalableVectorType *PredTy = nullptr;
4202 bool IsQuadLoad = false;
4203 switch (IntrinsicID) {
4204 case Intrinsic::aarch64_sve_ld1uwq:
4205 case Intrinsic::aarch64_sve_ld1udq:
4206 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
4207 PredTy = llvm::ScalableVectorType::get(
4212 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
4218 Value *BasePtr = Ops[1];
4226     cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
4233 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
4234                     : Builder.CreateSExt(Load, VectorTy);
4239 unsigned IntrinsicID) {
4246 if (MemEltTy->isVectorTy()) {
4247 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
4248        "Only <1 x i8> expected");
4249 MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
4254 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
4255 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
4257 auto PredTy = MemoryTy;
4258 auto AddrMemoryTy = MemoryTy;
4259 bool IsQuadStore = false;
4261 switch (IntrinsicID) {
4262 case Intrinsic::aarch64_sve_st1wq:
4263 case Intrinsic::aarch64_sve_st1dq:
4264 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
4266     llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
4273 Value *BasePtr = Ops[1];
4276 if (Ops.size() == 4)
4281     IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
4286     cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
4299 NewOps.push_back(Ops[2]);
4301 llvm::Value *BasePtr = Ops[3];
4302 llvm::Value *RealSlice = Ops[1];
4305 if (Ops.size() == 5) {
4308 llvm::Value *StreamingVectorLengthCall =
4309 Builder.CreateCall(StreamingVectorLength);
4310 llvm::Value *Mulvl =
4311     Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
4315 RealSlice = Builder.CreateAdd(RealSlice, Ops[4]);
4318 NewOps.push_back(BasePtr);
4319 NewOps.push_back(Ops[0]);
4320 NewOps.push_back(RealSlice);
4322 return Builder.CreateCall(F, NewOps);
4334 return Builder.CreateCall(F, Ops);
4341 if (Ops.size() == 0)
4342   Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
4344 return Builder.CreateCall(F, Ops);
4350 if (Ops.size() == 2)
4351   Ops.push_back(Builder.getInt32(0));
4355 return Builder.CreateCall(F, Ops);
4361 return Builder.CreateVectorSplat(
4362 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
4366 if (auto *Ty = Scalar->getType(); Ty->isVectorTy()) {
4368   auto *VecTy = cast<llvm::VectorType>(Ty);
4369   ElementCount EC = VecTy->getElementCount();
4370   assert(EC.isScalar() && VecTy->getElementType() == Int8Ty &&
4371          "Only <1 x i8> expected");
4373   Scalar = Builder.CreateExtractElement(Scalar, uint64_t(0));
4386 if (auto *StructTy = dyn_cast<StructType>(Ty)) {
4387   Value *Tuple = llvm::PoisonValue::get(Ty);
4389   for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
4391     Value *Out = Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
4392     Tuple = Builder.CreateInsertValue(Tuple, Out, I);
4398 return Builder.CreateBitCast(Val, Ty);
4403 auto *SplatZero = Constant::getNullValue(Ty);
4404 Ops.insert(Ops.begin(), SplatZero);
4409 auto *SplatUndef = UndefValue::get(Ty);
4410 Ops.insert(Ops.begin(), SplatUndef);
4415 llvm::Type *ResultType,
4420 llvm::Type *DefaultType = getSVEType(TypeFlags);
4423 return {DefaultType, Ops[1]->getType()};
4429 return {Ops[0]->getType(), Ops.back()->getType()};
4431 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
4432     ResultType->isVectorTy())
4433 return {ResultType, Ops[1]->getType()};
4436 return {DefaultType};
4442 "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
4443 unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();
4446 return Builder.CreateInsertValue(Ops[0], Ops[2], Idx);
4447 return Builder.CreateExtractValue(Ops[0], Idx);
4453 assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
4455 Value *Tuple = llvm::PoisonValue::get(Ty);
4456 for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
4457   Tuple = Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
4466 unsigned ICEArguments = 0;
4475 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
4476   bool IsICE = ICEArguments & (1 << i);
4482   std::optional<llvm::APSInt> Result =
4484   assert(Result && "Expected argument to be a constant");
4494 if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
4495   for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
4496     Ops.push_back(Builder.CreateExtractValue(Arg, I));
4508 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
4509 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
4543 return UndefValue::get(Ty);
4544 else if (Builtin->LLVMIntrinsic != 0) {
4548     Ops.pop_back_val());
4549   if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
4552   if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
4558     Ops.push_back(Builder.getInt32(31));
4560     Ops.insert(&Ops[1], Builder.getInt32(31));
4563   for (Value *&Op : Ops)
4564     if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
4565       if (PredTy->getElementType()->isIntegerTy(1))
4575 std::swap(Ops[1], Ops[2]);
4577 std::swap(Ops[1], Ops[2]);
4580 std::swap(Ops[1], Ops[2]);
4583 std::swap(Ops[1], Ops[3]);
4586 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
4587   llvm::Type *OpndTy = Ops[1]->getType();
4588   auto *SplatZero = Constant::getNullValue(OpndTy);
4589   Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
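// Zeroing merge (_z) forms clear the inactive lanes up front by
// selecting between the operand and zero through the governing
// predicate before the intrinsic call.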
4596 if (Call->getType() == Ty)
4600 if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ty))
4602 if (auto PredTupleTy = dyn_cast<llvm::StructType>(Ty))
4605 llvm_unreachable("unsupported element count!");
4608 switch (BuiltinID) {
4612 case SVE::BI__builtin_sve_svreinterpret_b: {
4616     CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4617 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
4619 case SVE::BI__builtin_sve_svreinterpret_c: {
4623     CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4624 return Builder.CreateCall(CastToSVCountF, Ops[0]);
4627 case SVE::BI__builtin_sve_svpsel_lane_b8:
4628 case SVE::BI__builtin_sve_svpsel_lane_b16:
4629 case SVE::BI__builtin_sve_svpsel_lane_b32:
4630 case SVE::BI__builtin_sve_svpsel_lane_b64:
4631 case SVE::BI__builtin_sve_svpsel_lane_c8:
4632 case SVE::BI__builtin_sve_svpsel_lane_c16:
4633 case SVE::BI__builtin_sve_svpsel_lane_c32:
4634 case SVE::BI__builtin_sve_svpsel_lane_c64: {
4635 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
4636 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
4637 "aarch64.svcount")) &&
4638 "Unexpected TargetExtType");
4642     CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4644     CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4649     IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
4651 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
4652 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
4654 case SVE::BI__builtin_sve_svmov_b_z: {
4657   llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4659   return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
4662 case SVE::BI__builtin_sve_svnot_b_z: {
4665   llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4667 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
4670 case SVE::BI__builtin_sve_svmovlb_u16:
4671 case SVE::BI__builtin_sve_svmovlb_u32:
4672 case SVE::BI__builtin_sve_svmovlb_u64:
4673 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
4675 case SVE::BI__builtin_sve_svmovlb_s16:
4676 case SVE::BI__builtin_sve_svmovlb_s32:
4677 case SVE::BI__builtin_sve_svmovlb_s64:
4678 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
4680 case SVE::BI__builtin_sve_svmovlt_u16:
4681 case SVE::BI__builtin_sve_svmovlt_u32:
4682 case SVE::BI__builtin_sve_svmovlt_u64:
4683 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
4685 case SVE::BI__builtin_sve_svmovlt_s16:
4686 case SVE::BI__builtin_sve_svmovlt_s32:
4687 case SVE::BI__builtin_sve_svmovlt_s64:
4688 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
4690 case SVE::BI__builtin_sve_svpmullt_u16:
4691 case SVE::BI__builtin_sve_svpmullt_u64:
4692 case SVE::BI__builtin_sve_svpmullt_n_u16:
4693 case SVE::BI__builtin_sve_svpmullt_n_u64:
4694 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
4696 case SVE::BI__builtin_sve_svpmullb_u16:
4697 case SVE::BI__builtin_sve_svpmullb_u64:
4698 case SVE::BI__builtin_sve_svpmullb_n_u16:
4699 case SVE::BI__builtin_sve_svpmullb_n_u64:
4700 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
4702 case SVE::BI__builtin_sve_svdup_n_b8:
4703 case SVE::BI__builtin_sve_svdup_n_b16:
4704 case SVE::BI__builtin_sve_svdup_n_b32:
4705 case SVE::BI__builtin_sve_svdup_n_b64: {
4707 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
4708 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
  case SVE::BI__builtin_sve_svdupq_n_b8:
  case SVE::BI__builtin_sve_svdupq_n_b16:
  case SVE::BI__builtin_sve_svdupq_n_b32:
  case SVE::BI__builtin_sve_svdupq_n_b64:
  case SVE::BI__builtin_sve_svdupq_n_u8:
  case SVE::BI__builtin_sve_svdupq_n_s8:
  case SVE::BI__builtin_sve_svdupq_n_u64:
  case SVE::BI__builtin_sve_svdupq_n_f64:
  case SVE::BI__builtin_sve_svdupq_n_s64:
  case SVE::BI__builtin_sve_svdupq_n_u16:
  case SVE::BI__builtin_sve_svdupq_n_f16:
  case SVE::BI__builtin_sve_svdupq_n_bf16:
  case SVE::BI__builtin_sve_svdupq_n_s16:
  case SVE::BI__builtin_sve_svdupq_n_u32:
  case SVE::BI__builtin_sve_svdupq_n_f32:
  case SVE::BI__builtin_sve_svdupq_n_s32: {
    unsigned NumOpnds = Ops.size();
    bool IsBoolTy =
        cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
    llvm::Type *EltTy = Ops[0]->getType();
    SmallVector<llvm::Value *, 16> VecOps;
    for (unsigned I = 0; I < NumOpnds; ++I)
      VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
    Value *InsertSubVec = Builder.CreateInsertVector(
        OverloadedTy, PoisonValue::get(OverloadedTy), Vec, uint64_t(0));
                                       : Intrinsic::aarch64_sve_cmpne_wide,
  case SVE::BI__builtin_sve_svpfalse_b:
    return ConstantInt::getFalse(Ty);

  case SVE::BI__builtin_sve_svpfalse_c: {
    auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
    Function *CastToSVCountF =
        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
    return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
  }
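  // Note: there is no direct constant form for an all-false svcount_t, so it
  // is built by casting an all-false 16-lane svbool constant through
  // aarch64_sve_convert_from_svbool.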
  case SVE::BI__builtin_sve_svlen_bf16:
  case SVE::BI__builtin_sve_svlen_f16:
  case SVE::BI__builtin_sve_svlen_f32:
  case SVE::BI__builtin_sve_svlen_f64:
  case SVE::BI__builtin_sve_svlen_s8:
  case SVE::BI__builtin_sve_svlen_s16:
  case SVE::BI__builtin_sve_svlen_s32:
  case SVE::BI__builtin_sve_svlen_s64:
  case SVE::BI__builtin_sve_svlen_u8:
  case SVE::BI__builtin_sve_svlen_u16:
  case SVE::BI__builtin_sve_svlen_u32:
  case SVE::BI__builtin_sve_svlen_u64: {
  case SVE::BI__builtin_sve_svtbl2_u8:
  case SVE::BI__builtin_sve_svtbl2_s8:
  case SVE::BI__builtin_sve_svtbl2_u16:
  case SVE::BI__builtin_sve_svtbl2_s16:
  case SVE::BI__builtin_sve_svtbl2_u32:
  case SVE::BI__builtin_sve_svtbl2_s32:
  case SVE::BI__builtin_sve_svtbl2_u64:
  case SVE::BI__builtin_sve_svtbl2_s64:
  case SVE::BI__builtin_sve_svtbl2_f16:
  case SVE::BI__builtin_sve_svtbl2_bf16:
  case SVE::BI__builtin_sve_svtbl2_f32:
  case SVE::BI__builtin_sve_svtbl2_f64: {
    auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TypeFlags));
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
    return Builder.CreateCall(F, Ops);
  }
  case SVE::BI__builtin_sve_svset_neonq_s8:
  case SVE::BI__builtin_sve_svset_neonq_s16:
  case SVE::BI__builtin_sve_svset_neonq_s32:
  case SVE::BI__builtin_sve_svset_neonq_s64:
  case SVE::BI__builtin_sve_svset_neonq_u8:
  case SVE::BI__builtin_sve_svset_neonq_u16:
  case SVE::BI__builtin_sve_svset_neonq_u32:
  case SVE::BI__builtin_sve_svset_neonq_u64:
  case SVE::BI__builtin_sve_svset_neonq_f16:
  case SVE::BI__builtin_sve_svset_neonq_f32:
  case SVE::BI__builtin_sve_svset_neonq_f64:
  case SVE::BI__builtin_sve_svset_neonq_bf16: {
    return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], uint64_t(0));
  }
  case SVE::BI__builtin_sve_svget_neonq_s8:
  case SVE::BI__builtin_sve_svget_neonq_s16:
  case SVE::BI__builtin_sve_svget_neonq_s32:
  case SVE::BI__builtin_sve_svget_neonq_s64:
  case SVE::BI__builtin_sve_svget_neonq_u8:
  case SVE::BI__builtin_sve_svget_neonq_u16:
  case SVE::BI__builtin_sve_svget_neonq_u32:
  case SVE::BI__builtin_sve_svget_neonq_u64:
  case SVE::BI__builtin_sve_svget_neonq_f16:
  case SVE::BI__builtin_sve_svget_neonq_f32:
  case SVE::BI__builtin_sve_svget_neonq_f64:
  case SVE::BI__builtin_sve_svget_neonq_bf16: {
    return Builder.CreateExtractVector(Ty, Ops[0], uint64_t(0));
  }
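  // Note: svset_neonq and svget_neonq are pure subvector moves: the fixed
  // 128-bit NEON vector is inserted into (or extracted from) element 0 of the
  // scalable SVE vector, with no lane reshuffling.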
  case SVE::BI__builtin_sve_svdup_neonq_s8:
  case SVE::BI__builtin_sve_svdup_neonq_s16:
  case SVE::BI__builtin_sve_svdup_neonq_s32:
  case SVE::BI__builtin_sve_svdup_neonq_s64:
  case SVE::BI__builtin_sve_svdup_neonq_u8:
  case SVE::BI__builtin_sve_svdup_neonq_u16:
  case SVE::BI__builtin_sve_svdup_neonq_u32:
  case SVE::BI__builtin_sve_svdup_neonq_u64:
  case SVE::BI__builtin_sve_svdup_neonq_f16:
  case SVE::BI__builtin_sve_svdup_neonq_f32:
  case SVE::BI__builtin_sve_svdup_neonq_f64:
  case SVE::BI__builtin_sve_svdup_neonq_bf16: {
    Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
                                               uint64_t(0));
    return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
                                   {Insert, Builder.getInt64(0)});
  }
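  // Note: svdup_neonq broadcasts a 128-bit NEON vector across the whole SVE
  // vector: the input is inserted at index 0 of a poison scalable vector and
  // quadword lane 0 is then replicated with aarch64_sve_dupq_lane.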
  switch (BuiltinID) {
  default:
    break;
  case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
    MultiVec = 1;
    break;
  case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
  case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
    MultiVec = 2;
    break;
  case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
  case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
    MultiVec = 4;
    break;
  }
  if (MultiVec > 0)
    for (unsigned I = 0; I < MultiVec; ++I)
      std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
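  // Note: the SUDOT/SUMLA builtins take (signed, unsigned) operands but are
  // emitted through the USDOT/USMLA intrinsics, so the two multi-vector
  // operand groups are swapped pairwise to match the intrinsic order.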
  else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
           BuiltinID == SME::BI__builtin_sme_svzero_za)
    return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
           BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
           BuiltinID == SME::BI__builtin_sme_svldr_za ||
           BuiltinID == SME::BI__builtin_sme_svstr_za)
    return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);

  if (TypeFlags.setsFPMR())
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
                       Ops.pop_back_val());
  if (Builtin->LLVMIntrinsic == 0)
    return nullptr;

  // Predicates must match the main datatype.
  for (Value *&Op : Ops)
    if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
      if (PredTy->getElementType()->isIntegerTy(1))
        Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));

  return Builder.CreateCall(F, Ops);
  llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
  llvm::Function *F =
      CGF.CGM.getIntrinsic(Intrinsic::read_register, {CGF.Int64Ty});
  llvm::Value *X18 = CGF.Builder.CreateCall(F, Metadata);
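// Note: the platform register x18 has no dedicated intrinsic; it is read
// through the generic llvm.read_register intrinsic with the register name
// supplied as MDString metadata, as for other named-register accesses.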
                                               llvm::Triple::ArchType Arch) {
  if (BuiltinID == Builtin::BI__builtin_cpu_supports)
    return EmitAArch64CpuSupports(E);
  unsigned HintID = static_cast<unsigned>(-1);
  switch (BuiltinID) {
  default:
    break;
  case clang::AArch64::BI__builtin_arm_nop:
    HintID = 0;
    break;
  case clang::AArch64::BI__builtin_arm_yield:
  case clang::AArch64::BI__yield:
    HintID = 1;
    break;
  case clang::AArch64::BI__builtin_arm_wfe:
  case clang::AArch64::BI__wfe:
    HintID = 2;
    break;
  case clang::AArch64::BI__builtin_arm_wfi:
  case clang::AArch64::BI__wfi:
    HintID = 3;
    break;
  case clang::AArch64::BI__builtin_arm_sev:
  case clang::AArch64::BI__sev:
    HintID = 4;
    break;
  case clang::AArch64::BI__builtin_arm_sevl:
  case clang::AArch64::BI__sevl:
    HintID = 5;
    break;
  }

  if (HintID != static_cast<unsigned>(-1)) {
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
    return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
  }
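  // Note: NOP/YIELD/WFE/WFI/SEV/SEVL all share the HINT instruction encoding,
  // so they lower to the single aarch64.hint intrinsic with the immediate
  // selecting the particular hint.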
  if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
    // Create call to __arm_sme_state and store the results to the two pointers.
    CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
        llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
                                false),
        "__arm_sme_state"));
    auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
                                                "aarch64_pstate_sm_compatible");
    CI->setAttributes(Attrs);
    CI->setCallingConv(
        llvm::CallingConv::
            AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
    assert((getContext().getTypeSize(E->getType()) == 32) &&
           "rbit of unusual size!");
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
  }
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
    assert((getContext().getTypeSize(E->getType()) == 64) &&
           "rbit of unusual size!");
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
  }
  if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
      BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
    Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
    if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
      Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
    return Res;
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
                              "cls");
  }
  if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
                              "cls");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Ty = Arg->getType();
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
                              Arg);
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Ty = Arg->getType();
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
                              Arg);
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Ty = Arg->getType();
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
                              Arg);
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Ty = Arg->getType();
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
                              Arg);
  }
  if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
    assert((getContext().getTypeSize(E->getType()) == 32) &&
           "__jcvt of unusual size!");
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs),
                              Arg);
  }
  if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
      BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
      BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
      BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
    llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
    llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));

    if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
      // Load from the address via an LLVM intrinsic, receiving a
      // tuple of 8 i64 words, and store each one to ValPtr.
      Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
      llvm::Value *Val = Builder.CreateCall(F, MemAddr);
      llvm::Value *ToRet;
      for (size_t i = 0; i < 8; i++) {
        llvm::Value *ValOffsetPtr =
            Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
        Address Addr =
            Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
        ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
      }
      return ToRet;
    } else {
      // Load 8 i64 words from ValPtr, and store them to the address
      // via an LLVM intrinsic.
      SmallVector<llvm::Value *, 9> Args;
      Args.push_back(MemAddr);
      for (size_t i = 0; i < 8; i++) {
        llvm::Value *ValOffsetPtr =
            Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
        Address Addr =
            Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
        Args.push_back(Builder.CreateLoad(Addr));
      }

      auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
                       ? Intrinsic::aarch64_st64b
                   : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
                       ? Intrinsic::aarch64_st64bv
                       : Intrinsic::aarch64_st64bv0);
      Function *F = CGM.getIntrinsic(Intr);
      return Builder.CreateCall(F, Args);
    }
  }
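  // Note: ld64b returns a tuple of eight i64 values that is scattered to the
  // user buffer with eight stores; the st64b family conversely gathers eight
  // i64 loads into the intrinsic's argument list after the address.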
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {

    auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
                     ? Intrinsic::aarch64_rndr
                     : Intrinsic::aarch64_rndrrs);
    Function *F = CGM.getIntrinsic(Intr);
    llvm::Value *Val = Builder.CreateCall(F);
    Value *RandomValue = Builder.CreateExtractValue(Val, 0);
    Value *Status = Builder.CreateExtractValue(Val, 1);

    Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
    Builder.CreateStore(RandomValue, MemAddress);
    Status = Builder.CreateZExt(Status, Int32Ty);
    return Status;
  }
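  // Note: both rndr intrinsics return {i64, i1}; the i1 reports whether the
  // hardware produced a valid random value and is widened to the builtin's
  // i32 status result.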
  if (BuiltinID == clang::AArch64::BI__clear_cache) {
    assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
    const FunctionDecl *FD = E->getDirectCallee();
    Value *Ops[2];
    for (unsigned i = 0; i < 2; i++)
      Ops[i] = EmitScalarExpr(E->getArg(i));
    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
    StringRef Name = FD->getName();
    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
  }
  if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
       BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
      getContext().getTypeSize(E->getType()) == 128) {
    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
                             ? Intrinsic::aarch64_ldaxp
                             : Intrinsic::aarch64_ldxp);

    Value *LdPtr = EmitScalarExpr(E->getArg(0));
    Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");

    Value *Val0 = Builder.CreateExtractValue(Val, 1);
    Value *Val1 = Builder.CreateExtractValue(Val, 0);
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
    Val0 = Builder.CreateZExt(Val0, Int128Ty);
    Val1 = Builder.CreateZExt(Val1, Int128Ty);

    Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
    Val = Builder.CreateShl(Val0, ShiftCst, "shl", /*HasNUW=*/true);
    Val = Builder.CreateOr(Val, Val1);
    return Builder.CreateBitCast(Val, ConvertType(E->getType()));
  } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
             BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
    Value *LoadAddr = EmitScalarExpr(E->getArg(0));
    QualType Ty = E->getType();
    llvm::Type *RealResTy = ConvertType(Ty);

    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
                             ? Intrinsic::aarch64_ldaxr
                             : Intrinsic::aarch64_ldxr,
                         LoadAddr->getType());
    CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");

    if (RealResTy->isPointerTy())
      return Builder.CreateIntToPtr(Val, RealResTy);

    llvm::Type *IntResTy = llvm::IntegerType::get(
        getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
    return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
                                 RealResTy);
  }
  if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
       BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
      getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
                             ? Intrinsic::aarch64_stlxp
                             : Intrinsic::aarch64_stxp);
    Value *One = EmitScalarExpr(E->getArg(0));
    Value *Arg0 = Builder.CreateExtractValue(
        Builder.CreateBitCast(One, llvm::StructType::get(Int64Ty, Int64Ty)), 0);
    Value *Arg1 = Builder.CreateExtractValue(
        Builder.CreateBitCast(One, llvm::StructType::get(Int64Ty, Int64Ty)), 1);
    Value *StPtr = EmitScalarExpr(E->getArg(1));
    return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
  }
  if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
      BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
    Value *StoreVal = EmitScalarExpr(E->getArg(0));
    Value *StoreAddr = EmitScalarExpr(E->getArg(1));

    QualType Ty = E->getArg(0)->getType();
    llvm::Type *StoreTy =
        llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));

    if (StoreVal->getType()->isPointerTy())
      StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
    else {
      llvm::Type *IntTy = llvm::IntegerType::get(
          getLLVMContext(),
          CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
      StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
      StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
    }

    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
                             ? Intrinsic::aarch64_stlxr
                             : Intrinsic::aarch64_stxr,
                         StoreAddr->getType());
    CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
    CI->addParamAttr(
        1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
    return CI;
  }
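  // Note: for non-128-bit types a single ldxr/stxr is used and the value is
  // round-tripped through an integer of the same width; the ElementType
  // attribute records the actual memory type for the backend.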
  if (BuiltinID == clang::AArch64::BI__getReg) {
    Expr::EvalResult Result;
    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
      llvm_unreachable("Sema will ensure that the parameter is constant");

    llvm::APSInt Value = Result.Val.getInt();
    LLVMContext &Context = CGM.getLLVMContext();
    std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);

    llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);

    llvm::Function *F = CGM.getIntrinsic(Intrinsic::read_register, {Int64Ty});
    return Builder.CreateCall(F, Metadata);
  }

  if (BuiltinID == clang::AArch64::BI__break) {
    Expr::EvalResult Result;
    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
      llvm_unreachable("Sema will ensure that the parameter is constant");

    llvm::Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
    return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
  }
  if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
    return Builder.CreateCall(F);
  }

  if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
                               llvm::SyncScope::SingleThread);
  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  case clang::AArch64::BI__builtin_arm_crc32b:
    CRCIntrinsicID = Intrinsic::aarch64_crc32b;
    break;
  case clang::AArch64::BI__builtin_arm_crc32cb:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cb;
    break;
  case clang::AArch64::BI__builtin_arm_crc32h:
    CRCIntrinsicID = Intrinsic::aarch64_crc32h;
    break;
  case clang::AArch64::BI__builtin_arm_crc32ch:
    CRCIntrinsicID = Intrinsic::aarch64_crc32ch;
    break;
  case clang::AArch64::BI__builtin_arm_crc32w:
    CRCIntrinsicID = Intrinsic::aarch64_crc32w;
    break;
  case clang::AArch64::BI__builtin_arm_crc32cw:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cw;
    break;
  case clang::AArch64::BI__builtin_arm_crc32d:
    CRCIntrinsicID = Intrinsic::aarch64_crc32x;
    break;
  case clang::AArch64::BI__builtin_arm_crc32cd:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cx;
    break;
  }
  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
    Value *Arg1 = EmitScalarExpr(E->getArg(1));
    Function *F = CGM.getIntrinsic(CRCIntrinsicID);

    llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
    Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);

    return Builder.CreateCall(F, {Arg0, Arg1});
  }
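  // Note: the 64-bit variants map onto the 'x' intrinsics, e.g.
  // __builtin_arm_crc32d lowers to @llvm.aarch64.crc32x; the data operand is
  // zero-extended or bitcast to the intrinsic's parameter type first.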
  if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
    Value *Dst = EmitScalarExpr(E->getArg(0));
    Value *Val = EmitScalarExpr(E->getArg(1));
    Value *Size = EmitScalarExpr(E->getArg(2));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag),
        {Dst, Val, Size});
  }
  Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  case clang::AArch64::BI__builtin_arm_irg:
    MTEIntrinsicID = Intrinsic::aarch64_irg;
    break;
  case clang::AArch64::BI__builtin_arm_addg:
    MTEIntrinsicID = Intrinsic::aarch64_addg;
    break;
  case clang::AArch64::BI__builtin_arm_gmi:
    MTEIntrinsicID = Intrinsic::aarch64_gmi;
    break;
  case clang::AArch64::BI__builtin_arm_ldg:
    MTEIntrinsicID = Intrinsic::aarch64_ldg;
    break;
  case clang::AArch64::BI__builtin_arm_stg:
    MTEIntrinsicID = Intrinsic::aarch64_stg;
    break;
  case clang::AArch64::BI__builtin_arm_subp:
    MTEIntrinsicID = Intrinsic::aarch64_subp;
    break;
  }
  if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
    if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
      Value *Pointer = EmitScalarExpr(E->getArg(0));
      Value *Mask = EmitScalarExpr(E->getArg(1));
      return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
                                {Pointer, Mask});
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
      Value *Pointer = EmitScalarExpr(E->getArg(0));
      Value *TagOffset = EmitScalarExpr(E->getArg(1));
      return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
                                {Pointer, TagOffset});
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
      Value *Pointer = EmitScalarExpr(E->getArg(0));
      Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
      return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
                                {Pointer, ExcludedMask});
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
      Value *TagAddress = EmitScalarExpr(E->getArg(0));
      return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
                                {TagAddress, TagAddress});
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
      Value *TagAddress = EmitScalarExpr(E->getArg(0));
      return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
                                {TagAddress, TagAddress});
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
      Value *PointerA = EmitScalarExpr(E->getArg(0));
      Value *PointerB = EmitScalarExpr(E->getArg(1));
      return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
                                {PointerA, PointerB});
    }
  }
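  // Note: the MTE builtins map 1:1 onto intrinsics; ldg and stg pass the same
  // pointer twice because the intrinsic distinguishes the pointer being
  // retagged from the address the allocation tag is read from, and the
  // builtin uses one pointer for both roles.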
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {

    SpecialRegisterAccessKind AccessKind = Write;
    if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
        BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
        BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
        BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
      AccessKind = VolatileRead;

    bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
                            BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;

    bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
                   BuiltinID == clang::AArch64::BI__builtin_arm_wsr;

    bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
                    BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;

    llvm::Type *ValueType;
    llvm::Type *RegisterType = Int64Ty;
    if (Is32Bit) {
      ValueType = Int32Ty;
    } else if (Is128Bit) {
      llvm::Type *Int128Ty =
          llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
      ValueType = Int128Ty;
      RegisterType = Int128Ty;
    } else if (IsPointerBuiltin) {
      ValueType = VoidPtrTy;
    } else {
      ValueType = Int64Ty;
    }

    return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
                                      AccessKind);
  }
  if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
      BuiltinID == clang::AArch64::BI_WriteStatusReg ||
      BuiltinID == clang::AArch64::BI__sys) {
    LLVMContext &Context = CGM.getLLVMContext();
    unsigned SysReg =
        E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();

    std::string SysRegStr;
    unsigned SysRegOp0 = (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
                          BuiltinID == clang::AArch64::BI_WriteStatusReg)
                             ? ((1 << 1) | ((SysReg >> 14) & 1))
    llvm::raw_string_ostream(SysRegStr)
        << SysRegOp0 << ":" << ((SysReg >> 11) & 7) << ":"
        << ((SysReg >> 7) & 15) << ":" << ((SysReg >> 3) & 15) << ":"
        << (SysReg & 7);

    llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);

    llvm::Type *RegisterType = Int64Ty;
    llvm::Type *Types[] = { RegisterType };

    if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
      llvm::Function *F = CGM.getIntrinsic(Intrinsic::read_register, Types);
      return Builder.CreateCall(F, Metadata);
    }

    llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
    llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
    llvm::Value *Result = Builder.CreateCall(F, {Metadata, ArgValue});
    if (BuiltinID == clang::AArch64::BI__sys) {
  if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
    llvm::Function *F =
        CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
    return Builder.CreateCall(F);
  }

  if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
    return Builder.CreateCall(F);
  }

  if (BuiltinID == clang::AArch64::BI__mulh ||
      BuiltinID == clang::AArch64::BI__umulh) {
    llvm::Type *ResType = ConvertType(E->getType());
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);

    bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
    Value *LHS =
        Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
    Value *RHS =
        Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);

    Value *MulResult, *HigherBits;
    if (IsSigned) {
      MulResult = Builder.CreateNSWMul(LHS, RHS);
      HigherBits = Builder.CreateAShr(MulResult, 64);
    } else {
      MulResult = Builder.CreateNUWMul(LHS, RHS);
      HigherBits = Builder.CreateLShr(MulResult, 64);
    }
    HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);

    return HigherBits;
  }
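  // Note: __mulh/__umulh have no 128-bit source-level type, so the operands
  // are widened to i128, multiplied, and the upper 64 bits shifted down and
  // truncated back to the result type.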
  if (BuiltinID == AArch64::BI__writex18byte ||
      BuiltinID == AArch64::BI__writex18word ||
      BuiltinID == AArch64::BI__writex18dword ||
      BuiltinID == AArch64::BI__writex18qword) {
    // Process the args first in case they have side effects.
    Value *OffsetArg = EmitScalarExpr(E->getArg(0));
    Value *DataArg = EmitScalarExpr(E->getArg(1));

    // Read x18 as i8*, then store the value at x18 + offset.
    llvm::Value *X18 = readX18AsPtr(*this);
    Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
    Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
    StoreInst *Store =
        Builder.CreateAlignedStore(DataArg, Ptr, CharUnits::One());
    return Store;
  }

  if (BuiltinID == AArch64::BI__readx18byte ||
      BuiltinID == AArch64::BI__readx18word ||
      BuiltinID == AArch64::BI__readx18dword ||
      BuiltinID == AArch64::BI__readx18qword) {
    // Read x18 as i8*, then load the value at x18 + offset.
    llvm::Value *X18 = readX18AsPtr(*this);
    Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
    Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
    llvm::Type *IntTy = ConvertType(E->getType());
    LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
    return Load;
  }

  if (BuiltinID == AArch64::BI__addx18byte ||
      BuiltinID == AArch64::BI__addx18word ||
      BuiltinID == AArch64::BI__addx18dword ||
      BuiltinID == AArch64::BI__addx18qword ||
      BuiltinID == AArch64::BI__incx18byte ||
      BuiltinID == AArch64::BI__incx18word ||
      BuiltinID == AArch64::BI__incx18dword ||
      BuiltinID == AArch64::BI__incx18qword) {
    bool isIncrement = true;
    switch (BuiltinID) {
    case AArch64::BI__incx18byte:
    case AArch64::BI__incx18word:
    case AArch64::BI__incx18dword:
    case AArch64::BI__incx18qword:
      break;
    default:
      isIncrement = false;
      break;
    }
  if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
      BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
      BuiltinID == AArch64::BI_CopyInt32FromFloat ||
      BuiltinID == AArch64::BI_CopyInt64FromDouble) {
    Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *RetTy = ConvertType(E->getType());
    return Builder.CreateBitCast(Arg, RetTy);
  }

  if (BuiltinID == AArch64::BI_CountLeadingOnes ||
      BuiltinID == AArch64::BI_CountLeadingOnes64 ||
      BuiltinID == AArch64::BI_CountLeadingZeros ||
      BuiltinID == AArch64::BI_CountLeadingZeros64) {
    Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = Arg->getType();

    if (BuiltinID == AArch64::BI_CountLeadingOnes ||
        BuiltinID == AArch64::BI_CountLeadingOnes64)
      Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));

    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
    Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});

    if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
        BuiltinID == AArch64::BI_CountLeadingZeros64)
      Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
    return Result;
  }

  if (BuiltinID == AArch64::BI_CountLeadingSigns ||
      BuiltinID == AArch64::BI_CountLeadingSigns64) {
    Value *Arg = EmitScalarExpr(E->getArg(0));

    Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
                      ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
                      : CGM.getIntrinsic(Intrinsic::aarch64_cls64);

    Value *Result = Builder.CreateCall(F, Arg, "cls");
    if (BuiltinID == AArch64::BI_CountLeadingSigns64)
      Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
    return Result;
  }

  if (BuiltinID == AArch64::BI_CountOneBits ||
      BuiltinID == AArch64::BI_CountOneBits64) {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    Value *Result = Builder.CreateCall(F, ArgValue);
    if (BuiltinID == AArch64::BI_CountOneBits64)
      Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
    return Result;
  }

  if (BuiltinID == AArch64::BI__prefetch) {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
    Value *Locality = ConstantInt::get(Int32Ty, 3);
    Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
    return Builder.CreateCall(F, {Address, RW, Locality, Data});
  }

  if (BuiltinID == AArch64::BI__hlt) {
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
    Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});

    // Return 0 for convenience, even though MSVC returns some other undefined
    // value.
    return ConstantInt::get(Builder.getInt32Ty(), 0);
  }
  if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
    return Builder.CreateFPTrunc(EmitScalarExpr(E->getArg(0)), BFloatTy);

  // Handle MSVC intrinsics before argument evaluation to prevent double
  // evaluation.
  if (std::optional<MSVCIntrin> MsvcIntId =
          translateAarch64ToMsvcIntrin(BuiltinID))
    return EmitMSVCBuiltinExpr(*MsvcIntId, E);

  auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
    return P.first == BuiltinID;
  });
  if (It != end(NEONEquivalentIntrinsicMap))
    BuiltinID = It->second;

  // Find out if any arguments are required to be integer constant expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");

  llvm::SmallVector<Value *, 4> Ops;
  Address PtrOp0 = Address::invalid();
  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
    switch (BuiltinID) {
    case NEON::BI__builtin_neon_vld1_v:
    case NEON::BI__builtin_neon_vld1q_v:
    case NEON::BI__builtin_neon_vld1_dup_v:
    case NEON::BI__builtin_neon_vld1q_dup_v:
    case NEON::BI__builtin_neon_vld1_lane_v:
    case NEON::BI__builtin_neon_vld1q_lane_v:
    case NEON::BI__builtin_neon_vst1_v:
    case NEON::BI__builtin_neon_vst1q_v:
    case NEON::BI__builtin_neon_vst1_lane_v:
    case NEON::BI__builtin_neon_vst1q_lane_v:
    case NEON::BI__builtin_neon_vldap1_lane_s64:
    case NEON::BI__builtin_neon_vldap1q_lane_s64:
    case NEON::BI__builtin_neon_vstl1_lane_s64:
    case NEON::BI__builtin_neon_vstl1q_lane_s64:
      // Get the alignment for the argument in addition to the value;
      // we'll use it later.
      PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
      Ops.push_back(PtrOp0.emitRawPointer(*this));
      continue;
    }
    Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
  }

  if (Builtin) {
    Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
    Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
    assert(Result && "SISD intrinsic should have been handled");
    return Result;
  }

  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  NeonTypeFlags Type(0);
  if (std::optional<llvm::APSInt> Result =
          Arg->getIntegerConstantExpr(getContext()))
    // Determine the type of this overloaded NEON intrinsic.
    Type = NeonTypeFlags(Result->getZExtValue());

  bool usgn = Type.isUnsigned();
  bool quad = Type.isQuad();
  switch (BuiltinID) {
  default:
    break;
  case NEON::BI__builtin_neon_vabsh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
  case NEON::BI__builtin_neon_vaddq_p128: {
    llvm::Type *Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Poly128));
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
    llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
    return Builder.CreateBitCast(Ops[0], Int128Ty);
  }
  case NEON::BI__builtin_neon_vldrq_p128: {
    llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    return Builder.CreateAlignedLoad(Int128Ty, Ptr,
                                     CharUnits::fromQuantity(16));
  }
  case NEON::BI__builtin_neon_vstrq_p128: {
    Value *Ptr = Ops[0];
    return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
  }
  case NEON::BI__builtin_neon_vcvts_f32_u32:
  case NEON::BI__builtin_neon_vcvtd_f64_u64:
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvts_f32_s32:
  case NEON::BI__builtin_neon_vcvtd_f64_s64: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
    llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
    llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
    Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
    if (usgn)
      return Builder.CreateUIToFP(Ops[0], FTy);
    return Builder.CreateSIToFP(Ops[0], FTy);
  }
  case NEON::BI__builtin_neon_vcvth_f16_u16:
  case NEON::BI__builtin_neon_vcvth_f16_u32:
  case NEON::BI__builtin_neon_vcvth_f16_u64:
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvth_f16_s16:
  case NEON::BI__builtin_neon_vcvth_f16_s32:
  case NEON::BI__builtin_neon_vcvth_f16_s64: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    llvm::Type *FTy = HalfTy;
    llvm::Type *InTy;
    if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
      InTy = Int64Ty;
    else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
      InTy = Int32Ty;
    else
      InTy = Int16Ty;
    Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
    if (usgn)
      return Builder.CreateUIToFP(Ops[0], FTy);
    return Builder.CreateSIToFP(Ops[0], FTy);
  }
  case NEON::BI__builtin_neon_vcvtah_u16_f16:
  case NEON::BI__builtin_neon_vcvtmh_u16_f16:
  case NEON::BI__builtin_neon_vcvtnh_u16_f16:
  case NEON::BI__builtin_neon_vcvtph_u16_f16:
  case NEON::BI__builtin_neon_vcvth_u16_f16:
  case NEON::BI__builtin_neon_vcvtah_s16_f16:
  case NEON::BI__builtin_neon_vcvtmh_s16_f16:
  case NEON::BI__builtin_neon_vcvtnh_s16_f16:
  case NEON::BI__builtin_neon_vcvtph_s16_f16:
  case NEON::BI__builtin_neon_vcvth_s16_f16: {
    unsigned Int;
    llvm::Type *InTy = Int32Ty;
    llvm::Type *FTy = HalfTy;
    llvm::Type *Tys[2] = {InTy, FTy};
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vcvtah_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtau;
      break;
    case NEON::BI__builtin_neon_vcvtmh_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtmu;
      break;
    case NEON::BI__builtin_neon_vcvtnh_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtnu;
      break;
    case NEON::BI__builtin_neon_vcvtph_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtpu;
      break;
    case NEON::BI__builtin_neon_vcvth_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtzu;
      break;
    case NEON::BI__builtin_neon_vcvtah_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtas;
      break;
    case NEON::BI__builtin_neon_vcvtmh_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtms;
      break;
    case NEON::BI__builtin_neon_vcvtnh_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtns;
      break;
    case NEON::BI__builtin_neon_vcvtph_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtps;
      break;
    case NEON::BI__builtin_neon_vcvth_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtzs;
      break;
    }
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
  }
  case NEON::BI__builtin_neon_vcaleh_f16:
  case NEON::BI__builtin_neon_vcalth_f16:
  case NEON::BI__builtin_neon_vcageh_f16:
  case NEON::BI__builtin_neon_vcagth_f16: {
    unsigned Int;
    llvm::Type *InTy = Int32Ty;
    llvm::Type *FTy = HalfTy;
    llvm::Type *Tys[2] = {InTy, FTy};
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vcageh_f16:
      Int = Intrinsic::aarch64_neon_facge;
      break;
    case NEON::BI__builtin_neon_vcagth_f16:
      Int = Intrinsic::aarch64_neon_facgt;
      break;
    case NEON::BI__builtin_neon_vcaleh_f16:
      Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]);
      break;
    case NEON::BI__builtin_neon_vcalth_f16:
      Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]);
      break;
    }
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
  }
  case NEON::BI__builtin_neon_vcvth_n_s16_f16:
  case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
    unsigned Int;
    llvm::Type *InTy = Int32Ty;
    llvm::Type *FTy = HalfTy;
    llvm::Type *Tys[2] = {InTy, FTy};
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vcvth_n_s16_f16:
      Int = Intrinsic::aarch64_neon_vcvtfp2fxs;
      break;
    case NEON::BI__builtin_neon_vcvth_n_u16_f16:
      Int = Intrinsic::aarch64_neon_vcvtfp2fxu;
      break;
    }
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
  }
  case NEON::BI__builtin_neon_vcvth_n_f16_s16:
  case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
    unsigned Int;
    llvm::Type *FTy = HalfTy;
    llvm::Type *InTy = Int32Ty;
    llvm::Type *Tys[2] = {FTy, InTy};
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vcvth_n_f16_s16:
      Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
      Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
      break;
    case NEON::BI__builtin_neon_vcvth_n_f16_u16:
      Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
      Ops[0] = Builder.CreateZExt(Ops[0], InTy);
      break;
    }
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
  }
  case NEON::BI__builtin_neon_vpaddd_s64: {
    auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
    Value *Vec = EmitScalarExpr(E->getArg(0));
    // The vector is v2i64, so make sure it's bitcast to that.
    Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
    // Pairwise addition of a v2i64 into a scalar i64.
    return Builder.CreateAdd(Op0, Op1, "vpaddd");
  }
  case NEON::BI__builtin_neon_vpaddd_f64: {
    auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
    Value *Vec = EmitScalarExpr(E->getArg(0));
    // The vector is v2f64, so make sure it's bitcast to that.
    Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
    // Pairwise addition of a v2f64 into a scalar f64.
    return Builder.CreateFAdd(Op0, Op1, "vpaddd");
  }
  case NEON::BI__builtin_neon_vpadds_f32: {
    auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
    Value *Vec = EmitScalarExpr(E->getArg(0));
    // The vector is v2f32, so make sure it's bitcast to that.
    Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
    // Pairwise addition of a v2f32 into a scalar f32.
    return Builder.CreateFAdd(Op0, Op1, "vpaddd");
  }
  case NEON::BI__builtin_neon_vceqzd_s64:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::ICMP_EQ, "vceqz");
  case NEON::BI__builtin_neon_vceqzd_f64:
  case NEON::BI__builtin_neon_vceqzs_f32:
  case NEON::BI__builtin_neon_vceqzh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OEQ, "vceqz");
  case NEON::BI__builtin_neon_vcgezd_s64:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::ICMP_SGE, "vcgez");
  case NEON::BI__builtin_neon_vcgezd_f64:
  case NEON::BI__builtin_neon_vcgezs_f32:
  case NEON::BI__builtin_neon_vcgezh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OGE, "vcgez");
  case NEON::BI__builtin_neon_vclezd_s64:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::ICMP_SLE, "vclez");
  case NEON::BI__builtin_neon_vclezd_f64:
  case NEON::BI__builtin_neon_vclezs_f32:
  case NEON::BI__builtin_neon_vclezh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OLE, "vclez");
  case NEON::BI__builtin_neon_vcgtzd_s64:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::ICMP_SGT, "vcgtz");
  case NEON::BI__builtin_neon_vcgtzd_f64:
  case NEON::BI__builtin_neon_vcgtzs_f32:
  case NEON::BI__builtin_neon_vcgtzh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OGT, "vcgtz");
  case NEON::BI__builtin_neon_vcltzd_s64:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::ICMP_SLT, "vcltz");
  case NEON::BI__builtin_neon_vcltzd_f64:
  case NEON::BI__builtin_neon_vcltzs_f32:
  case NEON::BI__builtin_neon_vcltzh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OLT, "vcltz");
  case NEON::BI__builtin_neon_vceqzd_u64: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
    Ops[0] =
        Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
    return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
  }
  case NEON::BI__builtin_neon_vceqd_f64:
  case NEON::BI__builtin_neon_vcled_f64:
  case NEON::BI__builtin_neon_vcltd_f64:
  case NEON::BI__builtin_neon_vcged_f64:
  case NEON::BI__builtin_neon_vcgtd_f64: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
    case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
    case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
    case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
    case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
    }
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
    if (P == llvm::FCmpInst::FCMP_OEQ)
      Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
  }
  case NEON::BI__builtin_neon_vceqs_f32:
  case NEON::BI__builtin_neon_vcles_f32:
  case NEON::BI__builtin_neon_vclts_f32:
  case NEON::BI__builtin_neon_vcges_f32:
  case NEON::BI__builtin_neon_vcgts_f32: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
    case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
    case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
    case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
    case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
    }
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
    if (P == llvm::FCmpInst::FCMP_OEQ)
      Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
  }
  case NEON::BI__builtin_neon_vceqh_f16:
  case NEON::BI__builtin_neon_vcleh_f16:
  case NEON::BI__builtin_neon_vclth_f16:
  case NEON::BI__builtin_neon_vcgeh_f16:
  case NEON::BI__builtin_neon_vcgth_f16: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
    case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
    case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
    case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
    case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
    }
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
    if (P == llvm::FCmpInst::FCMP_OEQ)
      Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
  }
  case NEON::BI__builtin_neon_vceqd_s64:
  case NEON::BI__builtin_neon_vceqd_u64:
  case NEON::BI__builtin_neon_vcgtd_s64:
  case NEON::BI__builtin_neon_vcgtd_u64:
  case NEON::BI__builtin_neon_vcltd_s64:
  case NEON::BI__builtin_neon_vcltd_u64:
  case NEON::BI__builtin_neon_vcged_u64:
  case NEON::BI__builtin_neon_vcged_s64:
  case NEON::BI__builtin_neon_vcled_u64:
  case NEON::BI__builtin_neon_vcled_s64: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqd_s64:
    case NEON::BI__builtin_neon_vceqd_u64: P = llvm::ICmpInst::ICMP_EQ; break;
    case NEON::BI__builtin_neon_vcgtd_s64: P = llvm::ICmpInst::ICMP_SGT; break;
    case NEON::BI__builtin_neon_vcgtd_u64: P = llvm::ICmpInst::ICMP_UGT; break;
    case NEON::BI__builtin_neon_vcltd_s64: P = llvm::ICmpInst::ICMP_SLT; break;
    case NEON::BI__builtin_neon_vcltd_u64: P = llvm::ICmpInst::ICMP_ULT; break;
    case NEON::BI__builtin_neon_vcged_u64: P = llvm::ICmpInst::ICMP_UGE; break;
    case NEON::BI__builtin_neon_vcged_s64: P = llvm::ICmpInst::ICMP_SGE; break;
    case NEON::BI__builtin_neon_vcled_u64: P = llvm::ICmpInst::ICMP_ULE; break;
    case NEON::BI__builtin_neon_vcled_s64: P = llvm::ICmpInst::ICMP_SLE; break;
    }
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
    Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
  }
  case NEON::BI__builtin_neon_vtstd_s64:
  case NEON::BI__builtin_neon_vtstd_u64: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                llvm::Constant::getNullValue(Int64Ty));
    return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
  }
  case NEON::BI__builtin_neon_vset_lane_i8:
  case NEON::BI__builtin_neon_vset_lane_i16:
  case NEON::BI__builtin_neon_vset_lane_i32:
  case NEON::BI__builtin_neon_vset_lane_i64:
  case NEON::BI__builtin_neon_vset_lane_bf16:
  case NEON::BI__builtin_neon_vset_lane_f32:
  case NEON::BI__builtin_neon_vsetq_lane_i8:
  case NEON::BI__builtin_neon_vsetq_lane_i16:
  case NEON::BI__builtin_neon_vsetq_lane_i32:
  case NEON::BI__builtin_neon_vsetq_lane_i64:
  case NEON::BI__builtin_neon_vsetq_lane_bf16:
  case NEON::BI__builtin_neon_vsetq_lane_f32:
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  case NEON::BI__builtin_neon_vset_lane_f64:
    // The vector type needs a cast for the v1f64 variant.
    Ops[1] =
        Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  case NEON::BI__builtin_neon_vset_lane_mf8:
  case NEON::BI__builtin_neon_vsetq_lane_mf8:
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    // The input vector type needs a cast to scalar type.
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::Type::getInt8Ty(getLLVMContext()));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  case NEON::BI__builtin_neon_vsetq_lane_f64:
    // The vector type needs a cast for the v2f64 variant.
    Ops[1] =
        Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  case NEON::BI__builtin_neon_vget_lane_i8:
  case NEON::BI__builtin_neon_vdupb_lane_i8:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i8:
  case NEON::BI__builtin_neon_vdupb_laneq_i8:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_mf8:
  case NEON::BI__builtin_neon_vdupb_lane_mf8:
  case NEON::BI__builtin_neon_vgetq_lane_mf8:
  case NEON::BI__builtin_neon_vdupb_laneq_mf8:
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vget_lane_i16:
  case NEON::BI__builtin_neon_vduph_lane_i16:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i16:
  case NEON::BI__builtin_neon_vduph_laneq_i16:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_i32:
  case NEON::BI__builtin_neon_vdups_lane_i32:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vdups_lane_f32:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vdups_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i32:
  case NEON::BI__builtin_neon_vdups_laneq_i32:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_i64:
  case NEON::BI__builtin_neon_vdupd_lane_i64:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vdupd_lane_f64:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vdupd_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i64:
  case NEON::BI__builtin_neon_vdupd_laneq_i64:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_f32:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vget_lane_f64:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vgetq_lane_f32:
  case NEON::BI__builtin_neon_vdups_laneq_f32:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vgetq_lane_f64:
  case NEON::BI__builtin_neon_vdupd_laneq_f64:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vaddh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
  case NEON::BI__builtin_neon_vsubh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
  case NEON::BI__builtin_neon_vmulh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
  case NEON::BI__builtin_neon_vdivh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
  case NEON::BI__builtin_neon_vfmah_f16:
    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
        {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
  case NEON::BI__builtin_neon_vfmsh_f16: {
    Value *Sub = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
        {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]});
  }
  case NEON::BI__builtin_neon_vaddd_s64:
  case NEON::BI__builtin_neon_vaddd_u64:
    return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
  case NEON::BI__builtin_neon_vsubd_s64:
  case NEON::BI__builtin_neon_vsubd_u64:
    return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
  case NEON::BI__builtin_neon_vqdmlalh_s16:
  case NEON::BI__builtin_neon_vqdmlslh_s16: {
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(vectorWrapScalar16(Ops[1]));
    ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
    auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
    Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
                          ProductOps, "vqdmlXl");
    Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");

    unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
                            ? Intrinsic::aarch64_neon_sqadd
                            : Intrinsic::aarch64_neon_sqsub;
    return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
  }
  case NEON::BI__builtin_neon_vqshlud_n_s64: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
                        Ops, "vqshlu_n");
  }
  case NEON::BI__builtin_neon_vqshld_n_u64:
  case NEON::BI__builtin_neon_vqshld_n_s64: {
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
                       ? Intrinsic::aarch64_neon_uqshl
                       : Intrinsic::aarch64_neon_sqshl;
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
    return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
  }
  case NEON::BI__builtin_neon_vrshrd_n_u64:
  case NEON::BI__builtin_neon_vrshrd_n_s64: {
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
                       ? Intrinsic::aarch64_neon_urshl
                       : Intrinsic::aarch64_neon_srshl;
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
    Ops[1] = ConstantInt::get(Int64Ty, -SV);
    return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
  }
  case NEON::BI__builtin_neon_vrsrad_n_u64:
  case NEON::BI__builtin_neon_vrsrad_n_s64: {
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
                       ? Intrinsic::aarch64_neon_urshl
                       : Intrinsic::aarch64_neon_srshl;
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
    Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
    Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
                                {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
    return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
  }
  case NEON::BI__builtin_neon_vshld_n_s64:
  case NEON::BI__builtin_neon_vshld_n_u64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateShl(
        Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
  }
  case NEON::BI__builtin_neon_vshrd_n_s64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateAShr(
        Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
                                                   Amt->getZExtValue())),
        "shrd_n");
  }
  case NEON::BI__builtin_neon_vshrd_n_u64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    uint64_t ShiftAmt = Amt->getZExtValue();
    // Right-shifting an unsigned value by its size yields 0.
    if (ShiftAmt == 64)
      return ConstantInt::get(Int64Ty, 0);
    return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
                              "shrd_n");
  }
  case NEON::BI__builtin_neon_vsrad_n_s64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
    Ops[1] = Builder.CreateAShr(
        Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
                                                   Amt->getZExtValue())),
        "shrd_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  }
  case NEON::BI__builtin_neon_vsrad_n_u64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
    uint64_t ShiftAmt = Amt->getZExtValue();
    // Right-shifting an unsigned value by its size yields 0, and
    // Op + 0 = Op, so return Ops[0] directly.
    if (ShiftAmt == 64)
      return Ops[0];
    Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
                                "shrd_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  }
  case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
  case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
  case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
  case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
    Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
                                          "lane");
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(vectorWrapScalar16(Ops[1]));
    ProductOps.push_back(vectorWrapScalar16(Ops[2]));
    auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
    Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
                          ProductOps, "vqdmlXl");
    Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
    Ops.pop_back();

    unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
                       BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
                          ? Intrinsic::aarch64_neon_sqadd
                          : Intrinsic::aarch64_neon_sqsub;
    return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
  }
  case NEON::BI__builtin_neon_vqdmlals_s32:
  case NEON::BI__builtin_neon_vqdmlsls_s32: {
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(Ops[1]);
    ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
                     ProductOps, "vqdmlXl");

    unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
                            ? Intrinsic::aarch64_neon_sqadd
                            : Intrinsic::aarch64_neon_sqsub;
    return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
  }
  case NEON::BI__builtin_neon_vqdmlals_lane_s32:
  case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
  case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
  case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
    Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
                                          "lane");
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(Ops[1]);
    ProductOps.push_back(Ops[2]);
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
                     ProductOps, "vqdmlXl");
    Ops.pop_back();

    unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
                       BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
                          ? Intrinsic::aarch64_neon_sqadd
                          : Intrinsic::aarch64_neon_sqsub;
    return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
  }
  case NEON::BI__builtin_neon_vget_lane_bf16:
  case NEON::BI__builtin_neon_vduph_lane_bf16:
  case NEON::BI__builtin_neon_vduph_lane_f16: {
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  }
  case NEON::BI__builtin_neon_vgetq_lane_bf16:
  case NEON::BI__builtin_neon_vduph_laneq_bf16:
  case NEON::BI__builtin_neon_vduph_laneq_f16: {
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  }
  case NEON::BI__builtin_neon_vcvt_bf16_f32: {
    llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
    llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
    return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
  }
  case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
    SmallVector<int, 16> ConcatMask(8);
    std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
    llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
    llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
    llvm::Value *Trunc =
        Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
    return Builder.CreateShuffleVector(
        Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
  }
  case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
    SmallVector<int, 16> ConcatMask(8);
    std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
    SmallVector<int, 16> LoMask(4);
    std::iota(LoMask.begin(), LoMask.end(), 0);
    llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
    llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
    llvm::Type *V8BF16 = FixedVectorType::get(Builder.getBFloatTy(), 8);
    llvm::Value *Inactive = Builder.CreateShuffleVector(
        Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
    llvm::Value *Trunc =
        Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
    return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
  }
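  // Note: vcvtq_high_bf16_f32 keeps the low half of the existing bf16 vector
  // (LoMask) and concatenates it with the newly truncated values (ConcatMask),
  // so only the high half of the result is overwritten.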
  case clang::AArch64::BI_InterlockedAdd:
  case clang::AArch64::BI_InterlockedAdd_acq:
  case clang::AArch64::BI_InterlockedAdd_rel:
  case clang::AArch64::BI_InterlockedAdd_nf:
  case clang::AArch64::BI_InterlockedAdd64:
  case clang::AArch64::BI_InterlockedAdd64_acq:
  case clang::AArch64::BI_InterlockedAdd64_rel:
  case clang::AArch64::BI_InterlockedAdd64_nf: {
    Address DestAddr = CheckAtomicAlignment(*this, E);
    Value *Val = EmitScalarExpr(E->getArg(1));
    llvm::AtomicOrdering Ordering;
    switch (BuiltinID) {
    case clang::AArch64::BI_InterlockedAdd:
    case clang::AArch64::BI_InterlockedAdd64:
      Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
      break;
    case clang::AArch64::BI_InterlockedAdd_acq:
    case clang::AArch64::BI_InterlockedAdd64_acq:
      Ordering = llvm::AtomicOrdering::Acquire;
      break;
    case clang::AArch64::BI_InterlockedAdd_rel:
    case clang::AArch64::BI_InterlockedAdd64_rel:
      Ordering = llvm::AtomicOrdering::Release;
      break;
    case clang::AArch64::BI_InterlockedAdd_nf:
    case clang::AArch64::BI_InterlockedAdd64_nf:
      Ordering = llvm::AtomicOrdering::Monotonic;
      break;
    default:
      llvm_unreachable("missing builtin ID in switch!");
    }
    AtomicRMWInst *RMWI =
        Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val, Ordering);
    return Builder.CreateAdd(RMWI, Val);
  }
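  // Note: atomicrmw add returns the value the memory held *before* the
  // operation, while MSVC's _InterlockedAdd returns the new value, hence the
  // trailing CreateAdd of the operand.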
  }

  llvm::FixedVectorType *VTy = GetNeonType(this, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  bool ExtractLow = false;
  bool ExtendLaneArg = false;
  switch (BuiltinID) {
  default:
    return nullptr;
  case NEON::BI__builtin_neon_vbsl_v:
  case NEON::BI__builtin_neon_vbslq_v: {
    llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
    Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
    Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");

    Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
    Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
    Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
    return Builder.CreateBitCast(Ops[0], Ty);
  }
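  // Note: vbsl has no LLVM intrinsic and is expanded bitwise as
  // (mask & a) | (~mask & b) on the integer form of the vector type.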
  case NEON::BI__builtin_neon_vfma_lane_v:
  case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
    // The ARM builtins (and instructions) have the addend as the first
    // operand, but the 'fma' intrinsics have it last. Swap it around here.
    Value *Addend = Ops[0];
    Value *Multiplicand = Ops[1];
    Value *LaneSource = Ops[2];
    Ops[0] = Multiplicand;
    Ops[1] = LaneSource;
    Ops[2] = Addend;

    // Now adjust things to handle the lane access.
    auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
                         ? llvm::FixedVectorType::get(VTy->getElementType(),
                                                      VTy->getNumElements() / 2)
                         : VTy;
    llvm::Constant *cst = cast<Constant>(Ops[3]);
    Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
    Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
    Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");

    Ops.pop_back();
    Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
                                       : Intrinsic::fma;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
  }
  case NEON::BI__builtin_neon_vfma_laneq_v: {
    auto *VTy = cast<llvm::FixedVectorType>(Ty);
    // v1f64 fma should be mapped to Neon scalar f64 fma.
    if (VTy && VTy->getElementType() == DoubleTy) {
      Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
      Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
      llvm::FixedVectorType *VTy =
          GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
      Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
      Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
      Value *Result;
      Result = emitCallMaybeConstrainedFPBuiltin(
          *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
          DoubleTy, {Ops[1], Ops[2], Ops[0]});
      return Builder.CreateBitCast(Result, Ty);
    }
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);

    auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
                                           VTy->getNumElements() * 2);
    Ops[2] = Builder.CreateBitCast(Ops[2], STy);
    Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
                                               cast<ConstantInt>(Ops[3]));
    Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");

    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
        {Ops[2], Ops[1], Ops[0]});
  }
  case NEON::BI__builtin_neon_vfmaq_laneq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);

    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
        {Ops[2], Ops[1], Ops[0]});
  }
  case NEON::BI__builtin_neon_vfmah_lane_f16:
  case NEON::BI__builtin_neon_vfmas_lane_f32:
  case NEON::BI__builtin_neon_vfmah_laneq_f16:
  case NEON::BI__builtin_neon_vfmas_laneq_f32:
  case NEON::BI__builtin_neon_vfmad_lane_f64:
  case NEON::BI__builtin_neon_vfmad_laneq_f64: {
    Ops.push_back(EmitScalarExpr(E->getArg(3)));
    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
    Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
        {Ops[1], Ops[2], Ops[0]});
  }
  case NEON::BI__builtin_neon_vmull_v:
    Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
    if (Type.isPoly())
      Int = Intrinsic::aarch64_neon_pmull;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case NEON::BI__builtin_neon_vmax_v:
  case NEON::BI__builtin_neon_vmaxq_v:
    Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
    if (Ty->isFPOrFPVectorTy())
      Int = Intrinsic::aarch64_neon_fmax;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
  case NEON::BI__builtin_neon_vmaxh_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Int = Intrinsic::aarch64_neon_fmax;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
  }
  case NEON::BI__builtin_neon_vmin_v:
  case NEON::BI__builtin_neon_vminq_v:
    Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
    if (Ty->isFPOrFPVectorTy())
      Int = Intrinsic::aarch64_neon_fmin;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
  case NEON::BI__builtin_neon_vminh_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Int = Intrinsic::aarch64_neon_fmin;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
  }
  case NEON::BI__builtin_neon_vabd_v:
  case NEON::BI__builtin_neon_vabdq_v:
    Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
    if (Ty->isFPOrFPVectorTy())
      Int = Intrinsic::aarch64_neon_fabd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    unsigned ArgElts = VTy->getNumElements();
    llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
    unsigned BitWidth = EltTy->getBitWidth();
    auto *ArgTy = llvm::FixedVectorType::get(
        llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
    llvm::Type *Tys[2] = {VTy, ArgTy};
    Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
    SmallVector<llvm::Value *, 1> TmpOps;
    TmpOps.push_back(Ops[1]);
    Function *F = CGM.getIntrinsic(Int, Tys);
    llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
    llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
    return Builder.CreateAdd(tmp, addend);
  }
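  // Note: vpadal (pairwise add and accumulate) is emitted as the pairwise
  // long-add intrinsic (uaddlp/saddlp) on the source operand followed by an
  // ordinary vector add of the accumulator.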
  case NEON::BI__builtin_neon_vpmin_v:
  case NEON::BI__builtin_neon_vpminq_v:
    Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
    if (Ty->isFPOrFPVectorTy())
      Int = Intrinsic::aarch64_neon_fminp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
  case NEON::BI__builtin_neon_vpmax_v:
  case NEON::BI__builtin_neon_vpmaxq_v:
    Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
    if (Ty->isFPOrFPVectorTy())
      Int = Intrinsic::aarch64_neon_fmaxp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
  case NEON::BI__builtin_neon_vminnm_v:
  case NEON::BI__builtin_neon_vminnmq_v:
    Int = Intrinsic::aarch64_neon_fminnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
  case NEON::BI__builtin_neon_vminnmh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Int = Intrinsic::aarch64_neon_fminnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
  case NEON::BI__builtin_neon_vmaxnm_v:
  case NEON::BI__builtin_neon_vmaxnmq_v:
    Int = Intrinsic::aarch64_neon_fmaxnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
  case NEON::BI__builtin_neon_vmaxnmh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Int = Intrinsic::aarch64_neon_fmaxnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
  case NEON::BI__builtin_neon_vrecpss_f32: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
                        Ops, "vrecps");
  }
  case NEON::BI__builtin_neon_vrecpsd_f64:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
                        Ops, "vrecps");
  case NEON::BI__builtin_neon_vrecpsh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
                        Ops, "vrecps");
  case NEON::BI__builtin_neon_vqshrun_n_v:
    Int = Intrinsic::aarch64_neon_sqshrun;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
  case NEON::BI__builtin_neon_vqrshrun_n_v:
    Int = Intrinsic::aarch64_neon_sqrshrun;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
  case NEON::BI__builtin_neon_vqshrn_n_v:
    Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
  case NEON::BI__builtin_neon_vrshrn_n_v:
    Int = Intrinsic::aarch64_neon_rshrn;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
  case NEON::BI__builtin_neon_vqrshrn_n_v:
    Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
  case NEON::BI__builtin_neon_vrndah_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_round
              : Intrinsic::round;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
  }
  case NEON::BI__builtin_neon_vrnda_v:
  case NEON::BI__builtin_neon_vrndaq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_round
              : Intrinsic::round;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
  }
  case NEON::BI__builtin_neon_vrndih_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_nearbyint
              : Intrinsic::nearbyint;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
  }
  case NEON::BI__builtin_neon_vrndmh_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_floor
              : Intrinsic::floor;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
  }
  case NEON::BI__builtin_neon_vrndm_v:
  case NEON::BI__builtin_neon_vrndmq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_floor
              : Intrinsic::floor;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
  }
  case NEON::BI__builtin_neon_vrndnh_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_roundeven
              : Intrinsic::roundeven;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
  }
  case NEON::BI__builtin_neon_vrndn_v:
  case NEON::BI__builtin_neon_vrndnq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_roundeven
              : Intrinsic::roundeven;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
  }
  case NEON::BI__builtin_neon_vrndns_f32: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_roundeven
              : Intrinsic::roundeven;
    return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
  }
  case NEON::BI__builtin_neon_vrndph_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_ceil
              : Intrinsic::ceil;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
  }
  case NEON::BI__builtin_neon_vrndp_v:
  case NEON::BI__builtin_neon_vrndpq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_ceil
              : Intrinsic::ceil;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
  }
  case NEON::BI__builtin_neon_vrndxh_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_rint
              : Intrinsic::rint;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
  }
  case NEON::BI__builtin_neon_vrndx_v:
  case NEON::BI__builtin_neon_vrndxq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_rint
              : Intrinsic::rint;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
  }
  case NEON::BI__builtin_neon_vrndh_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_trunc
              : Intrinsic::trunc;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
  }
  case NEON::BI__builtin_neon_vrnd32x_f32:
  case NEON::BI__builtin_neon_vrnd32xq_f32:
  case NEON::BI__builtin_neon_vrnd32x_f64:
  case NEON::BI__builtin_neon_vrnd32xq_f64: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Intrinsic::aarch64_neon_frint32x;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
  }
  case NEON::BI__builtin_neon_vrnd32z_f32:
  case NEON::BI__builtin_neon_vrnd32zq_f32:
  case NEON::BI__builtin_neon_vrnd32z_f64:
  case NEON::BI__builtin_neon_vrnd32zq_f64: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Intrinsic::aarch64_neon_frint32z;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
  }
  case NEON::BI__builtin_neon_vrnd64x_f32:
  case NEON::BI__builtin_neon_vrnd64xq_f32:
  case NEON::BI__builtin_neon_vrnd64x_f64:
  case NEON::BI__builtin_neon_vrnd64xq_f64: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Intrinsic::aarch64_neon_frint64x;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
  }
  case NEON::BI__builtin_neon_vrnd64z_f32:
  case NEON::BI__builtin_neon_vrnd64zq_f32:
  case NEON::BI__builtin_neon_vrnd64z_f64:
  case NEON::BI__builtin_neon_vrnd64zq_f64: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Intrinsic::aarch64_neon_frint64z;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
  }
  case NEON::BI__builtin_neon_vrnd_v:
  case NEON::BI__builtin_neon_vrndq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_trunc
              : Intrinsic::trunc;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
  }
  case NEON::BI__builtin_neon_vcvt_f64_v:
  case NEON::BI__builtin_neon_vcvtq_f64_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
    return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_f64_f32: {
    assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
           "unexpected vcvt_f64_f32 builtin");
    NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
    return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvt_f32_f64: {
    assert(Type.getEltType() == NeonTypeFlags::Float32 &&
           "unexpected vcvt_f32_f64 builtin");
    NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
    return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
  }
6905 case NEON::BI__builtin_neon_vcvt_s32_v:
6906 case NEON::BI__builtin_neon_vcvt_u32_v:
6907 case NEON::BI__builtin_neon_vcvt_s64_v:
6908 case NEON::BI__builtin_neon_vcvt_u64_v:
6909 case NEON::BI__builtin_neon_vcvt_s16_f16:
6910 case NEON::BI__builtin_neon_vcvt_u16_f16:
6911 case NEON::BI__builtin_neon_vcvtq_s32_v:
6912 case NEON::BI__builtin_neon_vcvtq_u32_v:
6913 case NEON::BI__builtin_neon_vcvtq_s64_v:
6914 case NEON::BI__builtin_neon_vcvtq_u64_v:
6915 case NEON::BI__builtin_neon_vcvtq_s16_f16:
6916 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
6918 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
  case NEON::BI__builtin_neon_vcvta_s16_f16:
  case NEON::BI__builtin_neon_vcvta_u16_f16:
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s16_f16:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u16_f16:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
  }
  case NEON::BI__builtin_neon_vcvtm_s16_f16:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s16_f16:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtm_u16_f16:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u16_f16:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
  }
  case NEON::BI__builtin_neon_vcvtn_s16_f16:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s16_f16:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtn_u16_f16:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u16_f16:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
  }
  case NEON::BI__builtin_neon_vcvtp_s16_f16:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s16_f16:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtp_u16_f16:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u16_f16:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
  }
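  // vmulx maps directly to the fmulx intrinsic; the scalar-lane forms first
  // extract the selected element and then call the scalar overload.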
  case NEON::BI__builtin_neon_vmulx_v:
  case NEON::BI__builtin_neon_vmulxq_v: {
    Int = Intrinsic::aarch64_neon_fmulx;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
  }
  case NEON::BI__builtin_neon_vmulxh_lane_f16:
  case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
    // vmulx_lane should be mapped to Neon scalar mulx after
    // extracting the scalar element
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
    Ops.pop_back();
    Int = Intrinsic::aarch64_neon_fmulx;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
  }
  case NEON::BI__builtin_neon_vmul_lane_v:
  case NEON::BI__builtin_neon_vmul_laneq_v: {
    // v1f64 vmul_lane should be mapped to Neon scalar mul lane
    bool Quad = false;
    if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
      Quad = true;
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
    llvm::FixedVectorType *VTy =
        GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
    Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
    return Builder.CreateBitCast(Result, Ty);
  }
  case NEON::BI__builtin_neon_vnegd_s64:
    return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
  case NEON::BI__builtin_neon_vnegh_f16:
    return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
  case NEON::BI__builtin_neon_vpmaxnm_v:
  case NEON::BI__builtin_neon_vpmaxnmq_v: {
    Int = Intrinsic::aarch64_neon_fmaxnmp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
  }
  case NEON::BI__builtin_neon_vpminnm_v:
  case NEON::BI__builtin_neon_vpminnmq_v: {
    Int = Intrinsic::aarch64_neon_fminnmp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
  }
  case NEON::BI__builtin_neon_vsqrth_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_sqrt
              : Intrinsic::sqrt;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
  }
  case NEON::BI__builtin_neon_vsqrt_v:
  case NEON::BI__builtin_neon_vsqrtq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_sqrt
              : Intrinsic::sqrt;
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
  }
  case NEON::BI__builtin_neon_vrbit_v:
  case NEON::BI__builtin_neon_vrbitq_v: {
    Int = Intrinsic::bitreverse;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
  }
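  // Across-vector reductions (vaddv/vmaxv/vminv and friends). The AArch64
  // reduction intrinsics produce an i32, so the 8- and 16-bit integer
  // variants call the intrinsic at i32 and truncate back to the element
  // width; the f16 variants return the half result directly.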
  case NEON::BI__builtin_neon_vaddv_u8:
    // FIXME: These are handled by the AArch64 scalar code.
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vaddv_s8: {
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vaddv_u16:
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vaddv_s16: {
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddvq_u8:
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vaddvq_s8: {
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vaddvq_u16:
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vaddvq_s16: {
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vmaxv_u8: {
    Int = Intrinsic::aarch64_neon_umaxv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vmaxv_u16: {
    Int = Intrinsic::aarch64_neon_umaxv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vmaxvq_u8: {
    Int = Intrinsic::aarch64_neon_umaxv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vmaxvq_u16: {
    Int = Intrinsic::aarch64_neon_umaxv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vmaxv_s8: {
    Int = Intrinsic::aarch64_neon_smaxv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vmaxv_s16: {
    Int = Intrinsic::aarch64_neon_smaxv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vmaxvq_s8: {
    Int = Intrinsic::aarch64_neon_smaxv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vmaxvq_s16: {
    Int = Intrinsic::aarch64_neon_smaxv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vmaxv_f16: {
    Int = Intrinsic::aarch64_neon_fmaxv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
  }
  case NEON::BI__builtin_neon_vmaxvq_f16: {
    Int = Intrinsic::aarch64_neon_fmaxv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
  }
  case NEON::BI__builtin_neon_vminv_u8: {
    Int = Intrinsic::aarch64_neon_uminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vminv_u16: {
    Int = Intrinsic::aarch64_neon_uminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vminvq_u8: {
    Int = Intrinsic::aarch64_neon_uminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vminvq_u16: {
    Int = Intrinsic::aarch64_neon_uminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vminv_s8: {
    Int = Intrinsic::aarch64_neon_sminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vminv_s16: {
    Int = Intrinsic::aarch64_neon_sminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vminvq_s8: {
    Int = Intrinsic::aarch64_neon_sminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vminvq_s16: {
    Int = Intrinsic::aarch64_neon_sminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vminv_f16: {
    Int = Intrinsic::aarch64_neon_fminv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
  }
  case NEON::BI__builtin_neon_vminvq_f16: {
    Int = Intrinsic::aarch64_neon_fminv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
  }
  case NEON::BI__builtin_neon_vmaxnmv_f16: {
    Int = Intrinsic::aarch64_neon_fmaxnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
  }
  case NEON::BI__builtin_neon_vmaxnmvq_f16: {
    Int = Intrinsic::aarch64_neon_fmaxnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
  }
  case NEON::BI__builtin_neon_vminnmv_f16: {
    Int = Intrinsic::aarch64_neon_fminnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
  }
  case NEON::BI__builtin_neon_vminnmvq_f16: {
    Int = Intrinsic::aarch64_neon_fminnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
  }
  case NEON::BI__builtin_neon_vmul_n_f64: {
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
    Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
    return Builder.CreateFMul(Ops[0], RHS);
  }
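  // Widening long reductions (vaddlv/vaddlvq): sum all lanes into a result
  // twice the element width, again routed through i32-typed intrinsics with
  // a truncation for the 8-bit sources.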
  case NEON::BI__builtin_neon_vaddlv_u8: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlv_u16: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlvq_u8: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlvq_u16: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlv_s8: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlv_s16: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlvq_s8: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlvq_s16: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
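  // Shift-insert and shift-accumulate operations: vsri/vsli map straight to
  // intrinsics, vsra is a shift followed by an add, and vrsra first emits a
  // rounding shift (urshl/srshl with a negated shift amount) and then adds
  // the result to the accumulator.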
  case NEON::BI__builtin_neon_vsri_n_v:
  case NEON::BI__builtin_neon_vsriq_n_v: {
    Int = Intrinsic::aarch64_neon_vsri;
    llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
    return EmitNeonCall(Intrin, Ops, "vsri_n");
  }
  case NEON::BI__builtin_neon_vsli_n_v:
  case NEON::BI__builtin_neon_vsliq_n_v: {
    Int = Intrinsic::aarch64_neon_vsli;
    llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
    return EmitNeonCall(Intrin, Ops, "vsli_n");
  }
  case NEON::BI__builtin_neon_vsra_n_v:
  case NEON::BI__builtin_neon_vsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  case NEON::BI__builtin_neon_vrsra_n_v:
  case NEON::BI__builtin_neon_vrsraq_n_v: {
    Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
    SmallVector<llvm::Value *, 2> TmpOps;
    TmpOps.push_back(Ops[1]);
    TmpOps.push_back(Ops[2]);
    Function *F = CGM.getIntrinsic(Int, Ty);
    llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    return Builder.CreateAdd(Ops[0], tmp);
  }
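  // Single-vector loads and stores, including lane and dup forms. The
  // vldap1/vstl1 variants (FEAT_LRCPC3) are the same lane load/store with
  // acquire respectively release atomic ordering on the memory operation.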
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v: {
    return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
  }
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
    return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
  case NEON::BI__builtin_neon_vld1_lane_v:
  case NEON::BI__builtin_neon_vld1q_lane_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
                                       PtrOp0.getAlignment());
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
  }
  case NEON::BI__builtin_neon_vldap1_lane_s64:
  case NEON::BI__builtin_neon_vldap1q_lane_s64: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    llvm::LoadInst *LI = Builder.CreateAlignedLoad(
        VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
    LI->setAtomic(llvm::AtomicOrdering::Acquire);
    Ops[0] = LI;
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    Value *V = PoisonValue::get(Ty);
    Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
                                       PtrOp0.getAlignment());
    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vst1_lane_v:
  case NEON::BI__builtin_neon_vst1q_lane_v:
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
  case NEON::BI__builtin_neon_vstl1_lane_s64:
  case NEON::BI__builtin_neon_vstl1q_lane_s64: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    llvm::StoreInst *SI =
        Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
    SI->setAtomic(llvm::AtomicOrdering::Release);
    return SI;
  }
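  // Structured loads and stores (ld2/ld3/ld4 and st2/st3/st4, plus the dup
  // and lane forms). The ldN intrinsics return a struct of vectors that is
  // stored to the sret pointer in Ops[0]; for the stN intrinsics the
  // operands are rotated so the data vectors come before the pointer.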
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v: {
    llvm::Type *Tys[2] = { VTy, UnqualPtrTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v: {
    llvm::Type *Tys[2] = { VTy, UnqualPtrTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v: {
    llvm::Type *Tys[2] = { VTy, UnqualPtrTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld2_dup_v:
  case NEON::BI__builtin_neon_vld2q_dup_v: {
    llvm::Type *Tys[2] = { VTy, UnqualPtrTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_dup_v:
  case NEON::BI__builtin_neon_vld3q_dup_v: {
    llvm::Type *Tys[2] = { VTy, UnqualPtrTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_dup_v:
  case NEON::BI__builtin_neon_vld4q_dup_v: {
    llvm::Type *Tys[2] = { VTy, UnqualPtrTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
    Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
    llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
    llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
    llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
                        Ops, "");
  }
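  // vtrn/vuzp/vzip return both permuted vectors, so they are emitted as two
  // shufflevectors stored to consecutive slots of the result pointer.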
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i+vi);
        Indices.push_back(i+e+vi);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(2*i+vi);

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back((i + vi*e) >> 1);
        Indices.push_back(((i + vi*e) >> 1)+e);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vqtbl1q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
                        Ops, "vtbl1");
  }
  case NEON::BI__builtin_neon_vqtbl2q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
                        Ops, "vtbl2");
  }
  case NEON::BI__builtin_neon_vqtbl3q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
                        Ops, "vtbl3");
  }
  case NEON::BI__builtin_neon_vqtbl4q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
                        Ops, "vtbl4");
  }
  case NEON::BI__builtin_neon_vqtbx1q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
                        Ops, "vtbx1");
  }
  case NEON::BI__builtin_neon_vqtbx2q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
                        Ops, "vtbx2");
  }
  case NEON::BI__builtin_neon_vqtbx3q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
                        Ops, "vtbx3");
  }
  case NEON::BI__builtin_neon_vqtbx4q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
                        Ops, "vtbx4");
  }
  case NEON::BI__builtin_neon_vsqadd_v:
  case NEON::BI__builtin_neon_vsqaddq_v: {
    Int = Intrinsic::aarch64_neon_usqadd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
  }
  case NEON::BI__builtin_neon_vuqadd_v:
  case NEON::BI__builtin_neon_vuqaddq_v: {
    Int = Intrinsic::aarch64_neon_suqadd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
  }
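  // FEAT_LUT lookup-table instructions: vluti2 does a 2-bit-indexed lookup
  // and vluti4 a 4-bit-indexed one. Where this listing was cut, the overload
  // setup below is reconstructed from the surrounding GetNeonType pattern.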
  case NEON::BI__builtin_neon_vluti2_laneq_mf8:
  case NEON::BI__builtin_neon_vluti2_laneq_bf16:
  case NEON::BI__builtin_neon_vluti2_laneq_f16:
  case NEON::BI__builtin_neon_vluti2_laneq_p16:
  case NEON::BI__builtin_neon_vluti2_laneq_p8:
  case NEON::BI__builtin_neon_vluti2_laneq_s16:
  case NEON::BI__builtin_neon_vluti2_laneq_s8:
  case NEON::BI__builtin_neon_vluti2_laneq_u16:
  case NEON::BI__builtin_neon_vluti2_laneq_u8: {
    Int = Intrinsic::aarch64_neon_vluti2_laneq;
    llvm::Type *Tys[2];
    Tys[0] = Ty;
    Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
                                             /*isQuad*/ false));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
  }
  case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
  case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
  case NEON::BI__builtin_neon_vluti2q_laneq_f16:
  case NEON::BI__builtin_neon_vluti2q_laneq_p16:
  case NEON::BI__builtin_neon_vluti2q_laneq_p8:
  case NEON::BI__builtin_neon_vluti2q_laneq_s16:
  case NEON::BI__builtin_neon_vluti2q_laneq_s8:
  case NEON::BI__builtin_neon_vluti2q_laneq_u16:
  case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
    Int = Intrinsic::aarch64_neon_vluti2_laneq;
    llvm::Type *Tys[2];
    Tys[0] = Ty;
    Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
                                             /*isQuad*/ true));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2q_laneq");
  }
  case NEON::BI__builtin_neon_vluti2_lane_mf8:
  case NEON::BI__builtin_neon_vluti2_lane_bf16:
  case NEON::BI__builtin_neon_vluti2_lane_f16:
  case NEON::BI__builtin_neon_vluti2_lane_p16:
  case NEON::BI__builtin_neon_vluti2_lane_p8:
  case NEON::BI__builtin_neon_vluti2_lane_s16:
  case NEON::BI__builtin_neon_vluti2_lane_s8:
  case NEON::BI__builtin_neon_vluti2_lane_u16:
  case NEON::BI__builtin_neon_vluti2_lane_u8: {
    Int = Intrinsic::aarch64_neon_vluti2_lane;
    llvm::Type *Tys[2];
    Tys[0] = Ty;
    Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
                                             /*isQuad*/ false));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
  }
  case NEON::BI__builtin_neon_vluti2q_lane_mf8:
  case NEON::BI__builtin_neon_vluti2q_lane_bf16:
  case NEON::BI__builtin_neon_vluti2q_lane_f16:
  case NEON::BI__builtin_neon_vluti2q_lane_p16:
  case NEON::BI__builtin_neon_vluti2q_lane_p8:
  case NEON::BI__builtin_neon_vluti2q_lane_s16:
  case NEON::BI__builtin_neon_vluti2q_lane_s8:
  case NEON::BI__builtin_neon_vluti2q_lane_u16:
  case NEON::BI__builtin_neon_vluti2q_lane_u8: {
    Int = Intrinsic::aarch64_neon_vluti2_lane;
    llvm::Type *Tys[2];
    Tys[0] = Ty;
    Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
                                             /*isQuad*/ true));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2q_lane");
  }
  case NEON::BI__builtin_neon_vluti4q_lane_mf8:
  case NEON::BI__builtin_neon_vluti4q_lane_p8:
  case NEON::BI__builtin_neon_vluti4q_lane_s8:
  case NEON::BI__builtin_neon_vluti4q_lane_u8: {
    Int = Intrinsic::aarch64_neon_vluti4q_lane;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane");
  }
  case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
  case NEON::BI__builtin_neon_vluti4q_laneq_p8:
  case NEON::BI__builtin_neon_vluti4q_laneq_s8:
  case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
    Int = Intrinsic::aarch64_neon_vluti4q_laneq;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq");
  }
  case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
  case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
  case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
  case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
  case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
    Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
  }
  case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
  case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
  case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
  case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
  case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
    Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
  }
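  // FEAT_FP8 conversions between mf8 (FP8) and bf16/f16/f32 vectors, driven
  // by the FPMR-selected formats via EmitFP8NeonCvtCall. The intrinsic IDs
  // below follow the aarch64_neon_fp8_* naming; calls elided by the listing
  // are reconstructed from that pattern.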
  case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
    ExtractLow = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
  case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
                              llvm::FixedVectorType::get(BFloatTy, 8),
                              Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
  case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
    ExtractLow = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
  case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
                              llvm::FixedVectorType::get(BFloatTy, 8),
                              Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
  case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
    ExtractLow = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
  case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
                              llvm::FixedVectorType::get(HalfTy, 8),
                              Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
  case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
    ExtractLow = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
  case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
                              llvm::FixedVectorType::get(HalfTy, 8),
                              Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
  case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
                              llvm::FixedVectorType::get(Int8Ty, 8),
                              Ops[0]->getType(), false, Ops, E, "vfcvtn");
  case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
                              llvm::FixedVectorType::get(Int8Ty, 8),
                              llvm::FixedVectorType::get(HalfTy, 4),
                              false, Ops, E, "vfcvtn");
  case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
                              llvm::FixedVectorType::get(Int8Ty, 16),
                              llvm::FixedVectorType::get(HalfTy, 8),
                              false, Ops, E, "vfcvtn");
  case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
    llvm::Type *Ty = llvm::FixedVectorType::get(Int8Ty, 16);
    Ops[0] = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
                                        Builder.getInt64(0));
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn2, Ty,
                              Ops[1]->getType(), false, Ops, E, "vfcvtn2");
  }
  case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fdot2,
                           {Ty, Ops[1]->getType()}, Ops, E, "fdot2");
  case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
    return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2_lane,
                               ExtendLaneArg, HalfTy, Ops, E, "fdot2_lane");
  case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fdot4,
                           {Ty, Ops[1]->getType()}, Ops, E, "fdot4");
  case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
    return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4_lane,
                               ExtendLaneArg, FloatTy, Ops, E, "fdot4_lane");
  case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalb,
                           {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
                           "vmlal");
  case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalt,
                           {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
                           "vmlal");
  case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbb,
                           {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
                           "vmlall");
  case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbt,
                           {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
                           "vmlall");
  case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltb,
                           {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
                           "vmlall");
  case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltt,
                           {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
                           "vmlall");
  case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalb_lane,
                               ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
  case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalt_lane,
                               ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
  case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbb_lane,
                               ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
  case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbt_lane,
                               ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
  case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltb_lane,
                               ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
  case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltt_lane,
                               ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
  case NEON::BI__builtin_neon_vamin_f16:
  case NEON::BI__builtin_neon_vaminq_f16:
  case NEON::BI__builtin_neon_vamin_f32:
  case NEON::BI__builtin_neon_vaminq_f32:
  case NEON::BI__builtin_neon_vaminq_f64: {
    Int = Intrinsic::aarch64_neon_famin;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
  }
  case NEON::BI__builtin_neon_vamax_f16:
  case NEON::BI__builtin_neon_vamaxq_f16:
  case NEON::BI__builtin_neon_vamax_f32:
  case NEON::BI__builtin_neon_vamaxq_f32:
  case NEON::BI__builtin_neon_vamaxq_f64: {
    Int = Intrinsic::aarch64_neon_famax;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
  }
  case NEON::BI__builtin_neon_vscale_f16:
  case NEON::BI__builtin_neon_vscaleq_f16:
  case NEON::BI__builtin_neon_vscale_f32:
  case NEON::BI__builtin_neon_vscaleq_f32:
  case NEON::BI__builtin_neon_vscaleq_f64: {
    Int = Intrinsic::aarch64_neon_fp8_fscale;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fscale");
  }
  }
}

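// BPF builtins: __builtin_preserve_field_info() and friends carry
// relocatable BTF information for CO-RE. Each call lowers to a bpf_*
// intrinsic tagged with preserve_access_index debug-info metadata so the
// BPF backend can emit the corresponding relocations.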
Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
          BuiltinID == BPF::BI__builtin_btf_type_id ||
          BuiltinID == BPF::BI__builtin_preserve_type_info ||
          BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
         "unexpected BPF builtin");

  // A sequence number, injected into IR builtin functions, to
  // prevent CSE given the only difference of the function
  // may just be the debuginfo metadata.
  static uint32_t BuiltinSeqNum;

  switch (BuiltinID) {
  default:
    llvm_unreachable("Unexpected BPF builtin");
  case BPF::BI__builtin_preserve_field_info: {
    const Expr *Arg = E->getArg(0);
    bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;

    if (!getDebugInfo()) {
      CGM.Error(E->getExprLoc(),
                "using __builtin_preserve_field_info() without -g");
      return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
                        : EmitLValue(Arg).emitRawPointer(*this);
    }

    // Enable underlying preserve_*_access_index() generation.
    bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
    IsInPreservedAIRegion = true;
    Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
                                  : EmitLValue(Arg).emitRawPointer(*this);
    IsInPreservedAIRegion = OldIsInPreservedAIRegion;

    ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());

    // Build the IR for the preserve_field_info intrinsic.
    llvm::Function *FnGetFieldInfo = Intrinsic::getOrInsertDeclaration(
        &CGM.getModule(), Intrinsic::bpf_preserve_field_info,
        {FieldAddr->getType()});
    return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
  }
  case BPF::BI__builtin_btf_type_id:
  case BPF::BI__builtin_preserve_type_info: {
    if (!getDebugInfo()) {
      CGM.Error(E->getExprLoc(), "using builtin function without -g");
      return nullptr;
    }

    const Expr *Arg0 = E->getArg(0);
    llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
        Arg0->getType(), Arg0->getExprLoc());

    ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
    Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);

    llvm::Function *FnDecl;
    if (BuiltinID == BPF::BI__builtin_btf_type_id)
      FnDecl = Intrinsic::getOrInsertDeclaration(
          &CGM.getModule(), Intrinsic::bpf_btf_type_id, {});
    else
      FnDecl = Intrinsic::getOrInsertDeclaration(
          &CGM.getModule(), Intrinsic::bpf_preserve_type_info, {});
    CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
    Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
    return Fn;
  }
  case BPF::BI__builtin_preserve_enum_value: {
    if (!getDebugInfo()) {
      CGM.Error(E->getExprLoc(), "using builtin function without -g");
      return nullptr;
    }

    const Expr *Arg0 = E->getArg(0);
    llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
        Arg0->getType(), Arg0->getExprLoc());

    // Find the enumerator referenced by the argument.
    const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
    const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
    const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
    const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());

    auto InitVal = Enumerator->getInitVal();
    std::string InitValStr;
    if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
      InitValStr = std::to_string(InitVal.getSExtValue());
    else
      InitValStr = std::to_string(InitVal.getZExtValue());
    std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
    Value *EnumStrVal = Builder.CreateGlobalString(EnumStr);

    ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
    Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);

    llvm::Function *IntrinsicFn = Intrinsic::getOrInsertDeclaration(
        &CGM.getModule(), Intrinsic::bpf_preserve_enum_value, {});
    CallInst *Fn =
        Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
    Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
    return Fn;
  }
  }
}

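// BuildVector materializes a vector from N scalars: as a ConstantVector
// when every operand is a constant, otherwise as a chain of insertelement
// instructions starting from poison.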
Value *CodeGenFunction::BuildVector(ArrayRef<llvm::Value *> Ops) {
  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
         "Not a power-of-two sized vector!");
  bool AllConstants = true;
  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
    AllConstants &= isa<Constant>(Ops[i]);

  // If this is a constant vector, create a ConstantVector.
  if (AllConstants) {
    SmallVector<llvm::Constant *, 16> CstOps;
    for (llvm::Value *Op : Ops)
      CstOps.push_back(cast<Constant>(Op));
    return llvm::ConstantVector::get(CstOps);
  }

  // Otherwise, insertelement the values to build the vector.
  Value *Result = llvm::PoisonValue::get(
      llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));

  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));

  return Result;
}

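// Runtime support for AArch64 function multi-versioning (FMV): lazily
// initialize the runtime's __aarch64_cpu_features structure, then test the
// requested feature bits against it.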
Value *CodeGenFunction::EmitAArch64CpuInit() {
  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
  llvm::FunctionCallee Func =
      CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
  cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
  cast<llvm::GlobalValue>(Func.getCallee())
      ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
  return Builder.CreateCall(Func);
}

Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
  const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
  StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
  llvm::SmallVector<StringRef, 8> Features;
  ArgStr.split(Features, "+");
  for (auto &Feature : Features) {
    Feature = Feature.trim();
    if (!llvm::AArch64::parseFMVExtension(Feature))
      return Builder.getFalse();
  }
  return EmitAArch64CpuSupports(Features);
}

llvm::Value *
CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
  llvm::APInt FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
  Value *Result = Builder.getTrue();
  if (FeaturesMask != 0) {
    // Test the bits in the runtime-provided feature word:
    //   struct { unsigned long long features; } __aarch64_cpu_features;
    llvm::Type *STy = llvm::StructType::get(Int64Ty);
    llvm::Constant *AArch64CPUFeatures =
        CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
    cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
    llvm::Value *CpuFeatures = Builder.CreateGEP(
        STy, AArch64CPUFeatures,
        {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
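    // From here the helper masks the loaded features word and compares it
    // against FeaturesMask; the emitted IR is roughly (a sketch, not the
    // verbatim emission):
    //   %feats  = load i64, ptr @__aarch64_cpu_features
    //   %masked = and i64 %feats, <FeaturesMask>
    //   %ok     = icmp eq i64 %masked, <FeaturesMask>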