//===---------- ARM.cpp - Emit LLVM Code for builtins ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "ABIInfo.h"
#include "CGBuiltin.h"
#include "CGDebugInfo.h"
#include "TargetInfo.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsBPF.h"
#include "llvm/TargetParser/AArch64TargetParser.h"

#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm;

static std::optional<CodeGenFunction::MSVCIntrin>
translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  switch (BuiltinID) {
  default:
    return std::nullopt;
  case clang::AArch64::BI_BitScanForward:
  case clang::AArch64::BI_BitScanForward64:
    return MSVCIntrin::_BitScanForward;
  case clang::AArch64::BI_BitScanReverse:
  case clang::AArch64::BI_BitScanReverse64:
    return MSVCIntrin::_BitScanReverse;
  case clang::AArch64::BI_InterlockedAnd64:
    return MSVCIntrin::_InterlockedAnd;
  case clang::AArch64::BI_InterlockedExchange64:
    return MSVCIntrin::_InterlockedExchange;
  case clang::AArch64::BI_InterlockedExchangeAdd64:
    return MSVCIntrin::_InterlockedExchangeAdd;
  case clang::AArch64::BI_InterlockedExchangeSub64:
    return MSVCIntrin::_InterlockedExchangeSub;
  case clang::AArch64::BI_InterlockedOr64:
    return MSVCIntrin::_InterlockedOr;
  case clang::AArch64::BI_InterlockedXor64:
    return MSVCIntrin::_InterlockedXor;
  case clang::AArch64::BI_InterlockedDecrement64:
    return MSVCIntrin::_InterlockedDecrement;
  case clang::AArch64::BI_InterlockedIncrement64:
    return MSVCIntrin::_InterlockedIncrement;
  case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
    return MSVCIntrin::_InterlockedExchangeAdd_acq;
  case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
    return MSVCIntrin::_InterlockedExchangeAdd_rel;
  case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
    return MSVCIntrin::_InterlockedExchangeAdd_nf;
  case clang::AArch64::BI_InterlockedExchange8_acq:
  case clang::AArch64::BI_InterlockedExchange16_acq:
  case clang::AArch64::BI_InterlockedExchange_acq:
  case clang::AArch64::BI_InterlockedExchange64_acq:
  case clang::AArch64::BI_InterlockedExchangePointer_acq:
    return MSVCIntrin::_InterlockedExchange_acq;
  case clang::AArch64::BI_InterlockedExchange8_rel:
  case clang::AArch64::BI_InterlockedExchange16_rel:
  case clang::AArch64::BI_InterlockedExchange_rel:
  case clang::AArch64::BI_InterlockedExchange64_rel:
  case clang::AArch64::BI_InterlockedExchangePointer_rel:
    return MSVCIntrin::_InterlockedExchange_rel;
  case clang::AArch64::BI_InterlockedExchange8_nf:
  case clang::AArch64::BI_InterlockedExchange16_nf:
  case clang::AArch64::BI_InterlockedExchange_nf:
  case clang::AArch64::BI_InterlockedExchange64_nf:
  case clang::AArch64::BI_InterlockedExchangePointer_nf:
    return MSVCIntrin::_InterlockedExchange_nf;
  case clang::AArch64::BI_InterlockedCompareExchange8_acq:
  case clang::AArch64::BI_InterlockedCompareExchange16_acq:
  case clang::AArch64::BI_InterlockedCompareExchange_acq:
  case clang::AArch64::BI_InterlockedCompareExchange64_acq:
  case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
    return MSVCIntrin::_InterlockedCompareExchange_acq;
  case clang::AArch64::BI_InterlockedCompareExchange8_rel:
  case clang::AArch64::BI_InterlockedCompareExchange16_rel:
  case clang::AArch64::BI_InterlockedCompareExchange_rel:
  case clang::AArch64::BI_InterlockedCompareExchange64_rel:
  case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
    return MSVCIntrin::_InterlockedCompareExchange_rel;
  case clang::AArch64::BI_InterlockedCompareExchange8_nf:
  case clang::AArch64::BI_InterlockedCompareExchange16_nf:
  case clang::AArch64::BI_InterlockedCompareExchange_nf:
  case clang::AArch64::BI_InterlockedCompareExchange64_nf:
    return MSVCIntrin::_InterlockedCompareExchange_nf;
  case clang::AArch64::BI_InterlockedCompareExchange128:
    return MSVCIntrin::_InterlockedCompareExchange128;
  case clang::AArch64::BI_InterlockedCompareExchange128_acq:
    return MSVCIntrin::_InterlockedCompareExchange128_acq;
  case clang::AArch64::BI_InterlockedCompareExchange128_nf:
    return MSVCIntrin::_InterlockedCompareExchange128_nf;
  case clang::AArch64::BI_InterlockedCompareExchange128_rel:
    return MSVCIntrin::_InterlockedCompareExchange128_rel;
  case clang::AArch64::BI_InterlockedOr8_acq:
  case clang::AArch64::BI_InterlockedOr16_acq:
  case clang::AArch64::BI_InterlockedOr_acq:
  case clang::AArch64::BI_InterlockedOr64_acq:
    return MSVCIntrin::_InterlockedOr_acq;
  case clang::AArch64::BI_InterlockedOr8_rel:
  case clang::AArch64::BI_InterlockedOr16_rel:
  case clang::AArch64::BI_InterlockedOr_rel:
  case clang::AArch64::BI_InterlockedOr64_rel:
    return MSVCIntrin::_InterlockedOr_rel;
  case clang::AArch64::BI_InterlockedOr8_nf:
  case clang::AArch64::BI_InterlockedOr16_nf:
  case clang::AArch64::BI_InterlockedOr_nf:
  case clang::AArch64::BI_InterlockedOr64_nf:
    return MSVCIntrin::_InterlockedOr_nf;
  case clang::AArch64::BI_InterlockedXor8_acq:
  case clang::AArch64::BI_InterlockedXor16_acq:
  case clang::AArch64::BI_InterlockedXor_acq:
  case clang::AArch64::BI_InterlockedXor64_acq:
    return MSVCIntrin::_InterlockedXor_acq;
  case clang::AArch64::BI_InterlockedXor8_rel:
  case clang::AArch64::BI_InterlockedXor16_rel:
  case clang::AArch64::BI_InterlockedXor_rel:
  case clang::AArch64::BI_InterlockedXor64_rel:
    return MSVCIntrin::_InterlockedXor_rel;
  case clang::AArch64::BI_InterlockedXor8_nf:
  case clang::AArch64::BI_InterlockedXor16_nf:
  case clang::AArch64::BI_InterlockedXor_nf:
  case clang::AArch64::BI_InterlockedXor64_nf:
    return MSVCIntrin::_InterlockedXor_nf;
  case clang::AArch64::BI_InterlockedAnd8_acq:
  case clang::AArch64::BI_InterlockedAnd16_acq:
  case clang::AArch64::BI_InterlockedAnd_acq:
  case clang::AArch64::BI_InterlockedAnd64_acq:
    return MSVCIntrin::_InterlockedAnd_acq;
  case clang::AArch64::BI_InterlockedAnd8_rel:
  case clang::AArch64::BI_InterlockedAnd16_rel:
  case clang::AArch64::BI_InterlockedAnd_rel:
  case clang::AArch64::BI_InterlockedAnd64_rel:
    return MSVCIntrin::_InterlockedAnd_rel;
  case clang::AArch64::BI_InterlockedAnd8_nf:
  case clang::AArch64::BI_InterlockedAnd16_nf:
  case clang::AArch64::BI_InterlockedAnd_nf:
  case clang::AArch64::BI_InterlockedAnd64_nf:
    return MSVCIntrin::_InterlockedAnd_nf;
  case clang::AArch64::BI_InterlockedIncrement16_acq:
  case clang::AArch64::BI_InterlockedIncrement_acq:
  case clang::AArch64::BI_InterlockedIncrement64_acq:
    return MSVCIntrin::_InterlockedIncrement_acq;
  case clang::AArch64::BI_InterlockedIncrement16_rel:
  case clang::AArch64::BI_InterlockedIncrement_rel:
  case clang::AArch64::BI_InterlockedIncrement64_rel:
    return MSVCIntrin::_InterlockedIncrement_rel;
  case clang::AArch64::BI_InterlockedIncrement16_nf:
  case clang::AArch64::BI_InterlockedIncrement_nf:
  case clang::AArch64::BI_InterlockedIncrement64_nf:
    return MSVCIntrin::_InterlockedIncrement_nf;
  case clang::AArch64::BI_InterlockedDecrement16_acq:
  case clang::AArch64::BI_InterlockedDecrement_acq:
  case clang::AArch64::BI_InterlockedDecrement64_acq:
    return MSVCIntrin::_InterlockedDecrement_acq;
  case clang::AArch64::BI_InterlockedDecrement16_rel:
  case clang::AArch64::BI_InterlockedDecrement_rel:
  case clang::AArch64::BI_InterlockedDecrement64_rel:
    return MSVCIntrin::_InterlockedDecrement_rel;
  case clang::AArch64::BI_InterlockedDecrement16_nf:
  case clang::AArch64::BI_InterlockedDecrement_nf:
  case clang::AArch64::BI_InterlockedDecrement64_nf:
    return MSVCIntrin::_InterlockedDecrement_nf;
  }
  llvm_unreachable("must return from switch");
}

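// Usage sketch (illustrative, based on how this file consumes the table): the
// per-target builtin emitters try the MSVC translation first and only fall
// through to the regular NEON handling on a miss, roughly
//
//   if (std::optional<CodeGenFunction::MSVCIntrin> MsvcIntId =
//           translateAarch64ToMsvcIntrin(BuiltinID))
//     return EmitMSVCBuiltinExpr(*MsvcIntId, E);
//
// where std::nullopt means "not an MSVC-compatible intrinsic".
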
static std::optional<CodeGenFunction::MSVCIntrin>
translateArmToMsvcIntrin(unsigned BuiltinID) {
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  switch (BuiltinID) {
  default:
    return std::nullopt;
  case clang::ARM::BI_BitScanForward:
  case clang::ARM::BI_BitScanForward64:
    return MSVCIntrin::_BitScanForward;
  case clang::ARM::BI_BitScanReverse:
  case clang::ARM::BI_BitScanReverse64:
    return MSVCIntrin::_BitScanReverse;
  case clang::ARM::BI_InterlockedAnd64:
    return MSVCIntrin::_InterlockedAnd;
  case clang::ARM::BI_InterlockedExchange64:
    return MSVCIntrin::_InterlockedExchange;
  case clang::ARM::BI_InterlockedExchangeAdd64:
    return MSVCIntrin::_InterlockedExchangeAdd;
  case clang::ARM::BI_InterlockedExchangeSub64:
    return MSVCIntrin::_InterlockedExchangeSub;
  case clang::ARM::BI_InterlockedOr64:
    return MSVCIntrin::_InterlockedOr;
  case clang::ARM::BI_InterlockedXor64:
    return MSVCIntrin::_InterlockedXor;
  case clang::ARM::BI_InterlockedDecrement64:
    return MSVCIntrin::_InterlockedDecrement;
  case clang::ARM::BI_InterlockedIncrement64:
    return MSVCIntrin::_InterlockedIncrement;
  case clang::ARM::BI_InterlockedExchangeAdd8_acq:
  case clang::ARM::BI_InterlockedExchangeAdd16_acq:
  case clang::ARM::BI_InterlockedExchangeAdd_acq:
  case clang::ARM::BI_InterlockedExchangeAdd64_acq:
    return MSVCIntrin::_InterlockedExchangeAdd_acq;
  case clang::ARM::BI_InterlockedExchangeAdd8_rel:
  case clang::ARM::BI_InterlockedExchangeAdd16_rel:
  case clang::ARM::BI_InterlockedExchangeAdd_rel:
  case clang::ARM::BI_InterlockedExchangeAdd64_rel:
    return MSVCIntrin::_InterlockedExchangeAdd_rel;
  case clang::ARM::BI_InterlockedExchangeAdd8_nf:
  case clang::ARM::BI_InterlockedExchangeAdd16_nf:
  case clang::ARM::BI_InterlockedExchangeAdd_nf:
  case clang::ARM::BI_InterlockedExchangeAdd64_nf:
    return MSVCIntrin::_InterlockedExchangeAdd_nf;
  case clang::ARM::BI_InterlockedExchange8_acq:
  case clang::ARM::BI_InterlockedExchange16_acq:
  case clang::ARM::BI_InterlockedExchange_acq:
  case clang::ARM::BI_InterlockedExchange64_acq:
  case clang::ARM::BI_InterlockedExchangePointer_acq:
    return MSVCIntrin::_InterlockedExchange_acq;
  case clang::ARM::BI_InterlockedExchange8_rel:
  case clang::ARM::BI_InterlockedExchange16_rel:
  case clang::ARM::BI_InterlockedExchange_rel:
  case clang::ARM::BI_InterlockedExchange64_rel:
  case clang::ARM::BI_InterlockedExchangePointer_rel:
    return MSVCIntrin::_InterlockedExchange_rel;
  case clang::ARM::BI_InterlockedExchange8_nf:
  case clang::ARM::BI_InterlockedExchange16_nf:
  case clang::ARM::BI_InterlockedExchange_nf:
  case clang::ARM::BI_InterlockedExchange64_nf:
  case clang::ARM::BI_InterlockedExchangePointer_nf:
    return MSVCIntrin::_InterlockedExchange_nf;
  case clang::ARM::BI_InterlockedCompareExchange8_acq:
  case clang::ARM::BI_InterlockedCompareExchange16_acq:
  case clang::ARM::BI_InterlockedCompareExchange_acq:
  case clang::ARM::BI_InterlockedCompareExchange64_acq:
  case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
    return MSVCIntrin::_InterlockedCompareExchange_acq;
  case clang::ARM::BI_InterlockedCompareExchange8_rel:
  case clang::ARM::BI_InterlockedCompareExchange16_rel:
  case clang::ARM::BI_InterlockedCompareExchange_rel:
  case clang::ARM::BI_InterlockedCompareExchange64_rel:
  case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
    return MSVCIntrin::_InterlockedCompareExchange_rel;
  case clang::ARM::BI_InterlockedCompareExchange8_nf:
  case clang::ARM::BI_InterlockedCompareExchange16_nf:
  case clang::ARM::BI_InterlockedCompareExchange_nf:
  case clang::ARM::BI_InterlockedCompareExchange64_nf:
    return MSVCIntrin::_InterlockedCompareExchange_nf;
  case clang::ARM::BI_InterlockedOr8_acq:
  case clang::ARM::BI_InterlockedOr16_acq:
  case clang::ARM::BI_InterlockedOr_acq:
  case clang::ARM::BI_InterlockedOr64_acq:
    return MSVCIntrin::_InterlockedOr_acq;
  case clang::ARM::BI_InterlockedOr8_rel:
  case clang::ARM::BI_InterlockedOr16_rel:
  case clang::ARM::BI_InterlockedOr_rel:
  case clang::ARM::BI_InterlockedOr64_rel:
    return MSVCIntrin::_InterlockedOr_rel;
  case clang::ARM::BI_InterlockedOr8_nf:
  case clang::ARM::BI_InterlockedOr16_nf:
  case clang::ARM::BI_InterlockedOr_nf:
  case clang::ARM::BI_InterlockedOr64_nf:
    return MSVCIntrin::_InterlockedOr_nf;
  case clang::ARM::BI_InterlockedXor8_acq:
  case clang::ARM::BI_InterlockedXor16_acq:
  case clang::ARM::BI_InterlockedXor_acq:
  case clang::ARM::BI_InterlockedXor64_acq:
    return MSVCIntrin::_InterlockedXor_acq;
  case clang::ARM::BI_InterlockedXor8_rel:
  case clang::ARM::BI_InterlockedXor16_rel:
  case clang::ARM::BI_InterlockedXor_rel:
  case clang::ARM::BI_InterlockedXor64_rel:
    return MSVCIntrin::_InterlockedXor_rel;
  case clang::ARM::BI_InterlockedXor8_nf:
  case clang::ARM::BI_InterlockedXor16_nf:
  case clang::ARM::BI_InterlockedXor_nf:
  case clang::ARM::BI_InterlockedXor64_nf:
    return MSVCIntrin::_InterlockedXor_nf;
  case clang::ARM::BI_InterlockedAnd8_acq:
  case clang::ARM::BI_InterlockedAnd16_acq:
  case clang::ARM::BI_InterlockedAnd_acq:
  case clang::ARM::BI_InterlockedAnd64_acq:
    return MSVCIntrin::_InterlockedAnd_acq;
  case clang::ARM::BI_InterlockedAnd8_rel:
  case clang::ARM::BI_InterlockedAnd16_rel:
  case clang::ARM::BI_InterlockedAnd_rel:
  case clang::ARM::BI_InterlockedAnd64_rel:
    return MSVCIntrin::_InterlockedAnd_rel;
  case clang::ARM::BI_InterlockedAnd8_nf:
  case clang::ARM::BI_InterlockedAnd16_nf:
  case clang::ARM::BI_InterlockedAnd_nf:
  case clang::ARM::BI_InterlockedAnd64_nf:
    return MSVCIntrin::_InterlockedAnd_nf;
  case clang::ARM::BI_InterlockedIncrement16_acq:
  case clang::ARM::BI_InterlockedIncrement_acq:
  case clang::ARM::BI_InterlockedIncrement64_acq:
    return MSVCIntrin::_InterlockedIncrement_acq;
  case clang::ARM::BI_InterlockedIncrement16_rel:
  case clang::ARM::BI_InterlockedIncrement_rel:
  case clang::ARM::BI_InterlockedIncrement64_rel:
    return MSVCIntrin::_InterlockedIncrement_rel;
  case clang::ARM::BI_InterlockedIncrement16_nf:
  case clang::ARM::BI_InterlockedIncrement_nf:
  case clang::ARM::BI_InterlockedIncrement64_nf:
    return MSVCIntrin::_InterlockedIncrement_nf;
  case clang::ARM::BI_InterlockedDecrement16_acq:
  case clang::ARM::BI_InterlockedDecrement_acq:
  case clang::ARM::BI_InterlockedDecrement64_acq:
    return MSVCIntrin::_InterlockedDecrement_acq;
  case clang::ARM::BI_InterlockedDecrement16_rel:
  case clang::ARM::BI_InterlockedDecrement_rel:
  case clang::ARM::BI_InterlockedDecrement64_rel:
    return MSVCIntrin::_InterlockedDecrement_rel;
  case clang::ARM::BI_InterlockedDecrement16_nf:
  case clang::ARM::BI_InterlockedDecrement_nf:
  case clang::ARM::BI_InterlockedDecrement64_nf:
    return MSVCIntrin::_InterlockedDecrement_nf;
  }
  llvm_unreachable("must return from switch");
}

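// As above, the width-specific builtins collapse onto one enumerator: for
// example, clang::ARM::BI_InterlockedExchangeAdd16_acq maps to
// MSVCIntrin::_InterlockedExchangeAdd_acq, and the operand width is recovered
// later from the builtin call's argument types.
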
// Emit an intrinsic where all operands are of the same type as the result.
// Depending on mode, this may be a constrained floating-point intrinsic.
static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                                unsigned IntrinsicID,
                                                unsigned ConstrainedIntrinsicID,
                                                llvm::Type *Ty,
                                                ArrayRef<Value *> Args) {
  Function *F;
  if (CGF.Builder.getIsFPConstrained())
    F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
  else
    F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);

  if (CGF.Builder.getIsFPConstrained())
    return CGF.Builder.CreateConstrainedFPCall(F, Args);

  return CGF.Builder.CreateCall(F, Args);
}

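// For instance (a hedged sketch, not a call site from this file): emitting a
// NEON square root through this helper selects llvm.sqrt.* in the default FP
// environment and llvm.experimental.constrained.sqrt.* under strict FP,
// without duplicating the call site:
//
//   Value *Res = emitCallMaybeConstrainedFPBuiltin(
//       CGF, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt,
//       VTy, {Ops[0]});
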
static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
                                          NeonTypeFlags TypeFlags,
                                          bool HasFastHalfType = true,
                                          bool V1Ty = false,
                                          bool AllowBFloatArgsAndRet = true) {
  int IsQuad = TypeFlags.isQuad();
  switch (TypeFlags.getEltType()) {
  case NeonTypeFlags::Int8:
  case NeonTypeFlags::Poly8:
  case NeonTypeFlags::MFloat8:
    return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
  case NeonTypeFlags::Int16:
  case NeonTypeFlags::Poly16:
    return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::BFloat16:
    if (AllowBFloatArgsAndRet)
      return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
    return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::Float16:
    if (HasFastHalfType)
      return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
    return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::Int32:
    return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
  case NeonTypeFlags::Int64:
  case NeonTypeFlags::Poly64:
    return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
  case NeonTypeFlags::Poly128:
    // FIXME: i128 and f128 are not fully supported in Clang and LLVM; a lot
    // of the i128 and f128 API is missing, so we represent poly128 as v16i8
    // and rely on pattern matching.
    return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
  case NeonTypeFlags::Float32:
    return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
  case NeonTypeFlags::Float64:
    return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
  }
  llvm_unreachable("Unknown vector element type!");
}

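// Worked example: NeonTypeFlags with element type Int32 yields <2 x i32> (a
// 64-bit D register) when the quad bit is clear, and <4 x i32> (a 128-bit Q
// register) when it is set; V1Ty instead forces a one-element vector, which
// some scalar builtins use.
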
static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
                                          NeonTypeFlags IntTypeFlags) {
  int IsQuad = IntTypeFlags.isQuad();
  switch (IntTypeFlags.getEltType()) {
  case NeonTypeFlags::Int16:
    return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
  case NeonTypeFlags::Int32:
    return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
  case NeonTypeFlags::Int64:
    return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
  default:
    llvm_unreachable("Type can't be converted to floating-point!");
  }
}

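// The integer flags select the floating-point type of the same element width
// (Int32 -> float, so quad Int32 becomes <4 x float>); callers such as the
// fixed-point <-> floating-point conversion builtins describe their operands
// with integer type flags and use this helper to build the FP side of the
// conversion.
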
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
                                      const ElementCount &Count) {
  Value *SV = llvm::ConstantVector::getSplat(Count, C);
  return Builder.CreateShuffleVector(V, V, SV, "lane");
}

Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
  ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
  return EmitNeonSplat(V, C, EC);
}

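// Illustrative IR: splatting lane 1 of a <4 x i16> emits
//   %lane = shufflevector <4 x i16> %v, <4 x i16> %v,
//                         <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// i.e. the constant C is the lane index, and its splat is used as the
// shuffle mask.
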
Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
                                     const char *name,
                                     unsigned shift, bool rightshift) {
  unsigned j = 0;
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j) {
    if (F->isConstrainedFPIntrinsic())
      if (ai->getType()->isMetadataTy())
        continue;
    if (shift > 0 && shift == j)
      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
    else
      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
  }

  if (F->isConstrainedFPIntrinsic())
    return Builder.CreateConstrainedFPCall(F, Ops, name);
  return Builder.CreateCall(F, Ops, name);
}

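// Each operand is bitcast to the parameter type the intrinsic declaration
// expects; the operand at index `shift` (if any) is instead materialized as a
// splat vector of the shift immediate, negated for right shifts. For a
// saturating shift like vqshl_n_s32(v, 3), for instance, Ops arrives as
// {%v, i32 3} and the intrinsic is called with
// {<2 x i32> %v, <2 x i32> <i32 3, i32 3>}.
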
llvm::Value *
CodeGenFunction::EmitFP8NeonCall(unsigned IID, ArrayRef<llvm::Type *> Tys,
                                 SmallVectorImpl<Value *> &Ops,
                                 const CallExpr *E, const char *name) {
  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
                     Ops.pop_back_val());
  return EmitNeonCall(CGM.getIntrinsic(IID, Tys), Ops, name);
}

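// The FP8 helpers treat the trailing operand as the FPMR value: it is popped
// off Ops and written to the FP mode register via llvm.aarch64.set.fpmr
// before the underlying NEON intrinsic is emitted with the remaining
// operands.
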
llvm::Value *CodeGenFunction::EmitFP8NeonFDOTCall(
    unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
    SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {

  const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
                             RetTy->getPrimitiveSizeInBits();
  llvm::Type *Tys[] = {llvm::FixedVectorType::get(RetTy, ElemCount),
                       Ops[1]->getType()};
  if (ExtendLaneArg) {
    auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
    Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
                                        uint64_t(0));
  }
  return EmitFP8NeonCall(IID, Tys, Ops, E, name);
}

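// When ExtendLaneArg is set, the lane operand is a 64-bit vector but the
// underlying intrinsic expects a full 128-bit register, so the value is
// placed into the low half of a poison <16 x i8> with insertvector at
// index 0. The FMLA variant below applies the same widening.
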
llvm::Value *CodeGenFunction::EmitFP8NeonFMLACall(
    unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
    SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {

  if (ExtendLaneArg) {
    auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
    Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
                                        uint64_t(0));
  }
  const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
                             RetTy->getPrimitiveSizeInBits();
  return EmitFP8NeonCall(IID, {llvm::FixedVectorType::get(RetTy, ElemCount)},
                         Ops, E, name);
}

Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
                                            bool neg) {
  int SV = cast<ConstantInt>(V)->getSExtValue();
  return ConstantInt::getSigned(Ty, neg ? -SV : SV);
}

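// Example: for a right shift by 3 on <4 x i16> this returns the splat
// constant -3 when neg is true; LLVM's NEON shift intrinsics encode right
// shifts as negative amounts, so the negation happens here rather than at
// each call site. Since Ty is a vector type, ConstantInt::getSigned produces
// a splat vector constant.
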
Value *CodeGenFunction::EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
                                           llvm::Type *Ty1, bool Extract,
                                           SmallVectorImpl<llvm::Value *> &Ops,
                                           const CallExpr *E,
                                           const char *name) {
  llvm::Type *Tys[] = {Ty0, Ty1};
  if (Extract) {
    // Op[0] is mfloat8x16_t, but the intrinsic converts only the lower part of
    // the vector.
    Tys[1] = llvm::FixedVectorType::get(Int8Ty, 8);
    Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], uint64_t(0));
  }
  return EmitFP8NeonCall(IID, Tys, Ops, E, name);
}

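// In the Extract case the source operand stays mfloat8x16_t at the C level,
// but only its low 64 bits feed the conversion, so Tys[1] is narrowed to
// <8 x i8> and Ops[0] is shrunk with extractvector from index 0 to match the
// intrinsic's signature.
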
// Right-shift a vector by a constant.
Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
                                          llvm::Type *Ty, bool usgn,
                                          const char *name) {
  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);

  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
  int EltSize = VTy->getScalarSizeInBits();

  Vec = Builder.CreateBitCast(Vec, Ty);

  // lshr/ashr are undefined when the shift amount is equal to the vector
  // element size.
  if (ShiftAmt == EltSize) {
    if (usgn) {
      // Right-shifting an unsigned value by its size yields 0.
      return llvm::ConstantAggregateZero::get(VTy);
    } else {
      // Right-shifting a signed value by its size is equivalent
      // to a shift of size-1.
      --ShiftAmt;
      Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
    }
  }

  Shift = EmitNeonShiftVector(Shift, Ty, false);
  if (usgn)
    return Builder.CreateLShr(Vec, Shift, name);
  return Builder.CreateAShr(Vec, Shift, name);
}

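// Worked example: vshrq_n_s32(v, 32) reaches here with ShiftAmt == EltSize.
// An `ashr` by the full element width would be poison in LLVM IR, so the
// amount is clamped to 31, which still yields the architecturally expected
// result (all sign bits); the unsigned variant folds directly to zero.
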
//===----------------------------------------------------------------------===//
// Intrinsics maps
//
// Maps that help automate code-generation.
//===----------------------------------------------------------------------===//
enum {
  AddRetType = (1 << 0),
  Add1ArgType = (1 << 1),
  Add2ArgTypes = (1 << 2),

  VectorizeRetType = (1 << 3),
  VectorizeArgTypes = (1 << 4),

  InventFloatType = (1 << 5),
  UnsignedAlts = (1 << 6),

  Use64BitVectors = (1 << 7),
  Use128BitVectors = (1 << 8),

  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
  VectorRet = AddRetType | VectorizeRetType,
  VectorRetGetArgs01 =
      AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
  FpCmpzModifiers =
      AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
};

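// Roughly, these modifier bits describe how to build the intrinsic's
// overloaded signature from the builtin call: AddRetType | Add1ArgType, for
// example, means the result type and the first argument type both join the
// overload type list, the most common pattern in the tables below.
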
//===----------------------------------------------------------------------===//
// Intrinsic maps
//
// Maps that help automate code-generation.
//===----------------------------------------------------------------------===//

namespace {
struct ARMVectorIntrinsicInfo {
  const char *NameHint;
  unsigned BuiltinID;
  unsigned LLVMIntrinsic;
  unsigned AltLLVMIntrinsic;
  unsigned TypeModifier;

  bool operator<(unsigned RHSBuiltinID) const {
    return BuiltinID < RHSBuiltinID;
  }
  bool operator<(const ARMVectorIntrinsicInfo &TE) const {
    return BuiltinID < TE.BuiltinID;
  }
};
} // end anonymous namespace

#define NEONMAP0(NameBase) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }

#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, \
    Intrinsic::LLVMIntrinsic, 0, TypeModifier }

#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, \
    Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
    TypeModifier }

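// For reference, an entry such as NEONMAP1(vabs_v, arm_neon_vabs, 0) expands
// to
//   { "vabs_v", NEON::BI__builtin_neon_vabs_v, Intrinsic::arm_neon_vabs, 0, 0 }
// i.e. a name hint, the builtin ID, the LLVM intrinsic, no alternate
// intrinsic, and no type-modifier flags. Entries must stay sorted by
// BuiltinID so the maps can be binary-searched.
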
// clang-format off
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[] = {
  NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
  NEONMAP0(splat_lane_v),
  NEONMAP0(splat_laneq_v),
  NEONMAP0(splatq_lane_v),
  NEONMAP0(splatq_laneq_v),
  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vabs_v, arm_neon_vabs, 0),
  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
  NEONMAP0(vadd_v),
  NEONMAP0(vaddhn_v),
  NEONMAP0(vaddq_v),
  NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
  NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
  NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
  NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
  NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
  NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
  NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
  NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
  NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcage_v, arm_neon_vacge, 0),
  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcale_v, arm_neon_vacge, 0),
  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
  NEONMAP0(vceqz_v),
  NEONMAP0(vceqzq_v),
  NEONMAP0(vcgez_v),
  NEONMAP0(vcgezq_v),
  NEONMAP0(vcgtz_v),
  NEONMAP0(vcgtzq_v),
  NEONMAP0(vclez_v),
  NEONMAP0(vclezq_v),
  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
  NEONMAP0(vcltz_v),
  NEONMAP0(vcltzq_v),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
  NEONMAP0(vcvt_f16_s16),
  NEONMAP0(vcvt_f16_u16),
  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvt_s16_f16),
  NEONMAP0(vcvt_s32_v),
  NEONMAP0(vcvt_s64_v),
  NEONMAP0(vcvt_u16_f16),
  NEONMAP0(vcvt_u32_v),
  NEONMAP0(vcvt_u64_v),
  NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
  NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP0(vcvtq_f16_s16),
  NEONMAP0(vcvtq_f16_u16),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_s16_f16),
  NEONMAP0(vcvtq_s32_v),
  NEONMAP0(vcvtq_s64_v),
  NEONMAP0(vcvtq_u16_f16),
  NEONMAP0(vcvtq_u32_v),
  NEONMAP0(vcvtq_u64_v),
  NEONMAP1(vdot_s32, arm_neon_sdot, 0),
  NEONMAP1(vdot_u32, arm_neon_udot, 0),
  NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
  NEONMAP1(vdotq_u32, arm_neon_udot, 0),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP0(vld1_dup_v),
  NEONMAP1(vld1_v, arm_neon_vld1, 0),
  NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
  NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
  NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
  NEONMAP0(vld1q_dup_v),
  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
  NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
  NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
  NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
  NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2_v, arm_neon_vld2, 0),
  NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
  NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3_v, arm_neon_vld3, 0),
  NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
  NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4_v, arm_neon_vld4, 0),
  NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
  NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP0(vmull_v),
  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vrnd_v, trunc, Add1ArgType),
  NEONMAP1(vrnda_v, round, Add1ArgType),
  NEONMAP1(vrndaq_v, round, Add1ArgType),
  NEONMAP0(vrndi_v),
  NEONMAP0(vrndiq_v),
  NEONMAP1(vrndm_v, floor, Add1ArgType),
  NEONMAP1(vrndmq_v, floor, Add1ArgType),
  NEONMAP1(vrndn_v, roundeven, Add1ArgType),
  NEONMAP1(vrndnq_v, roundeven, Add1ArgType),
  NEONMAP1(vrndp_v, ceil, Add1ArgType),
  NEONMAP1(vrndpq_v, ceil, Add1ArgType),
  NEONMAP1(vrndq_v, trunc, Add1ArgType),
  NEONMAP1(vrndx_v, rint, Add1ArgType),
  NEONMAP1(vrndxq_v, rint, Add1ArgType),
  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
  NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
  NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
  NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
  NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
  NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP1(vst1_v, arm_neon_vst1, 0),
  NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
  NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
  NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
  NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
  NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2_v, arm_neon_vst2, 0),
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3_v, arm_neon_vst3, 0),
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4_v, arm_neon_vst4, 0),
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtrn_v),
  NEONMAP0(vtrnq_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
  NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
  NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
  NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
  NEONMAP0(vuzp_v),
  NEONMAP0(vuzpq_v),
  NEONMAP0(vzip_v),
  NEONMAP0(vzipq_v)
};

static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP0(splat_lane_v),
  NEONMAP0(splat_laneq_v),
  NEONMAP0(splatq_lane_v),
  NEONMAP0(splatq_laneq_v),
  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
  NEONMAP0(vadd_v),
  NEONMAP0(vaddhn_v),
  NEONMAP0(vaddq_v),
  NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
  NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
  NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
  NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
  NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
  NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
  NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
  NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
  NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
  NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
  NEONMAP0(vceqz_v),
  NEONMAP0(vceqzq_v),
  NEONMAP0(vcgez_v),
  NEONMAP0(vcgezq_v),
  NEONMAP0(vcgtz_v),
  NEONMAP0(vcgtzq_v),
  NEONMAP0(vclez_v),
  NEONMAP0(vclezq_v),
  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP0(vcltz_v),
  NEONMAP0(vcltzq_v),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
  NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
  NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
  NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
  NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
  NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
  NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
  NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
  NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
  NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
  NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
  NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
  NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
  NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
  NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
  NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
  NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
  NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
  NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
  NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
  NEONMAP0(vcvt_f16_s16),
  NEONMAP0(vcvt_f16_u16),
  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_f16_s16),
  NEONMAP0(vcvtq_f16_u16),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
  NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
  NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
  NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
  NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
  NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
  NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
  NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
  NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
  NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
  NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
  NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
  NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
  NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
  NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
  NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
  NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
  NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
  NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
  NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
  NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
  NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
  NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
  NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
  NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
  NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
  NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
  NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
  NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
  NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
  NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
  NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
  NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
  NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
  NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
  NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
  NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
  NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
  NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
  NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
  NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
  NEONMAP0(vrndi_v),
  NEONMAP0(vrndiq_v),
  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
  NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
  NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
  NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
  NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
  NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
  NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
  NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
  NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
  NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
  NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
  NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
  NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
  NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
  NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
  NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
  NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
  NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
  NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
  NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
  NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
  NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
  NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
  NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
  NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
  NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
  NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
  NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
};

// Single-Instruction-Single-Data (SISD) intrinsics.
//
// The name is somewhat misleading: not all intrinsics in this table are
// strictly SISD. While many builtins operate on scalars,
// * some take vector operands (e.g. reduction builtins such as
//   `vminvq_u16` and `vaddvq_s32`), and
// * some take both scalar and vector operands (e.g. crypto builtins
//   such as `vsha1cq_u32`).
//
// TODO: Either rename this table to better reflect its contents, or
// restrict it to true SISD intrinsics only.
static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_s16, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddv_s32, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddv_s8, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddv_u16, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddv_u32, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddv_u8, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s16, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddvq_s32, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddvq_s64, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddvq_s8, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddvq_u16, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddvq_u32, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddvq_u64, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddvq_u8, vector_reduce_add, Add1ArgType),
  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_s32_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_u32_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_s64_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_u64_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_s32_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_u32_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
  NEONMAP0(vcvth_bf16_f32),
  NEONMAP1(vcvtmd_s32_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_u32_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_s64_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_u64_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_s32_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_u32_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1263 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1264 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1265 NEONMAP1(vcvtns_s64_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1266 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1267 NEONMAP1(vcvtns_u64_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1268 NEONMAP1(vcvtpd_s32_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1269 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1270 NEONMAP1(vcvtpd_u32_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1271 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1272 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1273 NEONMAP1(vcvtps_s64_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1274 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1275 NEONMAP1(vcvtps_u64_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1276 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1277 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1278 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1279 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1280 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1281 NEONMAP1(vcvts_s64_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1282 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1283 NEONMAP1(vcvts_u64_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1284 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
1285 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1286 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1287 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1288 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1289 NEONMAP1(vmaxv_s16, vector_reduce_smax, Add1ArgType),
1290 NEONMAP1(vmaxv_s32, vector_reduce_smax, Add1ArgType),
1291 NEONMAP1(vmaxv_s8, vector_reduce_smax, Add1ArgType),
1292 NEONMAP1(vmaxv_u16, vector_reduce_umax, Add1ArgType),
1293 NEONMAP1(vmaxv_u32, vector_reduce_umax, Add1ArgType),
1294 NEONMAP1(vmaxv_u8, vector_reduce_umax, Add1ArgType),
1295 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1296 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1297 NEONMAP1(vmaxvq_s16, vector_reduce_smax, Add1ArgType),
1298 NEONMAP1(vmaxvq_s32, vector_reduce_smax, Add1ArgType),
1299 NEONMAP1(vmaxvq_s8, vector_reduce_smax, Add1ArgType),
1300 NEONMAP1(vmaxvq_u16, vector_reduce_umax, Add1ArgType),
1301 NEONMAP1(vmaxvq_u32, vector_reduce_umax, Add1ArgType),
1302 NEONMAP1(vmaxvq_u8, vector_reduce_umax, Add1ArgType),
1303 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1304 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1305 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1306 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1307 NEONMAP1(vminv_s16, vector_reduce_smin, Add1ArgType),
1308 NEONMAP1(vminv_s32, vector_reduce_smin, Add1ArgType),
1309 NEONMAP1(vminv_s8, vector_reduce_smin, Add1ArgType),
1310 NEONMAP1(vminv_u16, vector_reduce_umin, Add1ArgType),
1311 NEONMAP1(vminv_u32, vector_reduce_umin, Add1ArgType),
1312 NEONMAP1(vminv_u8, vector_reduce_umin, Add1ArgType),
1313 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1314 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
1315 NEONMAP1(vminvq_s16, vector_reduce_smin, Add1ArgType),
1316 NEONMAP1(vminvq_s32, vector_reduce_smin, Add1ArgType),
1317 NEONMAP1(vminvq_s8, vector_reduce_smin, Add1ArgType),
1318 NEONMAP1(vminvq_u16, vector_reduce_umin, Add1ArgType),
1319 NEONMAP1(vminvq_u32, vector_reduce_umin, Add1ArgType),
1320 NEONMAP1(vminvq_u8, vector_reduce_umin, Add1ArgType),
1321 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
1322 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
1323 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
1324 NEONMAP1(vpaddd_s64, vector_reduce_add, Add1ArgType),
1325 NEONMAP1(vpaddd_u64, vector_reduce_add, Add1ArgType),
1326 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1327 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1328 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1329 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1330 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1331 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1332 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
1333 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1334 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
1335 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
1336 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
1337 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
1338 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
1339 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
1340 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
1341 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
1342 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
1343 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
1344 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
1345 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
1346 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
1347 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
1348 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
1349 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
1350 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
1351 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
1352 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
1353 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
1354 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
1355 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
1356 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
1357 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
1358 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
1359 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
1360 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
1361 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
1362 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
1363 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
1364 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
1365 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
1366 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
1367 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
1368 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
1369 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
1370 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
1371 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
1372 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
1373 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
1374 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
1375 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
1376 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
1377 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
1378 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
1379 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
1380 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
1381 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
1382 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
1383 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
1384 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
1385 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
1386 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1387 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1388 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1389 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1390 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
1391 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
1392 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1393 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1394 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1395 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1396 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
1397 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
1398 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
1399 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
1400 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
1401 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
1402 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
1403 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
1404 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
1405 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
1406 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
1407 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
1408 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
1409 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
1410 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
1411 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
1412 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
1413 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
1414 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
1415 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
1416 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
1417 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
1418 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
1419 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
1420 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
1421 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
1422 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
1423 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
1424 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
1425 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
1426 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
1427 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
1428 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
1429 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
1430 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
1431 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
1432 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
1433 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
1434 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
1435 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
1436 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
1437 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
1438 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
1439 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
1440 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
1441 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
1442 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
1443 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
1444 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
1445 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
1446 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
1447 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
1448 // FP16 scalar intrinsics go here.
1449 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
1450 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1451 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1452 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1453 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1454 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1455 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1456 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1457 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1458 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1459 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1460 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1461 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1462 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1463 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1464 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1465 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1466 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1467 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1468 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1469 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1470 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1471 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1472 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1473 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1474 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1475 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1476 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1477 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1478 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
1479 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
1480 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
1481 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
1482 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
1483};
1484// clang-format on
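As a concrete reading of one row above (illustrative only; `sum_lanes` is a hypothetical function, not part of this file): `vaddvq_s32` is mapped to the generic `vector_reduce_add` intrinsic with `Add1ArgType`, so the across-lanes reduction lowers to target-independent IR rather than an AArch64-specific intrinsic:

#include <arm_neon.h>

// Per the map above, this builtin call becomes
//   %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v)
int32_t sum_lanes(int32x4_t v) { return vaddvq_s32(v); }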
1485
1486// Some intrinsics are equivalent for codegen.
1487static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
1488 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
1489 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
1490 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
1491 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
1492 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
1493 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
1494 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
1495 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
1496 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
1497 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
1498 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
1499 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
1500 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
1501 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
1502 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
1503 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
1504 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
1505 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
1506 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
1507 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
1508 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
1509 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
1510 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
1511 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
1512 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
1513 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
1514 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
1515 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
1516 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
1517 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
1518 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
1519 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
1520 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
1521 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
1522 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
1523 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
1524 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
1525 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
1526 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
1527 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
1528 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
1529 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
1530 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
1531 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
1532 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
1533 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
1534 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
1535 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
1536 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
1537 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
1538 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
1539 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
1540 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
1541 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
1542 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
1543 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
1544 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
1545 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
1546 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
1547 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
1548 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
1549 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
1550 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
1551 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
1552 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
1553 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
1554 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
1555 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
1556 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
1557 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
1558 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
1559 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
1560 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
1561 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
1562 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
1563 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
1564 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
1565 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
1566 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
1567 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
1568 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
1569 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
1570 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
1571 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
1572 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
1573 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
1574 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
1575 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
1576 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
1577 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
1578 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
1579 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
1580 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
1581 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
1582 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
1583 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
1584 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
1585 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
1586 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
1587 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
1588 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
1589 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
1590 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
1591 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
1592 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
1593 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
1594 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
1595 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
1596 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
1597 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
1598 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
1599 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
1600 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
1601 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
1602 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
1603 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
1604 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
1605 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
1606 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
1607 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
1608 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
1609 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
1610 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
1611 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
1612 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
1613 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
1614 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
1615 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
1616 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
1617 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
1618 // arbitrary one to be handled as the canonical variation.
1619 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1620 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1621 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1622 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1623 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1624 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1625 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1626 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1627 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1628 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1629 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1630 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1631};
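A minimal sketch of how an (alias, canonical) pair table like this can be consulted before the main map lookup; the helper name is assumed, and this is not the file's actual dispatch code:

// Sketch: remap an equivalent builtin ID to its canonical twin.
static unsigned canonicalizeNeonBuiltinID(unsigned BuiltinID) {
  for (const auto &P : NEONEquivalentIntrinsicMap)
    if (P.first == BuiltinID)
      return P.second; // alias -> canonical form
  return BuiltinID;    // already canonical
}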
1632
1633#undef NEONMAP0
1634#undef NEONMAP1
1635#undef NEONMAP2
1636
1637#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
1638 { \
1639 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
1640 TypeModifier \
1641 }
1642
1643#define SVEMAP2(NameBase, TypeModifier) \
1644 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
1645static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
1646#define GET_SVE_LLVM_INTRINSIC_MAP
1647#include "clang/Basic/arm_sve_builtin_cg.inc"
1648#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
1649#undef GET_SVE_LLVM_INTRINSIC_MAP
1650};
1651
1652#undef SVEMAP1
1653#undef SVEMAP2
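To make the macro shapes concrete: a hypothetical entry SVEMAP1(svabs_s8_m, aarch64_sve_abs, TypeFlags) (the real entries come from the generated arm_sve_builtin_cg.inc) would expand, per the definitions above, to

// { "svabs_s8_m", SVE::BI__builtin_sve_svabs_s8_m,
//   Intrinsic::aarch64_sve_abs, /*AltLLVMIntrinsic=*/0, TypeFlags }

while a SVEMAP2 entry leaves both intrinsic IDs zero, for builtins whose codegen is written by hand.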
1654
1655#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
1656 { \
1657 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
1658 TypeModifier \
1659 }
1660
1661#define SMEMAP2(NameBase, TypeModifier) \
1662 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
1663static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
1664#define GET_SME_LLVM_INTRINSIC_MAP
1665#include "clang/Basic/arm_sme_builtin_cg.inc"
1666#undef GET_SME_LLVM_INTRINSIC_MAP
1667};
1668
1669#undef SMEMAP1
1670#undef SMEMAP2
1671
1672 static bool NEONSIMDIntrinsicsProvenSorted = false;
1673 
1674 static bool AArch64SIMDIntrinsicsProvenSorted = false;
1675 static bool AArch64SISDIntrinsicsProvenSorted = false;
1676 static bool AArch64SVEIntrinsicsProvenSorted = false;
1677 static bool AArch64SMEIntrinsicsProvenSorted = false;
1678 
1679 // Check whether builtin `BuiltinID` is present in `IntrinsicMap`. If so,
1680 // return a pointer to the corresponding info struct; otherwise return nullptr.
1681static const ARMVectorIntrinsicInfo *
1682 findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
1683 unsigned BuiltinID, bool &MapProvenSorted) {
1684
1685#ifndef NDEBUG
1686 if (!MapProvenSorted) {
1687 assert(llvm::is_sorted(IntrinsicMap));
1688 MapProvenSorted = true;
1689 }
1690#endif
1691
1692 const ARMVectorIntrinsicInfo *Builtin =
1693 llvm::lower_bound(IntrinsicMap, BuiltinID);
1694
1695 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
1696 return Builtin;
1697
1698 return nullptr;
1699}
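A typical lookup pairs each table with its ProvenSorted flag declared above (a sketch of a call site, assuming BuiltinID falls in the AArch64 SISD builtin range):

if (const ARMVectorIntrinsicInfo *Info = findARMVectorIntrinsicInMap(
        AArch64SISDIntrinsicMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted)) {
  // Info->LLVMIntrinsic and Info->TypeModifier drive the emit-helpers below.
}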
1700
1701 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
1702 unsigned Modifier,
1703 llvm::Type *ArgType,
1704 const CallExpr *E) {
1705 int VectorSize = 0;
1706 if (Modifier & Use64BitVectors)
1707 VectorSize = 64;
1708 else if (Modifier & Use128BitVectors)
1709 VectorSize = 128;
1710
1711 // Return type.
1712 SmallVector<llvm::Type *, 3> Tys;
1713 if (Modifier & AddRetType) {
1714 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
1715 if (Modifier & VectorizeRetType)
1716 Ty = llvm::FixedVectorType::get(
1717 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
1718
1719 Tys.push_back(Ty);
1720 }
1721
1722 // Arguments.
1723 if (Modifier & VectorizeArgTypes) {
1724 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
1725 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
1726 }
1727
1728 if (Modifier & (Add1ArgType | Add2ArgTypes))
1729 Tys.push_back(ArgType);
1730
1731 if (Modifier & Add2ArgTypes)
1732 Tys.push_back(ArgType);
1733
1734 if (Modifier & InventFloatType)
1735 Tys.push_back(FloatTy);
1736
1737 return CGM.getIntrinsic(IntrinsicID, Tys);
1738}
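Worked example (illustrative, derived from the SISD table above): `vqmovnh_s16` carries VectorRet | Use64BitVectors, so VectorSize is 64 and the scalar i8 return type is vectorized to 64 / 8 = 8 lanes, giving the overload list {<8 x i8>} and thus @llvm.aarch64.neon.sqxtn.v8i8. The SISD emit-helper below then widens the scalar i16 operand and extracts lane 0 of the result:

// %v   = insertelement <8 x i16> poison, i16 %x, i64 0
// %n   = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> %v)
// %res = extractelement <8 x i8> %n, i64 0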
1739
1740//===----------------------------------------------------------------------===//
1741// Emit-helpers
1742//===----------------------------------------------------------------------===//
1743 static Value *EmitCommonNeonSISDBuiltinExpr(
1744 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
1745 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
1746 assert(SISDInfo.LLVMIntrinsic && "Generic code assumes a valid intrinsic");
1747
1748 switch (SISDInfo.BuiltinID) {
1749 case NEON::BI__builtin_neon_vcled_s64:
1750 case NEON::BI__builtin_neon_vcled_u64:
1751 case NEON::BI__builtin_neon_vcles_f32:
1752 case NEON::BI__builtin_neon_vcled_f64:
1753 case NEON::BI__builtin_neon_vcltd_s64:
1754 case NEON::BI__builtin_neon_vcltd_u64:
1755 case NEON::BI__builtin_neon_vclts_f32:
1756 case NEON::BI__builtin_neon_vcltd_f64:
1757 case NEON::BI__builtin_neon_vcales_f32:
1758 case NEON::BI__builtin_neon_vcaled_f64:
1759 case NEON::BI__builtin_neon_vcalts_f32:
1760 case NEON::BI__builtin_neon_vcaltd_f64:
1761 // Only one direction of these comparisons actually exists: cmle is a cmge
1762 // with swapped operands. The table gives us the right intrinsic, but we
1763 // still need to do the swap here.
1764 std::swap(Ops[0], Ops[1]);
1765 break;
1766 }
1767
1768 // Determine the type(s) of this overloaded AArch64 intrinsic.
1769 llvm::Type *ArgTy = CGF.ConvertType(E->getArg(0)->getType());
1770 Function *F = CGF.LookupNeonLLVMIntrinsic(SISDInfo.LLVMIntrinsic,
1771 SISDInfo.TypeModifier, ArgTy, E);
1772
1773 int j = 0;
1774 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
1775 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
1776 ai != ae; ++ai, ++j) {
1777 llvm::Type *ArgTy = ai->getType();
1778 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
1779 ArgTy->getPrimitiveSizeInBits())
1780 continue;
1781 assert(
1782 ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy() &&
1783 "Expecting vector LLVM intrinsic type and scalar Clang builtin type!");
1784
1785 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
1786 // it before inserting.
1787 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
1788 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
1789 Ops[j] =
1790 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
1791 }
1792
1793 Value *Result = CGF.EmitNeonCall(F, Ops, SISDInfo.NameHint);
1794 llvm::Type *ResultType = CGF.ConvertType(E->getType());
1795 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
1796 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
1797 return CGF.Builder.CreateExtractElement(Result, C0);
1798
1799 return CGF.Builder.CreateBitCast(Result, ResultType, SISDInfo.NameHint);
1800}
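For instance (illustrative): AArch64 has no cmle instruction, so `vcled_s64(a, b)`, which tests a <= b, is emitted after the swap above as the equivalent greater-or-equal comparison of (b, a); the table supplies the right intrinsic and only the operand order changes:

// uint64_t r = vcled_s64(a, b);  // emitted as a cmge-style compare of (b, a)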
1801
1802 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
1803 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
1804 const char *NameHint, unsigned Modifier, const CallExpr *E,
1805 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
1806 llvm::Triple::ArchType Arch) {
1807
1808 // Extract the trailing immediate argument that encodes the type discriminator
1809 // for this overloaded intrinsic.
1810 // TODO: Move to the parent code that takes care of argument processing.
1811 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
1812 std::optional<llvm::APSInt> NeonTypeConst =
1813 Arg->getIntegerConstantExpr(getContext());
1814 if (!NeonTypeConst)
1815 return nullptr;
1816
1817 // Determine the type of this overloaded NEON intrinsic.
1818 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
1819 const bool Usgn = Type.isUnsigned();
1820 const bool Quad = Type.isQuad();
1821 const bool Floating = Type.isFloatingPoint();
1822 const bool HasFastHalfType = getTarget().hasFastHalfType();
1823 const bool AllowBFloatArgsAndRet =
1824 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
1825
1826 llvm::FixedVectorType *VTy =
1827 GetNeonType(this, Type, HasFastHalfType, false, AllowBFloatArgsAndRet);
1828 llvm::Type *Ty = VTy;
1829 if (!Ty)
1830 return nullptr;
1831
1832 auto getAlignmentValue32 = [&](Address addr) -> Value* {
1833 return Builder.getInt32(addr.getAlignment().getQuantity());
1834 };
1835
1836 unsigned Int = LLVMIntrinsic;
1837 if ((Modifier & UnsignedAlts) && !Usgn)
1838 Int = AltLLVMIntrinsic;
1839
1840 switch (BuiltinID) {
1841 default: break;
1842 case NEON::BI__builtin_neon_splat_lane_v:
1843 case NEON::BI__builtin_neon_splat_laneq_v:
1844 case NEON::BI__builtin_neon_splatq_lane_v:
1845 case NEON::BI__builtin_neon_splatq_laneq_v: {
1846 auto NumElements = VTy->getElementCount();
1847 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
1848 NumElements = NumElements * 2;
1849 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
1850 NumElements = NumElements.divideCoefficientBy(2);
1851
1852 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
1853 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
1854 }
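Illustrative note on the splat handling above: splatq_lane_v broadcasts a lane of a 64-bit vector into a 128-bit result, so the element count doubles, while splat_laneq_v reads a lane of a 128-bit vector into a 64-bit result, halving it. EmitNeonSplat then emits a single shufflevector with a constant splat mask, e.g. for lane 1 of a <2 x i32> source widened to four lanes:

// %r = shufflevector <2 x i32> %v, <2 x i32> poison,
//                    <4 x i32> <i32 1, i32 1, i32 1, i32 1>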
1855 case NEON::BI__builtin_neon_vpadd_v:
1856 case NEON::BI__builtin_neon_vpaddq_v:
1857 // We don't allow fp/int overloading of intrinsics.
1858 if (VTy->getElementType()->isFloatingPointTy() &&
1859 Int == Intrinsic::aarch64_neon_addp)
1860 Int = Intrinsic::aarch64_neon_faddp;
1861 break;
1862 case NEON::BI__builtin_neon_vabs_v:
1863 case NEON::BI__builtin_neon_vabsq_v:
1864 if (VTy->getElementType()->isFloatingPointTy())
1865 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
1866 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
1867 case NEON::BI__builtin_neon_vadd_v:
1868 case NEON::BI__builtin_neon_vaddq_v: {
1869 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
1870 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
1871 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
1872 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
1873 return Builder.CreateBitCast(Ops[0], Ty);
1874 }
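The CreateXor above is deliberate: this case serves the polynomial vadd variants, and polynomial addition over GF(2) is carry-less, i.e. a bitwise XOR. A self-contained illustration (hypothetical function name):

#include <arm_neon.h>

// Polynomial add: an XOR of the operand bits, with no carries.
poly8x8_t poly_add(poly8x8_t a, poly8x8_t b) { return vadd_p8(a, b); }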
1875 case NEON::BI__builtin_neon_vaddhn_v: {
1876 llvm::FixedVectorType *SrcTy =
1877 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1878
1879 // %sum = add <4 x i32> %lhs, %rhs
1880 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1881 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
1882 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
1883
1884 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
1885 Constant *ShiftAmt =
1886 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
1887 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
1888
1889 // %res = trunc <4 x i32> %high to <4 x i16>
1890 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
1891 }
1892 case NEON::BI__builtin_neon_vcale_v:
1893 case NEON::BI__builtin_neon_vcaleq_v:
1894 case NEON::BI__builtin_neon_vcalt_v:
1895 case NEON::BI__builtin_neon_vcaltq_v:
1896 std::swap(Ops[0], Ops[1]);
1897 [[fallthrough]];
1898 case NEON::BI__builtin_neon_vcage_v:
1899 case NEON::BI__builtin_neon_vcageq_v:
1900 case NEON::BI__builtin_neon_vcagt_v:
1901 case NEON::BI__builtin_neon_vcagtq_v: {
1902 llvm::Type *Ty;
1903 switch (VTy->getScalarSizeInBits()) {
1904 default: llvm_unreachable("unexpected type");
1905 case 32:
1906 Ty = FloatTy;
1907 break;
1908 case 64:
1909 Ty = DoubleTy;
1910 break;
1911 case 16:
1912 Ty = HalfTy;
1913 break;
1914 }
1915 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
1916 llvm::Type *Tys[] = { VTy, VecFlt };
1917 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1918 return EmitNeonCall(F, Ops, NameHint);
1919 }
1920 case NEON::BI__builtin_neon_vceqz_v:
1921 case NEON::BI__builtin_neon_vceqzq_v:
1922 return EmitAArch64CompareBuiltinExpr(
1923 Ops[0], Ty, Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ, "vceqz");
1924 case NEON::BI__builtin_neon_vcgez_v:
1925 case NEON::BI__builtin_neon_vcgezq_v:
1926 return EmitAArch64CompareBuiltinExpr(
1927 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
1928 "vcgez");
1929 case NEON::BI__builtin_neon_vclez_v:
1930 case NEON::BI__builtin_neon_vclezq_v:
1931 return EmitAArch64CompareBuiltinExpr(
1932 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
1933 "vclez");
1934 case NEON::BI__builtin_neon_vcgtz_v:
1935 case NEON::BI__builtin_neon_vcgtzq_v:
1936 return EmitAArch64CompareBuiltinExpr(
1937 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
1938 "vcgtz");
1939 case NEON::BI__builtin_neon_vcltz_v:
1940 case NEON::BI__builtin_neon_vcltzq_v:
1941 return EmitAArch64CompareBuiltinExpr(
1942 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
1943 "vcltz");
1944 case NEON::BI__builtin_neon_vclz_v:
1945 case NEON::BI__builtin_neon_vclzq_v:
1946 // We generate a target-independent intrinsic, which needs a second argument
1947 // for whether or not clz of zero is undefined; on ARM it isn't.
1948 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
1949 break;
1950 case NEON::BI__builtin_neon_vcvt_f32_v:
1951 case NEON::BI__builtin_neon_vcvtq_f32_v:
1952 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1953 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
1954 HasFastHalfType);
1955 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1956 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1957 case NEON::BI__builtin_neon_vcvt_f16_s16:
1958 case NEON::BI__builtin_neon_vcvt_f16_u16:
1959 case NEON::BI__builtin_neon_vcvtq_f16_s16:
1960 case NEON::BI__builtin_neon_vcvtq_f16_u16:
1961 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1962 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
1963 HasFastHalfType);
1964 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1965 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1966 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
1967 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
1968 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
1969 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
1970 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
1971 Function *F = CGM.getIntrinsic(Int, Tys);
1972 return EmitNeonCall(F, Ops, "vcvt_n");
1973 }
1974 case NEON::BI__builtin_neon_vcvt_n_f32_v:
1975 case NEON::BI__builtin_neon_vcvt_n_f64_v:
1976 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
1977 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
1978 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
1979 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
1980 Function *F = CGM.getIntrinsic(Int, Tys);
1981 return EmitNeonCall(F, Ops, "vcvt_n");
1982 }
1983 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
1984 case NEON::BI__builtin_neon_vcvt_n_s32_v:
1985 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
1986 case NEON::BI__builtin_neon_vcvt_n_u32_v:
1987 case NEON::BI__builtin_neon_vcvt_n_s64_v:
1988 case NEON::BI__builtin_neon_vcvt_n_u64_v:
1989 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
1990 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
1991 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
1992 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
1993 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
1994 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
1995 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
1996 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1997 return EmitNeonCall(F, Ops, "vcvt_n");
1998 }
1999 case NEON::BI__builtin_neon_vcvt_s32_v:
2000 case NEON::BI__builtin_neon_vcvt_u32_v:
2001 case NEON::BI__builtin_neon_vcvt_s64_v:
2002 case NEON::BI__builtin_neon_vcvt_u64_v:
2003 case NEON::BI__builtin_neon_vcvt_s16_f16:
2004 case NEON::BI__builtin_neon_vcvt_u16_f16:
2005 case NEON::BI__builtin_neon_vcvtq_s32_v:
2006 case NEON::BI__builtin_neon_vcvtq_u32_v:
2007 case NEON::BI__builtin_neon_vcvtq_s64_v:
2008 case NEON::BI__builtin_neon_vcvtq_u64_v:
2009 case NEON::BI__builtin_neon_vcvtq_s16_f16:
2010 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
2011 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
2012 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
2013 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
2014 }
2015 case NEON::BI__builtin_neon_vcvta_s16_f16:
2016 case NEON::BI__builtin_neon_vcvta_s32_v:
2017 case NEON::BI__builtin_neon_vcvta_s64_v:
2018 case NEON::BI__builtin_neon_vcvta_u16_f16:
2019 case NEON::BI__builtin_neon_vcvta_u32_v:
2020 case NEON::BI__builtin_neon_vcvta_u64_v:
2021 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
2022 case NEON::BI__builtin_neon_vcvtaq_s32_v:
2023 case NEON::BI__builtin_neon_vcvtaq_s64_v:
2024 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
2025 case NEON::BI__builtin_neon_vcvtaq_u32_v:
2026 case NEON::BI__builtin_neon_vcvtaq_u64_v:
2027 case NEON::BI__builtin_neon_vcvtn_s16_f16:
2028 case NEON::BI__builtin_neon_vcvtn_s32_v:
2029 case NEON::BI__builtin_neon_vcvtn_s64_v:
2030 case NEON::BI__builtin_neon_vcvtn_u16_f16:
2031 case NEON::BI__builtin_neon_vcvtn_u32_v:
2032 case NEON::BI__builtin_neon_vcvtn_u64_v:
2033 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
2034 case NEON::BI__builtin_neon_vcvtnq_s32_v:
2035 case NEON::BI__builtin_neon_vcvtnq_s64_v:
2036 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
2037 case NEON::BI__builtin_neon_vcvtnq_u32_v:
2038 case NEON::BI__builtin_neon_vcvtnq_u64_v:
2039 case NEON::BI__builtin_neon_vcvtp_s16_f16:
2040 case NEON::BI__builtin_neon_vcvtp_s32_v:
2041 case NEON::BI__builtin_neon_vcvtp_s64_v:
2042 case NEON::BI__builtin_neon_vcvtp_u16_f16:
2043 case NEON::BI__builtin_neon_vcvtp_u32_v:
2044 case NEON::BI__builtin_neon_vcvtp_u64_v:
2045 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
2046 case NEON::BI__builtin_neon_vcvtpq_s32_v:
2047 case NEON::BI__builtin_neon_vcvtpq_s64_v:
2048 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
2049 case NEON::BI__builtin_neon_vcvtpq_u32_v:
2050 case NEON::BI__builtin_neon_vcvtpq_u64_v:
2051 case NEON::BI__builtin_neon_vcvtm_s16_f16:
2052 case NEON::BI__builtin_neon_vcvtm_s32_v:
2053 case NEON::BI__builtin_neon_vcvtm_s64_v:
2054 case NEON::BI__builtin_neon_vcvtm_u16_f16:
2055 case NEON::BI__builtin_neon_vcvtm_u32_v:
2056 case NEON::BI__builtin_neon_vcvtm_u64_v:
2057 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
2058 case NEON::BI__builtin_neon_vcvtmq_s32_v:
2059 case NEON::BI__builtin_neon_vcvtmq_s64_v:
2060 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
2061 case NEON::BI__builtin_neon_vcvtmq_u32_v:
2062 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
2063 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
2064 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
2065 }
2066 case NEON::BI__builtin_neon_vcvtx_f32_v: {
2067 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
2068 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
2069
2070 }
2071 case NEON::BI__builtin_neon_vext_v:
2072 case NEON::BI__builtin_neon_vextq_v: {
2073 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
2074 SmallVector<int, 16> Indices;
2075 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
2076 Indices.push_back(i+CV);
2077
2078 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2079 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2080 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
2081 }
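For example (illustrative): with 8-lane vectors and CV == 3, the mask built above is <3,4,5,6,7,8,9,10>, i.e. the top five lanes of Ops[0] followed by the low three lanes of Ops[1], matching VEXT's concatenate-then-extract semantics:

// %r = shufflevector <8 x i8> %a, <8 x i8> %b,
//        <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>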
2082 case NEON::BI__builtin_neon_vfma_v:
2083 case NEON::BI__builtin_neon_vfmaq_v: {
2084 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2085 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2086 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2087
2088 // The NEON intrinsic puts the accumulator first, unlike LLVM's fma.
2089 return emitCallMaybeConstrainedFPBuiltin(
2090 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
2091 {Ops[1], Ops[2], Ops[0]});
2092 }
2093 case NEON::BI__builtin_neon_vld1_v:
2094 case NEON::BI__builtin_neon_vld1q_v: {
2095 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2096 Ops.push_back(getAlignmentValue32(PtrOp0));
2097 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
2098 }
2099 case NEON::BI__builtin_neon_vld1_x2_v:
2100 case NEON::BI__builtin_neon_vld1q_x2_v:
2101 case NEON::BI__builtin_neon_vld1_x3_v:
2102 case NEON::BI__builtin_neon_vld1q_x3_v:
2103 case NEON::BI__builtin_neon_vld1_x4_v:
2104 case NEON::BI__builtin_neon_vld1q_x4_v: {
2105 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
2106 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
2107 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
2108 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
2109 }
2110 case NEON::BI__builtin_neon_vld2_v:
2111 case NEON::BI__builtin_neon_vld2q_v:
2112 case NEON::BI__builtin_neon_vld3_v:
2113 case NEON::BI__builtin_neon_vld3q_v:
2114 case NEON::BI__builtin_neon_vld4_v:
2115 case NEON::BI__builtin_neon_vld4q_v:
2116 case NEON::BI__builtin_neon_vld2_dup_v:
2117 case NEON::BI__builtin_neon_vld2q_dup_v:
2118 case NEON::BI__builtin_neon_vld3_dup_v:
2119 case NEON::BI__builtin_neon_vld3q_dup_v:
2120 case NEON::BI__builtin_neon_vld4_dup_v:
2121 case NEON::BI__builtin_neon_vld4q_dup_v: {
2122 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2123 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
2124 Value *Align = getAlignmentValue32(PtrOp1);
2125 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
2126 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
2127 }
2128 case NEON::BI__builtin_neon_vld1_dup_v:
2129 case NEON::BI__builtin_neon_vld1q_dup_v: {
2130 Value *V = PoisonValue::get(Ty);
2131 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
2132 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
2133 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
2134 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
2135 return EmitNeonSplat(Ops[0], CI);
2136 }
2137 case NEON::BI__builtin_neon_vld2_lane_v:
2138 case NEON::BI__builtin_neon_vld2q_lane_v:
2139 case NEON::BI__builtin_neon_vld3_lane_v:
2140 case NEON::BI__builtin_neon_vld3q_lane_v:
2141 case NEON::BI__builtin_neon_vld4_lane_v:
2142 case NEON::BI__builtin_neon_vld4q_lane_v: {
2143 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2144 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
2145 for (unsigned I = 2; I < Ops.size() - 1; ++I)
2146 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
2147 Ops.push_back(getAlignmentValue32(PtrOp1));
2148 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
2149 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
2150 }
2151 case NEON::BI__builtin_neon_vmovl_v: {
2152 llvm::FixedVectorType *DTy =
2153 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
2154 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
2155 if (Usgn)
2156 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
2157 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
2158 }
2159 case NEON::BI__builtin_neon_vmovn_v: {
2160 llvm::FixedVectorType *QTy =
2161 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2162 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
2163 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
2164 }
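Both vmovl and vmovn lower to plain IR with no target intrinsic. A self-contained illustration (hypothetical function names):

#include <arm_neon.h>

int16x8_t widen(int8x8_t v)  { return vmovl_s8(v); }  // sext <8 x i8> to <8 x i16>
int8x8_t narrow(int16x8_t v) { return vmovn_s16(v); } // trunc <8 x i16> to <8 x i8>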
2165 case NEON::BI__builtin_neon_vmull_v:
2166 // FIXME: the integer vmull operations could be emitted in terms of pure
2167 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
2168 // hoisting the exts outside loops. Until GlobalISel comes along and can
2169 // see through such movement, this leads to bad CodeGen. So we need an
2170 // intrinsic for now.
2171 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
2172 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
2173 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
2174 case NEON::BI__builtin_neon_vpadal_v:
2175 case NEON::BI__builtin_neon_vpadalq_v: {
2176 // The source operand type has twice as many elements of half the size.
2177 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2178 llvm::Type *EltTy =
2179 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
2180 auto *NarrowTy =
2181 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
2182 llvm::Type *Tys[2] = { Ty, NarrowTy };
2183 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
2184 }
2185 case NEON::BI__builtin_neon_vpaddl_v:
2186 case NEON::BI__builtin_neon_vpaddlq_v: {
2187 // The source operand type has twice as many elements of half the size.
2188 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2189 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
2190 auto *NarrowTy =
2191 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
2192 llvm::Type *Tys[2] = { Ty, NarrowTy };
2193 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
2194 }
2195 case NEON::BI__builtin_neon_vqdmlal_v:
2196 case NEON::BI__builtin_neon_vqdmlsl_v: {
2197 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
2198 Ops[1] =
2199 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
2200 Ops.resize(2);
2201 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
2202 }
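Spelled out (illustrative): vqdmlal_s16(a, b, c) is emitted as two intrinsic calls, a saturating doubling multiply followed by a saturating accumulate; vqdmlsl differs only in using a saturating subtract for the second step:

// Conceptually (exact intrinsic names depend on the target):
//   %prod = sqdmull(b, c)    ; LLVMIntrinsic
//   %res  = sqadd(a, %prod)  ; AltLLVMIntrinsic (sqsub for vqdmlsl)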
2203 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
2204 case NEON::BI__builtin_neon_vqdmulh_lane_v:
2205 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
2206 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
2207 auto *RTy = cast<llvm::FixedVectorType>(Ty);
2208 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
2209 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
2210 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
2211 RTy->getNumElements() * 2);
2212 llvm::Type *Tys[2] = {
2213 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
2214 /*isQuad*/ false))};
2215 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
2216 }
2217 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
2218 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
2219 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
2220 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
2221 llvm::Type *Tys[2] = {
2222 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
2223 /*isQuad*/ true))};
2224 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
2225 }
2226 case NEON::BI__builtin_neon_vqshl_n_v:
2227 case NEON::BI__builtin_neon_vqshlq_n_v:
2228 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
2229 1, false);
2230 case NEON::BI__builtin_neon_vqshlu_n_v:
2231 case NEON::BI__builtin_neon_vqshluq_n_v:
2232 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
2233 1, false);
2234 case NEON::BI__builtin_neon_vrecpe_v:
2235 case NEON::BI__builtin_neon_vrecpeq_v:
2236 case NEON::BI__builtin_neon_vrsqrte_v:
2237 case NEON::BI__builtin_neon_vrsqrteq_v:
2238 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
2239 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
2240 case NEON::BI__builtin_neon_vrndi_v:
2241 case NEON::BI__builtin_neon_vrndiq_v:
2242 Int = Builder.getIsFPConstrained()
2243 ? Intrinsic::experimental_constrained_nearbyint
2244 : Intrinsic::nearbyint;
2245 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
2246 case NEON::BI__builtin_neon_vrshr_n_v:
2247 case NEON::BI__builtin_neon_vrshrq_n_v:
2248 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
2249 1, true);
2250 case NEON::BI__builtin_neon_vsha512hq_u64:
2251 case NEON::BI__builtin_neon_vsha512h2q_u64:
2252 case NEON::BI__builtin_neon_vsha512su0q_u64:
2253 case NEON::BI__builtin_neon_vsha512su1q_u64: {
2254 Function *F = CGM.getIntrinsic(Int);
2255 return EmitNeonCall(F, Ops, "");
2256 }
2257 case NEON::BI__builtin_neon_vshl_n_v:
2258 case NEON::BI__builtin_neon_vshlq_n_v:
2259 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
2260 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
2261 "vshl_n");
2262 case NEON::BI__builtin_neon_vshll_n_v: {
2263 llvm::FixedVectorType *SrcTy =
2264 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
2265 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2266 if (Usgn)
2267 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
2268 else
2269 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
2270 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
2271 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
2272 }
2273 case NEON::BI__builtin_neon_vshrn_n_v: {
2274 llvm::FixedVectorType *SrcTy =
2275 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2276 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2277 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
2278 if (Usgn)
2279 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
2280 else
2281 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
2282 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
2283 }
2284 case NEON::BI__builtin_neon_vshr_n_v:
2285 case NEON::BI__builtin_neon_vshrq_n_v:
2286 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
2287 case NEON::BI__builtin_neon_vst1_v:
2288 case NEON::BI__builtin_neon_vst1q_v:
2289 case NEON::BI__builtin_neon_vst2_v:
2290 case NEON::BI__builtin_neon_vst2q_v:
2291 case NEON::BI__builtin_neon_vst3_v:
2292 case NEON::BI__builtin_neon_vst3q_v:
2293 case NEON::BI__builtin_neon_vst4_v:
2294 case NEON::BI__builtin_neon_vst4q_v:
2295 case NEON::BI__builtin_neon_vst2_lane_v:
2296 case NEON::BI__builtin_neon_vst2q_lane_v:
2297 case NEON::BI__builtin_neon_vst3_lane_v:
2298 case NEON::BI__builtin_neon_vst3q_lane_v:
2299 case NEON::BI__builtin_neon_vst4_lane_v:
2300 case NEON::BI__builtin_neon_vst4q_lane_v: {
2301 llvm::Type *Tys[] = {Int8PtrTy, Ty};
2302 Ops.push_back(getAlignmentValue32(PtrOp0));
2303 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
2304 }
2305 case NEON::BI__builtin_neon_vsm3partw1q_u32:
2306 case NEON::BI__builtin_neon_vsm3partw2q_u32:
2307 case NEON::BI__builtin_neon_vsm3ss1q_u32:
2308 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
2309 case NEON::BI__builtin_neon_vsm4eq_u32: {
2310 Function *F = CGM.getIntrinsic(Int);
2311 return EmitNeonCall(F, Ops, "");
2312 }
2313 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
2314 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
2315 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
2316 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
2317 Function *F = CGM.getIntrinsic(Int);
2318 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
2319 return EmitNeonCall(F, Ops, "");
2320 }
2321 case NEON::BI__builtin_neon_vst1_x2_v:
2322 case NEON::BI__builtin_neon_vst1q_x2_v:
2323 case NEON::BI__builtin_neon_vst1_x3_v:
2324 case NEON::BI__builtin_neon_vst1q_x3_v:
2325 case NEON::BI__builtin_neon_vst1_x4_v:
2326 case NEON::BI__builtin_neon_vst1q_x4_v: {
2327 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
2328 // in AArch64 it comes last. We may want to stick to one or the other.
2329 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
2330 Arch == llvm::Triple::aarch64_32) {
2331 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
2332 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
2333 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
2334 }
2335 llvm::Type *Tys[2] = {DefaultPtrTy, VTy};
2336 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
2337 }
2338 case NEON::BI__builtin_neon_vsubhn_v: {
2339 llvm::FixedVectorType *SrcTy =
2340 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2341
2342 // %diff = sub <4 x i32> %lhs, %rhs
2343 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2344 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
2345 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
2346
2347 // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
2348 Constant *ShiftAmt =
2349 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
2350 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
2351
2352 // %res = trunc <4 x i32> %high to <4 x i16>
2353 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
2354 }
2355 case NEON::BI__builtin_neon_vtrn_v:
2356 case NEON::BI__builtin_neon_vtrnq_v: {
2357 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2358 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2359 Value *SV = nullptr;
2360
2361 for (unsigned vi = 0; vi != 2; ++vi) {
2362 SmallVector<int, 16> Indices;
2363 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2364 Indices.push_back(i+vi);
2365 Indices.push_back(i+e+vi);
2366 }
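 // e.g. for <4 x i16> (e = 4): vi == 0 gives {0,4,2,6} and vi == 1 gives
 // {1,5,3,7}, the two results of transposing adjacent lane pairs.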
2367 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
2368 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
2369 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
2370 }
2371 return SV;
2372 }
2373 case NEON::BI__builtin_neon_vtst_v:
2374 case NEON::BI__builtin_neon_vtstq_v: {
2375 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2376 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2377 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
2378 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
2379 ConstantAggregateZero::get(Ty));
2380 return Builder.CreateSExt(Ops[0], Ty, "vtst");
2381 }
2382 case NEON::BI__builtin_neon_vuzp_v:
2383 case NEON::BI__builtin_neon_vuzpq_v: {
2384 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2385 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2386 Value *SV = nullptr;
2387
2388 for (unsigned vi = 0; vi != 2; ++vi) {
2389 SmallVector<int, 16> Indices;
2390 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
2391 Indices.push_back(2*i+vi);
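 // e.g. for <4 x i16>: vi == 0 selects the even lanes {0,2,4,6} and
 // vi == 1 the odd lanes {1,3,5,7} of the concatenated inputs.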
2392
2393 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
2394 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
2395 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
2396 }
2397 return SV;
2398 }
2399 case NEON::BI__builtin_neon_vxarq_u64: {
2400 Function *F = CGM.getIntrinsic(Int);
2401 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
2402 return EmitNeonCall(F, Ops, "");
2403 }
2404 case NEON::BI__builtin_neon_vzip_v:
2405 case NEON::BI__builtin_neon_vzipq_v: {
2406 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2407 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2408 Value *SV = nullptr;
2409
2410 for (unsigned vi = 0; vi != 2; ++vi) {
2411 SmallVector<int, 16> Indices;
2412 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2413 Indices.push_back((i + vi*e) >> 1);
2414 Indices.push_back(((i + vi*e) >> 1)+e);
2415 }
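 // e.g. for <4 x i16> (e = 4): vi == 0 gives {0,4,1,5} and vi == 1 gives
 // {2,6,3,7}, interleaving the low and high halves of the inputs.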
2416 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
2417 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
2418 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
2419 }
2420 return SV;
2421 }
2422 case NEON::BI__builtin_neon_vdot_s32:
2423 case NEON::BI__builtin_neon_vdot_u32:
2424 case NEON::BI__builtin_neon_vdotq_s32:
2425 case NEON::BI__builtin_neon_vdotq_u32: {
2426 auto *InputTy =
2427 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2428 llvm::Type *Tys[2] = { Ty, InputTy };
2429 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
2430 }
2431 case NEON::BI__builtin_neon_vfmlal_low_f16:
2432 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
2433 auto *InputTy =
2434 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2435 llvm::Type *Tys[2] = { Ty, InputTy };
2436 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
2437 }
2438 case NEON::BI__builtin_neon_vfmlsl_low_f16:
2439 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
2440 auto *InputTy =
2441 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2442 llvm::Type *Tys[2] = { Ty, InputTy };
2443 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
2444 }
2445 case NEON::BI__builtin_neon_vfmlal_high_f16:
2446 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
2447 auto *InputTy =
2448 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2449 llvm::Type *Tys[2] = { Ty, InputTy };
2450 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
2451 }
2452 case NEON::BI__builtin_neon_vfmlsl_high_f16:
2453 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
2454 auto *InputTy =
2455 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2456 llvm::Type *Tys[2] = { Ty, InputTy };
2457 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
2458 }
2459 case NEON::BI__builtin_neon_vmmlaq_s32:
2460 case NEON::BI__builtin_neon_vmmlaq_u32: {
2461 auto *InputTy =
2462 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2463 llvm::Type *Tys[2] = { Ty, InputTy };
2464 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
2465 }
2466 case NEON::BI__builtin_neon_vusmmlaq_s32: {
2467 auto *InputTy =
2468 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2469 llvm::Type *Tys[2] = { Ty, InputTy };
2470 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
2471 }
2472 case NEON::BI__builtin_neon_vusdot_s32:
2473 case NEON::BI__builtin_neon_vusdotq_s32: {
2474 auto *InputTy =
2475 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2476 llvm::Type *Tys[2] = { Ty, InputTy };
2477 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
2478 }
2479 case NEON::BI__builtin_neon_vbfdot_f32:
2480 case NEON::BI__builtin_neon_vbfdotq_f32: {
2481 llvm::Type *InputTy =
2482 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
2483 llvm::Type *Tys[2] = { Ty, InputTy };
2484 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
2485 }
2486 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
2487 llvm::Type *Tys[1] = { Ty };
2488 Function *F = CGM.getIntrinsic(Int, Tys);
2489 return EmitNeonCall(F, Ops, "vcvtfp2bf");
2490 }
2491
2492 }
2493
2494 assert(Int && "Expected valid intrinsic number");
2495
2496 // Determine the type(s) of this overloaded NEON intrinsic.
2497 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
2498
2499 Value *Result = EmitNeonCall(F, Ops, NameHint);
2500 llvm::Type *ResultType = ConvertType(E->getType());
2501 // Cast the intrinsic's one-element vector result back to the scalar type
2502 // expected by the builtin.
2503 return Builder.CreateBitCast(Result, ResultType, NameHint);
2504}
2505
2506Value *
2507CodeGenFunction::EmitAArch64CompareBuiltinExpr(Value *Op, llvm::Type *Ty,
2508 const CmpInst::Predicate Pred,
2509 const Twine &Name) {
2510
2511 if (isa<FixedVectorType>(Ty)) {
2512 // Vector types are cast to i8 vectors. Recover original type.
2513 Op = Builder.CreateBitCast(Op, Ty);
2514 }
2515
2516 Constant *zero = Constant::getNullValue(Op->getType());
2517
2518 if (CmpInst::isFPPredicate(Pred)) {
2519 if (Pred == CmpInst::FCMP_OEQ)
2520 Op = Builder.CreateFCmp(Pred, Op, zero);
2521 else
2522 Op = Builder.CreateFCmpS(Pred, Op, zero);
2523 } else {
2524 Op = Builder.CreateICmp(Pred, Op, zero);
2525 }
2526
2527 llvm::Type *ResTy = Ty;
2528 if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
2529 ResTy = FixedVectorType::get(
2530 IntegerType::get(getLLVMContext(), VTy->getScalarSizeInBits()),
2531 VTy->getNumElements());
2532
2533 return Builder.CreateSExt(Op, ResTy, Name);
2534}
2535
2536static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
2537 Value *ExtOp, Value *IndexOp,
2538 llvm::Type *ResTy, unsigned IntID,
2539 const char *Name) {
2540 SmallVector<Value *, 2> TblOps;
2541 if (ExtOp)
2542 TblOps.push_back(ExtOp);
2543
2544 // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
2545 SmallVector<int, 16> Indices;
2546 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
2547 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
2548 Indices.push_back(2*i);
2549 Indices.push_back(2*i+1);
2550 }
2551
2552 int PairPos = 0, End = Ops.size() - 1;
2553 while (PairPos < End) {
2554 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
2555 Ops[PairPos+1], Indices,
2556 Name));
2557 PairPos += 2;
2558 }
2559
2560 // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
2561 // of the last 128-bit lookup table with zero.
2562 if (PairPos == End) {
2563 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
2564 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
2565 ZeroTbl, Indices, Name));
2566 }
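 // e.g. (illustrative) for vtbl3 the three 64-bit tables {T0,T1,T2} become
 // two 128-bit tables: (T0,T1) concatenated, and (T2, zero).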
2567
2568 Function *TblF;
2569 TblOps.push_back(IndexOp);
2570 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
2571
2572 return CGF.EmitNeonCall(TblF, TblOps, Name);
2573}
2574
2575Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
2576 unsigned Value;
2577 switch (BuiltinID) {
2578 default:
2579 return nullptr;
2580 case clang::ARM::BI__builtin_arm_nop:
2581 Value = 0;
2582 break;
2583 case clang::ARM::BI__builtin_arm_yield:
2584 case clang::ARM::BI__yield:
2585 Value = 1;
2586 break;
2587 case clang::ARM::BI__builtin_arm_wfe:
2588 case clang::ARM::BI__wfe:
2589 Value = 2;
2590 break;
2591 case clang::ARM::BI__builtin_arm_wfi:
2592 case clang::ARM::BI__wfi:
2593 Value = 3;
2594 break;
2595 case clang::ARM::BI__builtin_arm_sev:
2596 case clang::ARM::BI__sev:
2597 Value = 4;
2598 break;
2599 case clang::ARM::BI__builtin_arm_sevl:
2600 case clang::ARM::BI__sevl:
2601 Value = 5;
2602 break;
2603 }
2604
2605 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
2606 llvm::ConstantInt::get(Int32Ty, Value));
2607}
2608
2609enum SpecialRegisterAccessKind {
2610 NormalRead,
2611 VolatileRead,
2612 Write,
2613};
2614
2615// Generates the IR for a read/write special register builtin.
2616// ValueType is the type of the value that is to be written or read;
2617// RegisterType is the type of the register being written to or read from.
2618static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
2619 const CallExpr *E,
2620 llvm::Type *RegisterType,
2621 llvm::Type *ValueType,
2622 SpecialRegisterAccessKind AccessKind,
2623 StringRef SysReg = "") {
2624 // Write and read register intrinsics only support 32-, 64- and 128-bit operations.
2625 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
2626 RegisterType->isIntegerTy(128)) &&
2627 "Unsupported size for register.");
2628
2629 CodeGen::CGBuilderTy &Builder = CGF.Builder;
2630 CodeGen::CodeGenModule &CGM = CGF.CGM;
2631 LLVMContext &Context = CGM.getLLVMContext();
2632
2633 if (SysReg.empty()) {
2634 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
2635 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
2636 }
2637
2638 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
2639 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
2640 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
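 // e.g. (illustrative) a 64-bit volatile read of the register "sp" would
 // emit roughly: %0 = call i64 @llvm.read_volatile_register.i64(metadata !0)
 // with !0 = !{!"sp"}.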
2641
2642 llvm::Type *Types[] = { RegisterType };
2643
2644 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
2645 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
2646 && "Can't fit 64-bit value in 32-bit register");
2647
2648 if (AccessKind != Write) {
2649 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
2650 llvm::Function *F = CGM.getIntrinsic(
2651 AccessKind == VolatileRead ? Intrinsic::read_volatile_register
2652 : Intrinsic::read_register,
2653 Types);
2654 llvm::Value *Call = Builder.CreateCall(F, Metadata);
2655
2656 if (MixedTypes)
2657 // Read into a 64-bit register, then truncate the result to 32 bits.
2658 return Builder.CreateTrunc(Call, ValueType);
2659
2660 if (ValueType->isPointerTy())
2661 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
2662 return Builder.CreateIntToPtr(Call, ValueType);
2663
2664 return Call;
2665 }
2666
2667 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
2668 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
2669 if (MixedTypes) {
2670 // Extend 32 bit write value to 64 bit to pass to write.
2671 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
2672 return Builder.CreateCall(F, { Metadata, ArgValue });
2673 }
2674
2675 if (ValueType->isPointerTy()) {
2676 // Have a VoidPtrTy ArgValue but need to pass an i32/i64 to the intrinsic.
2677 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
2678 return Builder.CreateCall(F, { Metadata, ArgValue });
2679 }
2680
2681 return Builder.CreateCall(F, { Metadata, ArgValue });
2682}
2683
2684static Value *EmitRangePrefetchBuiltin(CodeGenFunction &CGF, unsigned BuiltinID,
2685 const CallExpr *E) {
2686 CodeGen::CGBuilderTy &Builder = CGF.Builder;
2687 CodeGen::CodeGenModule &CGM = CGF.CGM;
2688 llvm::SmallVector<Value *, 4> Ops;
2689
2690 auto getIntArg = [&](unsigned ArgNo) {
2691 Expr::EvalResult Result;
2692 if (!E->getArg(ArgNo)->EvaluateAsInt(Result, CGM.getContext()))
2693 llvm_unreachable("Expected constant argument to range prefetch.");
2694 return Result.Val.getInt().getExtValue();
2695 };
2696
2697 Ops.push_back(CGF.EmitScalarExpr(E->getArg(0))); /*Addr*/
2698 Ops.push_back(CGF.EmitScalarExpr(E->getArg(1))); /*Access Kind*/
2699 Ops.push_back(CGF.EmitScalarExpr(E->getArg(2))); /*Policy*/
2700
2701 if (BuiltinID == clang::AArch64::BI__builtin_arm_range_prefetch_x) {
2702 auto Length = getIntArg(3);
2703 auto Count = getIntArg(4) - 1;
2704 auto Stride = getIntArg(5);
2705 auto Distance = getIntArg(6);
2706
2707 // Map ReuseDistance given in bytes to four bits representing decreasing
2708 // powers of two in the range 512MiB (0b0001) to 32KiB (0b1111). Values
2709 // are rounded up to the nearest power of 2, starting at 32KiB. Any value
2710 // over the maximum is represented by 0 (distance not known).
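 // e.g. (illustrative): Distance = 1MiB gives Log2_32_Ceil = 20, encoded as
 // 30 - 20 = 10 (0b1010); anything <= 32KiB encodes as 15 (0b1111) and
 // anything over 512MiB as 0.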
2711 if (Distance > 0) {
2712 Distance = llvm::Log2_32_Ceil(Distance);
2713 if (Distance < 15)
2714 Distance = 15;
2715 else if (Distance > 29)
2716 Distance = 0;
2717 else
2718 Distance = 30 - Distance;
2719 }
2720
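 // Packed metadata layout: bits [21:0] Length, bits [37:22] Count,
 // bits [59:38] Stride, bits [63:60] Distance.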
2721 uint64_t Mask22 = (1ULL << 22) - 1;
2722 uint64_t Mask16 = (1ULL << 16) - 1;
2723 uint64_t Metadata = (Distance << 60) | ((Stride & Mask22) << 38) |
2724 ((Count & Mask16) << 22) | (Length & Mask22);
2725
2726 Ops.push_back(llvm::ConstantInt::get(Builder.getInt64Ty(), Metadata));
2727 } else
2728 Ops.push_back(CGF.EmitScalarExpr(E->getArg(3)));
2729
2730 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_range_prefetch),
2731 Ops);
2732}
2733
2734/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
2735/// argument that specifies the vector type. The additional argument is meant
2736/// for Sema checking (see `CheckNeonBuiltinFunctionCall`) and this function
2737/// should be kept consistent with the logic in Sema.
2738/// TODO: Make this return false for SISD builtins.
2739static bool HasExtraNeonArgument(unsigned BuiltinID) {
2740 // Required by the headers included below, but not in this particular
2741 // function.
2742 [[maybe_unused]] int PtrArgNum = -1;
2743 [[maybe_unused]] bool HasConstPtr = false;
2744
2745 // The mask encodes the type. We don't care about the actual value. Instead,
2746 // we just check whether it's been set.
2747 uint64_t mask = 0;
2748 switch (BuiltinID) {
2749#define GET_NEON_OVERLOAD_CHECK
2750#include "clang/Basic/arm_fp16.inc"
2751#include "clang/Basic/arm_neon.inc"
2752#undef GET_NEON_OVERLOAD_CHECK
2753 // Non-NEON builtins for controlling VFP that take an extra argument
2754 // discriminating the type.
2755 case ARM::BI__builtin_arm_vcvtr_f:
2756 case ARM::BI__builtin_arm_vcvtr_d:
2757 mask = 1;
2758 }
2759
2760 if (mask)
2761 return true;
2762
2763 return false;
2764}
2765
2766Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
2767 const CallExpr *E,
2768 ReturnValueSlot ReturnValue,
2769 llvm::Triple::ArchType Arch) {
2770 if (auto Hint = GetValueForARMHint(BuiltinID))
2771 return Hint;
2772
2773 if (BuiltinID == clang::ARM::BI__emit) {
2774 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
2775 llvm::FunctionType *FTy =
2776 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
2777
2778 Expr::EvalResult Result;
2779 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
2780 llvm_unreachable("Sema will ensure that the parameter is constant");
2781
2782 llvm::APSInt Value = Result.Val.getInt();
2783 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
2784
2785 llvm::InlineAsm *Emit =
2786 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
2787 /*hasSideEffects=*/true)
2788 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
2789 /*hasSideEffects=*/true);
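 // e.g. (illustrative): on Thumb, __emit(0xbf00) emits ".inst.n 0xbf00",
 // the encoding of a Thumb NOP.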
2790
2791 return Builder.CreateCall(Emit);
2792 }
2793
2794 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
2795 Value *Option = EmitScalarExpr(E->getArg(0));
2796 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
2797 }
2798
2799 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
2800 Value *Address = EmitScalarExpr(E->getArg(0));
2801 Value *RW = EmitScalarExpr(E->getArg(1));
2802 Value *IsData = EmitScalarExpr(E->getArg(2));
2803
2804 // Locality is not supported on ARM target
2805 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
2806
2807 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
2808 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
2809 }
2810
2811 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
2812 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2813 return Builder.CreateCall(
2814 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
2815 }
2816
2817 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
2818 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
2819 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2820 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
2821 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
2822 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
2823 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
2824 return Res;
2825 }
2826
2827
2828 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
2829 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2830 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
2831 }
2832 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
2833 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2834 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
2835 "cls");
2836 }
2837
2838 if (BuiltinID == clang::ARM::BI__clear_cache) {
2839 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
2840 const FunctionDecl *FD = E->getDirectCallee();
2841 Value *Ops[2];
2842 for (unsigned i = 0; i < 2; i++)
2843 Ops[i] = EmitScalarExpr(E->getArg(i));
2844 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
2845 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
2846 StringRef Name = FD->getName();
2847 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
2848 }
2849
2850 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
2851 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
2852 Function *F;
2853
2854 switch (BuiltinID) {
2855 default: llvm_unreachable("unexpected builtin");
2856 case clang::ARM::BI__builtin_arm_mcrr:
2857 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
2858 break;
2859 case clang::ARM::BI__builtin_arm_mcrr2:
2860 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
2861 break;
2862 }
2863
2864 // The MCRR{2} instruction has 5 operands, and so does the corresponding
2865 // intrinsic, but the builtin takes only 4: Rt and Rt2 are packed into a
2866 // single unsigned 64-bit integer in the builtin's definition, so we split
2867 // that value into two 32-bit halves before calling the intrinsic.
2870
2871 Value *Coproc = EmitScalarExpr(E->getArg(0));
2872 Value *Opc1 = EmitScalarExpr(E->getArg(1));
2873 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
2874 Value *CRm = EmitScalarExpr(E->getArg(3));
2875
2876 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
2877 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
2878 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
2879 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
2880
2881 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
2882 }
2883
2884 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
2885 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
2886 Function *F;
2887
2888 switch (BuiltinID) {
2889 default: llvm_unreachable("unexpected builtin");
2890 case clang::ARM::BI__builtin_arm_mrrc:
2891 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
2892 break;
2893 case clang::ARM::BI__builtin_arm_mrrc2:
2894 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
2895 break;
2896 }
2897
2898 Value *Coproc = EmitScalarExpr(E->getArg(0));
2899 Value *Opc1 = EmitScalarExpr(E->getArg(1));
2900 Value *CRm = EmitScalarExpr(E->getArg(2));
2901 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
2902
2903 // The intrinsic returns the two 32-bit halves of the unsigned 64-bit
2904 // integer that the builtin returns; recombine them below.
2905
2906 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
2907 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
2908 Rt = Builder.CreateZExt(Rt, Int64Ty);
2909 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
2910
2911 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
2912 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
2913 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
2914
2915 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
2916 }
2917
2918 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
2919 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2920 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
2921 getContext().getTypeSize(E->getType()) == 64) ||
2922 BuiltinID == clang::ARM::BI__ldrexd) {
2923 Function *F;
2924
2925 switch (BuiltinID) {
2926 default: llvm_unreachable("unexpected builtin");
2927 case clang::ARM::BI__builtin_arm_ldaex:
2928 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
2929 break;
2930 case clang::ARM::BI__builtin_arm_ldrexd:
2931 case clang::ARM::BI__builtin_arm_ldrex:
2932 case clang::ARM::BI__ldrexd:
2933 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
2934 break;
2935 }
2936
2937 Value *LdPtr = EmitScalarExpr(E->getArg(0));
2938 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
2939
2940 Value *Val0 = Builder.CreateExtractValue(Val, 1);
2941 Value *Val1 = Builder.CreateExtractValue(Val, 0);
2942 Val0 = Builder.CreateZExt(Val0, Int64Ty);
2943 Val1 = Builder.CreateZExt(Val1, Int64Ty);
2944
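 // Reassemble the two 32-bit halves into the 64-bit result:
 // Val = (Val0 << 32) | Val1.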
2945 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
2946 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
2947 Val = Builder.CreateOr(Val, Val1);
2948 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
2949 }
2950
2951 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2952 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
2953 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
2954
2955 QualType Ty = E->getType();
2956 llvm::Type *RealResTy = ConvertType(Ty);
2957 llvm::Type *IntTy =
2958 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
2959
2960 Function *F = CGM.getIntrinsic(
2961 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
2962 : Intrinsic::arm_ldrex,
2963 DefaultPtrTy);
2964 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
2965 Val->addParamAttr(
2966 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
2967
2968 if (RealResTy->isPointerTy())
2969 return Builder.CreateIntToPtr(Val, RealResTy);
2970 else {
2971 llvm::Type *IntResTy = llvm::IntegerType::get(
2972 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
2973 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
2974 RealResTy);
2975 }
2976 }
2977
2978 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
2979 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
2980 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
2981 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
2982 Function *F = CGM.getIntrinsic(
2983 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
2984 : Intrinsic::arm_strexd);
2985 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
2986
2987 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
2988 Value *Val = EmitScalarExpr(E->getArg(0));
2989 Builder.CreateStore(Val, Tmp);
2990
2991 Address LdPtr = Tmp.withElementType(STy);
2992 Val = Builder.CreateLoad(LdPtr);
2993
2994 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
2995 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
2996 Value *StPtr = EmitScalarExpr(E->getArg(1));
2997 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
2998 }
2999
3000 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
3001 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
3002 Value *StoreVal = EmitScalarExpr(E->getArg(0));
3003 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
3004
3005 QualType Ty = E->getArg(0)->getType();
3006 llvm::Type *StoreTy =
3007 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
3008
3009 if (StoreVal->getType()->isPointerTy())
3010 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
3011 else {
3012 llvm::Type *IntTy = llvm::IntegerType::get(
3013 getLLVMContext(),
3014 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
3015 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
3016 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
3017 }
3018
3019 Function *F = CGM.getIntrinsic(
3020 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
3021 : Intrinsic::arm_strex,
3022 StoreAddr->getType());
3023
3024 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
3025 CI->addParamAttr(
3026 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
3027 return CI;
3028 }
3029
3030 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
3031 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
3032 return Builder.CreateCall(F);
3033 }
3034
3035 // CRC32
3036 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
3037 switch (BuiltinID) {
3038 case clang::ARM::BI__builtin_arm_crc32b:
3039 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
3040 case clang::ARM::BI__builtin_arm_crc32cb:
3041 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
3042 case clang::ARM::BI__builtin_arm_crc32h:
3043 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
3044 case clang::ARM::BI__builtin_arm_crc32ch:
3045 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
3046 case clang::ARM::BI__builtin_arm_crc32w:
3047 case clang::ARM::BI__builtin_arm_crc32d:
3048 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
3049 case clang::ARM::BI__builtin_arm_crc32cw:
3050 case clang::ARM::BI__builtin_arm_crc32cd:
3051 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
3052 }
3053
3054 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
3055 Value *Arg0 = EmitScalarExpr(E->getArg(0));
3056 Value *Arg1 = EmitScalarExpr(E->getArg(1));
3057
3058 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
3059 // intrinsics, hence we need different codegen for these cases.
3060 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
3061 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
3062 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
3063 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
3064 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
3065 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
3066
3067 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
3068 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
3069 return Builder.CreateCall(F, {Res, Arg1b});
3070 } else {
3071 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
3072
3073 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
3074 return Builder.CreateCall(F, {Arg0, Arg1});
3075 }
3076 }
3077
3078 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
3079 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
3080 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
3081 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
3082 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
3083 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
3084
3085 SpecialRegisterAccessKind AccessKind = Write;
3086 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
3087 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
3088 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
3089 AccessKind = VolatileRead;
3090
3091 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
3092 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
3093
3094 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
3095 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
3096
3097 llvm::Type *ValueType;
3098 llvm::Type *RegisterType;
3099 if (IsPointerBuiltin) {
3100 ValueType = VoidPtrTy;
3101 RegisterType = Int32Ty;
3102 } else if (Is64Bit) {
3103 ValueType = RegisterType = Int64Ty;
3104 } else {
3105 ValueType = RegisterType = Int32Ty;
3106 }
3107
3108 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
3109 AccessKind);
3110 }
3111
3112 if (BuiltinID == ARM::BI__builtin_sponentry) {
3113 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
3114 return Builder.CreateCall(F);
3115 }
3116
3117 // Handle MSVC intrinsics before argument evaluation to prevent double
3118 // evaluation.
3119 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
3120 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
3121
3122 // Deal with MVE builtins
3123 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
3124 return Result;
3125 // Handle CDE builtins
3126 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
3127 return Result;
3128
3129 // Some intrinsics are equivalent; if so, use the base intrinsic ID.
3130 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
3131 return P.first == BuiltinID;
3132 });
3133 if (It != end(NEONEquivalentIntrinsicMap))
3134 BuiltinID = It->second;
3135
3136 // Find out if any arguments are required to be integer constant
3137 // expressions.
3138 unsigned ICEArguments = 0;
3139 ASTContext::GetBuiltinTypeError Error;
3140 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3141 assert(Error == ASTContext::GE_None && "Should not codegen an error");
3142
3143 auto getAlignmentValue32 = [&](Address addr) -> Value* {
3144 return Builder.getInt32(addr.getAlignment().getQuantity());
3145 };
3146
3147 Address PtrOp0 = Address::invalid();
3148 Address PtrOp1 = Address::invalid();
3149 SmallVector<Value*, 4> Ops;
3150 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
3151 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
3152 for (unsigned i = 0, e = NumArgs; i != e; i++) {
3153 if (i == 0) {
3154 switch (BuiltinID) {
3155 case NEON::BI__builtin_neon_vld1_v:
3156 case NEON::BI__builtin_neon_vld1q_v:
3157 case NEON::BI__builtin_neon_vld1q_lane_v:
3158 case NEON::BI__builtin_neon_vld1_lane_v:
3159 case NEON::BI__builtin_neon_vld1_dup_v:
3160 case NEON::BI__builtin_neon_vld1q_dup_v:
3161 case NEON::BI__builtin_neon_vst1_v:
3162 case NEON::BI__builtin_neon_vst1q_v:
3163 case NEON::BI__builtin_neon_vst1q_lane_v:
3164 case NEON::BI__builtin_neon_vst1_lane_v:
3165 case NEON::BI__builtin_neon_vst2_v:
3166 case NEON::BI__builtin_neon_vst2q_v:
3167 case NEON::BI__builtin_neon_vst2_lane_v:
3168 case NEON::BI__builtin_neon_vst2q_lane_v:
3169 case NEON::BI__builtin_neon_vst3_v:
3170 case NEON::BI__builtin_neon_vst3q_v:
3171 case NEON::BI__builtin_neon_vst3_lane_v:
3172 case NEON::BI__builtin_neon_vst3q_lane_v:
3173 case NEON::BI__builtin_neon_vst4_v:
3174 case NEON::BI__builtin_neon_vst4q_v:
3175 case NEON::BI__builtin_neon_vst4_lane_v:
3176 case NEON::BI__builtin_neon_vst4q_lane_v:
3177 // Get the alignment for the argument in addition to the value;
3178 // we'll use it later.
3179 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
3180 Ops.push_back(PtrOp0.emitRawPointer(*this));
3181 continue;
3182 }
3183 }
3184 if (i == 1) {
3185 switch (BuiltinID) {
3186 case NEON::BI__builtin_neon_vld2_v:
3187 case NEON::BI__builtin_neon_vld2q_v:
3188 case NEON::BI__builtin_neon_vld3_v:
3189 case NEON::BI__builtin_neon_vld3q_v:
3190 case NEON::BI__builtin_neon_vld4_v:
3191 case NEON::BI__builtin_neon_vld4q_v:
3192 case NEON::BI__builtin_neon_vld2_lane_v:
3193 case NEON::BI__builtin_neon_vld2q_lane_v:
3194 case NEON::BI__builtin_neon_vld3_lane_v:
3195 case NEON::BI__builtin_neon_vld3q_lane_v:
3196 case NEON::BI__builtin_neon_vld4_lane_v:
3197 case NEON::BI__builtin_neon_vld4q_lane_v:
3198 case NEON::BI__builtin_neon_vld2_dup_v:
3199 case NEON::BI__builtin_neon_vld2q_dup_v:
3200 case NEON::BI__builtin_neon_vld3_dup_v:
3201 case NEON::BI__builtin_neon_vld3q_dup_v:
3202 case NEON::BI__builtin_neon_vld4_dup_v:
3203 case NEON::BI__builtin_neon_vld4q_dup_v:
3204 // Get the alignment for the argument in addition to the value;
3205 // we'll use it later.
3206 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
3207 Ops.push_back(PtrOp1.emitRawPointer(*this));
3208 continue;
3209 }
3210 }
3211
3212 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
3213 }
3214
3215 switch (BuiltinID) {
3216 default: break;
3217
3218 case NEON::BI__builtin_neon_vget_lane_i8:
3219 case NEON::BI__builtin_neon_vget_lane_i16:
3220 case NEON::BI__builtin_neon_vget_lane_i32:
3221 case NEON::BI__builtin_neon_vget_lane_i64:
3222 case NEON::BI__builtin_neon_vget_lane_bf16:
3223 case NEON::BI__builtin_neon_vget_lane_f32:
3224 case NEON::BI__builtin_neon_vgetq_lane_i8:
3225 case NEON::BI__builtin_neon_vgetq_lane_i16:
3226 case NEON::BI__builtin_neon_vgetq_lane_i32:
3227 case NEON::BI__builtin_neon_vgetq_lane_i64:
3228 case NEON::BI__builtin_neon_vgetq_lane_bf16:
3229 case NEON::BI__builtin_neon_vgetq_lane_f32:
3230 case NEON::BI__builtin_neon_vduph_lane_bf16:
3231 case NEON::BI__builtin_neon_vduph_laneq_bf16:
3232 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
3233
3234 case NEON::BI__builtin_neon_vrndns_f32: {
3235 Value *Arg = EmitScalarExpr(E->getArg(0));
3236 llvm::Type *Tys[] = {Arg->getType()};
3237 Function *F = CGM.getIntrinsic(Intrinsic::roundeven, Tys);
3238 return Builder.CreateCall(F, {Arg}, "vrndn");
3239 }
3240 case NEON::BI__builtin_neon_vset_lane_i8:
3241 case NEON::BI__builtin_neon_vset_lane_i16:
3242 case NEON::BI__builtin_neon_vset_lane_i32:
3243 case NEON::BI__builtin_neon_vset_lane_i64:
3244 case NEON::BI__builtin_neon_vset_lane_bf16:
3245 case NEON::BI__builtin_neon_vset_lane_f32:
3246 case NEON::BI__builtin_neon_vsetq_lane_i8:
3247 case NEON::BI__builtin_neon_vsetq_lane_i16:
3248 case NEON::BI__builtin_neon_vsetq_lane_i32:
3249 case NEON::BI__builtin_neon_vsetq_lane_i64:
3250 case NEON::BI__builtin_neon_vsetq_lane_bf16:
3251 case NEON::BI__builtin_neon_vsetq_lane_f32:
3252 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
3253
3254 case NEON::BI__builtin_neon_vsha1h_u32:
3255 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
3256 "vsha1h");
3257 case NEON::BI__builtin_neon_vsha1cq_u32:
3258 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
3259 "vsha1h");
3260 case NEON::BI__builtin_neon_vsha1pq_u32:
3261 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
3262 "vsha1h");
3263 case NEON::BI__builtin_neon_vsha1mq_u32:
3264 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
3265 "vsha1h");
3266
3267 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
3268 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
3269 "vcvtbfp2bf");
3270 }
3271
3272 // The ARM _MoveToCoprocessor builtins put the input register value as
3273 // the first argument, but the LLVM intrinsic expects it as the third one.
3274 case clang::ARM::BI_MoveToCoprocessor:
3275 case clang::ARM::BI_MoveToCoprocessor2: {
3276 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
3277 ? Intrinsic::arm_mcr
3278 : Intrinsic::arm_mcr2);
3279 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
3280 Ops[3], Ops[4], Ops[5]});
3281 }
3282 }
3283
3284 // Get the last argument, which specifies the vector type.
3285 assert(HasExtraArg);
3286 const Expr *Arg = E->getArg(E->getNumArgs()-1);
3287 std::optional<llvm::APSInt> Result =
3288 Arg->getIntegerConstantExpr(getContext());
3289 if (!Result)
3290 return nullptr;
3291
3292 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
3293 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
3294 // Determine the overloaded type of this builtin.
3295 llvm::Type *Ty;
3296 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
3297 Ty = FloatTy;
3298 else
3299 Ty = DoubleTy;
3300
3301 // Determine whether this is an unsigned conversion or not.
3302 bool usgn = Result->getZExtValue() == 1;
3303 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
3304
3305 // Call the appropriate intrinsic.
3306 Function *F = CGM.getIntrinsic(Int, Ty);
3307 return Builder.CreateCall(F, Ops, "vcvtr");
3308 }
3309
3310 // Determine the type of this overloaded NEON intrinsic.
3311 NeonTypeFlags Type = Result->getZExtValue();
3312 bool usgn = Type.isUnsigned();
3313 bool rightShift = false;
3314
3315 llvm::FixedVectorType *VTy =
3316 GetNeonType(this, Type, getTarget().hasFastHalfType(), false,
3317 getTarget().hasBFloat16Type());
3318 llvm::Type *Ty = VTy;
3319 if (!Ty)
3320 return nullptr;
3321
3322 // Many NEON builtins have identical semantics and uses in ARM and
3323 // AArch64. Emit these in a single function.
3324 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
3325 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
3326 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
3327 if (Builtin)
3328 return EmitCommonNeonBuiltinExpr(
3329 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
3330 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
3331
3332 unsigned Int;
3333 switch (BuiltinID) {
3334 default: return nullptr;
3335 case NEON::BI__builtin_neon_vld1q_lane_v:
3336 // Handle 64-bit integer elements as a special case. Use shuffles of
3337 // one-element vectors to avoid poor code for i64 in the backend.
3338 if (VTy->getElementType()->isIntegerTy(64)) {
3339 // Extract the other lane.
3340 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3341 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
3342 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
3343 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
3344 // Load the value as a one-element vector.
3345 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
3346 llvm::Type *Tys[] = {Ty, Int8PtrTy};
3347 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
3348 Value *Align = getAlignmentValue32(PtrOp0);
3349 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
3350 // Combine them.
3351 int Indices[] = {1 - Lane, Lane};
3352 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
3353 }
3354 [[fallthrough]];
3355 case NEON::BI__builtin_neon_vld1_lane_v: {
3356 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3357 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
3358 Value *Ld = Builder.CreateLoad(PtrOp0);
3359 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
3360 }
3361 case NEON::BI__builtin_neon_vqrshrn_n_v:
3362 Int =
3363 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
3364 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
3365 1, true);
3366 case NEON::BI__builtin_neon_vqrshrun_n_v:
3367 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
3368 Ops, "vqrshrun_n", 1, true);
3369 case NEON::BI__builtin_neon_vqshrn_n_v:
3370 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
3371 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
3372 1, true);
3373 case NEON::BI__builtin_neon_vqshrun_n_v:
3374 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
3375 Ops, "vqshrun_n", 1, true);
3376 case NEON::BI__builtin_neon_vrecpe_v:
3377 case NEON::BI__builtin_neon_vrecpeq_v:
3378 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
3379 Ops, "vrecpe");
3380 case NEON::BI__builtin_neon_vrshrn_n_v:
3381 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
3382 Ops, "vrshrn_n", 1, true);
3383 case NEON::BI__builtin_neon_vrsra_n_v:
3384 case NEON::BI__builtin_neon_vrsraq_n_v:
3385 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3386 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3387 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
3388 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
3389 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
3390 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
3391 case NEON::BI__builtin_neon_vsri_n_v:
3392 case NEON::BI__builtin_neon_vsriq_n_v:
3393 rightShift = true;
3394 [[fallthrough]];
3395 case NEON::BI__builtin_neon_vsli_n_v:
3396 case NEON::BI__builtin_neon_vsliq_n_v:
3397 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
3398 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
3399 Ops, "vsli_n");
3400 case NEON::BI__builtin_neon_vsra_n_v:
3401 case NEON::BI__builtin_neon_vsraq_n_v:
3402 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3403 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
3404 return Builder.CreateAdd(Ops[0], Ops[1]);
3405 case NEON::BI__builtin_neon_vst1q_lane_v:
3406 // Handle 64-bit integer elements as a special case. Use a shuffle to get
3407 // a one-element vector and avoid poor code for i64 in the backend.
3408 if (VTy->getElementType()->isIntegerTy(64)) {
3409 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3410 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
3411 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
3412 Ops[2] = getAlignmentValue32(PtrOp0);
3413 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
3414 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
3415 Tys), Ops);
3416 }
3417 [[fallthrough]];
3418 case NEON::BI__builtin_neon_vst1_lane_v: {
3419 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3420 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
3421 return Builder.CreateStore(Ops[1],
3422 PtrOp0.withElementType(Ops[1]->getType()));
3423 }
3424 case NEON::BI__builtin_neon_vtbl1_v:
3425 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
3426 Ops, "vtbl1");
3427 case NEON::BI__builtin_neon_vtbl2_v:
3428 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
3429 Ops, "vtbl2");
3430 case NEON::BI__builtin_neon_vtbl3_v:
3431 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
3432 Ops, "vtbl3");
3433 case NEON::BI__builtin_neon_vtbl4_v:
3434 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
3435 Ops, "vtbl4");
3436 case NEON::BI__builtin_neon_vtbx1_v:
3437 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
3438 Ops, "vtbx1");
3439 case NEON::BI__builtin_neon_vtbx2_v:
3440 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
3441 Ops, "vtbx2");
3442 case NEON::BI__builtin_neon_vtbx3_v:
3443 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
3444 Ops, "vtbx3");
3445 case NEON::BI__builtin_neon_vtbx4_v:
3446 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
3447 Ops, "vtbx4");
3448 }
3449}
3450
3451template<typename Integer>
3452static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
3453 return E->getIntegerConstantExpr(Context)->getExtValue();
3454}
3455
3456static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
3457 llvm::Type *T, bool Unsigned) {
3458 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
3459 // which finds it convenient to specify signed/unsigned as a boolean flag.
3460 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
3461}
3462
3463static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
3464 uint32_t Shift, bool Unsigned) {
3465 // MVE helper function for integer shift right. This must handle signed vs
3466 // unsigned, and also deal specially with the case where the shift count is
3467 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
3468 // undefined behavior, but in MVE it's legal, so we must convert it to code
3469 // that is not undefined in IR.
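 // e.g. a signed shift of a <8 x i16> lane by 16 is emitted as an AShr by
 // 15, which yields the same all-sign-bits result.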
3470 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
3471 ->getElementType()
3472 ->getPrimitiveSizeInBits();
3473 if (Shift == LaneBits) {
3474 // An unsigned shift of the full lane size always generates zero, so we can
3475 // simply emit a zero vector. A signed shift of the full lane size does the
3476 // same thing as shifting by one bit fewer.
3477 if (Unsigned)
3478 return llvm::Constant::getNullValue(V->getType());
3479 else
3480 --Shift;
3481 }
3482 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
3483}
3484
3485static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
3486 // MVE-specific helper function for a vector splat, which infers the element
3487 // count of the output vector by knowing that MVE vectors are all 128 bits
3488 // wide.
3489 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
3490 return Builder.CreateVectorSplat(Elements, V);
3491}
3492
3493static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
3494 CodeGenFunction *CGF,
3495 llvm::Value *V,
3496 llvm::Type *DestType) {
3497 // Convert one MVE vector type into another by reinterpreting its in-register
3498 // format.
3499 //
3500 // In little-endian mode, this is identical to a bitcast (which reinterprets
3501 // the memory format). In big-endian mode they're not necessarily the same,
3502 // because the register and memory formats map to each other differently
3503 // depending on the lane size.
3504 //
3505 // We generate a bitcast whenever we can (if we're little-endian, or if the
3506 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
3507 // that performs the different kind of reinterpretation.
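 // e.g. (illustrative): <8 x i16> -> <4 x i32> on a big-endian target goes
 // through the vreinterpretq intrinsic; <8 x i16> -> <8 x half> is always a
 // plain bitcast, since the lane size is unchanged.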
3508 if (CGF->getTarget().isBigEndian() &&
3509 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
3510 return Builder.CreateCall(
3511 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
3512 {DestType, V->getType()}),
3513 V);
3514 } else {
3515 return Builder.CreateBitCast(V, DestType);
3516 }
3517}
3518
3519static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
3520 // Make a shufflevector that extracts every other element of a vector (evens
3521 // or odds, as desired).
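 // e.g. for an <8 x i16> input, Odd == false selects lanes {0,2,4,6}.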
3522 SmallVector<int, 16> Indices;
3523 unsigned InputElements =
3524 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
3525 for (unsigned i = 0; i < InputElements; i += 2)
3526 Indices.push_back(i + Odd);
3527 return Builder.CreateShuffleVector(V, Indices);
3528}
3529
3530static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
3531 llvm::Value *V1) {
3532 // Make a shufflevector that interleaves two vectors element by element.
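 // e.g. two <4 x i32> inputs produce the lane order {0,4,1,5,2,6,3,7}.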
3533 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
3534 SmallVector<int, 16> Indices;
3535 unsigned InputElements =
3536 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
3537 for (unsigned i = 0; i < InputElements; i++) {
3538 Indices.push_back(i);
3539 Indices.push_back(i + InputElements);
3540 }
3541 return Builder.CreateShuffleVector(V0, V1, Indices);
3542}
3543
3544template<unsigned HighBit, unsigned OtherBits>
3545static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
3546 // MVE-specific helper function to make a vector splat of a constant such as
3547 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
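 // e.g. for 32-bit lanes: HighBit=1, OtherBits=0 gives INT_MIN (0x80000000);
 // HighBit=0, OtherBits=1 gives INT_MAX (0x7fffffff); both set gives
 // UINT_MAX (0xffffffff).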
3548 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
3549 unsigned LaneBits = T->getPrimitiveSizeInBits();
3550 uint32_t Value = HighBit << (LaneBits - 1);
3551 if (OtherBits)
3552 Value |= (1UL << (LaneBits - 1)) - 1;
3553 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
3554 return ARMMVEVectorSplat(Builder, Lane);
3555}
3556
3557static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
3558 llvm::Value *V,
3559 unsigned ReverseWidth) {
3560 // MVE-specific helper function which reverses the elements of a
3561 // vector within every (ReverseWidth)-bit collection of lanes.
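 // e.g. for a <16 x i8> input and ReverseWidth == 32: Mask == 3, so the
 // lane order becomes {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}.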
3562 SmallVector<int, 16> Indices;
3563 unsigned LaneSize = V->getType()->getScalarSizeInBits();
3564 unsigned Elements = 128 / LaneSize;
3565 unsigned Mask = ReverseWidth / LaneSize - 1;
3566 for (unsigned i = 0; i < Elements; i++)
3567 Indices.push_back(i ^ Mask);
3568 return Builder.CreateShuffleVector(V, Indices);
3569}
3570
3571static llvm::Value *ARMMVECreateSIToFP(CGBuilderTy &Builder,
3572 CodeGenFunction *CGF, llvm::Value *V,
3573 llvm::Type *Ty) {
3574 return Builder.CreateCall(
3575 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_fp_int, {Ty, V->getType()}),
3576 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 0)});
3577}
3578
3579static llvm::Value *ARMMVECreateUIToFP(CGBuilderTy &Builder,
3580 CodeGenFunction *CGF, llvm::Value *V,
3581 llvm::Type *Ty) {
3582 return Builder.CreateCall(
3583 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_fp_int, {Ty, V->getType()}),
3584 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)});
3585}
3586
3587static llvm::Value *ARMMVECreateFPToSI(CGBuilderTy &Builder,
3588 CodeGenFunction *CGF, llvm::Value *V,
3589 llvm::Type *Ty) {
3590 return Builder.CreateCall(
3591 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_int_fp, {Ty, V->getType()}),
3592 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 0)});
3593}
3594
3595static llvm::Value *ARMMVECreateFPToUI(CGBuilderTy &Builder,
3596 CodeGenFunction *CGF, llvm::Value *V,
3597 llvm::Type *Ty) {
3598 return Builder.CreateCall(
3599 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_int_fp, {Ty, V->getType()}),
3600 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)});
3601}
3602
3603Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
3604 const CallExpr *E,
3605 ReturnValueSlot ReturnValue,
3606 llvm::Triple::ArchType Arch) {
3607 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
3608 Intrinsic::ID IRIntr;
3609 unsigned NumVectors;
3610
3611 // Code autogenerated by Tablegen will handle all the simple builtins.
3612 switch (BuiltinID) {
3613 #include "clang/Basic/arm_mve_builtin_cg.inc"
3614
3615 // If we didn't match an MVE builtin id at all, go back to the
3616 // main EmitARMBuiltinExpr.
3617 default:
3618 return nullptr;
3619 }
3620
3621 // Anything that breaks from that switch is an MVE builtin that
3622 // needs handwritten code to generate.
3623
3624 switch (CustomCodeGenType) {
3625
3626 case CustomCodeGen::VLD24: {
3627 llvm::SmallVector<Value *, 4> Ops;
3628 llvm::SmallVector<llvm::Type *, 4> Tys;
3629
3630 auto MvecCType = E->getType();
3631 auto MvecLType = ConvertType(MvecCType);
3632 assert(MvecLType->isStructTy() &&
3633 "Return type for vld[24]q should be a struct");
3634 assert(MvecLType->getStructNumElements() == 1 &&
3635 "Return-type struct for vld[24]q should have one element");
3636 auto MvecLTypeInner = MvecLType->getStructElementType(0);
3637 assert(MvecLTypeInner->isArrayTy() &&
3638 "Return-type struct for vld[24]q should contain an array");
3639 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3640 "Array member of return-type struct vld[24]q has wrong length");
3641 auto VecLType = MvecLTypeInner->getArrayElementType();
3642
3643 Tys.push_back(VecLType);
3644
3645 auto Addr = E->getArg(0);
3646 Ops.push_back(EmitScalarExpr(Addr));
3647 Tys.push_back(ConvertType(Addr->getType()));
3648
3649 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
3650 Value *LoadResult = Builder.CreateCall(F, Ops);
3651 Value *MvecOut = PoisonValue::get(MvecLType);
3652 for (unsigned i = 0; i < NumVectors; ++i) {
3653 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
3654 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
3655 }
3656
3657 if (ReturnValue.isNull())
3658 return MvecOut;
3659 else
3660 return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
3661 }
3662
3663 case CustomCodeGen::VST24: {
3664 llvm::SmallVector<Value *, 4> Ops;
3665 llvm::SmallVector<llvm::Type *, 4> Tys;
3666
3667 auto Addr = E->getArg(0);
3668 Ops.push_back(EmitScalarExpr(Addr));
3669 Tys.push_back(ConvertType(Addr->getType()));
3670
3671 auto MvecCType = E->getArg(1)->getType();
3672 auto MvecLType = ConvertType(MvecCType);
3673 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
3674 assert(MvecLType->getStructNumElements() == 1 &&
3675 "Data-type struct for vst2q should have one element");
3676 auto MvecLTypeInner = MvecLType->getStructElementType(0);
3677 assert(MvecLTypeInner->isArrayTy() &&
3678 "Data-type struct for vst2q should contain an array");
3679 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3680 "Array member of return-type struct vld[24]q has wrong length");
3681 auto VecLType = MvecLTypeInner->getArrayElementType();
3682
3683 Tys.push_back(VecLType);
3684
3685 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
3686 EmitAggExpr(E->getArg(1), MvecSlot);
3687 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
3688 for (unsigned i = 0; i < NumVectors; i++)
3689 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
3690
3691 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
3692 Value *ToReturn = nullptr;
3693 for (unsigned i = 0; i < NumVectors; i++) {
3694 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
3695 ToReturn = Builder.CreateCall(F, Ops);
3696 Ops.pop_back();
3697 }
3698 return ToReturn;
3699 }
3700 }
3701 llvm_unreachable("unknown custom codegen type.");
3702}
3703
3704Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
3705 const CallExpr *E,
3706 ReturnValueSlot ReturnValue,
3707 llvm::Triple::ArchType Arch) {
3708 switch (BuiltinID) {
3709 default:
3710 return nullptr;
3711#include "clang/Basic/arm_cde_builtin_cg.inc"
3712 }
3713}
3714
3715static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
3716 const CallExpr *E,
3717 SmallVectorImpl<Value *> &Ops,
3718 llvm::Triple::ArchType Arch) {
3719 unsigned int Int = 0;
3720 const char *s = nullptr;
3721
3722 switch (BuiltinID) {
3723 default:
3724 return nullptr;
3725 case NEON::BI__builtin_neon_vtbl1_v:
3726 case NEON::BI__builtin_neon_vqtbl1_v:
3727 case NEON::BI__builtin_neon_vqtbl1q_v:
3728 case NEON::BI__builtin_neon_vtbl2_v:
3729 case NEON::BI__builtin_neon_vqtbl2_v:
3730 case NEON::BI__builtin_neon_vqtbl2q_v:
3731 case NEON::BI__builtin_neon_vtbl3_v:
3732 case NEON::BI__builtin_neon_vqtbl3_v:
3733 case NEON::BI__builtin_neon_vqtbl3q_v:
3734 case NEON::BI__builtin_neon_vtbl4_v:
3735 case NEON::BI__builtin_neon_vqtbl4_v:
3736 case NEON::BI__builtin_neon_vqtbl4q_v:
3737 break;
3738 case NEON::BI__builtin_neon_vtbx1_v:
3739 case NEON::BI__builtin_neon_vqtbx1_v:
3740 case NEON::BI__builtin_neon_vqtbx1q_v:
3741 case NEON::BI__builtin_neon_vtbx2_v:
3742 case NEON::BI__builtin_neon_vqtbx2_v:
3743 case NEON::BI__builtin_neon_vqtbx2q_v:
3744 case NEON::BI__builtin_neon_vtbx3_v:
3745 case NEON::BI__builtin_neon_vqtbx3_v:
3746 case NEON::BI__builtin_neon_vqtbx3q_v:
3747 case NEON::BI__builtin_neon_vtbx4_v:
3748 case NEON::BI__builtin_neon_vqtbx4_v:
3749 case NEON::BI__builtin_neon_vqtbx4q_v:
3750 break;
3751 }
3752
3753 assert(E->getNumArgs() >= 3);
3754
3755 // Get the last argument, which specifies the vector type.
3756 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3757 std::optional<llvm::APSInt> Result =
3758 Arg->getIntegerConstantExpr(CGF.getContext());
3759 if (!Result)
3760 return nullptr;
3761
3762 // Determine the type of this overloaded NEON intrinsic.
3763 NeonTypeFlags Type = Result->getZExtValue();
3764 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
3765 if (!Ty)
3766 return nullptr;
3767
3768 CodeGen::CGBuilderTy &Builder = CGF.Builder;
3769
3770 // AArch64 scalar builtins are not overloaded; they do not have an extra
3771 // argument specifying the vector type, so each case is handled individually.
3772 switch (BuiltinID) {
3773 case NEON::BI__builtin_neon_vtbl1_v: {
3774 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
3775 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3776 }
3777 case NEON::BI__builtin_neon_vtbl2_v: {
3778 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
3779 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3780 }
3781 case NEON::BI__builtin_neon_vtbl3_v: {
3782 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
3783 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3784 }
3785 case NEON::BI__builtin_neon_vtbl4_v: {
3786 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
3787 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3788 }
3789 case NEON::BI__builtin_neon_vtbx1_v: {
3790 Value *TblRes =
3791 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
3792 Intrinsic::aarch64_neon_tbl1, "vtbl1");
3793
3794 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
3795 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
3796 CmpRes = Builder.CreateSExt(CmpRes, Ty);
3797
3798 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
3799 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
3800 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
3801 }
3802 case NEON::BI__builtin_neon_vtbx2_v: {
3803 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
3804 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
3805 }
3806 case NEON::BI__builtin_neon_vtbx3_v: {
3807 Value *TblRes =
3808 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
3809 Intrinsic::aarch64_neon_tbl2, "vtbl2");
3810
3811 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
3812 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
3813 TwentyFourV);
3814 CmpRes = Builder.CreateSExt(CmpRes, Ty);
3815
3816 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
3817 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
3818 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
3819 }
3820 case NEON::BI__builtin_neon_vtbx4_v: {
3821 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
3822 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
3823 }
3824 case NEON::BI__builtin_neon_vqtbl1_v:
3825 case NEON::BI__builtin_neon_vqtbl1q_v:
3826 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
3827 case NEON::BI__builtin_neon_vqtbl2_v:
3828 case NEON::BI__builtin_neon_vqtbl2q_v:
3829 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
3830 case NEON::BI__builtin_neon_vqtbl3_v:
3831 case NEON::BI__builtin_neon_vqtbl3q_v:
3832 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
3833 case NEON::BI__builtin_neon_vqtbl4_v:
3834 case NEON::BI__builtin_neon_vqtbl4q_v:
3835 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
3836 case NEON::BI__builtin_neon_vqtbx1_v:
3837 case NEON::BI__builtin_neon_vqtbx1q_v:
3838 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
3839 case NEON::BI__builtin_neon_vqtbx2_v:
3840 case NEON::BI__builtin_neon_vqtbx2q_v:
3841 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
3842 case NEON::BI__builtin_neon_vqtbx3_v:
3843 case NEON::BI__builtin_neon_vqtbx3q_v:
3844 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
3845 case NEON::BI__builtin_neon_vqtbx4_v:
3846 case NEON::BI__builtin_neon_vqtbx4q_v:
3847 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
3849 }
3850
3851 if (!Int)
3852 return nullptr;
3853
3854 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
3855 return CGF.EmitNeonCall(F, Ops, s);
3856}
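// Note: AArch64 TBX natively extends only 2- and 4-register table lists, so
// the vtbx1 and vtbx3 cases above emulate the extension semantics with TBL
// plus a compare/select mask: lanes whose index is out of range (>= 8 for
// one table register, >= 24 for three) are taken from the fallback operand
// rather than from the table result.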
3857
3858 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
3859 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
3860 Op = Builder.CreateBitCast(Op, Int16Ty);
3861 Value *V = PoisonValue::get(VTy);
3862 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3863 Op = Builder.CreateInsertElement(V, Op, CI);
3864 return Op;
3865}
3866
3867/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
3868/// access builtin. Only required if it can't be inferred from the base pointer
3869/// operand.
3870 llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
3871 switch (TypeFlags.getMemEltType()) {
3872 case SVETypeFlags::MemEltTyDefault:
3873 return getEltType(TypeFlags);
3874 case SVETypeFlags::MemEltTyInt8:
3875 return Builder.getInt8Ty();
3876 case SVETypeFlags::MemEltTyInt16:
3877 return Builder.getInt16Ty();
3878 case SVETypeFlags::MemEltTyInt32:
3879 return Builder.getInt32Ty();
3880 case SVETypeFlags::MemEltTyInt64:
3881 return Builder.getInt64Ty();
3882 }
3883 llvm_unreachable("Unknown MemEltType");
3884}
3885
3886llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
3887 switch (TypeFlags.getEltType()) {
3888 default:
3889 llvm_unreachable("Invalid SVETypeFlag!");
3890
3891 case SVETypeFlags::EltTyMFloat8:
3892 case SVETypeFlags::EltTyInt8:
3893 return Builder.getInt8Ty();
3894 case SVETypeFlags::EltTyInt16:
3895 return Builder.getInt16Ty();
3896 case SVETypeFlags::EltTyInt32:
3897 return Builder.getInt32Ty();
3898 case SVETypeFlags::EltTyInt64:
3899 return Builder.getInt64Ty();
3900 case SVETypeFlags::EltTyInt128:
3901 return Builder.getInt128Ty();
3902
3903 case SVETypeFlags::EltTyFloat16:
3904 return Builder.getHalfTy();
3905 case SVETypeFlags::EltTyFloat32:
3906 return Builder.getFloatTy();
3907 case SVETypeFlags::EltTyFloat64:
3908 return Builder.getDoubleTy();
3909
3910 case SVETypeFlags::EltTyBFloat16:
3911 return Builder.getBFloatTy();
3912
3913 case SVETypeFlags::EltTyBool8:
3914 case SVETypeFlags::EltTyBool16:
3915 case SVETypeFlags::EltTyBool32:
3916 case SVETypeFlags::EltTyBool64:
3917 return Builder.getInt1Ty();
3918 }
3919}
3920
3921// Return the llvm predicate vector type corresponding to the specified element
3922// TypeFlags.
3923llvm::ScalableVectorType *
3924 CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
3925 switch (TypeFlags.getEltType()) {
3926 default: llvm_unreachable("Unhandled SVETypeFlag!");
3927
3928 case SVETypeFlags::EltTyInt8:
3929 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3930 case SVETypeFlags::EltTyInt16:
3931 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3932 case SVETypeFlags::EltTyInt32:
3933 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3934 case SVETypeFlags::EltTyInt64:
3935 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3936
3937 case SVETypeFlags::EltTyBFloat16:
3938 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3939 case SVETypeFlags::EltTyFloat16:
3940 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3941 case SVETypeFlags::EltTyFloat32:
3942 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3943 case SVETypeFlags::EltTyFloat64:
3944 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3945
3946 case SVETypeFlags::EltTyBool8:
3947 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3948 case SVETypeFlags::EltTyBool16:
3949 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3950 case SVETypeFlags::EltTyBool32:
3951 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3952 case SVETypeFlags::EltTyBool64:
3953 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3954 }
3955}
3956
3957// Return the llvm vector type corresponding to the specified element TypeFlags.
3958llvm::ScalableVectorType *
3959 CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
3960 switch (TypeFlags.getEltType()) {
3961 default:
3962 llvm_unreachable("Invalid SVETypeFlag!");
3963
3964 case SVETypeFlags::EltTyInt8:
3965 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
3966 case SVETypeFlags::EltTyInt16:
3967 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
3968 case SVETypeFlags::EltTyInt32:
3969 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
3970 case SVETypeFlags::EltTyInt64:
3971 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
3972
3973 case SVETypeFlags::EltTyMFloat8:
3974 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
3975 case SVETypeFlags::EltTyFloat16:
3976 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
3977 case SVETypeFlags::EltTyBFloat16:
3978 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
3979 case SVETypeFlags::EltTyFloat32:
3980 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
3981 case SVETypeFlags::EltTyFloat64:
3982 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
3983
3984 case SVETypeFlags::EltTyBool8:
3985 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3986 case SVETypeFlags::EltTyBool16:
3987 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3988 case SVETypeFlags::EltTyBool32:
3989 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3990 case SVETypeFlags::EltTyBool64:
3991 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3992 }
3993}
3994
3995llvm::Value *
3996 CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
3997 Function *Ptrue =
3998 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
3999 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
4000}
4001
4002constexpr unsigned SVEBitsPerBlock = 128;
4003
4004static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
4005 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
4006 return llvm::ScalableVectorType::get(EltTy, NumElts);
4007}
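// For example, an i32 element type gives 128 / 32 = 4 lanes, i.e. the
// container type <vscale x 4 x i32>: one 128-bit block per vscale unit.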
4008
4009// Reinterpret the input predicate so that it can be used to correctly isolate
4010// the elements of the specified datatype.
4011 Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
4012 llvm::ScalableVectorType *VTy) {
4013
4014 if (isa<TargetExtType>(Pred->getType()) &&
4015 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
4016 return Pred;
4017
4018 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
4019 if (Pred->getType() == RTy)
4020 return Pred;
4021
4022 unsigned IntID;
4023 llvm::Type *IntrinsicTy;
4024 switch (VTy->getMinNumElements()) {
4025 default:
4026 llvm_unreachable("unsupported element count!");
4027 case 1:
4028 case 2:
4029 case 4:
4030 case 8:
4031 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
4032 IntrinsicTy = RTy;
4033 break;
4034 case 16:
4035 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
4036 IntrinsicTy = Pred->getType();
4037 break;
4038 }
4039
4040 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
4041 Value *C = Builder.CreateCall(F, Pred);
4042 assert(C->getType() == RTy && "Unexpected return type!");
4043 return C;
4044}
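// For example, an ACLE svbool_t (<vscale x 16 x i1>) paired with
// <vscale x 2 x i64> data is narrowed here through
// llvm.aarch64.sve.convert.from.svbool to the matching <vscale x 2 x i1>.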
4045
4046 Value *CodeGenFunction::EmitSVEPredicateTupleCast(Value *PredTuple,
4047 llvm::StructType *Ty) {
4048 if (PredTuple->getType() == Ty)
4049 return PredTuple;
4050
4051 Value *Ret = llvm::PoisonValue::get(Ty);
4052 for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
4053 Value *Pred = Builder.CreateExtractValue(PredTuple, I);
4054 Pred = EmitSVEPredicateCast(
4055 Pred, cast<llvm::ScalableVectorType>(Ty->getTypeAtIndex(I)));
4056 Ret = Builder.CreateInsertValue(Ret, Pred, I);
4057 }
4058
4059 return Ret;
4060}
4061
4062 Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
4063 SmallVectorImpl<Value *> &Ops,
4064 unsigned IntID) {
4065 auto *ResultTy = getSVEType(TypeFlags);
4066 auto *OverloadedTy =
4067 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
4068
4069 Function *F = nullptr;
4070 if (Ops[1]->getType()->isVectorTy())
4071 // This is the "vector base, scalar offset" case. In order to uniquely
4072 // map this built-in to an LLVM IR intrinsic, we need both the return type
4073 // and the type of the vector base.
4074 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
4075 else
4076 // This is the "scalar base, vector offset" case. The type of the offset
4077 // is encoded in the name of the intrinsic. We only need to specify the
4078 // return type in order to uniquely map this built-in to an LLVM IR
4079 // intrinsic.
4080 F = CGM.getIntrinsic(IntID, OverloadedTy);
4081
4082 // At the ACLE level there's only one predicate type, svbool_t, which is
4083 // mapped to <n x 16 x i1>. However, this might be incompatible with the
4084 // actual type being loaded. For example, when loading doubles (i64) the
4085 // predicate should be <n x 2 x i1> instead. At the IR level the type of
4086 // the predicate and the data being loaded must match. Cast to the type
4087 // expected by the intrinsic. The intrinsic itself should be defined in
4088 // a way that enforces relations between parameter types.
4089 Ops[0] = EmitSVEPredicateCast(
4090 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
4091
4092 // Pass 0 when the offset is missing. This can only be applied when using
4093 // the "vector base" addressing mode for which ACLE allows no offset. The
4094 // corresponding LLVM IR always requires an offset.
4095 if (Ops.size() == 2) {
4096 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
4097 Ops.push_back(ConstantInt::get(Int64Ty, 0));
4098 }
4099
4100 // For "vector base, scalar index" scale the index so that it becomes a
4101 // scalar offset.
4102 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
4103 unsigned BytesPerElt =
4104 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
4105 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
4106 }
4107
4108 Value *Call = Builder.CreateCall(F, Ops);
4109
4110 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
4111 // other cases it's folded into a nop.
4112 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
4113 : Builder.CreateSExt(Call, ResultTy);
4114}
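// For instance, in a 64-bit "vector base, scalar index" gather the shift
// above turns an element index i into the byte offset i << 3 before the
// intrinsic call is emitted.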
4115
4116 Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
4117 SmallVectorImpl<Value *> &Ops,
4118 unsigned IntID) {
4119 auto *SrcDataTy = getSVEType(TypeFlags);
4120 auto *OverloadedTy =
4121 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
4122
4123 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
4124 // it's the first argument. Move it accordingly.
4125 Ops.insert(Ops.begin(), Ops.pop_back_val());
4126
4127 Function *F = nullptr;
4128 if (Ops[2]->getType()->isVectorTy())
4129 // This is the "vector base, scalar offset" case. In order to uniquely
4130 // map this built-in to an LLVM IR intrinsic, we need both the return type
4131 // and the type of the vector base.
4132 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
4133 else
4134 // This is the "scalar base, vector offset" case. The type of the offset
4135 // is encoded in the name of the intrinsic. We only need to specify the
4136 // return type in order to uniquely map this built-in to an LLVM IR
4137 // intrinsic.
4138 F = CGM.getIntrinsic(IntID, OverloadedTy);
4139
4140 // Pass 0 when the offset is missing. This can only be applied when using
4141 // the "vector base" addressing mode for which ACLE allows no offset. The
4142 // corresponding LLVM IR always requires an offset.
4143 if (Ops.size() == 3) {
4144 assert(Ops[2]->getType()->isVectorTy() && "Scalar base requires an offset");
4145 Ops.push_back(ConstantInt::get(Int64Ty, 0));
4146 }
4147
4148 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
4149 // folded into a nop.
4150 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
4151
4152 // At the ACLE level there's only one predicate type, svbool_t, which is
4153 // mapped to <n x 16 x i1>. However, this might be incompatible with the
4154 // actual type being stored. For example, when storing doubles (i64) the
4155 // predicate should be <n x 2 x i1> instead. At the IR level the type of
4156 // the predicate and the data being stored must match. Cast to the type
4157 // expected by the intrinsic. The intrinsic itself should be defined in
4158 // a way that enforces relations between parameter types.
4159 Ops[1] = EmitSVEPredicateCast(
4160 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
4161
4162 // For "vector base, scalar index" scale the index so that it becomes a
4163 // scalar offset.
4164 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
4165 unsigned BytesPerElt =
4166 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
4167 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
4168 }
4169
4170 return Builder.CreateCall(F, Ops);
4171}
4172
4173 Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
4174 SmallVectorImpl<Value *> &Ops,
4175 unsigned IntID) {
4176 // The gather prefetches are overloaded on the vector input - this can either
4177 // be the vector of base addresses or vector of offsets.
4178 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
4179 if (!OverloadedTy)
4180 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
4181
4182 // Cast the predicate from svbool_t to the right number of elements.
4183 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
4184
4185 // vector + imm addressing modes
4186 if (Ops[1]->getType()->isVectorTy()) {
4187 if (Ops.size() == 3) {
4188 // Pass 0 for 'vector+imm' when the index is omitted.
4189 Ops.push_back(ConstantInt::get(Int64Ty, 0));
4190
4191 // The sv_prfop is the last operand in the builtin and IR intrinsic.
4192 std::swap(Ops[2], Ops[3]);
4193 } else {
4194 // Index needs to be passed as scaled offset.
4195 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
4196 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
4197 if (BytesPerElt > 1)
4198 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
4199 }
4200 }
4201
4202 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
4203 return Builder.CreateCall(F, Ops);
4204}
4205
4206 Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
4207 SmallVectorImpl<Value *> &Ops,
4208 unsigned IntID) {
4209 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
4210 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
4211 Value *BasePtr = Ops[1];
4212
4213 // Does the load have an offset?
4214 if (Ops.size() > 2)
4215 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
4216
4217 Function *F = CGM.getIntrinsic(IntID, {VTy});
4218 return Builder.CreateCall(F, {Predicate, BasePtr});
4219}
4220
4221 Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
4222 SmallVectorImpl<Value *> &Ops,
4223 unsigned IntID) {
4224 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
4225
4226 unsigned N;
4227 switch (IntID) {
4228 case Intrinsic::aarch64_sve_st2:
4229 case Intrinsic::aarch64_sve_st1_pn_x2:
4230 case Intrinsic::aarch64_sve_stnt1_pn_x2:
4231 case Intrinsic::aarch64_sve_st2q:
4232 N = 2;
4233 break;
4234 case Intrinsic::aarch64_sve_st3:
4235 case Intrinsic::aarch64_sve_st3q:
4236 N = 3;
4237 break;
4238 case Intrinsic::aarch64_sve_st4:
4239 case Intrinsic::aarch64_sve_st1_pn_x4:
4240 case Intrinsic::aarch64_sve_stnt1_pn_x4:
4241 case Intrinsic::aarch64_sve_st4q:
4242 N = 4;
4243 break;
4244 default:
4245 llvm_unreachable("unknown intrinsic!");
4246 }
4247
4248 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
4249 Value *BasePtr = Ops[1];
4250
4251 // Does the store have an offset?
4252 if (Ops.size() > (2 + N))
4253 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
4254
4255 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
4256 // need to break up the tuple vector.
4257 SmallVector<llvm::Value *, 5> Operands;
4258 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
4259 Operands.push_back(Ops[I]);
4260 Operands.append({Predicate, BasePtr});
4261 Function *F = CGM.getIntrinsic(IntID, { VTy });
4262
4263 return Builder.CreateCall(F, Operands);
4264}
4265
4266// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
4267// svpmullt_pair intrinsics, with the exception that their results are bitcast
4268// to a wider type.
4269 Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
4270 SmallVectorImpl<Value *> &Ops,
4271 unsigned BuiltinID) {
4272 // Splat scalar operand to vector (intrinsics with _n infix)
4273 if (TypeFlags.hasSplatOperand()) {
4274 unsigned OpNo = TypeFlags.getSplatOperand();
4275 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
4276 }
4277
4278 // The pair-wise function has a narrower overloaded type.
4279 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
4280 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
4281
4282 // Now bitcast to the wider result type.
4283 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
4284 return EmitSVEReinterpret(Call, Ty);
4285}
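// E.g. svpmullb_u64 is emitted as aarch64.sve.pmullb.pair overloaded on its
// <vscale x 4 x i32> inputs, and the paired result is then reinterpreted as
// <vscale x 2 x i64> (illustrative; exact types follow the TypeFlags).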
4286
4287 Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
4288 ArrayRef<Value *> Ops, unsigned BuiltinID) {
4289 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
4290 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
4291 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
4292}
4293
4294 Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
4295 SmallVectorImpl<Value *> &Ops,
4296 unsigned BuiltinID) {
4297 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
4298 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
4299 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
4300
4301 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
4302 Value *BasePtr = Ops[1];
4303
4304 // Implement the index operand if not omitted.
4305 if (Ops.size() > 3)
4306 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
4307
4308 Value *PrfOp = Ops.back();
4309
4310 Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
4311 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
4312}
4313
4314 Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
4315 llvm::Type *ReturnTy,
4316 SmallVectorImpl<Value *> &Ops,
4317 unsigned IntrinsicID,
4318 bool IsZExtReturn) {
4319 QualType LangPTy = E->getArg(1)->getType();
4320 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
4321 LangPTy->castAs<PointerType>()->getPointeeType());
4322
4323 // Mfloat8 types are stored as a vector, so extra work is needed
4324 // to extract the scalar element type.
4325 if (MemEltTy->isVectorTy()) {
4326 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
4327 "Only <1 x i8> expected");
4328 MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
4329 }
4330
4331 // The vector type that is returned may be different from the
4332 // eventual type loaded from memory.
4333 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
4334 llvm::ScalableVectorType *MemoryTy = nullptr;
4335 llvm::ScalableVectorType *PredTy = nullptr;
4336 bool IsQuadLoad = false;
4337 switch (IntrinsicID) {
4338 case Intrinsic::aarch64_sve_ld1uwq:
4339 case Intrinsic::aarch64_sve_ld1udq:
4340 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
4341 PredTy = llvm::ScalableVectorType::get(
4342 llvm::Type::getInt1Ty(getLLVMContext()), 1);
4343 IsQuadLoad = true;
4344 break;
4345 default:
4346 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
4347 PredTy = MemoryTy;
4348 break;
4349 }
4350
4351 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
4352 Value *BasePtr = Ops[1];
4353
4354 // Does the load have an offset?
4355 if (Ops.size() > 2)
4356 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
4357
4358 Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
4359 auto *Load =
4360 cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
4361 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
4362 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
4363
4364 if (IsQuadLoad)
4365 return Load;
4366
4367 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
4368 : Builder.CreateSExt(Load, VectorTy);
4369}
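// E.g. an extending load such as svld1ub_u32 loads <vscale x 4 x i8> from
// memory, and the CreateZExt above widens it to the <vscale x 4 x i32>
// return type.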
4370
4371 Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
4372 SmallVectorImpl<Value *> &Ops,
4373 unsigned IntrinsicID) {
4374 QualType LangPTy = E->getArg(1)->getType();
4375 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
4376 LangPTy->castAs<PointerType>()->getPointeeType());
4377
4378 // Mfloat8 types are stored as a vector, so extra work is needed
4379 // to extract the scalar element type.
4380 if (MemEltTy->isVectorTy()) {
4381 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
4382 "Only <1 x i8> expected");
4383 MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
4384 }
4385
4386 // The vector type that is stored may be different from the
4387 // eventual type stored to memory.
4388 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
4389 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
4390
4391 auto PredTy = MemoryTy;
4392 auto AddrMemoryTy = MemoryTy;
4393 bool IsQuadStore = false;
4394
4395 switch (IntrinsicID) {
4396 case Intrinsic::aarch64_sve_st1wq:
4397 case Intrinsic::aarch64_sve_st1dq:
4398 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
4399 PredTy =
4400 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
4401 IsQuadStore = true;
4402 break;
4403 default:
4404 break;
4405 }
4406 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
4407 Value *BasePtr = Ops[1];
4408
4409 // Does the store have an offset?
4410 if (Ops.size() == 4)
4411 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
4412
4413 // Last value is always the data
4414 Value *Val =
4415 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
4416
4417 Function *F =
4418 CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
4419 auto *Store =
4420 cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
4421 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
4422 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
4423 return Store;
4424}
4425
4426 Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
4427 SmallVectorImpl<Value *> &Ops,
4428 unsigned IntID) {
4429 Ops[2] = EmitSVEPredicateCast(
4430 Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
4431
4432 SmallVector<Value *> NewOps;
4433 NewOps.push_back(Ops[2]);
4434
4435 llvm::Value *BasePtr = Ops[3];
4436 llvm::Value *RealSlice = Ops[1];
4437 // If the intrinsic contains the vnum parameter, multiply it with the vector
4438 // size in bytes.
4439 if (Ops.size() == 5) {
4440 Function *StreamingVectorLength =
4441 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd);
4442 llvm::Value *StreamingVectorLengthCall =
4443 Builder.CreateMul(Builder.CreateCall(StreamingVectorLength),
4444 llvm::ConstantInt::get(Int64Ty, 8), "svl",
4445 /* HasNUW */ true, /* HasNSW */ true);
4446 llvm::Value *Mulvl =
4447 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
4448 // The type of the ptr parameter is void *, so use Int8Ty here.
4449 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
4450 RealSlice = Builder.CreateZExt(RealSlice, Int64Ty);
4451 RealSlice = Builder.CreateAdd(RealSlice, Ops[4]);
4452 RealSlice = Builder.CreateTrunc(RealSlice, Int32Ty);
4453 }
4454 NewOps.push_back(BasePtr);
4455 NewOps.push_back(Ops[0]);
4456 NewOps.push_back(RealSlice);
4457 Function *F = CGM.getIntrinsic(IntID);
4458 return Builder.CreateCall(F, NewOps);
4459}
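// In effect, the vnum form addresses base + vnum * svl_b bytes, where
// svl_b, the streaming vector length in bytes, is obtained as cntsd * 8;
// the tile slice index is advanced by vnum as well.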
4460
4461 Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
4462 SmallVectorImpl<Value *> &Ops,
4463 unsigned IntID) {
4464 auto *VecTy = getSVEType(TypeFlags);
4465 Function *F = CGM.getIntrinsic(IntID, VecTy);
4466 if (TypeFlags.isReadZA())
4467 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
4468 else if (TypeFlags.isWriteZA())
4469 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
4470 return Builder.CreateCall(F, Ops);
4471}
4472
4473 Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
4474 SmallVectorImpl<Value *> &Ops,
4475 unsigned IntID) {
4476 // The svzero_za() intrinsic zeros the entire ZA tile and has no parameters.
4477 if (Ops.size() == 0)
4478 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
4479 Function *F = CGM.getIntrinsic(IntID, {});
4480 return Builder.CreateCall(F, Ops);
4481}
4482
4483 Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
4484 SmallVectorImpl<Value *> &Ops,
4485 unsigned IntID) {
4486 if (Ops.size() == 2)
4487 Ops.push_back(Builder.getInt32(0));
4488 else
4489 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
4490 Function *F = CGM.getIntrinsic(IntID, {});
4491 return Builder.CreateCall(F, Ops);
4492}
4493
4494// Limit the usage of scalable llvm IR generated by the ACLE by using the
4495// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
4496Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
4497 return Builder.CreateVectorSplat(
4498 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
4499}
4500
4501 Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
4502 if (auto *Ty = Scalar->getType(); Ty->isVectorTy()) {
4503#ifndef NDEBUG
4504 auto *VecTy = cast<llvm::VectorType>(Ty);
4505 ElementCount EC = VecTy->getElementCount();
4506 assert(EC.isScalar() && VecTy->getElementType() == Int8Ty &&
4507 "Only <1 x i8> expected");
4508#endif
4509 Scalar = Builder.CreateExtractElement(Scalar, uint64_t(0));
4510 }
4511 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
4512}
4513
4514 Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
4515 // FIXME: For big endian this needs an additional REV, or needs a separate
4516 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
4517 // instruction is defined as 'bitwise' equivalent from memory point of
4518 // view (when storing/reloading), whereas the svreinterpret builtin
4519 // implements bitwise equivalent cast from register point of view.
4520 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
4521
4522 if (auto *StructTy = dyn_cast<StructType>(Ty)) {
4523 Value *Tuple = llvm::PoisonValue::get(Ty);
4524
4525 for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
4526 Value *In = Builder.CreateExtractValue(Val, I);
4527 Value *Out = Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
4528 Tuple = Builder.CreateInsertValue(Tuple, Out, I);
4529 }
4530
4531 return Tuple;
4532 }
4533
4534 return Builder.CreateBitCast(Val, Ty);
4535}
4536
4537static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
4538 SmallVectorImpl<Value *> &Ops) {
4539 auto *SplatZero = Constant::getNullValue(Ty);
4540 Ops.insert(Ops.begin(), SplatZero);
4541}
4542
4543static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
4544 SmallVectorImpl<Value *> &Ops) {
4545 auto *SplatUndef = UndefValue::get(Ty);
4546 Ops.insert(Ops.begin(), SplatUndef);
4547}
4548
4549SmallVector<llvm::Type *, 2>
4550 CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
4551 llvm::Type *ResultType,
4552 ArrayRef<Value *> Ops) {
4553 if (TypeFlags.isOverloadNone())
4554 return {};
4555
4556 llvm::Type *DefaultType = getSVEType(TypeFlags);
4557
4558 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
4559 return {DefaultType, Ops[1]->getType()};
4560
4561 if (TypeFlags.isOverloadWhileRW())
4562 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
4563
4564 if (TypeFlags.isOverloadFirstandLast())
4565 return {Ops[0]->getType(), Ops.back()->getType()};
4566
4567 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
4568 ResultType->isVectorTy())
4569 return {ResultType, Ops[1]->getType()};
4570
4571 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
4572 return {DefaultType};
4573}
4574
4575 Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
4576 ArrayRef<Value *> Ops) {
4577 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
4578 "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
4579 unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();
4580
4581 if (TypeFlags.isTupleSet())
4582 return Builder.CreateInsertValue(Ops[0], Ops[2], Idx);
4583 return Builder.CreateExtractValue(Ops[0], Idx);
4584}
4585
4586 Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
4587 llvm::Type *Ty,
4588 ArrayRef<Value *> Ops) {
4590 assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
4590
4591 Value *Tuple = llvm::PoisonValue::get(Ty);
4592 for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
4593 Tuple = Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
4594
4595 return Tuple;
4596}
4597
4598 void CodeGenFunction::GetAArch64SVEProcessedOperands(
4599 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
4600 SVETypeFlags TypeFlags) {
4601 // Find out if any arguments are required to be integer constant expressions.
4602 unsigned ICEArguments = 0;
4603 ASTContext::GetBuiltinTypeError Error;
4604 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4605 assert(Error == ASTContext::GE_None && "Should not codegen an error");
4606
4607 // Tuple set/get only requires one insert/extract vector, which is
4608 // created by EmitSVETupleSetOrGet.
4609 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
4610
4611 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
4612 bool IsICE = ICEArguments & (1 << i);
4613 Value *Arg = EmitScalarExpr(E->getArg(i));
4614
4615 if (IsICE) {
4616 // If this is required to be a constant, constant fold it so that we know
4617 // that the generated intrinsic gets a ConstantInt.
4618 std::optional<llvm::APSInt> Result =
4619 E->getArg(i)->getIntegerConstantExpr(getContext());
4620 assert(Result && "Expected argument to be a constant");
4621
4622 // Immediates for SVE llvm intrinsics are always 32-bit. We can safely
4623 // truncate because the immediate has been range checked and no valid
4624 // immediate requires more than a handful of bits.
4625 *Result = Result->extOrTrunc(32);
4626 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
4627 continue;
4628 }
4629
4630 if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
4631 for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
4632 Ops.push_back(Builder.CreateExtractValue(Arg, I));
4633
4634 continue;
4635 }
4636
4637 Ops.push_back(Arg);
4638 }
4639}
4640
4641 Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
4642 const CallExpr *E) {
4643 llvm::Type *Ty = ConvertType(E->getType());
4644 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
4645 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
4646 Value *Val = EmitScalarExpr(E->getArg(0));
4647 return EmitSVEReinterpret(Val, Ty);
4648 }
4649
4650 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
4651 AArch64SVEIntrinsicsProvenSorted);
4652
4653 llvm::SmallVector<Value *, 4> Ops;
4654 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4655 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4656
4657 if (TypeFlags.isLoad())
4658 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
4659 TypeFlags.isZExtReturn());
4660 if (TypeFlags.isStore())
4661 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
4662 if (TypeFlags.isGatherLoad())
4663 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4664 if (TypeFlags.isScatterStore())
4665 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4666 if (TypeFlags.isPrefetch())
4667 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4668 if (TypeFlags.isGatherPrefetch())
4669 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4670 if (TypeFlags.isStructLoad())
4671 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4672 if (TypeFlags.isStructStore())
4673 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4674 if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
4675 return EmitSVETupleSetOrGet(TypeFlags, Ops);
4676 if (TypeFlags.isTupleCreate())
4677 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
4678 if (TypeFlags.isUndef())
4679 return UndefValue::get(Ty);
4680
4681 // Handle built-ins for which there is a corresponding LLVM Intrinsic.
4682 // -------------------------------------------------------------------
4683 if (Builtin->LLVMIntrinsic != 0) {
4684 // Emit set FPMR for intrinsics that require it
4685 if (TypeFlags.setsFPMR())
4686 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
4687 Ops.pop_back_val());
4688 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
4689 InsertExplicitZeroOperand(Builder, Ty, Ops);
4690
4691 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
4692 InsertExplicitUndefOperand(Builder, Ty, Ops);
4693
4694 // Some ACLE builtins leave out the argument to specify the predicate
4695 // pattern, which is expected to be expanded to an SV_ALL pattern.
4696 if (TypeFlags.isAppendSVALL())
4697 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
4698 if (TypeFlags.isInsertOp1SVALL())
4699 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
4700
4701 // Predicates must match the main datatype.
4702 for (Value *&Op : Ops)
4703 if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
4704 if (PredTy->getElementType()->isIntegerTy(1))
4705 Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));
4706
4707 // Splat scalar operand to vector (intrinsics with _n infix)
4708 if (TypeFlags.hasSplatOperand()) {
4709 unsigned OpNo = TypeFlags.getSplatOperand();
4710 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
4711 }
4712
4713 if (TypeFlags.isReverseCompare())
4714 std::swap(Ops[1], Ops[2]);
4715 else if (TypeFlags.isReverseUSDOT())
4716 std::swap(Ops[1], Ops[2]);
4717 else if (TypeFlags.isReverseMergeAnyBinOp() &&
4718 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4719 std::swap(Ops[1], Ops[2]);
4720 else if (TypeFlags.isReverseMergeAnyAccOp() &&
4721 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4722 std::swap(Ops[1], Ops[3]);
4723
4724 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
4725 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
4726 llvm::Type *OpndTy = Ops[1]->getType();
4727 auto *SplatZero = Constant::getNullValue(OpndTy);
4728 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
4729 }
4730
4731 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
4732 getSVEOverloadTypes(TypeFlags, Ty, Ops));
4733 Value *Call = Builder.CreateCall(F, Ops);
4734
4735 if (Call->getType() == Ty)
4736 return Call;
4737
4738 // Predicate results must be converted to svbool_t.
4739 if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ty))
4740 return EmitSVEPredicateCast(Call, PredTy);
4741 if (auto PredTupleTy = dyn_cast<llvm::StructType>(Ty))
4742 return EmitSVEPredicateTupleCast(Call, PredTupleTy);
4743
4744 llvm_unreachable("unsupported element count!");
4745 }
4746
4747 switch (BuiltinID) {
4748 default:
4749 return nullptr;
4750
4751 case SVE::BI__builtin_sve_svreinterpret_b: {
4752 auto SVCountTy =
4753 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4754 Function *CastFromSVCountF =
4755 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4756 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
4757 }
4758 case SVE::BI__builtin_sve_svreinterpret_c: {
4759 auto SVCountTy =
4760 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4761 Function *CastToSVCountF =
4762 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4763 return Builder.CreateCall(CastToSVCountF, Ops[0]);
4764 }
4765
4766 case SVE::BI__builtin_sve_svpsel_lane_b8:
4767 case SVE::BI__builtin_sve_svpsel_lane_b16:
4768 case SVE::BI__builtin_sve_svpsel_lane_b32:
4769 case SVE::BI__builtin_sve_svpsel_lane_b64:
4770 case SVE::BI__builtin_sve_svpsel_lane_c8:
4771 case SVE::BI__builtin_sve_svpsel_lane_c16:
4772 case SVE::BI__builtin_sve_svpsel_lane_c32:
4773 case SVE::BI__builtin_sve_svpsel_lane_c64: {
4774 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
4775 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
4776 "aarch64.svcount")) &&
4777 "Unexpected TargetExtType");
4778 auto SVCountTy =
4779 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4780 Function *CastFromSVCountF =
4781 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4782 Function *CastToSVCountF =
4783 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4784
4785 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
4786 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
4787 llvm::Value *Ops0 =
4788 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
4789 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
4790 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
4791 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
4792 }
4793 case SVE::BI__builtin_sve_svmov_b_z: {
4794 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
4795 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4796 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4797 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
4798 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
4799 }
4800
4801 case SVE::BI__builtin_sve_svnot_b_z: {
4802 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
4803 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4804 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4805 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
4806 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
4807 }
4808
4809 case SVE::BI__builtin_sve_svmovlb_u16:
4810 case SVE::BI__builtin_sve_svmovlb_u32:
4811 case SVE::BI__builtin_sve_svmovlb_u64:
4812 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
4813
4814 case SVE::BI__builtin_sve_svmovlb_s16:
4815 case SVE::BI__builtin_sve_svmovlb_s32:
4816 case SVE::BI__builtin_sve_svmovlb_s64:
4817 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
4818
4819 case SVE::BI__builtin_sve_svmovlt_u16:
4820 case SVE::BI__builtin_sve_svmovlt_u32:
4821 case SVE::BI__builtin_sve_svmovlt_u64:
4822 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
4823
4824 case SVE::BI__builtin_sve_svmovlt_s16:
4825 case SVE::BI__builtin_sve_svmovlt_s32:
4826 case SVE::BI__builtin_sve_svmovlt_s64:
4827 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
4828
4829 case SVE::BI__builtin_sve_svpmullt_u16:
4830 case SVE::BI__builtin_sve_svpmullt_u64:
4831 case SVE::BI__builtin_sve_svpmullt_n_u16:
4832 case SVE::BI__builtin_sve_svpmullt_n_u64:
4833 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
4834
4835 case SVE::BI__builtin_sve_svpmullb_u16:
4836 case SVE::BI__builtin_sve_svpmullb_u64:
4837 case SVE::BI__builtin_sve_svpmullb_n_u16:
4838 case SVE::BI__builtin_sve_svpmullb_n_u64:
4839 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
4840
4841 case SVE::BI__builtin_sve_svdup_n_b8:
4842 case SVE::BI__builtin_sve_svdup_n_b16:
4843 case SVE::BI__builtin_sve_svdup_n_b32:
4844 case SVE::BI__builtin_sve_svdup_n_b64: {
4845 Value *CmpNE =
4846 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
4847 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
4848 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
4849 return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
4850 }
4851
4852 case SVE::BI__builtin_sve_svdupq_n_b8:
4853 case SVE::BI__builtin_sve_svdupq_n_b16:
4854 case SVE::BI__builtin_sve_svdupq_n_b32:
4855 case SVE::BI__builtin_sve_svdupq_n_b64:
4856 case SVE::BI__builtin_sve_svdupq_n_u8:
4857 case SVE::BI__builtin_sve_svdupq_n_s8:
4858 case SVE::BI__builtin_sve_svdupq_n_u64:
4859 case SVE::BI__builtin_sve_svdupq_n_f64:
4860 case SVE::BI__builtin_sve_svdupq_n_s64:
4861 case SVE::BI__builtin_sve_svdupq_n_u16:
4862 case SVE::BI__builtin_sve_svdupq_n_f16:
4863 case SVE::BI__builtin_sve_svdupq_n_bf16:
4864 case SVE::BI__builtin_sve_svdupq_n_s16:
4865 case SVE::BI__builtin_sve_svdupq_n_u32:
4866 case SVE::BI__builtin_sve_svdupq_n_f32:
4867 case SVE::BI__builtin_sve_svdupq_n_s32: {
4868 // These builtins are implemented by storing each element to an array and using
4869 // ld1rq to materialize a vector.
4870 unsigned NumOpnds = Ops.size();
4871
4872 bool IsBoolTy =
4873 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
4874
4875 // For svdupq_n_b* the element type is an integer of width 128/numelts,
4876 // so that the compare can use the width that is natural for the expected
4877 // number of predicate lanes.
4878 llvm::Type *EltTy = Ops[0]->getType();
4879 if (IsBoolTy)
4880 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
4881
4882 SmallVector<llvm::Value *, 16> VecOps;
4883 for (unsigned I = 0; I < NumOpnds; ++I)
4884 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
4885 Value *Vec = BuildVector(VecOps);
4886
4887 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
4888 Value *InsertSubVec = Builder.CreateInsertVector(
4889 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, uint64_t(0));
4890
4891 Function *F =
4892 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
4893 Value *DupQLane =
4894 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
4895
4896 if (!IsBoolTy)
4897 return DupQLane;
4898
4899 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4900 Value *Pred = EmitSVEAllTruePred(TypeFlags);
4901
4902 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
4903 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
4904 : Intrinsic::aarch64_sve_cmpne_wide,
4905 OverloadedTy);
4906 Value *Call = Builder.CreateCall(
4907 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
4908 return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
4909 }
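// E.g. svdupq_n_b8 arrives here with 16 operands: each is zero-extended to
// i8, packed into a <16 x i8>, broadcast with dupq_lane, and then compared
// not-equal to zero under an all-true predicate to form the result.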
4910
4911 case SVE::BI__builtin_sve_svpfalse_b:
4912 return ConstantInt::getFalse(Ty);
4913
4914 case SVE::BI__builtin_sve_svpfalse_c: {
4915 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
4916 Function *CastToSVCountF =
4917 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
4918 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
4919 }
4920
4921 case SVE::BI__builtin_sve_svlen_bf16:
4922 case SVE::BI__builtin_sve_svlen_f16:
4923 case SVE::BI__builtin_sve_svlen_f32:
4924 case SVE::BI__builtin_sve_svlen_f64:
4925 case SVE::BI__builtin_sve_svlen_s8:
4926 case SVE::BI__builtin_sve_svlen_s16:
4927 case SVE::BI__builtin_sve_svlen_s32:
4928 case SVE::BI__builtin_sve_svlen_s64:
4929 case SVE::BI__builtin_sve_svlen_u8:
4930 case SVE::BI__builtin_sve_svlen_u16:
4931 case SVE::BI__builtin_sve_svlen_u32:
4932 case SVE::BI__builtin_sve_svlen_u64: {
4933 SVETypeFlags TF(Builtin->TypeModifier);
4934 return Builder.CreateElementCount(Ty, getSVEType(TF)->getElementCount());
4935 }
4936
4937 case SVE::BI__builtin_sve_svtbl2_u8:
4938 case SVE::BI__builtin_sve_svtbl2_s8:
4939 case SVE::BI__builtin_sve_svtbl2_u16:
4940 case SVE::BI__builtin_sve_svtbl2_s16:
4941 case SVE::BI__builtin_sve_svtbl2_u32:
4942 case SVE::BI__builtin_sve_svtbl2_s32:
4943 case SVE::BI__builtin_sve_svtbl2_u64:
4944 case SVE::BI__builtin_sve_svtbl2_s64:
4945 case SVE::BI__builtin_sve_svtbl2_f16:
4946 case SVE::BI__builtin_sve_svtbl2_bf16:
4947 case SVE::BI__builtin_sve_svtbl2_f32:
4948 case SVE::BI__builtin_sve_svtbl2_f64: {
4949 SVETypeFlags TF(Builtin->TypeModifier);
4950 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, getSVEType(TF));
4951 return Builder.CreateCall(F, Ops);
4952 }
4953
4954 case SVE::BI__builtin_sve_svset_neonq_s8:
4955 case SVE::BI__builtin_sve_svset_neonq_s16:
4956 case SVE::BI__builtin_sve_svset_neonq_s32:
4957 case SVE::BI__builtin_sve_svset_neonq_s64:
4958 case SVE::BI__builtin_sve_svset_neonq_u8:
4959 case SVE::BI__builtin_sve_svset_neonq_u16:
4960 case SVE::BI__builtin_sve_svset_neonq_u32:
4961 case SVE::BI__builtin_sve_svset_neonq_u64:
4962 case SVE::BI__builtin_sve_svset_neonq_f16:
4963 case SVE::BI__builtin_sve_svset_neonq_f32:
4964 case SVE::BI__builtin_sve_svset_neonq_f64:
4965 case SVE::BI__builtin_sve_svset_neonq_bf16: {
4966 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], uint64_t(0));
4967 }
4968
4969 case SVE::BI__builtin_sve_svget_neonq_s8:
4970 case SVE::BI__builtin_sve_svget_neonq_s16:
4971 case SVE::BI__builtin_sve_svget_neonq_s32:
4972 case SVE::BI__builtin_sve_svget_neonq_s64:
4973 case SVE::BI__builtin_sve_svget_neonq_u8:
4974 case SVE::BI__builtin_sve_svget_neonq_u16:
4975 case SVE::BI__builtin_sve_svget_neonq_u32:
4976 case SVE::BI__builtin_sve_svget_neonq_u64:
4977 case SVE::BI__builtin_sve_svget_neonq_f16:
4978 case SVE::BI__builtin_sve_svget_neonq_f32:
4979 case SVE::BI__builtin_sve_svget_neonq_f64:
4980 case SVE::BI__builtin_sve_svget_neonq_bf16: {
4981 return Builder.CreateExtractVector(Ty, Ops[0], uint64_t(0));
4982 }
4983
4984 case SVE::BI__builtin_sve_svdup_neonq_s8:
4985 case SVE::BI__builtin_sve_svdup_neonq_s16:
4986 case SVE::BI__builtin_sve_svdup_neonq_s32:
4987 case SVE::BI__builtin_sve_svdup_neonq_s64:
4988 case SVE::BI__builtin_sve_svdup_neonq_u8:
4989 case SVE::BI__builtin_sve_svdup_neonq_u16:
4990 case SVE::BI__builtin_sve_svdup_neonq_u32:
4991 case SVE::BI__builtin_sve_svdup_neonq_u64:
4992 case SVE::BI__builtin_sve_svdup_neonq_f16:
4993 case SVE::BI__builtin_sve_svdup_neonq_f32:
4994 case SVE::BI__builtin_sve_svdup_neonq_f64:
4995 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
4996 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
4997 uint64_t(0));
4998 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
4999 {Insert, Builder.getInt64(0)});
5000 }
5001 }
5002
5003 // Should not happen.
5004 return nullptr;
5005}
5006
5007static void swapCommutativeSMEOperands(unsigned BuiltinID,
5008 SmallVectorImpl<Value *> &Ops) {
5009 unsigned MultiVec;
5010 switch (BuiltinID) {
5011 default:
5012 return;
5013 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
5014 MultiVec = 1;
5015 break;
5016 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
5017 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
5018 MultiVec = 2;
5019 break;
5020 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
5021 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
5022 MultiVec = 4;
5023 break;
5024 }
5025
5026 if (MultiVec > 0)
5027 for (unsigned I = 0; I < MultiVec; ++I)
5028 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
5029}
5030
5031 Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
5032 const CallExpr *E) {
5033 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
5034 AArch64SMEIntrinsicsProvenSorted);
5035
5036 llvm::SmallVector<Value *, 4> Ops;
5037 SVETypeFlags TypeFlags(Builtin->TypeModifier);
5038 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
5039
5040 if (TypeFlags.isLoad() || TypeFlags.isStore())
5041 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
5042 if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
5043 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
5044 if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
5045 BuiltinID == SME::BI__builtin_sme_svzero_za)
5046 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
5047 if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
5048 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
5049 BuiltinID == SME::BI__builtin_sme_svldr_za ||
5050 BuiltinID == SME::BI__builtin_sme_svstr_za)
5051 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
5052
5053 // Emit set FPMR for intrinsics that require it
5054 if (TypeFlags.setsFPMR())
5055 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
5056 Ops.pop_back_val());
5057 // Handle builtins which require their multi-vector operands to be swapped
5058 swapCommutativeSMEOperands(BuiltinID, Ops);
5059
5060 auto isCntsBuiltin = [&]() {
5061 switch (BuiltinID) {
5062 default:
5063 return 0;
5064 case SME::BI__builtin_sme_svcntsb:
5065 return 8;
5066 case SME::BI__builtin_sme_svcntsh:
5067 return 4;
5068 case SME::BI__builtin_sme_svcntsw:
5069 return 2;
5070 }
5071 };
5072
5073 if (auto Mul = isCntsBuiltin()) {
5074 llvm::Value *Cntd =
5075 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd));
5076 return Builder.CreateMul(Cntd, llvm::ConstantInt::get(Int64Ty, Mul),
5077 "mulsvl", /* HasNUW */ true, /* HasNSW */ true);
5078 }
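// All three counts derive from aarch64.sme.cntsd: a streaming vector of
// cntsd doublewords holds 8x as many bytes, 4x as many halfwords and 2x as
// many words.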
5079
5080 // Should not happen!
5081 if (Builtin->LLVMIntrinsic == 0)
5082 return nullptr;
5083
5084 // Predicates must match the main datatype.
5085 for (Value *&Op : Ops)
5086 if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
5087 if (PredTy->getElementType()->isIntegerTy(1))
5088 Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));
5089
5090 Function *F =
5091 TypeFlags.isOverloadNone()
5092 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
5093 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
5094
5095 return Builder.CreateCall(F, Ops);
5096}
5097
5098/// Helper for the read/write/add/inc X18 builtins: read the X18 register and
5099/// return it as an i8 pointer.
5100 static llvm::Value *readX18AsPtr(CodeGenFunction &CGF) {
5101 LLVMContext &Context = CGF.CGM.getLLVMContext();
5102 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
5103 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5104 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5105 llvm::Function *F =
5106 CGF.CGM.getIntrinsic(Intrinsic::read_register, {CGF.Int64Ty});
5107 llvm::Value *X18 = CGF.Builder.CreateCall(F, Metadata);
5108 return CGF.Builder.CreateIntToPtr(X18, CGF.Int8PtrTy);
5109}
5110
5111 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
5112 const CallExpr *E,
5113 llvm::Triple::ArchType Arch) {
5114 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
5115 BuiltinID <= clang::AArch64::LastSVEBuiltin)
5116 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
5117
5118 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
5119 BuiltinID <= clang::AArch64::LastSMEBuiltin)
5120 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
5121
5122 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
5123 return EmitAArch64CpuSupports(E);
5124
5125 unsigned HintID = static_cast<unsigned>(-1);
5126 switch (BuiltinID) {
5127 default: break;
5128 case clang::AArch64::BI__builtin_arm_nop:
5129 HintID = 0;
5130 break;
5131 case clang::AArch64::BI__builtin_arm_yield:
5132 case clang::AArch64::BI__yield:
5133 HintID = 1;
5134 break;
5135 case clang::AArch64::BI__builtin_arm_wfe:
5136 case clang::AArch64::BI__wfe:
5137 HintID = 2;
5138 break;
5139 case clang::AArch64::BI__builtin_arm_wfi:
5140 case clang::AArch64::BI__wfi:
5141 HintID = 3;
5142 break;
5143 case clang::AArch64::BI__builtin_arm_sev:
5144 case clang::AArch64::BI__sev:
5145 HintID = 4;
5146 break;
5147 case clang::AArch64::BI__builtin_arm_sevl:
5148 case clang::AArch64::BI__sevl:
5149 HintID = 5;
5150 break;
5151 }
5152
5153 if (HintID != static_cast<unsigned>(-1)) {
5154 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
5155 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
5156 }
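// E.g. __wfe() reaches this point with HintID == 2 and is emitted as
// call void @llvm.aarch64.hint(i32 2).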
5157
5158 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
5159 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
5160 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5161 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
5162 }
5163
5164 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
5165 // Create call to __arm_sme_state and store the results to the two pointers.
5166 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
5167 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
5168 false),
5169 "__arm_sme_state"));
5170 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
5171 "aarch64_pstate_sm_compatible");
5172 CI->setAttributes(Attrs);
5173 CI->setCallingConv(
5174 llvm::CallingConv::
5175 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
5176 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
5177 EmitPointerWithAlignment(E->getArg(0)));
5178 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
5179 EmitPointerWithAlignment(E->getArg(1)));
5180 }
5181
5182 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
5183 assert((getContext().getTypeSize(E->getType()) == 32) &&
5184 "rbit of unusual size!");
5185 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5186 return Builder.CreateCall(
5187 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5188 }
5189 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
5190 assert((getContext().getTypeSize(E->getType()) == 64) &&
5191 "rbit of unusual size!");
5192 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5193 return Builder.CreateCall(
5194 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5195 }
5196
5197 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
5198 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
5199 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5200 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
5201 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
5202 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
5203 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
5204 return Res;
5205 }
5206
5207 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
5208 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5209 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
5210 "cls");
5211 }
5212 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
5213 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5214 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
5215 "cls");
5216 }
5217
5218 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
5219 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
5220 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5221 llvm::Type *Ty = Arg->getType();
5222 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
5223 Arg, "frint32z");
5224 }
5225
5226 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
5227 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
5228 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5229 llvm::Type *Ty = Arg->getType();
5230 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
5231 Arg, "frint64z");
5232 }
5233
5234 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
5235 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
5236 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5237 llvm::Type *Ty = Arg->getType();
5238 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
5239 Arg, "frint32x");
5240 }
5241
5242 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
5243 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
5244 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5245 llvm::Type *Ty = Arg->getType();
5246 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
5247 Arg, "frint64x");
5248 }
5249
5250 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
5251 assert((getContext().getTypeSize(E->getType()) == 32) &&
5252 "__jcvt of unusual size!");
5253 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5254 return Builder.CreateCall(
5255 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
5256 }
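// Background (hedged): FJCVTZS (FEAT_JSCVT) performs the double -> int32
// conversion JavaScript requires: truncate toward zero and wrap modulo 2^32,
// so e.g. __jcvt(2.7) == 2 and __jcvt(4294967296.5) == 0.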
5257
5258 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
5259 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
5260 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
5261 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
5262 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
5263 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
5264
5265 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
5266 // Load from the address via an LLVM intrinsic, receiving a
5267 // tuple of 8 i64 words, and store each one to ValPtr.
5268 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
5269 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
5270 llvm::Value *ToRet;
5271 for (size_t i = 0; i < 8; i++) {
5272 llvm::Value *ValOffsetPtr =
5273 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
5274 Address Addr =
5275 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
5276 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
5277 }
5278 return ToRet;
5279 }
5280
5281 // Load 8 i64 words from ValPtr, and store them to the address
5282 // via an LLVM intrinsic.
5283 SmallVector<llvm::Value *, 9> Args;
5284 Args.push_back(MemAddr);
5285 for (size_t i = 0; i < 8; i++) {
5286 llvm::Value *ValOffsetPtr =
5287 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
5288 Address Addr = Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
5289 Args.push_back(Builder.CreateLoad(Addr));
5290 }
5291
5292 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
5293 ? Intrinsic::aarch64_st64b
5294 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
5295 ? Intrinsic::aarch64_st64bv
5296 : Intrinsic::aarch64_st64bv0);
5297 Function *F = CGM.getIntrinsic(Intr);
5298 return Builder.CreateCall(F, Args);
5299 }
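// Illustrative shape of the emitted call (assumed, for st64b): the loop above
// gathers val[0..7] so that a single
//   call void @llvm.aarch64.st64b(ptr %addr, i64 %v0, ..., i64 %v7)
// is emitted, which the backend can select to one 64-byte ST64B (FEAT_LS64)
// store.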
5300
5301 if (BuiltinID == clang::AArch64::BI__builtin_arm_atomic_store_with_stshh) {
5302 Value *StoreAddr = EmitScalarExpr(E->getArg(0));
5303 Value *StoreValue = EmitScalarExpr(E->getArg(1));
5304
5305 auto *OrderC = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5306 auto *PolicyC = cast<ConstantInt>(EmitScalarExpr(E->getArg(3)));
5307
5308 // Compute the pointee bit-width from arg0 and materialize it as an i32 constant.
5309 QualType ValQT =
5310 E->getArg(0)->getType()->getPointeeType();
5311 unsigned SizeBits = getContext().getTypeSize(ValQT);
5312 auto *SizeC = llvm::ConstantInt::get(Int32Ty, SizeBits);
5313
5314 Value *StoreValue64 = Builder.CreateIntCast(StoreValue, Int64Ty,
5315 ValQT->isSignedIntegerType());
5316
5317 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_stshh_atomic_store,
5318 {StoreAddr->getType()});
5319
5320 // Emit a single intrinsic so the backend can expand it into STSHH followed by
5321 // the atomic store, guaranteeing the STSHH immediately precedes the STR insn.
5322 return Builder.CreateCall(
5323 F, {StoreAddr, StoreValue64,
5324 ConstantInt::get(Int32Ty, OrderC->getZExtValue()),
5325 ConstantInt::get(Int32Ty, PolicyC->getZExtValue()), SizeC});
5326 }
5327
5328 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
5329 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
5330
5331 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
5332 ? Intrinsic::aarch64_rndr
5333 : Intrinsic::aarch64_rndrrs);
5334 Function *F = CGM.getIntrinsic(Intr);
5335 llvm::Value *Val = Builder.CreateCall(F);
5336 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
5337 Value *Status = Builder.CreateExtractValue(Val, 1);
5338
5339 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
5340 Builder.CreateStore(RandomValue, MemAddress);
5341 Status = Builder.CreateZExt(Status, Int32Ty);
5342 return Status;
5343 }
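// Usage sketch: uint64_t v; int status = __builtin_arm_rndr(&v); the i1
// status extracted from the intrinsic's { i64, i1 } result is zero-extended
// to the i32 return value so callers can test whether RNDR/RNDRRS produced a
// valid random value.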
5344
5345 if (BuiltinID == clang::AArch64::BI__clear_cache) {
5346 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5347 const FunctionDecl *FD = E->getDirectCallee();
5348 Value *Ops[2];
5349 for (unsigned i = 0; i < 2; i++)
5350 Ops[i] = EmitScalarExpr(E->getArg(i));
5351 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5352 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5353 StringRef Name = FD->getName();
5354 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5355 }
5356
5357 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
5358 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
5359 getContext().getTypeSize(E->getType()) == 128) {
5360 Function *F =
5361 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
5362 ? Intrinsic::aarch64_ldaxp
5363 : Intrinsic::aarch64_ldxp);
5364
5365 Value *LdPtr = EmitScalarExpr(E->getArg(0));
5366 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
5367
5368 Value *Val0 = Builder.CreateExtractValue(Val, 1);
5369 Value *Val1 = Builder.CreateExtractValue(Val, 0);
5370 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5371 Val0 = Builder.CreateZExt(Val0, Int128Ty);
5372 Val1 = Builder.CreateZExt(Val1, Int128Ty);
5373
5374 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
5375 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5376 Val = Builder.CreateOr(Val, Val1);
5377 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5378 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
5379 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
5380 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5381
5382 QualType Ty = E->getType();
5383 llvm::Type *RealResTy = ConvertType(Ty);
5384 llvm::Type *IntTy =
5385 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
5386
5387 Function *F =
5388 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
5389 ? Intrinsic::aarch64_ldaxr
5390 : Intrinsic::aarch64_ldxr,
5391 DefaultPtrTy);
5392 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
5393 Val->addParamAttr(
5394 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
5395
5396 if (RealResTy->isPointerTy())
5397 return Builder.CreateIntToPtr(Val, RealResTy);
5398
5399 llvm::Type *IntResTy = llvm::IntegerType::get(
5400 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
5401 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
5402 RealResTy);
5403 }
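// Note on the 128-bit path above: element 1 of the { i64, i64 } returned by
// ldxp/ldaxp forms the high half of the i128 (shifted left by 64, nuw) and
// element 0 the low half, before the result is bitcast to the user-visible
// 128-bit type.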
5404
5405 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
5406 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
5407 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
5408 Function *F =
5409 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
5410 ? Intrinsic::aarch64_stlxp
5411 : Intrinsic::aarch64_stxp);
5412 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
5413
5414 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
5415 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
5416
5417 Tmp = Tmp.withElementType(STy);
5418 llvm::Value *Val = Builder.CreateLoad(Tmp);
5419
5420 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
5421 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
5422 Value *StPtr = EmitScalarExpr(E->getArg(1));
5423 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
5424 }
5425
5426 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
5427 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
5428 Value *StoreVal = EmitScalarExpr(E->getArg(0));
5429 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
5430
5431 QualType Ty = E->getArg(0)->getType();
5432 llvm::Type *StoreTy =
5433 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
5434
5435 if (StoreVal->getType()->isPointerTy())
5436 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
5437 else {
5438 llvm::Type *IntTy = llvm::IntegerType::get(
5439 getLLVMContext(),
5440 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
5441 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
5442 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
5443 }
5444
5445 Function *F =
5446 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
5447 ? Intrinsic::aarch64_stlxr
5448 : Intrinsic::aarch64_stxr,
5449 StoreAddr->getType());
5450 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
5451 CI->addParamAttr(
5452 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
5453 return CI;
5454 }
5455
5456 if (BuiltinID == clang::AArch64::BI__getReg) {
5457 Expr::EvalResult Result;
5458 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
5459 llvm_unreachable("Sema will ensure that the parameter is constant");
5460
5461 llvm::APSInt Value = Result.Val.getInt();
5462 LLVMContext &Context = CGM.getLLVMContext();
5463 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
5464
5465 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
5466 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5467 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5468
5469 llvm::Function *F =
5470 CGM.getIntrinsic(Intrinsic::read_register, {Int64Ty});
5471 return Builder.CreateCall(F, Metadata);
5472 }
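// Illustrative IR (hedged): __getReg(29) reads the frame pointer roughly as
//   %r = call i64 @llvm.read_register.i64(metadata !0)  ; !0 = !{!"x29"}
// while __getReg(31) reads "sp".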
5473
5474 if (BuiltinID == clang::AArch64::BI__break) {
5475 Expr::EvalResult Result;
5476 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
5477 llvm_unreachable("Sema will ensure that the parameter is constant");
5478
5479 llvm::Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
5480 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
5481 }
5482
5483 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
5484 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
5485 return Builder.CreateCall(F);
5486 }
5487
5488 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
5489 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
5490 llvm::SyncScope::SingleThread);
5491
5492 // CRC32
5493 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5494 switch (BuiltinID) {
5495 case clang::AArch64::BI__builtin_arm_crc32b:
5496 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
5497 case clang::AArch64::BI__builtin_arm_crc32cb:
5498 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
5499 case clang::AArch64::BI__builtin_arm_crc32h:
5500 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
5501 case clang::AArch64::BI__builtin_arm_crc32ch:
5502 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
5503 case clang::AArch64::BI__builtin_arm_crc32w:
5504 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
5505 case clang::AArch64::BI__builtin_arm_crc32cw:
5506 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
5507 case clang::AArch64::BI__builtin_arm_crc32d:
5508 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
5509 case clang::AArch64::BI__builtin_arm_crc32cd:
5510 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
5511 }
5512
5513 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5514 Value *Arg0 = EmitScalarExpr(E->getArg(0));
5515 Value *Arg1 = EmitScalarExpr(E->getArg(1));
5516 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5517
5518 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
5519 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
5520
5521 return Builder.CreateCall(F, {Arg0, Arg1});
5522 }
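// For illustration: the ZExtOrBitCast above widens the data operand to the
// type the intrinsic expects (e.g. i8 -> i32 for crc32b), and the "d"
// builtins map onto crc32x/crc32cx, the 64-bit-data instruction forms.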
5523
5524 // Memory Operations (MOPS)
5525 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
5526 Value *Dst = EmitScalarExpr(E->getArg(0));
5527 Value *Val = EmitScalarExpr(E->getArg(1));
5528 Value *Size = EmitScalarExpr(E->getArg(2));
5529 Val = Builder.CreateTrunc(Val, Int8Ty);
5530 Size = Builder.CreateIntCast(Size, Int64Ty, false);
5531 return Builder.CreateCall(
5532 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
5533 }
5534
5535 if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch ||
5536 BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x)
5537 return EmitRangePrefetchBuiltin(*this, BuiltinID, E);
5538
5539 // Memory Tagging Extensions (MTE) Intrinsics
5540 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
5541 switch (BuiltinID) {
5542 case clang::AArch64::BI__builtin_arm_irg:
5543 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
5544 case clang::AArch64::BI__builtin_arm_addg:
5545 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
5546 case clang::AArch64::BI__builtin_arm_gmi:
5547 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
5548 case clang::AArch64::BI__builtin_arm_ldg:
5549 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
5550 case clang::AArch64::BI__builtin_arm_stg:
5551 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
5552 case clang::AArch64::BI__builtin_arm_subp:
5553 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
5554 }
5555
5556 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
5557 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
5558 Value *Pointer = EmitScalarExpr(E->getArg(0));
5559 Value *Mask = EmitScalarExpr(E->getArg(1));
5560
5561 Mask = Builder.CreateZExt(Mask, Int64Ty);
5562 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5563 {Pointer, Mask});
5564 }
5565 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
5566 Value *Pointer = EmitScalarExpr(E->getArg(0));
5567 Value *TagOffset = EmitScalarExpr(E->getArg(1));
5568
5569 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
5570 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5571 {Pointer, TagOffset});
5572 }
5573 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
5574 Value *Pointer = EmitScalarExpr(E->getArg(0));
5575 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
5576
5577 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
5578 return Builder.CreateCall(
5579 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
5580 }
5581 // Although it is possible to supply a different return address (the first
5582 // arg) to this intrinsic, for now we set the return address to be the same
5583 // as the input address.
5584 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
5585 Value *TagAddress = EmitScalarExpr(E->getArg(0));
5586 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5587 {TagAddress, TagAddress});
5588 }
5589 // Although it is possible to supply a different tag (to set) to this
5590 // intrinsic (as the first arg), for now we supply the tag already present
5591 // in the input address argument (the common use case).
5592 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
5593 Value *TagAddress = EmitScalarExpr(E->getArg(0));
5594 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5595 {TagAddress, TagAddress});
5596 }
5597 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
5598 Value *PointerA = EmitScalarExpr(E->getArg(0));
5599 Value *PointerB = EmitScalarExpr(E->getArg(1));
5600 return Builder.CreateCall(
5601 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
5602 }
5603 }
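// Usage sketch (MTE, illustrative): a typical tagging sequence combines these
// builtins, e.g.
//   void *p = __builtin_arm_irg(ptr, 0);      // insert a random tag
//   __builtin_arm_stg(p);                     // store that tag to memory
//   ptrdiff_t d = __builtin_arm_subp(p, q);   // tag-insensitive difference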
5604
5605 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5606 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
5607 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5608 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
5609 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
5610 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
5611 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
5612 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
5613
5614 SpecialRegisterAccessKind AccessKind = Write;
5615 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5616 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
5617 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5618 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
5619 AccessKind = VolatileRead;
5620
5621 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
5622 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
5623
5624 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5625 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
5626
5627 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5628 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
5629
5630 llvm::Type *ValueType;
5631 llvm::Type *RegisterType = Int64Ty;
5632 if (Is32Bit) {
5633 ValueType = Int32Ty;
5634 } else if (Is128Bit) {
5635 llvm::Type *Int128Ty =
5636 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
5637 ValueType = Int128Ty;
5638 RegisterType = Int128Ty;
5639 } else if (IsPointerBuiltin) {
5640 ValueType = VoidPtrTy;
5641 } else {
5642 ValueType = Int64Ty;
5643 }
5644
5645 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
5646 AccessKind);
5647 }
5648
5649 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5650 BuiltinID == clang::AArch64::BI_WriteStatusReg ||
5651 BuiltinID == clang::AArch64::BI__sys) {
5652 LLVMContext &Context = CGM.getLLVMContext();
5653
5654 unsigned SysReg =
5655 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
5656
5657 std::string SysRegStr;
5658 unsigned SysRegOp0 = (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5659 BuiltinID == clang::AArch64::BI_WriteStatusReg)
5660 ? ((1 << 1) | ((SysReg >> 14) & 1))
5661 : 1;
5662 llvm::raw_string_ostream(SysRegStr)
5663 << SysRegOp0 << ":" << ((SysReg >> 11) & 7) << ":"
5664 << ((SysReg >> 7) & 15) << ":" << ((SysReg >> 3) & 15) << ":"
5665 << (SysReg & 7);
5666
5667 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
5668 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5669 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5670
5671 llvm::Type *RegisterType = Int64Ty;
5672 llvm::Type *Types[] = { RegisterType };
5673
5674 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
5675 llvm::Function *F = CGM.getIntrinsic(Intrinsic::read_register, Types);
5676
5677 return Builder.CreateCall(F, Metadata);
5678 }
5679
5680 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
5681 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
5682 llvm::Value *Result = Builder.CreateCall(F, {Metadata, ArgValue});
5683 if (BuiltinID == clang::AArch64::BI__sys) {
5684 // Return 0 for convenience, even though MSVC returns some other undefined
5685 // value.
5686 Result = ConstantInt::get(Builder.getInt32Ty(), 0);
5687 }
5688 return Result;
5689 }
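// Encoding sketch (hedged): the MSVC-style operand packs op0:op1:CRn:CRm:op2
// bit-fields, which the code above unpacks into the "o0:o1:CRn:CRm:o2" string
// form LLVM's read/write_register expects; e.g. TPIDR_EL0 (S3_3_C13_C0_2)
// becomes "3:3:13:0:2".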
5690
5691 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
5692 llvm::Function *F =
5693 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
5694 return Builder.CreateCall(F);
5695 }
5696
5697 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
5698 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
5699 return Builder.CreateCall(F);
5700 }
5701
5702 if (BuiltinID == clang::AArch64::BI__mulh ||
5703 BuiltinID == clang::AArch64::BI__umulh) {
5704 llvm::Type *ResType = ConvertType(E->getType());
5705 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5706
5707 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
5708 Value *LHS =
5709 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
5710 Value *RHS =
5711 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
5712
5713 Value *MulResult, *HigherBits;
5714 if (IsSigned) {
5715 MulResult = Builder.CreateNSWMul(LHS, RHS);
5716 HigherBits = Builder.CreateAShr(MulResult, 64);
5717 } else {
5718 MulResult = Builder.CreateNUWMul(LHS, RHS);
5719 HigherBits = Builder.CreateLShr(MulResult, 64);
5720 }
5721 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
5722
5723 return HigherBits;
5724 }
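// Worked example: __umulh widens both operands to i128, multiplies (nuw), and
// keeps bits [127:64]; e.g. __umulh(1ull << 63, 4) == 2. The backend can
// select this pattern to a single UMULH (or SMULH for __mulh).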
5725
5726 if (BuiltinID == AArch64::BI__writex18byte ||
5727 BuiltinID == AArch64::BI__writex18word ||
5728 BuiltinID == AArch64::BI__writex18dword ||
5729 BuiltinID == AArch64::BI__writex18qword) {
5730 // Process the args first
5731 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5732 Value *DataArg = EmitScalarExpr(E->getArg(1));
5733
5734 // Read x18 as i8*
5735 llvm::Value *X18 = readX18AsPtr(*this);
5736
5737 // Store val at x18 + offset
5738 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5739 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5740 StoreInst *Store =
5741 Builder.CreateAlignedStore(DataArg, Ptr, CharUnits::One());
5742 return Store;
5743 }
5744
5745 if (BuiltinID == AArch64::BI__readx18byte ||
5746 BuiltinID == AArch64::BI__readx18word ||
5747 BuiltinID == AArch64::BI__readx18dword ||
5748 BuiltinID == AArch64::BI__readx18qword) {
5749 // Process the args first
5750 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5751
5752 // Read x18 as i8*
5753 llvm::Value *X18 = readX18AsPtr(*this);
5754
5755 // Load x18 + offset
5756 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5757 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5758 llvm::Type *IntTy = ConvertType(E->getType());
5759 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
5760 return Load;
5761 }
5762
5763 if (BuiltinID == AArch64::BI__addx18byte ||
5764 BuiltinID == AArch64::BI__addx18word ||
5765 BuiltinID == AArch64::BI__addx18dword ||
5766 BuiltinID == AArch64::BI__addx18qword ||
5767 BuiltinID == AArch64::BI__incx18byte ||
5768 BuiltinID == AArch64::BI__incx18word ||
5769 BuiltinID == AArch64::BI__incx18dword ||
5770 BuiltinID == AArch64::BI__incx18qword) {
5771 llvm::Type *IntTy;
5772 bool isIncrement;
5773 switch (BuiltinID) {
5774 case AArch64::BI__incx18byte:
5775 IntTy = Int8Ty;
5776 isIncrement = true;
5777 break;
5778 case AArch64::BI__incx18word:
5779 IntTy = Int16Ty;
5780 isIncrement = true;
5781 break;
5782 case AArch64::BI__incx18dword:
5783 IntTy = Int32Ty;
5784 isIncrement = true;
5785 break;
5786 case AArch64::BI__incx18qword:
5787 IntTy = Int64Ty;
5788 isIncrement = true;
5789 break;
5790 default:
5791 IntTy = ConvertType(E->getArg(1)->getType());
5792 isIncrement = false;
5793 break;
5794 }
5795 // Process the args first
5796 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5797 Value *ValToAdd =
5798 isIncrement ? ConstantInt::get(IntTy, 1) : EmitScalarExpr(E->getArg(1));
5799
5800 // Read x18 as i8*
5801 llvm::Value *X18 = readX18AsPtr(*this);
5802
5803 // Load x18 + offset
5804 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5805 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5806 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
5807
5808 // Add values
5809 Value *AddResult = Builder.CreateAdd(Load, ValToAdd);
5810
5811 // Store val at x18 + offset
5812 StoreInst *Store =
5813 Builder.CreateAlignedStore(AddResult, Ptr, CharUnits::One());
5814 return Store;
5815 }
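// Context (hedged): x18 is the AArch64 platform register; on Windows on Arm64
// it holds the TEB pointer, which is what these __readx18*/__writex18*/
// __addx18*/__incx18* intrinsics address at a byte offset. Note all accesses
// above are emitted with alignment 1.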
5816
5817 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
5818 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
5819 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
5820 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
5821 Value *Arg = EmitScalarExpr(E->getArg(0));
5822 llvm::Type *RetTy = ConvertType(E->getType());
5823 return Builder.CreateBitCast(Arg, RetTy);
5824 }
5825
5826 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5827 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5828 BuiltinID == AArch64::BI_CountLeadingZeros ||
5829 BuiltinID == AArch64::BI_CountLeadingZeros64) {
5830 Value *Arg = EmitScalarExpr(E->getArg(0));
5831 llvm::Type *ArgType = Arg->getType();
5832
5833 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5834 BuiltinID == AArch64::BI_CountLeadingOnes64)
5835 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
5836
5837 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
5838 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
5839
5840 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5841 BuiltinID == AArch64::BI_CountLeadingZeros64)
5842 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5843 return Result;
5844 }
5845
5846 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
5847 BuiltinID == AArch64::BI_CountLeadingSigns64) {
5848 Value *Arg = EmitScalarExpr(E->getArg(0));
5849
5850 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
5851 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
5852 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
5853
5854 Value *Result = Builder.CreateCall(F, Arg, "cls");
5855 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
5856 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5857 return Result;
5858 }
5859
5860 if (BuiltinID == AArch64::BI_CountOneBits ||
5861 BuiltinID == AArch64::BI_CountOneBits64) {
5862 Value *ArgValue = EmitScalarExpr(E->getArg(0));
5863 llvm::Type *ArgType = ArgValue->getType();
5864 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
5865
5866 Value *Result = Builder.CreateCall(F, ArgValue);
5867 if (BuiltinID == AArch64::BI_CountOneBits64)
5868 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5869 return Result;
5870 }
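// For illustration: _CountOneBits64(0xFFull) == 8; llvm.ctpop runs at the
// operand width, and the 64-bit variant truncates the count to the i32
// return type.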
5871
5872 if (BuiltinID == AArch64::BI__prefetch) {
5873 Value *Address = EmitScalarExpr(E->getArg(0));
5874 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
5875 Value *Locality = ConstantInt::get(Int32Ty, 3);
5876 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
5877 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
5878 return Builder.CreateCall(F, {Address, RW, Locality, Data});
5879 }
5880
5881 if (BuiltinID == AArch64::BI__hlt) {
5882 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
5883 Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
5884
5885 // Return 0 for convenience, even though MSVC returns some other undefined
5886 // value.
5887 return ConstantInt::get(Builder.getInt32Ty(), 0);
5888 }
5889
5890 if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
5891 return Builder.CreateFPTrunc(
5892 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
5893 Builder.getFloatTy()),
5894 Builder.getBFloatTy());
5895
5896 // Handle MSVC intrinsics before argument evaluation to prevent double
5897 // evaluation.
5898 if (std::optional<MSVCIntrin> MsvcIntId =
5899 translateAarch64ToMsvcIntrin(BuiltinID))
5900 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
5901
5902 // Some intrinsics are equivalent; if so, use the base intrinsic ID.
5903 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
5904 return P.first == BuiltinID;
5905 });
5906 if (It != end(NEONEquivalentIntrinsicMap))
5907 BuiltinID = It->second;
5908
5909 // Check whether this is an SISD builtin.
5910 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
5911 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
5912 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5913 bool IsSISD = (Builtin != nullptr);
5914
5915 // Find out if any arguments are required to be integer constant
5916 // expressions.
5917 unsigned ICEArguments = 0;
5918 ASTContext::GetBuiltinTypeError Error;
5919 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5920 assert(Error == ASTContext::GE_None && "Should not codegen an error");
5921
5922 llvm::SmallVector<Value *, 4> Ops;
5923 Address PtrOp0 = Address::invalid();
5924 // Note the assumption that SISD intrinsics do not contain extra arguments.
5925 // TODO: Fold this into a single function call instead of, effectively, two
5926 // separate checks.
5927 bool HasExtraArg = !IsSISD && HasExtraNeonArgument(BuiltinID);
5928 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
5929 for (unsigned i = 0, e = NumArgs; i != e; i++) {
5930 if (i == 0) {
5931 switch (BuiltinID) {
5932 case NEON::BI__builtin_neon_vld1_v:
5933 case NEON::BI__builtin_neon_vld1q_v:
5934 case NEON::BI__builtin_neon_vld1_dup_v:
5935 case NEON::BI__builtin_neon_vld1q_dup_v:
5936 case NEON::BI__builtin_neon_vld1_lane_v:
5937 case NEON::BI__builtin_neon_vld1q_lane_v:
5938 case NEON::BI__builtin_neon_vst1_v:
5939 case NEON::BI__builtin_neon_vst1q_v:
5940 case NEON::BI__builtin_neon_vst1_lane_v:
5941 case NEON::BI__builtin_neon_vst1q_lane_v:
5942 case NEON::BI__builtin_neon_vldap1_lane_s64:
5943 case NEON::BI__builtin_neon_vldap1q_lane_s64:
5944 case NEON::BI__builtin_neon_vstl1_lane_s64:
5945 case NEON::BI__builtin_neon_vstl1q_lane_s64:
5946 // Get the alignment for the argument in addition to the value;
5947 // we'll use it later.
5948 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
5949 Ops.push_back(PtrOp0.emitRawPointer(*this));
5950 continue;
5951 }
5952 }
5953 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
5954 }
5955
5956 if (Builtin) {
5957 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
5958 assert(Result && "SISD intrinsic should have been handled");
5959 return Result;
5960 }
5961
5962 const Expr *Arg = E->getArg(E->getNumArgs()-1);
5963 NeonTypeFlags Type(0);
5964 if (std::optional<llvm::APSInt> Result =
5965 Arg->getIntegerConstantExpr(getContext()))
5966 // Determine the type of this overloaded NEON intrinsic.
5967 Type = NeonTypeFlags(Result->getZExtValue());
5968
5969 bool usgn = Type.isUnsigned();
5970 bool quad = Type.isQuad();
5971 unsigned Int;
5972
5973 // Not all intrinsics handled by the common case work for AArch64 yet, so only
5974 // defer to common code if it's been added to our special map.
5975 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
5976 AArch64SIMDIntrinsicsProvenSorted);
5977
5978 if (Builtin)
5979 return EmitCommonNeonBuiltinExpr(
5980 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
5981 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
5982 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
5983
5984 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
5985 return V;
5986
5987 // Handle non-overloaded intrinsics first.
5988 switch (BuiltinID) {
5989 default: break;
5990 case NEON::BI__builtin_neon_vabsh_f16:
5991 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
5992 case NEON::BI__builtin_neon_vaddq_p128: {
5993 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
5994 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5995 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5996 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
5997 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5998 return Builder.CreateBitCast(Ops[0], Int128Ty);
5999 }
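// Note: poly128 addition is carry-less (addition over GF(2)), so it reduces
// to the XOR of the two 128-bit operands emitted above.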
6000 case NEON::BI__builtin_neon_vldrq_p128: {
6001 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
6002 return Builder.CreateAlignedLoad(Int128Ty, Ops[0],
6003 CharUnits::fromQuantity(16));
6004 }
6005 case NEON::BI__builtin_neon_vstrq_p128: {
6006 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6007 }
6008 case NEON::BI__builtin_neon_vcvts_f32_u32:
6009 case NEON::BI__builtin_neon_vcvtd_f64_u64:
6010 usgn = true;
6011 [[fallthrough]];
6012 case NEON::BI__builtin_neon_vcvts_f32_s32:
6013 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
6014 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
6015 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
6016 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
6017 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
6018 if (usgn)
6019 return Builder.CreateUIToFP(Ops[0], FTy);
6020 return Builder.CreateSIToFP(Ops[0], FTy);
6021 }
6022 case NEON::BI__builtin_neon_vcvth_f16_u16:
6023 case NEON::BI__builtin_neon_vcvth_f16_u32:
6024 case NEON::BI__builtin_neon_vcvth_f16_u64:
6025 usgn = true;
6026 [[fallthrough]];
6027 case NEON::BI__builtin_neon_vcvth_f16_s16:
6028 case NEON::BI__builtin_neon_vcvth_f16_s32:
6029 case NEON::BI__builtin_neon_vcvth_f16_s64: {
6030 llvm::Type *FTy = HalfTy;
6031 llvm::Type *InTy;
6032 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
6033 InTy = Int64Ty;
6034 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
6035 InTy = Int32Ty;
6036 else
6037 InTy = Int16Ty;
6038 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
6039 if (usgn)
6040 return Builder.CreateUIToFP(Ops[0], FTy);
6041 return Builder.CreateSIToFP(Ops[0], FTy);
6042 }
6043 case NEON::BI__builtin_neon_vcvtah_u16_f16:
6044 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
6045 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
6046 case NEON::BI__builtin_neon_vcvtph_u16_f16:
6047 case NEON::BI__builtin_neon_vcvth_u16_f16:
6048 case NEON::BI__builtin_neon_vcvtah_s16_f16:
6049 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
6050 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
6051 case NEON::BI__builtin_neon_vcvtph_s16_f16:
6052 case NEON::BI__builtin_neon_vcvth_s16_f16: {
6053 llvm::Type *InTy = Int16Ty;
6054 llvm::Type* FTy = HalfTy;
6055 llvm::Type *Tys[2] = {InTy, FTy};
6056 switch (BuiltinID) {
6057 default: llvm_unreachable("missing builtin ID in switch!");
6058 case NEON::BI__builtin_neon_vcvtah_u16_f16:
6059 Int = Intrinsic::aarch64_neon_fcvtau; break;
6060 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
6061 Int = Intrinsic::aarch64_neon_fcvtmu; break;
6062 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
6063 Int = Intrinsic::aarch64_neon_fcvtnu; break;
6064 case NEON::BI__builtin_neon_vcvtph_u16_f16:
6065 Int = Intrinsic::aarch64_neon_fcvtpu; break;
6066 case NEON::BI__builtin_neon_vcvth_u16_f16:
6067 Int = Intrinsic::aarch64_neon_fcvtzu; break;
6068 case NEON::BI__builtin_neon_vcvtah_s16_f16:
6069 Int = Intrinsic::aarch64_neon_fcvtas; break;
6070 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
6071 Int = Intrinsic::aarch64_neon_fcvtms; break;
6072 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
6073 Int = Intrinsic::aarch64_neon_fcvtns; break;
6074 case NEON::BI__builtin_neon_vcvtph_s16_f16:
6075 Int = Intrinsic::aarch64_neon_fcvtps; break;
6076 case NEON::BI__builtin_neon_vcvth_s16_f16:
6077 Int = Intrinsic::aarch64_neon_fcvtzs; break;
6078 }
6079 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
6080 }
6081 case NEON::BI__builtin_neon_vcaleh_f16:
6082 case NEON::BI__builtin_neon_vcalth_f16:
6083 case NEON::BI__builtin_neon_vcageh_f16:
6084 case NEON::BI__builtin_neon_vcagth_f16: {
6085 llvm::Type* InTy = Int32Ty;
6086 llvm::Type* FTy = HalfTy;
6087 llvm::Type *Tys[2] = {InTy, FTy};
6088 switch (BuiltinID) {
6089 default: llvm_unreachable("missing builtin ID in switch!");
6090 case NEON::BI__builtin_neon_vcageh_f16:
6091 Int = Intrinsic::aarch64_neon_facge; break;
6092 case NEON::BI__builtin_neon_vcagth_f16:
6093 Int = Intrinsic::aarch64_neon_facgt; break;
6094 case NEON::BI__builtin_neon_vcaleh_f16:
6095 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
6096 case NEON::BI__builtin_neon_vcalth_f16:
6097 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
6098 }
6099 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
6100 return Builder.CreateTrunc(Ops[0], Int16Ty);
6101 }
6102 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
6103 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
6104 llvm::Type* InTy = Int32Ty;
6105 llvm::Type* FTy = HalfTy;
6106 llvm::Type *Tys[2] = {InTy, FTy};
6107 switch (BuiltinID) {
6108 default: llvm_unreachable("missing builtin ID in switch!");
6109 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
6110 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
6111 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
6112 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
6113 }
6114 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
6115 return Builder.CreateTrunc(Ops[0], Int16Ty);
6116 }
6117 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
6118 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
6119 llvm::Type* FTy = HalfTy;
6120 llvm::Type* InTy = Int32Ty;
6121 llvm::Type *Tys[2] = {FTy, InTy};
6122 switch (BuiltinID) {
6123 default: llvm_unreachable("missing builtin ID in switch!");
6124 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
6125 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
6126 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
6127 break;
6128 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
6129 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
6130 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
6131 break;
6132 }
6133 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
6134 }
6135 case NEON::BI__builtin_neon_vpaddd_s64: {
6136 // TODO: Isn't this handled by
6137 // EmitCommonNeonSISDBuiltinExpr?
6138 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
6139 // The vector is v2i64, so make sure it's bitcast to that.
6140 Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2i64");
6141 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
6142 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
6143 Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
6144 Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
6145 // Pairwise addition of a v2i64 into a scalar i64.
6146 return Builder.CreateAdd(Op0, Op1, "vpaddd");
6147 }
6148 case NEON::BI__builtin_neon_vpaddd_f64: {
6149 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
6150 // The vector is v2f64, so make sure it's bitcast to that.
6151 Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2f64");
6152 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
6153 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
6154 Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
6155 Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
6156 // Pairwise addition of a v2f64 into a scalar f64.
6157 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
6158 }
6159 case NEON::BI__builtin_neon_vpadds_f32: {
6160 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
6161 // The vector is v2f32, so make sure it's bitcast to that.
6162 Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2f32");
6163 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
6164 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
6165 Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
6166 Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
6167 // Pairwise addition of a v2f32 into a scalar f32.
6168 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
6169 }
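// Illustrative expansion for the three pairwise cases above: extract lane 0
// and lane 1 of the two-element vector and combine them, i.e. result =
// v[0] + v[1] (integer ADD for s64, FADD for the floating-point cases).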
6170 case NEON::BI__builtin_neon_vceqzd_s64:
6171 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6172 return EmitAArch64CompareBuiltinExpr(Ops[0], ConvertType(E->getType()),
6173 ICmpInst::ICMP_EQ, "vceqz");
6174 case NEON::BI__builtin_neon_vceqzd_f64:
6175 case NEON::BI__builtin_neon_vceqzs_f32:
6176 case NEON::BI__builtin_neon_vceqzh_f16:
6177 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6178 return EmitAArch64CompareBuiltinExpr(Ops[0], ConvertType(E->getType()),
6179 ICmpInst::FCMP_OEQ, "vceqz");
6180 case NEON::BI__builtin_neon_vcgezd_s64:
6181 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6182 return EmitAArch64CompareBuiltinExpr(Ops[0], ConvertType(E->getType()),
6183 ICmpInst::ICMP_SGE, "vcgez");
6184 case NEON::BI__builtin_neon_vcgezd_f64:
6185 case NEON::BI__builtin_neon_vcgezs_f32:
6186 case NEON::BI__builtin_neon_vcgezh_f16:
6187 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6188 return EmitAArch64CompareBuiltinExpr(Ops[0], ConvertType(E->getType()),
6189 ICmpInst::FCMP_OGE, "vcgez");
6190 case NEON::BI__builtin_neon_vclezd_s64:
6191 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6192 return EmitAArch64CompareBuiltinExpr(Ops[0], ConvertType(E->getType()),
6193 ICmpInst::ICMP_SLE, "vclez");
6194 case NEON::BI__builtin_neon_vclezd_f64:
6195 case NEON::BI__builtin_neon_vclezs_f32:
6196 case NEON::BI__builtin_neon_vclezh_f16:
6197 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6198 return EmitAArch64CompareBuiltinExpr(Ops[0], ConvertType(E->getType()),
6199 ICmpInst::FCMP_OLE, "vclez");
6200 case NEON::BI__builtin_neon_vcgtzd_s64:
6201 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6202 return EmitAArch64CompareBuiltinExpr(Ops[0], ConvertType(E->getType()),
6203 ICmpInst::ICMP_SGT, "vcgtz");
6204 case NEON::BI__builtin_neon_vcgtzd_f64:
6205 case NEON::BI__builtin_neon_vcgtzs_f32:
6206 case NEON::BI__builtin_neon_vcgtzh_f16:
6207 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6208 return EmitAArch64CompareBuiltinExpr(Ops[0], ConvertType(E->getType()),
6209 ICmpInst::FCMP_OGT, "vcgtz");
6210 case NEON::BI__builtin_neon_vcltzd_s64:
6211 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6212 return EmitAArch64CompareBuiltinExpr(Ops[0], ConvertType(E->getType()),
6213 ICmpInst::ICMP_SLT, "vcltz");
6214
6215 case NEON::BI__builtin_neon_vcltzd_f64:
6216 case NEON::BI__builtin_neon_vcltzs_f32:
6217 case NEON::BI__builtin_neon_vcltzh_f16:
6218 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6219 return EmitAArch64CompareBuiltinExpr(Ops[0], ConvertType(E->getType()),
6220 ICmpInst::FCMP_OLT, "vcltz");
6221
6222 case NEON::BI__builtin_neon_vceqzd_u64: {
6223 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6224 return EmitAArch64CompareBuiltinExpr(Ops[0], ConvertType(E->getType()),
6225 ICmpInst::ICMP_EQ, "vceqzd");
6226 }
6227 case NEON::BI__builtin_neon_vceqd_f64:
6228 case NEON::BI__builtin_neon_vcled_f64:
6229 case NEON::BI__builtin_neon_vcltd_f64:
6230 case NEON::BI__builtin_neon_vcged_f64:
6231 case NEON::BI__builtin_neon_vcgtd_f64: {
6232 llvm::CmpInst::Predicate P;
6233 switch (BuiltinID) {
6234 default: llvm_unreachable("missing builtin ID in switch!");
6235 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
6236 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
6237 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
6238 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
6239 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
6240 }
6241 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6242 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6243 if (P == llvm::FCmpInst::FCMP_OEQ)
6244 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6245 else
6246 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
6247 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
6248 }
6249 case NEON::BI__builtin_neon_vceqs_f32:
6250 case NEON::BI__builtin_neon_vcles_f32:
6251 case NEON::BI__builtin_neon_vclts_f32:
6252 case NEON::BI__builtin_neon_vcges_f32:
6253 case NEON::BI__builtin_neon_vcgts_f32: {
6254 llvm::CmpInst::Predicate P;
6255 switch (BuiltinID) {
6256 default: llvm_unreachable("missing builtin ID in switch!");
6257 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
6258 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
6259 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
6260 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
6261 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
6262 }
6263 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
6264 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
6265 if (P == llvm::FCmpInst::FCMP_OEQ)
6266 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6267 else
6268 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
6269 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
6270 }
6271 case NEON::BI__builtin_neon_vceqh_f16:
6272 case NEON::BI__builtin_neon_vcleh_f16:
6273 case NEON::BI__builtin_neon_vclth_f16:
6274 case NEON::BI__builtin_neon_vcgeh_f16:
6275 case NEON::BI__builtin_neon_vcgth_f16: {
6276 llvm::CmpInst::Predicate P;
6277 switch (BuiltinID) {
6278 default: llvm_unreachable("missing builtin ID in switch!");
6279 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
6280 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
6281 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
6282 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
6283 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
6284 }
6285 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
6286 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
6287 if (P == llvm::FCmpInst::FCMP_OEQ)
6288 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6289 else
6290 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
6291 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
6292 }
6293 case NEON::BI__builtin_neon_vceqd_s64:
6294 case NEON::BI__builtin_neon_vceqd_u64:
6295 case NEON::BI__builtin_neon_vcgtd_s64:
6296 case NEON::BI__builtin_neon_vcgtd_u64:
6297 case NEON::BI__builtin_neon_vcltd_s64:
6298 case NEON::BI__builtin_neon_vcltd_u64:
6299 case NEON::BI__builtin_neon_vcged_u64:
6300 case NEON::BI__builtin_neon_vcged_s64:
6301 case NEON::BI__builtin_neon_vcled_u64:
6302 case NEON::BI__builtin_neon_vcled_s64: {
6303 llvm::CmpInst::Predicate P;
6304 switch (BuiltinID) {
6305 default: llvm_unreachable("missing builtin ID in switch!");
6306 case NEON::BI__builtin_neon_vceqd_s64:
6307 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
6308 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
6309 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
6310 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
6311 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
6312 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
6313 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
6314 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
6315 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
6316 }
6317 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6318 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6319 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
6320 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
6321 }
6322 case NEON::BI__builtin_neon_vnegd_s64:
6323 return Builder.CreateNeg(Ops[0], "vnegd");
6324 case NEON::BI__builtin_neon_vnegh_f16:
6325 return Builder.CreateFNeg(Ops[0], "vnegh");
6326 case NEON::BI__builtin_neon_vtstd_s64:
6327 case NEON::BI__builtin_neon_vtstd_u64: {
6328 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6329 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6330 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
6331 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
6332 llvm::Constant::getNullValue(Int64Ty));
6333 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
6334 }
6335 case NEON::BI__builtin_neon_vset_lane_i8:
6336 case NEON::BI__builtin_neon_vset_lane_i16:
6337 case NEON::BI__builtin_neon_vset_lane_i32:
6338 case NEON::BI__builtin_neon_vset_lane_i64:
6339 case NEON::BI__builtin_neon_vset_lane_bf16:
6340 case NEON::BI__builtin_neon_vset_lane_f32:
6341 case NEON::BI__builtin_neon_vsetq_lane_i8:
6342 case NEON::BI__builtin_neon_vsetq_lane_i16:
6343 case NEON::BI__builtin_neon_vsetq_lane_i32:
6344 case NEON::BI__builtin_neon_vsetq_lane_i64:
6345 case NEON::BI__builtin_neon_vsetq_lane_bf16:
6346 case NEON::BI__builtin_neon_vsetq_lane_f32:
6347 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6348 case NEON::BI__builtin_neon_vset_lane_f64:
6349 // The vector type needs a cast for the v1f64 variant.
6350 Ops[1] =
6351 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
6352 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6353 case NEON::BI__builtin_neon_vset_lane_mf8:
6354 case NEON::BI__builtin_neon_vsetq_lane_mf8:
6355 // The input vector type needs a cast to scalar type.
6356 Ops[0] =
6357 Builder.CreateBitCast(Ops[0], llvm::Type::getInt8Ty(getLLVMContext()));
6358 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6359 case NEON::BI__builtin_neon_vsetq_lane_f64:
6360 // The vector type needs a cast for the v2f64 variant.
6361 Ops[1] =
6362 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
6363 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6364
6365 case NEON::BI__builtin_neon_vget_lane_i8:
6366 case NEON::BI__builtin_neon_vdupb_lane_i8:
6367 Ops[0] =
6368 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
6369 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
6370 case NEON::BI__builtin_neon_vgetq_lane_i8:
6371 case NEON::BI__builtin_neon_vdupb_laneq_i8:
6372 Ops[0] =
6373 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
6374 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
6375 case NEON::BI__builtin_neon_vget_lane_mf8:
6376 case NEON::BI__builtin_neon_vdupb_lane_mf8:
6377 case NEON::BI__builtin_neon_vgetq_lane_mf8:
6378 case NEON::BI__builtin_neon_vdupb_laneq_mf8:
6379 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
6380 case NEON::BI__builtin_neon_vget_lane_i16:
6381 case NEON::BI__builtin_neon_vduph_lane_i16:
6382 Ops[0] =
6383 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
6384 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
6385 case NEON::BI__builtin_neon_vgetq_lane_i16:
6386 case NEON::BI__builtin_neon_vduph_laneq_i16:
6387 Ops[0] =
6388 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
6389 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
6390 case NEON::BI__builtin_neon_vget_lane_i32:
6391 case NEON::BI__builtin_neon_vdups_lane_i32:
6392 Ops[0] =
6393 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
6394 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
6395 case NEON::BI__builtin_neon_vdups_lane_f32:
6396 Ops[0] =
6397 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
6398 return Builder.CreateExtractElement(Ops[0], Ops[1], "vdups_lane");
6399 case NEON::BI__builtin_neon_vgetq_lane_i32:
6400 case NEON::BI__builtin_neon_vdups_laneq_i32:
6401 Ops[0] =
6402 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
6403 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
6404 case NEON::BI__builtin_neon_vget_lane_i64:
6405 case NEON::BI__builtin_neon_vdupd_lane_i64:
6406 Ops[0] =
6407 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
6408 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
6409 case NEON::BI__builtin_neon_vdupd_lane_f64:
6410 Ops[0] =
6411 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
6412 return Builder.CreateExtractElement(Ops[0], Ops[1], "vdupd_lane");
6413 case NEON::BI__builtin_neon_vgetq_lane_i64:
6414 case NEON::BI__builtin_neon_vdupd_laneq_i64:
6415 Ops[0] =
6416 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
6417 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
6418 case NEON::BI__builtin_neon_vget_lane_f32:
6419 Ops[0] =
6420 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
6421 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
6422 case NEON::BI__builtin_neon_vget_lane_f64:
6423 Ops[0] =
6424 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
6425 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
6426 case NEON::BI__builtin_neon_vgetq_lane_f32:
6427 case NEON::BI__builtin_neon_vdups_laneq_f32:
6428 Ops[0] =
6429 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
6430 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
6431 case NEON::BI__builtin_neon_vgetq_lane_f64:
6432 case NEON::BI__builtin_neon_vdupd_laneq_f64:
6433 Ops[0] =
6434 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
6435 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
6436 case NEON::BI__builtin_neon_vaddh_f16:
6437 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
6438 case NEON::BI__builtin_neon_vsubh_f16:
6439 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
6440 case NEON::BI__builtin_neon_vmulh_f16:
6441 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
6442 case NEON::BI__builtin_neon_vdivh_f16:
6443 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
6444 case NEON::BI__builtin_neon_vfmah_f16:
6445 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
6446 return emitCallMaybeConstrainedFPBuiltin(
6447 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
6448 {Ops[1], Ops[2], Ops[0]});
6449 case NEON::BI__builtin_neon_vfmsh_f16: {
6450 Value *Neg = Builder.CreateFNeg(Ops[1], "vsubh");
6451
6452 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
6453 return emitCallMaybeConstrainedFPBuiltin(
6454 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
6455 {Neg, Ops[2], Ops[0]});
6456 }
6457 case NEON::BI__builtin_neon_vaddd_s64:
6458 case NEON::BI__builtin_neon_vaddd_u64:
6459 return Builder.CreateAdd(Ops[0], Ops[1], "vaddd");
6460 case NEON::BI__builtin_neon_vsubd_s64:
6461 case NEON::BI__builtin_neon_vsubd_u64:
6462 return Builder.CreateSub(Ops[0], Ops[1], "vsubd");
6463 case NEON::BI__builtin_neon_vqdmlalh_s16:
6464 case NEON::BI__builtin_neon_vqdmlslh_s16: {
6465 SmallVector<Value *, 2> ProductOps;
6466 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
6467 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
6468 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
6469 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6470 ProductOps, "vqdmlXl");
6471 Constant *CI = ConstantInt::get(SizeTy, 0);
6472 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6473
6474 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
6475 ? Intrinsic::aarch64_neon_sqadd
6476 : Intrinsic::aarch64_neon_sqsub;
6477 // Drop the 2nd multiplication argument before the accumulation
6478 Ops.pop_back();
6479 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
6480 }
6481 case NEON::BI__builtin_neon_vqshlud_n_s64: {
6482 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
6483 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
6484 Ops, "vqshlu_n");
6485 }
6486 case NEON::BI__builtin_neon_vqshld_n_u64:
6487 case NEON::BI__builtin_neon_vqshld_n_s64: {
6488 Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
6489 ? Intrinsic::aarch64_neon_uqshl
6490 : Intrinsic::aarch64_neon_sqshl;
6491 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
6492 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
6493 }
6494 case NEON::BI__builtin_neon_vrshrd_n_u64:
6495 case NEON::BI__builtin_neon_vrshrd_n_s64: {
6496 Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
6497 ? Intrinsic::aarch64_neon_urshl
6498 : Intrinsic::aarch64_neon_srshl;
6499 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
6500 Ops[1] = ConstantInt::get(Int64Ty, -SV);
6501 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
6502 }
6503 case NEON::BI__builtin_neon_vrsrad_n_u64:
6504 case NEON::BI__builtin_neon_vrsrad_n_s64: {
6505 Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
6506 ? Intrinsic::aarch64_neon_urshl
6507 : Intrinsic::aarch64_neon_srshl;
6508 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6509 Ops[2] = Builder.CreateNeg(Ops[2]);
6510 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
6511 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
6512 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
6513 }
6514 case NEON::BI__builtin_neon_vshld_n_s64:
6515 case NEON::BI__builtin_neon_vshld_n_u64: {
6516 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[1]);
6517 return Builder.CreateShl(
6518 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
6519 }
6520 case NEON::BI__builtin_neon_vshrd_n_s64: {
6521 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[1]);
6522 return Builder.CreateAShr(
6523 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6524 Amt->getZExtValue())),
6525 "shrd_n");
6526 }
6527 case NEON::BI__builtin_neon_vshrd_n_u64: {
6528 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[1]);
6529 uint64_t ShiftAmt = Amt->getZExtValue();
6530 // Right-shifting an unsigned value by its size yields 0.
6531 if (ShiftAmt == 64)
6532 return ConstantInt::get(Int64Ty, 0);
6533 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
6534 "shrd_n");
6535 }
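// Rationale sketch: NEON permits shift amounts up to the element size, while
// an LLVM lshr of an i64 by 64 is poison, so the ShiftAmt == 64 case above is
// folded to the constant 0 (and, in vsrad_n_u64 below, to the accumulator).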
6536 case NEON::BI__builtin_neon_vsrad_n_s64: {
6537 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[2]);
6538 Ops[1] = Builder.CreateAShr(
6539 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6540 Amt->getZExtValue())),
6541 "shrd_n");
6542 return Builder.CreateAdd(Ops[0], Ops[1]);
6543 }
6544 case NEON::BI__builtin_neon_vsrad_n_u64: {
6545 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[2]);
6546 uint64_t ShiftAmt = Amt->getZExtValue();
6547 // Right-shifting an unsigned value by its size yields 0.
6548 // As Op + 0 = Op, return Ops[0] directly.
6549 if (ShiftAmt == 64)
6550 return Ops[0];
6551 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
6552 "shrd_n");
6553 return Builder.CreateAdd(Ops[0], Ops[1]);
6554 }
6555 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
6556 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
6557 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
6558 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
6559 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "lane");
6560 SmallVector<Value *, 2> ProductOps;
6561 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
6562 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
6563 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
6564 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6565 ProductOps, "vqdmlXl");
6566 Constant *CI = ConstantInt::get(SizeTy, 0);
6567 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6568 // Drop lane-selection and the corresponding vector argument (these have
6569 // already been used)
6570 Ops.pop_back_n(2);
6571
6572 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
6573 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
6574 ? Intrinsic::aarch64_neon_sqadd
6575 : Intrinsic::aarch64_neon_sqsub;
6576 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
6577 }
6578 case NEON::BI__builtin_neon_vqdmlals_s32:
6579 case NEON::BI__builtin_neon_vqdmlsls_s32: {
6580 SmallVector<Value *, 2> ProductOps;
6581 ProductOps.push_back(Ops[1]);
6582 ProductOps.push_back(Ops[2]);
6583 Ops[1] =
6584 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6585 ProductOps, "vqdmlXl");
6586
6587 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
6588 ? Intrinsic::aarch64_neon_sqadd
6589 : Intrinsic::aarch64_neon_sqsub;
6590 // Drop the 2nd multiplication argument before the accumulation
6591 Ops.pop_back();
6592 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
6593 }
6594 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
6595 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
6596 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
6597 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
6598 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "lane");
6599 SmallVector<Value *, 2> ProductOps;
6600 ProductOps.push_back(Ops[1]);
6601 ProductOps.push_back(Ops[2]);
6602 Ops[1] =
6603 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6604 ProductOps, "vqdmlXl");
6605 // Drop lane-selection and the corresponding vector argument (these have
6606 // already been used)
6607 Ops.pop_back_n(2);
6608
6609 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
6610 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
6611 ? Intrinsic::aarch64_neon_sqadd
6612 : Intrinsic::aarch64_neon_sqsub;
6613 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
6614 }
6615 case NEON::BI__builtin_neon_vget_lane_bf16:
6616 case NEON::BI__builtin_neon_vduph_lane_bf16:
6617 case NEON::BI__builtin_neon_vduph_lane_f16: {
6618 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
6619 }
6620 case NEON::BI__builtin_neon_vgetq_lane_bf16:
6621 case NEON::BI__builtin_neon_vduph_laneq_bf16:
6622 case NEON::BI__builtin_neon_vduph_laneq_f16: {
6623 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
6624 }
6625 case NEON::BI__builtin_neon_vcvt_bf16_f32: {
6626 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6627 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6628 return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6629 }
6630 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
6631 SmallVector<int, 16> ConcatMask(8);
6632 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6633 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6634 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6635 llvm::Value *Trunc =
6636 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6637 return Builder.CreateShuffleVector(
6638 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
6639 }
6640 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
6641 SmallVector<int, 16> ConcatMask(8);
6642 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6643 SmallVector<int, 16> LoMask(4);
6644 std::iota(LoMask.begin(), LoMask.end(), 0);
6645 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6646 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6647 llvm::Type *V8BF16 = FixedVectorType::get(Builder.getBFloatTy(), 8);
6648 llvm::Value *Inactive = Builder.CreateShuffleVector(
6649 Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
6650 llvm::Value *Trunc =
6651 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
6652 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
6653 }
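// In the "high" variant above, LoMask (0..3) keeps the low half of the
// existing <8 x bfloat> destination, and ConcatMask (0..7) appends the four
// freshly truncated values as the new high half.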
6654
6655 case clang::AArch64::BI_InterlockedAdd:
6656 case clang::AArch64::BI_InterlockedAdd_acq:
6657 case clang::AArch64::BI_InterlockedAdd_rel:
6658 case clang::AArch64::BI_InterlockedAdd_nf:
6659 case clang::AArch64::BI_InterlockedAdd64:
6660 case clang::AArch64::BI_InterlockedAdd64_acq:
6661 case clang::AArch64::BI_InterlockedAdd64_rel:
6662 case clang::AArch64::BI_InterlockedAdd64_nf: {
6663 Address DestAddr = CheckAtomicAlignment(*this, E);
6664 Value *Val = Ops[1];
6665 llvm::AtomicOrdering Ordering;
6666 switch (BuiltinID) {
6667 case clang::AArch64::BI_InterlockedAdd:
6668 case clang::AArch64::BI_InterlockedAdd64:
6669 Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
6670 break;
6671 case clang::AArch64::BI_InterlockedAdd_acq:
6672 case clang::AArch64::BI_InterlockedAdd64_acq:
6673 Ordering = llvm::AtomicOrdering::Acquire;
6674 break;
6675 case clang::AArch64::BI_InterlockedAdd_rel:
6676 case clang::AArch64::BI_InterlockedAdd64_rel:
6677 Ordering = llvm::AtomicOrdering::Release;
6678 break;
6679 case clang::AArch64::BI_InterlockedAdd_nf:
6680 case clang::AArch64::BI_InterlockedAdd64_nf:
6681 Ordering = llvm::AtomicOrdering::Monotonic;
6682 break;
6683 default:
6684 llvm_unreachable("missing builtin ID in switch!");
6685 }
6686 AtomicRMWInst *RMWI =
6687 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val, Ordering);
6688 return Builder.CreateAdd(RMWI, Val);
6689 }
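// MSVC's _InterlockedAdd family returns the value *after* the addition,
// while LLVM's atomicrmw add returns the value held *before* the operation,
// hence the extra CreateAdd on the result. Roughly (IR names illustrative):
//   %old = atomicrmw add ptr %p, i32 %v seq_cst
//   %new = add i32 %old, %v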
6690 }
6691
6692 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
6693 llvm::Type *Ty = VTy;
6694 if (!Ty)
6695 return nullptr;
6696
6697 bool ExtractLow = false;
6698 bool ExtendLaneArg = false;
6699 switch (BuiltinID) {
6700 default: return nullptr;
6701 case NEON::BI__builtin_neon_vbsl_v:
6702 case NEON::BI__builtin_neon_vbslq_v: {
6703 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6704 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
6705 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
6706 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
6707
6708 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
6709 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
6710 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
6711 return Builder.CreateBitCast(Ops[0], Ty);
6712 }
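// vbsl is open-coded as a bitwise select,
//   (Ops[0] & Ops[1]) | (~Ops[0] & Ops[2]),
// so each result bit is taken from Ops[1] where the mask bit is set and
// from Ops[2] where it is clear.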
6713 case NEON::BI__builtin_neon_vfma_lane_v:
6714 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6715 // The ARM builtins (and instructions) have the addend as the first
6716 // operand, but the 'fma' intrinsics have it last. Swap it around here.
6717 Value *Addend = Ops[0];
6718 Value *Multiplicand = Ops[1];
6719 Value *LaneSource = Ops[2];
6720 Ops[0] = Multiplicand;
6721 Ops[1] = LaneSource;
6722 Ops[2] = Addend;
6723
6724 // Now adjust things to handle the lane access.
6725 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
6726 ? llvm::FixedVectorType::get(VTy->getElementType(),
6727 VTy->getNumElements() / 2)
6728 : VTy;
6729 llvm::Constant *cst = cast<Constant>(Ops[3]);
6730 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
6731 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
6732 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
6733
6734 Ops.pop_back();
6735 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
6736 : Intrinsic::fma;
6737 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
6738 }
6739 case NEON::BI__builtin_neon_vfma_laneq_v: {
6740 auto *VTy = cast<llvm::FixedVectorType>(Ty);
6741 // v1f64 fma should be mapped to Neon scalar f64 fma
6742 if (VTy && VTy->getElementType() == DoubleTy) {
6743 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6744 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6745 llvm::FixedVectorType *VTy =
6746 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
6747 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
6748 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6749 Value *Result;
6750 Result = emitCallMaybeConstrainedFPBuiltin(
6751 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
6752 DoubleTy, {Ops[1], Ops[2], Ops[0]});
6753 return Builder.CreateBitCast(Result, Ty);
6754 }
6755 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6756 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6757
6758 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
6759 VTy->getNumElements() * 2);
6760 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
6761 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
6762 cast<ConstantInt>(Ops[3]));
6763 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6764
6765 return emitCallMaybeConstrainedFPBuiltin(
6766 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6767 {Ops[2], Ops[1], Ops[0]});
6768 }
6769 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6770 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6771 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6772
6773 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6774 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6775 return emitCallMaybeConstrainedFPBuiltin(
6776 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6777 {Ops[2], Ops[1], Ops[0]});
6778 }
6779 case NEON::BI__builtin_neon_vfmah_lane_f16:
6780 case NEON::BI__builtin_neon_vfmas_lane_f32:
6781 case NEON::BI__builtin_neon_vfmah_laneq_f16:
6782 case NEON::BI__builtin_neon_vfmas_laneq_f32:
6783 case NEON::BI__builtin_neon_vfmad_lane_f64:
6784 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6785 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6786 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6788 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6789 {Ops[1], Ops[2], Ops[0]});
6790 }
6791 case NEON::BI__builtin_neon_vmull_v:
6792 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6793 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6794 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6795 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6796 case NEON::BI__builtin_neon_vmax_v:
6797 case NEON::BI__builtin_neon_vmaxq_v:
6798 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6799 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6800 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6801 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6802 case NEON::BI__builtin_neon_vmaxh_f16: {
6803 Int = Intrinsic::aarch64_neon_fmax;
6804 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
6805 }
6806 case NEON::BI__builtin_neon_vmin_v:
6807 case NEON::BI__builtin_neon_vminq_v:
6808 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6809 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6810 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6811 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
6812 case NEON::BI__builtin_neon_vminh_f16: {
6813 Int = Intrinsic::aarch64_neon_fmin;
6814 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
6815 }
6816 case NEON::BI__builtin_neon_vabd_v:
6817 case NEON::BI__builtin_neon_vabdq_v:
6818 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6819 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6820 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6821 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
6822 case NEON::BI__builtin_neon_vpadal_v:
6823 case NEON::BI__builtin_neon_vpadalq_v: {
6824 unsigned ArgElts = VTy->getNumElements();
6825 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6826 unsigned BitWidth = EltTy->getBitWidth();
6827 auto *ArgTy = llvm::FixedVectorType::get(
6828 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
6829 llvm::Type* Tys[2] = { VTy, ArgTy };
6830 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6831 SmallVector<llvm::Value*, 1> TmpOps;
6832 TmpOps.push_back(Ops[1]);
6833 Function *F = CGM.getIntrinsic(Int, Tys);
6834 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
6835 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6836 return Builder.CreateAdd(tmp, addend);
6837 }
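// vpadal = pairwise add-long and accumulate: the widening pairwise add is
// done by the [su]addlp intrinsic, and the accumulator Ops[0] is then added
// back with an ordinary vector add.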
6838 case NEON::BI__builtin_neon_vpmin_v:
6839 case NEON::BI__builtin_neon_vpminq_v:
6840 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6841 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6842 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6843 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
6844 case NEON::BI__builtin_neon_vpmax_v:
6845 case NEON::BI__builtin_neon_vpmaxq_v:
6846 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6847 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6848 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6849 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
6850 case NEON::BI__builtin_neon_vminnm_v:
6851 case NEON::BI__builtin_neon_vminnmq_v:
6852 Int = Intrinsic::aarch64_neon_fminnm;
6853 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
6854 case NEON::BI__builtin_neon_vminnmh_f16:
6855 Int = Intrinsic::aarch64_neon_fminnm;
6856 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
6857 case NEON::BI__builtin_neon_vmaxnm_v:
6858 case NEON::BI__builtin_neon_vmaxnmq_v:
6859 Int = Intrinsic::aarch64_neon_fmaxnm;
6860 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
6861 case NEON::BI__builtin_neon_vmaxnmh_f16:
6862 Int = Intrinsic::aarch64_neon_fmaxnm;
6863 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
6864 case NEON::BI__builtin_neon_vrecpss_f32: {
6865 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6866 Ops, "vrecps");
6867 }
6868 case NEON::BI__builtin_neon_vrecpsd_f64:
6869 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6870 Ops, "vrecps");
6871 case NEON::BI__builtin_neon_vrecpsh_f16:
6872 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
6873 Ops, "vrecps");
6874 case NEON::BI__builtin_neon_vqshrun_n_v:
6875 Int = Intrinsic::aarch64_neon_sqshrun;
6876 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
6877 case NEON::BI__builtin_neon_vqrshrun_n_v:
6878 Int = Intrinsic::aarch64_neon_sqrshrun;
6879 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
6880 case NEON::BI__builtin_neon_vqshrn_n_v:
6881 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6882 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
6883 case NEON::BI__builtin_neon_vrshrn_n_v:
6884 Int = Intrinsic::aarch64_neon_rshrn;
6885 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
6886 case NEON::BI__builtin_neon_vqrshrn_n_v:
6887 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6888 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
6889 case NEON::BI__builtin_neon_vrndah_f16: {
6890 Int = Builder.getIsFPConstrained()
6891 ? Intrinsic::experimental_constrained_round
6892 : Intrinsic::round;
6893 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
6894 }
6895 case NEON::BI__builtin_neon_vrnda_v:
6896 case NEON::BI__builtin_neon_vrndaq_v: {
6897 Int = Builder.getIsFPConstrained()
6898 ? Intrinsic::experimental_constrained_round
6899 : Intrinsic::round;
6900 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
6901 }
6902 case NEON::BI__builtin_neon_vrndih_f16: {
6903 Int = Builder.getIsFPConstrained()
6904 ? Intrinsic::experimental_constrained_nearbyint
6905 : Intrinsic::nearbyint;
6906 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
6907 }
6908 case NEON::BI__builtin_neon_vrndmh_f16: {
6909 Int = Builder.getIsFPConstrained()
6910 ? Intrinsic::experimental_constrained_floor
6911 : Intrinsic::floor;
6912 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
6913 }
6914 case NEON::BI__builtin_neon_vrndm_v:
6915 case NEON::BI__builtin_neon_vrndmq_v: {
6916 Int = Builder.getIsFPConstrained()
6917 ? Intrinsic::experimental_constrained_floor
6918 : Intrinsic::floor;
6919 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6920 }
6921 case NEON::BI__builtin_neon_vrndnh_f16: {
6922 Int = Builder.getIsFPConstrained()
6923 ? Intrinsic::experimental_constrained_roundeven
6924 : Intrinsic::roundeven;
6925 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
6926 }
6927 case NEON::BI__builtin_neon_vrndn_v:
6928 case NEON::BI__builtin_neon_vrndnq_v: {
6929 Int = Builder.getIsFPConstrained()
6930 ? Intrinsic::experimental_constrained_roundeven
6931 : Intrinsic::roundeven;
6932 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6933 }
6934 case NEON::BI__builtin_neon_vrndns_f32: {
6935 Int = Builder.getIsFPConstrained()
6936 ? Intrinsic::experimental_constrained_roundeven
6937 : Intrinsic::roundeven;
6938 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
6939 }
6940 case NEON::BI__builtin_neon_vrndph_f16: {
6941 Int = Builder.getIsFPConstrained()
6942 ? Intrinsic::experimental_constrained_ceil
6943 : Intrinsic::ceil;
6944 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
6945 }
6946 case NEON::BI__builtin_neon_vrndp_v:
6947 case NEON::BI__builtin_neon_vrndpq_v: {
6948 Int = Builder.getIsFPConstrained()
6949 ? Intrinsic::experimental_constrained_ceil
6950 : Intrinsic::ceil;
6951 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6952 }
6953 case NEON::BI__builtin_neon_vrndxh_f16: {
6954 Int = Builder.getIsFPConstrained()
6955 ? Intrinsic::experimental_constrained_rint
6956 : Intrinsic::rint;
6957 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
6958 }
6959 case NEON::BI__builtin_neon_vrndx_v:
6960 case NEON::BI__builtin_neon_vrndxq_v: {
6961 Int = Builder.getIsFPConstrained()
6962 ? Intrinsic::experimental_constrained_rint
6963 : Intrinsic::rint;
6964 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6965 }
6966 case NEON::BI__builtin_neon_vrndh_f16: {
6967 Int = Builder.getIsFPConstrained()
6968 ? Intrinsic::experimental_constrained_trunc
6969 : Intrinsic::trunc;
6970 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
6971 }
6972 case NEON::BI__builtin_neon_vrnd32x_f32:
6973 case NEON::BI__builtin_neon_vrnd32xq_f32:
6974 case NEON::BI__builtin_neon_vrnd32x_f64:
6975 case NEON::BI__builtin_neon_vrnd32xq_f64: {
6976 Int = Intrinsic::aarch64_neon_frint32x;
6977 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
6978 }
6979 case NEON::BI__builtin_neon_vrnd32z_f32:
6980 case NEON::BI__builtin_neon_vrnd32zq_f32:
6981 case NEON::BI__builtin_neon_vrnd32z_f64:
6982 case NEON::BI__builtin_neon_vrnd32zq_f64: {
6983 Int = Intrinsic::aarch64_neon_frint32z;
6984 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
6985 }
6986 case NEON::BI__builtin_neon_vrnd64x_f32:
6987 case NEON::BI__builtin_neon_vrnd64xq_f32:
6988 case NEON::BI__builtin_neon_vrnd64x_f64:
6989 case NEON::BI__builtin_neon_vrnd64xq_f64: {
6990 Int = Intrinsic::aarch64_neon_frint64x;
6991 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
6992 }
6993 case NEON::BI__builtin_neon_vrnd64z_f32:
6994 case NEON::BI__builtin_neon_vrnd64zq_f32:
6995 case NEON::BI__builtin_neon_vrnd64z_f64:
6996 case NEON::BI__builtin_neon_vrnd64zq_f64: {
6997 Int = Intrinsic::aarch64_neon_frint64z;
6998 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
6999 }
7000 case NEON::BI__builtin_neon_vrnd_v:
7001 case NEON::BI__builtin_neon_vrndq_v: {
7002 Int = Builder.getIsFPConstrained()
7003 ? Intrinsic::experimental_constrained_trunc
7004 : Intrinsic::trunc;
7005 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
7006 }
7007 case NEON::BI__builtin_neon_vcvt_f64_v:
7008 case NEON::BI__builtin_neon_vcvtq_f64_v:
7009 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7010 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
7011 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7012 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7013 case NEON::BI__builtin_neon_vcvt_f64_f32: {
7014 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
7015 "unexpected vcvt_f64_f32 builtin");
7016 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
7017 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
7018
7019 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
7020 }
7021 case NEON::BI__builtin_neon_vcvt_f32_f64: {
7022 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
7023 "unexpected vcvt_f32_f64 builtin");
7024 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
7025 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
7026
7027 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
7028 }
7029 case NEON::BI__builtin_neon_vcvt_s32_v:
7030 case NEON::BI__builtin_neon_vcvt_u32_v:
7031 case NEON::BI__builtin_neon_vcvt_s64_v:
7032 case NEON::BI__builtin_neon_vcvt_u64_v:
7033 case NEON::BI__builtin_neon_vcvt_s16_f16:
7034 case NEON::BI__builtin_neon_vcvt_u16_f16:
7035 case NEON::BI__builtin_neon_vcvtq_s32_v:
7036 case NEON::BI__builtin_neon_vcvtq_u32_v:
7037 case NEON::BI__builtin_neon_vcvtq_s64_v:
7038 case NEON::BI__builtin_neon_vcvtq_u64_v:
7039 case NEON::BI__builtin_neon_vcvtq_s16_f16:
7040 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
7041 Int =
7042 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
7043 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
7044 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
7045 }
7046 case NEON::BI__builtin_neon_vcvta_s16_f16:
7047 case NEON::BI__builtin_neon_vcvta_u16_f16:
7048 case NEON::BI__builtin_neon_vcvta_s32_v:
7049 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
7050 case NEON::BI__builtin_neon_vcvtaq_s32_v:
7051 case NEON::BI__builtin_neon_vcvta_u32_v:
7052 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
7053 case NEON::BI__builtin_neon_vcvtaq_u32_v:
7054 case NEON::BI__builtin_neon_vcvta_s64_v:
7055 case NEON::BI__builtin_neon_vcvtaq_s64_v:
7056 case NEON::BI__builtin_neon_vcvta_u64_v:
7057 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
7058 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
7059 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7060 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
7061 }
7062 case NEON::BI__builtin_neon_vcvtm_s16_f16:
7063 case NEON::BI__builtin_neon_vcvtm_s32_v:
7064 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
7065 case NEON::BI__builtin_neon_vcvtmq_s32_v:
7066 case NEON::BI__builtin_neon_vcvtm_u16_f16:
7067 case NEON::BI__builtin_neon_vcvtm_u32_v:
7068 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
7069 case NEON::BI__builtin_neon_vcvtmq_u32_v:
7070 case NEON::BI__builtin_neon_vcvtm_s64_v:
7071 case NEON::BI__builtin_neon_vcvtmq_s64_v:
7072 case NEON::BI__builtin_neon_vcvtm_u64_v:
7073 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
7074 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
7075 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7076 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
7077 }
7078 case NEON::BI__builtin_neon_vcvtn_s16_f16:
7079 case NEON::BI__builtin_neon_vcvtn_s32_v:
7080 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
7081 case NEON::BI__builtin_neon_vcvtnq_s32_v:
7082 case NEON::BI__builtin_neon_vcvtn_u16_f16:
7083 case NEON::BI__builtin_neon_vcvtn_u32_v:
7084 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
7085 case NEON::BI__builtin_neon_vcvtnq_u32_v:
7086 case NEON::BI__builtin_neon_vcvtn_s64_v:
7087 case NEON::BI__builtin_neon_vcvtnq_s64_v:
7088 case NEON::BI__builtin_neon_vcvtn_u64_v:
7089 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
7090 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
7091 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7092 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
7093 }
7094 case NEON::BI__builtin_neon_vcvtp_s16_f16:
7095 case NEON::BI__builtin_neon_vcvtp_s32_v:
7096 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
7097 case NEON::BI__builtin_neon_vcvtpq_s32_v:
7098 case NEON::BI__builtin_neon_vcvtp_u16_f16:
7099 case NEON::BI__builtin_neon_vcvtp_u32_v:
7100 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
7101 case NEON::BI__builtin_neon_vcvtpq_u32_v:
7102 case NEON::BI__builtin_neon_vcvtp_s64_v:
7103 case NEON::BI__builtin_neon_vcvtpq_s64_v:
7104 case NEON::BI__builtin_neon_vcvtp_u64_v:
7105 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
7106 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
7107 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7108 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
7109 }
7110 case NEON::BI__builtin_neon_vmulx_v:
7111 case NEON::BI__builtin_neon_vmulxq_v: {
7112 Int = Intrinsic::aarch64_neon_fmulx;
7113 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
7114 }
7115 case NEON::BI__builtin_neon_vmulxh_lane_f16:
7116 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
7117 // vmulx_lane should be mapped to Neon scalar mulx after
7118 // extracting the scalar element
7119 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
7120 Ops.pop_back();
7121 Int = Intrinsic::aarch64_neon_fmulx;
7122 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
7123 }
7124 case NEON::BI__builtin_neon_vmul_lane_v:
7125 case NEON::BI__builtin_neon_vmul_laneq_v: {
7126 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
7127 bool Quad = false;
7128 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
7129 Quad = true;
7130 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
7131 llvm::FixedVectorType *VTy =
7132 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
7133 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7134 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
7135 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
7136 return Builder.CreateBitCast(Result, Ty);
7137 }
7138 case NEON::BI__builtin_neon_vpmaxnm_v:
7139 case NEON::BI__builtin_neon_vpmaxnmq_v: {
7140 Int = Intrinsic::aarch64_neon_fmaxnmp;
7141 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
7142 }
7143 case NEON::BI__builtin_neon_vpminnm_v:
7144 case NEON::BI__builtin_neon_vpminnmq_v: {
7145 Int = Intrinsic::aarch64_neon_fminnmp;
7146 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
7147 }
7148 case NEON::BI__builtin_neon_vsqrth_f16: {
7149 Int = Builder.getIsFPConstrained()
7150 ? Intrinsic::experimental_constrained_sqrt
7151 : Intrinsic::sqrt;
7152 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
7153 }
7154 case NEON::BI__builtin_neon_vsqrt_v:
7155 case NEON::BI__builtin_neon_vsqrtq_v: {
7156 Int = Builder.getIsFPConstrained()
7157 ? Intrinsic::experimental_constrained_sqrt
7158 : Intrinsic::sqrt;
7159 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7160 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
7161 }
7162 case NEON::BI__builtin_neon_vrbit_v:
7163 case NEON::BI__builtin_neon_vrbitq_v: {
7164 Int = Intrinsic::bitreverse;
7165 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
7166 }
7167 case NEON::BI__builtin_neon_vmaxv_f16: {
7168 Int = Intrinsic::aarch64_neon_fmaxv;
7169 Ty = HalfTy;
7170 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7171 llvm::Type *Tys[2] = {Ty, VTy};
7172 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7173 return Builder.CreateTrunc(Ops[0], HalfTy);
7174 }
7175 case NEON::BI__builtin_neon_vmaxvq_f16: {
7176 Int = Intrinsic::aarch64_neon_fmaxv;
7177 Ty = HalfTy;
7178 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7179 llvm::Type *Tys[2] = {Ty, VTy};
7180 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7181 return Builder.CreateTrunc(Ops[0], HalfTy);
7182 }
7183 case NEON::BI__builtin_neon_vminv_f16: {
7184 Int = Intrinsic::aarch64_neon_fminv;
7185 Ty = HalfTy;
7186 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7187 llvm::Type *Tys[2] = {Ty, VTy};
7188 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7189 return Builder.CreateTrunc(Ops[0], HalfTy);
7190 }
7191 case NEON::BI__builtin_neon_vminvq_f16: {
7192 Int = Intrinsic::aarch64_neon_fminv;
7193 Ty = HalfTy;
7194 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7195 llvm::Type *Tys[2] = {Ty, VTy};
7196 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7197 return Builder.CreateTrunc(Ops[0], HalfTy);
7198 }
7199 case NEON::BI__builtin_neon_vmaxnmv_f16: {
7200 Int = Intrinsic::aarch64_neon_fmaxnmv;
7201 Ty = HalfTy;
7202 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7203 llvm::Type *Tys[2] = {Ty, VTy};
7204 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
7205 return Builder.CreateTrunc(Ops[0], HalfTy);
7206 }
7207 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
7208 Int = Intrinsic::aarch64_neon_fmaxnmv;
7209 Ty = HalfTy;
7210 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7211 llvm::Type *Tys[2] = {Ty, VTy};
7212 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
7213 return Builder.CreateTrunc(Ops[0], HalfTy);
7214 }
7215 case NEON::BI__builtin_neon_vminnmv_f16: {
7216 Int = Intrinsic::aarch64_neon_fminnmv;
7217 Ty = HalfTy;
7218 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7219 llvm::Type *Tys[2] = {Ty, VTy};
7220 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
7221 return Builder.CreateTrunc(Ops[0], HalfTy);
7222 }
7223 case NEON::BI__builtin_neon_vminnmvq_f16: {
7224 Int = Intrinsic::aarch64_neon_fminnmv;
7225 Ty = HalfTy;
7226 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7227 llvm::Type *Tys[2] = {Ty, VTy};
7228 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
7229 return Builder.CreateTrunc(Ops[0], HalfTy);
7230 }
7231 case NEON::BI__builtin_neon_vmul_n_f64: {
7232 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
7233 Value *RHS = Builder.CreateBitCast(Ops[1], DoubleTy);
7234 return Builder.CreateFMul(Ops[0], RHS);
7235 }
7236 case NEON::BI__builtin_neon_vaddlv_u8: {
7237 Int = Intrinsic::aarch64_neon_uaddlv;
7238 Ty = Int32Ty;
7239 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7240 llvm::Type *Tys[2] = {Ty, VTy};
7241 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7242 return Builder.CreateTrunc(Ops[0], Int16Ty);
7243 }
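// uaddlv on <8 x i8> is declared to return i32, but the ACLE vaddlv_u8
// intrinsic returns uint16_t, hence the trunc to i16 above.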
7244 case NEON::BI__builtin_neon_vaddlv_u16: {
7245 Int = Intrinsic::aarch64_neon_uaddlv;
7246 Ty = Int32Ty;
7247 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7248 llvm::Type *Tys[2] = {Ty, VTy};
7249 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7250 }
7251 case NEON::BI__builtin_neon_vaddlvq_u8: {
7252 Int = Intrinsic::aarch64_neon_uaddlv;
7253 Ty = Int32Ty;
7254 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7255 llvm::Type *Tys[2] = {Ty, VTy};
7256 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7257 return Builder.CreateTrunc(Ops[0], Int16Ty);
7258 }
7259 case NEON::BI__builtin_neon_vaddlvq_u16: {
7260 Int = Intrinsic::aarch64_neon_uaddlv;
7261 Ty = Int32Ty;
7262 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7263 llvm::Type *Tys[2] = {Ty, VTy};
7264 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7265 }
7266 case NEON::BI__builtin_neon_vaddlv_s8: {
7267 Int = Intrinsic::aarch64_neon_saddlv;
7268 Ty = Int32Ty;
7269 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7270 llvm::Type *Tys[2] = {Ty, VTy};
7271 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7272 return Builder.CreateTrunc(Ops[0], Int16Ty);
7273 }
7274 case NEON::BI__builtin_neon_vaddlv_s16: {
7275 Int = Intrinsic::aarch64_neon_saddlv;
7276 Ty = Int32Ty;
7277 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7278 llvm::Type *Tys[2] = {Ty, VTy};
7279 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7280 }
7281 case NEON::BI__builtin_neon_vaddlvq_s8: {
7282 Int = Intrinsic::aarch64_neon_saddlv;
7283 Ty = Int32Ty;
7284 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7285 llvm::Type *Tys[2] = {Ty, VTy};
7286 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7287 return Builder.CreateTrunc(Ops[0], Int16Ty);
7288 }
7289 case NEON::BI__builtin_neon_vaddlvq_s16: {
7290 Int = Intrinsic::aarch64_neon_saddlv;
7291 Ty = Int32Ty;
7292 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7293 llvm::Type *Tys[2] = {Ty, VTy};
7294 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7295 }
7296 case NEON::BI__builtin_neon_vsri_n_v:
7297 case NEON::BI__builtin_neon_vsriq_n_v: {
7298 Int = Intrinsic::aarch64_neon_vsri;
7299 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
7300 return EmitNeonCall(Intrin, Ops, "vsri_n");
7301 }
7302 case NEON::BI__builtin_neon_vsli_n_v:
7303 case NEON::BI__builtin_neon_vsliq_n_v: {
7304 Int = Intrinsic::aarch64_neon_vsli;
7305 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
7306 return EmitNeonCall(Intrin, Ops, "vsli_n");
7307 }
7308 case NEON::BI__builtin_neon_vsra_n_v:
7309 case NEON::BI__builtin_neon_vsraq_n_v:
7310 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7311 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
7312 return Builder.CreateAdd(Ops[0], Ops[1]);
7313 case NEON::BI__builtin_neon_vrsra_n_v:
7314 case NEON::BI__builtin_neon_vrsraq_n_v: {
7315 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
7316 SmallVector<llvm::Value*,2> TmpOps;
7317 TmpOps.push_back(Ops[1]);
7318 TmpOps.push_back(Ops[2]);
7319 Function* F = CGM.getIntrinsic(Int, Ty);
7320 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
7321 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7322 return Builder.CreateAdd(Ops[0], tmp);
7323 }
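// vrsra_n: there is no separate rounding-shift-right call here; EmitNeonCall
// is invoked with rightshift=true so the shift-amount operand (index 1) is
// negated and splatted, turning [su]rshl (a rounding shift left) into a
// rounding right shift, after which the accumulator is added.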
7324 case NEON::BI__builtin_neon_vld1_v:
7325 case NEON::BI__builtin_neon_vld1q_v: {
7326 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
7327 }
7328 case NEON::BI__builtin_neon_vst1_v:
7329 case NEON::BI__builtin_neon_vst1q_v:
7330 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7331 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
7332 case NEON::BI__builtin_neon_vld1_lane_v:
7333 case NEON::BI__builtin_neon_vld1q_lane_v: {
7334 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7335 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
7336 PtrOp0.getAlignment());
7337 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
7338 }
7339 case NEON::BI__builtin_neon_vldap1_lane_s64:
7340 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
7341 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7342 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
7343 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
7344 LI->setAtomic(llvm::AtomicOrdering::Acquire);
7345 Ops[0] = LI;
7346 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
7347 }
7348 case NEON::BI__builtin_neon_vld1_dup_v:
7349 case NEON::BI__builtin_neon_vld1q_dup_v: {
7350 Value *V = PoisonValue::get(Ty);
7351 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
7352 PtrOp0.getAlignment());
7353 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
7354 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
7355 return EmitNeonSplat(Ops[0], CI);
7356 }
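// vld1_dup: load a single scalar, insert it into lane 0 of a poison vector,
// then broadcast lane 0 to every element via EmitNeonSplat.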
7357 case NEON::BI__builtin_neon_vst1_lane_v:
7358 case NEON::BI__builtin_neon_vst1q_lane_v:
7359 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7360 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
7361 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
7362 case NEON::BI__builtin_neon_vstl1_lane_s64:
7363 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
7364 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7365 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
7366 llvm::StoreInst *SI =
7367 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
7368 SI->setAtomic(llvm::AtomicOrdering::Release);
7369 return SI;
7370 }
7371 case NEON::BI__builtin_neon_vld2_v:
7372 case NEON::BI__builtin_neon_vld2q_v: {
7373 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
7374 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
7375 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
7376 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7377 }
7378 case NEON::BI__builtin_neon_vld3_v:
7379 case NEON::BI__builtin_neon_vld3q_v: {
7380 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
7381 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
7382 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
7383 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7384 }
7385 case NEON::BI__builtin_neon_vld4_v:
7386 case NEON::BI__builtin_neon_vld4q_v: {
7387 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
7388 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
7389 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
7390 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7391 }
7392 case NEON::BI__builtin_neon_vld2_dup_v:
7393 case NEON::BI__builtin_neon_vld2q_dup_v: {
7394 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
7395 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
7396 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
7397 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7398 }
7399 case NEON::BI__builtin_neon_vld3_dup_v:
7400 case NEON::BI__builtin_neon_vld3q_dup_v: {
7401 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
7402 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
7403 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
7404 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7405 }
7406 case NEON::BI__builtin_neon_vld4_dup_v:
7407 case NEON::BI__builtin_neon_vld4q_dup_v: {
7408 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
7409 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
7410 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
7411 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7412 }
7413 case NEON::BI__builtin_neon_vld2_lane_v:
7414 case NEON::BI__builtin_neon_vld2q_lane_v: {
7415 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7416 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
7417 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7418 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7419 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7420 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7421 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
7422 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7423 }
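// For the lane-wise structured loads, std::rotate reorders the builtin's
// trailing operands from (source pointer, vectors..., lane) into the
// intrinsic's (vectors..., lane, pointer) order; the aggregate result of
// the call is then stored back through the result pointer in Ops[0].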
7424 case NEON::BI__builtin_neon_vld3_lane_v:
7425 case NEON::BI__builtin_neon_vld3q_lane_v: {
7426 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7427 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
7428 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7429 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7430 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7431 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7432 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7433 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
7434 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7435 }
7436 case NEON::BI__builtin_neon_vld4_lane_v:
7437 case NEON::BI__builtin_neon_vld4q_lane_v: {
7438 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7439 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
7440 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7441 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7442 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7443 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7444 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
7445 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
7446 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
7447 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7448 }
7449 case NEON::BI__builtin_neon_vst2_v:
7450 case NEON::BI__builtin_neon_vst2q_v: {
7451 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7452 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
7453 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
7454 Ops, "");
7455 }
7456 case NEON::BI__builtin_neon_vst2_lane_v:
7457 case NEON::BI__builtin_neon_vst2q_lane_v: {
7458 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7459 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
7460 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7461 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
7462 Ops, "");
7463 }
7464 case NEON::BI__builtin_neon_vst3_v:
7465 case NEON::BI__builtin_neon_vst3q_v: {
7466 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7467 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7468 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
7469 Ops, "");
7470 }
7471 case NEON::BI__builtin_neon_vst3_lane_v:
7472 case NEON::BI__builtin_neon_vst3q_lane_v: {
7473 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7474 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7475 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7476 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
7477 Ops, "");
7478 }
7479 case NEON::BI__builtin_neon_vst4_v:
7480 case NEON::BI__builtin_neon_vst4q_v: {
7481 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7482 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7483 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
7484 Ops, "");
7485 }
7486 case NEON::BI__builtin_neon_vst4_lane_v:
7487 case NEON::BI__builtin_neon_vst4q_lane_v: {
7488 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7489 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7490 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
7491 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
7492 Ops, "");
7493 }
7494 case NEON::BI__builtin_neon_vtrn_v:
7495 case NEON::BI__builtin_neon_vtrnq_v: {
7496 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7497 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7498 Value *SV = nullptr;
7499
7500 for (unsigned vi = 0; vi != 2; ++vi) {
7501 SmallVector<int, 16> Indices;
7502 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7503 Indices.push_back(i+vi);
7504 Indices.push_back(i+e+vi);
7505 }
7506 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7507 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
7508 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7509 }
7510 return SV;
7511 }
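// vtrn returns two vectors; each loop iteration builds the shuffle for one
// of them (pairing even or odd lanes across the two inputs) and stores it
// into the vi-th vector slot of the result buffer at Ops[0]. vuzp and vzip
// below follow the same store-both-halves pattern with different masks.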
7512 case NEON::BI__builtin_neon_vuzp_v:
7513 case NEON::BI__builtin_neon_vuzpq_v: {
7514 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7515 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7516 Value *SV = nullptr;
7517
7518 for (unsigned vi = 0; vi != 2; ++vi) {
7519 SmallVector<int, 16> Indices;
7520 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7521 Indices.push_back(2*i+vi);
7522
7523 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7524 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
7525 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7526 }
7527 return SV;
7528 }
7529 case NEON::BI__builtin_neon_vzip_v:
7530 case NEON::BI__builtin_neon_vzipq_v: {
7531 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7532 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7533 Value *SV = nullptr;
7534
7535 for (unsigned vi = 0; vi != 2; ++vi) {
7536 SmallVector<int, 16> Indices;
7537 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7538 Indices.push_back((i + vi*e) >> 1);
7539 Indices.push_back(((i + vi*e) >> 1)+e);
7540 }
7541 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7542 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
7543 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7544 }
7545 return SV;
7546 }
7547 case NEON::BI__builtin_neon_vqtbl1q_v: {
7548 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
7549 Ops, "vtbl1");
7550 }
7551 case NEON::BI__builtin_neon_vqtbl2q_v: {
7552 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
7553 Ops, "vtbl2");
7554 }
7555 case NEON::BI__builtin_neon_vqtbl3q_v: {
7556 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
7557 Ops, "vtbl3");
7558 }
7559 case NEON::BI__builtin_neon_vqtbl4q_v: {
7560 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
7561 Ops, "vtbl4");
7562 }
7563 case NEON::BI__builtin_neon_vqtbx1q_v: {
7564 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
7565 Ops, "vtbx1");
7566 }
7567 case NEON::BI__builtin_neon_vqtbx2q_v: {
7568 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
7569 Ops, "vtbx2");
7570 }
7571 case NEON::BI__builtin_neon_vqtbx3q_v: {
7572 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
7573 Ops, "vtbx3");
7574 }
7575 case NEON::BI__builtin_neon_vqtbx4q_v: {
7576 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
7577 Ops, "vtbx4");
7578 }
7579 case NEON::BI__builtin_neon_vsqadd_v:
7580 case NEON::BI__builtin_neon_vsqaddq_v: {
7581 Int = Intrinsic::aarch64_neon_usqadd;
7582 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
7583 }
7584 case NEON::BI__builtin_neon_vuqadd_v:
7585 case NEON::BI__builtin_neon_vuqaddq_v: {
7586 Int = Intrinsic::aarch64_neon_suqadd;
7587 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
7588 }
7589
7590 case NEON::BI__builtin_neon_vluti2_laneq_mf8:
7591 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
7592 case NEON::BI__builtin_neon_vluti2_laneq_f16:
7593 case NEON::BI__builtin_neon_vluti2_laneq_p16:
7594 case NEON::BI__builtin_neon_vluti2_laneq_p8:
7595 case NEON::BI__builtin_neon_vluti2_laneq_s16:
7596 case NEON::BI__builtin_neon_vluti2_laneq_s8:
7597 case NEON::BI__builtin_neon_vluti2_laneq_u16:
7598 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
7599 Int = Intrinsic::aarch64_neon_vluti2_laneq;
7600 llvm::Type *Tys[2];
7601 Tys[0] = Ty;
7602 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7603 /*isQuad*/ false));
7604 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
7605 }
7606 case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
7607 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
7608 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
7609 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
7610 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
7611 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
7612 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
7613 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
7614 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
7615 Int = Intrinsic::aarch64_neon_vluti2_laneq;
7616 llvm::Type *Tys[2];
7617 Tys[0] = Ty;
7618 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7619 /*isQuad*/ true));
7620 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
7621 }
7622 case NEON::BI__builtin_neon_vluti2_lane_mf8:
7623 case NEON::BI__builtin_neon_vluti2_lane_bf16:
7624 case NEON::BI__builtin_neon_vluti2_lane_f16:
7625 case NEON::BI__builtin_neon_vluti2_lane_p16:
7626 case NEON::BI__builtin_neon_vluti2_lane_p8:
7627 case NEON::BI__builtin_neon_vluti2_lane_s16:
7628 case NEON::BI__builtin_neon_vluti2_lane_s8:
7629 case NEON::BI__builtin_neon_vluti2_lane_u16:
7630 case NEON::BI__builtin_neon_vluti2_lane_u8: {
7631 Int = Intrinsic::aarch64_neon_vluti2_lane;
7632 llvm::Type *Tys[2];
7633 Tys[0] = Ty;
7634 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7635 /*isQuad*/ false));
7636 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
7637 }
7638 case NEON::BI__builtin_neon_vluti2q_lane_mf8:
7639 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
7640 case NEON::BI__builtin_neon_vluti2q_lane_f16:
7641 case NEON::BI__builtin_neon_vluti2q_lane_p16:
7642 case NEON::BI__builtin_neon_vluti2q_lane_p8:
7643 case NEON::BI__builtin_neon_vluti2q_lane_s16:
7644 case NEON::BI__builtin_neon_vluti2q_lane_s8:
7645 case NEON::BI__builtin_neon_vluti2q_lane_u16:
7646 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
7647 Int = Intrinsic::aarch64_neon_vluti2_lane;
7648 llvm::Type *Tys[2];
7649 Tys[0] = Ty;
7650 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7651 /*isQuad*/ true));
7652 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
7653 }
7654 case NEON::BI__builtin_neon_vluti4q_lane_mf8:
7655 case NEON::BI__builtin_neon_vluti4q_lane_p8:
7656 case NEON::BI__builtin_neon_vluti4q_lane_s8:
7657 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
7658 Int = Intrinsic::aarch64_neon_vluti4q_lane;
7659 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane");
7660 }
7661 case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
7662 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
7663 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
7664 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
7665 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
7666 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq");
7667 }
7668 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
7669 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
7670 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
7671 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
7672 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
7673 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
7674 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
7675 }
7676 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
7677 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
7678 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
7679 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
7680 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
7681 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
7682 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
7683 }
7684 case NEON::BI__builtin_neon_vmmlaq_f16_mf8_fpm:
7685 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla,
7686 {llvm::FixedVectorType::get(HalfTy, 8),
7687 llvm::FixedVectorType::get(Int8Ty, 16)},
7688 Ops, E, "fmmla");
7689 case NEON::BI__builtin_neon_vmmlaq_f32_mf8_fpm:
7690 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla,
7691 {llvm::FixedVectorType::get(FloatTy, 4),
7692 llvm::FixedVectorType::get(Int8Ty, 16)},
7693 Ops, E, "fmmla");
7694 case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
7695 ExtractLow = true;
7696 [[fallthrough]];
7697 case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
7698 case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
7699 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
7700 llvm::FixedVectorType::get(BFloatTy, 8),
7701 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7702 case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
7703 ExtractLow = true;
7704 [[fallthrough]];
7705 case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
7706 case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
7707 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
7708 llvm::FixedVectorType::get(BFloatTy, 8),
7709 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7710 case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
7711 ExtractLow = true;
7712 [[fallthrough]];
7713 case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
7714 case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
7715 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
7716 llvm::FixedVectorType::get(HalfTy, 8),
7717 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7718 case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
7719 ExtractLow = true;
7720 [[fallthrough]];
7721 case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
7722 case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
7723 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
7724 llvm::FixedVectorType::get(HalfTy, 8),
7725 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7726 case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
7727 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7728 llvm::FixedVectorType::get(Int8Ty, 8),
7729 Ops[0]->getType(), false, Ops, E, "vfcvtn");
7730 case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
7731 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7732 llvm::FixedVectorType::get(Int8Ty, 8),
7733 llvm::FixedVectorType::get(HalfTy, 4), false, Ops,
7734 E, "vfcvtn");
7735 case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
7736 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7737 llvm::FixedVectorType::get(Int8Ty, 16),
7738 llvm::FixedVectorType::get(HalfTy, 8), false, Ops,
7739 E, "vfcvtn");
7740 case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
7741 llvm::Type *Ty = llvm::FixedVectorType::get(Int8Ty, 16);
7742 Ops[0] = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
7743 uint64_t(0));
7744 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn2, Ty,
7745 Ops[1]->getType(), false, Ops, E, "vfcvtn2");
7746 }
7747
7748 case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
7749 case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
7750 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2, false, HalfTy,
7751 Ops, E, "fdot2");
7752 case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
7753 case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
7754 ExtendLaneArg = true;
7755 [[fallthrough]];
7756 case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
7757 case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
7758 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2_lane,
7759 ExtendLaneArg, HalfTy, Ops, E, "fdot2_lane");
7760 case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
7761 case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
7762 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4, false,
7763 FloatTy, Ops, E, "fdot4");
7764 case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
7765 case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
7766 ExtendLaneArg = true;
7767 [[fallthrough]];
7768 case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
7769 case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
7770 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4_lane,
7771 ExtendLaneArg, FloatTy, Ops, E, "fdot4_lane");
7772
7773 case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
7774 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalb,
7775 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7776 "vmlal");
7777 case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
7778 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalt,
7779 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7780 "vmlal");
7781 case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
7782 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbb,
7783 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7784 "vmlall");
7785 case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
7786 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbt,
7787 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7788 "vmlall");
7789 case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
7790 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltb,
7791 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7792 "vmlall");
7793 case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
7794 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltt,
7795 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7796 "vmlall");
7797 case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
7798 ExtendLaneArg = true;
7799 [[fallthrough]];
7800 case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
7801 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalb_lane,
7802 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7803 case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
7804 ExtendLaneArg = true;
7805 [[fallthrough]];
7806 case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
7807 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalt_lane,
7808 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7809 case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
7810 ExtendLaneArg = true;
7811 [[fallthrough]];
7812 case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
7813 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbb_lane,
7814 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7815 case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
7816 ExtendLaneArg = true;
7817 [[fallthrough]];
7818 case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
7819 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbt_lane,
7820 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7821 case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
7822 ExtendLaneArg = true;
7823 [[fallthrough]];
7824 case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
7825 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltb_lane,
7826 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7827 case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
7828 ExtendLaneArg = true;
7829 [[fallthrough]];
7830 case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
7831 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltt_lane,
7832 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7833 case NEON::BI__builtin_neon_vamin_f16:
7834 case NEON::BI__builtin_neon_vaminq_f16:
7835 case NEON::BI__builtin_neon_vamin_f32:
7836 case NEON::BI__builtin_neon_vaminq_f32:
7837 case NEON::BI__builtin_neon_vaminq_f64: {
7838 Int = Intrinsic::aarch64_neon_famin;
7839 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
7840 }
7841 case NEON::BI__builtin_neon_vamax_f16:
7842 case NEON::BI__builtin_neon_vamaxq_f16:
7843 case NEON::BI__builtin_neon_vamax_f32:
7844 case NEON::BI__builtin_neon_vamaxq_f32:
7845 case NEON::BI__builtin_neon_vamaxq_f64: {
7846 Int = Intrinsic::aarch64_neon_famax;
7847 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
7848 }
7849 case NEON::BI__builtin_neon_vscale_f16:
7850 case NEON::BI__builtin_neon_vscaleq_f16:
7851 case NEON::BI__builtin_neon_vscale_f32:
7852 case NEON::BI__builtin_neon_vscaleq_f32:
7853 case NEON::BI__builtin_neon_vscaleq_f64: {
7854 Int = Intrinsic::aarch64_neon_fp8_fscale;
7855 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fscale");
7856 }
7857 }
7858}
7859
7860 Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
7861 const CallExpr *E) {
7862 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
7863 BuiltinID == BPF::BI__builtin_btf_type_id ||
7864 BuiltinID == BPF::BI__builtin_preserve_type_info ||
7865 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
7866 "unexpected BPF builtin");
7867
7868 // A sequence number, injected into IR builtin functions, to
7869 // prevent CSE when the only difference between two otherwise
7870 // identical calls may be their debuginfo metadata.
7871 static uint32_t BuiltinSeqNum;
7872
7873 switch (BuiltinID) {
7874 default:
7875 llvm_unreachable("Unexpected BPF builtin");
7876 case BPF::BI__builtin_preserve_field_info: {
7877 const Expr *Arg = E->getArg(0);
7878 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
7879
7880 if (!getDebugInfo()) {
7881 CGM.Error(E->getExprLoc(),
7882 "using __builtin_preserve_field_info() without -g");
7883 return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
7884 : EmitLValue(Arg).emitRawPointer(*this);
7885 }
7886
7887 // Enable underlying preserve_*_access_index() generation.
7888 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
7889 IsInPreservedAIRegion = true;
7890 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
7891 : EmitLValue(Arg).emitRawPointer(*this);
7892 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
7893
7894 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7895 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
7896
7897 // Build the IR for the preserve_field_info intrinsic.
7898 llvm::Function *FnGetFieldInfo = Intrinsic::getOrInsertDeclaration(
7899 &CGM.getModule(), Intrinsic::bpf_preserve_field_info,
7900 {FieldAddr->getType()});
7901 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
7902 }
7903 case BPF::BI__builtin_btf_type_id:
7904 case BPF::BI__builtin_preserve_type_info: {
7905 if (!getDebugInfo()) {
7906 CGM.Error(E->getExprLoc(), "using builtin function without -g");
7907 return nullptr;
7908 }
7909
7910 const Expr *Arg0 = E->getArg(0);
7911 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
7912 Arg0->getType(), Arg0->getExprLoc());
7913
7914 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7915 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
7916 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
7917
7918 llvm::Function *FnDecl;
7919 if (BuiltinID == BPF::BI__builtin_btf_type_id)
7920 FnDecl = Intrinsic::getOrInsertDeclaration(
7921 &CGM.getModule(), Intrinsic::bpf_btf_type_id, {});
7922 else
7923 FnDecl = Intrinsic::getOrInsertDeclaration(
7924 &CGM.getModule(), Intrinsic::bpf_preserve_type_info, {});
7925 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
7926 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
7927 return Fn;
7928 }
7929 case BPF::BI__builtin_preserve_enum_value: {
7930 if (!getDebugInfo()) {
7931 CGM.Error(E->getExprLoc(), "using builtin function without -g");
7932 return nullptr;
7933 }
7934
7935 const Expr *Arg0 = E->getArg(0);
7936 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
7937 Arg0->getType(), Arg0->getExprLoc());
7938
7939 // Find enumerator
7940 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
7941 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
7942 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
7943 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
7944
7945 auto InitVal = Enumerator->getInitVal();
7946 std::string InitValStr;
7947 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
7948 InitValStr = std::to_string(InitVal.getSExtValue());
7949 else
7950 InitValStr = std::to_string(InitVal.getZExtValue());
7951 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
7952 Value *EnumStrVal = Builder.CreateGlobalString(EnumStr);
7953
7954 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7955 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
7956 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
7957
7958 llvm::Function *IntrinsicFn = Intrinsic::getOrInsertDeclaration(
7959 &CGM.getModule(), Intrinsic::bpf_preserve_enum_value, {});
7960 CallInst *Fn =
7961 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
7962 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
7963 return Fn;
7964 }
7965 }
7966}
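// Editor's sketch (not part of ARM.cpp): a typical caller of the BPF builtin
// lowered above, compiled with -g so debug info is available. The struct and
// field are hypothetical, and the info-kind constant 1 (field byte size) is
// an assumption to verify against the BPF CO-RE documentation.
struct task { int pid; };
static unsigned pid_field_size(struct task *t) {
  return __builtin_preserve_field_info(t->pid, 1);
}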
7967
7969Value *CodeGenFunction::BuildVector(ArrayRef<llvm::Value *> Ops) {
7970 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
7971 "Not a power-of-two sized vector!");
7972 bool AllConstants = true;
7973 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
7974 AllConstants &= isa<Constant>(Ops[i]);
7975
7976 // If this is a constant vector, create a ConstantVector.
7977 if (AllConstants) {
7978 SmallVector<llvm::Constant *, 16> CstOps;
7979 for (llvm::Value *Op : Ops)
7980 CstOps.push_back(cast<Constant>(Op));
7981 return llvm::ConstantVector::get(CstOps);
7982 }
7983
7984 // Otherwise, insertelement the values to build the vector.
7985 Value *Result = llvm::PoisonValue::get(
7986 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
7987
7988 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7989 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
7990
7991 return Result;
7992}
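// Editor's note: BuildVector above produces one of two IR shapes for a
// 4-element build (sketch, assumed operand types):
//   all-constant ops -> <4 x i32> <i32 0, i32 1, i32 2, i32 3>
//   otherwise        -> a chain of insertelement instructions seeded with
//                       poison, one per operand.
// The assert relies on the bit trick that n & (n - 1) clears the lowest set
// bit, so it is zero exactly for powers of two (and for zero itself):
static_assert((8 & 7) == 0 && (6 & 5) != 0, "power-of-two check");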
7993
7994Value *CodeGenFunction::EmitAArch64CpuInit() {
7995 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
7996 llvm::FunctionCallee Func =
7997 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
7998 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
7999 cast<llvm::GlobalValue>(Func.getCallee())
8000 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
8001 return Builder.CreateCall(Func);
8002}
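// Editor's sketch (assumed, not part of ARM.cpp): the compiler-rt hook the
// call above targets, matching the FunctionType built here (void, no
// parameters). It is expected to populate __aarch64_cpu_features at runtime.
extern "C" void __init_cpu_features_resolver(void);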
8003
8004Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
8005 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
8006 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
8007 llvm::SmallVector<StringRef, 8> Features;
8008 ArgStr.split(Features, "+");
8009 for (auto &Feature : Features) {
8010 Feature = Feature.trim();
8011 if (!llvm::AArch64::parseFMVExtension(Feature))
8012 return Builder.getFalse();
8013 if (Feature != "default")
8014 Features.push_back(Feature);
8015 }
8016 return EmitAArch64CpuSupports(Features);
8017}
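// Editor's sketch (illustrative): the '+'-separated feature string this
// routine parses, as written by a user; "sve" and "sve2" are assumed to be
// valid FMV extension names here. If any name fails parseFMVExtension, the
// whole check constant-folds to false.
//
//   if (__builtin_cpu_supports("sve+sve2")) {
//     /* SVE2 fast path */
//   }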
8018
8019llvm::Value *
8020CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
8021 llvm::APInt FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
8022 Value *Result = Builder.getTrue();
8023 if (FeaturesMask != 0) {
8024 // Get features from structure in runtime library
8025 // struct {
8026 // unsigned long long features;
8027 // } __aarch64_cpu_features;
8028 llvm::Type *STy = llvm::StructType::get(Int64Ty);
8029 llvm::Constant *AArch64CPUFeatures =
8030 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
8031 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
8032 llvm::Value *CpuFeatures = Builder.CreateGEP(
8033 STy, AArch64CPUFeatures,
8034 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
8035 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
8036 CharUnits::fromQuantity(8));
8037 Value *Mask = Builder.getInt(FeaturesMask.trunc(64));
8038 Value *Bitset = Builder.CreateAnd(Features, Mask);
8039 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
8040 Result = Builder.CreateAnd(Result, Cmp);
8041 }
8042 return Result;
8043}
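// Editor's sketch (assumed names, not part of ARM.cpp): the check the IR
// above performs, expressed directly in C++ against the runtime structure
// described in the comment block; the struct tag is invented for this sketch.
struct AArch64CpuFeatures { unsigned long long features; };
extern "C" AArch64CpuFeatures __aarch64_cpu_features;
inline bool cpuSupportsMask(unsigned long long Mask) {
  return (__aarch64_cpu_features.features & Mask) == Mask;
}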