ARM.cpp (clang 23.0.0git)
1//===---------- ARM.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ABIInfo.h"
14#include "CGBuiltin.h"
15#include "CGDebugInfo.h"
16#include "TargetInfo.h"
17#include "clang/Basic/TargetBuiltins.h"
18#include "llvm/IR/InlineAsm.h"
19#include "llvm/IR/IntrinsicsAArch64.h"
20#include "llvm/IR/IntrinsicsARM.h"
21#include "llvm/IR/IntrinsicsBPF.h"
22#include "llvm/TargetParser/AArch64TargetParser.h"
23
24#include <numeric>
25
26using namespace clang;
27using namespace CodeGen;
28using namespace llvm;
29
30static std::optional<CodeGenFunction::MSVCIntrin>
31translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
32 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
33 switch (BuiltinID) {
34 default:
35 return std::nullopt;
36 case clang::AArch64::BI_BitScanForward:
37 case clang::AArch64::BI_BitScanForward64:
38 return MSVCIntrin::_BitScanForward;
39 case clang::AArch64::BI_BitScanReverse:
40 case clang::AArch64::BI_BitScanReverse64:
41 return MSVCIntrin::_BitScanReverse;
42 case clang::AArch64::BI_InterlockedAnd64:
43 return MSVCIntrin::_InterlockedAnd;
44 case clang::AArch64::BI_InterlockedExchange64:
45 return MSVCIntrin::_InterlockedExchange;
46 case clang::AArch64::BI_InterlockedExchangeAdd64:
47 return MSVCIntrin::_InterlockedExchangeAdd;
48 case clang::AArch64::BI_InterlockedExchangeSub64:
49 return MSVCIntrin::_InterlockedExchangeSub;
50 case clang::AArch64::BI_InterlockedOr64:
51 return MSVCIntrin::_InterlockedOr;
52 case clang::AArch64::BI_InterlockedXor64:
53 return MSVCIntrin::_InterlockedXor;
54 case clang::AArch64::BI_InterlockedDecrement64:
55 return MSVCIntrin::_InterlockedDecrement;
56 case clang::AArch64::BI_InterlockedIncrement64:
57 return MSVCIntrin::_InterlockedIncrement;
58 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
59 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
60 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
61 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
62 return MSVCIntrin::_InterlockedExchangeAdd_acq;
63 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
64 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
65 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
66 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
67 return MSVCIntrin::_InterlockedExchangeAdd_rel;
68 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
69 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
70 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
71 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
72 return MSVCIntrin::_InterlockedExchangeAdd_nf;
73 case clang::AArch64::BI_InterlockedExchange8_acq:
74 case clang::AArch64::BI_InterlockedExchange16_acq:
75 case clang::AArch64::BI_InterlockedExchange_acq:
76 case clang::AArch64::BI_InterlockedExchange64_acq:
77 case clang::AArch64::BI_InterlockedExchangePointer_acq:
78 return MSVCIntrin::_InterlockedExchange_acq;
79 case clang::AArch64::BI_InterlockedExchange8_rel:
80 case clang::AArch64::BI_InterlockedExchange16_rel:
81 case clang::AArch64::BI_InterlockedExchange_rel:
82 case clang::AArch64::BI_InterlockedExchange64_rel:
83 case clang::AArch64::BI_InterlockedExchangePointer_rel:
84 return MSVCIntrin::_InterlockedExchange_rel;
85 case clang::AArch64::BI_InterlockedExchange8_nf:
86 case clang::AArch64::BI_InterlockedExchange16_nf:
87 case clang::AArch64::BI_InterlockedExchange_nf:
88 case clang::AArch64::BI_InterlockedExchange64_nf:
89 case clang::AArch64::BI_InterlockedExchangePointer_nf:
90 return MSVCIntrin::_InterlockedExchange_nf;
91 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
92 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
93 case clang::AArch64::BI_InterlockedCompareExchange_acq:
94 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
95 case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
96 return MSVCIntrin::_InterlockedCompareExchange_acq;
97 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
98 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
99 case clang::AArch64::BI_InterlockedCompareExchange_rel:
100 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
101 case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
102 return MSVCIntrin::_InterlockedCompareExchange_rel;
103 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
104 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
105 case clang::AArch64::BI_InterlockedCompareExchange_nf:
106 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
107 return MSVCIntrin::_InterlockedCompareExchange_nf;
108 case clang::AArch64::BI_InterlockedCompareExchange128:
109 return MSVCIntrin::_InterlockedCompareExchange128;
110 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
111 return MSVCIntrin::_InterlockedCompareExchange128_acq;
112 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
113 return MSVCIntrin::_InterlockedCompareExchange128_nf;
114 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
115 return MSVCIntrin::_InterlockedCompareExchange128_rel;
116 case clang::AArch64::BI_InterlockedOr8_acq:
117 case clang::AArch64::BI_InterlockedOr16_acq:
118 case clang::AArch64::BI_InterlockedOr_acq:
119 case clang::AArch64::BI_InterlockedOr64_acq:
120 return MSVCIntrin::_InterlockedOr_acq;
121 case clang::AArch64::BI_InterlockedOr8_rel:
122 case clang::AArch64::BI_InterlockedOr16_rel:
123 case clang::AArch64::BI_InterlockedOr_rel:
124 case clang::AArch64::BI_InterlockedOr64_rel:
125 return MSVCIntrin::_InterlockedOr_rel;
126 case clang::AArch64::BI_InterlockedOr8_nf:
127 case clang::AArch64::BI_InterlockedOr16_nf:
128 case clang::AArch64::BI_InterlockedOr_nf:
129 case clang::AArch64::BI_InterlockedOr64_nf:
130 return MSVCIntrin::_InterlockedOr_nf;
131 case clang::AArch64::BI_InterlockedXor8_acq:
132 case clang::AArch64::BI_InterlockedXor16_acq:
133 case clang::AArch64::BI_InterlockedXor_acq:
134 case clang::AArch64::BI_InterlockedXor64_acq:
135 return MSVCIntrin::_InterlockedXor_acq;
136 case clang::AArch64::BI_InterlockedXor8_rel:
137 case clang::AArch64::BI_InterlockedXor16_rel:
138 case clang::AArch64::BI_InterlockedXor_rel:
139 case clang::AArch64::BI_InterlockedXor64_rel:
140 return MSVCIntrin::_InterlockedXor_rel;
141 case clang::AArch64::BI_InterlockedXor8_nf:
142 case clang::AArch64::BI_InterlockedXor16_nf:
143 case clang::AArch64::BI_InterlockedXor_nf:
144 case clang::AArch64::BI_InterlockedXor64_nf:
145 return MSVCIntrin::_InterlockedXor_nf;
146 case clang::AArch64::BI_InterlockedAnd8_acq:
147 case clang::AArch64::BI_InterlockedAnd16_acq:
148 case clang::AArch64::BI_InterlockedAnd_acq:
149 case clang::AArch64::BI_InterlockedAnd64_acq:
150 return MSVCIntrin::_InterlockedAnd_acq;
151 case clang::AArch64::BI_InterlockedAnd8_rel:
152 case clang::AArch64::BI_InterlockedAnd16_rel:
153 case clang::AArch64::BI_InterlockedAnd_rel:
154 case clang::AArch64::BI_InterlockedAnd64_rel:
155 return MSVCIntrin::_InterlockedAnd_rel;
156 case clang::AArch64::BI_InterlockedAnd8_nf:
157 case clang::AArch64::BI_InterlockedAnd16_nf:
158 case clang::AArch64::BI_InterlockedAnd_nf:
159 case clang::AArch64::BI_InterlockedAnd64_nf:
160 return MSVCIntrin::_InterlockedAnd_nf;
161 case clang::AArch64::BI_InterlockedIncrement16_acq:
162 case clang::AArch64::BI_InterlockedIncrement_acq:
163 case clang::AArch64::BI_InterlockedIncrement64_acq:
164 return MSVCIntrin::_InterlockedIncrement_acq;
165 case clang::AArch64::BI_InterlockedIncrement16_rel:
166 case clang::AArch64::BI_InterlockedIncrement_rel:
167 case clang::AArch64::BI_InterlockedIncrement64_rel:
168 return MSVCIntrin::_InterlockedIncrement_rel;
169 case clang::AArch64::BI_InterlockedIncrement16_nf:
170 case clang::AArch64::BI_InterlockedIncrement_nf:
171 case clang::AArch64::BI_InterlockedIncrement64_nf:
172 return MSVCIntrin::_InterlockedIncrement_nf;
173 case clang::AArch64::BI_InterlockedDecrement16_acq:
174 case clang::AArch64::BI_InterlockedDecrement_acq:
175 case clang::AArch64::BI_InterlockedDecrement64_acq:
176 return MSVCIntrin::_InterlockedDecrement_acq;
177 case clang::AArch64::BI_InterlockedDecrement16_rel:
178 case clang::AArch64::BI_InterlockedDecrement_rel:
179 case clang::AArch64::BI_InterlockedDecrement64_rel:
180 return MSVCIntrin::_InterlockedDecrement_rel;
181 case clang::AArch64::BI_InterlockedDecrement16_nf:
182 case clang::AArch64::BI_InterlockedDecrement_nf:
183 case clang::AArch64::BI_InterlockedDecrement64_nf:
184 return MSVCIntrin::_InterlockedDecrement_nf;
185 }
186 llvm_unreachable("must return from switch");
187}
188
189static std::optional<CodeGenFunction::MSVCIntrin>
190translateArmToMsvcIntrin(unsigned BuiltinID) {
191 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
192 switch (BuiltinID) {
193 default:
194 return std::nullopt;
195 case clang::ARM::BI_BitScanForward:
196 case clang::ARM::BI_BitScanForward64:
197 return MSVCIntrin::_BitScanForward;
198 case clang::ARM::BI_BitScanReverse:
199 case clang::ARM::BI_BitScanReverse64:
200 return MSVCIntrin::_BitScanReverse;
201 case clang::ARM::BI_InterlockedAnd64:
202 return MSVCIntrin::_InterlockedAnd;
203 case clang::ARM::BI_InterlockedExchange64:
204 return MSVCIntrin::_InterlockedExchange;
205 case clang::ARM::BI_InterlockedExchangeAdd64:
206 return MSVCIntrin::_InterlockedExchangeAdd;
207 case clang::ARM::BI_InterlockedExchangeSub64:
208 return MSVCIntrin::_InterlockedExchangeSub;
209 case clang::ARM::BI_InterlockedOr64:
210 return MSVCIntrin::_InterlockedOr;
211 case clang::ARM::BI_InterlockedXor64:
212 return MSVCIntrin::_InterlockedXor;
213 case clang::ARM::BI_InterlockedDecrement64:
214 return MSVCIntrin::_InterlockedDecrement;
215 case clang::ARM::BI_InterlockedIncrement64:
216 return MSVCIntrin::_InterlockedIncrement;
217 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
218 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
219 case clang::ARM::BI_InterlockedExchangeAdd_acq:
220 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
221 return MSVCIntrin::_InterlockedExchangeAdd_acq;
222 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
223 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
224 case clang::ARM::BI_InterlockedExchangeAdd_rel:
225 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
226 return MSVCIntrin::_InterlockedExchangeAdd_rel;
227 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
228 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
229 case clang::ARM::BI_InterlockedExchangeAdd_nf:
230 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
231 return MSVCIntrin::_InterlockedExchangeAdd_nf;
232 case clang::ARM::BI_InterlockedExchange8_acq:
233 case clang::ARM::BI_InterlockedExchange16_acq:
234 case clang::ARM::BI_InterlockedExchange_acq:
235 case clang::ARM::BI_InterlockedExchange64_acq:
236 case clang::ARM::BI_InterlockedExchangePointer_acq:
237 return MSVCIntrin::_InterlockedExchange_acq;
238 case clang::ARM::BI_InterlockedExchange8_rel:
239 case clang::ARM::BI_InterlockedExchange16_rel:
240 case clang::ARM::BI_InterlockedExchange_rel:
241 case clang::ARM::BI_InterlockedExchange64_rel:
242 case clang::ARM::BI_InterlockedExchangePointer_rel:
243 return MSVCIntrin::_InterlockedExchange_rel;
244 case clang::ARM::BI_InterlockedExchange8_nf:
245 case clang::ARM::BI_InterlockedExchange16_nf:
246 case clang::ARM::BI_InterlockedExchange_nf:
247 case clang::ARM::BI_InterlockedExchange64_nf:
248 case clang::ARM::BI_InterlockedExchangePointer_nf:
249 return MSVCIntrin::_InterlockedExchange_nf;
250 case clang::ARM::BI_InterlockedCompareExchange8_acq:
251 case clang::ARM::BI_InterlockedCompareExchange16_acq:
252 case clang::ARM::BI_InterlockedCompareExchange_acq:
253 case clang::ARM::BI_InterlockedCompareExchange64_acq:
254 case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
255 return MSVCIntrin::_InterlockedCompareExchange_acq;
256 case clang::ARM::BI_InterlockedCompareExchange8_rel:
257 case clang::ARM::BI_InterlockedCompareExchange16_rel:
258 case clang::ARM::BI_InterlockedCompareExchange_rel:
259 case clang::ARM::BI_InterlockedCompareExchange64_rel:
260 case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
261 return MSVCIntrin::_InterlockedCompareExchange_rel;
262 case clang::ARM::BI_InterlockedCompareExchange8_nf:
263 case clang::ARM::BI_InterlockedCompareExchange16_nf:
264 case clang::ARM::BI_InterlockedCompareExchange_nf:
265 case clang::ARM::BI_InterlockedCompareExchange64_nf:
266 return MSVCIntrin::_InterlockedCompareExchange_nf;
267 case clang::ARM::BI_InterlockedOr8_acq:
268 case clang::ARM::BI_InterlockedOr16_acq:
269 case clang::ARM::BI_InterlockedOr_acq:
270 case clang::ARM::BI_InterlockedOr64_acq:
271 return MSVCIntrin::_InterlockedOr_acq;
272 case clang::ARM::BI_InterlockedOr8_rel:
273 case clang::ARM::BI_InterlockedOr16_rel:
274 case clang::ARM::BI_InterlockedOr_rel:
275 case clang::ARM::BI_InterlockedOr64_rel:
276 return MSVCIntrin::_InterlockedOr_rel;
277 case clang::ARM::BI_InterlockedOr8_nf:
278 case clang::ARM::BI_InterlockedOr16_nf:
279 case clang::ARM::BI_InterlockedOr_nf:
280 case clang::ARM::BI_InterlockedOr64_nf:
281 return MSVCIntrin::_InterlockedOr_nf;
282 case clang::ARM::BI_InterlockedXor8_acq:
283 case clang::ARM::BI_InterlockedXor16_acq:
284 case clang::ARM::BI_InterlockedXor_acq:
285 case clang::ARM::BI_InterlockedXor64_acq:
286 return MSVCIntrin::_InterlockedXor_acq;
287 case clang::ARM::BI_InterlockedXor8_rel:
288 case clang::ARM::BI_InterlockedXor16_rel:
289 case clang::ARM::BI_InterlockedXor_rel:
290 case clang::ARM::BI_InterlockedXor64_rel:
291 return MSVCIntrin::_InterlockedXor_rel;
292 case clang::ARM::BI_InterlockedXor8_nf:
293 case clang::ARM::BI_InterlockedXor16_nf:
294 case clang::ARM::BI_InterlockedXor_nf:
295 case clang::ARM::BI_InterlockedXor64_nf:
296 return MSVCIntrin::_InterlockedXor_nf;
297 case clang::ARM::BI_InterlockedAnd8_acq:
298 case clang::ARM::BI_InterlockedAnd16_acq:
299 case clang::ARM::BI_InterlockedAnd_acq:
300 case clang::ARM::BI_InterlockedAnd64_acq:
301 return MSVCIntrin::_InterlockedAnd_acq;
302 case clang::ARM::BI_InterlockedAnd8_rel:
303 case clang::ARM::BI_InterlockedAnd16_rel:
304 case clang::ARM::BI_InterlockedAnd_rel:
305 case clang::ARM::BI_InterlockedAnd64_rel:
306 return MSVCIntrin::_InterlockedAnd_rel;
307 case clang::ARM::BI_InterlockedAnd8_nf:
308 case clang::ARM::BI_InterlockedAnd16_nf:
309 case clang::ARM::BI_InterlockedAnd_nf:
310 case clang::ARM::BI_InterlockedAnd64_nf:
311 return MSVCIntrin::_InterlockedAnd_nf;
312 case clang::ARM::BI_InterlockedIncrement16_acq:
313 case clang::ARM::BI_InterlockedIncrement_acq:
314 case clang::ARM::BI_InterlockedIncrement64_acq:
315 return MSVCIntrin::_InterlockedIncrement_acq;
316 case clang::ARM::BI_InterlockedIncrement16_rel:
317 case clang::ARM::BI_InterlockedIncrement_rel:
318 case clang::ARM::BI_InterlockedIncrement64_rel:
319 return MSVCIntrin::_InterlockedIncrement_rel;
320 case clang::ARM::BI_InterlockedIncrement16_nf:
321 case clang::ARM::BI_InterlockedIncrement_nf:
322 case clang::ARM::BI_InterlockedIncrement64_nf:
323 return MSVCIntrin::_InterlockedIncrement_nf;
324 case clang::ARM::BI_InterlockedDecrement16_acq:
325 case clang::ARM::BI_InterlockedDecrement_acq:
326 case clang::ARM::BI_InterlockedDecrement64_acq:
327 return MSVCIntrin::_InterlockedDecrement_acq;
328 case clang::ARM::BI_InterlockedDecrement16_rel:
329 case clang::ARM::BI_InterlockedDecrement_rel:
330 case clang::ARM::BI_InterlockedDecrement64_rel:
331 return MSVCIntrin::_InterlockedDecrement_rel;
332 case clang::ARM::BI_InterlockedDecrement16_nf:
333 case clang::ARM::BI_InterlockedDecrement_nf:
334 case clang::ARM::BI_InterlockedDecrement64_nf:
335 return MSVCIntrin::_InterlockedDecrement_nf;
336 }
337 llvm_unreachable("must return from switch");
338}
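// Note: both translate helpers above collapse the width-suffixed (_8/_16/_64)
// and memory-ordering-suffixed (_acq/_rel/_nf) MSVC builtin spellings into a
// single CodeGenFunction::MSVCIntrin value, so the shared MSVC lowering is
// written once per operation and ordering. A hedged sketch of how a builtin
// emitter is expected to use them (call site illustrative, not quoted):
//
//   if (std::optional<CodeGenFunction::MSVCIntrin> MsvcIntId =
//           translateAarch64ToMsvcIntrin(BuiltinID))
//     return EmitMSVCBuiltinExpr(*MsvcIntId, E);
//   // otherwise fall through to the target-specific lowering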
339
340// Emit an intrinsic where all operands are of the same type as the result.
341// Depending on mode, this may be a constrained floating-point intrinsic.
342static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
343 unsigned IntrinsicID,
344 unsigned ConstrainedIntrinsicID,
345 llvm::Type *Ty,
346 ArrayRef<Value *> Args) {
347 Function *F;
348 if (CGF.Builder.getIsFPConstrained())
349 F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
350 else
351 F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
352
353 if (CGF.Builder.getIsFPConstrained())
354 return CGF.Builder.CreateConstrainedFPCall(F, Args);
355 else
356 return CGF.Builder.CreateCall(F, Args);
357}
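// Illustrative sketch (not quoted from compiler output) of what the helper
// above selects between, e.g. for IntrinsicID = Intrinsic::sqrt and
// ConstrainedIntrinsicID = Intrinsic::experimental_constrained_sqrt on
// <2 x double>:
//
//   ; default FP environment
//   %r = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
//
//   ; strict/constrained FP mode (rounding and exception metadata are added
//   ; by CreateConstrainedFPCall from the builder's current FP settings)
//   %r = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
//            <2 x double> %x, metadata !"round.dynamic",
//            metadata !"fpexcept.strict")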
358
359static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
360 NeonTypeFlags TypeFlags,
361 bool HasFastHalfType = true,
362 bool V1Ty = false,
363 bool AllowBFloatArgsAndRet = true) {
364 int IsQuad = TypeFlags.isQuad();
365 switch (TypeFlags.getEltType()) {
366 case NeonTypeFlags::Int8:
367 case NeonTypeFlags::Poly8:
368 case NeonTypeFlags::MFloat8:
369 return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
370 case NeonTypeFlags::Int16:
371 case NeonTypeFlags::Poly16:
372 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
373 case NeonTypeFlags::BFloat16:
374 if (AllowBFloatArgsAndRet)
375 return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
376 else
377 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
378 case NeonTypeFlags::Float16:
379 if (HasFastHalfType)
380 return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
381 else
382 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
383 case NeonTypeFlags::Int32:
384 return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
385 case NeonTypeFlags::Int64:
386 case NeonTypeFlags::Poly64:
387 return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
388 case NeonTypeFlags::Poly128:
389 // FIXME: i128 and f128 are not fully supported in Clang and LLVM;
390 // a lot of the i128 and f128 API is still missing,
391 // so we use v16i8 to represent poly128 and rely on pattern matching.
392 return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
393 case NeonTypeFlags::Float32:
394 return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
395 case NeonTypeFlags::Float64:
396 return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
397 }
398 llvm_unreachable("Unknown vector element type!");
399}
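// Worked examples of the mapping above (element type flag, quad bit) -> LLVM
// vector type; the quad bit selects the 128-bit Q-register width:
//   Int8,    !quad -> <8 x i8>        Int8,    quad -> <16 x i8>
//   Float16, quad  -> <8 x half>      (falls back to <8 x i16> without fast
//                                      half-type support)
//   Float32, !quad -> <2 x float>     Poly128, any  -> <16 x i8> stand-in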
400
401static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
402 NeonTypeFlags IntTypeFlags) {
403 int IsQuad = IntTypeFlags.isQuad();
404 switch (IntTypeFlags.getEltType()) {
405 case NeonTypeFlags::Int16:
406 return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
407 case NeonTypeFlags::Int32:
408 return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
409 case NeonTypeFlags::Int64:
410 return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
411 default:
412 llvm_unreachable("Type can't be converted to floating-point!");
413 }
414}
415
416Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
417 const ElementCount &Count) {
418 Value *SV = llvm::ConstantVector::getSplat(Count, C);
419 return Builder.CreateShuffleVector(V, V, SV, "lane");
420}
421
422Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
423 ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
424 return EmitNeonSplat(V, C, EC);
425}
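// Example of the splat produced above: for a <4 x i32> %v and C == i32 1, the
// shuffle mask is the splat of 1, so the result duplicates lane 1 into every
// element (IR shown for illustration):
//   %lane = shufflevector <4 x i32> %v, <4 x i32> %v,
//                         <4 x i32> <i32 1, i32 1, i32 1, i32 1>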
426
427Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value *> &Ops,
428 const char *name,
429 unsigned shift, bool rightshift) {
430 unsigned j = 0;
431 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
432 ai != ae; ++ai, ++j) {
433 if (F->isConstrainedFPIntrinsic())
434 if (ai->getType()->isMetadataTy())
435 continue;
436 if (shift > 0 && shift == j)
437 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
438 else
439 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
440 }
441
442 if (F->isConstrainedFPIntrinsic())
443 return Builder.CreateConstrainedFPCall(F, Ops, name);
444 else
445 return Builder.CreateCall(F, Ops, name);
446}
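// The loop above bitcasts each operand to the parameter type expected by the
// intrinsic declaration, except the operand at index 'shift', which is rebuilt
// as an immediate shift vector (negated when 'rightshift' is set). A hedged
// sketch of a typical call site for an immediate right-shift builtin (argument
// values illustrative):
//   Function *F = CGM.getIntrinsic(Int, Ty);   // Int chosen from the maps below
//   return EmitNeonCall(F, Ops, "vrshr_n", /*shift=*/1, /*rightshift=*/true);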
447
448Value *CodeGenFunction::EmitFP8NeonCall(unsigned IID,
449 ArrayRef<llvm::Type *> Tys,
450 SmallVectorImpl<llvm::Value *> &Ops,
451 const CallExpr *E, const char *name) {
452 llvm::Value *FPM =
453 EmitScalarOrConstFoldImmArg(/* ICEArguments */ 0, E->getNumArgs() - 1, E);
454 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr), FPM);
455 return EmitNeonCall(CGM.getIntrinsic(IID, Tys), Ops, name);
456}
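// Sketch of the IR shape produced by EmitFP8NeonCall above (the FP8 intrinsic
// name is a placeholder): the builtin's trailing fpm operand is written to the
// FPMR system register first, then the intrinsic itself is emitted.
//   call void @llvm.aarch64.set.fpmr(i64 %fpm)
//   %r = call <...> @llvm.aarch64.neon.<fp8-intrinsic>(...)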
457
458llvm::Value *CodeGenFunction::EmitFP8NeonFDOTCall(
459 unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
460 SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
461
462 const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
463 RetTy->getPrimitiveSizeInBits();
464 llvm::Type *Tys[] = {llvm::FixedVectorType::get(RetTy, ElemCount),
465 Ops[1]->getType()};
466 if (ExtendLaneArg) {
467 auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
468 Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
469 uint64_t(0));
470 }
471 return EmitFP8NeonCall(IID, Tys, Ops, E, name);
472}
473
474llvm::Value *CodeGenFunction::EmitFP8NeonFMLACall(
475 unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
476 SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
477
478 if (ExtendLaneArg) {
479 auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
480 Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
481 uint64_t(0));
482 }
483 const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
484 RetTy->getPrimitiveSizeInBits();
485 return EmitFP8NeonCall(IID, {llvm::FixedVectorType::get(RetTy, ElemCount)},
486 Ops, E, name);
487}
488
489Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
490 bool neg) {
491 int SV = cast<ConstantInt>(V)->getSExtValue();
492 return ConstantInt::getSigned(Ty, neg ? -SV : SV);
493}
494
495Value *CodeGenFunction::EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
496 llvm::Type *Ty1, bool Extract,
497 SmallVectorImpl<llvm::Value *> &Ops,
498 const CallExpr *E,
499 const char *name) {
500 llvm::Type *Tys[] = {Ty0, Ty1};
501 if (Extract) {
502 // Op[0] is mfloat8x16_t, but the intrinsic converts only the lower part of
503 // the vector.
504 Tys[1] = llvm::FixedVectorType::get(Int8Ty, 8);
505 Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], uint64_t(0));
506 }
507 return EmitFP8NeonCall(IID, Tys, Ops, E, name);
508}
509
510// Right-shift a vector by a constant.
511Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
512 llvm::Type *Ty, bool usgn,
513 const char *name) {
514 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
515
516 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
517 int EltSize = VTy->getScalarSizeInBits();
518
519 Vec = Builder.CreateBitCast(Vec, Ty);
520
521 // lshr/ashr are undefined when the shift amount is equal to the vector
522 // element size.
523 if (ShiftAmt == EltSize) {
524 if (usgn) {
525 // Right-shifting an unsigned value by its size yields 0.
526 return llvm::ConstantAggregateZero::get(VTy);
527 } else {
528 // Right-shifting a signed value by its size is equivalent
529 // to a shift of size-1.
530 --ShiftAmt;
531 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
532 }
533 }
534
535 Shift = EmitNeonShiftVector(Shift, Ty, false);
536 if (usgn)
537 return Builder.CreateLShr(Vec, Shift, name);
538 else
539 return Builder.CreateAShr(Vec, Shift, name);
540}
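// Example of the edge case handled above for a 32-bit element shifted by 32:
//   vshrq_n_u32(v, 32)  ->  zeroinitializer              (unsigned: all bits out)
//   vshrq_n_s32(v, 32)  ->  ashr <4 x i32> %v, splat(31) (signed: sign fill)
// This avoids emitting lshr/ashr by the full element width, which would yield
// a poison value in LLVM IR.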
541
542enum {
543 AddRetType = (1 << 0),
544 Add1ArgType = (1 << 1),
545 Add2ArgTypes = (1 << 2),
546
547 VectorizeRetType = (1 << 3),
548 VectorizeArgTypes = (1 << 4),
549
550 InventFloatType = (1 << 5),
551 UnsignedAlts = (1 << 6),
552
553 Use64BitVectors = (1 << 7),
554 Use128BitVectors = (1 << 8),
555
556 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
557 VectorRet = AddRetType | VectorizeRetType,
558 VectorRetGetArgs01 =
559 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
560 FpCmpzModifiers =
561 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
562};
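// These modifier bits describe how the overload types passed to
// CGM.getIntrinsic() are assembled from a builtin call, and they are combined
// per table entry below. As a reading of the flags (not a quote from the
// original comments): AddRetType | Add1ArgType means "overload the intrinsic
// on the call's return type and on the type of its first argument", and
// UnsignedAlts selects between the unsigned LLVMIntrinsic and the signed
// AltLLVMIntrinsic according to the builtin's type flags.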
563
564namespace {
565struct ARMVectorIntrinsicInfo {
566 const char *NameHint;
567 unsigned BuiltinID;
568 unsigned LLVMIntrinsic;
569 unsigned AltLLVMIntrinsic;
570 uint64_t TypeModifier;
571
572 bool operator<(unsigned RHSBuiltinID) const {
573 return BuiltinID < RHSBuiltinID;
574 }
575 bool operator<(const ARMVectorIntrinsicInfo &TE) const {
576 return BuiltinID < TE.BuiltinID;
577 }
578};
579} // end anonymous namespace
580
581#define NEONMAP0(NameBase) \
582 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
583
584#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
585 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
586 Intrinsic::LLVMIntrinsic, 0, TypeModifier }
587
588#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
589 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
590 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
591 TypeModifier }
592
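// For reference, the macros above expand an entry such as
//   NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType)
// into the aggregate initializer
//   { "vcls_v", NEON::BI__builtin_neon_vcls_v,
//     Intrinsic::arm_neon_vcls, 0, Add1ArgType }
// i.e. a NameHint string, the clang builtin ID, the primary LLVM intrinsic,
// no alternate intrinsic, and the type-modifier flags.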
593// clang-format off
594static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
595 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
596 NEONMAP0(splat_lane_v),
597 NEONMAP0(splat_laneq_v),
598 NEONMAP0(splatq_lane_v),
599 NEONMAP0(splatq_laneq_v),
600 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
601 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
602 NEONMAP1(vabs_v, arm_neon_vabs, 0),
603 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
604 NEONMAP0(vadd_v),
605 NEONMAP0(vaddhn_v),
606 NEONMAP0(vaddq_v),
607 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
608 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
609 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
610 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
611 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
612 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
613 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
614 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
615 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
616 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
617 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
618 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
619 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
620 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
621 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
622 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
623 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
624 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
625 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
626 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
627 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
628 NEONMAP1(vcage_v, arm_neon_vacge, 0),
629 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
630 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
631 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
632 NEONMAP1(vcale_v, arm_neon_vacge, 0),
633 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
634 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
635 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
636 NEONMAP0(vceqz_v),
637 NEONMAP0(vceqzq_v),
638 NEONMAP0(vcgez_v),
639 NEONMAP0(vcgezq_v),
640 NEONMAP0(vcgtz_v),
641 NEONMAP0(vcgtzq_v),
642 NEONMAP0(vclez_v),
643 NEONMAP0(vclezq_v),
644 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
645 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
646 NEONMAP0(vcltz_v),
647 NEONMAP0(vcltzq_v),
648 NEONMAP1(vclz_v, ctlz, Add1ArgType),
649 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
650 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
651 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
652 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
653 NEONMAP0(vcvt_f16_s16),
654 NEONMAP0(vcvt_f16_u16),
655 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
656 NEONMAP0(vcvt_f32_v),
657 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
658 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
659 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
660 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
661 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
662 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
663 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
664 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
665 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
666 NEONMAP0(vcvt_s16_f16),
667 NEONMAP0(vcvt_s32_v),
668 NEONMAP0(vcvt_s64_v),
669 NEONMAP0(vcvt_u16_f16),
670 NEONMAP0(vcvt_u32_v),
671 NEONMAP0(vcvt_u64_v),
672 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
673 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
674 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
675 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
676 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
677 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
678 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
679 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
680 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
681 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
682 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
683 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
684 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
685 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
686 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
687 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
688 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
689 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
690 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
691 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
692 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
693 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
694 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
695 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
696 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
697 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
698 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
699 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
700 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
701 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
702 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
703 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
704 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
705 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
706 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
707 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
708 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
709 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
710 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
711 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
712 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
713 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
714 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
715 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
716 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
717 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
718 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
719 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
720 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
721 NEONMAP0(vcvtq_f16_s16),
722 NEONMAP0(vcvtq_f16_u16),
723 NEONMAP0(vcvtq_f32_v),
724 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
725 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
726 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
727 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
728 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
729 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
730 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
731 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
732 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
733 NEONMAP0(vcvtq_s16_f16),
734 NEONMAP0(vcvtq_s32_v),
735 NEONMAP0(vcvtq_s64_v),
736 NEONMAP0(vcvtq_u16_f16),
737 NEONMAP0(vcvtq_u32_v),
738 NEONMAP0(vcvtq_u64_v),
739 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
740 NEONMAP1(vdot_u32, arm_neon_udot, 0),
741 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
742 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
743 NEONMAP0(vext_v),
744 NEONMAP0(vextq_v),
745 NEONMAP0(vfma_v),
746 NEONMAP0(vfmaq_v),
747 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
748 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
749 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
750 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
751 NEONMAP0(vld1_dup_v),
752 NEONMAP1(vld1_v, arm_neon_vld1, 0),
753 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
754 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
755 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
756 NEONMAP0(vld1q_dup_v),
757 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
758 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
759 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
760 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
761 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
762 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
763 NEONMAP1(vld2_v, arm_neon_vld2, 0),
764 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
765 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
766 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
767 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
768 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
769 NEONMAP1(vld3_v, arm_neon_vld3, 0),
770 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
771 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
772 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
773 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
774 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
775 NEONMAP1(vld4_v, arm_neon_vld4, 0),
776 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
777 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
778 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
779 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
780 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
781 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
782 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
783 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
784 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
785 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
786 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
787 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
788 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
789 NEONMAP0(vmovl_v),
790 NEONMAP0(vmovn_v),
791 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
792 NEONMAP0(vmull_v),
793 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
794 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
795 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
796 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
797 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
798 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
799 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
800 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
801 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
802 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
803 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
804 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
805 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
806 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
807 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
808 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
809 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
810 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
811 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
812 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
813 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
814 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
815 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
816 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
817 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
818 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
819 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
820 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
821 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
822 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
823 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
824 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
825 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
826 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
827 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
828 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
829 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
830 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
831 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
832 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
833 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
834 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
835 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
836 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
837 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
838 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
839 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
840 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
841 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
842 NEONMAP1(vrnd_v, trunc, Add1ArgType),
843 NEONMAP1(vrnda_v, round, Add1ArgType),
844 NEONMAP1(vrndaq_v, round, Add1ArgType),
845 NEONMAP0(vrndi_v),
846 NEONMAP0(vrndiq_v),
847 NEONMAP1(vrndm_v, floor, Add1ArgType),
848 NEONMAP1(vrndmq_v, floor, Add1ArgType),
849 NEONMAP1(vrndn_v, roundeven, Add1ArgType),
850 NEONMAP1(vrndnq_v, roundeven, Add1ArgType),
851 NEONMAP1(vrndp_v, ceil, Add1ArgType),
852 NEONMAP1(vrndpq_v, ceil, Add1ArgType),
853 NEONMAP1(vrndq_v, trunc, Add1ArgType),
854 NEONMAP1(vrndx_v, rint, Add1ArgType),
855 NEONMAP1(vrndxq_v, rint, Add1ArgType),
856 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
857 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
858 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
859 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
860 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
861 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
862 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
863 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
864 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
865 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
866 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
867 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
868 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
869 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
870 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
871 NEONMAP0(vshl_n_v),
872 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
873 NEONMAP0(vshll_n_v),
874 NEONMAP0(vshlq_n_v),
875 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
876 NEONMAP0(vshr_n_v),
877 NEONMAP0(vshrn_n_v),
878 NEONMAP0(vshrq_n_v),
879 NEONMAP1(vst1_v, arm_neon_vst1, 0),
880 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
881 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
882 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
883 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
884 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
885 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
886 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
887 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
888 NEONMAP1(vst2_v, arm_neon_vst2, 0),
889 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
890 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
891 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
892 NEONMAP1(vst3_v, arm_neon_vst3, 0),
893 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
894 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
895 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
896 NEONMAP1(vst4_v, arm_neon_vst4, 0),
897 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
898 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
899 NEONMAP0(vsubhn_v),
900 NEONMAP0(vtrn_v),
901 NEONMAP0(vtrnq_v),
902 NEONMAP0(vtst_v),
903 NEONMAP0(vtstq_v),
904 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
905 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
906 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
907 NEONMAP0(vuzp_v),
908 NEONMAP0(vuzpq_v),
909 NEONMAP0(vzip_v),
910 NEONMAP0(vzipq_v)
911};
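// The intrinsic tables are kept sorted by BuiltinID (the operator< overloads
// on ARMVectorIntrinsicInfo exist for exactly this), so a lookup can be a
// binary search. A minimal sketch, assuming a lower_bound-style search over
// the table (the real lookup helper in this file is expected to do the
// equivalent):
//
//   const ARMVectorIntrinsicInfo *Info =
//       llvm::lower_bound(ARMSIMDIntrinsicMap, BuiltinID);
//   if (Info != std::end(ARMSIMDIntrinsicMap) && Info->BuiltinID == BuiltinID)
//     /* hit: use Info->LLVMIntrinsic, Info->AltLLVMIntrinsic, TypeModifier */;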
912
913static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
914 NEONMAP0(splat_lane_v),
915 NEONMAP0(splat_laneq_v),
916 NEONMAP0(splatq_lane_v),
917 NEONMAP0(splatq_laneq_v),
918 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
919 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
920 NEONMAP0(vadd_v),
921 NEONMAP0(vaddhn_v),
922 NEONMAP0(vaddq_p128),
923 NEONMAP0(vaddq_v),
924 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
925 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
926 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
927 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
928 NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
929 NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
930 NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
931 NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
932 NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
933 NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
934 NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
935 NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
936 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
937 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
938 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
939 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
940 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
941 NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
942 NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
943 NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
944 NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
945 NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
946 NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
947 NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
948 NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
949 NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
950 NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
951 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
952 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
953 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
954 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
955 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
956 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
957 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
958 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
959 NEONMAP0(vceqz_v),
960 NEONMAP0(vceqzq_v),
961 NEONMAP0(vcgez_v),
962 NEONMAP0(vcgezq_v),
963 NEONMAP0(vcgtz_v),
964 NEONMAP0(vcgtzq_v),
965 NEONMAP0(vclez_v),
966 NEONMAP0(vclezq_v),
967 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
968 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
969 NEONMAP0(vcltz_v),
970 NEONMAP0(vcltzq_v),
971 NEONMAP1(vclz_v, ctlz, Add1ArgType),
972 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
973 NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
974 NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
975 NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
976 NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
977 NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
978 NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
979 NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
980 NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
981 NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
982 NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
983 NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
984 NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
985 NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
986 NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
987 NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
988 NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
989 NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
990 NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
991 NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
992 NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
993 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
994 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
995 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
996 NEONMAP0(vcvt_f16_s16),
997 NEONMAP0(vcvt_f16_u16),
998 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
999 NEONMAP0(vcvt_f32_v),
1000 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
1001 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
1002 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1003 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1004 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
1005 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
1006 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
1007 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
1008 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
1009 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
1010 NEONMAP0(vcvtq_f16_s16),
1011 NEONMAP0(vcvtq_f16_u16),
1012 NEONMAP0(vcvtq_f32_v),
1013 NEONMAP0(vcvtq_high_bf16_f32),
1014 NEONMAP0(vcvtq_low_bf16_f32),
1015 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
1016 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
1017 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1018 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1019 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
1020 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
1021 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
1022 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
1023 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
1024 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
1025 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
1026 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
1027 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
1028 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
1029 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
1030 NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1031 NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1032 NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1033 NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1034 NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1035 NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1036 NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1037 NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1038 NEONMAP0(vext_v),
1039 NEONMAP0(vextq_v),
1040 NEONMAP0(vfma_v),
1041 NEONMAP0(vfmaq_v),
1042 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
1043 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
1044 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
1045 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
1046 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
1047 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
1048 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
1049 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
1050 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
1051 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
1052 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
1053 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
1054 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
1055 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
1056 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
1057 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
1058 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
1059 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
1060 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
1061 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
1062 NEONMAP0(vmovl_v),
1063 NEONMAP0(vmovn_v),
1064 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
1065 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
1066 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
1067 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
1068 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
1069 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
1070 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
1071 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
1072 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
1073 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
1074 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
1075 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
1076 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
1077 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
1078 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
1079 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
1080 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
1081 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
1082 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
1083 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
1084 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
1085 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
1086 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
1087 NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
1088 NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
1089 NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
1090 NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
1091 NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
1092 NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
1093 NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
1094 NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
1095 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
1096 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
1097 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
1098 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
1099 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
1100 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
1101 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
1102 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
1103 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
1104 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
1105 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
1106 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
1107 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
1108 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
1109 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
1110 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
1111 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
1112 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
1113 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
1114 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
1115 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
1116 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
1117 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
1118 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
1119 NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
1120 NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
1121 NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
1122 NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
1123 NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
1124 NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
1125 NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
1126 NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
1127 NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
1128 NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
1129 NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
1130 NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
1131 NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
1132 NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
1133 NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
1134 NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
1135 NEONMAP0(vrndi_v),
1136 NEONMAP0(vrndiq_v),
1137 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
1138 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
1139 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
1140 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
1141 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
1142 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
1143 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
1144 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
1145 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
1146 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
1147 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
1148 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
1149 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
1150 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
1151 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
1152 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
1153 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
1154 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
1155 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
1156 NEONMAP0(vshl_n_v),
1157 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
1158 NEONMAP0(vshll_n_v),
1159 NEONMAP0(vshlq_n_v),
1160 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
1161 NEONMAP0(vshr_n_v),
1162 NEONMAP0(vshrn_n_v),
1163 NEONMAP0(vshrq_n_v),
1164 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
1165 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
1166 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
1167 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
1168 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
1169 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
1170 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
1171 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
1172 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
1173 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
1174 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
1175 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
1176 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
1177 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
1178 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
1179 NEONMAP0(vsubhn_v),
1180 NEONMAP0(vtst_v),
1181 NEONMAP0(vtstq_v),
1182 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
1183 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
1184 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
1185 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
1186};
1187
1188static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
1189 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
1190 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
1191 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
1192 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
1193 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
1194 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
1195 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
1196 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
1197 NEONMAP1(vaddv_s16, vector_reduce_add, Add1ArgType),
1198 NEONMAP1(vaddv_s32, vector_reduce_add, Add1ArgType),
1199 NEONMAP1(vaddv_s8, vector_reduce_add, Add1ArgType),
1200 NEONMAP1(vaddv_u16, vector_reduce_add, Add1ArgType),
1201 NEONMAP1(vaddv_u32, vector_reduce_add, Add1ArgType),
1202 NEONMAP1(vaddv_u8, vector_reduce_add, Add1ArgType),
1203 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
1204 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
1205 NEONMAP1(vaddvq_s16, vector_reduce_add, Add1ArgType),
1206 NEONMAP1(vaddvq_s32, vector_reduce_add, Add1ArgType),
1207 NEONMAP1(vaddvq_s64, vector_reduce_add, Add1ArgType),
1208 NEONMAP1(vaddvq_s8, vector_reduce_add, Add1ArgType),
1209 NEONMAP1(vaddvq_u16, vector_reduce_add, Add1ArgType),
1210 NEONMAP1(vaddvq_u32, vector_reduce_add, Add1ArgType),
1211 NEONMAP1(vaddvq_u64, vector_reduce_add, Add1ArgType),
1212 NEONMAP1(vaddvq_u8, vector_reduce_add, Add1ArgType),
1213 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
1214 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
1215 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
1216 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
1217 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
1218 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
1219 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
1220 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
1221 NEONMAP1(vcvtad_s32_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1222 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1223 NEONMAP1(vcvtad_u32_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1224 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1225 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1226 NEONMAP1(vcvtas_s64_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1227 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1228 NEONMAP1(vcvtas_u64_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1229 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1230 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1231 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1232 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1233 NEONMAP1(vcvtd_s32_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1234 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1235 NEONMAP1(vcvtd_u32_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1236 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1237 NEONMAP0(vcvth_bf16_f32),
1238 NEONMAP1(vcvtmd_s32_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1239 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1240 NEONMAP1(vcvtmd_u32_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1241 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1242 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1243 NEONMAP1(vcvtms_s64_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1244 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1245 NEONMAP1(vcvtms_u64_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1246 NEONMAP1(vcvtnd_s32_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1247 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1248 NEONMAP1(vcvtnd_u32_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1249 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1250 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1251 NEONMAP1(vcvtns_s64_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1252 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1253 NEONMAP1(vcvtns_u64_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1254 NEONMAP1(vcvtpd_s32_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1255 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1256 NEONMAP1(vcvtpd_u32_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1257 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1258 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1259 NEONMAP1(vcvtps_s64_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1260 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1261 NEONMAP1(vcvtps_u64_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1262 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1263 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1264 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1265 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1266 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1267 NEONMAP1(vcvts_s64_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1268 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1269 NEONMAP1(vcvts_u64_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1270 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
1271 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1272 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1273 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1274 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1275 NEONMAP1(vmaxv_s16, vector_reduce_smax, Add1ArgType),
1276 NEONMAP1(vmaxv_s32, vector_reduce_smax, Add1ArgType),
1277 NEONMAP1(vmaxv_s8, vector_reduce_smax, Add1ArgType),
1278 NEONMAP1(vmaxv_u16, vector_reduce_umax, Add1ArgType),
1279 NEONMAP1(vmaxv_u32, vector_reduce_umax, Add1ArgType),
1280 NEONMAP1(vmaxv_u8, vector_reduce_umax, Add1ArgType),
1281 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1282 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1283 NEONMAP1(vmaxvq_s16, vector_reduce_smax, Add1ArgType),
1284 NEONMAP1(vmaxvq_s32, vector_reduce_smax, Add1ArgType),
1285 NEONMAP1(vmaxvq_s8, vector_reduce_smax, Add1ArgType),
1286 NEONMAP1(vmaxvq_u16, vector_reduce_umax, Add1ArgType),
1287 NEONMAP1(vmaxvq_u32, vector_reduce_umax, Add1ArgType),
1288 NEONMAP1(vmaxvq_u8, vector_reduce_umax, Add1ArgType),
1289 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1290 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1291 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1292 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1293 NEONMAP1(vminv_s16, vector_reduce_smin, Add1ArgType),
1294 NEONMAP1(vminv_s32, vector_reduce_smin, Add1ArgType),
1295 NEONMAP1(vminv_s8, vector_reduce_smin, Add1ArgType),
1296 NEONMAP1(vminv_u16, vector_reduce_umin, Add1ArgType),
1297 NEONMAP1(vminv_u32, vector_reduce_umin, Add1ArgType),
1298 NEONMAP1(vminv_u8, vector_reduce_umin, Add1ArgType),
1299 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1300 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
1301 NEONMAP1(vminvq_s16, vector_reduce_smin, Add1ArgType),
1302 NEONMAP1(vminvq_s32, vector_reduce_smin, Add1ArgType),
1303 NEONMAP1(vminvq_s8, vector_reduce_smin, Add1ArgType),
1304 NEONMAP1(vminvq_u16, vector_reduce_umin, Add1ArgType),
1305 NEONMAP1(vminvq_u32, vector_reduce_umin, Add1ArgType),
1306 NEONMAP1(vminvq_u8, vector_reduce_umin, Add1ArgType),
1307 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
1308 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
1309 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
1310 NEONMAP1(vpaddd_s64, vector_reduce_add, Add1ArgType),
1311 NEONMAP1(vpaddd_u64, vector_reduce_add, Add1ArgType),
1312 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1313 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1314 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1315 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1316 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1317 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1318 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
1319 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1320 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
1321 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
1322 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
1323 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
1324 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
1325 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
1326 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
1327 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
1328 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
1329 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
1330 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
1331 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
1332 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
1333 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
1334 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
1335 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
1336 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
1337 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
1338 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
1339 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
1340 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
1341 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
1342 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
1343 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
1344 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
1345 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
1346 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
1347 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
1348 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
1349 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
1350 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
1351 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
1352 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
1353 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
1354 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
1355 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
1356 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
1357 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
1358 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
1359 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
1360 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
1361 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
1362 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
1363 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
1364 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
1365 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
1366 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
1367 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
1368 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
1369 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
1370 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
1371 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
1372 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1373 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1374 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1375 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1376 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
1377 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
1378 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1379 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1380 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1381 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1382 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
1383 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
1384 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
1385 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
1386 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
1387 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
1388 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
1389 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
1390 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
1391 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
1392 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
1393 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
1394 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
1395 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
1396 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
1397 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
1398 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
1399 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
1400 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
1401 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
1402 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
1403 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
1404 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
1405 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
1406 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
1407 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
1408 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
1409 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
1410 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
1411 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
1412 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
1413 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
1414 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
1415 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
1416 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
1417 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
1418 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
1419 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
1420 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
1421 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
1422 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
1423 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
1424 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
1425 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
1426 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
1427 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
1428 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
1429 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
1430 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
1431 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
1432 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
1433 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
1434 // FP16 scalar intrinsics go here.
1435 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
1436 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1437 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1438 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1439 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1440 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1441 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1442 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1443 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1444 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1445 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1446 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1447 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1448 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1449 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1450 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1451 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1452 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1453 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1454 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1455 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1456 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1457 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1458 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1459 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1460 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1461 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1462 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1463 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1464 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
1465 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
1466 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
1467 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
1468 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
1469};
1470// clang-format on
1471
1472// Some intrinsics are equivalent for codegen.
1473static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
1474 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
1475 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
1476 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
1477 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
1478 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
1479 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
1480 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
1481 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
1482 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
1483 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
1484 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
1485 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
1486 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
1487 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
1488 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
1489 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
1490 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
1491 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
1492 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
1493 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
1494 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
1495 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
1496 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
1497 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
1498 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
1499 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
1500 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
1501 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
1502 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
1503 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
1504 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
1505 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
1506 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
1507 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
1508 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
1509 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
1510 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
1511 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
1512 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
1513 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
1514 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
1515 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
1516 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
1517 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
1518 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
1519 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
1520 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
1521 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
1522 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
1523 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
1524 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
1525 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
1526 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
1527 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
1528 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
1529 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
1530 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
1531 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
1532 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
1533 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
1534 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
1535 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
1536 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
1537 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
1538 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
1539 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
1540 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
1541 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
1542 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
1543 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
1544 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
1545 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
1546 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
1547 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
1548 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
1549 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
1550 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
1551 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
1552 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
1553 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
1554 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
1555 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
1556 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
1557 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
1558 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
1559 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
1560 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
1561 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
1562 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
1563 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
1564 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
1565 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
1566 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
1567 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
1568 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
1569 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
1570 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
1571 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
1572 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
1573 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
1574 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
1575 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
1576 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
1577 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
1578 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
1579 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
1580 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
1581 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
1582 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
1583 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
1584 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
1585 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
1586 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
1587 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
1588 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
1589 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
1590 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
1591 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
1592 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
1593 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
1594 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
1595 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
1596 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
1597 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
1598 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
1599 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
1600 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
1601 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
1602 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
1603 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
1604 // arbitrary one to be handled as the canonical variation.
1605 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1606 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1607 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1608 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1609 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1610 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1611 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1612 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1613 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1614 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1615 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1616 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1617};
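// Illustrative sketch (the helper name below is hypothetical, not part of this
// file): the pair table above is meant to be consulted before the main
// intrinsic tables, remapping an alias builtin ID to its canonical twin,
// roughly:
//
//   static unsigned remapEquivalentNeonBuiltin(unsigned BuiltinID) {
//     for (const auto &P : NEONEquivalentIntrinsicMap)
//       if (P.first == BuiltinID)
//         return P.second; // codegen continues as if the canonical ID was used
//     return BuiltinID;
//   }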
1618
1619#undef NEONMAP0
1620#undef NEONMAP1
1621#undef NEONMAP2
1622
1623#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
1624 { \
1625 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
1626 TypeModifier \
1627 }
1628
1629#define SVEMAP2(NameBase, TypeModifier) \
1630 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
1631static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
1632#define GET_SVE_LLVM_INTRINSIC_MAP
1633#include "clang/Basic/arm_sve_builtin_cg.inc"
1634#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
1635#undef GET_SVE_LLVM_INTRINSIC_MAP
1636};
1637
1638#undef SVEMAP1
1639#undef SVEMAP2
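// For reference, SVEMAP1(foo, aarch64_sve_foo, SomeMod) expands to an
// ARMVectorIntrinsicInfo initializer of the form
//   { "foo", SVE::BI__builtin_sve_foo, Intrinsic::aarch64_sve_foo, 0, SomeMod }
// while SVEMAP2 leaves the LLVM intrinsic slot at 0 for builtins that are
// custom-lowered rather than mapped 1:1. ("foo"/SomeMod are placeholders; the
// real entries come from the generated arm_sve_builtin_cg.inc include above.)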
1640
1641#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
1642 { \
1643 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
1644 TypeModifier \
1645 }
1646
1647#define SMEMAP2(NameBase, TypeModifier) \
1648 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
1649static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
1650#define GET_SME_LLVM_INTRINSIC_MAP
1651#include "clang/Basic/arm_sme_builtin_cg.inc"
1652#undef GET_SME_LLVM_INTRINSIC_MAP
1653};
1654
1655#undef SMEMAP1
1656#undef SMEMAP2
1657
1658static bool NEONSIMDIntrinsicsProvenSorted = false;
1659
1660static bool AArch64SIMDIntrinsicsProvenSorted = false;
1661static bool AArch64SISDIntrinsicsProvenSorted = false;
1662static bool AArch64SVEIntrinsicsProvenSorted = false;
1663static bool AArch64SMEIntrinsicsProvenSorted = false;
1664
1665static const ARMVectorIntrinsicInfo *
1666findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
1667 unsigned BuiltinID, bool &MapProvenSorted) {
1668
1669#ifndef NDEBUG
1670 if (!MapProvenSorted) {
1671 assert(llvm::is_sorted(IntrinsicMap));
1672 MapProvenSorted = true;
1673 }
1674#endif
1675
1676 const ARMVectorIntrinsicInfo *Builtin =
1677 llvm::lower_bound(IntrinsicMap, BuiltinID);
1678
1679 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
1680 return Builtin;
1681
1682 return nullptr;
1683}
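// Minimal usage sketch, assuming the per-table ProvenSorted flags declared
// above: each map must be kept sorted by BuiltinID for the lower_bound to be
// valid, and a typical lookup looks roughly like
//
//   const ARMVectorIntrinsicInfo *Info = findARMVectorIntrinsicInMap(
//       AArch64SISDIntrinsicMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
//   if (Info)
//     ... // dispatch on Info->LLVMIntrinsic / Info->TypeModifier / Info->NameHint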
1684
1686 unsigned Modifier,
1687 llvm::Type *ArgType,
1688 const CallExpr *E) {
1689 int VectorSize = 0;
1690 if (Modifier & Use64BitVectors)
1691 VectorSize = 64;
1692 else if (Modifier & Use128BitVectors)
1693 VectorSize = 128;
1694
1695 // Return type.
1696 SmallVector<llvm::Type *, 3> Tys;
1697 if (Modifier & AddRetType) {
1698 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
1699 if (Modifier & VectorizeRetType)
1700 Ty = llvm::FixedVectorType::get(
1701 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
1702
1703 Tys.push_back(Ty);
1704 }
1705
1706 // Arguments.
1707 if (Modifier & VectorizeArgTypes) {
1708 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
1709 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
1710 }
1711
1712 if (Modifier & (Add1ArgType | Add2ArgTypes))
1713 Tys.push_back(ArgType);
1714
1715 if (Modifier & Add2ArgTypes)
1716 Tys.push_back(ArgType);
1717
1718 if (Modifier & InventFloatType)
1719 Tys.push_back(FloatTy);
1720
1721 return CGM.getIntrinsic(IntrinsicID, Tys);
1722}
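// Worked example (hedged): for an AddRetType | Add1ArgType entry such as
// vcaged_f64 -> aarch64_neon_facge, the overload list becomes
// { return type, first argument type } = { i64, double }, i.e. roughly
//   declare i64 @llvm.aarch64.neon.facge.i64.f64(double, double)
// For entries carrying Use64BitVectors, the Vectorize* modifiers widen the
// scalar types to 64-bit vectors before they are pushed onto Tys.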
1723
1724static Value *EmitCommonNeonSISDBuiltinExpr(
1725 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
1726 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
1727 unsigned BuiltinID = SISDInfo.BuiltinID;
1728 unsigned int Int = SISDInfo.LLVMIntrinsic;
1729 unsigned Modifier = SISDInfo.TypeModifier;
1730 const char *s = SISDInfo.NameHint;
1731
1732 switch (BuiltinID) {
1733 case NEON::BI__builtin_neon_vcled_s64:
1734 case NEON::BI__builtin_neon_vcled_u64:
1735 case NEON::BI__builtin_neon_vcles_f32:
1736 case NEON::BI__builtin_neon_vcled_f64:
1737 case NEON::BI__builtin_neon_vcltd_s64:
1738 case NEON::BI__builtin_neon_vcltd_u64:
1739 case NEON::BI__builtin_neon_vclts_f32:
1740 case NEON::BI__builtin_neon_vcltd_f64:
1741 case NEON::BI__builtin_neon_vcales_f32:
1742 case NEON::BI__builtin_neon_vcaled_f64:
1743 case NEON::BI__builtin_neon_vcalts_f32:
1744 case NEON::BI__builtin_neon_vcaltd_f64:
1745 // Only one direction of comparisons actually exists; cmle is actually a cmge
1746 // with swapped operands. The table gives us the right intrinsic, but we
1747 // still need to do the swap.
1748 std::swap(Ops[0], Ops[1]);
1749 break;
1750 }
1751
1752 assert(Int && "Generic code assumes a valid intrinsic");
1753
1754 // Determine the type(s) of this overloaded AArch64 intrinsic.
1755 const Expr *Arg = E->getArg(0);
1756 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
1757 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
1758
1759 int j = 0;
1760 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
1761 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
1762 ai != ae; ++ai, ++j) {
1763 llvm::Type *ArgTy = ai->getType();
1764 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
1765 ArgTy->getPrimitiveSizeInBits())
1766 continue;
1767
1768 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
1769 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
1770 // it before inserting.
1771 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
1772 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
1773 Ops[j] =
1774 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
1775 }
1776
1777 Value *Result = CGF.EmitNeonCall(F, Ops, s);
1778 llvm::Type *ResultType = CGF.ConvertType(E->getType());
1779 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
1780 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
1781 return CGF.Builder.CreateExtractElement(Result, C0);
1782
1783 return CGF.Builder.CreateBitCast(Result, ResultType, s);
1784}
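// Walk-through of the promotion above (hedged example): for a
// Vectorize1ArgType | Use64BitVectors entry such as vqaddb_s8, each scalar i8
// operand is inserted into lane 0 of a poison <8 x i8>, the 64-bit vector
// intrinsic (e.g. aarch64.neon.sqadd on <8 x i8>) is called, and because the
// scalar result type is narrower than the vector result, lane 0 is extracted
// again on the way out.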
1785
1786Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
1787 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
1788 const char *NameHint, unsigned Modifier, const CallExpr *E,
1789 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
1790 llvm::Triple::ArchType Arch) {
1791 // Get the last argument, which specifies the vector type.
1792 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
1793 std::optional<llvm::APSInt> NeonTypeConst =
1794 Arg->getIntegerConstantExpr(getContext());
1795 if (!NeonTypeConst)
1796 return nullptr;
1797
1798 // Determine the type of this overloaded NEON intrinsic.
1799 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
1800 const bool Usgn = Type.isUnsigned();
1801 const bool Quad = Type.isQuad();
1802 const bool Floating = Type.isFloatingPoint();
1803 const bool HasFastHalfType = getTarget().hasFastHalfType();
1804 const bool AllowBFloatArgsAndRet =
1805 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
1806
1807 llvm::FixedVectorType *VTy =
1808 GetNeonType(this, Type, HasFastHalfType, false, AllowBFloatArgsAndRet);
1809 llvm::Type *Ty = VTy;
1810 if (!Ty)
1811 return nullptr;
1812
1813 auto getAlignmentValue32 = [&](Address addr) -> Value* {
1814 return Builder.getInt32(addr.getAlignment().getQuantity());
1815 };
1816
1817 unsigned Int = LLVMIntrinsic;
1818 if ((Modifier & UnsignedAlts) && !Usgn)
1819 Int = AltLLVMIntrinsic;
1820
1821 switch (BuiltinID) {
1822 default: break;
1823 case NEON::BI__builtin_neon_splat_lane_v:
1824 case NEON::BI__builtin_neon_splat_laneq_v:
1825 case NEON::BI__builtin_neon_splatq_lane_v:
1826 case NEON::BI__builtin_neon_splatq_laneq_v: {
1827 auto NumElements = VTy->getElementCount();
1828 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
1829 NumElements = NumElements * 2;
1830 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
1831 NumElements = NumElements.divideCoefficientBy(2);
1832
1833 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
1834 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
1835 }
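// Element-count example: splatq_lane_v on a <2 x i32> input doubles
// NumElements to 4, broadcasting the selected lane into a 128-bit <4 x i32>;
// splat_laneq_v halves it instead, narrowing a q-register input down to a
// 64-bit result vector.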
1836 case NEON::BI__builtin_neon_vpadd_v:
1837 case NEON::BI__builtin_neon_vpaddq_v:
1838 // We don't allow fp/int overloading of intrinsics.
1839 if (VTy->getElementType()->isFloatingPointTy() &&
1840 Int == Intrinsic::aarch64_neon_addp)
1841 Int = Intrinsic::aarch64_neon_faddp;
1842 break;
1843 case NEON::BI__builtin_neon_vabs_v:
1844 case NEON::BI__builtin_neon_vabsq_v:
1845 if (VTy->getElementType()->isFloatingPointTy())
1846 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
1847 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
1848 case NEON::BI__builtin_neon_vadd_v:
1849 case NEON::BI__builtin_neon_vaddq_v: {
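// For the polynomial element types that reach this builtin path, NEON
// "addition" is carry-less (GF(2)), so it is emitted as a bitwise XOR over
// the i8 vector.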
1850 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
1851 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
1852 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
1853 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
1854 return Builder.CreateBitCast(Ops[0], Ty);
1855 }
1856 case NEON::BI__builtin_neon_vaddhn_v: {
1857 llvm::FixedVectorType *SrcTy =
1858 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1859
1860 // %sum = add <4 x i32> %lhs, %rhs
1861 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1862 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
1863 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
1864
1865 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
1866 Constant *ShiftAmt =
1867 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
1868 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
1869
1870 // %res = trunc <4 x i32> %high to <4 x i16>
1871 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
1872 }
1873 case NEON::BI__builtin_neon_vcale_v:
1874 case NEON::BI__builtin_neon_vcaleq_v:
1875 case NEON::BI__builtin_neon_vcalt_v:
1876 case NEON::BI__builtin_neon_vcaltq_v:
1877 std::swap(Ops[0], Ops[1]);
1878 [[fallthrough]];
1879 case NEON::BI__builtin_neon_vcage_v:
1880 case NEON::BI__builtin_neon_vcageq_v:
1881 case NEON::BI__builtin_neon_vcagt_v:
1882 case NEON::BI__builtin_neon_vcagtq_v: {
1883 llvm::Type *Ty;
1884 switch (VTy->getScalarSizeInBits()) {
1885 default: llvm_unreachable("unexpected type");
1886 case 32:
1887 Ty = FloatTy;
1888 break;
1889 case 64:
1890 Ty = DoubleTy;
1891 break;
1892 case 16:
1893 Ty = HalfTy;
1894 break;
1895 }
1896 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
1897 llvm::Type *Tys[] = { VTy, VecFlt };
1898 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1899 return EmitNeonCall(F, Ops, NameHint);
1900 }
1901 case NEON::BI__builtin_neon_vceqz_v:
1902 case NEON::BI__builtin_neon_vceqzq_v:
1903 return EmitAArch64CompareBuiltinExpr(
1904 Ops[0], Ty, Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ, "vceqz");
1905 case NEON::BI__builtin_neon_vcgez_v:
1906 case NEON::BI__builtin_neon_vcgezq_v:
1907 return EmitAArch64CompareBuiltinExpr(
1908 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
1909 "vcgez");
1910 case NEON::BI__builtin_neon_vclez_v:
1911 case NEON::BI__builtin_neon_vclezq_v:
1912 return EmitAArch64CompareBuiltinExpr(
1913 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
1914 "vclez");
1915 case NEON::BI__builtin_neon_vcgtz_v:
1916 case NEON::BI__builtin_neon_vcgtzq_v:
1917 return EmitAArch64CompareBuiltinExpr(
1918 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
1919 "vcgtz");
1920 case NEON::BI__builtin_neon_vcltz_v:
1921 case NEON::BI__builtin_neon_vcltzq_v:
1922 return EmitAArch64CompareBuiltinExpr(
1923 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
1924 "vcltz");
1925 case NEON::BI__builtin_neon_vclz_v:
1926 case NEON::BI__builtin_neon_vclzq_v:
1927 // We generate a target-independent intrinsic, which needs a second argument
1928 // indicating whether or not clz of zero is undefined; on ARM it isn't.
1929 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
1930 break;
1931 case NEON::BI__builtin_neon_vcvt_f32_v:
1932 case NEON::BI__builtin_neon_vcvtq_f32_v:
1933 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1934 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
1935 HasFastHalfType);
1936 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1937 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1938 case NEON::BI__builtin_neon_vcvt_f16_s16:
1939 case NEON::BI__builtin_neon_vcvt_f16_u16:
1940 case NEON::BI__builtin_neon_vcvtq_f16_s16:
1941 case NEON::BI__builtin_neon_vcvtq_f16_u16:
1942 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1943 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
1944 HasFastHalfType);
1945 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1946 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1947 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
1948 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
1949 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
1950 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
1951 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
1952 Function *F = CGM.getIntrinsic(Int, Tys);
1953 return EmitNeonCall(F, Ops, "vcvt_n");
1954 }
1955 case NEON::BI__builtin_neon_vcvt_n_f32_v:
1956 case NEON::BI__builtin_neon_vcvt_n_f64_v:
1957 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
1958 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
1959 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
1960 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
1961 Function *F = CGM.getIntrinsic(Int, Tys);
1962 return EmitNeonCall(F, Ops, "vcvt_n");
1963 }
1964 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
1965 case NEON::BI__builtin_neon_vcvt_n_s32_v:
1966 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
1967 case NEON::BI__builtin_neon_vcvt_n_u32_v:
1968 case NEON::BI__builtin_neon_vcvt_n_s64_v:
1969 case NEON::BI__builtin_neon_vcvt_n_u64_v:
1970 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
1971 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
1972 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
1973 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
1974 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
1975 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
1976 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
1977 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1978 return EmitNeonCall(F, Ops, "vcvt_n");
1979 }
1980 case NEON::BI__builtin_neon_vcvt_s32_v:
1981 case NEON::BI__builtin_neon_vcvt_u32_v:
1982 case NEON::BI__builtin_neon_vcvt_s64_v:
1983 case NEON::BI__builtin_neon_vcvt_u64_v:
1984 case NEON::BI__builtin_neon_vcvt_s16_f16:
1985 case NEON::BI__builtin_neon_vcvt_u16_f16:
1986 case NEON::BI__builtin_neon_vcvtq_s32_v:
1987 case NEON::BI__builtin_neon_vcvtq_u32_v:
1988 case NEON::BI__builtin_neon_vcvtq_s64_v:
1989 case NEON::BI__builtin_neon_vcvtq_u64_v:
1990 case NEON::BI__builtin_neon_vcvtq_s16_f16:
1991 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
1992 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
1993 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
1994 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
1995 }
1996 case NEON::BI__builtin_neon_vcvta_s16_f16:
1997 case NEON::BI__builtin_neon_vcvta_s32_v:
1998 case NEON::BI__builtin_neon_vcvta_s64_v:
1999 case NEON::BI__builtin_neon_vcvta_u16_f16:
2000 case NEON::BI__builtin_neon_vcvta_u32_v:
2001 case NEON::BI__builtin_neon_vcvta_u64_v:
2002 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
2003 case NEON::BI__builtin_neon_vcvtaq_s32_v:
2004 case NEON::BI__builtin_neon_vcvtaq_s64_v:
2005 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
2006 case NEON::BI__builtin_neon_vcvtaq_u32_v:
2007 case NEON::BI__builtin_neon_vcvtaq_u64_v:
2008 case NEON::BI__builtin_neon_vcvtn_s16_f16:
2009 case NEON::BI__builtin_neon_vcvtn_s32_v:
2010 case NEON::BI__builtin_neon_vcvtn_s64_v:
2011 case NEON::BI__builtin_neon_vcvtn_u16_f16:
2012 case NEON::BI__builtin_neon_vcvtn_u32_v:
2013 case NEON::BI__builtin_neon_vcvtn_u64_v:
2014 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
2015 case NEON::BI__builtin_neon_vcvtnq_s32_v:
2016 case NEON::BI__builtin_neon_vcvtnq_s64_v:
2017 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
2018 case NEON::BI__builtin_neon_vcvtnq_u32_v:
2019 case NEON::BI__builtin_neon_vcvtnq_u64_v:
2020 case NEON::BI__builtin_neon_vcvtp_s16_f16:
2021 case NEON::BI__builtin_neon_vcvtp_s32_v:
2022 case NEON::BI__builtin_neon_vcvtp_s64_v:
2023 case NEON::BI__builtin_neon_vcvtp_u16_f16:
2024 case NEON::BI__builtin_neon_vcvtp_u32_v:
2025 case NEON::BI__builtin_neon_vcvtp_u64_v:
2026 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
2027 case NEON::BI__builtin_neon_vcvtpq_s32_v:
2028 case NEON::BI__builtin_neon_vcvtpq_s64_v:
2029 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
2030 case NEON::BI__builtin_neon_vcvtpq_u32_v:
2031 case NEON::BI__builtin_neon_vcvtpq_u64_v:
2032 case NEON::BI__builtin_neon_vcvtm_s16_f16:
2033 case NEON::BI__builtin_neon_vcvtm_s32_v:
2034 case NEON::BI__builtin_neon_vcvtm_s64_v:
2035 case NEON::BI__builtin_neon_vcvtm_u16_f16:
2036 case NEON::BI__builtin_neon_vcvtm_u32_v:
2037 case NEON::BI__builtin_neon_vcvtm_u64_v:
2038 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
2039 case NEON::BI__builtin_neon_vcvtmq_s32_v:
2040 case NEON::BI__builtin_neon_vcvtmq_s64_v:
2041 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
2042 case NEON::BI__builtin_neon_vcvtmq_u32_v:
2043 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
2044 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
2045 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
2046 }
2047 case NEON::BI__builtin_neon_vcvtx_f32_v: {
2048 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
2049 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
2050
2051 }
2052 case NEON::BI__builtin_neon_vext_v:
2053 case NEON::BI__builtin_neon_vextq_v: {
2054 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
2055 SmallVector<int, 16> Indices;
2056 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
2057 Indices.push_back(i+CV);
2058
2059 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2060 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2061 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
2062 }
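// Shuffle-mask example: a 4-lane vext with CV == 1 produces the indices
// {1, 2, 3, 4}, i.e. lanes 1..3 of the first operand followed by lane 0 of
// the second, matching the EXT concatenate-and-extract semantics.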
2063 case NEON::BI__builtin_neon_vfma_v:
2064 case NEON::BI__builtin_neon_vfmaq_v: {
2065 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2066 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2067 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2068
2069 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
2070 return emitCallMaybeConstrainedFPBuiltin(
2071 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
2072 {Ops[1], Ops[2], Ops[0]});
2073 }
2074 case NEON::BI__builtin_neon_vld1_v:
2075 case NEON::BI__builtin_neon_vld1q_v: {
2076 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2077 Ops.push_back(getAlignmentValue32(PtrOp0));
2078 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
2079 }
2080 case NEON::BI__builtin_neon_vld1_x2_v:
2081 case NEON::BI__builtin_neon_vld1q_x2_v:
2082 case NEON::BI__builtin_neon_vld1_x3_v:
2083 case NEON::BI__builtin_neon_vld1q_x3_v:
2084 case NEON::BI__builtin_neon_vld1_x4_v:
2085 case NEON::BI__builtin_neon_vld1q_x4_v: {
2086 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
2087 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
2088 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
2089 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
2090 }
2091 case NEON::BI__builtin_neon_vld2_v:
2092 case NEON::BI__builtin_neon_vld2q_v:
2093 case NEON::BI__builtin_neon_vld3_v:
2094 case NEON::BI__builtin_neon_vld3q_v:
2095 case NEON::BI__builtin_neon_vld4_v:
2096 case NEON::BI__builtin_neon_vld4q_v:
2097 case NEON::BI__builtin_neon_vld2_dup_v:
2098 case NEON::BI__builtin_neon_vld2q_dup_v:
2099 case NEON::BI__builtin_neon_vld3_dup_v:
2100 case NEON::BI__builtin_neon_vld3q_dup_v:
2101 case NEON::BI__builtin_neon_vld4_dup_v:
2102 case NEON::BI__builtin_neon_vld4q_dup_v: {
2103 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2104 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
2105 Value *Align = getAlignmentValue32(PtrOp1);
2106 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
2107 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
2108 }
2109 case NEON::BI__builtin_neon_vld1_dup_v:
2110 case NEON::BI__builtin_neon_vld1q_dup_v: {
2111 Value *V = PoisonValue::get(Ty);
2112 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
2113 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
2114 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
2115 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
2116 return EmitNeonSplat(Ops[0], CI);
2117 }
2118 case NEON::BI__builtin_neon_vld2_lane_v:
2119 case NEON::BI__builtin_neon_vld2q_lane_v:
2120 case NEON::BI__builtin_neon_vld3_lane_v:
2121 case NEON::BI__builtin_neon_vld3q_lane_v:
2122 case NEON::BI__builtin_neon_vld4_lane_v:
2123 case NEON::BI__builtin_neon_vld4q_lane_v: {
2124 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2125 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
2126 for (unsigned I = 2; I < Ops.size() - 1; ++I)
2127 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
2128 Ops.push_back(getAlignmentValue32(PtrOp1));
2129 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
2130 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
2131 }
2132 case NEON::BI__builtin_neon_vmovl_v: {
2133 llvm::FixedVectorType *DTy =
2134 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
2135 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
2136 if (Usgn)
2137 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
2138 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
2139 }
2140 case NEON::BI__builtin_neon_vmovn_v: {
2141 llvm::FixedVectorType *QTy =
2142 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2143 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
2144 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
2145 }
2146 case NEON::BI__builtin_neon_vmull_v:
2147 // FIXME: the integer vmull operations could be emitted in terms of pure
2148 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
2149 // hoisting the exts outside loops. Until GlobalISel comes along and can
2150 // see through such movement, this leads to bad CodeGen. So we need an
2151 // intrinsic for now.
2152 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
2153 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
2154 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
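// For reference, the pure-IR lowering alluded to in the FIXME would be
// roughly (signed 8x8 -> 16 case):
//   %a.ext = sext <8 x i8> %a to <8 x i16>
//   %b.ext = sext <8 x i8> %b to <8 x i16>
//   %prod  = mul <8 x i16> %a.ext, %b.ext
// (zext for the unsigned variants); the intrinsic form is kept so the extends
// cannot be hoisted out of loops and pessimize instruction selection.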
2155 case NEON::BI__builtin_neon_vpadal_v:
2156 case NEON::BI__builtin_neon_vpadalq_v: {
2157 // The source operand type has twice as many elements of half the size.
2158 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2159 llvm::Type *EltTy =
2160 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
2161 auto *NarrowTy =
2162 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
2163 llvm::Type *Tys[2] = { Ty, NarrowTy };
2164 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
2165 }
2166 case NEON::BI__builtin_neon_vpaddl_v:
2167 case NEON::BI__builtin_neon_vpaddlq_v: {
2168 // The source operand type has twice as many elements of half the size.
2169 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2170 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
2171 auto *NarrowTy =
2172 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
2173 llvm::Type *Tys[2] = { Ty, NarrowTy };
2174 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
2175 }
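// Concrete example (hedged): for vpaddl_s8 the result type Ty is <4 x i16>,
// so EltTy is i8 and NarrowTy is <8 x i8>; the overload therefore becomes
// roughly @llvm.arm.neon.vpaddls.v4i16.v8i8 (or the AArch64 equivalent
// selected through the intrinsic tables).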
2176 case NEON::BI__builtin_neon_vqdmlal_v:
2177 case NEON::BI__builtin_neon_vqdmlsl_v: {
2178 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
2179 Ops[1] =
2180 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
2181 Ops.resize(2);
2182 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
2183 }
2184 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
2185 case NEON::BI__builtin_neon_vqdmulh_lane_v:
2186 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
2187 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
2188 auto *RTy = cast<llvm::FixedVectorType>(Ty);
2189 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
2190 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
2191 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
2192 RTy->getNumElements() * 2);
2193 llvm::Type *Tys[2] = {
2194 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
2195 /*isQuad*/ false))};
2196 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
2197 }
2198 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
2199 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
2200 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
2201 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
2202 llvm::Type *Tys[2] = {
2203 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
2204 /*isQuad*/ true))};
2205 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
2206 }
2207 case NEON::BI__builtin_neon_vqshl_n_v:
2208 case NEON::BI__builtin_neon_vqshlq_n_v:
2209 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
2210 1, false);
2211 case NEON::BI__builtin_neon_vqshlu_n_v:
2212 case NEON::BI__builtin_neon_vqshluq_n_v:
2213 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
2214 1, false);
2215 case NEON::BI__builtin_neon_vrecpe_v:
2216 case NEON::BI__builtin_neon_vrecpeq_v:
2217 case NEON::BI__builtin_neon_vrsqrte_v:
2218 case NEON::BI__builtin_neon_vrsqrteq_v:
2219 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
2220 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
2221 case NEON::BI__builtin_neon_vrndi_v:
2222 case NEON::BI__builtin_neon_vrndiq_v:
2223 Int = Builder.getIsFPConstrained()
2224 ? Intrinsic::experimental_constrained_nearbyint
2225 : Intrinsic::nearbyint;
2226 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
2227 case NEON::BI__builtin_neon_vrshr_n_v:
2228 case NEON::BI__builtin_neon_vrshrq_n_v:
2229 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
2230 1, true);
2231 case NEON::BI__builtin_neon_vsha512hq_u64:
2232 case NEON::BI__builtin_neon_vsha512h2q_u64:
2233 case NEON::BI__builtin_neon_vsha512su0q_u64:
2234 case NEON::BI__builtin_neon_vsha512su1q_u64: {
2235 Function *F = CGM.getIntrinsic(Int);
2236 return EmitNeonCall(F, Ops, "");
2237 }
2238 case NEON::BI__builtin_neon_vshl_n_v:
2239 case NEON::BI__builtin_neon_vshlq_n_v:
2240 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
2241 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
2242 "vshl_n");
2243 case NEON::BI__builtin_neon_vshll_n_v: {
2244 llvm::FixedVectorType *SrcTy =
2245 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
2246 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2247 if (Usgn)
2248 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
2249 else
2250 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
2251 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
2252 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
2253 }
2254 case NEON::BI__builtin_neon_vshrn_n_v: {
2255 llvm::FixedVectorType *SrcTy =
2256 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2257 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2258 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
2259 if (Usgn)
2260 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
2261 else
2262 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
2263 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
2264 }
2265 case NEON::BI__builtin_neon_vshr_n_v:
2266 case NEON::BI__builtin_neon_vshrq_n_v:
2267 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
2268 case NEON::BI__builtin_neon_vst1_v:
2269 case NEON::BI__builtin_neon_vst1q_v:
2270 case NEON::BI__builtin_neon_vst2_v:
2271 case NEON::BI__builtin_neon_vst2q_v:
2272 case NEON::BI__builtin_neon_vst3_v:
2273 case NEON::BI__builtin_neon_vst3q_v:
2274 case NEON::BI__builtin_neon_vst4_v:
2275 case NEON::BI__builtin_neon_vst4q_v:
2276 case NEON::BI__builtin_neon_vst2_lane_v:
2277 case NEON::BI__builtin_neon_vst2q_lane_v:
2278 case NEON::BI__builtin_neon_vst3_lane_v:
2279 case NEON::BI__builtin_neon_vst3q_lane_v:
2280 case NEON::BI__builtin_neon_vst4_lane_v:
2281 case NEON::BI__builtin_neon_vst4q_lane_v: {
2282 llvm::Type *Tys[] = {Int8PtrTy, Ty};
2283 Ops.push_back(getAlignmentValue32(PtrOp0));
2284 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
2285 }
2286 case NEON::BI__builtin_neon_vsm3partw1q_u32:
2287 case NEON::BI__builtin_neon_vsm3partw2q_u32:
2288 case NEON::BI__builtin_neon_vsm3ss1q_u32:
2289 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
2290 case NEON::BI__builtin_neon_vsm4eq_u32: {
2291 Function *F = CGM.getIntrinsic(Int);
2292 return EmitNeonCall(F, Ops, "");
2293 }
2294 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
2295 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
2296 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
2297 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
2298 Function *F = CGM.getIntrinsic(Int);
2299 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
2300 return EmitNeonCall(F, Ops, "");
2301 }
2302 case NEON::BI__builtin_neon_vst1_x2_v:
2303 case NEON::BI__builtin_neon_vst1q_x2_v:
2304 case NEON::BI__builtin_neon_vst1_x3_v:
2305 case NEON::BI__builtin_neon_vst1q_x3_v:
2306 case NEON::BI__builtin_neon_vst1_x4_v:
2307 case NEON::BI__builtin_neon_vst1q_x4_v: {
 2308 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
 2309 // in AArch64 it comes last. We may want to standardize on one or the other.
2310 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
2311 Arch == llvm::Triple::aarch64_32) {
2312 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
2313 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
2314 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
2315 }
2316 llvm::Type *Tys[2] = {DefaultPtrTy, VTy};
2317 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
2318 }
2319 case NEON::BI__builtin_neon_vsubhn_v: {
2320 llvm::FixedVectorType *SrcTy =
2321 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2322
2323 // %sum = add <4 x i32> %lhs, %rhs
2324 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2325 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
2326 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
2327
2328 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
2329 Constant *ShiftAmt =
2330 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
2331 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
2332
2333 // %res = trunc <4 x i32> %high to <4 x i16>
2334 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
2335 }
2336 case NEON::BI__builtin_neon_vtrn_v:
2337 case NEON::BI__builtin_neon_vtrnq_v: {
2338 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2339 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2340 Value *SV = nullptr;
2341
2342 for (unsigned vi = 0; vi != 2; ++vi) {
2343 SmallVector<int, 16> Indices;
2344 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2345 Indices.push_back(i+vi);
2346 Indices.push_back(i+e+vi);
2347 }
2348 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
2349 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
2350 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
2351 }
2352 return SV;
2353 }
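  // For illustration: with a 4-element vector type the two transpose halves
  // use shuffle masks {0, 4, 2, 6} (vi == 0) and {1, 5, 3, 7} (vi == 1), and
  // each half is stored back through the pointer in Ops[0] at offsets 0 and 1.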
2354 case NEON::BI__builtin_neon_vtst_v:
2355 case NEON::BI__builtin_neon_vtstq_v: {
2356 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2357 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2358 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
2359 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
2360 ConstantAggregateZero::get(Ty));
2361 return Builder.CreateSExt(Ops[0], Ty, "vtst");
2362 }
2363 case NEON::BI__builtin_neon_vuzp_v:
2364 case NEON::BI__builtin_neon_vuzpq_v: {
2365 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2366 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2367 Value *SV = nullptr;
2368
2369 for (unsigned vi = 0; vi != 2; ++vi) {
2370 SmallVector<int, 16> Indices;
2371 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
2372 Indices.push_back(2*i+vi);
2373
2374 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
2375 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
2376 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
2377 }
2378 return SV;
2379 }
2380 case NEON::BI__builtin_neon_vxarq_u64: {
2381 Function *F = CGM.getIntrinsic(Int);
2382 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
2383 return EmitNeonCall(F, Ops, "");
2384 }
2385 case NEON::BI__builtin_neon_vzip_v:
2386 case NEON::BI__builtin_neon_vzipq_v: {
2387 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2388 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2389 Value *SV = nullptr;
2390
2391 for (unsigned vi = 0; vi != 2; ++vi) {
2392 SmallVector<int, 16> Indices;
2393 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2394 Indices.push_back((i + vi*e) >> 1);
2395 Indices.push_back(((i + vi*e) >> 1)+e);
2396 }
2397 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
2398 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
2399 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
2400 }
2401 return SV;
2402 }
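  // For illustration: with a 4-element vector type, vzip interleaves the two
  // inputs using shuffle masks {0, 4, 1, 5} (vi == 0) and {2, 6, 3, 7}
  // (vi == 1); as with vtrn and vuzp above, both results are written through
  // the pointer in Ops[0] rather than returned by value.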
2403 case NEON::BI__builtin_neon_vdot_s32:
2404 case NEON::BI__builtin_neon_vdot_u32:
2405 case NEON::BI__builtin_neon_vdotq_s32:
2406 case NEON::BI__builtin_neon_vdotq_u32: {
2407 auto *InputTy =
2408 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2409 llvm::Type *Tys[2] = { Ty, InputTy };
2410 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
2411 }
2412 case NEON::BI__builtin_neon_vfmlal_low_f16:
2413 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
2414 auto *InputTy =
2415 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2416 llvm::Type *Tys[2] = { Ty, InputTy };
2417 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
2418 }
2419 case NEON::BI__builtin_neon_vfmlsl_low_f16:
2420 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
2421 auto *InputTy =
2422 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2423 llvm::Type *Tys[2] = { Ty, InputTy };
2424 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
2425 }
2426 case NEON::BI__builtin_neon_vfmlal_high_f16:
2427 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
2428 auto *InputTy =
2429 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2430 llvm::Type *Tys[2] = { Ty, InputTy };
2431 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
2432 }
2433 case NEON::BI__builtin_neon_vfmlsl_high_f16:
2434 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
2435 auto *InputTy =
2436 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2437 llvm::Type *Tys[2] = { Ty, InputTy };
2438 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
2439 }
2440 case NEON::BI__builtin_neon_vmmlaq_s32:
2441 case NEON::BI__builtin_neon_vmmlaq_u32: {
2442 auto *InputTy =
2443 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2444 llvm::Type *Tys[2] = { Ty, InputTy };
2445 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
2446 }
2447 case NEON::BI__builtin_neon_vusmmlaq_s32: {
2448 auto *InputTy =
2449 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2450 llvm::Type *Tys[2] = { Ty, InputTy };
2451 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
2452 }
2453 case NEON::BI__builtin_neon_vusdot_s32:
2454 case NEON::BI__builtin_neon_vusdotq_s32: {
2455 auto *InputTy =
2456 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2457 llvm::Type *Tys[2] = { Ty, InputTy };
2458 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
2459 }
2460 case NEON::BI__builtin_neon_vbfdot_f32:
2461 case NEON::BI__builtin_neon_vbfdotq_f32: {
2462 llvm::Type *InputTy =
2463 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
2464 llvm::Type *Tys[2] = { Ty, InputTy };
2465 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
2466 }
2467 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
2468 llvm::Type *Tys[1] = { Ty };
2469 Function *F = CGM.getIntrinsic(Int, Tys);
2470 return EmitNeonCall(F, Ops, "vcvtfp2bf");
2471 }
2472
2473 }
2474
2475 assert(Int && "Expected valid intrinsic number");
2476
2477 // Determine the type(s) of this overloaded AArch64 intrinsic.
2478 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
2479
2480 Value *Result = EmitNeonCall(F, Ops, NameHint);
2481 llvm::Type *ResultType = ConvertType(E->getType());
 2482 // Cast the one-element vector result of the AArch64 intrinsic to the
 2483 // scalar type expected by the builtin.
2484 return Builder.CreateBitCast(Result, ResultType, NameHint);
2485}
2486
 2487Value *
 2488CodeGenFunction::EmitAArch64CompareBuiltinExpr(Value *Op, llvm::Type *Ty,
2489 const CmpInst::Predicate Pred,
2490 const Twine &Name) {
2491
2492 if (isa<FixedVectorType>(Ty)) {
2493 // Vector types are cast to i8 vectors. Recover original type.
2494 Op = Builder.CreateBitCast(Op, Ty);
2495 }
2496
2497 if (CmpInst::isFPPredicate(Pred)) {
2498 if (Pred == CmpInst::FCMP_OEQ)
2499 Op = Builder.CreateFCmp(Pred, Op, Constant::getNullValue(Op->getType()));
2500 else
2501 Op = Builder.CreateFCmpS(Pred, Op, Constant::getNullValue(Op->getType()));
2502 } else {
2503 Op = Builder.CreateICmp(Pred, Op, Constant::getNullValue(Op->getType()));
2504 }
2505
2506 llvm::Type *ResTy = Ty;
2507 if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
2508 ResTy = FixedVectorType::get(
2509 IntegerType::get(getLLVMContext(), VTy->getScalarSizeInBits()),
2510 VTy->getNumElements());
2511
2512 return Builder.CreateSExt(Op, ResTy, Name);
2513}
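// As a rough sketch (value names hypothetical), for a floating-point
// compare-to-zero builtin over <2 x double> this helper emits something like:
//   %cmp = fcmp oeq <2 x double> %op, zeroinitializer
//   %res = sext <2 x i1> %cmp to <2 x i64>
// Non-OEQ FP predicates go through CreateFCmpS (the signaling-compare
// builder), and integer predicates emit a plain icmp.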
2514
 2515static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
 2516 Value *ExtOp, Value *IndexOp,
2517 llvm::Type *ResTy, unsigned IntID,
2518 const char *Name) {
 2519 SmallVector<Value *, 2> TblOps;
 2520 if (ExtOp)
2521 TblOps.push_back(ExtOp);
2522
 2523 // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
2524 SmallVector<int, 16> Indices;
2525 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
2526 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
2527 Indices.push_back(2*i);
2528 Indices.push_back(2*i+1);
2529 }
2530
2531 int PairPos = 0, End = Ops.size() - 1;
2532 while (PairPos < End) {
2533 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
2534 Ops[PairPos+1], Indices,
2535 Name));
2536 PairPos += 2;
2537 }
2538
 2539 // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
 2540 // of the last 128-bit lookup table with zero.
2541 if (PairPos == End) {
2542 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
2543 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
2544 ZeroTbl, Indices, Name));
2545 }
2546
2547 Function *TblF;
2548 TblOps.push_back(IndexOp);
2549 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
2550
2551 return CGF.EmitNeonCall(TblF, TblOps, Name);
2552}
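// For illustration: a vtbl3-style lookup passes three 64-bit tables. The
// first two are concatenated into one 128-bit table, the leftover table is
// widened with a zero upper half, the optional ExtOp (used by the vtbx
// forms) goes first and the index vector last, and the packed operands are
// handed to the requested tbl/tbx intrinsic.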
2553
2554Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
2555 unsigned Value;
2556 switch (BuiltinID) {
2557 default:
2558 return nullptr;
2559 case clang::ARM::BI__builtin_arm_nop:
2560 Value = 0;
2561 break;
2562 case clang::ARM::BI__builtin_arm_yield:
2563 case clang::ARM::BI__yield:
2564 Value = 1;
2565 break;
2566 case clang::ARM::BI__builtin_arm_wfe:
2567 case clang::ARM::BI__wfe:
2568 Value = 2;
2569 break;
2570 case clang::ARM::BI__builtin_arm_wfi:
2571 case clang::ARM::BI__wfi:
2572 Value = 3;
2573 break;
2574 case clang::ARM::BI__builtin_arm_sev:
2575 case clang::ARM::BI__sev:
2576 Value = 4;
2577 break;
2578 case clang::ARM::BI__builtin_arm_sevl:
2579 case clang::ARM::BI__sevl:
2580 Value = 5;
2581 break;
2582 }
2583
2584 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
2585 llvm::ConstantInt::get(Int32Ty, Value));
2586}
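// For illustration, __builtin_arm_wfi() lowers through this helper to
//   call void @llvm.arm.hint(i32 3)
// and __yield() to the same intrinsic with an i32 1 operand.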
2587
 2588enum SpecialRegisterAccessKind {
 2589 NormalRead,
 2590 VolatileRead,
 2591 Write,
 2592};
 2593
 2594// Generates the IR for the read/write special register builtin.
 2595// ValueType is the type of the value that is to be written or read;
 2596// RegisterType is the type of the register being written to or read from.
 2597static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
 2598 const CallExpr *E,
2599 llvm::Type *RegisterType,
2600 llvm::Type *ValueType,
2601 SpecialRegisterAccessKind AccessKind,
2602 StringRef SysReg = "") {
 2603 // The read/write register intrinsics only support 32-, 64- and 128-bit operations.
2604 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
2605 RegisterType->isIntegerTy(128)) &&
2606 "Unsupported size for register.");
2607
2608 CodeGen::CGBuilderTy &Builder = CGF.Builder;
2609 CodeGen::CodeGenModule &CGM = CGF.CGM;
2610 LLVMContext &Context = CGM.getLLVMContext();
2611
2612 if (SysReg.empty()) {
2613 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
2614 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
2615 }
2616
2617 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
2618 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
2619 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
2620
2621 llvm::Type *Types[] = { RegisterType };
2622
2623 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
2624 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
2625 && "Can't fit 64-bit value in 32-bit register");
2626
2627 if (AccessKind != Write) {
2628 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
2629 llvm::Function *F = CGM.getIntrinsic(
2630 AccessKind == VolatileRead ? Intrinsic::read_volatile_register
2631 : Intrinsic::read_register,
2632 Types);
2633 llvm::Value *Call = Builder.CreateCall(F, Metadata);
2634
2635 if (MixedTypes)
2636 // Read into 64 bit register and then truncate result to 32 bit.
2637 return Builder.CreateTrunc(Call, ValueType);
2638
2639 if (ValueType->isPointerTy())
2640 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
2641 return Builder.CreateIntToPtr(Call, ValueType);
2642
2643 return Call;
2644 }
2645
2646 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
2647 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
2648 if (MixedTypes) {
2649 // Extend 32 bit write value to 64 bit to pass to write.
2650 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
2651 return Builder.CreateCall(F, { Metadata, ArgValue });
2652 }
2653
2654 if (ValueType->isPointerTy()) {
2655 // Have VoidPtrTy ArgValue but want to return an i32/i64.
2656 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
2657 return Builder.CreateCall(F, { Metadata, ArgValue });
2658 }
2659
2660 return Builder.CreateCall(F, { Metadata, ArgValue });
2661}
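// As a rough sketch (register name hypothetical), a 32-bit volatile read such
// as __builtin_arm_rsr("cpsr") becomes
//   %v = call i32 @llvm.read_volatile_register.i32(metadata !0)
//   ; where !0 = !{!"cpsr"}
// while the corresponding wsr builtin calls @llvm.write_register with the same
// register-name metadata plus the value operand, extending or converting
// pointers as needed to match the register width.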
2662
2663static Value *EmitRangePrefetchBuiltin(CodeGenFunction &CGF, unsigned BuiltinID,
2664 const CallExpr *E) {
2665 CodeGen::CGBuilderTy &Builder = CGF.Builder;
2666 CodeGen::CodeGenModule &CGM = CGF.CGM;
 2667 SmallVector<Value *, 4> Ops;
 2668
2669 auto getIntArg = [&](unsigned ArgNo) {
2670 Expr::EvalResult Result;
2671 if (!E->getArg(ArgNo)->EvaluateAsInt(Result, CGM.getContext()))
2672 llvm_unreachable("Expected constant argument to range prefetch.");
2673 return Result.Val.getInt().getExtValue();
2674 };
2675
2676 Ops.push_back(CGF.EmitScalarExpr(E->getArg(0))); /*Addr*/
2677 Ops.push_back(CGF.EmitScalarExpr(E->getArg(1))); /*Access Kind*/
2678 Ops.push_back(CGF.EmitScalarExpr(E->getArg(2))); /*Policy*/
2679
2680 if (BuiltinID == clang::AArch64::BI__builtin_arm_range_prefetch_x) {
2681 auto Length = getIntArg(3);
2682 auto Count = getIntArg(4) - 1;
2683 auto Stride = getIntArg(5);
2684 auto Distance = getIntArg(6);
2685
2686 // Map ReuseDistance given in bytes to four bits representing decreasing
2687 // powers of two in the range 512MiB (0b0001) to 32KiB (0b1111). Values
2688 // are rounded up to the nearest power of 2, starting at 32KiB. Any value
2689 // over the maximum is represented by 0 (distance not known).
2690 if (Distance > 0) {
2691 Distance = llvm::Log2_32_Ceil(Distance);
2692 if (Distance < 15)
2693 Distance = 15;
2694 else if (Distance > 29)
2695 Distance = 0;
2696 else
2697 Distance = 30 - Distance;
2698 }
2699
2700 uint64_t Mask22 = (1ULL << 22) - 1;
2701 uint64_t Mask16 = (1ULL << 16) - 1;
2702 uint64_t Metadata = (Distance << 60) | ((Stride & Mask22) << 38) |
2703 ((Count & Mask16) << 22) | (Length & Mask22);
2704
2705 Ops.push_back(llvm::ConstantInt::get(Builder.getInt64Ty(), Metadata));
2706 } else
2707 Ops.push_back(CGF.EmitScalarExpr(E->getArg(3)));
2708
2709 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_range_prefetch),
2710 Ops);
2711}
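// Worked example with hypothetical arguments: a ReuseDistance of 64 KiB gives
// Log2_32_Ceil(65536) == 16, which is inside the [15, 29] window and therefore
// encodes as 30 - 16 == 14 (0b1110). The packed 64-bit metadata operand holds
// Length in bits [21:0], the count argument minus one in bits [37:22], Stride
// in bits [59:38] and this 4-bit distance code in bits [63:60].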
2712
2713/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
2714/// argument that specifies the vector type.
2715static bool HasExtraNeonArgument(unsigned BuiltinID) {
2716 switch (BuiltinID) {
2717 default: break;
2718 case NEON::BI__builtin_neon_vget_lane_i8:
2719 case NEON::BI__builtin_neon_vget_lane_i16:
2720 case NEON::BI__builtin_neon_vget_lane_bf16:
2721 case NEON::BI__builtin_neon_vget_lane_i32:
2722 case NEON::BI__builtin_neon_vget_lane_i64:
2723 case NEON::BI__builtin_neon_vget_lane_mf8:
2724 case NEON::BI__builtin_neon_vget_lane_f32:
2725 case NEON::BI__builtin_neon_vgetq_lane_i8:
2726 case NEON::BI__builtin_neon_vgetq_lane_i16:
2727 case NEON::BI__builtin_neon_vgetq_lane_bf16:
2728 case NEON::BI__builtin_neon_vgetq_lane_i32:
2729 case NEON::BI__builtin_neon_vgetq_lane_i64:
2730 case NEON::BI__builtin_neon_vgetq_lane_mf8:
2731 case NEON::BI__builtin_neon_vgetq_lane_f32:
2732 case NEON::BI__builtin_neon_vduph_lane_bf16:
2733 case NEON::BI__builtin_neon_vduph_laneq_bf16:
2734 case NEON::BI__builtin_neon_vset_lane_i8:
2735 case NEON::BI__builtin_neon_vset_lane_mf8:
2736 case NEON::BI__builtin_neon_vset_lane_i16:
2737 case NEON::BI__builtin_neon_vset_lane_bf16:
2738 case NEON::BI__builtin_neon_vset_lane_i32:
2739 case NEON::BI__builtin_neon_vset_lane_i64:
2740 case NEON::BI__builtin_neon_vset_lane_f32:
2741 case NEON::BI__builtin_neon_vsetq_lane_i8:
2742 case NEON::BI__builtin_neon_vsetq_lane_mf8:
2743 case NEON::BI__builtin_neon_vsetq_lane_i16:
2744 case NEON::BI__builtin_neon_vsetq_lane_bf16:
2745 case NEON::BI__builtin_neon_vsetq_lane_i32:
2746 case NEON::BI__builtin_neon_vsetq_lane_i64:
2747 case NEON::BI__builtin_neon_vsetq_lane_f32:
2748 case NEON::BI__builtin_neon_vsha1h_u32:
2749 case NEON::BI__builtin_neon_vsha1cq_u32:
2750 case NEON::BI__builtin_neon_vsha1pq_u32:
2751 case NEON::BI__builtin_neon_vsha1mq_u32:
2752 case NEON::BI__builtin_neon_vcvth_bf16_f32:
2753 case clang::ARM::BI_MoveToCoprocessor:
2754 case clang::ARM::BI_MoveToCoprocessor2:
2755 return false;
2756 }
2757 return true;
2758}
2759
 2760Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
 2761 const CallExpr *E,
 2762 ReturnValueSlot ReturnValue,
 2763 llvm::Triple::ArchType Arch) {
2764 if (auto Hint = GetValueForARMHint(BuiltinID))
2765 return Hint;
2766
2767 if (BuiltinID == clang::ARM::BI__emit) {
2768 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
2769 llvm::FunctionType *FTy =
2770 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
2771
 2772 Expr::EvalResult Result;
 2773 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
2774 llvm_unreachable("Sema will ensure that the parameter is constant");
2775
2776 llvm::APSInt Value = Result.Val.getInt();
2777 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
2778
2779 llvm::InlineAsm *Emit =
2780 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
2781 /*hasSideEffects=*/true)
2782 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
2783 /*hasSideEffects=*/true);
2784
2785 return Builder.CreateCall(Emit);
2786 }
2787
2788 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
2789 Value *Option = EmitScalarExpr(E->getArg(0));
2790 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
2791 }
2792
2793 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
 2794 Value *Address = EmitScalarExpr(E->getArg(0));
 2795 Value *RW = EmitScalarExpr(E->getArg(1));
2796 Value *IsData = EmitScalarExpr(E->getArg(2));
2797
2798 // Locality is not supported on ARM target
2799 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
2800
2801 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
2802 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
2803 }
2804
2805 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
2806 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2807 return Builder.CreateCall(
2808 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
2809 }
2810
2811 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
2812 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
2813 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2814 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
2815 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
2816 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
2817 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
2818 return Res;
2819 }
2820
2821
2822 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
2823 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2824 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
2825 }
2826 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
2827 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2828 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
2829 "cls");
2830 }
2831
2832 if (BuiltinID == clang::ARM::BI__clear_cache) {
2833 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
2834 const FunctionDecl *FD = E->getDirectCallee();
2835 Value *Ops[2];
2836 for (unsigned i = 0; i < 2; i++)
2837 Ops[i] = EmitScalarExpr(E->getArg(i));
2838 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
2839 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
2840 StringRef Name = FD->getName();
2841 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
2842 }
2843
2844 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
2845 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
2846 Function *F;
2847
2848 switch (BuiltinID) {
2849 default: llvm_unreachable("unexpected builtin");
2850 case clang::ARM::BI__builtin_arm_mcrr:
2851 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
2852 break;
2853 case clang::ARM::BI__builtin_arm_mcrr2:
2854 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
2855 break;
2856 }
2857
 2858 // The MCRR{2} instruction has 5 operands, but
 2859 // the intrinsic has only 4 because Rt and Rt2
 2860 // are represented as a single unsigned 64-bit
 2861 // integer in the intrinsic definition, while
 2862 // internally they are modelled as two
 2863 // 32-bit integers.
2864
2865 Value *Coproc = EmitScalarExpr(E->getArg(0));
2866 Value *Opc1 = EmitScalarExpr(E->getArg(1));
2867 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
2868 Value *CRm = EmitScalarExpr(E->getArg(3));
2869
2870 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
2871 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
2872 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
2873 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
2874
2875 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
2876 }
2877
2878 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
2879 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
2880 Function *F;
2881
2882 switch (BuiltinID) {
2883 default: llvm_unreachable("unexpected builtin");
2884 case clang::ARM::BI__builtin_arm_mrrc:
2885 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
2886 break;
2887 case clang::ARM::BI__builtin_arm_mrrc2:
2888 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
2889 break;
2890 }
2891
2892 Value *Coproc = EmitScalarExpr(E->getArg(0));
2893 Value *Opc1 = EmitScalarExpr(E->getArg(1));
2894 Value *CRm = EmitScalarExpr(E->getArg(2));
2895 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
2896
2897 // Returns an unsigned 64 bit integer, represented
2898 // as two 32 bit integers.
2899
2900 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
2901 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
2902 Rt = Builder.CreateZExt(Rt, Int64Ty);
2903 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
2904
2905 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
2906 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
2907 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
2908
2909 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
2910 }
2911
2912 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
2913 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2914 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
2915 getContext().getTypeSize(E->getType()) == 64) ||
2916 BuiltinID == clang::ARM::BI__ldrexd) {
2917 Function *F;
2918
2919 switch (BuiltinID) {
2920 default: llvm_unreachable("unexpected builtin");
2921 case clang::ARM::BI__builtin_arm_ldaex:
2922 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
2923 break;
2924 case clang::ARM::BI__builtin_arm_ldrexd:
2925 case clang::ARM::BI__builtin_arm_ldrex:
2926 case clang::ARM::BI__ldrexd:
2927 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
2928 break;
2929 }
2930
2931 Value *LdPtr = EmitScalarExpr(E->getArg(0));
2932 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
2933
2934 Value *Val0 = Builder.CreateExtractValue(Val, 1);
2935 Value *Val1 = Builder.CreateExtractValue(Val, 0);
2936 Val0 = Builder.CreateZExt(Val0, Int64Ty);
2937 Val1 = Builder.CreateZExt(Val1, Int64Ty);
2938
2939 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
2940 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
2941 Val = Builder.CreateOr(Val, Val1);
2942 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
2943 }
2944
2945 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2946 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
2947 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
2948
2949 QualType Ty = E->getType();
2950 llvm::Type *RealResTy = ConvertType(Ty);
2951 llvm::Type *IntTy =
2952 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
2953
2954 Function *F = CGM.getIntrinsic(
2955 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
2956 : Intrinsic::arm_ldrex,
2957 DefaultPtrTy);
2958 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
2959 Val->addParamAttr(
2960 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
2961
2962 if (RealResTy->isPointerTy())
2963 return Builder.CreateIntToPtr(Val, RealResTy);
2964 else {
2965 llvm::Type *IntResTy = llvm::IntegerType::get(
2966 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
2967 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
2968 RealResTy);
2969 }
2970 }
2971
2972 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
2973 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
2974 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
2975 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
2976 Function *F = CGM.getIntrinsic(
2977 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
2978 : Intrinsic::arm_strexd);
2979 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
2980
2981 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
2982 Value *Val = EmitScalarExpr(E->getArg(0));
2983 Builder.CreateStore(Val, Tmp);
2984
2985 Address LdPtr = Tmp.withElementType(STy);
2986 Val = Builder.CreateLoad(LdPtr);
2987
2988 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
2989 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
2990 Value *StPtr = EmitScalarExpr(E->getArg(1));
2991 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
2992 }
2993
2994 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
2995 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
2996 Value *StoreVal = EmitScalarExpr(E->getArg(0));
2997 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
2998
2999 QualType Ty = E->getArg(0)->getType();
3000 llvm::Type *StoreTy =
3001 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
3002
3003 if (StoreVal->getType()->isPointerTy())
3004 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
3005 else {
3006 llvm::Type *IntTy = llvm::IntegerType::get(
 3007 getLLVMContext(),
 3008 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
3009 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
3010 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
3011 }
3012
3013 Function *F = CGM.getIntrinsic(
3014 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
3015 : Intrinsic::arm_strex,
3016 StoreAddr->getType());
3017
3018 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
3019 CI->addParamAttr(
3020 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
3021 return CI;
3022 }
3023
3024 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
3025 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
3026 return Builder.CreateCall(F);
3027 }
3028
3029 // CRC32
3030 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
3031 switch (BuiltinID) {
3032 case clang::ARM::BI__builtin_arm_crc32b:
3033 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
3034 case clang::ARM::BI__builtin_arm_crc32cb:
3035 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
3036 case clang::ARM::BI__builtin_arm_crc32h:
3037 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
3038 case clang::ARM::BI__builtin_arm_crc32ch:
3039 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
3040 case clang::ARM::BI__builtin_arm_crc32w:
3041 case clang::ARM::BI__builtin_arm_crc32d:
3042 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
3043 case clang::ARM::BI__builtin_arm_crc32cw:
3044 case clang::ARM::BI__builtin_arm_crc32cd:
3045 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
3046 }
3047
3048 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
3049 Value *Arg0 = EmitScalarExpr(E->getArg(0));
3050 Value *Arg1 = EmitScalarExpr(E->getArg(1));
3051
3052 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
3053 // intrinsics, hence we need different codegen for these cases.
3054 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
3055 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
3056 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
3057 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
3058 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
3059 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
3060
3061 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
3062 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
3063 return Builder.CreateCall(F, {Res, Arg1b});
3064 } else {
3065 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
3066
3067 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
3068 return Builder.CreateCall(F, {Arg0, Arg1});
3069 }
3070 }
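  // For illustration, __builtin_arm_crc32d(crc, data) therefore becomes a
  // chain of two word-sized calls (value names hypothetical):
  //   %lo = call i32 @llvm.arm.crc32w(i32 %crc, i32 %data.lo)
  //   %r  = call i32 @llvm.arm.crc32w(i32 %lo, i32 %data.hi)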
3071
3072 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
3073 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
3074 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
3075 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
3076 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
3077 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
3078
3079 SpecialRegisterAccessKind AccessKind = Write;
3080 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
3081 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
3082 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
3083 AccessKind = VolatileRead;
3084
3085 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
3086 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
3087
3088 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
3089 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
3090
3091 llvm::Type *ValueType;
3092 llvm::Type *RegisterType;
3093 if (IsPointerBuiltin) {
3094 ValueType = VoidPtrTy;
 3095 RegisterType = Int32Ty;
 3096 } else if (Is64Bit) {
3097 ValueType = RegisterType = Int64Ty;
3098 } else {
3099 ValueType = RegisterType = Int32Ty;
3100 }
3101
3102 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
3103 AccessKind);
3104 }
3105
3106 if (BuiltinID == ARM::BI__builtin_sponentry) {
3107 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
3108 return Builder.CreateCall(F);
3109 }
3110
3111 // Handle MSVC intrinsics before argument evaluation to prevent double
3112 // evaluation.
3113 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
3114 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
3115
3116 // Deal with MVE builtins
3117 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
3118 return Result;
3119 // Handle CDE builtins
3120 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
3121 return Result;
3122
 3123 // Some intrinsics are equivalent; if they are, use the base intrinsic ID.
3124 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
3125 return P.first == BuiltinID;
3126 });
3127 if (It != end(NEONEquivalentIntrinsicMap))
3128 BuiltinID = It->second;
3129
3130 // Find out if any arguments are required to be integer constant
3131 // expressions.
3132 unsigned ICEArguments = 0;
 3133 ASTContext::GetBuiltinTypeError Error;
 3134 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3135 assert(Error == ASTContext::GE_None && "Should not codegen an error");
3136
3137 auto getAlignmentValue32 = [&](Address addr) -> Value* {
3138 return Builder.getInt32(addr.getAlignment().getQuantity());
3139 };
3140
3141 Address PtrOp0 = Address::invalid();
3142 Address PtrOp1 = Address::invalid();
 3143 SmallVector<Value *, 4> Ops;
 3144 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
3145 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
3146 for (unsigned i = 0, e = NumArgs; i != e; i++) {
3147 if (i == 0) {
3148 switch (BuiltinID) {
3149 case NEON::BI__builtin_neon_vld1_v:
3150 case NEON::BI__builtin_neon_vld1q_v:
3151 case NEON::BI__builtin_neon_vld1q_lane_v:
3152 case NEON::BI__builtin_neon_vld1_lane_v:
3153 case NEON::BI__builtin_neon_vld1_dup_v:
3154 case NEON::BI__builtin_neon_vld1q_dup_v:
3155 case NEON::BI__builtin_neon_vst1_v:
3156 case NEON::BI__builtin_neon_vst1q_v:
3157 case NEON::BI__builtin_neon_vst1q_lane_v:
3158 case NEON::BI__builtin_neon_vst1_lane_v:
3159 case NEON::BI__builtin_neon_vst2_v:
3160 case NEON::BI__builtin_neon_vst2q_v:
3161 case NEON::BI__builtin_neon_vst2_lane_v:
3162 case NEON::BI__builtin_neon_vst2q_lane_v:
3163 case NEON::BI__builtin_neon_vst3_v:
3164 case NEON::BI__builtin_neon_vst3q_v:
3165 case NEON::BI__builtin_neon_vst3_lane_v:
3166 case NEON::BI__builtin_neon_vst3q_lane_v:
3167 case NEON::BI__builtin_neon_vst4_v:
3168 case NEON::BI__builtin_neon_vst4q_v:
3169 case NEON::BI__builtin_neon_vst4_lane_v:
3170 case NEON::BI__builtin_neon_vst4q_lane_v:
3171 // Get the alignment for the argument in addition to the value;
3172 // we'll use it later.
3173 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
3174 Ops.push_back(PtrOp0.emitRawPointer(*this));
3175 continue;
3176 }
3177 }
3178 if (i == 1) {
3179 switch (BuiltinID) {
3180 case NEON::BI__builtin_neon_vld2_v:
3181 case NEON::BI__builtin_neon_vld2q_v:
3182 case NEON::BI__builtin_neon_vld3_v:
3183 case NEON::BI__builtin_neon_vld3q_v:
3184 case NEON::BI__builtin_neon_vld4_v:
3185 case NEON::BI__builtin_neon_vld4q_v:
3186 case NEON::BI__builtin_neon_vld2_lane_v:
3187 case NEON::BI__builtin_neon_vld2q_lane_v:
3188 case NEON::BI__builtin_neon_vld3_lane_v:
3189 case NEON::BI__builtin_neon_vld3q_lane_v:
3190 case NEON::BI__builtin_neon_vld4_lane_v:
3191 case NEON::BI__builtin_neon_vld4q_lane_v:
3192 case NEON::BI__builtin_neon_vld2_dup_v:
3193 case NEON::BI__builtin_neon_vld2q_dup_v:
3194 case NEON::BI__builtin_neon_vld3_dup_v:
3195 case NEON::BI__builtin_neon_vld3q_dup_v:
3196 case NEON::BI__builtin_neon_vld4_dup_v:
3197 case NEON::BI__builtin_neon_vld4q_dup_v:
3198 // Get the alignment for the argument in addition to the value;
3199 // we'll use it later.
3200 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
3201 Ops.push_back(PtrOp1.emitRawPointer(*this));
3202 continue;
3203 }
3204 }
3205
3206 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
3207 }
3208
3209 switch (BuiltinID) {
3210 default: break;
3211
3212 case NEON::BI__builtin_neon_vget_lane_i8:
3213 case NEON::BI__builtin_neon_vget_lane_i16:
3214 case NEON::BI__builtin_neon_vget_lane_i32:
3215 case NEON::BI__builtin_neon_vget_lane_i64:
3216 case NEON::BI__builtin_neon_vget_lane_bf16:
3217 case NEON::BI__builtin_neon_vget_lane_f32:
3218 case NEON::BI__builtin_neon_vgetq_lane_i8:
3219 case NEON::BI__builtin_neon_vgetq_lane_i16:
3220 case NEON::BI__builtin_neon_vgetq_lane_i32:
3221 case NEON::BI__builtin_neon_vgetq_lane_i64:
3222 case NEON::BI__builtin_neon_vgetq_lane_bf16:
3223 case NEON::BI__builtin_neon_vgetq_lane_f32:
3224 case NEON::BI__builtin_neon_vduph_lane_bf16:
3225 case NEON::BI__builtin_neon_vduph_laneq_bf16:
3226 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
3227
3228 case NEON::BI__builtin_neon_vrndns_f32: {
3229 Value *Arg = EmitScalarExpr(E->getArg(0));
3230 llvm::Type *Tys[] = {Arg->getType()};
3231 Function *F = CGM.getIntrinsic(Intrinsic::roundeven, Tys);
3232 return Builder.CreateCall(F, {Arg}, "vrndn"); }
3233
3234 case NEON::BI__builtin_neon_vset_lane_i8:
3235 case NEON::BI__builtin_neon_vset_lane_i16:
3236 case NEON::BI__builtin_neon_vset_lane_i32:
3237 case NEON::BI__builtin_neon_vset_lane_i64:
3238 case NEON::BI__builtin_neon_vset_lane_bf16:
3239 case NEON::BI__builtin_neon_vset_lane_f32:
3240 case NEON::BI__builtin_neon_vsetq_lane_i8:
3241 case NEON::BI__builtin_neon_vsetq_lane_i16:
3242 case NEON::BI__builtin_neon_vsetq_lane_i32:
3243 case NEON::BI__builtin_neon_vsetq_lane_i64:
3244 case NEON::BI__builtin_neon_vsetq_lane_bf16:
3245 case NEON::BI__builtin_neon_vsetq_lane_f32:
3246 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
3247
3248 case NEON::BI__builtin_neon_vsha1h_u32:
3249 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
3250 "vsha1h");
3251 case NEON::BI__builtin_neon_vsha1cq_u32:
3252 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
3253 "vsha1h");
3254 case NEON::BI__builtin_neon_vsha1pq_u32:
3255 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
3256 "vsha1h");
3257 case NEON::BI__builtin_neon_vsha1mq_u32:
3258 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
3259 "vsha1h");
3260
3261 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
3262 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
3263 "vcvtbfp2bf");
3264 }
3265
3266 // The ARM _MoveToCoprocessor builtins put the input register value as
3267 // the first argument, but the LLVM intrinsic expects it as the third one.
3268 case clang::ARM::BI_MoveToCoprocessor:
3269 case clang::ARM::BI_MoveToCoprocessor2: {
3270 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
3271 ? Intrinsic::arm_mcr
3272 : Intrinsic::arm_mcr2);
3273 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
3274 Ops[3], Ops[4], Ops[5]});
3275 }
3276 }
3277
3278 // Get the last argument, which specifies the vector type.
3279 assert(HasExtraArg);
3280 const Expr *Arg = E->getArg(E->getNumArgs()-1);
3281 std::optional<llvm::APSInt> Result =
 3282 Arg->getIntegerConstantExpr(getContext());
 3283 if (!Result)
3284 return nullptr;
3285
3286 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
3287 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
3288 // Determine the overloaded type of this builtin.
3289 llvm::Type *Ty;
3290 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
3291 Ty = FloatTy;
3292 else
3293 Ty = DoubleTy;
3294
3295 // Determine whether this is an unsigned conversion or not.
3296 bool usgn = Result->getZExtValue() == 1;
3297 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
3298
3299 // Call the appropriate intrinsic.
3300 Function *F = CGM.getIntrinsic(Int, Ty);
3301 return Builder.CreateCall(F, Ops, "vcvtr");
3302 }
3303
3304 // Determine the type of this overloaded NEON intrinsic.
3305 NeonTypeFlags Type = Result->getZExtValue();
3306 bool usgn = Type.isUnsigned();
3307 bool rightShift = false;
3308
3309 llvm::FixedVectorType *VTy =
3310 GetNeonType(this, Type, getTarget().hasFastHalfType(), false,
3311 getTarget().hasBFloat16Type());
3312 llvm::Type *Ty = VTy;
3313 if (!Ty)
3314 return nullptr;
3315
3316 // Many NEON builtins have identical semantics and uses in ARM and
3317 // AArch64. Emit these in a single function.
3318 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
3319 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
3320 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
3321 if (Builtin)
 3322 return EmitCommonNeonBuiltinExpr(
 3323 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
3324 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
3325
3326 unsigned Int;
3327 switch (BuiltinID) {
3328 default: return nullptr;
3329 case NEON::BI__builtin_neon_vld1q_lane_v:
3330 // Handle 64-bit integer elements as a special case. Use shuffles of
3331 // one-element vectors to avoid poor code for i64 in the backend.
3332 if (VTy->getElementType()->isIntegerTy(64)) {
3333 // Extract the other lane.
3334 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3335 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
3336 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
3337 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
3338 // Load the value as a one-element vector.
3339 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
3340 llvm::Type *Tys[] = {Ty, Int8PtrTy};
3341 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
3342 Value *Align = getAlignmentValue32(PtrOp0);
3343 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
3344 // Combine them.
3345 int Indices[] = {1 - Lane, Lane};
3346 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
3347 }
3348 [[fallthrough]];
3349 case NEON::BI__builtin_neon_vld1_lane_v: {
3350 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3351 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
3352 Value *Ld = Builder.CreateLoad(PtrOp0);
3353 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
3354 }
3355 case NEON::BI__builtin_neon_vqrshrn_n_v:
3356 Int =
3357 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
3358 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
3359 1, true);
3360 case NEON::BI__builtin_neon_vqrshrun_n_v:
3361 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
3362 Ops, "vqrshrun_n", 1, true);
3363 case NEON::BI__builtin_neon_vqshrn_n_v:
3364 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
3365 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
3366 1, true);
3367 case NEON::BI__builtin_neon_vqshrun_n_v:
3368 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
3369 Ops, "vqshrun_n", 1, true);
3370 case NEON::BI__builtin_neon_vrecpe_v:
3371 case NEON::BI__builtin_neon_vrecpeq_v:
3372 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
3373 Ops, "vrecpe");
3374 case NEON::BI__builtin_neon_vrshrn_n_v:
3375 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
3376 Ops, "vrshrn_n", 1, true);
3377 case NEON::BI__builtin_neon_vrsra_n_v:
3378 case NEON::BI__builtin_neon_vrsraq_n_v:
3379 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3380 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3381 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
3382 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
3383 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
3384 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
3385 case NEON::BI__builtin_neon_vsri_n_v:
3386 case NEON::BI__builtin_neon_vsriq_n_v:
3387 rightShift = true;
3388 [[fallthrough]];
3389 case NEON::BI__builtin_neon_vsli_n_v:
3390 case NEON::BI__builtin_neon_vsliq_n_v:
3391 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
3392 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
3393 Ops, "vsli_n");
3394 case NEON::BI__builtin_neon_vsra_n_v:
3395 case NEON::BI__builtin_neon_vsraq_n_v:
3396 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3397 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
3398 return Builder.CreateAdd(Ops[0], Ops[1]);
3399 case NEON::BI__builtin_neon_vst1q_lane_v:
3400 // Handle 64-bit integer elements as a special case. Use a shuffle to get
3401 // a one-element vector and avoid poor code for i64 in the backend.
3402 if (VTy->getElementType()->isIntegerTy(64)) {
3403 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3404 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
3405 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
3406 Ops[2] = getAlignmentValue32(PtrOp0);
3407 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
3408 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
3409 Tys), Ops);
3410 }
3411 [[fallthrough]];
3412 case NEON::BI__builtin_neon_vst1_lane_v: {
3413 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3414 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
3415 return Builder.CreateStore(Ops[1],
3416 PtrOp0.withElementType(Ops[1]->getType()));
3417 }
3418 case NEON::BI__builtin_neon_vtbl1_v:
3419 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
3420 Ops, "vtbl1");
3421 case NEON::BI__builtin_neon_vtbl2_v:
3422 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
3423 Ops, "vtbl2");
3424 case NEON::BI__builtin_neon_vtbl3_v:
3425 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
3426 Ops, "vtbl3");
3427 case NEON::BI__builtin_neon_vtbl4_v:
3428 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
3429 Ops, "vtbl4");
3430 case NEON::BI__builtin_neon_vtbx1_v:
3431 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
3432 Ops, "vtbx1");
3433 case NEON::BI__builtin_neon_vtbx2_v:
3434 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
3435 Ops, "vtbx2");
3436 case NEON::BI__builtin_neon_vtbx3_v:
3437 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
3438 Ops, "vtbx3");
3439 case NEON::BI__builtin_neon_vtbx4_v:
3440 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
3441 Ops, "vtbx4");
3442 }
3443}
3444
3445template<typename Integer>
 3446static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
 3447 return E->getIntegerConstantExpr(Context)->getExtValue();
3448}
3449
3450static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
3451 llvm::Type *T, bool Unsigned) {
3452 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
3453 // which finds it convenient to specify signed/unsigned as a boolean flag.
3454 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
3455}
3456
3457static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
3458 uint32_t Shift, bool Unsigned) {
3459 // MVE helper function for integer shift right. This must handle signed vs
3460 // unsigned, and also deal specially with the case where the shift count is
3461 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
3462 // undefined behavior, but in MVE it's legal, so we must convert it to code
3463 // that is not undefined in IR.
3464 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
3465 ->getElementType()
3466 ->getPrimitiveSizeInBits();
3467 if (Shift == LaneBits) {
3468 // An unsigned shift of the full lane size always generates zero, so we can
3469 // simply emit a zero vector. A signed shift of the full lane size does the
3470 // same thing as shifting by one bit fewer.
3471 if (Unsigned)
3472 return llvm::Constant::getNullValue(V->getType());
3473 else
3474 --Shift;
3475 }
3476 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
3477}
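// Worked example: for 16-bit lanes an unsigned shift by 16 folds to an
// all-zero vector, a signed shift by 16 is emitted as an arithmetic shift by
// 15, and any smaller count is emitted directly as lshr/ashr.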
3478
3479static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
3480 // MVE-specific helper function for a vector splat, which infers the element
3481 // count of the output vector by knowing that MVE vectors are all 128 bits
3482 // wide.
3483 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
3484 return Builder.CreateVectorSplat(Elements, V);
3485}
3486
3487static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
3488 CodeGenFunction *CGF,
3489 llvm::Value *V,
3490 llvm::Type *DestType) {
3491 // Convert one MVE vector type into another by reinterpreting its in-register
3492 // format.
3493 //
 3494 // In little-endian mode, this is identical to a bitcast (which reinterprets
 3495 // the memory format). In big-endian mode, they're not necessarily the same, because
3496 // the register and memory formats map to each other differently depending on
3497 // the lane size.
3498 //
3499 // We generate a bitcast whenever we can (if we're little-endian, or if the
3500 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
3501 // that performs the different kind of reinterpretation.
3502 if (CGF->getTarget().isBigEndian() &&
3503 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
3504 return Builder.CreateCall(
3505 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
3506 {DestType, V->getType()}),
3507 V);
3508 } else {
3509 return Builder.CreateBitCast(V, DestType);
3510 }
3511}
3512
3513static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
3514 // Make a shufflevector that extracts every other element of a vector (evens
3515 // or odds, as desired).
3516 SmallVector<int, 16> Indices;
3517 unsigned InputElements =
3518 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
3519 for (unsigned i = 0; i < InputElements; i += 2)
3520 Indices.push_back(i + Odd);
3521 return Builder.CreateShuffleVector(V, Indices);
3522}
3523
3524static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
3525 llvm::Value *V1) {
3526 // Make a shufflevector that interleaves two vectors element by element.
3527 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
3528 SmallVector<int, 16> Indices;
3529 unsigned InputElements =
3530 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
3531 for (unsigned i = 0; i < InputElements; i++) {
3532 Indices.push_back(i);
3533 Indices.push_back(i + InputElements);
3534 }
3535 return Builder.CreateShuffleVector(V0, V1, Indices);
3536}
3537
3538template<unsigned HighBit, unsigned OtherBits>
3539static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
3540 // MVE-specific helper function to make a vector splat of a constant such as
3541 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
3542 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
3543 unsigned LaneBits = T->getPrimitiveSizeInBits();
3544 uint32_t Value = HighBit << (LaneBits - 1);
3545 if (OtherBits)
3546 Value |= (1UL << (LaneBits - 1)) - 1;
3547 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
3548 return ARMMVEVectorSplat(Builder, Lane);
3549}
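// Worked example: with 32-bit lanes, ARMMVEConstantSplat<1, 0> splats
// 0x80000000 (INT32_MIN) and ARMMVEConstantSplat<0, 1> splats 0x7fffffff
// (INT32_MAX) across the four lanes of the 128-bit vector.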
3550
3551static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
3552 llvm::Value *V,
3553 unsigned ReverseWidth) {
3554 // MVE-specific helper function which reverses the elements of a
3555 // vector within every (ReverseWidth)-bit collection of lanes.
3556 SmallVector<int, 16> Indices;
3557 unsigned LaneSize = V->getType()->getScalarSizeInBits();
3558 unsigned Elements = 128 / LaneSize;
3559 unsigned Mask = ReverseWidth / LaneSize - 1;
3560 for (unsigned i = 0; i < Elements; i++)
3561 Indices.push_back(i ^ Mask);
3562 return Builder.CreateShuffleVector(V, Indices);
3563}
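// Worked example: reversing 8-bit lanes within 32-bit groups (ReverseWidth
// == 32, LaneSize == 8) gives Mask == 3, so the shuffle mask is
// {3, 2, 1, 0, 7, 6, 5, 4, ...} across the 16 byte lanes.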
3564
3565static llvm::Value *ARMMVECreateSIToFP(CGBuilderTy &Builder,
3566 CodeGenFunction *CGF, llvm::Value *V,
3567 llvm::Type *Ty) {
3568 return Builder.CreateCall(
3569 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_fp_int, {Ty, V->getType()}),
3570 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 0)});
3571}
3572
3573static llvm::Value *ARMMVECreateUIToFP(CGBuilderTy &Builder,
3574 CodeGenFunction *CGF, llvm::Value *V,
3575 llvm::Type *Ty) {
3576 return Builder.CreateCall(
3577 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_fp_int, {Ty, V->getType()}),
3578 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)});
3579}
3580
3581static llvm::Value *ARMMVECreateFPToSI(CGBuilderTy &Builder,
3582 CodeGenFunction *CGF, llvm::Value *V,
3583 llvm::Type *Ty) {
3584 return Builder.CreateCall(
3585 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_int_fp, {Ty, V->getType()}),
3586 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 0)});
3587}
3588
3589static llvm::Value *ARMMVECreateFPToUI(CGBuilderTy &Builder,
3590 CodeGenFunction *CGF, llvm::Value *V,
3591 llvm::Type *Ty) {
3592 return Builder.CreateCall(
3593 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_int_fp, {Ty, V->getType()}),
3594 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)});
3595}
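// In the four helpers above, the trailing i32 operand selects the unsigned
// flavour of the MVE conversion: 0 for the signed forms (SIToFP / FPToSI) and
// 1 for the unsigned forms (UIToFP / FPToUI), with arm_mve_vcvt_fp_int used
// for the int-to-float direction and arm_mve_vcvt_int_fp for float-to-int.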
3596
 3597Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
 3598 const CallExpr *E,
 3599 ReturnValueSlot ReturnValue,
 3600 llvm::Triple::ArchType Arch) {
3601 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
3602 Intrinsic::ID IRIntr;
3603 unsigned NumVectors;
3604
3605 // Code autogenerated by Tablegen will handle all the simple builtins.
3606 switch (BuiltinID) {
3607 #include "clang/Basic/arm_mve_builtin_cg.inc"
3608
3609 // If we didn't match an MVE builtin id at all, go back to the
3610 // main EmitARMBuiltinExpr.
3611 default:
3612 return nullptr;
3613 }
3614
3615 // Anything that breaks from that switch is an MVE builtin that
3616 // needs handwritten code to generate.
3617
3618 switch (CustomCodeGenType) {
3619
3620 case CustomCodeGen::VLD24: {
 3621 llvm::SmallVector<Value *, 4> Ops;
 3622 llvm::SmallVector<llvm::Type *, 4> Tys;
 3623
3624 auto MvecCType = E->getType();
3625 auto MvecLType = ConvertType(MvecCType);
3626 assert(MvecLType->isStructTy() &&
3627 "Return type for vld[24]q should be a struct");
3628 assert(MvecLType->getStructNumElements() == 1 &&
3629 "Return-type struct for vld[24]q should have one element");
3630 auto MvecLTypeInner = MvecLType->getStructElementType(0);
3631 assert(MvecLTypeInner->isArrayTy() &&
3632 "Return-type struct for vld[24]q should contain an array");
3633 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3634 "Array member of return-type struct vld[24]q has wrong length");
3635 auto VecLType = MvecLTypeInner->getArrayElementType();
3636
3637 Tys.push_back(VecLType);
3638
3639 auto Addr = E->getArg(0);
3640 Ops.push_back(EmitScalarExpr(Addr));
3641 Tys.push_back(ConvertType(Addr->getType()));
3642
3643 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
3644 Value *LoadResult = Builder.CreateCall(F, Ops);
3645 Value *MvecOut = PoisonValue::get(MvecLType);
3646 for (unsigned i = 0; i < NumVectors; ++i) {
3647 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
3648 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
3649 }
3650
3651 if (ReturnValue.isNull())
3652 return MvecOut;
3653 else
3654 return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
3655 }
3656
3657 case CustomCodeGen::VST24: {
 3658 llvm::SmallVector<Value *, 4> Ops;
 3659 llvm::SmallVector<llvm::Type *, 4> Tys;
 3660
3661 auto Addr = E->getArg(0);
3662 Ops.push_back(EmitScalarExpr(Addr));
3663 Tys.push_back(ConvertType(Addr->getType()));
3664
3665 auto MvecCType = E->getArg(1)->getType();
3666 auto MvecLType = ConvertType(MvecCType);
3667 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
3668 assert(MvecLType->getStructNumElements() == 1 &&
3669 "Data-type struct for vst2q should have one element");
3670 auto MvecLTypeInner = MvecLType->getStructElementType(0);
3671 assert(MvecLTypeInner->isArrayTy() &&
3672 "Data-type struct for vst2q should contain an array");
3673 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3674 "Array member of return-type struct vld[24]q has wrong length");
3675 auto VecLType = MvecLTypeInner->getArrayElementType();
3676
3677 Tys.push_back(VecLType);
3678
3679 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
3680 EmitAggExpr(E->getArg(1), MvecSlot);
3681 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
3682 for (unsigned i = 0; i < NumVectors; i++)
3683 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
3684
3685 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
3686 Value *ToReturn = nullptr;
3687 for (unsigned i = 0; i < NumVectors; i++) {
3688 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
3689 ToReturn = Builder.CreateCall(F, Ops);
3690 Ops.pop_back();
3691 }
3692 return ToReturn;
3693 }
3694 }
3695 llvm_unreachable("unknown custom codegen type.");
3696}
3697
 3698Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
 3699 const CallExpr *E,
 3700 ReturnValueSlot ReturnValue,
 3701 llvm::Triple::ArchType Arch) {
3702 switch (BuiltinID) {
3703 default:
3704 return nullptr;
3705#include "clang/Basic/arm_cde_builtin_cg.inc"
3706 }
3707}
3708
3709static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
3710 const CallExpr *E,
 3711 SmallVectorImpl<Value *> &Ops,
 3712 llvm::Triple::ArchType Arch) {
3713 unsigned int Int = 0;
3714 const char *s = nullptr;
3715
3716 switch (BuiltinID) {
3717 default:
3718 return nullptr;
3719 case NEON::BI__builtin_neon_vtbl1_v:
3720 case NEON::BI__builtin_neon_vqtbl1_v:
3721 case NEON::BI__builtin_neon_vqtbl1q_v:
3722 case NEON::BI__builtin_neon_vtbl2_v:
3723 case NEON::BI__builtin_neon_vqtbl2_v:
3724 case NEON::BI__builtin_neon_vqtbl2q_v:
3725 case NEON::BI__builtin_neon_vtbl3_v:
3726 case NEON::BI__builtin_neon_vqtbl3_v:
3727 case NEON::BI__builtin_neon_vqtbl3q_v:
3728 case NEON::BI__builtin_neon_vtbl4_v:
3729 case NEON::BI__builtin_neon_vqtbl4_v:
3730 case NEON::BI__builtin_neon_vqtbl4q_v:
3731 break;
3732 case NEON::BI__builtin_neon_vtbx1_v:
3733 case NEON::BI__builtin_neon_vqtbx1_v:
3734 case NEON::BI__builtin_neon_vqtbx1q_v:
3735 case NEON::BI__builtin_neon_vtbx2_v:
3736 case NEON::BI__builtin_neon_vqtbx2_v:
3737 case NEON::BI__builtin_neon_vqtbx2q_v:
3738 case NEON::BI__builtin_neon_vtbx3_v:
3739 case NEON::BI__builtin_neon_vqtbx3_v:
3740 case NEON::BI__builtin_neon_vqtbx3q_v:
3741 case NEON::BI__builtin_neon_vtbx4_v:
3742 case NEON::BI__builtin_neon_vqtbx4_v:
3743 case NEON::BI__builtin_neon_vqtbx4q_v:
3744 break;
3745 }
3746
3747 assert(E->getNumArgs() >= 3);
3748
3749 // Get the last argument, which specifies the vector type.
3750 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3751 std::optional<llvm::APSInt> Result =
3752       Arg->getIntegerConstantExpr(CGF.getContext());
3753 if (!Result)
3754 return nullptr;
3755
3756 // Determine the type of this overloaded NEON intrinsic.
3757 NeonTypeFlags Type = Result->getZExtValue();
3758 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
3759 if (!Ty)
3760 return nullptr;
3761
3762 CodeGen::CGBuilderTy &Builder = CGF.Builder;
3763
3764 // AArch64 scalar builtins are not overloaded; they do not have an extra
3765 // argument that specifies the vector type, so we need to handle each case.
3766 switch (BuiltinID) {
3767 case NEON::BI__builtin_neon_vtbl1_v: {
3768 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
3769 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3770 }
3771 case NEON::BI__builtin_neon_vtbl2_v: {
3772 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
3773 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3774 }
3775 case NEON::BI__builtin_neon_vtbl3_v: {
3776 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
3777 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3778 }
3779 case NEON::BI__builtin_neon_vtbl4_v: {
3780 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
3781 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3782 }
3783 case NEON::BI__builtin_neon_vtbx1_v: {
3784 Value *TblRes =
3785 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
3786 Intrinsic::aarch64_neon_tbl1, "vtbl1");
3787
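    // vtbx1 must leave the destination element unchanged when the index is
    // out of range, but the AArch64 TBL instruction returns zero for such
    // indices. Compare the indices against 8 and use the result to blend
    // between the original input (Ops[0]) and the TBL result.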
3788 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
3789 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
3790 CmpRes = Builder.CreateSExt(CmpRes, Ty);
3791
3792 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
3793 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
3794 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
3795 }
3796 case NEON::BI__builtin_neon_vtbx2_v: {
3797 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
3798 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
3799 }
3800 case NEON::BI__builtin_neon_vtbx3_v: {
3801 Value *TblRes =
3802 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
3803 Intrinsic::aarch64_neon_tbl2, "vtbl2");
3804
3805 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
3806 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
3807 TwentyFourV);
3808 CmpRes = Builder.CreateSExt(CmpRes, Ty);
3809
3810 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
3811 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
3812 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
3813 }
3814 case NEON::BI__builtin_neon_vtbx4_v: {
3815 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
3816 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
3817 }
3818 case NEON::BI__builtin_neon_vqtbl1_v:
3819 case NEON::BI__builtin_neon_vqtbl1q_v:
3820 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
3821 case NEON::BI__builtin_neon_vqtbl2_v:
3822 case NEON::BI__builtin_neon_vqtbl2q_v:
3823 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
3824 case NEON::BI__builtin_neon_vqtbl3_v:
3825 case NEON::BI__builtin_neon_vqtbl3q_v:
3826 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
3827 case NEON::BI__builtin_neon_vqtbl4_v:
3828 case NEON::BI__builtin_neon_vqtbl4q_v:
3829 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
3830 case NEON::BI__builtin_neon_vqtbx1_v:
3831 case NEON::BI__builtin_neon_vqtbx1q_v:
3832 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
3833 case NEON::BI__builtin_neon_vqtbx2_v:
3834 case NEON::BI__builtin_neon_vqtbx2q_v:
3835 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
3836 case NEON::BI__builtin_neon_vqtbx3_v:
3837 case NEON::BI__builtin_neon_vqtbx3q_v:
3838 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
3839 case NEON::BI__builtin_neon_vqtbx4_v:
3840 case NEON::BI__builtin_neon_vqtbx4q_v:
3841 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
3843 }
3844
3845 if (!Int)
3846 return nullptr;
3847
3848 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
3849 return CGF.EmitNeonCall(F, Ops, s);
3850}
3851
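// vectorWrapScalar16 - Wrap a scalar i16 operand in a <4 x i16> vector: the
// value is placed in lane 0 and the remaining lanes are left as poison, so
// that scalar builtins can be lowered through the corresponding vector
// intrinsics.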
3853 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
3854 Op = Builder.CreateBitCast(Op, Int16Ty);
3855 Value *V = PoisonValue::get(VTy);
3856 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3857 Op = Builder.CreateInsertElement(V, Op, CI);
3858 return Op;
3859}
3860
3861/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
3862/// access builtin. Only required if it can't be inferred from the base pointer
3863/// operand.
3865 switch (TypeFlags.getMemEltType()) {
3866 case SVETypeFlags::MemEltTyDefault:
3867 return getEltType(TypeFlags);
3868 case SVETypeFlags::MemEltTyInt8:
3869 return Builder.getInt8Ty();
3870 case SVETypeFlags::MemEltTyInt16:
3871 return Builder.getInt16Ty();
3872 case SVETypeFlags::MemEltTyInt32:
3873 return Builder.getInt32Ty();
3874 case SVETypeFlags::MemEltTyInt64:
3875 return Builder.getInt64Ty();
3876 }
3877 llvm_unreachable("Unknown MemEltType");
3878}
3879
3880llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
3881 switch (TypeFlags.getEltType()) {
3882 default:
3883 llvm_unreachable("Invalid SVETypeFlag!");
3884
3885 case SVETypeFlags::EltTyMFloat8:
3886 case SVETypeFlags::EltTyInt8:
3887 return Builder.getInt8Ty();
3888 case SVETypeFlags::EltTyInt16:
3889 return Builder.getInt16Ty();
3890 case SVETypeFlags::EltTyInt32:
3891 return Builder.getInt32Ty();
3892 case SVETypeFlags::EltTyInt64:
3893 return Builder.getInt64Ty();
3894 case SVETypeFlags::EltTyInt128:
3895 return Builder.getInt128Ty();
3896
3897 case SVETypeFlags::EltTyFloat16:
3898 return Builder.getHalfTy();
3899 case SVETypeFlags::EltTyFloat32:
3900 return Builder.getFloatTy();
3901 case SVETypeFlags::EltTyFloat64:
3902 return Builder.getDoubleTy();
3903
3904 case SVETypeFlags::EltTyBFloat16:
3905 return Builder.getBFloatTy();
3906
3907 case SVETypeFlags::EltTyBool8:
3908 case SVETypeFlags::EltTyBool16:
3909 case SVETypeFlags::EltTyBool32:
3910 case SVETypeFlags::EltTyBool64:
3911 return Builder.getInt1Ty();
3912 }
3913}
3914
3915// Return the llvm predicate vector type corresponding to the specified element
3916// TypeFlags.
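// For example, 64-bit element types map to <vscale x 2 x i1> and 8-bit
// element types map to <vscale x 16 x i1>.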
3917llvm::ScalableVectorType *
3919 switch (TypeFlags.getEltType()) {
3920 default: llvm_unreachable("Unhandled SVETypeFlag!");
3921
3922 case SVETypeFlags::EltTyInt8:
3923 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3924 case SVETypeFlags::EltTyInt16:
3925 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3926 case SVETypeFlags::EltTyInt32:
3927 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3928 case SVETypeFlags::EltTyInt64:
3929 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3930
3931 case SVETypeFlags::EltTyBFloat16:
3932 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3933 case SVETypeFlags::EltTyFloat16:
3934 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3935 case SVETypeFlags::EltTyFloat32:
3936 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3937 case SVETypeFlags::EltTyFloat64:
3938 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3939
3940 case SVETypeFlags::EltTyBool8:
3941 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3942 case SVETypeFlags::EltTyBool16:
3943 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3944 case SVETypeFlags::EltTyBool32:
3945 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3946 case SVETypeFlags::EltTyBool64:
3947 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3948 }
3949}
3950
3951// Return the llvm vector type corresponding to the specified element TypeFlags.
3952llvm::ScalableVectorType *
3954 switch (TypeFlags.getEltType()) {
3955 default:
3956 llvm_unreachable("Invalid SVETypeFlag!");
3957
3958 case SVETypeFlags::EltTyInt8:
3959 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
3960 case SVETypeFlags::EltTyInt16:
3961 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
3962 case SVETypeFlags::EltTyInt32:
3963 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
3964 case SVETypeFlags::EltTyInt64:
3965 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
3966
3967 case SVETypeFlags::EltTyMFloat8:
3968 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
3969 case SVETypeFlags::EltTyFloat16:
3970 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
3971 case SVETypeFlags::EltTyBFloat16:
3972 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
3973 case SVETypeFlags::EltTyFloat32:
3974 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
3975 case SVETypeFlags::EltTyFloat64:
3976 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
3977
3978 case SVETypeFlags::EltTyBool8:
3979 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3980 case SVETypeFlags::EltTyBool16:
3981 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3982 case SVETypeFlags::EltTyBool32:
3983 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3984 case SVETypeFlags::EltTyBool64:
3985 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3986 }
3987}
3988
3989llvm::Value *
3991 Function *Ptrue =
3992 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
3993 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
3994}
3995
3996constexpr unsigned SVEBitsPerBlock = 128;
3997
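// Return the scalable vector type whose element type is EltTy and whose
// minimum width is one 128-bit SVE block, e.g. <vscale x 4 x i32> for a
// 32-bit element type.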
3998static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
3999 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
4000 return llvm::ScalableVectorType::get(EltTy, NumElts);
4001}
4002
4003// Reinterpret the input predicate so that it can be used to correctly isolate
4004// the elements of the specified datatype.
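// For example, an svbool_t predicate (<vscale x 16 x i1>) used with
// <vscale x 2 x i64> data is narrowed to <vscale x 2 x i1> via
// llvm.aarch64.sve.convert.from.svbool.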
4006 llvm::ScalableVectorType *VTy) {
4007
4008 if (isa<TargetExtType>(Pred->getType()) &&
4009 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
4010 return Pred;
4011
4012 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
4013 if (Pred->getType() == RTy)
4014 return Pred;
4015
4016 unsigned IntID;
4017 llvm::Type *IntrinsicTy;
4018 switch (VTy->getMinNumElements()) {
4019 default:
4020 llvm_unreachable("unsupported element count!");
4021 case 1:
4022 case 2:
4023 case 4:
4024 case 8:
4025 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
4026 IntrinsicTy = RTy;
4027 break;
4028 case 16:
4029 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
4030 IntrinsicTy = Pred->getType();
4031 break;
4032 }
4033
4034 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
4035 Value *C = Builder.CreateCall(F, Pred);
4036 assert(C->getType() == RTy && "Unexpected return type!");
4037 return C;
4038}
4039
4041 llvm::StructType *Ty) {
4042 if (PredTuple->getType() == Ty)
4043 return PredTuple;
4044
4045 Value *Ret = llvm::PoisonValue::get(Ty);
4046 for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
4047 Value *Pred = Builder.CreateExtractValue(PredTuple, I);
4048 Pred = EmitSVEPredicateCast(
4049 Pred, cast<llvm::ScalableVectorType>(Ty->getTypeAtIndex(I)));
4050 Ret = Builder.CreateInsertValue(Ret, Pred, I);
4051 }
4052
4053 return Ret;
4054}
4055
4058 unsigned IntID) {
4059 auto *ResultTy = getSVEType(TypeFlags);
4060 auto *OverloadedTy =
4061 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
4062
4063 Function *F = nullptr;
4064 if (Ops[1]->getType()->isVectorTy())
4065 // This is the "vector base, scalar offset" case. In order to uniquely
4066 // map this built-in to an LLVM IR intrinsic, we need both the return type
4067 // and the type of the vector base.
4068 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
4069 else
4070 // This is the "scalar base, vector offset" case. The type of the offset
4071 // is encoded in the name of the intrinsic. We only need to specify the
4072 // return type in order to uniquely map this built-in to an LLVM IR
4073 // intrinsic.
4074 F = CGM.getIntrinsic(IntID, OverloadedTy);
4075
4076 // At the ACLE level there's only one predicate type, svbool_t, which is
4077 // mapped to <n x 16 x i1>. However, this might be incompatible with the
4078 // actual type being loaded. For example, when loading doubles (i64) the
4079 // predicate should be <n x 2 x i1> instead. At the IR level the type of
4080 // the predicate and the data being loaded must match. Cast to the type
4081 // expected by the intrinsic. The intrinsic itself should be defined in
4082 // a way that enforces relations between parameter types.
4083 Ops[0] = EmitSVEPredicateCast(
4084 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
4085
4086 // Pass 0 when the offset is missing. This can only be applied when using
4087 // the "vector base" addressing mode for which ACLE allows no offset. The
4088 // corresponding LLVM IR always requires an offset.
4089 if (Ops.size() == 2) {
4090 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
4091 Ops.push_back(ConstantInt::get(Int64Ty, 0));
4092 }
4093
4094 // For "vector base, scalar index" scale the index so that it becomes a
4095 // scalar offset.
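  // For example, with 64-bit elements an index of i becomes a byte offset of
  // i << 3.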
4096 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
4097 unsigned BytesPerElt =
4098 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
4099 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
4100 }
4101
4102 Value *Call = Builder.CreateCall(F, Ops);
4103
4104 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
4105 // other cases it's folded into a nop.
4106 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
4107 : Builder.CreateSExt(Call, ResultTy);
4108}
4109
4112 unsigned IntID) {
4113 auto *SrcDataTy = getSVEType(TypeFlags);
4114 auto *OverloadedTy =
4115 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
4116
4117 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
4118 // it's the first argument. Move it accordingly.
4119 Ops.insert(Ops.begin(), Ops.pop_back_val());
4120
4121 Function *F = nullptr;
4122 if (Ops[2]->getType()->isVectorTy())
4123 // This is the "vector base, scalar offset" case. In order to uniquely
4124 // map this built-in to an LLVM IR intrinsic, we need both the return type
4125 // and the type of the vector base.
4126 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
4127 else
4128 // This is the "scalar base, vector offset" case. The type of the offset
4129 // is encoded in the name of the intrinsic. We only need to specify the
4130 // return type in order to uniquely map this built-in to an LLVM IR
4131 // intrinsic.
4132 F = CGM.getIntrinsic(IntID, OverloadedTy);
4133
4134 // Pass 0 when the offset is missing. This can only be applied when using
4135 // the "vector base" addressing mode for which ACLE allows no offset. The
4136 // corresponding LLVM IR always requires an offset.
4137 if (Ops.size() == 3) {
4138 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
4139 Ops.push_back(ConstantInt::get(Int64Ty, 0));
4140 }
4141
4142 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
4143 // folded into a nop.
4144 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
4145
4146 // At the ACLE level there's only one predicate type, svbool_t, which is
4147 // mapped to <n x 16 x i1>. However, this might be incompatible with the
4148 // actual type being stored. For example, when storing doubles (i64) the
4149 // predicate should be <n x 2 x i1> instead. At the IR level the type of
4150 // the predicate and the data being stored must match. Cast to the type
4151 // expected by the intrinsic. The intrinsic itself should be defined in
4152 // a way that enforces relations between parameter types.
4153 Ops[1] = EmitSVEPredicateCast(
4154 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
4155
4156 // For "vector base, scalar index" scale the index so that it becomes a
4157 // scalar offset.
4158 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
4159 unsigned BytesPerElt =
4160 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
4161 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
4162 }
4163
4164 return Builder.CreateCall(F, Ops);
4165}
4166
4169 unsigned IntID) {
4170 // The gather prefetches are overloaded on the vector input - this can either
4171 // be the vector of base addresses or vector of offsets.
4172 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
4173 if (!OverloadedTy)
4174 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
4175
4176 // Cast the predicate from svbool_t to the right number of elements.
4177 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
4178
4179 // vector + imm addressing modes
4180 if (Ops[1]->getType()->isVectorTy()) {
4181 if (Ops.size() == 3) {
4182 // Pass 0 for 'vector+imm' when the index is omitted.
4183 Ops.push_back(ConstantInt::get(Int64Ty, 0));
4184
4185 // The sv_prfop is the last operand in the builtin and IR intrinsic.
4186 std::swap(Ops[2], Ops[3]);
4187 } else {
4188 // Index needs to be passed as scaled offset.
4189 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
4190 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
4191 if (BytesPerElt > 1)
4192 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
4193 }
4194 }
4195
4196 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
4197 return Builder.CreateCall(F, Ops);
4198}
4199
4202 unsigned IntID) {
4203 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
4204 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
4205 Value *BasePtr = Ops[1];
4206
4207 // Does the load have an offset?
4208 if (Ops.size() > 2)
4209 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
4210
4211 Function *F = CGM.getIntrinsic(IntID, {VTy});
4212 return Builder.CreateCall(F, {Predicate, BasePtr});
4213}
4214
4217 unsigned IntID) {
4218 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
4219
4220 unsigned N;
4221 switch (IntID) {
4222 case Intrinsic::aarch64_sve_st2:
4223 case Intrinsic::aarch64_sve_st1_pn_x2:
4224 case Intrinsic::aarch64_sve_stnt1_pn_x2:
4225 case Intrinsic::aarch64_sve_st2q:
4226 N = 2;
4227 break;
4228 case Intrinsic::aarch64_sve_st3:
4229 case Intrinsic::aarch64_sve_st3q:
4230 N = 3;
4231 break;
4232 case Intrinsic::aarch64_sve_st4:
4233 case Intrinsic::aarch64_sve_st1_pn_x4:
4234 case Intrinsic::aarch64_sve_stnt1_pn_x4:
4235 case Intrinsic::aarch64_sve_st4q:
4236 N = 4;
4237 break;
4238 default:
4239 llvm_unreachable("unknown intrinsic!");
4240 }
4241
4242 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
4243 Value *BasePtr = Ops[1];
4244
4245 // Does the store have an offset?
4246 if (Ops.size() > (2 + N))
4247 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
4248
4249 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
4250 // need to break up the tuple vector.
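  // For an st2, for example, the final call operands are
  // {data0, data1, predicate, base pointer}.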
4252 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
4253 Operands.push_back(Ops[I]);
4254 Operands.append({Predicate, BasePtr});
4255 Function *F = CGM.getIntrinsic(IntID, { VTy });
4256
4257 return Builder.CreateCall(F, Operands);
4258}
4259
4260// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
4261// svpmullt_pair intrinsics, with the exception that their results are bitcast
4262// to a wider type.
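// For instance, svpmullb_u64 calls the pair intrinsic on its
// <vscale x 4 x i32> operands and the result is then reinterpreted as
// <vscale x 2 x i64>.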
4265 unsigned BuiltinID) {
4266 // Splat scalar operand to vector (intrinsics with _n infix)
4267 if (TypeFlags.hasSplatOperand()) {
4268 unsigned OpNo = TypeFlags.getSplatOperand();
4269 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
4270 }
4271
4272 // The pair-wise function has a narrower overloaded type.
4273 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
4274 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
4275
4276 // Now bitcast to the wider result type.
4277 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
4278 return EmitSVEReinterpret(Call, Ty);
4279}
4280
4282 ArrayRef<Value *> Ops, unsigned BuiltinID) {
4283 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
4284 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
4285 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
4286}
4287
4290 unsigned BuiltinID) {
4291 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
4292 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
4293 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
4294
4295 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
4296 Value *BasePtr = Ops[1];
4297
4298 // If the index operand is present, fold it into the base pointer.
4299 if (Ops.size() > 3)
4300 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
4301
4302 Value *PrfOp = Ops.back();
4303
4304 Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
4305 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
4306}
4307
4309 llvm::Type *ReturnTy,
4311 unsigned IntrinsicID,
4312 bool IsZExtReturn) {
4313 QualType LangPTy = E->getArg(1)->getType();
4314 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
4315 LangPTy->castAs<PointerType>()->getPointeeType());
4316
4317 // The Mfloat8 type is stored as a vector, so extra work is needed
4318 // to extract the scalar element type.
4319 if (MemEltTy->isVectorTy()) {
4320 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
4321 "Only <1 x i8> expected");
4322 MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
4323 }
4324
4325 // The vector type that is returned may be different from the
4326 // eventual type loaded from memory.
4327 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
4328 llvm::ScalableVectorType *MemoryTy = nullptr;
4329 llvm::ScalableVectorType *PredTy = nullptr;
4330 bool IsQuadLoad = false;
4331 switch (IntrinsicID) {
4332 case Intrinsic::aarch64_sve_ld1uwq:
4333 case Intrinsic::aarch64_sve_ld1udq:
4334 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
4335 PredTy = llvm::ScalableVectorType::get(
4336 llvm::Type::getInt1Ty(getLLVMContext()), 1);
4337 IsQuadLoad = true;
4338 break;
4339 default:
4340 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
4341 PredTy = MemoryTy;
4342 break;
4343 }
4344
4345 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
4346 Value *BasePtr = Ops[1];
4347
4348 // Does the load have an offset?
4349 if (Ops.size() > 2)
4350 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
4351
4352 Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
4353 auto *Load =
4354 cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
4355 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
4356 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
4357
4358 if (IsQuadLoad)
4359 return Load;
4360
4361 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
4362 : Builder.CreateSExt(Load, VectorTy);
4363}
4364
4367 unsigned IntrinsicID) {
4368 QualType LangPTy = E->getArg(1)->getType();
4369 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
4370 LangPTy->castAs<PointerType>()->getPointeeType());
4371
4372 // The Mfloat8 type is stored as a vector, so extra work is needed
4373 // to extract the scalar element type.
4374 if (MemEltTy->isVectorTy()) {
4375 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
4376 "Only <1 x i8> expected");
4377 MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
4378 }
4379
4380 // The vector type that is stored may be different from the
4381 // eventual type stored to memory.
4382 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
4383 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
4384
4385 auto PredTy = MemoryTy;
4386 auto AddrMemoryTy = MemoryTy;
4387 bool IsQuadStore = false;
4388
4389 switch (IntrinsicID) {
4390 case Intrinsic::aarch64_sve_st1wq:
4391 case Intrinsic::aarch64_sve_st1dq:
4392 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
4393 PredTy =
4394 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
4395 IsQuadStore = true;
4396 break;
4397 default:
4398 break;
4399 }
4400 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
4401 Value *BasePtr = Ops[1];
4402
4403 // Does the store have an offset?
4404 if (Ops.size() == 4)
4405 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
4406
4407 // Last value is always the data
4408 Value *Val =
4409 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
4410
4411 Function *F =
4412 CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
4413 auto *Store =
4414 cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
4415 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
4416 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
4417 return Store;
4418}
4419
4422 unsigned IntID) {
4423 Ops[2] = EmitSVEPredicateCast(
4425
4426 SmallVector<Value *> NewOps;
4427 NewOps.push_back(Ops[2]);
4428
4429 llvm::Value *BasePtr = Ops[3];
4430 llvm::Value *RealSlice = Ops[1];
4431 // If the intrinsic contains the vnum parameter, multiply it by the vector
4432 // size in bytes.
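  // The vector length in bytes (SVL.B) is computed as cntsd * 8, so the byte
  // offset added to the pointer is vnum * SVL.B, and vnum is also added to
  // the slice index.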
4433 if (Ops.size() == 5) {
4434 Function *StreamingVectorLength =
4435 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd);
4436 llvm::Value *StreamingVectorLengthCall =
4437 Builder.CreateMul(Builder.CreateCall(StreamingVectorLength),
4438 llvm::ConstantInt::get(Int64Ty, 8), "svl",
4439 /* HasNUW */ true, /* HasNSW */ true);
4440 llvm::Value *Mulvl =
4441 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
4442 // The type of the ptr parameter is void *, so use Int8Ty here.
4443 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
4444 RealSlice = Builder.CreateZExt(RealSlice, Int64Ty);
4445 RealSlice = Builder.CreateAdd(RealSlice, Ops[4]);
4446 RealSlice = Builder.CreateTrunc(RealSlice, Int32Ty);
4447 }
4448 NewOps.push_back(BasePtr);
4449 NewOps.push_back(Ops[0]);
4450 NewOps.push_back(RealSlice);
4451 Function *F = CGM.getIntrinsic(IntID);
4452 return Builder.CreateCall(F, NewOps);
4453}
4454
4457 unsigned IntID) {
4458 auto *VecTy = getSVEType(TypeFlags);
4459 Function *F = CGM.getIntrinsic(IntID, VecTy);
4460 if (TypeFlags.isReadZA())
4461 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
4462 else if (TypeFlags.isWriteZA())
4463 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
4464 return Builder.CreateCall(F, Ops);
4465}
4466
4469 unsigned IntID) {
4470 // The svzero_za() intrinsic zeros the entire ZA tile and has no parameters.
4471 if (Ops.size() == 0)
4472 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
4473 Function *F = CGM.getIntrinsic(IntID, {});
4474 return Builder.CreateCall(F, Ops);
4475}
4476
4479 unsigned IntID) {
4480 if (Ops.size() == 2)
4481 Ops.push_back(Builder.getInt32(0));
4482 else
4483 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
4484 Function *F = CGM.getIntrinsic(IntID, {});
4485 return Builder.CreateCall(F, Ops);
4486}
4487
4488// Limit the usage of scalable llvm IR generated by the ACLE by using the
4489// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
4490Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
4491 return Builder.CreateVectorSplat(
4492 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
4493}
4494
4496 if (auto *Ty = Scalar->getType(); Ty->isVectorTy()) {
4497#ifndef NDEBUG
4498 auto *VecTy = cast<llvm::VectorType>(Ty);
4499 ElementCount EC = VecTy->getElementCount();
4500 assert(EC.isScalar() && VecTy->getElementType() == Int8Ty &&
4501 "Only <1 x i8> expected");
4502#endif
4503 Scalar = Builder.CreateExtractElement(Scalar, uint64_t(0));
4504 }
4505 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
4506}
4507
4509 // FIXME: For big endian this needs an additional REV, or needs a separate
4510 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
4511 // instruction is defined as 'bitwise' equivalent from memory point of
4512 // view (when storing/reloading), whereas the svreinterpret builtin
4513 // implements bitwise equivalent cast from register point of view.
4514 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
4515
4516 if (auto *StructTy = dyn_cast<StructType>(Ty)) {
4517 Value *Tuple = llvm::PoisonValue::get(Ty);
4518
4519 for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
4520 Value *In = Builder.CreateExtractValue(Val, I);
4521 Value *Out = Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
4522 Tuple = Builder.CreateInsertValue(Tuple, Out, I);
4523 }
4524
4525 return Tuple;
4526 }
4527
4528 return Builder.CreateBitCast(Val, Ty);
4529}
4530
4531static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
4533 auto *SplatZero = Constant::getNullValue(Ty);
4534 Ops.insert(Ops.begin(), SplatZero);
4535}
4536
4537static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
4539 auto *SplatUndef = UndefValue::get(Ty);
4540 Ops.insert(Ops.begin(), SplatUndef);
4541}
4542
4543SmallVector<llvm::Type *, 2>
4545 llvm::Type *ResultType,
4546 ArrayRef<Value *> Ops) {
4547 if (TypeFlags.isOverloadNone())
4548 return {};
4549
4550 llvm::Type *DefaultType = getSVEType(TypeFlags);
4551
4552 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
4553 return {DefaultType, Ops[1]->getType()};
4554
4555 if (TypeFlags.isOverloadWhileRW())
4556 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
4557
4558 if (TypeFlags.isOverloadFirstandLast())
4559 return {Ops[0]->getType(), Ops.back()->getType()};
4560
4561 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
4562 ResultType->isVectorTy())
4563 return {ResultType, Ops[1]->getType()};
4564
4565 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
4566 return {DefaultType};
4567}
4568
4570 ArrayRef<Value *> Ops) {
4571 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
4572 "Expects TypeFlags.isTupleSet() or TypeFlags.isTupleGet()");
4573 unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();
4574
4575 if (TypeFlags.isTupleSet())
4576 return Builder.CreateInsertValue(Ops[0], Ops[2], Idx);
4577 return Builder.CreateExtractValue(Ops[0], Idx);
4578}
4579
4581 llvm::Type *Ty,
4582 ArrayRef<Value *> Ops) {
4583 assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
4584
4585 Value *Tuple = llvm::PoisonValue::get(Ty);
4586 for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
4587 Tuple = Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
4588
4589 return Tuple;
4590}
4591
4593 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
4594 SVETypeFlags TypeFlags) {
4595 // Find out if any arguments are required to be integer constant expressions.
4596 unsigned ICEArguments = 0;
4597 ASTContext::GetBuiltinTypeError Error;
4598 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4599 assert(Error == ASTContext::GE_None && "Should not codegen an error");
4600
4601 // Tuple set/get only requires one insert/extract vector, which is
4602 // created by EmitSVETupleSetOrGet.
4603 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
4604
4605 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
4606 bool IsICE = ICEArguments & (1 << i);
4607 Value *Arg = EmitScalarExpr(E->getArg(i));
4608
4609 if (IsICE) {
4610 // If this is required to be a constant, constant fold it so that we know
4611 // that the generated intrinsic gets a ConstantInt.
4612 std::optional<llvm::APSInt> Result =
4613       E->getArg(i)->getIntegerConstantExpr(getContext());
4614 assert(Result && "Expected argument to be a constant");
4615
4616 // Immediates for SVE llvm intrinsics are always 32bit. We can safely
4617 // truncate because the immediate has been range checked and no valid
4618 // immediate requires more than a handful of bits.
4619 *Result = Result->extOrTrunc(32);
4620 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
4621 continue;
4622 }
4623
4624 if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
4625 for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
4626 Ops.push_back(Builder.CreateExtractValue(Arg, I));
4627
4628 continue;
4629 }
4630
4631 Ops.push_back(Arg);
4632 }
4633}
4634
4636 const CallExpr *E) {
4637 llvm::Type *Ty = ConvertType(E->getType());
4638 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
4639 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
4640 Value *Val = EmitScalarExpr(E->getArg(0));
4641 return EmitSVEReinterpret(Val, Ty);
4642 }
4643
4646
4648 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4649 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4650
4651 if (TypeFlags.isLoad())
4652 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
4653 TypeFlags.isZExtReturn());
4654 else if (TypeFlags.isStore())
4655 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
4656 else if (TypeFlags.isGatherLoad())
4657 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4658 else if (TypeFlags.isScatterStore())
4659 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4660 else if (TypeFlags.isPrefetch())
4661 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4662 else if (TypeFlags.isGatherPrefetch())
4663 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4664 else if (TypeFlags.isStructLoad())
4665 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4666 else if (TypeFlags.isStructStore())
4667 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4668 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
4669 return EmitSVETupleSetOrGet(TypeFlags, Ops);
4670 else if (TypeFlags.isTupleCreate())
4671 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
4672 else if (TypeFlags.isUndef())
4673 return UndefValue::get(Ty);
4674 else if (Builtin->LLVMIntrinsic != 0) {
4675 // Emit set FPMR for intrinsics that require it
4676 if (TypeFlags.setsFPMR())
4677 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
4678 Ops.pop_back_val());
4679 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
4680       InsertExplicitZeroOperand(Builder, Ty, Ops);
4681
4682 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
4683       InsertExplicitUndefOperand(Builder, Ty, Ops);
4684
4685 // Some ACLE builtins leave out the argument to specify the predicate
4686 // pattern, which is expected to be expanded to an SV_ALL pattern.
4687 if (TypeFlags.isAppendSVALL())
4688 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
4689 if (TypeFlags.isInsertOp1SVALL())
4690 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
4691
4692 // Predicates must match the main datatype.
4693 for (Value *&Op : Ops)
4694 if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
4695 if (PredTy->getElementType()->isIntegerTy(1))
4696 Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));
4697
4698 // Splat scalar operand to vector (intrinsics with _n infix)
4699 if (TypeFlags.hasSplatOperand()) {
4700 unsigned OpNo = TypeFlags.getSplatOperand();
4701 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
4702 }
4703
4704 if (TypeFlags.isReverseCompare())
4705 std::swap(Ops[1], Ops[2]);
4706 else if (TypeFlags.isReverseUSDOT())
4707 std::swap(Ops[1], Ops[2]);
4708 else if (TypeFlags.isReverseMergeAnyBinOp() &&
4709 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4710 std::swap(Ops[1], Ops[2]);
4711 else if (TypeFlags.isReverseMergeAnyAccOp() &&
4712 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4713 std::swap(Ops[1], Ops[3]);
4714
4715 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
4716 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
4717 llvm::Type *OpndTy = Ops[1]->getType();
4718 auto *SplatZero = Constant::getNullValue(OpndTy);
4719 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
4720 }
4721
4722 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
4723 getSVEOverloadTypes(TypeFlags, Ty, Ops));
4724 Value *Call = Builder.CreateCall(F, Ops);
4725
4726 if (Call->getType() == Ty)
4727 return Call;
4728
4729 // Predicate results must be converted to svbool_t.
4730 if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ty))
4731 return EmitSVEPredicateCast(Call, PredTy);
4732 if (auto PredTupleTy = dyn_cast<llvm::StructType>(Ty))
4733 return EmitSVEPredicateTupleCast(Call, PredTupleTy);
4734
4735 llvm_unreachable("unsupported element count!");
4736 }
4737
4738 switch (BuiltinID) {
4739 default:
4740 return nullptr;
4741
4742 case SVE::BI__builtin_sve_svreinterpret_b: {
4743 auto SVCountTy =
4744 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4745 Function *CastFromSVCountF =
4746 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4747 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
4748 }
4749 case SVE::BI__builtin_sve_svreinterpret_c: {
4750 auto SVCountTy =
4751 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4752 Function *CastToSVCountF =
4753 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4754 return Builder.CreateCall(CastToSVCountF, Ops[0]);
4755 }
4756
4757 case SVE::BI__builtin_sve_svpsel_lane_b8:
4758 case SVE::BI__builtin_sve_svpsel_lane_b16:
4759 case SVE::BI__builtin_sve_svpsel_lane_b32:
4760 case SVE::BI__builtin_sve_svpsel_lane_b64:
4761 case SVE::BI__builtin_sve_svpsel_lane_c8:
4762 case SVE::BI__builtin_sve_svpsel_lane_c16:
4763 case SVE::BI__builtin_sve_svpsel_lane_c32:
4764 case SVE::BI__builtin_sve_svpsel_lane_c64: {
4765 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
4766 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
4767 "aarch64.svcount")) &&
4768 "Unexpected TargetExtType");
4769 auto SVCountTy =
4770 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4771 Function *CastFromSVCountF =
4772 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4773 Function *CastToSVCountF =
4774 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4775
4776 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
4777 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
4778 llvm::Value *Ops0 =
4779 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
4780 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
4781 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
4782 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
4783 }
4784 case SVE::BI__builtin_sve_svmov_b_z: {
4785 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
4786 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4787 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4788 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
4789 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
4790 }
4791
4792 case SVE::BI__builtin_sve_svnot_b_z: {
4793 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
4794 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4795 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4796 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
4797 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
4798 }
4799
4800 case SVE::BI__builtin_sve_svmovlb_u16:
4801 case SVE::BI__builtin_sve_svmovlb_u32:
4802 case SVE::BI__builtin_sve_svmovlb_u64:
4803 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
4804
4805 case SVE::BI__builtin_sve_svmovlb_s16:
4806 case SVE::BI__builtin_sve_svmovlb_s32:
4807 case SVE::BI__builtin_sve_svmovlb_s64:
4808 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
4809
4810 case SVE::BI__builtin_sve_svmovlt_u16:
4811 case SVE::BI__builtin_sve_svmovlt_u32:
4812 case SVE::BI__builtin_sve_svmovlt_u64:
4813 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
4814
4815 case SVE::BI__builtin_sve_svmovlt_s16:
4816 case SVE::BI__builtin_sve_svmovlt_s32:
4817 case SVE::BI__builtin_sve_svmovlt_s64:
4818 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
4819
4820 case SVE::BI__builtin_sve_svpmullt_u16:
4821 case SVE::BI__builtin_sve_svpmullt_u64:
4822 case SVE::BI__builtin_sve_svpmullt_n_u16:
4823 case SVE::BI__builtin_sve_svpmullt_n_u64:
4824 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
4825
4826 case SVE::BI__builtin_sve_svpmullb_u16:
4827 case SVE::BI__builtin_sve_svpmullb_u64:
4828 case SVE::BI__builtin_sve_svpmullb_n_u16:
4829 case SVE::BI__builtin_sve_svpmullb_n_u64:
4830 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
4831
4832 case SVE::BI__builtin_sve_svdup_n_b8:
4833 case SVE::BI__builtin_sve_svdup_n_b16:
4834 case SVE::BI__builtin_sve_svdup_n_b32:
4835 case SVE::BI__builtin_sve_svdup_n_b64: {
4836 Value *CmpNE =
4837 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
4838 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
4839 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
4841 }
4842
4843 case SVE::BI__builtin_sve_svdupq_n_b8:
4844 case SVE::BI__builtin_sve_svdupq_n_b16:
4845 case SVE::BI__builtin_sve_svdupq_n_b32:
4846 case SVE::BI__builtin_sve_svdupq_n_b64:
4847 case SVE::BI__builtin_sve_svdupq_n_u8:
4848 case SVE::BI__builtin_sve_svdupq_n_s8:
4849 case SVE::BI__builtin_sve_svdupq_n_u64:
4850 case SVE::BI__builtin_sve_svdupq_n_f64:
4851 case SVE::BI__builtin_sve_svdupq_n_s64:
4852 case SVE::BI__builtin_sve_svdupq_n_u16:
4853 case SVE::BI__builtin_sve_svdupq_n_f16:
4854 case SVE::BI__builtin_sve_svdupq_n_bf16:
4855 case SVE::BI__builtin_sve_svdupq_n_s16:
4856 case SVE::BI__builtin_sve_svdupq_n_u32:
4857 case SVE::BI__builtin_sve_svdupq_n_f32:
4858 case SVE::BI__builtin_sve_svdupq_n_s32: {
4859 // These builtins are implemented by building a fixed-length vector from the
4860 // scalar operands and replicating it across the register with dupq_lane.
4861 unsigned NumOpnds = Ops.size();
4862
4863 bool IsBoolTy =
4864 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
4865
4866 // For svdupq_n_b*, the element type is an integer of width 128/numelts bits,
4867 // so that the compare can use the width that is natural for the expected
4868 // number of predicate lanes.
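    // For example, svdupq_n_b32 takes four operands, so EltTy becomes i32
    // (128 / 4).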
4869 llvm::Type *EltTy = Ops[0]->getType();
4870 if (IsBoolTy)
4871 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
4872
4874 for (unsigned I = 0; I < NumOpnds; ++I)
4875 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
4876 Value *Vec = BuildVector(VecOps);
4877
4878 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
4879 Value *InsertSubVec = Builder.CreateInsertVector(
4880 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, uint64_t(0));
4881
4882 Function *F =
4883 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
4884 Value *DupQLane =
4885 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
4886
4887 if (!IsBoolTy)
4888 return DupQLane;
4889
4890 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4891 Value *Pred = EmitSVEAllTruePred(TypeFlags);
4892
4893 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
4894 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
4895 : Intrinsic::aarch64_sve_cmpne_wide,
4896 OverloadedTy);
4897 Value *Call = Builder.CreateCall(
4898 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
4900 }
4901
4902 case SVE::BI__builtin_sve_svpfalse_b:
4903 return ConstantInt::getFalse(Ty);
4904
4905 case SVE::BI__builtin_sve_svpfalse_c: {
4906 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
4907 Function *CastToSVCountF =
4908 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
4909 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
4910 }
4911
4912 case SVE::BI__builtin_sve_svlen_bf16:
4913 case SVE::BI__builtin_sve_svlen_f16:
4914 case SVE::BI__builtin_sve_svlen_f32:
4915 case SVE::BI__builtin_sve_svlen_f64:
4916 case SVE::BI__builtin_sve_svlen_s8:
4917 case SVE::BI__builtin_sve_svlen_s16:
4918 case SVE::BI__builtin_sve_svlen_s32:
4919 case SVE::BI__builtin_sve_svlen_s64:
4920 case SVE::BI__builtin_sve_svlen_u8:
4921 case SVE::BI__builtin_sve_svlen_u16:
4922 case SVE::BI__builtin_sve_svlen_u32:
4923 case SVE::BI__builtin_sve_svlen_u64: {
4924 SVETypeFlags TF(Builtin->TypeModifier);
4925 return Builder.CreateElementCount(Ty, getSVEType(TF)->getElementCount());
4926 }
4927
4928 case SVE::BI__builtin_sve_svtbl2_u8:
4929 case SVE::BI__builtin_sve_svtbl2_s8:
4930 case SVE::BI__builtin_sve_svtbl2_u16:
4931 case SVE::BI__builtin_sve_svtbl2_s16:
4932 case SVE::BI__builtin_sve_svtbl2_u32:
4933 case SVE::BI__builtin_sve_svtbl2_s32:
4934 case SVE::BI__builtin_sve_svtbl2_u64:
4935 case SVE::BI__builtin_sve_svtbl2_s64:
4936 case SVE::BI__builtin_sve_svtbl2_f16:
4937 case SVE::BI__builtin_sve_svtbl2_bf16:
4938 case SVE::BI__builtin_sve_svtbl2_f32:
4939 case SVE::BI__builtin_sve_svtbl2_f64: {
4940 SVETypeFlags TF(Builtin->TypeModifier);
4941 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, getSVEType(TF));
4942 return Builder.CreateCall(F, Ops);
4943 }
4944
4945 case SVE::BI__builtin_sve_svset_neonq_s8:
4946 case SVE::BI__builtin_sve_svset_neonq_s16:
4947 case SVE::BI__builtin_sve_svset_neonq_s32:
4948 case SVE::BI__builtin_sve_svset_neonq_s64:
4949 case SVE::BI__builtin_sve_svset_neonq_u8:
4950 case SVE::BI__builtin_sve_svset_neonq_u16:
4951 case SVE::BI__builtin_sve_svset_neonq_u32:
4952 case SVE::BI__builtin_sve_svset_neonq_u64:
4953 case SVE::BI__builtin_sve_svset_neonq_f16:
4954 case SVE::BI__builtin_sve_svset_neonq_f32:
4955 case SVE::BI__builtin_sve_svset_neonq_f64:
4956 case SVE::BI__builtin_sve_svset_neonq_bf16: {
4957 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], uint64_t(0));
4958 }
4959
4960 case SVE::BI__builtin_sve_svget_neonq_s8:
4961 case SVE::BI__builtin_sve_svget_neonq_s16:
4962 case SVE::BI__builtin_sve_svget_neonq_s32:
4963 case SVE::BI__builtin_sve_svget_neonq_s64:
4964 case SVE::BI__builtin_sve_svget_neonq_u8:
4965 case SVE::BI__builtin_sve_svget_neonq_u16:
4966 case SVE::BI__builtin_sve_svget_neonq_u32:
4967 case SVE::BI__builtin_sve_svget_neonq_u64:
4968 case SVE::BI__builtin_sve_svget_neonq_f16:
4969 case SVE::BI__builtin_sve_svget_neonq_f32:
4970 case SVE::BI__builtin_sve_svget_neonq_f64:
4971 case SVE::BI__builtin_sve_svget_neonq_bf16: {
4972 return Builder.CreateExtractVector(Ty, Ops[0], uint64_t(0));
4973 }
4974
4975 case SVE::BI__builtin_sve_svdup_neonq_s8:
4976 case SVE::BI__builtin_sve_svdup_neonq_s16:
4977 case SVE::BI__builtin_sve_svdup_neonq_s32:
4978 case SVE::BI__builtin_sve_svdup_neonq_s64:
4979 case SVE::BI__builtin_sve_svdup_neonq_u8:
4980 case SVE::BI__builtin_sve_svdup_neonq_u16:
4981 case SVE::BI__builtin_sve_svdup_neonq_u32:
4982 case SVE::BI__builtin_sve_svdup_neonq_u64:
4983 case SVE::BI__builtin_sve_svdup_neonq_f16:
4984 case SVE::BI__builtin_sve_svdup_neonq_f32:
4985 case SVE::BI__builtin_sve_svdup_neonq_f64:
4986 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
4987 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
4988 uint64_t(0));
4989 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
4990 {Insert, Builder.getInt64(0)});
4991 }
4992 }
4993
4994 // Should not happen.
4995 return nullptr;
4996}
4997
4998static void swapCommutativeSMEOperands(unsigned BuiltinID,
5000 unsigned MultiVec;
5001 switch (BuiltinID) {
5002 default:
5003 return;
5004 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
5005 MultiVec = 1;
5006 break;
5007 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
5008 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
5009 MultiVec = 2;
5010 break;
5011 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
5012 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
5013 MultiVec = 4;
5014 break;
5015 }
5016
5017 if (MultiVec > 0)
5018 for (unsigned I = 0; I < MultiVec; ++I)
5019 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
5020}
5021
5023 const CallExpr *E) {
5026
5028 SVETypeFlags TypeFlags(Builtin->TypeModifier);
5029 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
5030
5031 if (TypeFlags.isLoad() || TypeFlags.isStore())
5032 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
5033 else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
5034 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
5035 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
5036 BuiltinID == SME::BI__builtin_sme_svzero_za)
5037 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
5038 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
5039 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
5040 BuiltinID == SME::BI__builtin_sme_svldr_za ||
5041 BuiltinID == SME::BI__builtin_sme_svstr_za)
5042 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
5043
5044 // Emit set FPMR for intrinsics that require it
5045 if (TypeFlags.setsFPMR())
5046 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
5047 Ops.pop_back_val());
5048 // Handle builtins which require their multi-vector operands to be swapped
5049 swapCommutativeSMEOperands(BuiltinID, Ops);
5050
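  // svcntsb/svcntsh/svcntsw return the streaming vector length in units of
  // 8-, 16- and 32-bit elements; they are computed by scaling cntsd (the
  // 64-bit element count) by 8, 4 and 2 respectively.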
5051 auto isCntsBuiltin = [&]() {
5052 switch (BuiltinID) {
5053 default:
5054 return 0;
5055 case SME::BI__builtin_sme_svcntsb:
5056 return 8;
5057 case SME::BI__builtin_sme_svcntsh:
5058 return 4;
5059 case SME::BI__builtin_sme_svcntsw:
5060 return 2;
5061 }
5062 };
5063
5064 if (auto Mul = isCntsBuiltin()) {
5065 llvm::Value *Cntd =
5066 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd));
5067 return Builder.CreateMul(Cntd, llvm::ConstantInt::get(Int64Ty, Mul),
5068 "mulsvl", /* HasNUW */ true, /* HasNSW */ true);
5069 }
5070
5071 // Should not happen!
5072 if (Builtin->LLVMIntrinsic == 0)
5073 return nullptr;
5074
5075 // Predicates must match the main datatype.
5076 for (Value *&Op : Ops)
5077 if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
5078 if (PredTy->getElementType()->isIntegerTy(1))
5079 Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));
5080
5081 Function *F =
5082 TypeFlags.isOverloadNone()
5083 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
5084 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
5085
5086 return Builder.CreateCall(F, Ops);
5087}
5088
5089/// Helper for the read/write/add/inc X18 builtins: read the X18 register and
5090/// return it as an i8 pointer.
5092 LLVMContext &Context = CGF.CGM.getLLVMContext();
5093 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
5094 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5095 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5096 llvm::Function *F =
5097 CGF.CGM.getIntrinsic(Intrinsic::read_register, {CGF.Int64Ty});
5098 llvm::Value *X18 = CGF.Builder.CreateCall(F, Metadata);
5099 return CGF.Builder.CreateIntToPtr(X18, CGF.Int8PtrTy);
5100}
5101
5103 const CallExpr *E,
5104 llvm::Triple::ArchType Arch) {
5105 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
5106 BuiltinID <= clang::AArch64::LastSVEBuiltin)
5107 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
5108
5109 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
5110 BuiltinID <= clang::AArch64::LastSMEBuiltin)
5111 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
5112
5113 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
5114 return EmitAArch64CpuSupports(E);
5115
5116 unsigned HintID = static_cast<unsigned>(-1);
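  // Map the hint builtins onto the immediate operand of the AArch64 HINT
  // instruction (NOP = 0, YIELD = 1, WFE = 2, WFI = 3, SEV = 4, SEVL = 5).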
5117 switch (BuiltinID) {
5118 default: break;
5119 case clang::AArch64::BI__builtin_arm_nop:
5120 HintID = 0;
5121 break;
5122 case clang::AArch64::BI__builtin_arm_yield:
5123 case clang::AArch64::BI__yield:
5124 HintID = 1;
5125 break;
5126 case clang::AArch64::BI__builtin_arm_wfe:
5127 case clang::AArch64::BI__wfe:
5128 HintID = 2;
5129 break;
5130 case clang::AArch64::BI__builtin_arm_wfi:
5131 case clang::AArch64::BI__wfi:
5132 HintID = 3;
5133 break;
5134 case clang::AArch64::BI__builtin_arm_sev:
5135 case clang::AArch64::BI__sev:
5136 HintID = 4;
5137 break;
5138 case clang::AArch64::BI__builtin_arm_sevl:
5139 case clang::AArch64::BI__sevl:
5140 HintID = 5;
5141 break;
5142 }
5143
5144 if (HintID != static_cast<unsigned>(-1)) {
5145 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
5146 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
5147 }
5148
5149 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
5150 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
5151 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5152 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
5153 }
5154
5155 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
5156 // Create call to __arm_sme_state and store the results to the two pointers.
5157 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
5158 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
5159 false),
5160 "__arm_sme_state"));
5161 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
5162 "aarch64_pstate_sm_compatible");
5163 CI->setAttributes(Attrs);
5164 CI->setCallingConv(
5165 llvm::CallingConv::
5166 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
5167 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
5168                         EmitPointerWithAlignment(E->getArg(0)));
5169 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
5170                                EmitPointerWithAlignment(E->getArg(1)));
5171 }
5172
5173 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
5174 assert((getContext().getTypeSize(E->getType()) == 32) &&
5175 "rbit of unusual size!");
5176 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5177 return Builder.CreateCall(
5178 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5179 }
5180 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
5181 assert((getContext().getTypeSize(E->getType()) == 64) &&
5182 "rbit of unusual size!");
5183 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5184 return Builder.CreateCall(
5185 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5186 }
5187
5188 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
5189 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
5190 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5191 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
5192 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
5193 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
5194 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
5195 return Res;
5196 }
5197
5198 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
5199 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5200 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
5201 "cls");
5202 }
5203 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
5204 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5205 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
5206 "cls");
5207 }
5208
5209 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
5210 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
5211 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5212 llvm::Type *Ty = Arg->getType();
5213 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
5214 Arg, "frint32z");
5215 }
5216
5217 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
5218 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
5219 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5220 llvm::Type *Ty = Arg->getType();
5221 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
5222 Arg, "frint64z");
5223 }
5224
5225 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
5226 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
5227 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5228 llvm::Type *Ty = Arg->getType();
5229 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
5230 Arg, "frint32x");
5231 }
5232
5233 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
5234 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
5235 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5236 llvm::Type *Ty = Arg->getType();
5237 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
5238 Arg, "frint64x");
5239 }
5240
5241 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
5242 assert((getContext().getTypeSize(E->getType()) == 32) &&
5243 "__jcvt of unusual size!");
5244 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5245 return Builder.CreateCall(
5246 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
5247 }
5248
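// 64-byte load/store builtins (FEAT_LS64): ld64b loads eight i64 values from
// MemAddr into the buffer at ValPtr; the st64b/st64bv/st64bv0 variants store
// eight i64 values read from ValPtr.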
5249 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
5250 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
5251 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
5252 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
5253 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
5254 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
5255
5256 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
5257 // Load from the address via an LLVM intrinsic, receiving a
5258 // tuple of 8 i64 words, and store each one to ValPtr.
5259 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
5260 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
5261 llvm::Value *ToRet;
5262 for (size_t i = 0; i < 8; i++) {
5263 llvm::Value *ValOffsetPtr =
5264 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
5265 Address Addr =
5266 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
5267 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
5268 }
5269 return ToRet;
5270 } else {
5271 // Load 8 i64 words from ValPtr, and store them to the address
5272 // via an LLVM intrinsic.
5273 llvm::SmallVector<llvm::Value *, 9> Args;
5274 Args.push_back(MemAddr);
5275 for (size_t i = 0; i < 8; i++) {
5276 llvm::Value *ValOffsetPtr =
5277 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
5278 Address Addr =
5279 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
5280 Args.push_back(Builder.CreateLoad(Addr));
5281 }
5282
5283 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
5284 ? Intrinsic::aarch64_st64b
5285 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
5286 ? Intrinsic::aarch64_st64bv
5287 : Intrinsic::aarch64_st64bv0);
5288 Function *F = CGM.getIntrinsic(Intr);
5289 return Builder.CreateCall(F, Args);
5290 }
5291 }
5292
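// Random number builtins (FEAT_RNG): the intrinsic returns {value, status};
// the value is stored through the pointer argument and the zero-extended
// status is returned.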
5293 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
5294 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
5295
5296 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
5297 ? Intrinsic::aarch64_rndr
5298 : Intrinsic::aarch64_rndrrs);
5299 Function *F = CGM.getIntrinsic(Intr);
5300 llvm::Value *Val = Builder.CreateCall(F);
5301 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
5302 Value *Status = Builder.CreateExtractValue(Val, 1);
5303
5304 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
5305 Builder.CreateStore(RandomValue, MemAddress);
5306 Status = Builder.CreateZExt(Status, Int32Ty);
5307 return Status;
5308 }
5309
5310 if (BuiltinID == clang::AArch64::BI__clear_cache) {
5311 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5312 const FunctionDecl *FD = E->getDirectCallee();
5313 Value *Ops[2];
5314 for (unsigned i = 0; i < 2; i++)
5315 Ops[i] = EmitScalarExpr(E->getArg(i));
5316 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5317 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5318 StringRef Name = FD->getName();
5319 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5320 }
5321
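// A 128-bit ldrex/ldaex lowers to ldxp/ldaxp, which returns the two 64-bit
// halves separately; they are zero-extended and recombined into an i128.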
5322 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
5323 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
5324 getContext().getTypeSize(E->getType()) == 128) {
5325 Function *F =
5326 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
5327 ? Intrinsic::aarch64_ldaxp
5328 : Intrinsic::aarch64_ldxp);
5329
5330 Value *LdPtr = EmitScalarExpr(E->getArg(0));
5331 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
5332
5333 Value *Val0 = Builder.CreateExtractValue(Val, 1);
5334 Value *Val1 = Builder.CreateExtractValue(Val, 0);
5335 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5336 Val0 = Builder.CreateZExt(Val0, Int128Ty);
5337 Val1 = Builder.CreateZExt(Val1, Int128Ty);
5338
5339 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
5340 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5341 Val = Builder.CreateOr(Val, Val1);
5342 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5343 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
5344 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
5345 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5346
5347 QualType Ty = E->getType();
5348 llvm::Type *RealResTy = ConvertType(Ty);
5349 llvm::Type *IntTy =
5350 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
5351
5352 Function *F =
5353 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
5354 ? Intrinsic::aarch64_ldaxr
5355 : Intrinsic::aarch64_ldxr,
5356 DefaultPtrTy);
5357 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
5358 Val->addParamAttr(
5359 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
5360
5361 if (RealResTy->isPointerTy())
5362 return Builder.CreateIntToPtr(Val, RealResTy);
5363
5364 llvm::Type *IntResTy = llvm::IntegerType::get(
5365 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
5366 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
5367 RealResTy);
5368 }
5369
5370 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
5371 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
5372 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
5373 Function *F =
5374 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
5375 ? Intrinsic::aarch64_stlxp
5376 : Intrinsic::aarch64_stxp);
5377 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
5378
5379 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
5380 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
5381
5382 Tmp = Tmp.withElementType(STy);
5383 llvm::Value *Val = Builder.CreateLoad(Tmp);
5384
5385 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
5386 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
5387 Value *StPtr = EmitScalarExpr(E->getArg(1));
5388 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
5389 }
5390
5391 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
5392 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
5393 Value *StoreVal = EmitScalarExpr(E->getArg(0));
5394 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
5395
5396 QualType Ty = E->getArg(0)->getType();
5397 llvm::Type *StoreTy =
5398 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
5399
5400 if (StoreVal->getType()->isPointerTy())
5401 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
5402 else {
5403 llvm::Type *IntTy = llvm::IntegerType::get(
5404 getLLVMContext(),
5405 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
5406 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
5407 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
5408 }
5409
5410 Function *F =
5411 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
5412 ? Intrinsic::aarch64_stlxr
5413 : Intrinsic::aarch64_stxr,
5414 StoreAddr->getType());
5415 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
5416 CI->addParamAttr(
5417 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
5418 return CI;
5419 }
5420
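// __getReg reads a general-purpose register (x0-x30, or sp when the argument
// is 31) through the read_register intrinsic, naming the register via
// metadata.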
5421 if (BuiltinID == clang::AArch64::BI__getReg) {
5422 Expr::EvalResult Result;
5423 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
5424 llvm_unreachable("Sema will ensure that the parameter is constant");
5425
5426 llvm::APSInt Value = Result.Val.getInt();
5427 LLVMContext &Context = CGM.getLLVMContext();
5428 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
5429
5430 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
5431 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5432 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5433
5434 llvm::Function *F =
5435 CGM.getIntrinsic(Intrinsic::read_register, {Int64Ty});
5436 return Builder.CreateCall(F, Metadata);
5437 }
5438
5439 if (BuiltinID == clang::AArch64::BI__break) {
5440 Expr::EvalResult Result;
5441 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
5442 llvm_unreachable("Sema will ensure that the parameter is constant");
5443
5444 llvm::Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
5445 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
5446 }
5447
5448 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
5449 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
5450 return Builder.CreateCall(F);
5451 }
5452
5453 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
5454 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
5455 llvm::SyncScope::SingleThread);
5456
5457 // CRC32
5458 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5459 switch (BuiltinID) {
5460 case clang::AArch64::BI__builtin_arm_crc32b:
5461 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
5462 case clang::AArch64::BI__builtin_arm_crc32cb:
5463 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
5464 case clang::AArch64::BI__builtin_arm_crc32h:
5465 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
5466 case clang::AArch64::BI__builtin_arm_crc32ch:
5467 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
5468 case clang::AArch64::BI__builtin_arm_crc32w:
5469 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
5470 case clang::AArch64::BI__builtin_arm_crc32cw:
5471 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
5472 case clang::AArch64::BI__builtin_arm_crc32d:
5473 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
5474 case clang::AArch64::BI__builtin_arm_crc32cd:
5475 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
5476 }
5477
5478 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5479 Value *Arg0 = EmitScalarExpr(E->getArg(0));
5480 Value *Arg1 = EmitScalarExpr(E->getArg(1));
5481 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5482
5483 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
5484 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
5485
5486 return Builder.CreateCall(F, {Arg0, Arg1});
5487 }
5488
5489 // Memory Operations (MOPS)
5490 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
5491 Value *Dst = EmitScalarExpr(E->getArg(0));
5492 Value *Val = EmitScalarExpr(E->getArg(1));
5493 Value *Size = EmitScalarExpr(E->getArg(2));
5494 Val = Builder.CreateTrunc(Val, Int8Ty);
5495 Size = Builder.CreateIntCast(Size, Int64Ty, false);
5496 return Builder.CreateCall(
5497 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
5498 }
5499
5500 if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch ||
5501 BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x)
5502 return EmitRangePrefetchBuiltin(*this, BuiltinID, E);
5503
5504 // Memory Tagging Extensions (MTE) Intrinsics
5505 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
5506 switch (BuiltinID) {
5507 case clang::AArch64::BI__builtin_arm_irg:
5508 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
5509 case clang::AArch64::BI__builtin_arm_addg:
5510 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
5511 case clang::AArch64::BI__builtin_arm_gmi:
5512 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
5513 case clang::AArch64::BI__builtin_arm_ldg:
5514 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
5515 case clang::AArch64::BI__builtin_arm_stg:
5516 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
5517 case clang::AArch64::BI__builtin_arm_subp:
5518 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
5519 }
5520
5521 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
5522 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
5523 Value *Pointer = EmitScalarExpr(E->getArg(0));
5524 Value *Mask = EmitScalarExpr(E->getArg(1));
5525
5526 Mask = Builder.CreateZExt(Mask, Int64Ty);
5527 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5528 {Pointer, Mask});
5529 }
5530 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
5531 Value *Pointer = EmitScalarExpr(E->getArg(0));
5532 Value *TagOffset = EmitScalarExpr(E->getArg(1));
5533
5534 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
5535 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5536 {Pointer, TagOffset});
5537 }
5538 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
5539 Value *Pointer = EmitScalarExpr(E->getArg(0));
5540 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
5541
5542 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
5543 return Builder.CreateCall(
5544 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
5545 }
5546 // Although it is possible to supply a different return
5547 // address (first arg) to this intrinsic, for now we set the
5548 // return address to be the same as the input address.
5549 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
5550 Value *TagAddress = EmitScalarExpr(E->getArg(0));
5551 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5552 {TagAddress, TagAddress});
5553 }
5554 // Although it is possible to supply a different tag (to set)
5555 // to this intrinsic (as first arg), for now we supply the tag
5556 // that is in the input address argument (the common use case).
5557 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
5558 Value *TagAddress = EmitScalarExpr(E->getArg(0));
5559 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5560 {TagAddress, TagAddress});
5561 }
5562 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
5563 Value *PointerA = EmitScalarExpr(E->getArg(0));
5564 Value *PointerB = EmitScalarExpr(E->getArg(1));
5565 return Builder.CreateCall(
5566 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
5567 }
5568 }
5569
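// __builtin_arm_rsr*/wsr* access system registers; the value type is i32,
// i64, i128, or a pointer depending on the builtin variant, while the
// register itself is modeled as i64 or i128.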
5570 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5571 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
5572 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5573 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
5574 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
5575 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
5576 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
5577 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
5578
5579 SpecialRegisterAccessKind AccessKind = Write;
5580 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5581 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
5582 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5583 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
5584 AccessKind = VolatileRead;
5585
5586 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
5587 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
5588
5589 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5590 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
5591
5592 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5593 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
5594
5595 llvm::Type *ValueType;
5596 llvm::Type *RegisterType = Int64Ty;
5597 if (Is32Bit) {
5598 ValueType = Int32Ty;
5599 } else if (Is128Bit) {
5600 llvm::Type *Int128Ty =
5601 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
5602 ValueType = Int128Ty;
5603 RegisterType = Int128Ty;
5604 } else if (IsPointerBuiltin) {
5605 ValueType = VoidPtrTy;
5606 } else {
5607 ValueType = Int64Ty;
5608 }
5609
5610 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
5611 AccessKind);
5612 }
5613
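// _ReadStatusReg/_WriteStatusReg/__sys encode the system register operand as
// an "op0:op1:CRn:CRm:op2" string and pass it as metadata to the
// read_register/write_register intrinsics.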
5614 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5615 BuiltinID == clang::AArch64::BI_WriteStatusReg ||
5616 BuiltinID == clang::AArch64::BI__sys) {
5617 LLVMContext &Context = CGM.getLLVMContext();
5618
5619 unsigned SysReg =
5620 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
5621
5622 std::string SysRegStr;
5623 unsigned SysRegOp0 = (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5624 BuiltinID == clang::AArch64::BI_WriteStatusReg)
5625 ? ((1 << 1) | ((SysReg >> 14) & 1))
5626 : 1;
5627 llvm::raw_string_ostream(SysRegStr)
5628 << SysRegOp0 << ":" << ((SysReg >> 11) & 7) << ":"
5629 << ((SysReg >> 7) & 15) << ":" << ((SysReg >> 3) & 15) << ":"
5630 << (SysReg & 7);
5631
5632 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
5633 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5634 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5635
5636 llvm::Type *RegisterType = Int64Ty;
5637 llvm::Type *Types[] = { RegisterType };
5638
5639 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
5640 llvm::Function *F = CGM.getIntrinsic(Intrinsic::read_register, Types);
5641
5642 return Builder.CreateCall(F, Metadata);
5643 }
5644
5645 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
5646 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
5647 llvm::Value *Result = Builder.CreateCall(F, {Metadata, ArgValue});
5648 if (BuiltinID == clang::AArch64::BI__sys) {
5649 // Return 0 for convenience, even though MSVC returns some other undefined
5650 // value.
5651 Result = ConstantInt::get(Builder.getInt32Ty(), 0);
5652 }
5653 return Result;
5654 }
5655
5656 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
5657 llvm::Function *F =
5658 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
5659 return Builder.CreateCall(F);
5660 }
5661
5662 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
5663 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
5664 return Builder.CreateCall(F);
5665 }
5666
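// __mulh/__umulh: sign- or zero-extend both operands to i128, multiply, and
// return the high 64 bits of the product.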
5667 if (BuiltinID == clang::AArch64::BI__mulh ||
5668 BuiltinID == clang::AArch64::BI__umulh) {
5669 llvm::Type *ResType = ConvertType(E->getType());
5670 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5671
5672 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
5673 Value *LHS =
5674 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
5675 Value *RHS =
5676 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
5677
5678 Value *MulResult, *HigherBits;
5679 if (IsSigned) {
5680 MulResult = Builder.CreateNSWMul(LHS, RHS);
5681 HigherBits = Builder.CreateAShr(MulResult, 64);
5682 } else {
5683 MulResult = Builder.CreateNUWMul(LHS, RHS);
5684 HigherBits = Builder.CreateLShr(MulResult, 64);
5685 }
5686 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
5687
5688 return HigherBits;
5689 }
5690
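// The __writex18*, __readx18*, __addx18*, and __incx18* builtins access
// memory at a byte offset from the platform register x18.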
5691 if (BuiltinID == AArch64::BI__writex18byte ||
5692 BuiltinID == AArch64::BI__writex18word ||
5693 BuiltinID == AArch64::BI__writex18dword ||
5694 BuiltinID == AArch64::BI__writex18qword) {
5695 // Process the args first
5696 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5697 Value *DataArg = EmitScalarExpr(E->getArg(1));
5698
5699 // Read x18 as i8*
5700 llvm::Value *X18 = readX18AsPtr(*this);
5701
5702 // Store val at x18 + offset
5703 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5704 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5705 StoreInst *Store =
5706 Builder.CreateAlignedStore(DataArg, Ptr, CharUnits::One());
5707 return Store;
5708 }
5709
5710 if (BuiltinID == AArch64::BI__readx18byte ||
5711 BuiltinID == AArch64::BI__readx18word ||
5712 BuiltinID == AArch64::BI__readx18dword ||
5713 BuiltinID == AArch64::BI__readx18qword) {
5714 // Process the args first
5715 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5716
5717 // Read x18 as i8*
5718 llvm::Value *X18 = readX18AsPtr(*this);
5719
5720 // Load x18 + offset
5721 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5722 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5723 llvm::Type *IntTy = ConvertType(E->getType());
5724 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
5725 return Load;
5726 }
5727
5728 if (BuiltinID == AArch64::BI__addx18byte ||
5729 BuiltinID == AArch64::BI__addx18word ||
5730 BuiltinID == AArch64::BI__addx18dword ||
5731 BuiltinID == AArch64::BI__addx18qword ||
5732 BuiltinID == AArch64::BI__incx18byte ||
5733 BuiltinID == AArch64::BI__incx18word ||
5734 BuiltinID == AArch64::BI__incx18dword ||
5735 BuiltinID == AArch64::BI__incx18qword) {
5736 llvm::Type *IntTy;
5737 bool isIncrement;
5738 switch (BuiltinID) {
5739 case AArch64::BI__incx18byte:
5740 IntTy = Int8Ty;
5741 isIncrement = true;
5742 break;
5743 case AArch64::BI__incx18word:
5744 IntTy = Int16Ty;
5745 isIncrement = true;
5746 break;
5747 case AArch64::BI__incx18dword:
5748 IntTy = Int32Ty;
5749 isIncrement = true;
5750 break;
5751 case AArch64::BI__incx18qword:
5752 IntTy = Int64Ty;
5753 isIncrement = true;
5754 break;
5755 default:
5756 IntTy = ConvertType(E->getArg(1)->getType());
5757 isIncrement = false;
5758 break;
5759 }
5760 // Process the args first
5761 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5762 Value *ValToAdd =
5763 isIncrement ? ConstantInt::get(IntTy, 1) : EmitScalarExpr(E->getArg(1));
5764
5765 // Read x18 as i8*
5766 llvm::Value *X18 = readX18AsPtr(*this);
5767
5768 // Load x18 + offset
5769 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5770 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5771 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
5772
5773 // Add values
5774 Value *AddResult = Builder.CreateAdd(Load, ValToAdd);
5775
5776 // Store val at x18 + offset
5777 StoreInst *Store =
5778 Builder.CreateAlignedStore(AddResult, Ptr, CharUnits::One());
5779 return Store;
5780 }
5781
5782 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
5783 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
5784 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
5785 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
5786 Value *Arg = EmitScalarExpr(E->getArg(0));
5787 llvm::Type *RetTy = ConvertType(E->getType());
5788 return Builder.CreateBitCast(Arg, RetTy);
5789 }
5790
5791 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5792 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5793 BuiltinID == AArch64::BI_CountLeadingZeros ||
5794 BuiltinID == AArch64::BI_CountLeadingZeros64) {
5795 Value *Arg = EmitScalarExpr(E->getArg(0));
5796 llvm::Type *ArgType = Arg->getType();
5797
5798 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5799 BuiltinID == AArch64::BI_CountLeadingOnes64)
5800 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
5801
5802 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
5803 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
5804
5805 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5806 BuiltinID == AArch64::BI_CountLeadingZeros64)
5807 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5808 return Result;
5809 }
5810
5811 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
5812 BuiltinID == AArch64::BI_CountLeadingSigns64) {
5813 Value *Arg = EmitScalarExpr(E->getArg(0));
5814
5815 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
5816 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
5817 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
5818
5819 Value *Result = Builder.CreateCall(F, Arg, "cls");
5820 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
5821 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5822 return Result;
5823 }
5824
5825 if (BuiltinID == AArch64::BI_CountOneBits ||
5826 BuiltinID == AArch64::BI_CountOneBits64) {
5827 Value *ArgValue = EmitScalarExpr(E->getArg(0));
5828 llvm::Type *ArgType = ArgValue->getType();
5829 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
5830
5831 Value *Result = Builder.CreateCall(F, ArgValue);
5832 if (BuiltinID == AArch64::BI_CountOneBits64)
5833 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5834 return Result;
5835 }
5836
5837 if (BuiltinID == AArch64::BI__prefetch) {
5838 Value *Address = EmitScalarExpr(E->getArg(0));
5839 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
5840 Value *Locality = ConstantInt::get(Int32Ty, 3);
5841 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
5842 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
5843 return Builder.CreateCall(F, {Address, RW, Locality, Data});
5844 }
5845
5846 if (BuiltinID == AArch64::BI__hlt) {
5847 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
5848 Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
5849
5850 // Return 0 for convenience, even though MSVC returns some other undefined
5851 // value.
5852 return ConstantInt::get(Builder.getInt32Ty(), 0);
5853 }
5854
5855 if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
5856 return Builder.CreateFPTrunc(
5857 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
5858 Builder.getFloatTy()),
5859 Builder.getBFloatTy());
5860
5861 // Handle MSVC intrinsics before argument evaluation to prevent double
5862 // evaluation.
5863 if (std::optional<MSVCIntrin> MsvcIntId =
5864 translateAarch64ToMsvcIntrin(BuiltinID))
5865 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
5866
5867 // Some intrinsics are equivalent; if they are, use the base intrinsic ID.
5868 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
5869 return P.first == BuiltinID;
5870 });
5871 if (It != end(NEONEquivalentIntrinsicMap))
5872 BuiltinID = It->second;
5873
5874 // Find out if any arguments are required to be integer constant
5875 // expressions.
5876 unsigned ICEArguments = 0;
5877 ASTContext::GetBuiltinTypeError Error;
5878 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5879 assert(Error == ASTContext::GE_None && "Should not codegen an error");
5880
5881 llvm::SmallVector<Value *, 4> Ops;
5882 Address PtrOp0 = Address::invalid();
5883 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
5884 if (i == 0) {
5885 switch (BuiltinID) {
5886 case NEON::BI__builtin_neon_vld1_v:
5887 case NEON::BI__builtin_neon_vld1q_v:
5888 case NEON::BI__builtin_neon_vld1_dup_v:
5889 case NEON::BI__builtin_neon_vld1q_dup_v:
5890 case NEON::BI__builtin_neon_vld1_lane_v:
5891 case NEON::BI__builtin_neon_vld1q_lane_v:
5892 case NEON::BI__builtin_neon_vst1_v:
5893 case NEON::BI__builtin_neon_vst1q_v:
5894 case NEON::BI__builtin_neon_vst1_lane_v:
5895 case NEON::BI__builtin_neon_vst1q_lane_v:
5896 case NEON::BI__builtin_neon_vldap1_lane_s64:
5897 case NEON::BI__builtin_neon_vldap1q_lane_s64:
5898 case NEON::BI__builtin_neon_vstl1_lane_s64:
5899 case NEON::BI__builtin_neon_vstl1q_lane_s64:
5900 // Get the alignment for the argument in addition to the value;
5901 // we'll use it later.
5902 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
5903 Ops.push_back(PtrOp0.emitRawPointer(*this));
5904 continue;
5905 }
5906 }
5907 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
5908 }
5909
5910 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
5911 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
5912 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5913
5914 if (Builtin) {
5915 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
5916 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
5917 assert(Result && "SISD intrinsic should have been handled");
5918 return Result;
5919 }
5920
5921 const Expr *Arg = E->getArg(E->getNumArgs()-1);
5922 NeonTypeFlags Type(0);
5923 if (std::optional<llvm::APSInt> Result =
5924 Arg->getIntegerConstantExpr(getContext()))
5925 // Determine the type of this overloaded NEON intrinsic.
5926 Type = NeonTypeFlags(Result->getZExtValue());
5927
5928 bool usgn = Type.isUnsigned();
5929 bool quad = Type.isQuad();
5930
5931 // Handle non-overloaded intrinsics first.
5932 switch (BuiltinID) {
5933 default: break;
5934 case NEON::BI__builtin_neon_vabsh_f16:
5935 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5936 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
5937 case NEON::BI__builtin_neon_vaddq_p128: {
5938 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
5939 Ops.push_back(EmitScalarExpr(E->getArg(1)));
5940 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5941 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5942 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
5943 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5944 return Builder.CreateBitCast(Ops[0], Int128Ty);
5945 }
5946 case NEON::BI__builtin_neon_vldrq_p128: {
5947 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5948 Value *Ptr = EmitScalarExpr(E->getArg(0));
5949 return Builder.CreateAlignedLoad(Int128Ty, Ptr,
5950 CharUnits::fromQuantity(16));
5951 }
5952 case NEON::BI__builtin_neon_vstrq_p128: {
5953 Value *Ptr = Ops[0];
5954 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
5955 }
5956 case NEON::BI__builtin_neon_vcvts_f32_u32:
5957 case NEON::BI__builtin_neon_vcvtd_f64_u64:
5958 usgn = true;
5959 [[fallthrough]];
5960 case NEON::BI__builtin_neon_vcvts_f32_s32:
5961 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5962 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5963 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5964 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5965 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5966 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5967 if (usgn)
5968 return Builder.CreateUIToFP(Ops[0], FTy);
5969 return Builder.CreateSIToFP(Ops[0], FTy);
5970 }
5971 case NEON::BI__builtin_neon_vcvth_f16_u16:
5972 case NEON::BI__builtin_neon_vcvth_f16_u32:
5973 case NEON::BI__builtin_neon_vcvth_f16_u64:
5974 usgn = true;
5975 [[fallthrough]];
5976 case NEON::BI__builtin_neon_vcvth_f16_s16:
5977 case NEON::BI__builtin_neon_vcvth_f16_s32:
5978 case NEON::BI__builtin_neon_vcvth_f16_s64: {
5979 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5980 llvm::Type *FTy = HalfTy;
5981 llvm::Type *InTy;
5982 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
5983 InTy = Int64Ty;
5984 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
5985 InTy = Int32Ty;
5986 else
5987 InTy = Int16Ty;
5988 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5989 if (usgn)
5990 return Builder.CreateUIToFP(Ops[0], FTy);
5991 return Builder.CreateSIToFP(Ops[0], FTy);
5992 }
5993 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5994 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5995 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5996 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5997 case NEON::BI__builtin_neon_vcvth_u16_f16:
5998 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5999 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
6000 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
6001 case NEON::BI__builtin_neon_vcvtph_s16_f16:
6002 case NEON::BI__builtin_neon_vcvth_s16_f16: {
6003 unsigned Int;
6004 llvm::Type *InTy = Int16Ty;
6005 llvm::Type* FTy = HalfTy;
6006 llvm::Type *Tys[2] = {InTy, FTy};
6007 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6008 switch (BuiltinID) {
6009 default: llvm_unreachable("missing builtin ID in switch!");
6010 case NEON::BI__builtin_neon_vcvtah_u16_f16:
6011 Int = Intrinsic::aarch64_neon_fcvtau; break;
6012 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
6013 Int = Intrinsic::aarch64_neon_fcvtmu; break;
6014 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
6015 Int = Intrinsic::aarch64_neon_fcvtnu; break;
6016 case NEON::BI__builtin_neon_vcvtph_u16_f16:
6017 Int = Intrinsic::aarch64_neon_fcvtpu; break;
6018 case NEON::BI__builtin_neon_vcvth_u16_f16:
6019 Int = Intrinsic::aarch64_neon_fcvtzu; break;
6020 case NEON::BI__builtin_neon_vcvtah_s16_f16:
6021 Int = Intrinsic::aarch64_neon_fcvtas; break;
6022 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
6023 Int = Intrinsic::aarch64_neon_fcvtms; break;
6024 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
6025 Int = Intrinsic::aarch64_neon_fcvtns; break;
6026 case NEON::BI__builtin_neon_vcvtph_s16_f16:
6027 Int = Intrinsic::aarch64_neon_fcvtps; break;
6028 case NEON::BI__builtin_neon_vcvth_s16_f16:
6029 Int = Intrinsic::aarch64_neon_fcvtzs; break;
6030 }
6031 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
6032 }
6033 case NEON::BI__builtin_neon_vcaleh_f16:
6034 case NEON::BI__builtin_neon_vcalth_f16:
6035 case NEON::BI__builtin_neon_vcageh_f16:
6036 case NEON::BI__builtin_neon_vcagth_f16: {
6037 unsigned Int;
6038 llvm::Type* InTy = Int32Ty;
6039 llvm::Type* FTy = HalfTy;
6040 llvm::Type *Tys[2] = {InTy, FTy};
6041 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6042 switch (BuiltinID) {
6043 default: llvm_unreachable("missing builtin ID in switch!");
6044 case NEON::BI__builtin_neon_vcageh_f16:
6045 Int = Intrinsic::aarch64_neon_facge; break;
6046 case NEON::BI__builtin_neon_vcagth_f16:
6047 Int = Intrinsic::aarch64_neon_facgt; break;
6048 case NEON::BI__builtin_neon_vcaleh_f16:
6049 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
6050 case NEON::BI__builtin_neon_vcalth_f16:
6051 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
6052 }
6053 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
6054 return Builder.CreateTrunc(Ops[0], Int16Ty);
6055 }
6056 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
6057 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
6058 unsigned Int;
6059 llvm::Type* InTy = Int32Ty;
6060 llvm::Type* FTy = HalfTy;
6061 llvm::Type *Tys[2] = {InTy, FTy};
6062 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6063 switch (BuiltinID) {
6064 default: llvm_unreachable("missing builtin ID in switch!");
6065 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
6066 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
6067 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
6068 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
6069 }
6070 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
6071 return Builder.CreateTrunc(Ops[0], Int16Ty);
6072 }
6073 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
6074 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
6075 unsigned Int;
6076 llvm::Type* FTy = HalfTy;
6077 llvm::Type* InTy = Int32Ty;
6078 llvm::Type *Tys[2] = {FTy, InTy};
6079 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6080 switch (BuiltinID) {
6081 default: llvm_unreachable("missing builtin ID in switch!");
6082 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
6083 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
6084 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
6085 break;
6086 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
6087 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
6088 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
6089 break;
6090 }
6091 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
6092 }
6093 case NEON::BI__builtin_neon_vpaddd_s64: {
6094 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
6095 Value *Vec = EmitScalarExpr(E->getArg(0));
6096 // The vector is v2i64, so make sure it's bitcast to that.
6097 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
6098 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
6099 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
6100 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
6101 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
6102 // Pairwise addition of a v2i64 into a scalar i64.
6103 return Builder.CreateAdd(Op0, Op1, "vpaddd");
6104 }
6105 case NEON::BI__builtin_neon_vpaddd_f64: {
6106 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
6107 Value *Vec = EmitScalarExpr(E->getArg(0));
6108 // The vector is v2f64, so make sure it's bitcast to that.
6109 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
6110 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
6111 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
6112 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
6113 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
6114 // Pairwise addition of a v2f64 into a scalar f64.
6115 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
6116 }
6117 case NEON::BI__builtin_neon_vpadds_f32: {
6118 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
6119 Value *Vec = EmitScalarExpr(E->getArg(0));
6120 // The vector is v2f32, so make sure it's bitcast to that.
6121 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
6122 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
6123 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
6124 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
6125 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
6126 // Pairwise addition of a v2f32 into a scalar f32.
6127 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
6128 }
6129 case NEON::BI__builtin_neon_vceqzd_s64:
6130 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6131 return EmitAArch64CompareBuiltinExpr(
6132 Ops[0], ConvertType(E->getType()),
6133 ICmpInst::ICMP_EQ, "vceqz");
6134 case NEON::BI__builtin_neon_vceqzd_f64:
6135 case NEON::BI__builtin_neon_vceqzs_f32:
6136 case NEON::BI__builtin_neon_vceqzh_f16:
6137 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6138 return EmitAArch64CompareBuiltinExpr(
6139 Ops[0], ConvertType(E->getType()),
6140 ICmpInst::FCMP_OEQ, "vceqz");
6141 case NEON::BI__builtin_neon_vcgezd_s64:
6142 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6143 return EmitAArch64CompareBuiltinExpr(
6144 Ops[0], ConvertType(E->getType()),
6145 ICmpInst::ICMP_SGE, "vcgez");
6146 case NEON::BI__builtin_neon_vcgezd_f64:
6147 case NEON::BI__builtin_neon_vcgezs_f32:
6148 case NEON::BI__builtin_neon_vcgezh_f16:
6149 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6150 return EmitAArch64CompareBuiltinExpr(
6151 Ops[0], ConvertType(E->getType()),
6152 ICmpInst::FCMP_OGE, "vcgez");
6153 case NEON::BI__builtin_neon_vclezd_s64:
6154 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6155 return EmitAArch64CompareBuiltinExpr(
6156 Ops[0], ConvertType(E->getType()),
6157 ICmpInst::ICMP_SLE, "vclez");
6158 case NEON::BI__builtin_neon_vclezd_f64:
6159 case NEON::BI__builtin_neon_vclezs_f32:
6160 case NEON::BI__builtin_neon_vclezh_f16:
6161 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6162 return EmitAArch64CompareBuiltinExpr(
6163 Ops[0], ConvertType(E->getType()),
6164 ICmpInst::FCMP_OLE, "vclez");
6165 case NEON::BI__builtin_neon_vcgtzd_s64:
6166 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6167 return EmitAArch64CompareBuiltinExpr(
6168 Ops[0], ConvertType(E->getType()),
6169 ICmpInst::ICMP_SGT, "vcgtz");
6170 case NEON::BI__builtin_neon_vcgtzd_f64:
6171 case NEON::BI__builtin_neon_vcgtzs_f32:
6172 case NEON::BI__builtin_neon_vcgtzh_f16:
6173 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6174 return EmitAArch64CompareBuiltinExpr(
6175 Ops[0], ConvertType(E->getType()),
6176 ICmpInst::FCMP_OGT, "vcgtz");
6177 case NEON::BI__builtin_neon_vcltzd_s64:
6178 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6179 return EmitAArch64CompareBuiltinExpr(
6180 Ops[0], ConvertType(E->getType()),
6181 ICmpInst::ICMP_SLT, "vcltz");
6182
6183 case NEON::BI__builtin_neon_vcltzd_f64:
6184 case NEON::BI__builtin_neon_vcltzs_f32:
6185 case NEON::BI__builtin_neon_vcltzh_f16:
6186 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6187 return EmitAArch64CompareBuiltinExpr(
6188 Ops[0], ConvertType(E->getType()),
6189 ICmpInst::FCMP_OLT, "vcltz");
6190
6191 case NEON::BI__builtin_neon_vceqzd_u64: {
6192 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6193 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6194 Ops[0] =
6195 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
6196 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
6197 }
6198 case NEON::BI__builtin_neon_vceqd_f64:
6199 case NEON::BI__builtin_neon_vcled_f64:
6200 case NEON::BI__builtin_neon_vcltd_f64:
6201 case NEON::BI__builtin_neon_vcged_f64:
6202 case NEON::BI__builtin_neon_vcgtd_f64: {
6203 llvm::CmpInst::Predicate P;
6204 switch (BuiltinID) {
6205 default: llvm_unreachable("missing builtin ID in switch!");
6206 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
6207 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
6208 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
6209 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
6210 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
6211 }
6212 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6213 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6214 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6215 if (P == llvm::FCmpInst::FCMP_OEQ)
6216 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6217 else
6218 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
6219 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
6220 }
6221 case NEON::BI__builtin_neon_vceqs_f32:
6222 case NEON::BI__builtin_neon_vcles_f32:
6223 case NEON::BI__builtin_neon_vclts_f32:
6224 case NEON::BI__builtin_neon_vcges_f32:
6225 case NEON::BI__builtin_neon_vcgts_f32: {
6226 llvm::CmpInst::Predicate P;
6227 switch (BuiltinID) {
6228 default: llvm_unreachable("missing builtin ID in switch!");
6229 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
6230 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
6231 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
6232 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
6233 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
6234 }
6235 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6236 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
6237 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
6238 if (P == llvm::FCmpInst::FCMP_OEQ)
6239 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6240 else
6241 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
6242 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
6243 }
6244 case NEON::BI__builtin_neon_vceqh_f16:
6245 case NEON::BI__builtin_neon_vcleh_f16:
6246 case NEON::BI__builtin_neon_vclth_f16:
6247 case NEON::BI__builtin_neon_vcgeh_f16:
6248 case NEON::BI__builtin_neon_vcgth_f16: {
6249 llvm::CmpInst::Predicate P;
6250 switch (BuiltinID) {
6251 default: llvm_unreachable("missing builtin ID in switch!");
6252 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
6253 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
6254 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
6255 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
6256 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
6257 }
6258 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6259 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
6260 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
6261 if (P == llvm::FCmpInst::FCMP_OEQ)
6262 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6263 else
6264 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
6265 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
6266 }
6267 case NEON::BI__builtin_neon_vceqd_s64:
6268 case NEON::BI__builtin_neon_vceqd_u64:
6269 case NEON::BI__builtin_neon_vcgtd_s64:
6270 case NEON::BI__builtin_neon_vcgtd_u64:
6271 case NEON::BI__builtin_neon_vcltd_s64:
6272 case NEON::BI__builtin_neon_vcltd_u64:
6273 case NEON::BI__builtin_neon_vcged_u64:
6274 case NEON::BI__builtin_neon_vcged_s64:
6275 case NEON::BI__builtin_neon_vcled_u64:
6276 case NEON::BI__builtin_neon_vcled_s64: {
6277 llvm::CmpInst::Predicate P;
6278 switch (BuiltinID) {
6279 default: llvm_unreachable("missing builtin ID in switch!");
6280 case NEON::BI__builtin_neon_vceqd_s64:
6281 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
6282 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
6283 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
6284 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
6285 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
6286 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
6287 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
6288 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
6289 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
6290 }
6291 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6292 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6293 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6294 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
6295 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
6296 }
6297 case NEON::BI__builtin_neon_vtstd_s64:
6298 case NEON::BI__builtin_neon_vtstd_u64: {
6299 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6300 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6301 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6302 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
6303 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
6304 llvm::Constant::getNullValue(Int64Ty));
6305 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
6306 }
6307 case NEON::BI__builtin_neon_vset_lane_i8:
6308 case NEON::BI__builtin_neon_vset_lane_i16:
6309 case NEON::BI__builtin_neon_vset_lane_i32:
6310 case NEON::BI__builtin_neon_vset_lane_i64:
6311 case NEON::BI__builtin_neon_vset_lane_bf16:
6312 case NEON::BI__builtin_neon_vset_lane_f32:
6313 case NEON::BI__builtin_neon_vsetq_lane_i8:
6314 case NEON::BI__builtin_neon_vsetq_lane_i16:
6315 case NEON::BI__builtin_neon_vsetq_lane_i32:
6316 case NEON::BI__builtin_neon_vsetq_lane_i64:
6317 case NEON::BI__builtin_neon_vsetq_lane_bf16:
6318 case NEON::BI__builtin_neon_vsetq_lane_f32:
6319 Ops.push_back(EmitScalarExpr(E->getArg(2)));
6320 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6321 case NEON::BI__builtin_neon_vset_lane_f64:
6322 // The vector type needs a cast for the v1f64 variant.
6323 Ops[1] =
6324 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
6325 Ops.push_back(EmitScalarExpr(E->getArg(2)));
6326 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6327 case NEON::BI__builtin_neon_vset_lane_mf8:
6328 case NEON::BI__builtin_neon_vsetq_lane_mf8:
6329 Ops.push_back(EmitScalarExpr(E->getArg(2)));
6330 // The input vector type needs a cast to scalar type.
6331 Ops[0] =
6332 Builder.CreateBitCast(Ops[0], llvm::Type::getInt8Ty(getLLVMContext()));
6333 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6334 case NEON::BI__builtin_neon_vsetq_lane_f64:
6335 // The vector type needs a cast for the v2f64 variant.
6336 Ops[1] =
6337 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
6338 Ops.push_back(EmitScalarExpr(E->getArg(2)));
6339 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6340
6341 case NEON::BI__builtin_neon_vget_lane_i8:
6342 case NEON::BI__builtin_neon_vdupb_lane_i8:
6343 Ops[0] =
6344 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
6345 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6346 "vget_lane");
6347 case NEON::BI__builtin_neon_vgetq_lane_i8:
6348 case NEON::BI__builtin_neon_vdupb_laneq_i8:
6349 Ops[0] =
6350 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
6351 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6352 "vgetq_lane");
6353 case NEON::BI__builtin_neon_vget_lane_mf8:
6354 case NEON::BI__builtin_neon_vdupb_lane_mf8:
6355 case NEON::BI__builtin_neon_vgetq_lane_mf8:
6356 case NEON::BI__builtin_neon_vdupb_laneq_mf8:
6357 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6358 "vget_lane");
6359 case NEON::BI__builtin_neon_vget_lane_i16:
6360 case NEON::BI__builtin_neon_vduph_lane_i16:
6361 Ops[0] =
6362 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
6363 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6364 "vget_lane");
6365 case NEON::BI__builtin_neon_vgetq_lane_i16:
6366 case NEON::BI__builtin_neon_vduph_laneq_i16:
6367 Ops[0] =
6368 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
6369 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6370 "vgetq_lane");
6371 case NEON::BI__builtin_neon_vget_lane_i32:
6372 case NEON::BI__builtin_neon_vdups_lane_i32:
6373 Ops[0] =
6374 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
6375 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6376 "vget_lane");
6377 case NEON::BI__builtin_neon_vdups_lane_f32:
6378 Ops[0] =
6379 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
6380 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6381 "vdups_lane");
6382 case NEON::BI__builtin_neon_vgetq_lane_i32:
6383 case NEON::BI__builtin_neon_vdups_laneq_i32:
6384 Ops[0] =
6385 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
6386 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6387 "vgetq_lane");
6388 case NEON::BI__builtin_neon_vget_lane_i64:
6389 case NEON::BI__builtin_neon_vdupd_lane_i64:
6390 Ops[0] =
6391 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
6392 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6393 "vget_lane");
6394 case NEON::BI__builtin_neon_vdupd_lane_f64:
6395 Ops[0] =
6396 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
6397 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6398 "vdupd_lane");
6399 case NEON::BI__builtin_neon_vgetq_lane_i64:
6400 case NEON::BI__builtin_neon_vdupd_laneq_i64:
6401 Ops[0] =
6402 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
6403 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6404 "vgetq_lane");
6405 case NEON::BI__builtin_neon_vget_lane_f32:
6406 Ops[0] =
6407 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
6408 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6409 "vget_lane");
6410 case NEON::BI__builtin_neon_vget_lane_f64:
6411 Ops[0] =
6412 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
6413 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6414 "vget_lane");
6415 case NEON::BI__builtin_neon_vgetq_lane_f32:
6416 case NEON::BI__builtin_neon_vdups_laneq_f32:
6417 Ops[0] =
6418 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
6419 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6420 "vgetq_lane");
6421 case NEON::BI__builtin_neon_vgetq_lane_f64:
6422 case NEON::BI__builtin_neon_vdupd_laneq_f64:
6423 Ops[0] =
6424 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
6425 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6426 "vgetq_lane");
6427 case NEON::BI__builtin_neon_vaddh_f16:
6428 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6429 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
6430 case NEON::BI__builtin_neon_vsubh_f16:
6431 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6432 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
6433 case NEON::BI__builtin_neon_vmulh_f16:
6434 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6435 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
6436 case NEON::BI__builtin_neon_vdivh_f16:
6437 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6438 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
6439 case NEON::BI__builtin_neon_vfmah_f16:
6440 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
6441 return emitCallMaybeConstrainedFPBuiltin(
6442 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
6443 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
6444 case NEON::BI__builtin_neon_vfmsh_f16: {
6445 Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
6446
6447 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
6448 return emitCallMaybeConstrainedFPBuiltin(
6449 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
6450 {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
6451 }
6452 case NEON::BI__builtin_neon_vaddd_s64:
6453 case NEON::BI__builtin_neon_vaddd_u64:
6454 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
6455 case NEON::BI__builtin_neon_vsubd_s64:
6456 case NEON::BI__builtin_neon_vsubd_u64:
6457 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
6458 case NEON::BI__builtin_neon_vqdmlalh_s16:
6459 case NEON::BI__builtin_neon_vqdmlslh_s16: {
6460 SmallVector<Value *, 2> ProductOps;
6461 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
6462 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
6463 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
6464 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6465 ProductOps, "vqdmlXl");
6466 Constant *CI = ConstantInt::get(SizeTy, 0);
6467 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6468
6469 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
6470 ? Intrinsic::aarch64_neon_sqadd
6471 : Intrinsic::aarch64_neon_sqsub;
6472 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
6473 }
6474 case NEON::BI__builtin_neon_vqshlud_n_s64: {
6475 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6476 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
6477 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
6478 Ops, "vqshlu_n");
6479 }
6480 case NEON::BI__builtin_neon_vqshld_n_u64:
6481 case NEON::BI__builtin_neon_vqshld_n_s64: {
6482 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
6483 ? Intrinsic::aarch64_neon_uqshl
6484 : Intrinsic::aarch64_neon_sqshl;
6485 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6486 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
6487 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
6488 }
6489 case NEON::BI__builtin_neon_vrshrd_n_u64:
6490 case NEON::BI__builtin_neon_vrshrd_n_s64: {
6491 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
6492 ? Intrinsic::aarch64_neon_urshl
6493 : Intrinsic::aarch64_neon_srshl;
6494 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6495 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
6496 Ops[1] = ConstantInt::get(Int64Ty, -SV);
6497 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
6498 }
6499 case NEON::BI__builtin_neon_vrsrad_n_u64:
6500 case NEON::BI__builtin_neon_vrsrad_n_s64: {
6501 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
6502 ? Intrinsic::aarch64_neon_urshl
6503 : Intrinsic::aarch64_neon_srshl;
6504 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6505 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
6506 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
6507 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
6508 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
6509 }
6510 case NEON::BI__builtin_neon_vshld_n_s64:
6511 case NEON::BI__builtin_neon_vshld_n_u64: {
6512 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6513 return Builder.CreateShl(
6514 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
6515 }
6516 case NEON::BI__builtin_neon_vshrd_n_s64: {
6517 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6518 return Builder.CreateAShr(
6519 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6520 Amt->getZExtValue())),
6521 "shrd_n");
6522 }
6523 case NEON::BI__builtin_neon_vshrd_n_u64: {
6524 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6525 uint64_t ShiftAmt = Amt->getZExtValue();
6526 // Right-shifting an unsigned value by its size yields 0.
6527 if (ShiftAmt == 64)
6528 return ConstantInt::get(Int64Ty, 0);
6529 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
6530 "shrd_n");
6531 }
6532 case NEON::BI__builtin_neon_vsrad_n_s64: {
6533 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
6534 Ops[1] = Builder.CreateAShr(
6535 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6536 Amt->getZExtValue())),
6537 "shrd_n");
6538 return Builder.CreateAdd(Ops[0], Ops[1]);
6539 }
6540 case NEON::BI__builtin_neon_vsrad_n_u64: {
6541 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
6542 uint64_t ShiftAmt = Amt->getZExtValue();
6543 // Right-shifting an unsigned value by its size yields 0.
6544 // As Op + 0 = Op, return Ops[0] directly.
6545 if (ShiftAmt == 64)
6546 return Ops[0];
6547 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
6548 "shrd_n");
6549 return Builder.CreateAdd(Ops[0], Ops[1]);
6550 }
6551 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
6552 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
6553 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
6554 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
6555 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6556 "lane");
6557 SmallVector<Value *, 2> ProductOps;
6558 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
6559 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
6560 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
6561 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6562 ProductOps, "vqdmlXl");
6563 Constant *CI = ConstantInt::get(SizeTy, 0);
6564 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6565 Ops.pop_back();
6566
6567 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
6568 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
6569 ? Intrinsic::aarch64_neon_sqadd
6570 : Intrinsic::aarch64_neon_sqsub;
6571 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
6572 }
6573 case NEON::BI__builtin_neon_vqdmlals_s32:
6574 case NEON::BI__builtin_neon_vqdmlsls_s32: {
6575 SmallVector<Value *, 2> ProductOps;
6576 ProductOps.push_back(Ops[1]);
6577 ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
6578 Ops[1] =
6579 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6580 ProductOps, "vqdmlXl");
6581
6582 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
6583 ? Intrinsic::aarch64_neon_sqadd
6584 : Intrinsic::aarch64_neon_sqsub;
6585 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
6586 }
6587 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
6588 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
6589 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
6590 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
6591 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6592 "lane");
6593 SmallVector<Value *, 2> ProductOps;
6594 ProductOps.push_back(Ops[1]);
6595 ProductOps.push_back(Ops[2]);
6596 Ops[1] =
6597 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6598 ProductOps, "vqdmlXl");
6599 Ops.pop_back();
6600
6601 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
6602 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
6603 ? Intrinsic::aarch64_neon_sqadd
6604 : Intrinsic::aarch64_neon_sqsub;
6605 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
6606 }
6607 case NEON::BI__builtin_neon_vget_lane_bf16:
6608 case NEON::BI__builtin_neon_vduph_lane_bf16:
6609 case NEON::BI__builtin_neon_vduph_lane_f16: {
6610 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6611 "vget_lane");
6612 }
6613 case NEON::BI__builtin_neon_vgetq_lane_bf16:
6614 case NEON::BI__builtin_neon_vduph_laneq_bf16:
6615 case NEON::BI__builtin_neon_vduph_laneq_f16: {
6616 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6617 "vgetq_lane");
6618 }
6619 case NEON::BI__builtin_neon_vcvt_bf16_f32: {
6620 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6621 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6622 return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6623 }
6624 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
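// Truncate the four f32 lanes to bf16 and shuffle them into the low half of an
// 8 x bf16 result; the high half is filled with zeros.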
6625 SmallVector<int, 16> ConcatMask(8);
6626 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6627 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6628 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6629 llvm::Value *Trunc =
6630 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6631 return Builder.CreateShuffleVector(
6632 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
6633 }
6634 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
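// Keep the low four bf16 lanes of Ops[0] and concatenate the newly truncated
// lanes of Ops[1] as the high half of the result.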
6635 SmallVector<int, 16> ConcatMask(8);
6636 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6637 SmallVector<int, 16> LoMask(4);
6638 std::iota(LoMask.begin(), LoMask.end(), 0);
6639 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6640 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6641 llvm::Type *V8BF16 = FixedVectorType::get(Builder.getBFloatTy(), 8);
6642 llvm::Value *Inactive = Builder.CreateShuffleVector(
6643 Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
6644 llvm::Value *Trunc =
6645 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
6646 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
6647 }
6648
6649 case clang::AArch64::BI_InterlockedAdd:
6650 case clang::AArch64::BI_InterlockedAdd_acq:
6651 case clang::AArch64::BI_InterlockedAdd_rel:
6652 case clang::AArch64::BI_InterlockedAdd_nf:
6653 case clang::AArch64::BI_InterlockedAdd64:
6654 case clang::AArch64::BI_InterlockedAdd64_acq:
6655 case clang::AArch64::BI_InterlockedAdd64_rel:
6656 case clang::AArch64::BI_InterlockedAdd64_nf: {
6657 Address DestAddr = CheckAtomicAlignment(*this, E);
6658 Value *Val = EmitScalarExpr(E->getArg(1));
6659 llvm::AtomicOrdering Ordering;
6660 switch (BuiltinID) {
6661 case clang::AArch64::BI_InterlockedAdd:
6662 case clang::AArch64::BI_InterlockedAdd64:
6663 Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
6664 break;
6665 case clang::AArch64::BI_InterlockedAdd_acq:
6666 case clang::AArch64::BI_InterlockedAdd64_acq:
6667 Ordering = llvm::AtomicOrdering::Acquire;
6668 break;
6669 case clang::AArch64::BI_InterlockedAdd_rel:
6670 case clang::AArch64::BI_InterlockedAdd64_rel:
6671 Ordering = llvm::AtomicOrdering::Release;
6672 break;
6673 case clang::AArch64::BI_InterlockedAdd_nf:
6674 case clang::AArch64::BI_InterlockedAdd64_nf:
6675 Ordering = llvm::AtomicOrdering::Monotonic;
6676 break;
6677 default:
6678 llvm_unreachable("missing builtin ID in switch!");
6679 }
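// atomicrmw add returns the value that was in memory before the update, but
// _InterlockedAdd is specified to return the new value, so add Val to the
// result of the RMW.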
6680 AtomicRMWInst *RMWI =
6681 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val, Ordering);
6682 return Builder.CreateAdd(RMWI, Val);
6683 }
6684 }
6685
6686 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
6687 llvm::Type *Ty = VTy;
6688 if (!Ty)
6689 return nullptr;
6690
6691 // Not all intrinsics handled by the common case work for AArch64 yet, so only
6692 // defer to common code if it's been added to our special map.
6693 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
6694 AArch64SIMDIntrinsicsProvenSorted);
6695
6696 if (Builtin)
6697 return EmitCommonNeonBuiltinExpr(
6698 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
6699 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
6700 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
6701
6702 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
6703 return V;
6704
6705 unsigned Int;
6706 bool ExtractLow = false;
6707 bool ExtendLaneArg = false;
6708 switch (BuiltinID) {
6709 default: return nullptr;
6710 case NEON::BI__builtin_neon_vbsl_v:
6711 case NEON::BI__builtin_neon_vbslq_v: {
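// A NEON bitwise select has no single IR equivalent, so emit it as
// (mask & a) | (~mask & b) on the integer view of the vectors: bits of Ops[1]
// are taken where the mask bit in Ops[0] is set, bits of Ops[2] otherwise.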
6712 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6713 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
6714 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
6715 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
6716
6717 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
6718 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
6719 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
6720 return Builder.CreateBitCast(Ops[0], Ty);
6721 }
6722 case NEON::BI__builtin_neon_vfma_lane_v:
6723 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6724 // The ARM builtins (and instructions) have the addend as the first
6725 // operand, but the 'fma' intrinsics have it last. Swap it around here.
6726 Value *Addend = Ops[0];
6727 Value *Multiplicand = Ops[1];
6728 Value *LaneSource = Ops[2];
6729 Ops[0] = Multiplicand;
6730 Ops[1] = LaneSource;
6731 Ops[2] = Addend;
6732
6733 // Now adjust things to handle the lane access.
6734 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
6735 ? llvm::FixedVectorType::get(VTy->getElementType(),
6736 VTy->getNumElements() / 2)
6737 : VTy;
6738 llvm::Constant *cst = cast<Constant>(Ops[3]);
6739 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
6740 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
6741 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
6742
6743 Ops.pop_back();
6744 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
6745 : Intrinsic::fma;
6746 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
6747 }
6748 case NEON::BI__builtin_neon_vfma_laneq_v: {
6749 auto *VTy = cast<llvm::FixedVectorType>(Ty);
6750 // v1f64 fma should be mapped to Neon scalar f64 fma
6751 if (VTy && VTy->getElementType() == DoubleTy) {
6752 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6753 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6754 llvm::FixedVectorType *VTy =
6755 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
6756 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
6757 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6758 Value *Result;
6759 Result = emitCallMaybeConstrainedFPBuiltin(
6760 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
6761 DoubleTy, {Ops[1], Ops[2], Ops[0]});
6762 return Builder.CreateBitCast(Result, Ty);
6763 }
6764 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6765 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6766
6767 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
6768 VTy->getNumElements() * 2);
6769 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
6770 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
6771 cast<ConstantInt>(Ops[3]));
6772 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6773
6774 return emitCallMaybeConstrainedFPBuiltin(
6775 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6776 {Ops[2], Ops[1], Ops[0]});
6777 }
6778 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6779 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6780 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6781
6782 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6783 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6784 return emitCallMaybeConstrainedFPBuiltin(
6785 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6786 {Ops[2], Ops[1], Ops[0]});
6787 }
6788 case NEON::BI__builtin_neon_vfmah_lane_f16:
6789 case NEON::BI__builtin_neon_vfmas_lane_f32:
6790 case NEON::BI__builtin_neon_vfmah_laneq_f16:
6791 case NEON::BI__builtin_neon_vfmas_laneq_f32:
6792 case NEON::BI__builtin_neon_vfmad_lane_f64:
6793 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6794 Ops.push_back(EmitScalarExpr(E->getArg(3)));
6795 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6796 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6797 return emitCallMaybeConstrainedFPBuiltin(
6798 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6799 {Ops[1], Ops[2], Ops[0]});
6800 }
6801 case NEON::BI__builtin_neon_vmull_v:
6802 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6803 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6804 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6805 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6806 case NEON::BI__builtin_neon_vmax_v:
6807 case NEON::BI__builtin_neon_vmaxq_v:
6808 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6809 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6810 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6811 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6812 case NEON::BI__builtin_neon_vmaxh_f16: {
6813 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6814 Int = Intrinsic::aarch64_neon_fmax;
6815 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
6816 }
6817 case NEON::BI__builtin_neon_vmin_v:
6818 case NEON::BI__builtin_neon_vminq_v:
6819 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6820 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6821 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6822 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
6823 case NEON::BI__builtin_neon_vminh_f16: {
6824 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6825 Int = Intrinsic::aarch64_neon_fmin;
6826 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
6827 }
6828 case NEON::BI__builtin_neon_vabd_v:
6829 case NEON::BI__builtin_neon_vabdq_v:
6830 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6831 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6832 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6833 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
6834 case NEON::BI__builtin_neon_vpadal_v:
6835 case NEON::BI__builtin_neon_vpadalq_v: {
6836 unsigned ArgElts = VTy->getNumElements();
6837 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6838 unsigned BitWidth = EltTy->getBitWidth();
6839 auto *ArgTy = llvm::FixedVectorType::get(
6840 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
6841 llvm::Type* Tys[2] = { VTy, ArgTy };
6842 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6843 SmallVector<llvm::Value*, 1> TmpOps;
6844 TmpOps.push_back(Ops[1]);
6845 Function *F = CGM.getIntrinsic(Int, Tys);
6846 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
6847 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6848 return Builder.CreateAdd(tmp, addend);
6849 }
6850 case NEON::BI__builtin_neon_vpmin_v:
6851 case NEON::BI__builtin_neon_vpminq_v:
6852 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6853 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6854 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6855 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
6856 case NEON::BI__builtin_neon_vpmax_v:
6857 case NEON::BI__builtin_neon_vpmaxq_v:
6858 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6859 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6860 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6861 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
6862 case NEON::BI__builtin_neon_vminnm_v:
6863 case NEON::BI__builtin_neon_vminnmq_v:
6864 Int = Intrinsic::aarch64_neon_fminnm;
6865 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
6866 case NEON::BI__builtin_neon_vminnmh_f16:
6867 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6868 Int = Intrinsic::aarch64_neon_fminnm;
6869 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
6870 case NEON::BI__builtin_neon_vmaxnm_v:
6871 case NEON::BI__builtin_neon_vmaxnmq_v:
6872 Int = Intrinsic::aarch64_neon_fmaxnm;
6873 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
6874 case NEON::BI__builtin_neon_vmaxnmh_f16:
6875 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6876 Int = Intrinsic::aarch64_neon_fmaxnm;
6877 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
6878 case NEON::BI__builtin_neon_vrecpss_f32: {
6879 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6880 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6881 Ops, "vrecps");
6882 }
6883 case NEON::BI__builtin_neon_vrecpsd_f64:
6884 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6885 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6886 Ops, "vrecps");
6887 case NEON::BI__builtin_neon_vrecpsh_f16:
6888 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6889 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
6890 Ops, "vrecps");
6891 case NEON::BI__builtin_neon_vqshrun_n_v:
6892 Int = Intrinsic::aarch64_neon_sqshrun;
6893 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
6894 case NEON::BI__builtin_neon_vqrshrun_n_v:
6895 Int = Intrinsic::aarch64_neon_sqrshrun;
6896 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
6897 case NEON::BI__builtin_neon_vqshrn_n_v:
6898 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6899 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
6900 case NEON::BI__builtin_neon_vrshrn_n_v:
6901 Int = Intrinsic::aarch64_neon_rshrn;
6902 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
6903 case NEON::BI__builtin_neon_vqrshrn_n_v:
6904 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6905 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
6906 case NEON::BI__builtin_neon_vrndah_f16: {
6907 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6908 Int = Builder.getIsFPConstrained()
6909 ? Intrinsic::experimental_constrained_round
6910 : Intrinsic::round;
6911 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
6912 }
6913 case NEON::BI__builtin_neon_vrnda_v:
6914 case NEON::BI__builtin_neon_vrndaq_v: {
6915 Int = Builder.getIsFPConstrained()
6916 ? Intrinsic::experimental_constrained_round
6917 : Intrinsic::round;
6918 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
6919 }
6920 case NEON::BI__builtin_neon_vrndih_f16: {
6921 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6922 Int = Builder.getIsFPConstrained()
6923 ? Intrinsic::experimental_constrained_nearbyint
6924 : Intrinsic::nearbyint;
6925 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
6926 }
6927 case NEON::BI__builtin_neon_vrndmh_f16: {
6928 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6929 Int = Builder.getIsFPConstrained()
6930 ? Intrinsic::experimental_constrained_floor
6931 : Intrinsic::floor;
6932 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
6933 }
6934 case NEON::BI__builtin_neon_vrndm_v:
6935 case NEON::BI__builtin_neon_vrndmq_v: {
6936 Int = Builder.getIsFPConstrained()
6937 ? Intrinsic::experimental_constrained_floor
6938 : Intrinsic::floor;
6939 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6940 }
6941 case NEON::BI__builtin_neon_vrndnh_f16: {
6942 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6943 Int = Builder.getIsFPConstrained()
6944 ? Intrinsic::experimental_constrained_roundeven
6945 : Intrinsic::roundeven;
6946 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
6947 }
6948 case NEON::BI__builtin_neon_vrndn_v:
6949 case NEON::BI__builtin_neon_vrndnq_v: {
6950 Int = Builder.getIsFPConstrained()
6951 ? Intrinsic::experimental_constrained_roundeven
6952 : Intrinsic::roundeven;
6953 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6954 }
6955 case NEON::BI__builtin_neon_vrndns_f32: {
6956 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6957 Int = Builder.getIsFPConstrained()
6958 ? Intrinsic::experimental_constrained_roundeven
6959 : Intrinsic::roundeven;
6960 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
6961 }
6962 case NEON::BI__builtin_neon_vrndph_f16: {
6963 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6964 Int = Builder.getIsFPConstrained()
6965 ? Intrinsic::experimental_constrained_ceil
6966 : Intrinsic::ceil;
6967 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
6968 }
6969 case NEON::BI__builtin_neon_vrndp_v:
6970 case NEON::BI__builtin_neon_vrndpq_v: {
6971 Int = Builder.getIsFPConstrained()
6972 ? Intrinsic::experimental_constrained_ceil
6973 : Intrinsic::ceil;
6974 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6975 }
6976 case NEON::BI__builtin_neon_vrndxh_f16: {
6977 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6978 Int = Builder.getIsFPConstrained()
6979 ? Intrinsic::experimental_constrained_rint
6980 : Intrinsic::rint;
6981 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
6982 }
6983 case NEON::BI__builtin_neon_vrndx_v:
6984 case NEON::BI__builtin_neon_vrndxq_v: {
6985 Int = Builder.getIsFPConstrained()
6986 ? Intrinsic::experimental_constrained_rint
6987 : Intrinsic::rint;
6988 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6989 }
6990 case NEON::BI__builtin_neon_vrndh_f16: {
6991 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6992 Int = Builder.getIsFPConstrained()
6993 ? Intrinsic::experimental_constrained_trunc
6994 : Intrinsic::trunc;
6995 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
6996 }
6997 case NEON::BI__builtin_neon_vrnd32x_f32:
6998 case NEON::BI__builtin_neon_vrnd32xq_f32:
6999 case NEON::BI__builtin_neon_vrnd32x_f64:
7000 case NEON::BI__builtin_neon_vrnd32xq_f64: {
7001 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7002 Int = Intrinsic::aarch64_neon_frint32x;
7003 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
7004 }
7005 case NEON::BI__builtin_neon_vrnd32z_f32:
7006 case NEON::BI__builtin_neon_vrnd32zq_f32:
7007 case NEON::BI__builtin_neon_vrnd32z_f64:
7008 case NEON::BI__builtin_neon_vrnd32zq_f64: {
7009 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7010 Int = Intrinsic::aarch64_neon_frint32z;
7011 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
7012 }
7013 case NEON::BI__builtin_neon_vrnd64x_f32:
7014 case NEON::BI__builtin_neon_vrnd64xq_f32:
7015 case NEON::BI__builtin_neon_vrnd64x_f64:
7016 case NEON::BI__builtin_neon_vrnd64xq_f64: {
7017 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7018 Int = Intrinsic::aarch64_neon_frint64x;
7019 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
7020 }
7021 case NEON::BI__builtin_neon_vrnd64z_f32:
7022 case NEON::BI__builtin_neon_vrnd64zq_f32:
7023 case NEON::BI__builtin_neon_vrnd64z_f64:
7024 case NEON::BI__builtin_neon_vrnd64zq_f64: {
7025 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7026 Int = Intrinsic::aarch64_neon_frint64z;
7027 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
7028 }
7029 case NEON::BI__builtin_neon_vrnd_v:
7030 case NEON::BI__builtin_neon_vrndq_v: {
7031 Int = Builder.getIsFPConstrained()
7032 ? Intrinsic::experimental_constrained_trunc
7033 : Intrinsic::trunc;
7034 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
7035 }
7036 case NEON::BI__builtin_neon_vcvt_f64_v:
7037 case NEON::BI__builtin_neon_vcvtq_f64_v:
7038 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7039 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
7040 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7041 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7042 case NEON::BI__builtin_neon_vcvt_f64_f32: {
7043 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
7044 "unexpected vcvt_f64_f32 builtin");
7045 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
7046 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
7047
7048 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
7049 }
7050 case NEON::BI__builtin_neon_vcvt_f32_f64: {
7051 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
7052 "unexpected vcvt_f32_f64 builtin");
7053 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
7054 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
7055
7056 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
7057 }
7058 case NEON::BI__builtin_neon_vcvt_s32_v:
7059 case NEON::BI__builtin_neon_vcvt_u32_v:
7060 case NEON::BI__builtin_neon_vcvt_s64_v:
7061 case NEON::BI__builtin_neon_vcvt_u64_v:
7062 case NEON::BI__builtin_neon_vcvt_s16_f16:
7063 case NEON::BI__builtin_neon_vcvt_u16_f16:
7064 case NEON::BI__builtin_neon_vcvtq_s32_v:
7065 case NEON::BI__builtin_neon_vcvtq_u32_v:
7066 case NEON::BI__builtin_neon_vcvtq_s64_v:
7067 case NEON::BI__builtin_neon_vcvtq_u64_v:
7068 case NEON::BI__builtin_neon_vcvtq_s16_f16:
7069 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
7070 Int =
7071 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
7072 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
7073 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
7074 }
7075 case NEON::BI__builtin_neon_vcvta_s16_f16:
7076 case NEON::BI__builtin_neon_vcvta_u16_f16:
7077 case NEON::BI__builtin_neon_vcvta_s32_v:
7078 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
7079 case NEON::BI__builtin_neon_vcvtaq_s32_v:
7080 case NEON::BI__builtin_neon_vcvta_u32_v:
7081 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
7082 case NEON::BI__builtin_neon_vcvtaq_u32_v:
7083 case NEON::BI__builtin_neon_vcvta_s64_v:
7084 case NEON::BI__builtin_neon_vcvtaq_s64_v:
7085 case NEON::BI__builtin_neon_vcvta_u64_v:
7086 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
7087 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
7088 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7089 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
7090 }
7091 case NEON::BI__builtin_neon_vcvtm_s16_f16:
7092 case NEON::BI__builtin_neon_vcvtm_s32_v:
7093 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
7094 case NEON::BI__builtin_neon_vcvtmq_s32_v:
7095 case NEON::BI__builtin_neon_vcvtm_u16_f16:
7096 case NEON::BI__builtin_neon_vcvtm_u32_v:
7097 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
7098 case NEON::BI__builtin_neon_vcvtmq_u32_v:
7099 case NEON::BI__builtin_neon_vcvtm_s64_v:
7100 case NEON::BI__builtin_neon_vcvtmq_s64_v:
7101 case NEON::BI__builtin_neon_vcvtm_u64_v:
7102 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
7103 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
7104 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7105 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
7106 }
7107 case NEON::BI__builtin_neon_vcvtn_s16_f16:
7108 case NEON::BI__builtin_neon_vcvtn_s32_v:
7109 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
7110 case NEON::BI__builtin_neon_vcvtnq_s32_v:
7111 case NEON::BI__builtin_neon_vcvtn_u16_f16:
7112 case NEON::BI__builtin_neon_vcvtn_u32_v:
7113 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
7114 case NEON::BI__builtin_neon_vcvtnq_u32_v:
7115 case NEON::BI__builtin_neon_vcvtn_s64_v:
7116 case NEON::BI__builtin_neon_vcvtnq_s64_v:
7117 case NEON::BI__builtin_neon_vcvtn_u64_v:
7118 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
7119 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
7120 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7121 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
7122 }
7123 case NEON::BI__builtin_neon_vcvtp_s16_f16:
7124 case NEON::BI__builtin_neon_vcvtp_s32_v:
7125 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
7126 case NEON::BI__builtin_neon_vcvtpq_s32_v:
7127 case NEON::BI__builtin_neon_vcvtp_u16_f16:
7128 case NEON::BI__builtin_neon_vcvtp_u32_v:
7129 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
7130 case NEON::BI__builtin_neon_vcvtpq_u32_v:
7131 case NEON::BI__builtin_neon_vcvtp_s64_v:
7132 case NEON::BI__builtin_neon_vcvtpq_s64_v:
7133 case NEON::BI__builtin_neon_vcvtp_u64_v:
7134 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
7135 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
7136 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7137 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
7138 }
7139 case NEON::BI__builtin_neon_vmulx_v:
7140 case NEON::BI__builtin_neon_vmulxq_v: {
7141 Int = Intrinsic::aarch64_neon_fmulx;
7142 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
7143 }
7144 case NEON::BI__builtin_neon_vmulxh_lane_f16:
7145 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
7146 // vmulx_lane should be mapped to Neon scalar mulx after
7147 // extracting the scalar element
7148 Ops.push_back(EmitScalarExpr(E->getArg(2)));
7149 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
7150 Ops.pop_back();
7151 Int = Intrinsic::aarch64_neon_fmulx;
7152 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
7153 }
7154 case NEON::BI__builtin_neon_vmul_lane_v:
7155 case NEON::BI__builtin_neon_vmul_laneq_v: {
7156 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
7157 bool Quad = false;
7158 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
7159 Quad = true;
7160 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
7161 llvm::FixedVectorType *VTy =
7162 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
7163 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7164 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
7165 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
7166 return Builder.CreateBitCast(Result, Ty);
7167 }
7168 case NEON::BI__builtin_neon_vnegd_s64:
7169 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
7170 case NEON::BI__builtin_neon_vnegh_f16:
7171 return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
7172 case NEON::BI__builtin_neon_vpmaxnm_v:
7173 case NEON::BI__builtin_neon_vpmaxnmq_v: {
7174 Int = Intrinsic::aarch64_neon_fmaxnmp;
7175 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
7176 }
7177 case NEON::BI__builtin_neon_vpminnm_v:
7178 case NEON::BI__builtin_neon_vpminnmq_v: {
7179 Int = Intrinsic::aarch64_neon_fminnmp;
7180 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
7181 }
7182 case NEON::BI__builtin_neon_vsqrth_f16: {
7183 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7184 Int = Builder.getIsFPConstrained()
7185 ? Intrinsic::experimental_constrained_sqrt
7186 : Intrinsic::sqrt;
7187 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
7188 }
7189 case NEON::BI__builtin_neon_vsqrt_v:
7190 case NEON::BI__builtin_neon_vsqrtq_v: {
7191 Int = Builder.getIsFPConstrained()
7192 ? Intrinsic::experimental_constrained_sqrt
7193 : Intrinsic::sqrt;
7194 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7195 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
7196 }
7197 case NEON::BI__builtin_neon_vrbit_v:
7198 case NEON::BI__builtin_neon_vrbitq_v: {
7199 Int = Intrinsic::bitreverse;
7200 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
7201 }
7202 case NEON::BI__builtin_neon_vmaxv_f16: {
7203 Int = Intrinsic::aarch64_neon_fmaxv;
7204 Ty = HalfTy;
7205 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7206 llvm::Type *Tys[2] = { Ty, VTy };
7207 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7208 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7209 return Builder.CreateTrunc(Ops[0], HalfTy);
7210 }
7211 case NEON::BI__builtin_neon_vmaxvq_f16: {
7212 Int = Intrinsic::aarch64_neon_fmaxv;
7213 Ty = HalfTy;
7214 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7215 llvm::Type *Tys[2] = { Ty, VTy };
7216 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7217 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7218 return Builder.CreateTrunc(Ops[0], HalfTy);
7219 }
7220 case NEON::BI__builtin_neon_vminv_f16: {
7221 Int = Intrinsic::aarch64_neon_fminv;
7222 Ty = HalfTy;
7223 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7224 llvm::Type *Tys[2] = { Ty, VTy };
7225 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7226 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7227 return Builder.CreateTrunc(Ops[0], HalfTy);
7228 }
7229 case NEON::BI__builtin_neon_vminvq_f16: {
7230 Int = Intrinsic::aarch64_neon_fminv;
7231 Ty = HalfTy;
7232 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7233 llvm::Type *Tys[2] = { Ty, VTy };
7234 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7235 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7236 return Builder.CreateTrunc(Ops[0], HalfTy);
7237 }
7238 case NEON::BI__builtin_neon_vmaxnmv_f16: {
7239 Int = Intrinsic::aarch64_neon_fmaxnmv;
7240 Ty = HalfTy;
7241 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7242 llvm::Type *Tys[2] = { Ty, VTy };
7243 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7244 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
7245 return Builder.CreateTrunc(Ops[0], HalfTy);
7246 }
7247 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
7248 Int = Intrinsic::aarch64_neon_fmaxnmv;
7249 Ty = HalfTy;
7250 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7251 llvm::Type *Tys[2] = { Ty, VTy };
7252 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7253 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
7254 return Builder.CreateTrunc(Ops[0], HalfTy);
7255 }
7256 case NEON::BI__builtin_neon_vminnmv_f16: {
7257 Int = Intrinsic::aarch64_neon_fminnmv;
7258 Ty = HalfTy;
7259 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7260 llvm::Type *Tys[2] = { Ty, VTy };
7261 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7262 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
7263 return Builder.CreateTrunc(Ops[0], HalfTy);
7264 }
7265 case NEON::BI__builtin_neon_vminnmvq_f16: {
7266 Int = Intrinsic::aarch64_neon_fminnmv;
7267 Ty = HalfTy;
7268 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7269 llvm::Type *Tys[2] = { Ty, VTy };
7270 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7271 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
7272 return Builder.CreateTrunc(Ops[0], HalfTy);
7273 }
7274 case NEON::BI__builtin_neon_vmul_n_f64: {
7275 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
7276 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
7277 return Builder.CreateFMul(Ops[0], RHS);
7278 }
7279 case NEON::BI__builtin_neon_vaddlv_u8: {
7280 Int = Intrinsic::aarch64_neon_uaddlv;
7281 Ty = Int32Ty;
7282 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7283 llvm::Type *Tys[2] = { Ty, VTy };
7284 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7285 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7286 return Builder.CreateTrunc(Ops[0], Int16Ty);
7287 }
7288 case NEON::BI__builtin_neon_vaddlv_u16: {
7289 Int = Intrinsic::aarch64_neon_uaddlv;
7290 Ty = Int32Ty;
7291 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7292 llvm::Type *Tys[2] = { Ty, VTy };
7293 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7294 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7295 }
7296 case NEON::BI__builtin_neon_vaddlvq_u8: {
7297 Int = Intrinsic::aarch64_neon_uaddlv;
7298 Ty = Int32Ty;
7299 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7300 llvm::Type *Tys[2] = { Ty, VTy };
7301 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7302 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7303 return Builder.CreateTrunc(Ops[0], Int16Ty);
7304 }
7305 case NEON::BI__builtin_neon_vaddlvq_u16: {
7306 Int = Intrinsic::aarch64_neon_uaddlv;
7307 Ty = Int32Ty;
7308 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7309 llvm::Type *Tys[2] = { Ty, VTy };
7310 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7311 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7312 }
7313 case NEON::BI__builtin_neon_vaddlv_s8: {
7314 Int = Intrinsic::aarch64_neon_saddlv;
7315 Ty = Int32Ty;
7316 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7317 llvm::Type *Tys[2] = { Ty, VTy };
7318 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7319 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7320 return Builder.CreateTrunc(Ops[0], Int16Ty);
7321 }
7322 case NEON::BI__builtin_neon_vaddlv_s16: {
7323 Int = Intrinsic::aarch64_neon_saddlv;
7324 Ty = Int32Ty;
7325 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7326 llvm::Type *Tys[2] = { Ty, VTy };
7327 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7328 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7329 }
7330 case NEON::BI__builtin_neon_vaddlvq_s8: {
7331 Int = Intrinsic::aarch64_neon_saddlv;
7332 Ty = Int32Ty;
7333 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7334 llvm::Type *Tys[2] = { Ty, VTy };
7335 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7336 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7337 return Builder.CreateTrunc(Ops[0], Int16Ty);
7338 }
7339 case NEON::BI__builtin_neon_vaddlvq_s16: {
7340 Int = Intrinsic::aarch64_neon_saddlv;
7341 Ty = Int32Ty;
7342 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7343 llvm::Type *Tys[2] = { Ty, VTy };
7344 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7345 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7346 }
7347 case NEON::BI__builtin_neon_vsri_n_v:
7348 case NEON::BI__builtin_neon_vsriq_n_v: {
7349 Int = Intrinsic::aarch64_neon_vsri;
7350 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
7351 return EmitNeonCall(Intrin, Ops, "vsri_n");
7352 }
7353 case NEON::BI__builtin_neon_vsli_n_v:
7354 case NEON::BI__builtin_neon_vsliq_n_v: {
7355 Int = Intrinsic::aarch64_neon_vsli;
7356 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
7357 return EmitNeonCall(Intrin, Ops, "vsli_n");
7358 }
7359 case NEON::BI__builtin_neon_vsra_n_v:
7360 case NEON::BI__builtin_neon_vsraq_n_v:
7361 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7362 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
7363 return Builder.CreateAdd(Ops[0], Ops[1]);
7364 case NEON::BI__builtin_neon_vrsra_n_v:
7365 case NEON::BI__builtin_neon_vrsraq_n_v: {
7366 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
7367 SmallVector<llvm::Value*, 2> TmpOps;
7368 TmpOps.push_back(Ops[1]);
7369 TmpOps.push_back(Ops[2]);
7370 Function* F = CGM.getIntrinsic(Int, Ty);
7371 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
7372 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7373 return Builder.CreateAdd(Ops[0], tmp);
7374 }
7375 case NEON::BI__builtin_neon_vld1_v:
7376 case NEON::BI__builtin_neon_vld1q_v: {
7377 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
7378 }
7379 case NEON::BI__builtin_neon_vst1_v:
7380 case NEON::BI__builtin_neon_vst1q_v:
7381 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7382 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
7383 case NEON::BI__builtin_neon_vld1_lane_v:
7384 case NEON::BI__builtin_neon_vld1q_lane_v: {
7385 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7386 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
7387 PtrOp0.getAlignment());
7388 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
7389 }
7390 case NEON::BI__builtin_neon_vldap1_lane_s64:
7391 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
7392 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7393 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
7394 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
7395 LI->setAtomic(llvm::AtomicOrdering::Acquire);
7396 Ops[0] = LI;
7397 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
7398 }
7399 case NEON::BI__builtin_neon_vld1_dup_v:
7400 case NEON::BI__builtin_neon_vld1q_dup_v: {
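// Load a single element, insert it into lane 0 of a poison vector, and splat
// that lane across the whole vector.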
7401 Value *V = PoisonValue::get(Ty);
7402 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
7403 PtrOp0.getAlignment());
7404 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
7405 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
7406 return EmitNeonSplat(Ops[0], CI);
7407 }
7408 case NEON::BI__builtin_neon_vst1_lane_v:
7409 case NEON::BI__builtin_neon_vst1q_lane_v:
7410 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7411 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
7412 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
7413 case NEON::BI__builtin_neon_vstl1_lane_s64:
7414 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
7415 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7416 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
7417 llvm::StoreInst *SI =
7418 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
7419 SI->setAtomic(llvm::AtomicOrdering::Release);
7420 return SI;
7421 }
7422 case NEON::BI__builtin_neon_vld2_v:
7423 case NEON::BI__builtin_neon_vld2q_v: {
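// The ld2 intrinsic returns a struct of two vectors; store it through the
// pointer in Ops[0], which addresses the temporary holding the builtin's
// aggregate result.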
7424 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
7425 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
7426 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
7427 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7428 }
7429 case NEON::BI__builtin_neon_vld3_v:
7430 case NEON::BI__builtin_neon_vld3q_v: {
7431 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
7432 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
7433 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
7434 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7435 }
7436 case NEON::BI__builtin_neon_vld4_v:
7437 case NEON::BI__builtin_neon_vld4q_v: {
7438 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
7439 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
7440 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
7441 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7442 }
7443 case NEON::BI__builtin_neon_vld2_dup_v:
7444 case NEON::BI__builtin_neon_vld2q_dup_v: {
7445 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
7446 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
7447 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
7448 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7449 }
7450 case NEON::BI__builtin_neon_vld3_dup_v:
7451 case NEON::BI__builtin_neon_vld3q_dup_v: {
7452 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
7453 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
7454 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
7455 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7456 }
7457 case NEON::BI__builtin_neon_vld4_dup_v:
7458 case NEON::BI__builtin_neon_vld4q_dup_v: {
7459 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
7460 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
7461 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
7462 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7463 }
7464 case NEON::BI__builtin_neon_vld2_lane_v:
7465 case NEON::BI__builtin_neon_vld2q_lane_v: {
7466 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7467 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
7468 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7469 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7470 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7471 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7472 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
7473 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7474 }
7475 case NEON::BI__builtin_neon_vld3_lane_v:
7476 case NEON::BI__builtin_neon_vld3q_lane_v: {
7477 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7478 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
7479 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7480 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7481 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7482 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7483 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7484 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
7485 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7486 }
7487 case NEON::BI__builtin_neon_vld4_lane_v:
7488 case NEON::BI__builtin_neon_vld4q_lane_v: {
7489 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7490 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
7491 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7492 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7493 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7494 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7495 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
7496 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
7497 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
7498 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7499 }
7500 case NEON::BI__builtin_neon_vst2_v:
7501 case NEON::BI__builtin_neon_vst2q_v: {
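// Rotate the address operand from the front to the back so the operand order
// matches aarch64.neon.st2, which takes the data vectors first and the pointer
// last.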
7502 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7503 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
7504 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
7505 Ops, "");
7506 }
7507 case NEON::BI__builtin_neon_vst2_lane_v:
7508 case NEON::BI__builtin_neon_vst2q_lane_v: {
7509 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7510 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
7511 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7512 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
7513 Ops, "");
7514 }
7515 case NEON::BI__builtin_neon_vst3_v:
7516 case NEON::BI__builtin_neon_vst3q_v: {
7517 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7518 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7519 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
7520 Ops, "");
7521 }
7522 case NEON::BI__builtin_neon_vst3_lane_v:
7523 case NEON::BI__builtin_neon_vst3q_lane_v: {
7524 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7525 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7526 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7527 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
7528 Ops, "");
7529 }
7530 case NEON::BI__builtin_neon_vst4_v:
7531 case NEON::BI__builtin_neon_vst4q_v: {
7532 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7533 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7534 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
7535 Ops, "");
7536 }
7537 case NEON::BI__builtin_neon_vst4_lane_v:
7538 case NEON::BI__builtin_neon_vst4q_lane_v: {
7539 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7540 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7541 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
7542 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
7543 Ops, "");
7544 }
7545 case NEON::BI__builtin_neon_vtrn_v:
7546 case NEON::BI__builtin_neon_vtrnq_v: {
7547 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7548 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7549 Value *SV = nullptr;
7550
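// vtrn produces a pair of vectors: the shuffle for vi == 0 gathers the
// even-numbered lanes of both inputs (TRN1) and the one for vi == 1 the
// odd-numbered lanes (TRN2); each result is stored to its slot in the
// aggregate addressed by Ops[0].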
7551 for (unsigned vi = 0; vi != 2; ++vi) {
7552 SmallVector<int, 16> Indices;
7553 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7554 Indices.push_back(i+vi);
7555 Indices.push_back(i+e+vi);
7556 }
7557 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7558 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
7559 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7560 }
7561 return SV;
7562 }
7563 case NEON::BI__builtin_neon_vuzp_v:
7564 case NEON::BI__builtin_neon_vuzpq_v: {
7565 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7566 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7567 Value *SV = nullptr;
7568
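// vuzp de-interleaves the pair: vi == 0 collects the even-indexed lanes of the
// concatenated inputs (UZP1) and vi == 1 the odd-indexed lanes (UZP2).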
7569 for (unsigned vi = 0; vi != 2; ++vi) {
7570 SmallVector<int, 16> Indices;
7571 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7572 Indices.push_back(2*i+vi);
7573
7574 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7575 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
7576 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7577 }
7578 return SV;
7579 }
7580 case NEON::BI__builtin_neon_vzip_v:
7581 case NEON::BI__builtin_neon_vzipq_v: {
7582 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7583 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7584 Value *SV = nullptr;
7585
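// vzip interleaves the inputs: vi == 0 alternates lanes taken from the low
// halves of the two vectors (ZIP1) and vi == 1 from the high halves (ZIP2).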
7586 for (unsigned vi = 0; vi != 2; ++vi) {
7587 SmallVector<int, 16> Indices;
7588 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7589 Indices.push_back((i + vi*e) >> 1);
7590 Indices.push_back(((i + vi*e) >> 1)+e);
7591 }
7592 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7593 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
7594 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7595 }
7596 return SV;
7597 }
7598 case NEON::BI__builtin_neon_vqtbl1q_v: {
7599 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
7600 Ops, "vtbl1");
7601 }
7602 case NEON::BI__builtin_neon_vqtbl2q_v: {
7603 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
7604 Ops, "vtbl2");
7605 }
7606 case NEON::BI__builtin_neon_vqtbl3q_v: {
7607 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
7608 Ops, "vtbl3");
7609 }
7610 case NEON::BI__builtin_neon_vqtbl4q_v: {
7611 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
7612 Ops, "vtbl4");
7613 }
7614 case NEON::BI__builtin_neon_vqtbx1q_v: {
7615 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
7616 Ops, "vtbx1");
7617 }
7618 case NEON::BI__builtin_neon_vqtbx2q_v: {
7619 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
7620 Ops, "vtbx2");
7621 }
7622 case NEON::BI__builtin_neon_vqtbx3q_v: {
7623 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
7624 Ops, "vtbx3");
7625 }
7626 case NEON::BI__builtin_neon_vqtbx4q_v: {
7627 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
7628 Ops, "vtbx4");
7629 }
7630 case NEON::BI__builtin_neon_vsqadd_v:
7631 case NEON::BI__builtin_neon_vsqaddq_v: {
7632 Int = Intrinsic::aarch64_neon_usqadd;
7633 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
7634 }
7635 case NEON::BI__builtin_neon_vuqadd_v:
7636 case NEON::BI__builtin_neon_vuqaddq_v: {
7637 Int = Intrinsic::aarch64_neon_suqadd;
7638 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
7639 }
7640
7641 case NEON::BI__builtin_neon_vluti2_laneq_mf8:
7642 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
7643 case NEON::BI__builtin_neon_vluti2_laneq_f16:
7644 case NEON::BI__builtin_neon_vluti2_laneq_p16:
7645 case NEON::BI__builtin_neon_vluti2_laneq_p8:
7646 case NEON::BI__builtin_neon_vluti2_laneq_s16:
7647 case NEON::BI__builtin_neon_vluti2_laneq_s8:
7648 case NEON::BI__builtin_neon_vluti2_laneq_u16:
7649 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
7650 Int = Intrinsic::aarch64_neon_vluti2_laneq;
7651 llvm::Type *Tys[2];
7652 Tys[0] = Ty;
7653 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7654 /*isQuad*/ false));
7655 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
7656 }
7657 case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
7658 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
7659 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
7660 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
7661 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
7662 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
7663 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
7664 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
7665 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
7666 Int = Intrinsic::aarch64_neon_vluti2_laneq;
7667 llvm::Type *Tys[2];
7668 Tys[0] = Ty;
7669 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7670 /*isQuad*/ true));
7671 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
7672 }
7673 case NEON::BI__builtin_neon_vluti2_lane_mf8:
7674 case NEON::BI__builtin_neon_vluti2_lane_bf16:
7675 case NEON::BI__builtin_neon_vluti2_lane_f16:
7676 case NEON::BI__builtin_neon_vluti2_lane_p16:
7677 case NEON::BI__builtin_neon_vluti2_lane_p8:
7678 case NEON::BI__builtin_neon_vluti2_lane_s16:
7679 case NEON::BI__builtin_neon_vluti2_lane_s8:
7680 case NEON::BI__builtin_neon_vluti2_lane_u16:
7681 case NEON::BI__builtin_neon_vluti2_lane_u8: {
7682 Int = Intrinsic::aarch64_neon_vluti2_lane;
7683 llvm::Type *Tys[2];
7684 Tys[0] = Ty;
7685 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7686 /*isQuad*/ false));
7687 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
7688 }
7689 case NEON::BI__builtin_neon_vluti2q_lane_mf8:
7690 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
7691 case NEON::BI__builtin_neon_vluti2q_lane_f16:
7692 case NEON::BI__builtin_neon_vluti2q_lane_p16:
7693 case NEON::BI__builtin_neon_vluti2q_lane_p8:
7694 case NEON::BI__builtin_neon_vluti2q_lane_s16:
7695 case NEON::BI__builtin_neon_vluti2q_lane_s8:
7696 case NEON::BI__builtin_neon_vluti2q_lane_u16:
7697 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
7698 Int = Intrinsic::aarch64_neon_vluti2_lane;
7699 llvm::Type *Tys[2];
7700 Tys[0] = Ty;
7701 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7702 /*isQuad*/ true));
7703 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
7704 }
7705 case NEON::BI__builtin_neon_vluti4q_lane_mf8:
7706 case NEON::BI__builtin_neon_vluti4q_lane_p8:
7707 case NEON::BI__builtin_neon_vluti4q_lane_s8:
7708 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
7709 Int = Intrinsic::aarch64_neon_vluti4q_lane;
7710 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane");
7711 }
7712 case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
7713 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
7714 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
7715 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
7716 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
7717 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq");
7718 }
7719 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
7720 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
7721 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
7722 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
7723 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
7724 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
7725 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
7726 }
7727 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
7728 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
7729 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
7730 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
7731 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
7732 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
7733 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
7734 }
7735 case NEON::BI__builtin_neon_vmmlaq_f16_mf8_fpm:
7736 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla,
7737 {llvm::FixedVectorType::get(HalfTy, 8),
7738 llvm::FixedVectorType::get(Int8Ty, 16)},
7739 Ops, E, "fmmla");
7740 case NEON::BI__builtin_neon_vmmlaq_f32_mf8_fpm:
7741 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla,
7742 {llvm::FixedVectorType::get(FloatTy, 4),
7743 llvm::FixedVectorType::get(Int8Ty, 16)},
7744 Ops, E, "fmmla");
7745 case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
7746 ExtractLow = true;
7747 [[fallthrough]];
7748 case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
7749 case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
7750 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
7751 llvm::FixedVectorType::get(BFloatTy, 8),
7752 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7753 case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
7754 ExtractLow = true;
7755 [[fallthrough]];
7756 case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
7757 case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
7758 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
7759 llvm::FixedVectorType::get(BFloatTy, 8),
7760 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7761 case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
7762 ExtractLow = true;
7763 [[fallthrough]];
7764 case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
7765 case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
7766 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
7767 llvm::FixedVectorType::get(HalfTy, 8),
7768 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7769 case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
7770 ExtractLow = true;
7771 [[fallthrough]];
7772 case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
7773 case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
7774 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
7775 llvm::FixedVectorType::get(HalfTy, 8),
7776 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7777 case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
7778 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7779 llvm::FixedVectorType::get(Int8Ty, 8),
7780 Ops[0]->getType(), false, Ops, E, "vfcvtn");
7781 case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
7782 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7783 llvm::FixedVectorType::get(Int8Ty, 8),
7784 llvm::FixedVectorType::get(HalfTy, 4), false, Ops,
7785 E, "vfcvtn");
7786 case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
7787 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7788 llvm::FixedVectorType::get(Int8Ty, 16),
7789 llvm::FixedVectorType::get(HalfTy, 8), false, Ops,
7790 E, "vfcvtn");
7791 case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
7792 llvm::Type *Ty = llvm::FixedVectorType::get(Int8Ty, 16);
7793 Ops[0] = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
7794 uint64_t(0));
7795 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn2, Ty,
7796 Ops[1]->getType(), false, Ops, E, "vfcvtn2");
7797 }
7798
7799 case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
7800 case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
7801 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2, false, HalfTy,
7802 Ops, E, "fdot2");
7803 case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
7804 case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
7805 ExtendLaneArg = true;
7806 [[fallthrough]];
7807 case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
7808 case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
7809 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2_lane,
7810 ExtendLaneArg, HalfTy, Ops, E, "fdot2_lane");
7811 case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
7812 case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
7813 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4, false,
7814 FloatTy, Ops, E, "fdot4");
7815 case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
7816 case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
7817 ExtendLaneArg = true;
7818 [[fallthrough]];
7819 case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
7820 case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
7821 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4_lane,
7822 ExtendLaneArg, FloatTy, Ops, E, "fdot4_lane");
7823
7824 case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
7825 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalb,
7826 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7827 "vmlal");
7828 case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
7829 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalt,
7830 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7831 "vmlal");
7832 case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
7833 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbb,
7834 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7835 "vmlall");
7836 case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
7837 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbt,
7838 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7839 "vmlall");
7840 case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
7841 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltb,
7842 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7843 "vmlall");
7844 case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
7845 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltt,
7846 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7847 "vmlall");
7848 case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
7849 ExtendLaneArg = true;
7850 [[fallthrough]];
7851 case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
7852 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalb_lane,
7853 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7854 case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
7855 ExtendLaneArg = true;
7856 [[fallthrough]];
7857 case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
7858 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalt_lane,
7859 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7860 case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
7861 ExtendLaneArg = true;
7862 [[fallthrough]];
7863 case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
7864 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbb_lane,
7865 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7866 case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
7867 ExtendLaneArg = true;
7868 [[fallthrough]];
7869 case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
7870 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbt_lane,
7871 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7872 case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
7873 ExtendLaneArg = true;
7874 [[fallthrough]];
7875 case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
7876 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltb_lane,
7877 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7878 case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
7879 ExtendLaneArg = true;
7880 [[fallthrough]];
7881 case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
7882 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltt_lane,
7883 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7884 case NEON::BI__builtin_neon_vamin_f16:
7885 case NEON::BI__builtin_neon_vaminq_f16:
7886 case NEON::BI__builtin_neon_vamin_f32:
7887 case NEON::BI__builtin_neon_vaminq_f32:
7888 case NEON::BI__builtin_neon_vaminq_f64: {
7889 Int = Intrinsic::aarch64_neon_famin;
7890 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
7891 }
7892 case NEON::BI__builtin_neon_vamax_f16:
7893 case NEON::BI__builtin_neon_vamaxq_f16:
7894 case NEON::BI__builtin_neon_vamax_f32:
7895 case NEON::BI__builtin_neon_vamaxq_f32:
7896 case NEON::BI__builtin_neon_vamaxq_f64: {
7897 Int = Intrinsic::aarch64_neon_famax;
7898 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
7899 }
7900 case NEON::BI__builtin_neon_vscale_f16:
7901 case NEON::BI__builtin_neon_vscaleq_f16:
7902 case NEON::BI__builtin_neon_vscale_f32:
7903 case NEON::BI__builtin_neon_vscaleq_f32:
7904 case NEON::BI__builtin_neon_vscaleq_f64: {
7905 Int = Intrinsic::aarch64_neon_fp8_fscale;
7906 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fscale");
7907 }
7908 }
7909}
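
The FEAT_FAMINMAX cases above route the vamin/vamax builtins to the aarch64.neon.famin/famax intrinsics. A minimal usage sketch, assuming the ACLE vaminq_f32 intrinsic from arm_neon.h is available with the faminmax target feature (the function name is illustrative, not from this file):

#include <arm_neon.h>

// Element-wise floating-point absolute minimum (FAMIN). Clang lowers the
// vaminq_f32 call through the NEON::BI__builtin_neon_vaminq_f32 case above,
// i.e. to Intrinsic::aarch64_neon_famin.
float32x4_t abs_min_sketch(float32x4_t a, float32x4_t b) {
  return vaminq_f32(a, b);
}
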
7910
7911Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
7912 const CallExpr *E) {
7913 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
7914 BuiltinID == BPF::BI__builtin_btf_type_id ||
7915 BuiltinID == BPF::BI__builtin_preserve_type_info ||
7916 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
7917 "unexpected BPF builtin");
7918
7919 // A sequence number, injected into IR builtin functions, to
7920 // prevent CSE when the only difference between the emitted
7921 // calls may be the debuginfo metadata.
7922 static uint32_t BuiltinSeqNum;
7923
7924 switch (BuiltinID) {
7925 default:
7926 llvm_unreachable("Unexpected BPF builtin");
7927 case BPF::BI__builtin_preserve_field_info: {
7928 const Expr *Arg = E->getArg(0);
7929 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
7930
7931 if (!getDebugInfo()) {
7932 CGM.Error(E->getExprLoc(),
7933 "using __builtin_preserve_field_info() without -g");
7934 return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
7935 : EmitLValue(Arg).emitRawPointer(*this);
7936 }
7937
7938 // Enable underlying preserve_*_access_index() generation.
7939 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
7940 IsInPreservedAIRegion = true;
7941 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
7942 : EmitLValue(Arg).emitRawPointer(*this);
7943 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
7944
7945 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7946 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
7947
7948 // Build the IR for the preserve_field_info intrinsic.
7949 llvm::Function *FnGetFieldInfo = Intrinsic::getOrInsertDeclaration(
7950 &CGM.getModule(), Intrinsic::bpf_preserve_field_info,
7951 {FieldAddr->getType()});
7952 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
7953 }
7954 case BPF::BI__builtin_btf_type_id:
7955 case BPF::BI__builtin_preserve_type_info: {
7956 if (!getDebugInfo()) {
7957 CGM.Error(E->getExprLoc(), "using builtin function without -g");
7958 return nullptr;
7959 }
7960
7961 const Expr *Arg0 = E->getArg(0);
7962 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
7963 Arg0->getType(), Arg0->getExprLoc());
7964
7965 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7966 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
7967 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
7968
7969 llvm::Function *FnDecl;
7970 if (BuiltinID == BPF::BI__builtin_btf_type_id)
7971 FnDecl = Intrinsic::getOrInsertDeclaration(
7972 &CGM.getModule(), Intrinsic::bpf_btf_type_id, {});
7973 else
7974 FnDecl = Intrinsic::getOrInsertDeclaration(
7975 &CGM.getModule(), Intrinsic::bpf_preserve_type_info, {});
7976 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
7977 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
7978 return Fn;
7979 }
7980 case BPF::BI__builtin_preserve_enum_value: {
7981 if (!getDebugInfo()) {
7982 CGM.Error(E->getExprLoc(), "using builtin function without -g");
7983 return nullptr;
7984 }
7985
7986 const Expr *Arg0 = E->getArg(0);
7987 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
7988 Arg0->getType(), Arg0->getExprLoc());
7989
7990 // Find enumerator
7991 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
7992 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
7993 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
7994 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
7995
7996 auto InitVal = Enumerator->getInitVal();
7997 std::string InitValStr;
7998 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
7999 InitValStr = std::to_string(InitVal.getSExtValue());
8000 else
8001 InitValStr = std::to_string(InitVal.getZExtValue());
8002 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
8003 Value *EnumStrVal = Builder.CreateGlobalString(EnumStr);
8004
8005 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
8006 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
8007 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
8008
8009 llvm::Function *IntrinsicFn = Intrinsic::getOrInsertDeclaration(
8010 &CGM.getModule(), Intrinsic::bpf_preserve_enum_value, {});
8011 CallInst *Fn =
8012 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
8013 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
8014 return Fn;
8015 }
8016 }
8017}
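
For reference, a hedged sketch of the source patterns this lowering expects when compiling for a BPF target with debug info (-g). The struct/enum names, the numeric kind/flag constants, and the exact return types are illustrative assumptions, not taken from this file:

struct pkt { unsigned len; };
enum dir { DIR_IN = 0, DIR_OUT = 1 };

unsigned field_kind_sketch(struct pkt *p) {
  // getArg(0) is the field l-value, getArg(1) a constant "info kind"
  // (0 is commonly the byte offset; treat the value as illustrative).
  return __builtin_preserve_field_info(p->len, 0);
}

long enum_value_sketch(void) {
  // Matches the UnaryOperator -> CStyleCastExpr -> DeclRefExpr ->
  // EnumConstantDecl chain unwrapped above; the enumerator is encoded as
  // "name:value" into a global string. The flag constant is illustrative.
  return __builtin_preserve_enum_value(*(enum dir *)DIR_OUT, 1);
}
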
8018
8020Value *CodeGenFunction::BuildVector(ArrayRef<llvm::Value *> Ops) {
8021 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
8022 "Not a power-of-two sized vector!");
8023 bool AllConstants = true;
8024 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
8025 AllConstants &= isa<Constant>(Ops[i]);
8026
8027 // If this is a constant vector, create a ConstantVector.
8028 if (AllConstants) {
8029 SmallVector<llvm::Constant *, 16> CstOps;
8030 for (llvm::Value *Op : Ops)
8031 CstOps.push_back(cast<Constant>(Op));
8032 return llvm::ConstantVector::get(CstOps);
8033 }
8034
8035 // Otherwise, insertelement the values to build the vector.
8036 Value *Result = llvm::PoisonValue::get(
8037 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
8038
8039 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
8040 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
8041
8042 return Result;
8043}
8044
8045Value *CodeGenFunction::EmitAArch64CpuInit() {
8046 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
8047 llvm::FunctionCallee Func =
8048 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
8049 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
8050 cast<llvm::GlobalValue>(Func.getCallee())
8051 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
8052 return Builder.CreateCall(Func);
8053}
8054
8055Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
8056 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
8057 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
8058 llvm::SmallVector<StringRef, 8> Features;
8059 ArgStr.split(Features, "+");
8060 for (auto &Feature : Features) {
8061 Feature = Feature.trim();
8062 if (!llvm::AArch64::parseFMVExtension(Feature))
8063 return Builder.getFalse();
8064 if (Feature != "default")
8065 Features.push_back(Feature);
8066 }
8067 return EmitAArch64CpuSupports(Features);
8068}
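
A hedged usage sketch of the builtin handled here: on AArch64 the argument is a single string of '+'-separated FMV feature names, which is split and validated with parseFMVExtension before the mask test below is emitted. The feature names and helper functions are illustrative:

extern void fast_path(void);  // hypothetical
extern void slow_path(void);  // hypothetical

void dispatch_sketch(void) {
  if (__builtin_cpu_supports("dotprod+sve"))
    fast_path();
  else
    slow_path();
}
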
8069
8070llvm::Value *
8071CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
8072 llvm::APInt FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
8073 Value *Result = Builder.getTrue();
8074 if (FeaturesMask != 0) {
8075 // Get features from structure in runtime library
8076 // struct {
8077 // unsigned long long features;
8078 // } __aarch64_cpu_features;
8079 llvm::Type *STy = llvm::StructType::get(Int64Ty);
8080 llvm::Constant *AArch64CPUFeatures =
8081 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
8082 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
8083 llvm::Value *CpuFeatures = Builder.CreateGEP(
8084 STy, AArch64CPUFeatures,
8085 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
8086 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
8087 CharUnits::fromQuantity(8));
8088 Value *Mask = Builder.getInt(FeaturesMask.trunc(64));
8089 Value *Bitset = Builder.CreateAnd(Features, Mask);
8090 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
8091 Result = Builder.CreateAnd(Result, Cmp);
8092 }
8093 return Result;
8094}
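
The emitted IR amounts to loading the runtime-library feature word and testing that every requested bit is set. A rough C-level equivalent, assuming only the structure described in the comment above (the helper name is illustrative):

extern struct {
  unsigned long long features;
} __aarch64_cpu_features;

static int cpu_supports_mask_sketch(unsigned long long Mask) {
  // The load / and / icmp eq sequence built above with
  // CreateAlignedLoad, CreateAnd, and CreateICmpEQ.
  return (__aarch64_cpu_features.features & Mask) == Mask;
}
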