ARM.cpp
1//===---------- ARM.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ABIInfo.h"
14#include "CGBuiltin.h"
15#include "CGDebugInfo.h"
16#include "TargetInfo.h"
17#include "clang/Basic/TargetBuiltins.h"
18#include "llvm/IR/InlineAsm.h"
19#include "llvm/IR/IntrinsicsAArch64.h"
20#include "llvm/IR/IntrinsicsARM.h"
21#include "llvm/IR/IntrinsicsBPF.h"
22#include "llvm/TargetParser/AArch64TargetParser.h"
23
24#include <numeric>
25
26using namespace clang;
27using namespace CodeGen;
28using namespace llvm;
29
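// Map an AArch64 MSVC-compatibility builtin (_BitScan*, _Interlocked*) to the
// target-independent MSVCIntrin enumerator handled by the shared MSVC builtin
// emission path. Returns std::nullopt for builtins with no MSVC equivalent.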
30static std::optional<CodeGenFunction::MSVCIntrin>
31translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
32 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
33 switch (BuiltinID) {
34 default:
35 return std::nullopt;
36 case clang::AArch64::BI_BitScanForward:
37 case clang::AArch64::BI_BitScanForward64:
38 return MSVCIntrin::_BitScanForward;
39 case clang::AArch64::BI_BitScanReverse:
40 case clang::AArch64::BI_BitScanReverse64:
41 return MSVCIntrin::_BitScanReverse;
42 case clang::AArch64::BI_InterlockedAnd64:
43 return MSVCIntrin::_InterlockedAnd;
44 case clang::AArch64::BI_InterlockedExchange64:
45 return MSVCIntrin::_InterlockedExchange;
46 case clang::AArch64::BI_InterlockedExchangeAdd64:
47 return MSVCIntrin::_InterlockedExchangeAdd;
48 case clang::AArch64::BI_InterlockedExchangeSub64:
49 return MSVCIntrin::_InterlockedExchangeSub;
50 case clang::AArch64::BI_InterlockedOr64:
51 return MSVCIntrin::_InterlockedOr;
52 case clang::AArch64::BI_InterlockedXor64:
53 return MSVCIntrin::_InterlockedXor;
54 case clang::AArch64::BI_InterlockedDecrement64:
55 return MSVCIntrin::_InterlockedDecrement;
56 case clang::AArch64::BI_InterlockedIncrement64:
57 return MSVCIntrin::_InterlockedIncrement;
58 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
59 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
60 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
61 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
62 return MSVCIntrin::_InterlockedExchangeAdd_acq;
63 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
64 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
65 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
66 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
67 return MSVCIntrin::_InterlockedExchangeAdd_rel;
68 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
69 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
70 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
71 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
72 return MSVCIntrin::_InterlockedExchangeAdd_nf;
73 case clang::AArch64::BI_InterlockedExchange8_acq:
74 case clang::AArch64::BI_InterlockedExchange16_acq:
75 case clang::AArch64::BI_InterlockedExchange_acq:
76 case clang::AArch64::BI_InterlockedExchange64_acq:
77 case clang::AArch64::BI_InterlockedExchangePointer_acq:
78 return MSVCIntrin::_InterlockedExchange_acq;
79 case clang::AArch64::BI_InterlockedExchange8_rel:
80 case clang::AArch64::BI_InterlockedExchange16_rel:
81 case clang::AArch64::BI_InterlockedExchange_rel:
82 case clang::AArch64::BI_InterlockedExchange64_rel:
83 case clang::AArch64::BI_InterlockedExchangePointer_rel:
84 return MSVCIntrin::_InterlockedExchange_rel;
85 case clang::AArch64::BI_InterlockedExchange8_nf:
86 case clang::AArch64::BI_InterlockedExchange16_nf:
87 case clang::AArch64::BI_InterlockedExchange_nf:
88 case clang::AArch64::BI_InterlockedExchange64_nf:
89 case clang::AArch64::BI_InterlockedExchangePointer_nf:
90 return MSVCIntrin::_InterlockedExchange_nf;
91 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
92 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
93 case clang::AArch64::BI_InterlockedCompareExchange_acq:
94 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
95 case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
96 return MSVCIntrin::_InterlockedCompareExchange_acq;
97 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
98 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
99 case clang::AArch64::BI_InterlockedCompareExchange_rel:
100 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
101 case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
102 return MSVCIntrin::_InterlockedCompareExchange_rel;
103 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
104 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
105 case clang::AArch64::BI_InterlockedCompareExchange_nf:
106 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
107 return MSVCIntrin::_InterlockedCompareExchange_nf;
108 case clang::AArch64::BI_InterlockedCompareExchange128:
109 return MSVCIntrin::_InterlockedCompareExchange128;
110 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
111 return MSVCIntrin::_InterlockedCompareExchange128_acq;
112 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
113 return MSVCIntrin::_InterlockedCompareExchange128_nf;
114 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
115 return MSVCIntrin::_InterlockedCompareExchange128_rel;
116 case clang::AArch64::BI_InterlockedOr8_acq:
117 case clang::AArch64::BI_InterlockedOr16_acq:
118 case clang::AArch64::BI_InterlockedOr_acq:
119 case clang::AArch64::BI_InterlockedOr64_acq:
120 return MSVCIntrin::_InterlockedOr_acq;
121 case clang::AArch64::BI_InterlockedOr8_rel:
122 case clang::AArch64::BI_InterlockedOr16_rel:
123 case clang::AArch64::BI_InterlockedOr_rel:
124 case clang::AArch64::BI_InterlockedOr64_rel:
125 return MSVCIntrin::_InterlockedOr_rel;
126 case clang::AArch64::BI_InterlockedOr8_nf:
127 case clang::AArch64::BI_InterlockedOr16_nf:
128 case clang::AArch64::BI_InterlockedOr_nf:
129 case clang::AArch64::BI_InterlockedOr64_nf:
130 return MSVCIntrin::_InterlockedOr_nf;
131 case clang::AArch64::BI_InterlockedXor8_acq:
132 case clang::AArch64::BI_InterlockedXor16_acq:
133 case clang::AArch64::BI_InterlockedXor_acq:
134 case clang::AArch64::BI_InterlockedXor64_acq:
135 return MSVCIntrin::_InterlockedXor_acq;
136 case clang::AArch64::BI_InterlockedXor8_rel:
137 case clang::AArch64::BI_InterlockedXor16_rel:
138 case clang::AArch64::BI_InterlockedXor_rel:
139 case clang::AArch64::BI_InterlockedXor64_rel:
140 return MSVCIntrin::_InterlockedXor_rel;
141 case clang::AArch64::BI_InterlockedXor8_nf:
142 case clang::AArch64::BI_InterlockedXor16_nf:
143 case clang::AArch64::BI_InterlockedXor_nf:
144 case clang::AArch64::BI_InterlockedXor64_nf:
145 return MSVCIntrin::_InterlockedXor_nf;
146 case clang::AArch64::BI_InterlockedAnd8_acq:
147 case clang::AArch64::BI_InterlockedAnd16_acq:
148 case clang::AArch64::BI_InterlockedAnd_acq:
149 case clang::AArch64::BI_InterlockedAnd64_acq:
150 return MSVCIntrin::_InterlockedAnd_acq;
151 case clang::AArch64::BI_InterlockedAnd8_rel:
152 case clang::AArch64::BI_InterlockedAnd16_rel:
153 case clang::AArch64::BI_InterlockedAnd_rel:
154 case clang::AArch64::BI_InterlockedAnd64_rel:
155 return MSVCIntrin::_InterlockedAnd_rel;
156 case clang::AArch64::BI_InterlockedAnd8_nf:
157 case clang::AArch64::BI_InterlockedAnd16_nf:
158 case clang::AArch64::BI_InterlockedAnd_nf:
159 case clang::AArch64::BI_InterlockedAnd64_nf:
160 return MSVCIntrin::_InterlockedAnd_nf;
161 case clang::AArch64::BI_InterlockedIncrement16_acq:
162 case clang::AArch64::BI_InterlockedIncrement_acq:
163 case clang::AArch64::BI_InterlockedIncrement64_acq:
164 return MSVCIntrin::_InterlockedIncrement_acq;
165 case clang::AArch64::BI_InterlockedIncrement16_rel:
166 case clang::AArch64::BI_InterlockedIncrement_rel:
167 case clang::AArch64::BI_InterlockedIncrement64_rel:
168 return MSVCIntrin::_InterlockedIncrement_rel;
169 case clang::AArch64::BI_InterlockedIncrement16_nf:
170 case clang::AArch64::BI_InterlockedIncrement_nf:
171 case clang::AArch64::BI_InterlockedIncrement64_nf:
172 return MSVCIntrin::_InterlockedIncrement_nf;
173 case clang::AArch64::BI_InterlockedDecrement16_acq:
174 case clang::AArch64::BI_InterlockedDecrement_acq:
175 case clang::AArch64::BI_InterlockedDecrement64_acq:
176 return MSVCIntrin::_InterlockedDecrement_acq;
177 case clang::AArch64::BI_InterlockedDecrement16_rel:
178 case clang::AArch64::BI_InterlockedDecrement_rel:
179 case clang::AArch64::BI_InterlockedDecrement64_rel:
180 return MSVCIntrin::_InterlockedDecrement_rel;
181 case clang::AArch64::BI_InterlockedDecrement16_nf:
182 case clang::AArch64::BI_InterlockedDecrement_nf:
183 case clang::AArch64::BI_InterlockedDecrement64_nf:
184 return MSVCIntrin::_InterlockedDecrement_nf;
185 }
186 llvm_unreachable("must return from switch");
187}
188
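// Same mapping as above, for the 32-bit ARM builtin IDs.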
189static std::optional<CodeGenFunction::MSVCIntrin>
190translateArmToMsvcIntrin(unsigned BuiltinID) {
191 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
192 switch (BuiltinID) {
193 default:
194 return std::nullopt;
195 case clang::ARM::BI_BitScanForward:
196 case clang::ARM::BI_BitScanForward64:
197 return MSVCIntrin::_BitScanForward;
198 case clang::ARM::BI_BitScanReverse:
199 case clang::ARM::BI_BitScanReverse64:
200 return MSVCIntrin::_BitScanReverse;
201 case clang::ARM::BI_InterlockedAnd64:
202 return MSVCIntrin::_InterlockedAnd;
203 case clang::ARM::BI_InterlockedExchange64:
204 return MSVCIntrin::_InterlockedExchange;
205 case clang::ARM::BI_InterlockedExchangeAdd64:
206 return MSVCIntrin::_InterlockedExchangeAdd;
207 case clang::ARM::BI_InterlockedExchangeSub64:
208 return MSVCIntrin::_InterlockedExchangeSub;
209 case clang::ARM::BI_InterlockedOr64:
210 return MSVCIntrin::_InterlockedOr;
211 case clang::ARM::BI_InterlockedXor64:
212 return MSVCIntrin::_InterlockedXor;
213 case clang::ARM::BI_InterlockedDecrement64:
214 return MSVCIntrin::_InterlockedDecrement;
215 case clang::ARM::BI_InterlockedIncrement64:
216 return MSVCIntrin::_InterlockedIncrement;
217 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
218 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
219 case clang::ARM::BI_InterlockedExchangeAdd_acq:
220 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
221 return MSVCIntrin::_InterlockedExchangeAdd_acq;
222 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
223 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
224 case clang::ARM::BI_InterlockedExchangeAdd_rel:
225 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
226 return MSVCIntrin::_InterlockedExchangeAdd_rel;
227 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
228 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
229 case clang::ARM::BI_InterlockedExchangeAdd_nf:
230 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
231 return MSVCIntrin::_InterlockedExchangeAdd_nf;
232 case clang::ARM::BI_InterlockedExchange8_acq:
233 case clang::ARM::BI_InterlockedExchange16_acq:
234 case clang::ARM::BI_InterlockedExchange_acq:
235 case clang::ARM::BI_InterlockedExchange64_acq:
236 case clang::ARM::BI_InterlockedExchangePointer_acq:
237 return MSVCIntrin::_InterlockedExchange_acq;
238 case clang::ARM::BI_InterlockedExchange8_rel:
239 case clang::ARM::BI_InterlockedExchange16_rel:
240 case clang::ARM::BI_InterlockedExchange_rel:
241 case clang::ARM::BI_InterlockedExchange64_rel:
242 case clang::ARM::BI_InterlockedExchangePointer_rel:
243 return MSVCIntrin::_InterlockedExchange_rel;
244 case clang::ARM::BI_InterlockedExchange8_nf:
245 case clang::ARM::BI_InterlockedExchange16_nf:
246 case clang::ARM::BI_InterlockedExchange_nf:
247 case clang::ARM::BI_InterlockedExchange64_nf:
248 case clang::ARM::BI_InterlockedExchangePointer_nf:
249 return MSVCIntrin::_InterlockedExchange_nf;
250 case clang::ARM::BI_InterlockedCompareExchange8_acq:
251 case clang::ARM::BI_InterlockedCompareExchange16_acq:
252 case clang::ARM::BI_InterlockedCompareExchange_acq:
253 case clang::ARM::BI_InterlockedCompareExchange64_acq:
254 case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
255 return MSVCIntrin::_InterlockedCompareExchange_acq;
256 case clang::ARM::BI_InterlockedCompareExchange8_rel:
257 case clang::ARM::BI_InterlockedCompareExchange16_rel:
258 case clang::ARM::BI_InterlockedCompareExchange_rel:
259 case clang::ARM::BI_InterlockedCompareExchange64_rel:
260 case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
261 return MSVCIntrin::_InterlockedCompareExchange_rel;
262 case clang::ARM::BI_InterlockedCompareExchange8_nf:
263 case clang::ARM::BI_InterlockedCompareExchange16_nf:
264 case clang::ARM::BI_InterlockedCompareExchange_nf:
265 case clang::ARM::BI_InterlockedCompareExchange64_nf:
266 return MSVCIntrin::_InterlockedCompareExchange_nf;
267 case clang::ARM::BI_InterlockedOr8_acq:
268 case clang::ARM::BI_InterlockedOr16_acq:
269 case clang::ARM::BI_InterlockedOr_acq:
270 case clang::ARM::BI_InterlockedOr64_acq:
271 return MSVCIntrin::_InterlockedOr_acq;
272 case clang::ARM::BI_InterlockedOr8_rel:
273 case clang::ARM::BI_InterlockedOr16_rel:
274 case clang::ARM::BI_InterlockedOr_rel:
275 case clang::ARM::BI_InterlockedOr64_rel:
276 return MSVCIntrin::_InterlockedOr_rel;
277 case clang::ARM::BI_InterlockedOr8_nf:
278 case clang::ARM::BI_InterlockedOr16_nf:
279 case clang::ARM::BI_InterlockedOr_nf:
280 case clang::ARM::BI_InterlockedOr64_nf:
281 return MSVCIntrin::_InterlockedOr_nf;
282 case clang::ARM::BI_InterlockedXor8_acq:
283 case clang::ARM::BI_InterlockedXor16_acq:
284 case clang::ARM::BI_InterlockedXor_acq:
285 case clang::ARM::BI_InterlockedXor64_acq:
286 return MSVCIntrin::_InterlockedXor_acq;
287 case clang::ARM::BI_InterlockedXor8_rel:
288 case clang::ARM::BI_InterlockedXor16_rel:
289 case clang::ARM::BI_InterlockedXor_rel:
290 case clang::ARM::BI_InterlockedXor64_rel:
291 return MSVCIntrin::_InterlockedXor_rel;
292 case clang::ARM::BI_InterlockedXor8_nf:
293 case clang::ARM::BI_InterlockedXor16_nf:
294 case clang::ARM::BI_InterlockedXor_nf:
295 case clang::ARM::BI_InterlockedXor64_nf:
296 return MSVCIntrin::_InterlockedXor_nf;
297 case clang::ARM::BI_InterlockedAnd8_acq:
298 case clang::ARM::BI_InterlockedAnd16_acq:
299 case clang::ARM::BI_InterlockedAnd_acq:
300 case clang::ARM::BI_InterlockedAnd64_acq:
301 return MSVCIntrin::_InterlockedAnd_acq;
302 case clang::ARM::BI_InterlockedAnd8_rel:
303 case clang::ARM::BI_InterlockedAnd16_rel:
304 case clang::ARM::BI_InterlockedAnd_rel:
305 case clang::ARM::BI_InterlockedAnd64_rel:
306 return MSVCIntrin::_InterlockedAnd_rel;
307 case clang::ARM::BI_InterlockedAnd8_nf:
308 case clang::ARM::BI_InterlockedAnd16_nf:
309 case clang::ARM::BI_InterlockedAnd_nf:
310 case clang::ARM::BI_InterlockedAnd64_nf:
311 return MSVCIntrin::_InterlockedAnd_nf;
312 case clang::ARM::BI_InterlockedIncrement16_acq:
313 case clang::ARM::BI_InterlockedIncrement_acq:
314 case clang::ARM::BI_InterlockedIncrement64_acq:
315 return MSVCIntrin::_InterlockedIncrement_acq;
316 case clang::ARM::BI_InterlockedIncrement16_rel:
317 case clang::ARM::BI_InterlockedIncrement_rel:
318 case clang::ARM::BI_InterlockedIncrement64_rel:
319 return MSVCIntrin::_InterlockedIncrement_rel;
320 case clang::ARM::BI_InterlockedIncrement16_nf:
321 case clang::ARM::BI_InterlockedIncrement_nf:
322 case clang::ARM::BI_InterlockedIncrement64_nf:
323 return MSVCIntrin::_InterlockedIncrement_nf;
324 case clang::ARM::BI_InterlockedDecrement16_acq:
325 case clang::ARM::BI_InterlockedDecrement_acq:
326 case clang::ARM::BI_InterlockedDecrement64_acq:
327 return MSVCIntrin::_InterlockedDecrement_acq;
328 case clang::ARM::BI_InterlockedDecrement16_rel:
329 case clang::ARM::BI_InterlockedDecrement_rel:
330 case clang::ARM::BI_InterlockedDecrement64_rel:
331 return MSVCIntrin::_InterlockedDecrement_rel;
332 case clang::ARM::BI_InterlockedDecrement16_nf:
333 case clang::ARM::BI_InterlockedDecrement_nf:
334 case clang::ARM::BI_InterlockedDecrement64_nf:
335 return MSVCIntrin::_InterlockedDecrement_nf;
336 }
337 llvm_unreachable("must return from switch");
338}
339
340// Emit an intrinsic where all operands are of the same type as the result.
341// Depending on mode, this may be a constrained floating-point intrinsic.
342static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
343 unsigned IntrinsicID,
344 unsigned ConstrainedIntrinsicID,
345 llvm::Type *Ty,
346 ArrayRef<Value *> Args) {
347 Function *F;
348 if (CGF.Builder.getIsFPConstrained())
349 F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
350 else
351 F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
352
353 if (CGF.Builder.getIsFPConstrained())
354 return CGF.Builder.CreateConstrainedFPCall(F, Args);
355 else
356 return CGF.Builder.CreateCall(F, Args);
357}
358
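// Return the LLVM fixed-width vector type corresponding to a NeonTypeFlags
// descriptor: the element kind selects the element type, the quad bit doubles
// the lane count, and V1Ty forces a single-element vector.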
359static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
360 NeonTypeFlags TypeFlags,
361 bool HasFastHalfType = true,
362 bool V1Ty = false,
363 bool AllowBFloatArgsAndRet = true) {
364 int IsQuad = TypeFlags.isQuad();
365 switch (TypeFlags.getEltType()) {
366 case NeonTypeFlags::Int8:
367 case NeonTypeFlags::Poly8:
368 case NeonTypeFlags::MFloat8:
369 return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
370 case NeonTypeFlags::Int16:
371 case NeonTypeFlags::Poly16:
372 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
373 case NeonTypeFlags::BFloat16:
374 if (AllowBFloatArgsAndRet)
375 return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
376 else
377 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
378 case NeonTypeFlags::Float16:
379 if (HasFastHalfType)
380 return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
381 else
382 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
383 case NeonTypeFlags::Int32:
384 return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
385 case NeonTypeFlags::Int64:
386 case NeonTypeFlags::Poly64:
387 return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
388 case NeonTypeFlags::Poly128:
389 // FIXME: i128 and f128 are not fully supported in Clang and LLVM;
390 // a lot of the i128 and f128 API is missing,
391 // so we use v16i8 to represent poly128 and get pattern matched.
392 return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
393 case NeonTypeFlags::Float32:
394 return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
395 case NeonTypeFlags::Float64:
396 return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
397 }
398 llvm_unreachable("Unknown vector element type!");
399}
400
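// Return the floating-point vector type with the same lane layout as the
// given integer NEON type (i16 -> f16, i32 -> f32, i64 -> f64).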
401static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
402 NeonTypeFlags IntTypeFlags) {
403 int IsQuad = IntTypeFlags.isQuad();
404 switch (IntTypeFlags.getEltType()) {
405 case NeonTypeFlags::Int16:
406 return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
407 case NeonTypeFlags::Int32:
408 return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
409 case NeonTypeFlags::Int64:
410 return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
411 default:
412 llvm_unreachable("Type can't be converted to floating-point!");
413 }
414}
415
416Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
417 const ElementCount &Count) {
418 Value *SV = llvm::ConstantVector::getSplat(Count, C);
419 return Builder.CreateShuffleVector(V, V, SV, "lane");
420}
421
422Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
423 ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
424 return EmitNeonSplat(V, C, EC);
425}
426
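// Emit a call to a NEON intrinsic, bitcasting each operand to the parameter
// type the intrinsic expects; the operand at index 'shift' (if any) is instead
// converted into the constant shift-amount vector the intrinsic takes,
// negated for right shifts.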
427Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value *> &Ops,
428 const char *name,
429 unsigned shift, bool rightshift) {
430 unsigned j = 0;
431 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
432 ai != ae; ++ai, ++j) {
433 if (F->isConstrainedFPIntrinsic())
434 if (ai->getType()->isMetadataTy())
435 continue;
436 if (shift > 0 && shift == j)
437 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
438 else
439 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
440 }
441
442 if (F->isConstrainedFPIntrinsic())
443 return Builder.CreateConstrainedFPCall(F, Ops, name);
444 else
445 return Builder.CreateCall(F, Ops, name);
446}
447
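// Emit an FP8 NEON intrinsic call. The last argument of the builtin call is
// the FPMR value; it is written to the FPMR register (via the
// aarch64_set_fpmr intrinsic) before the NEON intrinsic itself is emitted.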
448Value *CodeGenFunction::EmitFP8NeonCall(unsigned IID,
449 ArrayRef<llvm::Type *> Tys,
450 SmallVectorImpl<llvm::Value *> &Ops,
451 const CallExpr *E, const char *name) {
452 llvm::Value *FPM =
453 EmitScalarOrConstFoldImmArg(/* ICEArguments */ 0, E->getNumArgs() - 1, E);
454 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr), FPM);
455 return EmitNeonCall(CGM.getIntrinsic(IID, Tys), Ops, name);
456}
457
458llvm::Value *CodeGenFunction::EmitFP8NeonFDOTCall(
459 unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
460 SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
461
462 const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
463 RetTy->getPrimitiveSizeInBits();
464 llvm::Type *Tys[] = {llvm::FixedVectorType::get(RetTy, ElemCount),
465 Ops[1]->getType()};
466 if (ExtendLaneArg) {
467 auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
468 Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
469 uint64_t(0));
470 }
471 return EmitFP8NeonCall(IID, Tys, Ops, E, name);
472}
473
474llvm::Value *CodeGenFunction::EmitFP8NeonFMLACall(
475 unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
476 SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
477
478 if (ExtendLaneArg) {
479 auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
480 Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
481 uint64_t(0));
482 }
483 const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
484 RetTy->getPrimitiveSizeInBits();
485 return EmitFP8NeonCall(IID, {llvm::FixedVectorType::get(RetTy, ElemCount)},
486 Ops, E, name);
487}
488
489Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
490 bool neg) {
491 int SV = cast<ConstantInt>(V)->getSExtValue();
492 return ConstantInt::get(Ty, neg ? -SV : SV);
493}
494
495Value *CodeGenFunction::EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
496 llvm::Type *Ty1, bool Extract,
497 SmallVectorImpl<llvm::Value *> &Ops,
498 const CallExpr *E,
499 const char *name) {
500 llvm::Type *Tys[] = {Ty0, Ty1};
501 if (Extract) {
502 // Op[0] is mfloat8x16_t, but the intrinsic converts only the lower part of
503 // the vector.
504 Tys[1] = llvm::FixedVectorType::get(Int8Ty, 8);
505 Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], uint64_t(0));
506 }
507 return EmitFP8NeonCall(IID, Tys, Ops, E, name);
508}
509
510// Right-shift a vector by a constant.
511Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
512 llvm::Type *Ty, bool usgn,
513 const char *name) {
514 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
515
516 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
517 int EltSize = VTy->getScalarSizeInBits();
518
519 Vec = Builder.CreateBitCast(Vec, Ty);
520
521 // lshr/ashr are undefined when the shift amount is equal to the vector
522 // element size.
523 if (ShiftAmt == EltSize) {
524 if (usgn) {
525 // Right-shifting an unsigned value by its size yields 0.
526 return llvm::ConstantAggregateZero::get(VTy);
527 } else {
528 // Right-shifting a signed value by its size is equivalent
529 // to a shift of size-1.
530 --ShiftAmt;
531 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
532 }
533 }
534
535 Shift = EmitNeonShiftVector(Shift, Ty, false);
536 if (usgn)
537 return Builder.CreateLShr(Vec, Shift, name);
538 else
539 return Builder.CreateAShr(Vec, Shift, name);
540}
541
542enum {
543 AddRetType = (1 << 0),
544 Add1ArgType = (1 << 1),
545 Add2ArgTypes = (1 << 2),
546
547 VectorizeRetType = (1 << 3),
548 VectorizeArgTypes = (1 << 4),
549
550 InventFloatType = (1 << 5),
551 UnsignedAlts = (1 << 6),
552
553 Use64BitVectors = (1 << 7),
554 Use128BitVectors = (1 << 8),
555
556 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
557 VectorRet = AddRetType | VectorizeRetType,
558 VectorRetGetArgs01 =
559 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
560 FpCmpzModifiers =
561 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
562};
563
564namespace {
565struct ARMVectorIntrinsicInfo {
566 const char *NameHint;
567 unsigned BuiltinID;
568 unsigned LLVMIntrinsic;
569 unsigned AltLLVMIntrinsic;
570 uint64_t TypeModifier;
571
572 bool operator<(unsigned RHSBuiltinID) const {
573 return BuiltinID < RHSBuiltinID;
574 }
575 bool operator<(const ARMVectorIntrinsicInfo &TE) const {
576 return BuiltinID < TE.BuiltinID;
577 }
578};
579} // end anonymous namespace
580
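// The NEONMAPn macros build ARMVectorIntrinsicInfo entries with zero, one or
// two LLVM intrinsics for a given __builtin_neon_* builtin; when two are
// given, the second is the alternative (typically signed vs. unsigned) form.
// For example, NEONMAP1(vabs_v, arm_neon_vabs, 0) expands to
//   { "vabs_v", NEON::BI__builtin_neon_vabs_v, Intrinsic::arm_neon_vabs, 0, 0 }.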
581#define NEONMAP0(NameBase) \
582 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
583
584#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
585 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
586 Intrinsic::LLVMIntrinsic, 0, TypeModifier }
587
588#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
589 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
590 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
591 TypeModifier }
592
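// Mapping from ARM NEON builtins to arm_neon_* LLVM intrinsics. The table is
// expected to be kept sorted by BuiltinID so it can be binary-searched.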
593// clang-format off
594static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
595 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
596 NEONMAP0(splat_lane_v),
597 NEONMAP0(splat_laneq_v),
598 NEONMAP0(splatq_lane_v),
599 NEONMAP0(splatq_laneq_v),
600 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
601 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
602 NEONMAP1(vabs_v, arm_neon_vabs, 0),
603 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
604 NEONMAP0(vadd_v),
605 NEONMAP0(vaddhn_v),
606 NEONMAP0(vaddq_v),
607 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
608 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
609 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
610 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
611 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
612 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
613 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
614 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
615 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
616 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
617 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
618 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
619 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
620 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
621 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
622 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
623 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
624 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
625 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
626 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
627 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
628 NEONMAP1(vcage_v, arm_neon_vacge, 0),
629 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
630 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
631 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
632 NEONMAP1(vcale_v, arm_neon_vacge, 0),
633 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
634 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
635 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
636 NEONMAP0(vceqz_v),
637 NEONMAP0(vceqzq_v),
638 NEONMAP0(vcgez_v),
639 NEONMAP0(vcgezq_v),
640 NEONMAP0(vcgtz_v),
641 NEONMAP0(vcgtzq_v),
642 NEONMAP0(vclez_v),
643 NEONMAP0(vclezq_v),
644 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
645 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
646 NEONMAP0(vcltz_v),
647 NEONMAP0(vcltzq_v),
648 NEONMAP1(vclz_v, ctlz, Add1ArgType),
649 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
650 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
651 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
652 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
653 NEONMAP0(vcvt_f16_s16),
654 NEONMAP0(vcvt_f16_u16),
655 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
656 NEONMAP0(vcvt_f32_v),
657 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
658 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
659 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
660 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
661 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
662 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
663 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
664 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
665 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
666 NEONMAP0(vcvt_s16_f16),
667 NEONMAP0(vcvt_s32_v),
668 NEONMAP0(vcvt_s64_v),
669 NEONMAP0(vcvt_u16_f16),
670 NEONMAP0(vcvt_u32_v),
671 NEONMAP0(vcvt_u64_v),
672 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
673 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
674 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
675 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
676 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
677 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
678 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
679 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
680 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
681 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
682 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
683 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
684 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
685 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
686 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
687 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
688 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
689 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
690 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
691 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
692 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
693 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
694 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
695 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
696 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
697 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
698 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
699 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
700 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
701 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
702 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
703 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
704 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
705 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
706 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
707 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
708 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
709 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
710 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
711 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
712 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
713 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
714 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
715 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
716 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
717 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
718 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
719 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
720 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
721 NEONMAP0(vcvtq_f16_s16),
722 NEONMAP0(vcvtq_f16_u16),
723 NEONMAP0(vcvtq_f32_v),
724 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
725 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
726 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
727 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
728 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
729 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
730 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
731 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
732 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
733 NEONMAP0(vcvtq_s16_f16),
734 NEONMAP0(vcvtq_s32_v),
735 NEONMAP0(vcvtq_s64_v),
736 NEONMAP0(vcvtq_u16_f16),
737 NEONMAP0(vcvtq_u32_v),
738 NEONMAP0(vcvtq_u64_v),
739 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
740 NEONMAP1(vdot_u32, arm_neon_udot, 0),
741 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
742 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
743 NEONMAP0(vext_v),
744 NEONMAP0(vextq_v),
745 NEONMAP0(vfma_v),
746 NEONMAP0(vfmaq_v),
747 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
748 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
749 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
750 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
751 NEONMAP0(vld1_dup_v),
752 NEONMAP1(vld1_v, arm_neon_vld1, 0),
753 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
754 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
755 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
756 NEONMAP0(vld1q_dup_v),
757 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
758 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
759 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
760 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
761 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
762 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
763 NEONMAP1(vld2_v, arm_neon_vld2, 0),
764 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
765 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
766 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
767 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
768 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
769 NEONMAP1(vld3_v, arm_neon_vld3, 0),
770 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
771 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
772 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
773 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
774 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
775 NEONMAP1(vld4_v, arm_neon_vld4, 0),
776 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
777 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
778 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
779 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
780 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
781 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
782 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
783 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
784 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
785 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
786 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
787 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
788 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
789 NEONMAP0(vmovl_v),
790 NEONMAP0(vmovn_v),
791 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
792 NEONMAP0(vmull_v),
793 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
794 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
795 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
796 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
797 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
798 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
799 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
800 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
801 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
802 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
803 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
804 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
805 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
806 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
807 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
808 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
809 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
810 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
811 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
812 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
813 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
814 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
815 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
816 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
817 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
818 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
819 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
820 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
821 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
822 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
823 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
824 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
825 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
826 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
827 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
828 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
829 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
830 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
831 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
832 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
833 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
834 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
835 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
836 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
837 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
838 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
839 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
840 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
841 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
842 NEONMAP1(vrnd_v, trunc, Add1ArgType),
843 NEONMAP1(vrnda_v, round, Add1ArgType),
844 NEONMAP1(vrndaq_v, round, Add1ArgType),
845 NEONMAP0(vrndi_v),
846 NEONMAP0(vrndiq_v),
847 NEONMAP1(vrndm_v, floor, Add1ArgType),
848 NEONMAP1(vrndmq_v, floor, Add1ArgType),
849 NEONMAP1(vrndn_v, roundeven, Add1ArgType),
850 NEONMAP1(vrndnq_v, roundeven, Add1ArgType),
851 NEONMAP1(vrndp_v, ceil, Add1ArgType),
852 NEONMAP1(vrndpq_v, ceil, Add1ArgType),
853 NEONMAP1(vrndq_v, trunc, Add1ArgType),
854 NEONMAP1(vrndx_v, rint, Add1ArgType),
855 NEONMAP1(vrndxq_v, rint, Add1ArgType),
856 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
857 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
858 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
859 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
860 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
861 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
862 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
863 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
864 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
865 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
866 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
867 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
868 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
869 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
870 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
871 NEONMAP0(vshl_n_v),
872 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
873 NEONMAP0(vshll_n_v),
874 NEONMAP0(vshlq_n_v),
875 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
876 NEONMAP0(vshr_n_v),
877 NEONMAP0(vshrn_n_v),
878 NEONMAP0(vshrq_n_v),
879 NEONMAP1(vst1_v, arm_neon_vst1, 0),
880 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
881 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
882 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
883 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
884 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
885 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
886 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
887 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
888 NEONMAP1(vst2_v, arm_neon_vst2, 0),
889 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
890 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
891 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
892 NEONMAP1(vst3_v, arm_neon_vst3, 0),
893 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
894 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
895 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
896 NEONMAP1(vst4_v, arm_neon_vst4, 0),
897 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
898 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
899 NEONMAP0(vsubhn_v),
900 NEONMAP0(vtrn_v),
901 NEONMAP0(vtrnq_v),
902 NEONMAP0(vtst_v),
903 NEONMAP0(vtstq_v),
904 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
905 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
906 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
907 NEONMAP0(vuzp_v),
908 NEONMAP0(vuzpq_v),
909 NEONMAP0(vzip_v),
910 NEONMAP0(vzipq_v)
911};
912
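// Equivalent mapping for the AArch64 NEON builtins (aarch64_neon_* and
// aarch64_crypto_* intrinsics), also kept sorted by BuiltinID.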
913static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
914 NEONMAP0(splat_lane_v),
915 NEONMAP0(splat_laneq_v),
916 NEONMAP0(splatq_lane_v),
917 NEONMAP0(splatq_laneq_v),
918 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
919 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
920 NEONMAP0(vadd_v),
921 NEONMAP0(vaddhn_v),
922 NEONMAP0(vaddq_p128),
923 NEONMAP0(vaddq_v),
924 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
925 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
926 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
927 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
928 NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
929 NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
930 NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
931 NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
932 NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
933 NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
934 NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
935 NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
936 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
937 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
938 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
939 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
940 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
941 NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
942 NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
943 NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
944 NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
945 NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
946 NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
947 NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
948 NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
949 NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
950 NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
951 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
952 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
953 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
954 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
955 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
956 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
957 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
958 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
959 NEONMAP0(vceqz_v),
960 NEONMAP0(vceqzq_v),
961 NEONMAP0(vcgez_v),
962 NEONMAP0(vcgezq_v),
963 NEONMAP0(vcgtz_v),
964 NEONMAP0(vcgtzq_v),
965 NEONMAP0(vclez_v),
966 NEONMAP0(vclezq_v),
967 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
968 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
969 NEONMAP0(vcltz_v),
970 NEONMAP0(vcltzq_v),
971 NEONMAP1(vclz_v, ctlz, Add1ArgType),
972 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
973 NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
974 NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
975 NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
976 NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
977 NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
978 NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
979 NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
980 NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
981 NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
982 NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
983 NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
984 NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
985 NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
986 NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
987 NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
988 NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
989 NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
990 NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
991 NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
992 NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
993 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
994 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
995 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
996 NEONMAP0(vcvt_f16_s16),
997 NEONMAP0(vcvt_f16_u16),
998 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
999 NEONMAP0(vcvt_f32_v),
1000 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
1001 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
1002 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1003 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1004 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
1005 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
1006 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
1007 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
1008 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
1009 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
1010 NEONMAP0(vcvtq_f16_s16),
1011 NEONMAP0(vcvtq_f16_u16),
1012 NEONMAP0(vcvtq_f32_v),
1013 NEONMAP0(vcvtq_high_bf16_f32),
1014 NEONMAP0(vcvtq_low_bf16_f32),
1015 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
1016 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
1017 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1018 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1019 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
1020 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
1021 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
1022 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
1023 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
1024 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
1025 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
1026 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
1027 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
1028 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
1029 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
1030 NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1031 NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1032 NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1033 NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1034 NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1035 NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1036 NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1037 NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1038 NEONMAP0(vext_v),
1039 NEONMAP0(vextq_v),
1040 NEONMAP0(vfma_v),
1041 NEONMAP0(vfmaq_v),
1042 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
1043 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
1044 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
1045 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
1046 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
1047 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
1048 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
1049 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
1050 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
1051 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
1052 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
1053 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
1054 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
1055 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
1056 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
1057 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
1058 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
1059 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
1060 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
1061 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
1062 NEONMAP0(vmovl_v),
1063 NEONMAP0(vmovn_v),
1064 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
1065 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
1066 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
1067 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
1068 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
1069 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
1070 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
1071 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
1072 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
1073 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
1074 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
1075 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
1076 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
1077 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
1078 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
1079 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
1080 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
1081 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
1082 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
1083 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
1084 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
1085 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
1086 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
1087 NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
1088 NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
1089 NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
1090 NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
1091 NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
1092 NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
1093 NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
1094 NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
1095 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
1096 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
1097 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
1098 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
1099 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
1100 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
1101 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
1102 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
1103 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
1104 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
1105 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
1106 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
1107 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
1108 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
1109 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
1110 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
1111 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
1112 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
1113 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
1114 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
1115 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
1116 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
1117 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
1118 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
1119 NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
1120 NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
1121 NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
1122 NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
1123 NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
1124 NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
1125 NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
1126 NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
1127 NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
1128 NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
1129 NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
1130 NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
1131 NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
1132 NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
1133 NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
1134 NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
1135 NEONMAP0(vrndi_v),
1136 NEONMAP0(vrndiq_v),
1137 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
1138 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
1139 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
1140 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
1141 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
1142 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
1143 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
1144 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
1145 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
1146 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
1147 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
1148 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
1149 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
1150 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
1151 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
1152 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
1153 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
1154 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
1155 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
1156 NEONMAP0(vshl_n_v),
1157 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
1158 NEONMAP0(vshll_n_v),
1159 NEONMAP0(vshlq_n_v),
1160 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
1161 NEONMAP0(vshr_n_v),
1162 NEONMAP0(vshrn_n_v),
1163 NEONMAP0(vshrq_n_v),
1164 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
1165 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
1166 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
1167 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
1168 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
1169 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
1170 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
1171 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
1172 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
1173 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
1174 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
1175 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
1176 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
1177 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
1178 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
1179 NEONMAP0(vsubhn_v),
1180 NEONMAP0(vtst_v),
1181 NEONMAP0(vtstq_v),
1182 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
1183 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
1184 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
1185 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
1186};
1187
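// Scalar (SISD) AArch64 builtins: operations on single elements rather than
// whole vectors, such as across-vector reductions and scalar conversions.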
1188static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
1189 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
1190 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
1191 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
1192 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
1193 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
1194 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
1195 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
1196 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
1197 NEONMAP1(vaddv_s16, vector_reduce_add, Add1ArgType),
1198 NEONMAP1(vaddv_s32, vector_reduce_add, Add1ArgType),
1199 NEONMAP1(vaddv_s8, vector_reduce_add, Add1ArgType),
1200 NEONMAP1(vaddv_u16, vector_reduce_add, Add1ArgType),
1201 NEONMAP1(vaddv_u32, vector_reduce_add, Add1ArgType),
1202 NEONMAP1(vaddv_u8, vector_reduce_add, Add1ArgType),
1203 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
1204 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
1205 NEONMAP1(vaddvq_s16, vector_reduce_add, Add1ArgType),
1206 NEONMAP1(vaddvq_s32, vector_reduce_add, Add1ArgType),
1207 NEONMAP1(vaddvq_s64, vector_reduce_add, Add1ArgType),
1208 NEONMAP1(vaddvq_s8, vector_reduce_add, Add1ArgType),
1209 NEONMAP1(vaddvq_u16, vector_reduce_add, Add1ArgType),
1210 NEONMAP1(vaddvq_u32, vector_reduce_add, Add1ArgType),
1211 NEONMAP1(vaddvq_u64, vector_reduce_add, Add1ArgType),
1212 NEONMAP1(vaddvq_u8, vector_reduce_add, Add1ArgType),
1213 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
1214 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
1215 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
1216 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
1217 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
1218 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
1219 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
1220 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
1221 NEONMAP1(vcvtad_s32_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1222 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1223 NEONMAP1(vcvtad_u32_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1224 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1225 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1226 NEONMAP1(vcvtas_s64_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1227 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1228 NEONMAP1(vcvtas_u64_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1229 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1230 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1231 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1232 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1233 NEONMAP1(vcvtd_s32_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1234 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1235 NEONMAP1(vcvtd_u32_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1236 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1237 NEONMAP0(vcvth_bf16_f32),
1238 NEONMAP1(vcvtmd_s32_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1239 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1240 NEONMAP1(vcvtmd_u32_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1241 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1242 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1243 NEONMAP1(vcvtms_s64_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1244 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1245 NEONMAP1(vcvtms_u64_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1246 NEONMAP1(vcvtnd_s32_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1247 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1248 NEONMAP1(vcvtnd_u32_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1249 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1250 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1251 NEONMAP1(vcvtns_s64_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1252 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1253 NEONMAP1(vcvtns_u64_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1254 NEONMAP1(vcvtpd_s32_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1255 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1256 NEONMAP1(vcvtpd_u32_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1257 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1258 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1259 NEONMAP1(vcvtps_s64_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1260 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1261 NEONMAP1(vcvtps_u64_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1262 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1263 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1264 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1265 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1266 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1267 NEONMAP1(vcvts_s64_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1268 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1269 NEONMAP1(vcvts_u64_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1270 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
1271 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1272 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1273 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1274 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1275 NEONMAP1(vmaxv_s16, vector_reduce_smax, Add1ArgType),
1276 NEONMAP1(vmaxv_s32, vector_reduce_smax, Add1ArgType),
1277 NEONMAP1(vmaxv_s8, vector_reduce_smax, Add1ArgType),
1278 NEONMAP1(vmaxv_u16, vector_reduce_umax, Add1ArgType),
1279 NEONMAP1(vmaxv_u32, vector_reduce_umax, Add1ArgType),
1280 NEONMAP1(vmaxv_u8, vector_reduce_umax, Add1ArgType),
1281 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1282 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1283 NEONMAP1(vmaxvq_s16, vector_reduce_smax, Add1ArgType),
1284 NEONMAP1(vmaxvq_s32, vector_reduce_smax, Add1ArgType),
1285 NEONMAP1(vmaxvq_s8, vector_reduce_smax, Add1ArgType),
1286 NEONMAP1(vmaxvq_u16, vector_reduce_umax, Add1ArgType),
1287 NEONMAP1(vmaxvq_u32, vector_reduce_umax, Add1ArgType),
1288 NEONMAP1(vmaxvq_u8, vector_reduce_umax, Add1ArgType),
1289 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1290 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1291 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1292 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1293 NEONMAP1(vminv_s16, vector_reduce_smin, Add1ArgType),
1294 NEONMAP1(vminv_s32, vector_reduce_smin, Add1ArgType),
1295 NEONMAP1(vminv_s8, vector_reduce_smin, Add1ArgType),
1296 NEONMAP1(vminv_u16, vector_reduce_umin, Add1ArgType),
1297 NEONMAP1(vminv_u32, vector_reduce_umin, Add1ArgType),
1298 NEONMAP1(vminv_u8, vector_reduce_umin, Add1ArgType),
1299 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1300 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
1301 NEONMAP1(vminvq_s16, vector_reduce_smin, Add1ArgType),
1302 NEONMAP1(vminvq_s32, vector_reduce_smin, Add1ArgType),
1303 NEONMAP1(vminvq_s8, vector_reduce_smin, Add1ArgType),
1304 NEONMAP1(vminvq_u16, vector_reduce_umin, Add1ArgType),
1305 NEONMAP1(vminvq_u32, vector_reduce_umin, Add1ArgType),
1306 NEONMAP1(vminvq_u8, vector_reduce_umin, Add1ArgType),
1307 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
1308 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
1309 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
1310 NEONMAP1(vpaddd_s64, vector_reduce_add, Add1ArgType),
1311 NEONMAP1(vpaddd_u64, vector_reduce_add, Add1ArgType),
1312 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1313 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1314 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1315 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1316 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1317 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1318 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
1319 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1320 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
1321 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
1322 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
1323 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
1324 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
1325 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
1326 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
1327 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
1328 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
1329 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
1330 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
1331 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
1332 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
1333 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
1334 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
1335 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
1336 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
1337 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
1338 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
1339 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
1340 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
1341 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
1342 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
1343 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
1344 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
1345 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
1346 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
1347 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
1348 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
1349 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
1350 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
1351 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
1352 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
1353 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
1354 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
1355 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
1356 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
1357 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
1358 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
1359 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
1360 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
1361 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
1362 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
1363 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
1364 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
1365 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
1366 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
1367 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
1368 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
1369 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
1370 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
1371 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
1372 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1373 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1374 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1375 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1376 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
1377 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
1378 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1379 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1380 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1381 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1382 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
1383 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
1384 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
1385 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
1386 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
1387 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
1388 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
1389 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
1390 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
1391 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
1392 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
1393 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
1394 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
1395 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
1396 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
1397 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
1398 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
1399 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
1400 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
1401 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
1402 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
1403 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
1404 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
1405 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
1406 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
1407 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
1408 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
1409 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
1410 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
1411 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
1412 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
1413 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
1414 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
1415 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
1416 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
1417 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
1418 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
1419 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
1420 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
1421 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
1422 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
1423 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
1424 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
1425 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
1426 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
1427 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
1428 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
1429 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
1430 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
1431 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
1432 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
1433 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
1434 // FP16 scalar intrinsics go here.
1435 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
1436 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1437 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1438 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1439 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1440 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1441 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1442 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1443 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1444 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1445 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1446 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1447 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1448 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1449 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1450 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1451 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1452 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1453 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1454 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1455 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1456 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1457 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1458 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1459 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1460 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1461 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1462 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1463 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1464 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
1465 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
1466 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
1467 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
1468 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
1469};
1470// clang-format on
1471
1472// Some intrinsics are equivalent for codegen.
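// For instance, the entry below pairs __builtin_neon_vabd_f16 with
// __builtin_neon_vabd_v, so only the generic "_v" form needs a dedicated
// codegen path and the f16/bf16-specific builtin IDs are funneled into it.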
1473static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
1474 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
1475 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
1476 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
1477 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
1478 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
1479 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
1480 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
1481 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
1482 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
1483 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
1484 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
1485 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
1486 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
1487 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
1488 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
1489 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
1490 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
1491 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
1492 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
1493 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
1494 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
1495 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
1496 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
1497 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
1498 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
1499 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
1500 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
1501 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
1502 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
1503 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
1504 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
1505 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
1506 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
1507 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
1508 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
1509 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
1510 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
1511 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
1512 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
1513 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
1514 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
1515 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
1516 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
1517 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
1518 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
1519 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
1520 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
1521 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
1522 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
1523 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
1524 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
1525 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
1526 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
1527 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
1528 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
1529 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
1530 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
1531 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
1532 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
1533 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
1534 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
1535 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
1536 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
1537 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
1538 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
1539 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
1540 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
1541 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
1542 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
1543 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
1544 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
1545 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
1546 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
1547 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
1548 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
1549 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
1550 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
1551 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
1552 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
1553 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
1554 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
1555 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
1556 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
1557 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
1558 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
1559 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
1560 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
1561 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
1562 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
1563 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
1564 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
1565 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
1566 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
1567 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
1568 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
1569 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
1570 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
1571 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
1572 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
1573 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
1574 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
1575 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
1576 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
1577 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
1578 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
1579 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
1580 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
1581 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
1582 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
1583 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
1584 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
1585 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
1586 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
1587 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
1588 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
1589 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
1590 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
1591 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
1592 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
1593 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
1594 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
1595 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
1596 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
1597 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
1598 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
1599 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
1600 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
1601 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
1602 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
1603 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
1604 // arbitrary one to be handled as the canonical variation.
1605 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1606 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1607 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1608 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1609 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1610 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1611 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1612 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1613 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1614 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1615 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1616 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1617};
1618
1619#undef NEONMAP0
1620#undef NEONMAP1
1621#undef NEONMAP2
1622
1623#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
1624 { \
1625 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
1626 TypeModifier \
1627 }
1628
1629#define SVEMAP2(NameBase, TypeModifier) \
1630 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
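// Note: SVEMAP1 binds a __builtin_sve_<NameBase> builtin to a concrete LLVM
// intrinsic, whereas SVEMAP2 leaves the intrinsic field as 0 for builtins
// that are lowered through custom codegen rather than a 1:1 intrinsic call.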
1631static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
1632#define GET_SVE_LLVM_INTRINSIC_MAP
1633#include "clang/Basic/arm_sve_builtin_cg.inc"
1634#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
1635#undef GET_SVE_LLVM_INTRINSIC_MAP
1636};
1637
1638#undef SVEMAP1
1639#undef SVEMAP2
1640
1641#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
1642 { \
1643 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
1644 TypeModifier \
1645 }
1646
1647#define SMEMAP2(NameBase, TypeModifier) \
1648 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
1649static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
1650#define GET_SME_LLVM_INTRINSIC_MAP
1651#include "clang/Basic/arm_sme_builtin_cg.inc"
1652#undef GET_SME_LLVM_INTRINSIC_MAP
1653};
1654
1655#undef SMEMAP1
1656#undef SMEMAP2
1657
1658static bool NEONSIMDIntrinsicsProvenSorted = false;
1659
1660static bool AArch64SIMDIntrinsicsProvenSorted = false;
1661static bool AArch64SISDIntrinsicsProvenSorted = false;
1662static bool AArch64SVEIntrinsicsProvenSorted = false;
1663static bool AArch64SMEIntrinsicsProvenSorted = false;
1664
1665static const ARMVectorIntrinsicInfo *
1666findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
1667 unsigned BuiltinID, bool &MapProvenSorted) {
1668
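 // The maps are binary-searched via llvm::lower_bound below, so in asserts
 // builds check once per table that its entries are sorted by builtin ID.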
1669#ifndef NDEBUG
1670 if (!MapProvenSorted) {
1671 assert(llvm::is_sorted(IntrinsicMap));
1672 MapProvenSorted = true;
1673 }
1674#endif
1675
1676 const ARMVectorIntrinsicInfo *Builtin =
1677 llvm::lower_bound(IntrinsicMap, BuiltinID);
1678
1679 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
1680 return Builtin;
1681
1682 return nullptr;
1683}
1684
1685Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
1686 unsigned Modifier,
1687 llvm::Type *ArgType,
1688 const CallExpr *E) {
1689 int VectorSize = 0;
1690 if (Modifier & Use64BitVectors)
1691 VectorSize = 64;
1692 else if (Modifier & Use128BitVectors)
1693 VectorSize = 128;
1694
1695 // Return type.
1696 SmallVector<llvm::Type *, 3> Tys;
1697 if (Modifier & AddRetType) {
1698 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
1699 if (Modifier & VectorizeRetType)
1700 Ty = llvm::FixedVectorType::get(
1701 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
1702
1703 Tys.push_back(Ty);
1704 }
1705
1706 // Arguments.
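 // For illustration: with Use64BitVectors an i16 scalar argument is widened
 // to <4 x i16> (64 bits / 16-bit lanes); with no vector-size modifier the
 // element count works out to 1.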
1707 if (Modifier & VectorizeArgTypes) {
1708 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
1709 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
1710 }
1711
1712 if (Modifier & (Add1ArgType | Add2ArgTypes))
1713 Tys.push_back(ArgType);
1714
1715 if (Modifier & Add2ArgTypes)
1716 Tys.push_back(ArgType);
1717
1718 if (Modifier & InventFloatType)
1719 Tys.push_back(FloatTy);
1720
1721 return CGM.getIntrinsic(IntrinsicID, Tys);
1722}
1723
1724static Value *EmitCommonNeonSISDBuiltinExpr(
1725 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
1726 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
1727 unsigned BuiltinID = SISDInfo.BuiltinID;
1728 unsigned int Int = SISDInfo.LLVMIntrinsic;
1729 unsigned Modifier = SISDInfo.TypeModifier;
1730 const char *s = SISDInfo.NameHint;
1731
1732 switch (BuiltinID) {
1733 case NEON::BI__builtin_neon_vcled_s64:
1734 case NEON::BI__builtin_neon_vcled_u64:
1735 case NEON::BI__builtin_neon_vcles_f32:
1736 case NEON::BI__builtin_neon_vcled_f64:
1737 case NEON::BI__builtin_neon_vcltd_s64:
1738 case NEON::BI__builtin_neon_vcltd_u64:
1739 case NEON::BI__builtin_neon_vclts_f32:
1740 case NEON::BI__builtin_neon_vcltd_f64:
1741 case NEON::BI__builtin_neon_vcales_f32:
1742 case NEON::BI__builtin_neon_vcaled_f64:
1743 case NEON::BI__builtin_neon_vcalts_f32:
1744 case NEON::BI__builtin_neon_vcaltd_f64:
1745 // Only one direction of comparisons actually exists; cmle is really a cmge
1746 // with swapped operands. The table gives us the right intrinsic, but we
1747 // still need to do the swap.
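 // e.g. "a <= b" is emitted as "b >= a" once the operands are swapped below.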
1748 std::swap(Ops[0], Ops[1]);
1749 break;
1750 }
1751
1752 assert(Int && "Generic code assumes a valid intrinsic");
1753
1754 // Determine the type(s) of this overloaded AArch64 intrinsic.
1755 const Expr *Arg = E->getArg(0);
1756 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
1757 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
1758
1759 int j = 0;
1760 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
1761 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
1762 ai != ae; ++ai, ++j) {
1763 llvm::Type *ArgTy = ai->getType();
1764 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
1765 ArgTy->getPrimitiveSizeInBits())
1766 continue;
1767
1768 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
1769 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
1770 // it before inserting.
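 // For illustration: an i32 immediate feeding a <4 x i16> parameter is
 // truncated to i16 and then inserted into lane 0 of a poison vector.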
1771 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
1772 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
1773 Ops[j] =
1774 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
1775 }
1776
1777 Value *Result = CGF.EmitNeonCall(F, Ops, s);
1778 llvm::Type *ResultType = CGF.ConvertType(E->getType());
1779 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
1780 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
1781 return CGF.Builder.CreateExtractElement(Result, C0);
1782
1783 return CGF.Builder.CreateBitCast(Result, ResultType, s);
1784}
1785
1786Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
1787 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
1788 const char *NameHint, unsigned Modifier, const CallExpr *E,
1789 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
1790 llvm::Triple::ArchType Arch) {
1791 // Get the last argument, which specifies the vector type.
1792 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
1793 std::optional<llvm::APSInt> NeonTypeConst =
1794 Arg->getIntegerConstantExpr(getContext());
1795 if (!NeonTypeConst)
1796 return nullptr;
1797
1798 // Determine the type of this overloaded NEON intrinsic.
1799 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
1800 const bool Usgn = Type.isUnsigned();
1801 const bool Quad = Type.isQuad();
1802 const bool Floating = Type.isFloatingPoint();
1803 const bool HasFastHalfType = getTarget().hasFastHalfType();
1804 const bool AllowBFloatArgsAndRet =
1805 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
1806
1807 llvm::FixedVectorType *VTy =
1808 GetNeonType(this, Type, HasFastHalfType, false, AllowBFloatArgsAndRet);
1809 llvm::Type *Ty = VTy;
1810 if (!Ty)
1811 return nullptr;
1812
1813 auto getAlignmentValue32 = [&](Address addr) -> Value* {
1814 return Builder.getInt32(addr.getAlignment().getQuantity());
1815 };
1816
1817 unsigned Int = LLVMIntrinsic;
1818 if ((Modifier & UnsignedAlts) && !Usgn)
1819 Int = AltLLVMIntrinsic;
1820
1821 switch (BuiltinID) {
1822 default: break;
1823 case NEON::BI__builtin_neon_splat_lane_v:
1824 case NEON::BI__builtin_neon_splat_laneq_v:
1825 case NEON::BI__builtin_neon_splatq_lane_v:
1826 case NEON::BI__builtin_neon_splatq_laneq_v: {
1827 auto NumElements = VTy->getElementCount();
1828 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
1829 NumElements = NumElements * 2;
1830 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
1831 NumElements = NumElements.divideCoefficientBy(2);
1832
1833 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
1834 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
1835 }
1836 case NEON::BI__builtin_neon_vpadd_v:
1837 case NEON::BI__builtin_neon_vpaddq_v:
1838 // We don't allow fp/int overloading of intrinsics.
1839 if (VTy->getElementType()->isFloatingPointTy() &&
1840 Int == Intrinsic::aarch64_neon_addp)
1841 Int = Intrinsic::aarch64_neon_faddp;
1842 break;
1843 case NEON::BI__builtin_neon_vabs_v:
1844 case NEON::BI__builtin_neon_vabsq_v:
1845 if (VTy->getElementType()->isFloatingPointTy())
1846 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
1847 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
1848 case NEON::BI__builtin_neon_vadd_v:
1849 case NEON::BI__builtin_neon_vaddq_v: {
1850 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
1851 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
1852 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
1853 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
1854 return Builder.CreateBitCast(Ops[0], Ty);
1855 }
1856 case NEON::BI__builtin_neon_vaddhn_v: {
1857 llvm::FixedVectorType *SrcTy =
1858 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1859
1860 // %sum = add <4 x i32> %lhs, %rhs
1861 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1862 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
1863 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
1864
1865 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
1866 Constant *ShiftAmt =
1867 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
1868 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
1869
1870 // %res = trunc <4 x i32> %high to <4 x i16>
1871 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
1872 }
1873 case NEON::BI__builtin_neon_vcale_v:
1874 case NEON::BI__builtin_neon_vcaleq_v:
1875 case NEON::BI__builtin_neon_vcalt_v:
1876 case NEON::BI__builtin_neon_vcaltq_v:
1877 std::swap(Ops[0], Ops[1]);
1878 [[fallthrough]];
1879 case NEON::BI__builtin_neon_vcage_v:
1880 case NEON::BI__builtin_neon_vcageq_v:
1881 case NEON::BI__builtin_neon_vcagt_v:
1882 case NEON::BI__builtin_neon_vcagtq_v: {
1883 llvm::Type *Ty;
1884 switch (VTy->getScalarSizeInBits()) {
1885 default: llvm_unreachable("unexpected type");
1886 case 32:
1887 Ty = FloatTy;
1888 break;
1889 case 64:
1890 Ty = DoubleTy;
1891 break;
1892 case 16:
1893 Ty = HalfTy;
1894 break;
1895 }
1896 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
1897 llvm::Type *Tys[] = { VTy, VecFlt };
1898 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1899 return EmitNeonCall(F, Ops, NameHint);
1900 }
1901 case NEON::BI__builtin_neon_vceqz_v:
1902 case NEON::BI__builtin_neon_vceqzq_v:
1903 return EmitAArch64CompareBuiltinExpr(
1904 Ops[0], Ty, Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ, "vceqz");
1905 case NEON::BI__builtin_neon_vcgez_v:
1906 case NEON::BI__builtin_neon_vcgezq_v:
1907 return EmitAArch64CompareBuiltinExpr(
1908 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
1909 "vcgez");
1910 case NEON::BI__builtin_neon_vclez_v:
1911 case NEON::BI__builtin_neon_vclezq_v:
1912 return EmitAArch64CompareBuiltinExpr(
1913 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
1914 "vclez");
1915 case NEON::BI__builtin_neon_vcgtz_v:
1916 case NEON::BI__builtin_neon_vcgtzq_v:
1917 return EmitAArch64CompareBuiltinExpr(
1918 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
1919 "vcgtz");
1920 case NEON::BI__builtin_neon_vcltz_v:
1921 case NEON::BI__builtin_neon_vcltzq_v:
1922 return EmitAArch64CompareBuiltinExpr(
1923 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
1924 "vcltz");
1925 case NEON::BI__builtin_neon_vclz_v:
1926 case NEON::BI__builtin_neon_vclzq_v:
1927 // We generate a target-independent intrinsic, which needs a second argument
1928 // for whether or not clz of zero is undefined; on ARM it isn't.
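 // (This is the i1 "is_zero_poison" operand of the generic llvm.ctlz
 // intrinsic; passing false keeps ctlz(0) well defined.)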
1929 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
1930 break;
1931 case NEON::BI__builtin_neon_vcvt_f32_v:
1932 case NEON::BI__builtin_neon_vcvtq_f32_v:
1933 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1934 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
1935 HasFastHalfType);
1936 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1937 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1938 case NEON::BI__builtin_neon_vcvt_f16_s16:
1939 case NEON::BI__builtin_neon_vcvt_f16_u16:
1940 case NEON::BI__builtin_neon_vcvtq_f16_s16:
1941 case NEON::BI__builtin_neon_vcvtq_f16_u16:
1942 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1943 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
1944 HasFastHalfType);
1945 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1946 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1947 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
1948 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
1949 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
1950 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
1951 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
1952 Function *F = CGM.getIntrinsic(Int, Tys);
1953 return EmitNeonCall(F, Ops, "vcvt_n");
1954 }
1955 case NEON::BI__builtin_neon_vcvt_n_f32_v:
1956 case NEON::BI__builtin_neon_vcvt_n_f64_v:
1957 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
1958 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
1959 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
1960 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
1961 Function *F = CGM.getIntrinsic(Int, Tys);
1962 return EmitNeonCall(F, Ops, "vcvt_n");
1963 }
1964 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
1965 case NEON::BI__builtin_neon_vcvt_n_s32_v:
1966 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
1967 case NEON::BI__builtin_neon_vcvt_n_u32_v:
1968 case NEON::BI__builtin_neon_vcvt_n_s64_v:
1969 case NEON::BI__builtin_neon_vcvt_n_u64_v:
1970 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
1971 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
1972 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
1973 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
1974 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
1975 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
1976 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
1977 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1978 return EmitNeonCall(F, Ops, "vcvt_n");
1979 }
1980 case NEON::BI__builtin_neon_vcvt_s32_v:
1981 case NEON::BI__builtin_neon_vcvt_u32_v:
1982 case NEON::BI__builtin_neon_vcvt_s64_v:
1983 case NEON::BI__builtin_neon_vcvt_u64_v:
1984 case NEON::BI__builtin_neon_vcvt_s16_f16:
1985 case NEON::BI__builtin_neon_vcvt_u16_f16:
1986 case NEON::BI__builtin_neon_vcvtq_s32_v:
1987 case NEON::BI__builtin_neon_vcvtq_u32_v:
1988 case NEON::BI__builtin_neon_vcvtq_s64_v:
1989 case NEON::BI__builtin_neon_vcvtq_u64_v:
1990 case NEON::BI__builtin_neon_vcvtq_s16_f16:
1991 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
1992 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
1993 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
1994 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
1995 }
1996 case NEON::BI__builtin_neon_vcvta_s16_f16:
1997 case NEON::BI__builtin_neon_vcvta_s32_v:
1998 case NEON::BI__builtin_neon_vcvta_s64_v:
1999 case NEON::BI__builtin_neon_vcvta_u16_f16:
2000 case NEON::BI__builtin_neon_vcvta_u32_v:
2001 case NEON::BI__builtin_neon_vcvta_u64_v:
2002 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
2003 case NEON::BI__builtin_neon_vcvtaq_s32_v:
2004 case NEON::BI__builtin_neon_vcvtaq_s64_v:
2005 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
2006 case NEON::BI__builtin_neon_vcvtaq_u32_v:
2007 case NEON::BI__builtin_neon_vcvtaq_u64_v:
2008 case NEON::BI__builtin_neon_vcvtn_s16_f16:
2009 case NEON::BI__builtin_neon_vcvtn_s32_v:
2010 case NEON::BI__builtin_neon_vcvtn_s64_v:
2011 case NEON::BI__builtin_neon_vcvtn_u16_f16:
2012 case NEON::BI__builtin_neon_vcvtn_u32_v:
2013 case NEON::BI__builtin_neon_vcvtn_u64_v:
2014 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
2015 case NEON::BI__builtin_neon_vcvtnq_s32_v:
2016 case NEON::BI__builtin_neon_vcvtnq_s64_v:
2017 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
2018 case NEON::BI__builtin_neon_vcvtnq_u32_v:
2019 case NEON::BI__builtin_neon_vcvtnq_u64_v:
2020 case NEON::BI__builtin_neon_vcvtp_s16_f16:
2021 case NEON::BI__builtin_neon_vcvtp_s32_v:
2022 case NEON::BI__builtin_neon_vcvtp_s64_v:
2023 case NEON::BI__builtin_neon_vcvtp_u16_f16:
2024 case NEON::BI__builtin_neon_vcvtp_u32_v:
2025 case NEON::BI__builtin_neon_vcvtp_u64_v:
2026 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
2027 case NEON::BI__builtin_neon_vcvtpq_s32_v:
2028 case NEON::BI__builtin_neon_vcvtpq_s64_v:
2029 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
2030 case NEON::BI__builtin_neon_vcvtpq_u32_v:
2031 case NEON::BI__builtin_neon_vcvtpq_u64_v:
2032 case NEON::BI__builtin_neon_vcvtm_s16_f16:
2033 case NEON::BI__builtin_neon_vcvtm_s32_v:
2034 case NEON::BI__builtin_neon_vcvtm_s64_v:
2035 case NEON::BI__builtin_neon_vcvtm_u16_f16:
2036 case NEON::BI__builtin_neon_vcvtm_u32_v:
2037 case NEON::BI__builtin_neon_vcvtm_u64_v:
2038 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
2039 case NEON::BI__builtin_neon_vcvtmq_s32_v:
2040 case NEON::BI__builtin_neon_vcvtmq_s64_v:
2041 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
2042 case NEON::BI__builtin_neon_vcvtmq_u32_v:
2043 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
2044 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
2045 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
2046 }
2047 case NEON::BI__builtin_neon_vcvtx_f32_v: {
2048 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
2049 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
2050
2051 }
2052 case NEON::BI__builtin_neon_vext_v:
2053 case NEON::BI__builtin_neon_vextq_v: {
2054 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
2055 SmallVector<int, 16> Indices;
2056 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
2057 Indices.push_back(i+CV);
2058
2059 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2060 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2061 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
2062 }
2063 case NEON::BI__builtin_neon_vfma_v:
2064 case NEON::BI__builtin_neon_vfmaq_v: {
2065 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2066 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2067 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2068
2069 // The NEON intrinsic puts the accumulator first, unlike LLVM's fma.
2070 return emitCallMaybeConstrainedFPBuiltin(
2071 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
2072 {Ops[1], Ops[2], Ops[0]});
2073 }
2074 case NEON::BI__builtin_neon_vld1_v:
2075 case NEON::BI__builtin_neon_vld1q_v: {
2076 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2077 Ops.push_back(getAlignmentValue32(PtrOp0));
2078 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
2079 }
2080 case NEON::BI__builtin_neon_vld1_x2_v:
2081 case NEON::BI__builtin_neon_vld1q_x2_v:
2082 case NEON::BI__builtin_neon_vld1_x3_v:
2083 case NEON::BI__builtin_neon_vld1q_x3_v:
2084 case NEON::BI__builtin_neon_vld1_x4_v:
2085 case NEON::BI__builtin_neon_vld1q_x4_v: {
2086 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
2087 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
2088 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
2089 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
2090 }
2091 case NEON::BI__builtin_neon_vld2_v:
2092 case NEON::BI__builtin_neon_vld2q_v:
2093 case NEON::BI__builtin_neon_vld3_v:
2094 case NEON::BI__builtin_neon_vld3q_v:
2095 case NEON::BI__builtin_neon_vld4_v:
2096 case NEON::BI__builtin_neon_vld4q_v:
2097 case NEON::BI__builtin_neon_vld2_dup_v:
2098 case NEON::BI__builtin_neon_vld2q_dup_v:
2099 case NEON::BI__builtin_neon_vld3_dup_v:
2100 case NEON::BI__builtin_neon_vld3q_dup_v:
2101 case NEON::BI__builtin_neon_vld4_dup_v:
2102 case NEON::BI__builtin_neon_vld4q_dup_v: {
2103 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2104 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
2105 Value *Align = getAlignmentValue32(PtrOp1);
2106 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
2107 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
2108 }
2109 case NEON::BI__builtin_neon_vld1_dup_v:
2110 case NEON::BI__builtin_neon_vld1q_dup_v: {
2111 Value *V = PoisonValue::get(Ty);
2112 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
2113 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
2114 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
2115 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
2116 return EmitNeonSplat(Ops[0], CI);
2117 }
2118 case NEON::BI__builtin_neon_vld2_lane_v:
2119 case NEON::BI__builtin_neon_vld2q_lane_v:
2120 case NEON::BI__builtin_neon_vld3_lane_v:
2121 case NEON::BI__builtin_neon_vld3q_lane_v:
2122 case NEON::BI__builtin_neon_vld4_lane_v:
2123 case NEON::BI__builtin_neon_vld4q_lane_v: {
2124 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2125 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
2126 for (unsigned I = 2; I < Ops.size() - 1; ++I)
2127 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
2128 Ops.push_back(getAlignmentValue32(PtrOp1));
2129 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
2130 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
2131 }
2132 case NEON::BI__builtin_neon_vmovl_v: {
2133 llvm::FixedVectorType *DTy =
2134 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
2135 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
2136 if (Usgn)
2137 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
2138 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
2139 }
2140 case NEON::BI__builtin_neon_vmovn_v: {
2141 llvm::FixedVectorType *QTy =
2142 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2143 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
2144 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
2145 }
2146 case NEON::BI__builtin_neon_vmull_v:
2147 // FIXME: the integer vmull operations could be emitted in terms of pure
2148 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
2149 // hoisting the exts outside loops. Until global ISel comes along that can
2150 // see through such movement, this leads to bad CodeGen. So we need an
2151 // intrinsic for now.
2152 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
2153 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
2154 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
2155 case NEON::BI__builtin_neon_vpadal_v:
2156 case NEON::BI__builtin_neon_vpadalq_v: {
2157 // The source operand type has twice as many elements of half the size.
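 // e.g. a <4 x i32> vpadal result accumulates pairwise sums of an <8 x i16>
 // source vector.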
2158 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2159 llvm::Type *EltTy =
2160 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
2161 auto *NarrowTy =
2162 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
2163 llvm::Type *Tys[2] = { Ty, NarrowTy };
2164 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
2165 }
2166 case NEON::BI__builtin_neon_vpaddl_v:
2167 case NEON::BI__builtin_neon_vpaddlq_v: {
2168 // The source operand type has twice as many elements of half the size.
2169 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2170 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
2171 auto *NarrowTy =
2172 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
2173 llvm::Type *Tys[2] = { Ty, NarrowTy };
2174 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
2175 }
2176 case NEON::BI__builtin_neon_vqdmlal_v:
2177 case NEON::BI__builtin_neon_vqdmlsl_v: {
2178 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
2179 Ops[1] =
2180 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
2181 Ops.resize(2);
2182 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
2183 }
2184 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
2185 case NEON::BI__builtin_neon_vqdmulh_lane_v:
2186 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
2187 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
2188 auto *RTy = cast<llvm::FixedVectorType>(Ty);
2189 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
2190 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
2191 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
2192 RTy->getNumElements() * 2);
2193 llvm::Type *Tys[2] = {
2194 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
2195 /*isQuad*/ false))};
2196 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
2197 }
2198 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
2199 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
2200 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
2201 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
2202 llvm::Type *Tys[2] = {
2203 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
2204 /*isQuad*/ true))};
2205 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
2206 }
2207 case NEON::BI__builtin_neon_vqshl_n_v:
2208 case NEON::BI__builtin_neon_vqshlq_n_v:
2209 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
2210 1, false);
2211 case NEON::BI__builtin_neon_vqshlu_n_v:
2212 case NEON::BI__builtin_neon_vqshluq_n_v:
2213 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
2214 1, false);
2215 case NEON::BI__builtin_neon_vrecpe_v:
2216 case NEON::BI__builtin_neon_vrecpeq_v:
2217 case NEON::BI__builtin_neon_vrsqrte_v:
2218 case NEON::BI__builtin_neon_vrsqrteq_v:
2219 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
2220 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
2221 case NEON::BI__builtin_neon_vrndi_v:
2222 case NEON::BI__builtin_neon_vrndiq_v:
2223 Int = Builder.getIsFPConstrained()
2224 ? Intrinsic::experimental_constrained_nearbyint
2225 : Intrinsic::nearbyint;
2226 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
2227 case NEON::BI__builtin_neon_vrshr_n_v:
2228 case NEON::BI__builtin_neon_vrshrq_n_v:
2229 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
2230 1, true);
2231 case NEON::BI__builtin_neon_vsha512hq_u64:
2232 case NEON::BI__builtin_neon_vsha512h2q_u64:
2233 case NEON::BI__builtin_neon_vsha512su0q_u64:
2234 case NEON::BI__builtin_neon_vsha512su1q_u64: {
2235 Function *F = CGM.getIntrinsic(Int);
2236 return EmitNeonCall(F, Ops, "");
2237 }
2238 case NEON::BI__builtin_neon_vshl_n_v:
2239 case NEON::BI__builtin_neon_vshlq_n_v:
2240 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
2241 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
2242 "vshl_n");
2243 case NEON::BI__builtin_neon_vshll_n_v: {
2244 llvm::FixedVectorType *SrcTy =
2245 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
2246 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2247 if (Usgn)
2248 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
2249 else
2250 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
2251 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
2252 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
2253 }
2254 case NEON::BI__builtin_neon_vshrn_n_v: {
2255 llvm::FixedVectorType *SrcTy =
2256 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2257 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2258 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
2259 if (Usgn)
2260 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
2261 else
2262 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
2263 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
2264 }
2265 case NEON::BI__builtin_neon_vshr_n_v:
2266 case NEON::BI__builtin_neon_vshrq_n_v:
2267 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
2268 case NEON::BI__builtin_neon_vst1_v:
2269 case NEON::BI__builtin_neon_vst1q_v:
2270 case NEON::BI__builtin_neon_vst2_v:
2271 case NEON::BI__builtin_neon_vst2q_v:
2272 case NEON::BI__builtin_neon_vst3_v:
2273 case NEON::BI__builtin_neon_vst3q_v:
2274 case NEON::BI__builtin_neon_vst4_v:
2275 case NEON::BI__builtin_neon_vst4q_v:
2276 case NEON::BI__builtin_neon_vst2_lane_v:
2277 case NEON::BI__builtin_neon_vst2q_lane_v:
2278 case NEON::BI__builtin_neon_vst3_lane_v:
2279 case NEON::BI__builtin_neon_vst3q_lane_v:
2280 case NEON::BI__builtin_neon_vst4_lane_v:
2281 case NEON::BI__builtin_neon_vst4q_lane_v: {
2282 llvm::Type *Tys[] = {Int8PtrTy, Ty};
2283 Ops.push_back(getAlignmentValue32(PtrOp0));
2284 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
2285 }
2286 case NEON::BI__builtin_neon_vsm3partw1q_u32:
2287 case NEON::BI__builtin_neon_vsm3partw2q_u32:
2288 case NEON::BI__builtin_neon_vsm3ss1q_u32:
2289 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
2290 case NEON::BI__builtin_neon_vsm4eq_u32: {
2291 Function *F = CGM.getIntrinsic(Int);
2292 return EmitNeonCall(F, Ops, "");
2293 }
2294 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
2295 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
2296 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
2297 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
2298 Function *F = CGM.getIntrinsic(Int);
2299 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
2300 return EmitNeonCall(F, Ops, "");
2301 }
2302 case NEON::BI__builtin_neon_vst1_x2_v:
2303 case NEON::BI__builtin_neon_vst1q_x2_v:
2304 case NEON::BI__builtin_neon_vst1_x3_v:
2305 case NEON::BI__builtin_neon_vst1q_x3_v:
2306 case NEON::BI__builtin_neon_vst1_x4_v:
2307 case NEON::BI__builtin_neon_vst1q_x4_v: {
2308  // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
2309  // in AArch64 it comes last. We may want to standardize on one or the other.
2310 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
2311 Arch == llvm::Triple::aarch64_32) {
2312 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
2313 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
2314 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
2315 }
2316 llvm::Type *Tys[2] = {DefaultPtrTy, VTy};
2317 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
2318 }
2319 case NEON::BI__builtin_neon_vsubhn_v: {
2320 llvm::FixedVectorType *SrcTy =
2321 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2322
2323 // %sum = add <4 x i32> %lhs, %rhs
2324 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2325 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
2326 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
2327
2328 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
2329 Constant *ShiftAmt =
2330 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
2331 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
2332
2333 // %res = trunc <4 x i32> %high to <4 x i16>
2334 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
2335 }
2336 case NEON::BI__builtin_neon_vtrn_v:
2337 case NEON::BI__builtin_neon_vtrnq_v: {
2338 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2339 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2340 Value *SV = nullptr;
2341
2342 for (unsigned vi = 0; vi != 2; ++vi) {
2343 SmallVector<int, 16> Indices;
2344 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2345 Indices.push_back(i+vi);
2346 Indices.push_back(i+e+vi);
2347 }
2348 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
2349 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
2350 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
2351 }
2352 return SV;
2353 }
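  // Illustrative example (not in the original source): for two <4 x i16>
  // inputs {a0,a1,a2,a3} and {b0,b1,b2,b3}, the two stores above receive
  // {a0,b0,a2,b2} (vi == 0) and {a1,b1,a3,b3} (vi == 1), i.e. TRN1/TRN2.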
2354 case NEON::BI__builtin_neon_vtst_v:
2355 case NEON::BI__builtin_neon_vtstq_v: {
2356 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2357 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2358 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
2359 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
2360 ConstantAggregateZero::get(Ty));
2361 return Builder.CreateSExt(Ops[0], Ty, "vtst");
2362 }
2363 case NEON::BI__builtin_neon_vuzp_v:
2364 case NEON::BI__builtin_neon_vuzpq_v: {
2365 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2366 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2367 Value *SV = nullptr;
2368
2369 for (unsigned vi = 0; vi != 2; ++vi) {
2370 SmallVector<int, 16> Indices;
2371 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
2372 Indices.push_back(2*i+vi);
2373
2374 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
2375 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
2376 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
2377 }
2378 return SV;
2379 }
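  // Illustrative example (not in the original source): for two <4 x i16>
  // inputs {a0,...,a3} and {b0,...,b3}, the stores receive {a0,a2,b0,b2}
  // (vi == 0) and {a1,a3,b1,b3} (vi == 1), i.e. UZP1/UZP2.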
2380 case NEON::BI__builtin_neon_vxarq_u64: {
2381 Function *F = CGM.getIntrinsic(Int);
2382 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
2383 return EmitNeonCall(F, Ops, "");
2384 }
2385 case NEON::BI__builtin_neon_vzip_v:
2386 case NEON::BI__builtin_neon_vzipq_v: {
2387 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2388 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2389 Value *SV = nullptr;
2390
2391 for (unsigned vi = 0; vi != 2; ++vi) {
2392 SmallVector<int, 16> Indices;
2393 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2394 Indices.push_back((i + vi*e) >> 1);
2395 Indices.push_back(((i + vi*e) >> 1)+e);
2396 }
2397 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
2398 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
2399 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
2400 }
2401 return SV;
2402 }
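  // Illustrative example (not in the original source): for two <4 x i16>
  // inputs {a0,...,a3} and {b0,...,b3}, the stores receive {a0,b0,a1,b1}
  // (vi == 0) and {a2,b2,a3,b3} (vi == 1), i.e. ZIP1/ZIP2.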
2403 case NEON::BI__builtin_neon_vdot_s32:
2404 case NEON::BI__builtin_neon_vdot_u32:
2405 case NEON::BI__builtin_neon_vdotq_s32:
2406 case NEON::BI__builtin_neon_vdotq_u32: {
2407 auto *InputTy =
2408 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2409 llvm::Type *Tys[2] = { Ty, InputTy };
2410 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
2411 }
2412 case NEON::BI__builtin_neon_vfmlal_low_f16:
2413 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
2414 auto *InputTy =
2415 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2416 llvm::Type *Tys[2] = { Ty, InputTy };
2417 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
2418 }
2419 case NEON::BI__builtin_neon_vfmlsl_low_f16:
2420 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
2421 auto *InputTy =
2422 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2423 llvm::Type *Tys[2] = { Ty, InputTy };
2424 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
2425 }
2426 case NEON::BI__builtin_neon_vfmlal_high_f16:
2427 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
2428 auto *InputTy =
2429 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2430 llvm::Type *Tys[2] = { Ty, InputTy };
2431 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
2432 }
2433 case NEON::BI__builtin_neon_vfmlsl_high_f16:
2434 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
2435 auto *InputTy =
2436 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2437 llvm::Type *Tys[2] = { Ty, InputTy };
2438 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
2439 }
2440 case NEON::BI__builtin_neon_vmmlaq_s32:
2441 case NEON::BI__builtin_neon_vmmlaq_u32: {
2442 auto *InputTy =
2443 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2444 llvm::Type *Tys[2] = { Ty, InputTy };
2445 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
2446 }
2447 case NEON::BI__builtin_neon_vusmmlaq_s32: {
2448 auto *InputTy =
2449 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2450 llvm::Type *Tys[2] = { Ty, InputTy };
2451 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
2452 }
2453 case NEON::BI__builtin_neon_vusdot_s32:
2454 case NEON::BI__builtin_neon_vusdotq_s32: {
2455 auto *InputTy =
2456 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2457 llvm::Type *Tys[2] = { Ty, InputTy };
2458 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
2459 }
2460 case NEON::BI__builtin_neon_vbfdot_f32:
2461 case NEON::BI__builtin_neon_vbfdotq_f32: {
2462 llvm::Type *InputTy =
2463 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
2464 llvm::Type *Tys[2] = { Ty, InputTy };
2465 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
2466 }
2467 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
2468 llvm::Type *Tys[1] = { Ty };
2469 Function *F = CGM.getIntrinsic(Int, Tys);
2470 return EmitNeonCall(F, Ops, "vcvtfp2bf");
2471 }
2472
2473 }
2474
2475 assert(Int && "Expected valid intrinsic number");
2476
2477 // Determine the type(s) of this overloaded AArch64 intrinsic.
2478 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
2479
2480 Value *Result = EmitNeonCall(F, Ops, NameHint);
2481 llvm::Type *ResultType = ConvertType(E->getType());
2482  // For AArch64 intrinsics that return a one-element vector, cast the
2483  // result to the scalar type expected by the builtin.
2484 return Builder.CreateBitCast(Result, ResultType, NameHint);
2485}
2486
2487Value *
2488CodeGenFunction::EmitAArch64CompareBuiltinExpr(Value *Op, llvm::Type *Ty,
2489                                               const CmpInst::Predicate Pred,
2490                                               const Twine &Name) {
2491
2492 if (isa<FixedVectorType>(Ty)) {
2493 // Vector types are cast to i8 vectors. Recover original type.
2494 Op = Builder.CreateBitCast(Op, Ty);
2495 }
2496
2497 if (CmpInst::isFPPredicate(Pred)) {
2498 if (Pred == CmpInst::FCMP_OEQ)
2499 Op = Builder.CreateFCmp(Pred, Op, Constant::getNullValue(Op->getType()));
2500 else
2501 Op = Builder.CreateFCmpS(Pred, Op, Constant::getNullValue(Op->getType()));
2502 } else {
2503 Op = Builder.CreateICmp(Pred, Op, Constant::getNullValue(Op->getType()));
2504 }
2505
2506 llvm::Type *ResTy = Ty;
2507 if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
2508 ResTy = FixedVectorType::get(
2509 IntegerType::get(getLLVMContext(), VTy->getScalarSizeInBits()),
2510 VTy->getNumElements());
2511
2512 return Builder.CreateSExt(Op, ResTy, Name);
2513}
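// Illustrative example (not in the original source): a builtin such as
// vceqz_s32 on a <2 x i32> operand reaches this helper with ICMP_EQ and
// lowers to roughly:
//   %cmp = icmp eq <2 x i32> %a, zeroinitializer
//   %res = sext <2 x i1> %cmp to <2 x i32>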
2514
2515static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
2516                                 Value *ExtOp, Value *IndexOp,
2517 llvm::Type *ResTy, unsigned IntID,
2518 const char *Name) {
2519  SmallVector<Value *, 4> TblOps;
2520  if (ExtOp)
2521 TblOps.push_back(ExtOp);
2522
2523 // Build a vector containing sequential number like (0, 1, 2, ..., 15)
2524 SmallVector<int, 16> Indices;
2525 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
2526 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
2527 Indices.push_back(2*i);
2528 Indices.push_back(2*i+1);
2529 }
2530
2531 int PairPos = 0, End = Ops.size() - 1;
2532 while (PairPos < End) {
2533 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
2534 Ops[PairPos+1], Indices,
2535 Name));
2536 PairPos += 2;
2537 }
2538
2539  // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
2540  // of the last 128-bit lookup table with zero.
2541 if (PairPos == End) {
2542 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
2543 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
2544 ZeroTbl, Indices, Name));
2545 }
2546
2547 Function *TblF;
2548 TblOps.push_back(IndexOp);
2549 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
2550
2551 return CGF.EmitNeonCall(TblF, TblOps, Name);
2552}
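// Illustrative example (not in the original source): a vtbl3 lookup passes
// three 64-bit table vectors here; the first two are shuffled into one
// 128-bit table, the third is paired with a zero vector to form the second
// 128-bit table, and a two-table TBL intrinsic is then emitted.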
2553
2554Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
2555 unsigned Value;
2556 switch (BuiltinID) {
2557 default:
2558 return nullptr;
2559 case clang::ARM::BI__builtin_arm_nop:
2560 Value = 0;
2561 break;
2562 case clang::ARM::BI__builtin_arm_yield:
2563 case clang::ARM::BI__yield:
2564 Value = 1;
2565 break;
2566 case clang::ARM::BI__builtin_arm_wfe:
2567 case clang::ARM::BI__wfe:
2568 Value = 2;
2569 break;
2570 case clang::ARM::BI__builtin_arm_wfi:
2571 case clang::ARM::BI__wfi:
2572 Value = 3;
2573 break;
2574 case clang::ARM::BI__builtin_arm_sev:
2575 case clang::ARM::BI__sev:
2576 Value = 4;
2577 break;
2578 case clang::ARM::BI__builtin_arm_sevl:
2579 case clang::ARM::BI__sevl:
2580 Value = 5;
2581 break;
2582 }
2583
2584 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
2585 llvm::ConstantInt::get(Int32Ty, Value));
2586}
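// Illustrative example (not in the original source): __builtin_arm_wfi()
// maps to Value == 3 above and is emitted as
//   call void @llvm.arm.hint(i32 3)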
2587
2588enum SpecialRegisterAccessKind {
2589  NormalRead,
2590  VolatileRead,
2591  Write,
2592};
2593
2594// Generates the IR for the read/write special register builtin.
2595// ValueType is the type of the value that is to be written or read;
2596// RegisterType is the type of the register being written to or read from.
2597static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
2598                                         const CallExpr *E,
2599 llvm::Type *RegisterType,
2600 llvm::Type *ValueType,
2601 SpecialRegisterAccessKind AccessKind,
2602 StringRef SysReg = "") {
2603  // The read/write register intrinsics only support 32-, 64- and 128-bit operations.
2604 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
2605 RegisterType->isIntegerTy(128)) &&
2606 "Unsupported size for register.");
2607
2608 CodeGen::CGBuilderTy &Builder = CGF.Builder;
2609 CodeGen::CodeGenModule &CGM = CGF.CGM;
2610 LLVMContext &Context = CGM.getLLVMContext();
2611
2612 if (SysReg.empty()) {
2613 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
2614 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
2615 }
2616
2617 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
2618 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
2619 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
2620
2621 llvm::Type *Types[] = { RegisterType };
2622
2623 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
2624 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
2625 && "Can't fit 64-bit value in 32-bit register");
2626
2627 if (AccessKind != Write) {
2628 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
2629 llvm::Function *F = CGM.getIntrinsic(
2630 AccessKind == VolatileRead ? Intrinsic::read_volatile_register
2631 : Intrinsic::read_register,
2632 Types);
2633 llvm::Value *Call = Builder.CreateCall(F, Metadata);
2634
2635 if (MixedTypes)
2636 // Read into 64 bit register and then truncate result to 32 bit.
2637 return Builder.CreateTrunc(Call, ValueType);
2638
2639 if (ValueType->isPointerTy())
2640 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
2641 return Builder.CreateIntToPtr(Call, ValueType);
2642
2643 return Call;
2644 }
2645
2646 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
2647 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
2648 if (MixedTypes) {
2649 // Extend 32 bit write value to 64 bit to pass to write.
2650 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
2651 return Builder.CreateCall(F, { Metadata, ArgValue });
2652 }
2653
2654 if (ValueType->isPointerTy()) {
2655 // Have VoidPtrTy ArgValue but want to return an i32/i64.
2656 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
2657 return Builder.CreateCall(F, { Metadata, ArgValue });
2658 }
2659
2660 return Builder.CreateCall(F, { Metadata, ArgValue });
2661}
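// Illustrative example (not in the original source): a 32-bit volatile read
// such as __builtin_arm_rsr(<reg-name>) is emitted as roughly
//   %0 = call i32 @llvm.read_volatile_register.i32(metadata !0)
// where !0 = !{!"<reg-name>"} carries the register-name string.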
2662
2663/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
2664/// argument that specifies the vector type.
2665static bool HasExtraNeonArgument(unsigned BuiltinID) {
2666 switch (BuiltinID) {
2667 default: break;
2668 case NEON::BI__builtin_neon_vget_lane_i8:
2669 case NEON::BI__builtin_neon_vget_lane_i16:
2670 case NEON::BI__builtin_neon_vget_lane_bf16:
2671 case NEON::BI__builtin_neon_vget_lane_i32:
2672 case NEON::BI__builtin_neon_vget_lane_i64:
2673 case NEON::BI__builtin_neon_vget_lane_mf8:
2674 case NEON::BI__builtin_neon_vget_lane_f32:
2675 case NEON::BI__builtin_neon_vgetq_lane_i8:
2676 case NEON::BI__builtin_neon_vgetq_lane_i16:
2677 case NEON::BI__builtin_neon_vgetq_lane_bf16:
2678 case NEON::BI__builtin_neon_vgetq_lane_i32:
2679 case NEON::BI__builtin_neon_vgetq_lane_i64:
2680 case NEON::BI__builtin_neon_vgetq_lane_mf8:
2681 case NEON::BI__builtin_neon_vgetq_lane_f32:
2682 case NEON::BI__builtin_neon_vduph_lane_bf16:
2683 case NEON::BI__builtin_neon_vduph_laneq_bf16:
2684 case NEON::BI__builtin_neon_vset_lane_i8:
2685 case NEON::BI__builtin_neon_vset_lane_mf8:
2686 case NEON::BI__builtin_neon_vset_lane_i16:
2687 case NEON::BI__builtin_neon_vset_lane_bf16:
2688 case NEON::BI__builtin_neon_vset_lane_i32:
2689 case NEON::BI__builtin_neon_vset_lane_i64:
2690 case NEON::BI__builtin_neon_vset_lane_f32:
2691 case NEON::BI__builtin_neon_vsetq_lane_i8:
2692 case NEON::BI__builtin_neon_vsetq_lane_mf8:
2693 case NEON::BI__builtin_neon_vsetq_lane_i16:
2694 case NEON::BI__builtin_neon_vsetq_lane_bf16:
2695 case NEON::BI__builtin_neon_vsetq_lane_i32:
2696 case NEON::BI__builtin_neon_vsetq_lane_i64:
2697 case NEON::BI__builtin_neon_vsetq_lane_f32:
2698 case NEON::BI__builtin_neon_vsha1h_u32:
2699 case NEON::BI__builtin_neon_vsha1cq_u32:
2700 case NEON::BI__builtin_neon_vsha1pq_u32:
2701 case NEON::BI__builtin_neon_vsha1mq_u32:
2702 case NEON::BI__builtin_neon_vcvth_bf16_f32:
2703 case clang::ARM::BI_MoveToCoprocessor:
2704 case clang::ARM::BI_MoveToCoprocessor2:
2705 return false;
2706 }
2707 return true;
2708}
2709
2710Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
2711                                           const CallExpr *E,
2712                                           ReturnValueSlot ReturnValue,
2713                                           llvm::Triple::ArchType Arch) {
2714 if (auto Hint = GetValueForARMHint(BuiltinID))
2715 return Hint;
2716
2717 if (BuiltinID == clang::ARM::BI__emit) {
2718 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
2719 llvm::FunctionType *FTy =
2720 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
2721
2722  Expr::EvalResult Result;
2723  if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
2724 llvm_unreachable("Sema will ensure that the parameter is constant");
2725
2726 llvm::APSInt Value = Result.Val.getInt();
2727 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
2728
2729 llvm::InlineAsm *Emit =
2730 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
2731 /*hasSideEffects=*/true)
2732 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
2733 /*hasSideEffects=*/true);
2734
2735 return Builder.CreateCall(Emit);
2736 }
2737
2738 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
2739 Value *Option = EmitScalarExpr(E->getArg(0));
2740 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
2741 }
2742
2743 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
2744    Value *Address = EmitScalarExpr(E->getArg(0));
2745    Value *RW = EmitScalarExpr(E->getArg(1));
2746 Value *IsData = EmitScalarExpr(E->getArg(2));
2747
2748 // Locality is not supported on ARM target
2749 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
2750
2751 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
2752 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
2753 }
2754
2755 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
2756 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2757 return Builder.CreateCall(
2758 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
2759 }
2760
2761 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
2762 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
2763 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2764 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
2765 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
2766 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
2767 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
2768 return Res;
2769 }
2770
2771
2772 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
2773 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2774 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
2775 }
2776 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
2777 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2778 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
2779 "cls");
2780 }
2781
2782 if (BuiltinID == clang::ARM::BI__clear_cache) {
2783 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
2784 const FunctionDecl *FD = E->getDirectCallee();
2785 Value *Ops[2];
2786 for (unsigned i = 0; i < 2; i++)
2787 Ops[i] = EmitScalarExpr(E->getArg(i));
2788 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
2789 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
2790 StringRef Name = FD->getName();
2791 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
2792 }
2793
2794 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
2795 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
2796 Function *F;
2797
2798 switch (BuiltinID) {
2799 default: llvm_unreachable("unexpected builtin");
2800 case clang::ARM::BI__builtin_arm_mcrr:
2801 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
2802 break;
2803 case clang::ARM::BI__builtin_arm_mcrr2:
2804 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
2805 break;
2806 }
2807
2808  // The MCRR{2} instruction has 5 operands, but
2809  // the intrinsic has only 4 because Rt and Rt2
2810  // are represented as a single unsigned 64-bit
2811  // integer in the intrinsic definition, whereas
2812  // the instruction takes them as two separate
2813  // 32-bit registers.
2814
2815 Value *Coproc = EmitScalarExpr(E->getArg(0));
2816 Value *Opc1 = EmitScalarExpr(E->getArg(1));
2817 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
2818 Value *CRm = EmitScalarExpr(E->getArg(3));
2819
2820 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
2821 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
2822 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
2823 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
2824
2825 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
2826 }
2827
2828 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
2829 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
2830 Function *F;
2831
2832 switch (BuiltinID) {
2833 default: llvm_unreachable("unexpected builtin");
2834 case clang::ARM::BI__builtin_arm_mrrc:
2835 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
2836 break;
2837 case clang::ARM::BI__builtin_arm_mrrc2:
2838 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
2839 break;
2840 }
2841
2842 Value *Coproc = EmitScalarExpr(E->getArg(0));
2843 Value *Opc1 = EmitScalarExpr(E->getArg(1));
2844 Value *CRm = EmitScalarExpr(E->getArg(2));
2845 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
2846
2847 // Returns an unsigned 64 bit integer, represented
2848 // as two 32 bit integers.
2849
2850 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
2851 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
2852 Rt = Builder.CreateZExt(Rt, Int64Ty);
2853 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
2854
2855 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
2856 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
2857 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
2858
2859 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
2860 }
2861
2862 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
2863 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2864 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
2865 getContext().getTypeSize(E->getType()) == 64) ||
2866 BuiltinID == clang::ARM::BI__ldrexd) {
2867 Function *F;
2868
2869 switch (BuiltinID) {
2870 default: llvm_unreachable("unexpected builtin");
2871 case clang::ARM::BI__builtin_arm_ldaex:
2872 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
2873 break;
2874 case clang::ARM::BI__builtin_arm_ldrexd:
2875 case clang::ARM::BI__builtin_arm_ldrex:
2876 case clang::ARM::BI__ldrexd:
2877 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
2878 break;
2879 }
2880
2881 Value *LdPtr = EmitScalarExpr(E->getArg(0));
2882 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
2883
2884 Value *Val0 = Builder.CreateExtractValue(Val, 1);
2885 Value *Val1 = Builder.CreateExtractValue(Val, 0);
2886 Val0 = Builder.CreateZExt(Val0, Int64Ty);
2887 Val1 = Builder.CreateZExt(Val1, Int64Ty);
2888
2889 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
2890 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
2891 Val = Builder.CreateOr(Val, Val1);
2892 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
2893 }
2894
2895 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2896 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
2897 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
2898
2899 QualType Ty = E->getType();
2900 llvm::Type *RealResTy = ConvertType(Ty);
2901 llvm::Type *IntTy =
2902 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
2903
2904 Function *F = CGM.getIntrinsic(
2905 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
2906 : Intrinsic::arm_ldrex,
2907 DefaultPtrTy);
2908 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
2909 Val->addParamAttr(
2910 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
2911
2912 if (RealResTy->isPointerTy())
2913 return Builder.CreateIntToPtr(Val, RealResTy);
2914 else {
2915 llvm::Type *IntResTy = llvm::IntegerType::get(
2916 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
2917 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
2918 RealResTy);
2919 }
2920 }
2921
2922 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
2923 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
2924 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
2925 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
2926 Function *F = CGM.getIntrinsic(
2927 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
2928 : Intrinsic::arm_strexd);
2929 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
2930
2931 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
2932 Value *Val = EmitScalarExpr(E->getArg(0));
2933 Builder.CreateStore(Val, Tmp);
2934
2935 Address LdPtr = Tmp.withElementType(STy);
2936 Val = Builder.CreateLoad(LdPtr);
2937
2938 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
2939 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
2940 Value *StPtr = EmitScalarExpr(E->getArg(1));
2941 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
2942 }
2943
2944 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
2945 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
2946 Value *StoreVal = EmitScalarExpr(E->getArg(0));
2947 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
2948
2949 QualType Ty = E->getArg(0)->getType();
2950 llvm::Type *StoreTy =
2951 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
2952
2953 if (StoreVal->getType()->isPointerTy())
2954 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
2955 else {
2956 llvm::Type *IntTy = llvm::IntegerType::get(
2957          getLLVMContext(),
2958          CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
2959 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
2960 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
2961 }
2962
2963 Function *F = CGM.getIntrinsic(
2964 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
2965 : Intrinsic::arm_strex,
2966 StoreAddr->getType());
2967
2968 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
2969 CI->addParamAttr(
2970 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
2971 return CI;
2972 }
2973
2974 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
2975 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
2976 return Builder.CreateCall(F);
2977 }
2978
2979 // CRC32
2980 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
2981 switch (BuiltinID) {
2982 case clang::ARM::BI__builtin_arm_crc32b:
2983 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
2984 case clang::ARM::BI__builtin_arm_crc32cb:
2985 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
2986 case clang::ARM::BI__builtin_arm_crc32h:
2987 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
2988 case clang::ARM::BI__builtin_arm_crc32ch:
2989 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
2990 case clang::ARM::BI__builtin_arm_crc32w:
2991 case clang::ARM::BI__builtin_arm_crc32d:
2992 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
2993 case clang::ARM::BI__builtin_arm_crc32cw:
2994 case clang::ARM::BI__builtin_arm_crc32cd:
2995 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
2996 }
2997
2998 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
2999 Value *Arg0 = EmitScalarExpr(E->getArg(0));
3000 Value *Arg1 = EmitScalarExpr(E->getArg(1));
3001
3002 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
3003 // intrinsics, hence we need different codegen for these cases.
3004 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
3005 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
3006 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
3007 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
3008 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
3009 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
3010
3011 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
3012 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
3013 return Builder.CreateCall(F, {Res, Arg1b});
3014 } else {
3015 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
3016
3017 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
3018 return Builder.CreateCall(F, {Arg0, Arg1});
3019 }
3020 }
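  // Illustrative example (not in the original source): __builtin_arm_crc32d
  // therefore expands to two chained 32-bit CRC calls, roughly
  //   crc32w(crc32w(acc, trunc(x)), trunc(x >> 32)).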
3021
3022 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
3023 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
3024 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
3025 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
3026 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
3027 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
3028
3029 SpecialRegisterAccessKind AccessKind = Write;
3030 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
3031 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
3032 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
3033 AccessKind = VolatileRead;
3034
3035 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
3036 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
3037
3038 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
3039 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
3040
3041 llvm::Type *ValueType;
3042 llvm::Type *RegisterType;
3043 if (IsPointerBuiltin) {
3044 ValueType = VoidPtrTy;
3045      RegisterType = Int32Ty;
3046    } else if (Is64Bit) {
3047 ValueType = RegisterType = Int64Ty;
3048 } else {
3049 ValueType = RegisterType = Int32Ty;
3050 }
3051
3052 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
3053 AccessKind);
3054 }
3055
3056 if (BuiltinID == ARM::BI__builtin_sponentry) {
3057 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
3058 return Builder.CreateCall(F);
3059 }
3060
3061 // Handle MSVC intrinsics before argument evaluation to prevent double
3062 // evaluation.
3063 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
3064 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
3065
3066 // Deal with MVE builtins
3067 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
3068 return Result;
3069 // Handle CDE builtins
3070 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
3071 return Result;
3072
3073  // Some intrinsics are equivalent - if they are, use the base intrinsic ID.
3074 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
3075 return P.first == BuiltinID;
3076 });
3077 if (It != end(NEONEquivalentIntrinsicMap))
3078 BuiltinID = It->second;
3079
3080 // Find out if any arguments are required to be integer constant
3081 // expressions.
3082 unsigned ICEArguments = 0;
3083  ASTContext::GetBuiltinTypeError Error;
3084  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3085 assert(Error == ASTContext::GE_None && "Should not codegen an error");
3086
3087 auto getAlignmentValue32 = [&](Address addr) -> Value* {
3088 return Builder.getInt32(addr.getAlignment().getQuantity());
3089 };
3090
3091 Address PtrOp0 = Address::invalid();
3092 Address PtrOp1 = Address::invalid();
3093  SmallVector<Value*, 4> Ops;
3094  bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
3095 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
3096 for (unsigned i = 0, e = NumArgs; i != e; i++) {
3097 if (i == 0) {
3098 switch (BuiltinID) {
3099 case NEON::BI__builtin_neon_vld1_v:
3100 case NEON::BI__builtin_neon_vld1q_v:
3101 case NEON::BI__builtin_neon_vld1q_lane_v:
3102 case NEON::BI__builtin_neon_vld1_lane_v:
3103 case NEON::BI__builtin_neon_vld1_dup_v:
3104 case NEON::BI__builtin_neon_vld1q_dup_v:
3105 case NEON::BI__builtin_neon_vst1_v:
3106 case NEON::BI__builtin_neon_vst1q_v:
3107 case NEON::BI__builtin_neon_vst1q_lane_v:
3108 case NEON::BI__builtin_neon_vst1_lane_v:
3109 case NEON::BI__builtin_neon_vst2_v:
3110 case NEON::BI__builtin_neon_vst2q_v:
3111 case NEON::BI__builtin_neon_vst2_lane_v:
3112 case NEON::BI__builtin_neon_vst2q_lane_v:
3113 case NEON::BI__builtin_neon_vst3_v:
3114 case NEON::BI__builtin_neon_vst3q_v:
3115 case NEON::BI__builtin_neon_vst3_lane_v:
3116 case NEON::BI__builtin_neon_vst3q_lane_v:
3117 case NEON::BI__builtin_neon_vst4_v:
3118 case NEON::BI__builtin_neon_vst4q_v:
3119 case NEON::BI__builtin_neon_vst4_lane_v:
3120 case NEON::BI__builtin_neon_vst4q_lane_v:
3121 // Get the alignment for the argument in addition to the value;
3122 // we'll use it later.
3123 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
3124 Ops.push_back(PtrOp0.emitRawPointer(*this));
3125 continue;
3126 }
3127 }
3128 if (i == 1) {
3129 switch (BuiltinID) {
3130 case NEON::BI__builtin_neon_vld2_v:
3131 case NEON::BI__builtin_neon_vld2q_v:
3132 case NEON::BI__builtin_neon_vld3_v:
3133 case NEON::BI__builtin_neon_vld3q_v:
3134 case NEON::BI__builtin_neon_vld4_v:
3135 case NEON::BI__builtin_neon_vld4q_v:
3136 case NEON::BI__builtin_neon_vld2_lane_v:
3137 case NEON::BI__builtin_neon_vld2q_lane_v:
3138 case NEON::BI__builtin_neon_vld3_lane_v:
3139 case NEON::BI__builtin_neon_vld3q_lane_v:
3140 case NEON::BI__builtin_neon_vld4_lane_v:
3141 case NEON::BI__builtin_neon_vld4q_lane_v:
3142 case NEON::BI__builtin_neon_vld2_dup_v:
3143 case NEON::BI__builtin_neon_vld2q_dup_v:
3144 case NEON::BI__builtin_neon_vld3_dup_v:
3145 case NEON::BI__builtin_neon_vld3q_dup_v:
3146 case NEON::BI__builtin_neon_vld4_dup_v:
3147 case NEON::BI__builtin_neon_vld4q_dup_v:
3148 // Get the alignment for the argument in addition to the value;
3149 // we'll use it later.
3150 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
3151 Ops.push_back(PtrOp1.emitRawPointer(*this));
3152 continue;
3153 }
3154 }
3155
3156 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
3157 }
3158
3159 switch (BuiltinID) {
3160 default: break;
3161
3162 case NEON::BI__builtin_neon_vget_lane_i8:
3163 case NEON::BI__builtin_neon_vget_lane_i16:
3164 case NEON::BI__builtin_neon_vget_lane_i32:
3165 case NEON::BI__builtin_neon_vget_lane_i64:
3166 case NEON::BI__builtin_neon_vget_lane_bf16:
3167 case NEON::BI__builtin_neon_vget_lane_f32:
3168 case NEON::BI__builtin_neon_vgetq_lane_i8:
3169 case NEON::BI__builtin_neon_vgetq_lane_i16:
3170 case NEON::BI__builtin_neon_vgetq_lane_i32:
3171 case NEON::BI__builtin_neon_vgetq_lane_i64:
3172 case NEON::BI__builtin_neon_vgetq_lane_bf16:
3173 case NEON::BI__builtin_neon_vgetq_lane_f32:
3174 case NEON::BI__builtin_neon_vduph_lane_bf16:
3175 case NEON::BI__builtin_neon_vduph_laneq_bf16:
3176 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
3177
3178 case NEON::BI__builtin_neon_vrndns_f32: {
3179 Value *Arg = EmitScalarExpr(E->getArg(0));
3180 llvm::Type *Tys[] = {Arg->getType()};
3181 Function *F = CGM.getIntrinsic(Intrinsic::roundeven, Tys);
3182 return Builder.CreateCall(F, {Arg}, "vrndn"); }
3183
3184 case NEON::BI__builtin_neon_vset_lane_i8:
3185 case NEON::BI__builtin_neon_vset_lane_i16:
3186 case NEON::BI__builtin_neon_vset_lane_i32:
3187 case NEON::BI__builtin_neon_vset_lane_i64:
3188 case NEON::BI__builtin_neon_vset_lane_bf16:
3189 case NEON::BI__builtin_neon_vset_lane_f32:
3190 case NEON::BI__builtin_neon_vsetq_lane_i8:
3191 case NEON::BI__builtin_neon_vsetq_lane_i16:
3192 case NEON::BI__builtin_neon_vsetq_lane_i32:
3193 case NEON::BI__builtin_neon_vsetq_lane_i64:
3194 case NEON::BI__builtin_neon_vsetq_lane_bf16:
3195 case NEON::BI__builtin_neon_vsetq_lane_f32:
3196 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
3197
3198 case NEON::BI__builtin_neon_vsha1h_u32:
3199 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
3200 "vsha1h");
3201 case NEON::BI__builtin_neon_vsha1cq_u32:
3202 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
3203 "vsha1h");
3204 case NEON::BI__builtin_neon_vsha1pq_u32:
3205 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
3206 "vsha1h");
3207 case NEON::BI__builtin_neon_vsha1mq_u32:
3208 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
3209 "vsha1h");
3210
3211 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
3212 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
3213 "vcvtbfp2bf");
3214 }
3215
3216 // The ARM _MoveToCoprocessor builtins put the input register value as
3217 // the first argument, but the LLVM intrinsic expects it as the third one.
3218 case clang::ARM::BI_MoveToCoprocessor:
3219 case clang::ARM::BI_MoveToCoprocessor2: {
3220 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
3221 ? Intrinsic::arm_mcr
3222 : Intrinsic::arm_mcr2);
3223 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
3224 Ops[3], Ops[4], Ops[5]});
3225 }
3226 }
3227
3228 // Get the last argument, which specifies the vector type.
3229 assert(HasExtraArg);
3230 const Expr *Arg = E->getArg(E->getNumArgs()-1);
3231 std::optional<llvm::APSInt> Result =
3232      Arg->getIntegerConstantExpr(getContext());
3233  if (!Result)
3234 return nullptr;
3235
3236 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
3237 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
3238 // Determine the overloaded type of this builtin.
3239 llvm::Type *Ty;
3240 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
3241 Ty = FloatTy;
3242 else
3243 Ty = DoubleTy;
3244
3245 // Determine whether this is an unsigned conversion or not.
3246 bool usgn = Result->getZExtValue() == 1;
3247 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
3248
3249 // Call the appropriate intrinsic.
3250 Function *F = CGM.getIntrinsic(Int, Ty);
3251 return Builder.CreateCall(F, Ops, "vcvtr");
3252 }
3253
3254 // Determine the type of this overloaded NEON intrinsic.
3255 NeonTypeFlags Type = Result->getZExtValue();
3256 bool usgn = Type.isUnsigned();
3257 bool rightShift = false;
3258
3259 llvm::FixedVectorType *VTy =
3260 GetNeonType(this, Type, getTarget().hasFastHalfType(), false,
3261 getTarget().hasBFloat16Type());
3262 llvm::Type *Ty = VTy;
3263 if (!Ty)
3264 return nullptr;
3265
3266 // Many NEON builtins have identical semantics and uses in ARM and
3267 // AArch64. Emit these in a single function.
3268 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
3269 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
3270 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
3271 if (Builtin)
3272    return EmitCommonNeonBuiltinExpr(
3273        Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
3274 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
3275
3276 unsigned Int;
3277 switch (BuiltinID) {
3278 default: return nullptr;
3279 case NEON::BI__builtin_neon_vld1q_lane_v:
3280 // Handle 64-bit integer elements as a special case. Use shuffles of
3281 // one-element vectors to avoid poor code for i64 in the backend.
3282 if (VTy->getElementType()->isIntegerTy(64)) {
3283 // Extract the other lane.
3284 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3285 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
3286 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
3287 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
3288 // Load the value as a one-element vector.
3289 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
3290 llvm::Type *Tys[] = {Ty, Int8PtrTy};
3291 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
3292 Value *Align = getAlignmentValue32(PtrOp0);
3293 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
3294 // Combine them.
3295 int Indices[] = {1 - Lane, Lane};
3296 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
3297 }
3298 [[fallthrough]];
3299 case NEON::BI__builtin_neon_vld1_lane_v: {
3300 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3301 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
3302 Value *Ld = Builder.CreateLoad(PtrOp0);
3303 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
3304 }
3305 case NEON::BI__builtin_neon_vqrshrn_n_v:
3306 Int =
3307 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
3308 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
3309 1, true);
3310 case NEON::BI__builtin_neon_vqrshrun_n_v:
3311 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
3312 Ops, "vqrshrun_n", 1, true);
3313 case NEON::BI__builtin_neon_vqshrn_n_v:
3314 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
3315 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
3316 1, true);
3317 case NEON::BI__builtin_neon_vqshrun_n_v:
3318 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
3319 Ops, "vqshrun_n", 1, true);
3320 case NEON::BI__builtin_neon_vrecpe_v:
3321 case NEON::BI__builtin_neon_vrecpeq_v:
3322 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
3323 Ops, "vrecpe");
3324 case NEON::BI__builtin_neon_vrshrn_n_v:
3325 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
3326 Ops, "vrshrn_n", 1, true);
3327 case NEON::BI__builtin_neon_vrsra_n_v:
3328 case NEON::BI__builtin_neon_vrsraq_n_v:
3329 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3330 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3331 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
3332 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
3333 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
3334 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
3335 case NEON::BI__builtin_neon_vsri_n_v:
3336 case NEON::BI__builtin_neon_vsriq_n_v:
3337 rightShift = true;
3338 [[fallthrough]];
3339 case NEON::BI__builtin_neon_vsli_n_v:
3340 case NEON::BI__builtin_neon_vsliq_n_v:
3341 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
3342 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
3343 Ops, "vsli_n");
3344 case NEON::BI__builtin_neon_vsra_n_v:
3345 case NEON::BI__builtin_neon_vsraq_n_v:
3346 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3347 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
3348 return Builder.CreateAdd(Ops[0], Ops[1]);
3349 case NEON::BI__builtin_neon_vst1q_lane_v:
3350 // Handle 64-bit integer elements as a special case. Use a shuffle to get
3351 // a one-element vector and avoid poor code for i64 in the backend.
3352 if (VTy->getElementType()->isIntegerTy(64)) {
3353 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3354 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
3355 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
3356 Ops[2] = getAlignmentValue32(PtrOp0);
3357 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
3358 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
3359 Tys), Ops);
3360 }
3361 [[fallthrough]];
3362 case NEON::BI__builtin_neon_vst1_lane_v: {
3363 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3364 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
3365 return Builder.CreateStore(Ops[1],
3366 PtrOp0.withElementType(Ops[1]->getType()));
3367 }
3368 case NEON::BI__builtin_neon_vtbl1_v:
3369 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
3370 Ops, "vtbl1");
3371 case NEON::BI__builtin_neon_vtbl2_v:
3372 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
3373 Ops, "vtbl2");
3374 case NEON::BI__builtin_neon_vtbl3_v:
3375 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
3376 Ops, "vtbl3");
3377 case NEON::BI__builtin_neon_vtbl4_v:
3378 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
3379 Ops, "vtbl4");
3380 case NEON::BI__builtin_neon_vtbx1_v:
3381 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
3382 Ops, "vtbx1");
3383 case NEON::BI__builtin_neon_vtbx2_v:
3384 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
3385 Ops, "vtbx2");
3386 case NEON::BI__builtin_neon_vtbx3_v:
3387 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
3388 Ops, "vtbx3");
3389 case NEON::BI__builtin_neon_vtbx4_v:
3390 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
3391 Ops, "vtbx4");
3392 }
3393}
3394
3395template<typename Integer>
3396static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
3397  return E->getIntegerConstantExpr(Context)->getExtValue();
3398}
3399
3400static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
3401 llvm::Type *T, bool Unsigned) {
3402 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
3403 // which finds it convenient to specify signed/unsigned as a boolean flag.
3404 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
3405}
3406
3407static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
3408 uint32_t Shift, bool Unsigned) {
3409 // MVE helper function for integer shift right. This must handle signed vs
3410 // unsigned, and also deal specially with the case where the shift count is
3411 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
3412 // undefined behavior, but in MVE it's legal, so we must convert it to code
3413 // that is not undefined in IR.
3414 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
3415 ->getElementType()
3416 ->getPrimitiveSizeInBits();
3417 if (Shift == LaneBits) {
3418 // An unsigned shift of the full lane size always generates zero, so we can
3419 // simply emit a zero vector. A signed shift of the full lane size does the
3420 // same thing as shifting by one bit fewer.
3421 if (Unsigned)
3422 return llvm::Constant::getNullValue(V->getType());
3423 else
3424 --Shift;
3425 }
3426 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
3427}
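// Illustrative example (not in the original source): an MVE shift such as
// vshrq_n_s32(v, 32) reaches here with Shift == LaneBits; it is emitted as
// an ashr by 31, while the equivalent unsigned shift folds to a zero vector.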
3428
3429static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
3430 // MVE-specific helper function for a vector splat, which infers the element
3431 // count of the output vector by knowing that MVE vectors are all 128 bits
3432 // wide.
3433 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
3434 return Builder.CreateVectorSplat(Elements, V);
3435}
3436
3437static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
3438 CodeGenFunction *CGF,
3439 llvm::Value *V,
3440 llvm::Type *DestType) {
3441 // Convert one MVE vector type into another by reinterpreting its in-register
3442 // format.
3443 //
3444 // Little-endian, this is identical to a bitcast (which reinterprets the
3445 // memory format). But big-endian, they're not necessarily the same, because
3446 // the register and memory formats map to each other differently depending on
3447 // the lane size.
3448 //
3449 // We generate a bitcast whenever we can (if we're little-endian, or if the
3450 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
3451 // that performs the different kind of reinterpretation.
3452 if (CGF->getTarget().isBigEndian() &&
3453 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
3454 return Builder.CreateCall(
3455 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
3456 {DestType, V->getType()}),
3457 V);
3458 } else {
3459 return Builder.CreateBitCast(V, DestType);
3460 }
3461}
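// Illustrative example (not in the original source): on a big-endian target,
// reinterpreting <8 x i16> as <4 x i32> (different lane sizes) goes through
// the llvm.arm.mve.vreinterpretq intrinsic, whereas <4 x i32> to <4 x float>
// (same lane size) is emitted as a plain bitcast.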
3462
3463static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
3464 // Make a shufflevector that extracts every other element of a vector (evens
3465 // or odds, as desired).
3466 SmallVector<int, 16> Indices;
3467 unsigned InputElements =
3468 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
3469 for (unsigned i = 0; i < InputElements; i += 2)
3470 Indices.push_back(i + Odd);
3471 return Builder.CreateShuffleVector(V, Indices);
3472}
3473
3474static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
3475 llvm::Value *V1) {
3476 // Make a shufflevector that interleaves two vectors element by element.
3477 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
3478 SmallVector<int, 16> Indices;
3479 unsigned InputElements =
3480 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
3481 for (unsigned i = 0; i < InputElements; i++) {
3482 Indices.push_back(i);
3483 Indices.push_back(i + InputElements);
3484 }
3485 return Builder.CreateShuffleVector(V0, V1, Indices);
3486}
3487
3488template<unsigned HighBit, unsigned OtherBits>
3489static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
3490 // MVE-specific helper function to make a vector splat of a constant such as
3491 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
3492 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
3493 unsigned LaneBits = T->getPrimitiveSizeInBits();
3494 uint32_t Value = HighBit << (LaneBits - 1);
3495 if (OtherBits)
3496 Value |= (1UL << (LaneBits - 1)) - 1;
3497 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
3498 return ARMMVEVectorSplat(Builder, Lane);
3499}
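// Illustrative example (not in the original source): with 32-bit lanes,
// ARMMVEConstantSplat<1, 0> splats 0x80000000 (INT_MIN),
// ARMMVEConstantSplat<1, 1> splats 0xFFFFFFFF (UINT_MAX), and
// ARMMVEConstantSplat<0, 1> splats 0x7FFFFFFF (INT_MAX).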
3500
3501static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
3502 llvm::Value *V,
3503 unsigned ReverseWidth) {
3504 // MVE-specific helper function which reverses the elements of a
3505 // vector within every (ReverseWidth)-bit collection of lanes.
3506 SmallVector<int, 16> Indices;
3507 unsigned LaneSize = V->getType()->getScalarSizeInBits();
3508 unsigned Elements = 128 / LaneSize;
3509 unsigned Mask = ReverseWidth / LaneSize - 1;
3510 for (unsigned i = 0; i < Elements; i++)
3511 Indices.push_back(i ^ Mask);
3512 return Builder.CreateShuffleVector(V, Indices);
3513}
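// Illustrative example (not in the original source): with 8-bit lanes and
// ReverseWidth == 32, Mask is 3 and the indices become
// {3,2,1,0, 7,6,5,4, ...}, i.e. the byte order is reversed within every
// 32-bit group of lanes (the vrev32q pattern).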
3514
3515Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
3516                                              const CallExpr *E,
3517                                              ReturnValueSlot ReturnValue,
3518                                              llvm::Triple::ArchType Arch) {
3519 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
3520 Intrinsic::ID IRIntr;
3521 unsigned NumVectors;
3522
3523 // Code autogenerated by Tablegen will handle all the simple builtins.
3524 switch (BuiltinID) {
3525 #include "clang/Basic/arm_mve_builtin_cg.inc"
3526
3527 // If we didn't match an MVE builtin id at all, go back to the
3528 // main EmitARMBuiltinExpr.
3529 default:
3530 return nullptr;
3531 }
3532
3533 // Anything that breaks from that switch is an MVE builtin that
3534 // needs handwritten code to generate.
3535
3536 switch (CustomCodeGenType) {
3537
3538 case CustomCodeGen::VLD24: {
3539    llvm::SmallVector<Value *, 4> Ops;
3540    llvm::SmallVector<llvm::Type *, 4> Tys;
3541
3542 auto MvecCType = E->getType();
3543 auto MvecLType = ConvertType(MvecCType);
3544 assert(MvecLType->isStructTy() &&
3545 "Return type for vld[24]q should be a struct");
3546 assert(MvecLType->getStructNumElements() == 1 &&
3547 "Return-type struct for vld[24]q should have one element");
3548 auto MvecLTypeInner = MvecLType->getStructElementType(0);
3549 assert(MvecLTypeInner->isArrayTy() &&
3550 "Return-type struct for vld[24]q should contain an array");
3551 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3552 "Array member of return-type struct vld[24]q has wrong length");
3553 auto VecLType = MvecLTypeInner->getArrayElementType();
3554
3555 Tys.push_back(VecLType);
3556
3557 auto Addr = E->getArg(0);
3558 Ops.push_back(EmitScalarExpr(Addr));
3559 Tys.push_back(ConvertType(Addr->getType()));
3560
3561 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
3562 Value *LoadResult = Builder.CreateCall(F, Ops);
3563 Value *MvecOut = PoisonValue::get(MvecLType);
3564 for (unsigned i = 0; i < NumVectors; ++i) {
3565 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
3566 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
3567 }
3568
3569 if (ReturnValue.isNull())
3570 return MvecOut;
3571 else
3572 return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
3573 }
3574
3575 case CustomCodeGen::VST24: {
3576    llvm::SmallVector<Value *, 4> Ops;
3577    llvm::SmallVector<llvm::Type *, 4> Tys;
3578
3579 auto Addr = E->getArg(0);
3580 Ops.push_back(EmitScalarExpr(Addr));
3581 Tys.push_back(ConvertType(Addr->getType()));
3582
3583 auto MvecCType = E->getArg(1)->getType();
3584 auto MvecLType = ConvertType(MvecCType);
3585 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
3586 assert(MvecLType->getStructNumElements() == 1 &&
3587 "Data-type struct for vst2q should have one element");
3588 auto MvecLTypeInner = MvecLType->getStructElementType(0);
3589 assert(MvecLTypeInner->isArrayTy() &&
3590 "Data-type struct for vst2q should contain an array");
3591 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3592 "Array member of return-type struct vld[24]q has wrong length");
3593 auto VecLType = MvecLTypeInner->getArrayElementType();
3594
3595 Tys.push_back(VecLType);
3596
3597 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
3598 EmitAggExpr(E->getArg(1), MvecSlot);
3599 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
3600 for (unsigned i = 0; i < NumVectors; i++)
3601 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
3602
3603 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
3604 Value *ToReturn = nullptr;
3605 for (unsigned i = 0; i < NumVectors; i++) {
3606 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
3607 ToReturn = Builder.CreateCall(F, Ops);
3608 Ops.pop_back();
3609 }
3610 return ToReturn;
3611 }
3612 }
3613 llvm_unreachable("unknown custom codegen type.");
3614}
3615
3616Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
3617                                              const CallExpr *E,
3618                                              ReturnValueSlot ReturnValue,
3619                                              llvm::Triple::ArchType Arch) {
3620 switch (BuiltinID) {
3621 default:
3622 return nullptr;
3623#include "clang/Basic/arm_cde_builtin_cg.inc"
3624 }
3625}
3626
3627static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
3628 const CallExpr *E,
3629                                        SmallVectorImpl<Value *> &Ops,
3630                                        llvm::Triple::ArchType Arch) {
3631 unsigned int Int = 0;
3632 const char *s = nullptr;
3633
3634 switch (BuiltinID) {
3635 default:
3636 return nullptr;
3637 case NEON::BI__builtin_neon_vtbl1_v:
3638 case NEON::BI__builtin_neon_vqtbl1_v:
3639 case NEON::BI__builtin_neon_vqtbl1q_v:
3640 case NEON::BI__builtin_neon_vtbl2_v:
3641 case NEON::BI__builtin_neon_vqtbl2_v:
3642 case NEON::BI__builtin_neon_vqtbl2q_v:
3643 case NEON::BI__builtin_neon_vtbl3_v:
3644 case NEON::BI__builtin_neon_vqtbl3_v:
3645 case NEON::BI__builtin_neon_vqtbl3q_v:
3646 case NEON::BI__builtin_neon_vtbl4_v:
3647 case NEON::BI__builtin_neon_vqtbl4_v:
3648 case NEON::BI__builtin_neon_vqtbl4q_v:
3649 break;
3650 case NEON::BI__builtin_neon_vtbx1_v:
3651 case NEON::BI__builtin_neon_vqtbx1_v:
3652 case NEON::BI__builtin_neon_vqtbx1q_v:
3653 case NEON::BI__builtin_neon_vtbx2_v:
3654 case NEON::BI__builtin_neon_vqtbx2_v:
3655 case NEON::BI__builtin_neon_vqtbx2q_v:
3656 case NEON::BI__builtin_neon_vtbx3_v:
3657 case NEON::BI__builtin_neon_vqtbx3_v:
3658 case NEON::BI__builtin_neon_vqtbx3q_v:
3659 case NEON::BI__builtin_neon_vtbx4_v:
3660 case NEON::BI__builtin_neon_vqtbx4_v:
3661 case NEON::BI__builtin_neon_vqtbx4q_v:
3662 break;
3663 }
3664
3665 assert(E->getNumArgs() >= 3);
3666
3667 // Get the last argument, which specifies the vector type.
3668 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3669 std::optional<llvm::APSInt> Result =
3670      Arg->getIntegerConstantExpr(CGF.getContext());
3671  if (!Result)
3672 return nullptr;
3673
3674 // Determine the type of this overloaded NEON intrinsic.
3675 NeonTypeFlags Type = Result->getZExtValue();
3676 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
3677 if (!Ty)
3678 return nullptr;
3679
3680 CodeGen::CGBuilderTy &Builder = CGF.Builder;
3681
3682  // AArch64 scalar builtins are not overloaded; they do not have an extra
3683  // argument that specifies the vector type, so we need to handle each case.
3684 switch (BuiltinID) {
3685 case NEON::BI__builtin_neon_vtbl1_v: {
3686 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
3687 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3688 }
3689 case NEON::BI__builtin_neon_vtbl2_v: {
3690 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
3691 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3692 }
3693 case NEON::BI__builtin_neon_vtbl3_v: {
3694 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
3695 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3696 }
3697 case NEON::BI__builtin_neon_vtbl4_v: {
3698 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
3699 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3700 }
3701 case NEON::BI__builtin_neon_vtbx1_v: {
3702 Value *TblRes =
3703 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
3704 Intrinsic::aarch64_neon_tbl1, "vtbl1");
3705
3706 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
3707 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
3708 CmpRes = Builder.CreateSExt(CmpRes, Ty);
3709
3710 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
3711 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
3712 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
3713 }
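  // Illustrative note (not in the original source): vtbx1 must take lanes
  // from the fallback operand for indices >= 8, but the underlying 128-bit
  // TBL1 only produces zero for indices >= 16, so the compare above selects
  // the fallback lanes explicitly.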
3714 case NEON::BI__builtin_neon_vtbx2_v: {
3715 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
3716 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
3717 }
3718 case NEON::BI__builtin_neon_vtbx3_v: {
3719 Value *TblRes =
3720 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
3721 Intrinsic::aarch64_neon_tbl2, "vtbl2");
3722
3723 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
3724 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
3725 TwentyFourV);
3726 CmpRes = Builder.CreateSExt(CmpRes, Ty);
3727
3728 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
3729 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
3730 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
3731 }
3732 case NEON::BI__builtin_neon_vtbx4_v: {
3733 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
3734 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
3735 }
3736 case NEON::BI__builtin_neon_vqtbl1_v:
3737 case NEON::BI__builtin_neon_vqtbl1q_v:
3738 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
3739 case NEON::BI__builtin_neon_vqtbl2_v:
3740 case NEON::BI__builtin_neon_vqtbl2q_v:
3741 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
3742 case NEON::BI__builtin_neon_vqtbl3_v:
3743 case NEON::BI__builtin_neon_vqtbl3q_v:
3744 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
3745 case NEON::BI__builtin_neon_vqtbl4_v:
3746 case NEON::BI__builtin_neon_vqtbl4q_v:
3747 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
3748 case NEON::BI__builtin_neon_vqtbx1_v:
3749 case NEON::BI__builtin_neon_vqtbx1q_v:
3750 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
3751 case NEON::BI__builtin_neon_vqtbx2_v:
3752 case NEON::BI__builtin_neon_vqtbx2q_v:
3753 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
3754 case NEON::BI__builtin_neon_vqtbx3_v:
3755 case NEON::BI__builtin_neon_vqtbx3q_v:
3756 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
3757 case NEON::BI__builtin_neon_vqtbx4_v:
3758 case NEON::BI__builtin_neon_vqtbx4q_v:
3759 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
3760 }
3762
3763 if (!Int)
3764 return nullptr;
3765
3766 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
3767 return CGF.EmitNeonCall(F, Ops, s);
3768}
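// Illustrative sketch, not part of the upstream file: vtbx1/vtbx3 have no
// single AArch64 instruction, so the code above emulates the "extend"
// behaviour with a compare plus a bitwise select around the plain tbl result.
// A scalar model of that select (names here are hypothetical):
static inline unsigned char vtbxSelectModel(unsigned char Fallback,
                                            unsigned char Tbl,
                                            unsigned char Idx,
                                            unsigned char NumEntries) {
  // CreateICmp(UGE) + CreateSExt produce an all-ones/all-zeros lane mask.
  unsigned char Mask = (Idx >= NumEntries) ? 0xFF : 0x00;
  // (Mask & Fallback) | (~Mask & Tbl) mirrors CreateAnd/CreateNot/CreateOr above.
  return (Mask & Fallback) | (~Mask & Tbl);
}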
3769
3770 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
3771 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
3772 Op = Builder.CreateBitCast(Op, Int16Ty);
3773 Value *V = PoisonValue::get(VTy);
3774 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3775 Op = Builder.CreateInsertElement(V, Op, CI);
3776 return Op;
3777}
3778
3779/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
3780/// access builtin. Only required if it can't be inferred from the base pointer
3781/// operand.
3782 llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
3783 switch (TypeFlags.getMemEltType()) {
3784 case SVETypeFlags::MemEltTyDefault:
3785 return getEltType(TypeFlags);
3786 case SVETypeFlags::MemEltTyInt8:
3787 return Builder.getInt8Ty();
3788 case SVETypeFlags::MemEltTyInt16:
3789 return Builder.getInt16Ty();
3790 case SVETypeFlags::MemEltTyInt32:
3791 return Builder.getInt32Ty();
3792 case SVETypeFlags::MemEltTyInt64:
3793 return Builder.getInt64Ty();
3794 }
3795 llvm_unreachable("Unknown MemEltType");
3796}
3797
3798llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
3799 switch (TypeFlags.getEltType()) {
3800 default:
3801 llvm_unreachable("Invalid SVETypeFlag!");
3802
3803 case SVETypeFlags::EltTyMFloat8:
3804 case SVETypeFlags::EltTyInt8:
3805 return Builder.getInt8Ty();
3806 case SVETypeFlags::EltTyInt16:
3807 return Builder.getInt16Ty();
3808 case SVETypeFlags::EltTyInt32:
3809 return Builder.getInt32Ty();
3810 case SVETypeFlags::EltTyInt64:
3811 return Builder.getInt64Ty();
3812 case SVETypeFlags::EltTyInt128:
3813 return Builder.getInt128Ty();
3814
3815 case SVETypeFlags::EltTyFloat16:
3816 return Builder.getHalfTy();
3817 case SVETypeFlags::EltTyFloat32:
3818 return Builder.getFloatTy();
3819 case SVETypeFlags::EltTyFloat64:
3820 return Builder.getDoubleTy();
3821
3822 case SVETypeFlags::EltTyBFloat16:
3823 return Builder.getBFloatTy();
3824
3825 case SVETypeFlags::EltTyBool8:
3826 case SVETypeFlags::EltTyBool16:
3827 case SVETypeFlags::EltTyBool32:
3828 case SVETypeFlags::EltTyBool64:
3829 return Builder.getInt1Ty();
3830 }
3831}
3832
3833// Return the llvm predicate vector type corresponding to the specified element
3834// TypeFlags.
3835llvm::ScalableVectorType *
3836 CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
3837 switch (TypeFlags.getEltType()) {
3838 default: llvm_unreachable("Unhandled SVETypeFlag!");
3839
3840 case SVETypeFlags::EltTyInt8:
3841 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3842 case SVETypeFlags::EltTyInt16:
3843 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3844 case SVETypeFlags::EltTyInt32:
3845 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3846 case SVETypeFlags::EltTyInt64:
3847 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3848
3849 case SVETypeFlags::EltTyBFloat16:
3850 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3851 case SVETypeFlags::EltTyFloat16:
3852 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3853 case SVETypeFlags::EltTyFloat32:
3854 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3855 case SVETypeFlags::EltTyFloat64:
3856 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3857
3858 case SVETypeFlags::EltTyBool8:
3859 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3860 case SVETypeFlags::EltTyBool16:
3861 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3862 case SVETypeFlags::EltTyBool32:
3863 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3864 case SVETypeFlags::EltTyBool64:
3865 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3866 }
3867}
3868
3869// Return the llvm vector type corresponding to the specified element TypeFlags.
3870llvm::ScalableVectorType *
3871 CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
3872 switch (TypeFlags.getEltType()) {
3873 default:
3874 llvm_unreachable("Invalid SVETypeFlag!");
3875
3876 case SVETypeFlags::EltTyInt8:
3877 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
3878 case SVETypeFlags::EltTyInt16:
3879 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
3880 case SVETypeFlags::EltTyInt32:
3881 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
3882 case SVETypeFlags::EltTyInt64:
3883 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
3884
3885 case SVETypeFlags::EltTyMFloat8:
3886 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
3887 case SVETypeFlags::EltTyFloat16:
3888 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
3889 case SVETypeFlags::EltTyBFloat16:
3890 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
3891 case SVETypeFlags::EltTyFloat32:
3892 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
3893 case SVETypeFlags::EltTyFloat64:
3894 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
3895
3896 case SVETypeFlags::EltTyBool8:
3897 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3898 case SVETypeFlags::EltTyBool16:
3899 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3900 case SVETypeFlags::EltTyBool32:
3901 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3902 case SVETypeFlags::EltTyBool64:
3903 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3904 }
3905}
3906
3907llvm::Value *
3908 CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
3909 Function *Ptrue =
3910 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
3911 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
3912}
3913
3914constexpr unsigned SVEBitsPerBlock = 128;
3915
3916static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
3917 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
3918 return llvm::ScalableVectorType::get(EltTy, NumElts);
3919}
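// Worked example (annotation, not part of the upstream file): with
// SVEBitsPerBlock = 128, an i16 element yields 128 / 16 = 8 lanes, i.e.
// <vscale x 8 x i16>, and an i64 element yields 128 / 64 = 2 lanes, i.e.
// <vscale x 2 x i64>, matching the per-element tables in getSVEType() and
// getSVEPredType() above.
static_assert(SVEBitsPerBlock / 16 == 8, "i16 maps to <vscale x 8 x i16>");
static_assert(SVEBitsPerBlock / 64 == 2, "i64 maps to <vscale x 2 x i64>");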
3920
3921// Reinterpret the input predicate so that it can be used to correctly isolate
3922// the elements of the specified datatype.
3923 Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
3924 llvm::ScalableVectorType *VTy) {
3925
3926 if (isa<TargetExtType>(Pred->getType()) &&
3927 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
3928 return Pred;
3929
3930 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
3931 if (Pred->getType() == RTy)
3932 return Pred;
3933
3934 unsigned IntID;
3935 llvm::Type *IntrinsicTy;
3936 switch (VTy->getMinNumElements()) {
3937 default:
3938 llvm_unreachable("unsupported element count!");
3939 case 1:
3940 case 2:
3941 case 4:
3942 case 8:
3943 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
3944 IntrinsicTy = RTy;
3945 break;
3946 case 16:
3947 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
3948 IntrinsicTy = Pred->getType();
3949 break;
3950 }
3951
3952 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
3953 Value *C = Builder.CreateCall(F, Pred);
3954 assert(C->getType() == RTy && "Unexpected return type!");
3955 return C;
3956}
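// Illustrative sketch, not part of the upstream file: for a double-precision
// operation the ACLE-level svbool_t predicate (<vscale x 16 x i1>) is narrowed
// with the convert.from.svbool intrinsic, so the emitted IR looks roughly like
//   %pg.d = call <vscale x 2 x i1>
//       @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
// and the 16-element case goes the other way via convert.to.svbool.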
3957
3958 Value *CodeGenFunction::EmitSVEPredicateTupleCast(Value *PredTuple,
3959 llvm::StructType *Ty) {
3960 if (PredTuple->getType() == Ty)
3961 return PredTuple;
3962
3963 Value *Ret = llvm::PoisonValue::get(Ty);
3964 for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
3965 Value *Pred = Builder.CreateExtractValue(PredTuple, I);
3966 Pred = EmitSVEPredicateCast(
3967 Pred, cast<llvm::ScalableVectorType>(Ty->getTypeAtIndex(I)));
3968 Ret = Builder.CreateInsertValue(Ret, Pred, I);
3969 }
3970
3971 return Ret;
3972}
3973
3974 Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
3975 SmallVectorImpl<Value *> &Ops,
3976 unsigned IntID) {
3977 auto *ResultTy = getSVEType(TypeFlags);
3978 auto *OverloadedTy =
3979 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
3980
3981 Function *F = nullptr;
3982 if (Ops[1]->getType()->isVectorTy())
3983 // This is the "vector base, scalar offset" case. In order to uniquely
3984 // map this built-in to an LLVM IR intrinsic, we need both the return type
3985 // and the type of the vector base.
3986 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
3987 else
3988 // This is the "scalar base, vector offset" case. The type of the offset
3989 // is encoded in the name of the intrinsic. We only need to specify the
3990 // return type in order to uniquely map this built-in to an LLVM IR
3991 // intrinsic.
3992 F = CGM.getIntrinsic(IntID, OverloadedTy);
3993
3994 // At the ACLE level there's only one predicate type, svbool_t, which is
3995 // mapped to <n x 16 x i1>. However, this might be incompatible with the
3996 // actual type being loaded. For example, when loading doubles (i64) the
3997 // predicate should be <n x 2 x i1> instead. At the IR level the type of
3998 // the predicate and the data being loaded must match. Cast to the type
3999 // expected by the intrinsic. The intrinsic itself should be defined in
4000 // a way that enforces relations between parameter types.
4001 Ops[0] = EmitSVEPredicateCast(
4002 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
4003
4004 // Pass 0 when the offset is missing. This can only be applied when using
4005 // the "vector base" addressing mode for which ACLE allows no offset. The
4006 // corresponding LLVM IR always requires an offset.
4007 if (Ops.size() == 2) {
4008 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
4009 Ops.push_back(ConstantInt::get(Int64Ty, 0));
4010 }
4011
4012 // For "vector base, scalar index" scale the index so that it becomes a
4013 // scalar offset.
4014 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
4015 unsigned BytesPerElt =
4016 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
4017 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
4018 }
4019
4020 Value *Call = Builder.CreateCall(F, Ops);
4021
4022 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
4023 // other cases it's folded into a nop.
4024 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
4025 : Builder.CreateSExt(Call, ResultTy);
4026}
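// Illustrative sketch, not part of the upstream file: the two addressing modes
// handled above correspond to ACLE calls such as the following (assuming
// <arm_sve.h> and an SVE target; only the shape matters here):
//   // "scalar base, vector offset": the offset type is encoded in the name.
//   svint64_t g1(svbool_t pg, const int64_t *base, svint64_t offs) {
//     return svld1_gather_s64offset_s64(pg, base, offs);
//   }
//   // "vector base", no offset: ACLE allows omitting the offset, so CodeGen
//   // appends an explicit 0 for the LLVM intrinsic.
//   svint64_t g2(svbool_t pg, svuint64_t bases) {
//     return svld1_gather_u64base_s64(pg, bases);
//   }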
4027
4028 Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
4029 SmallVectorImpl<Value *> &Ops,
4030 unsigned IntID) {
4031 auto *SrcDataTy = getSVEType(TypeFlags);
4032 auto *OverloadedTy =
4033 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
4034
4035 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
4036 // it's the first argument. Move it accordingly.
4037 Ops.insert(Ops.begin(), Ops.pop_back_val());
4038
4039 Function *F = nullptr;
4040 if (Ops[2]->getType()->isVectorTy())
4041 // This is the "vector base, scalar offset" case. In order to uniquely
4042 // map this built-in to an LLVM IR intrinsic, we need both the return type
4043 // and the type of the vector base.
4044 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
4045 else
4046 // This is the "scalar base, vector offset" case. The type of the offset
4047 // is encoded in the name of the intrinsic. We only need to specify the
4048 // return type in order to uniquely map this built-in to an LLVM IR
4049 // intrinsic.
4050 F = CGM.getIntrinsic(IntID, OverloadedTy);
4051
4052 // Pass 0 when the offset is missing. This can only be applied when using
4053 // the "vector base" addressing mode for which ACLE allows no offset. The
4054 // corresponding LLVM IR always requires an offset.
4055 if (Ops.size() == 3) {
4056 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
4057 Ops.push_back(ConstantInt::get(Int64Ty, 0));
4058 }
4059
4060 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
4061 // folded into a nop.
4062 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
4063
4064 // At the ACLE level there's only one predicate type, svbool_t, which is
4065 // mapped to <n x 16 x i1>. However, this might be incompatible with the
4066 // actual type being stored. For example, when storing doubles (i64) the
4067 // predicate should be <n x 2 x i1> instead. At the IR level the type of
4068 // the predicate and the data being stored must match. Cast to the type
4069 // expected by the intrinsic. The intrinsic itself should be defined in
4070 // a way that enforces relations between parameter types.
4071 Ops[1] = EmitSVEPredicateCast(
4072 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
4073
4074 // For "vector base, scalar index" scale the index so that it becomes a
4075 // scalar offset.
4076 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
4077 unsigned BytesPerElt =
4078 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
4079 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
4080 }
4081
4082 return Builder.CreateCall(F, Ops);
4083}
4084
4085 Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
4086 SmallVectorImpl<Value *> &Ops,
4087 unsigned IntID) {
4088 // The gather prefetches are overloaded on the vector input - this can either
4089 // be the vector of base addresses or vector of offsets.
4090 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
4091 if (!OverloadedTy)
4092 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
4093
4094 // Cast the predicate from svbool_t to the right number of elements.
4095 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
4096
4097 // vector + imm addressing modes
4098 if (Ops[1]->getType()->isVectorTy()) {
4099 if (Ops.size() == 3) {
4100 // Pass 0 for 'vector+imm' when the index is omitted.
4101 Ops.push_back(ConstantInt::get(Int64Ty, 0));
4102
4103 // The sv_prfop is the last operand in the builtin and IR intrinsic.
4104 std::swap(Ops[2], Ops[3]);
4105 } else {
4106 // Index needs to be passed as scaled offset.
4107 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
4108 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
4109 if (BytesPerElt > 1)
4110 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
4111 }
4112 }
4113
4114 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
4115 return Builder.CreateCall(F, Ops);
4116}
4117
4118 Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
4119 SmallVectorImpl<Value *> &Ops,
4120 unsigned IntID) {
4121 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
4122 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
4123 Value *BasePtr = Ops[1];
4124
4125 // Does the load have an offset?
4126 if (Ops.size() > 2)
4127 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
4128
4129 Function *F = CGM.getIntrinsic(IntID, {VTy});
4130 return Builder.CreateCall(F, {Predicate, BasePtr});
4131}
4132
4133 Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
4134 SmallVectorImpl<Value *> &Ops,
4135 unsigned IntID) {
4136 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
4137
4138 unsigned N;
4139 switch (IntID) {
4140 case Intrinsic::aarch64_sve_st2:
4141 case Intrinsic::aarch64_sve_st1_pn_x2:
4142 case Intrinsic::aarch64_sve_stnt1_pn_x2:
4143 case Intrinsic::aarch64_sve_st2q:
4144 N = 2;
4145 break;
4146 case Intrinsic::aarch64_sve_st3:
4147 case Intrinsic::aarch64_sve_st3q:
4148 N = 3;
4149 break;
4150 case Intrinsic::aarch64_sve_st4:
4151 case Intrinsic::aarch64_sve_st1_pn_x4:
4152 case Intrinsic::aarch64_sve_stnt1_pn_x4:
4153 case Intrinsic::aarch64_sve_st4q:
4154 N = 4;
4155 break;
4156 default:
4157 llvm_unreachable("unknown intrinsic!");
4158 }
4159
4160 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
4161 Value *BasePtr = Ops[1];
4162
4163 // Does the store have an offset?
4164 if (Ops.size() > (2 + N))
4165 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
4166
4167 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
4168 // need to break up the tuple vector.
4169 SmallVector<llvm::Value *, 5> Operands;
4170 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
4171 Operands.push_back(Ops[I]);
4172 Operands.append({Predicate, BasePtr});
4173 Function *F = CGM.getIntrinsic(IntID, { VTy });
4174
4175 return Builder.CreateCall(F, Operands);
4176}
4177
4178// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
4179// svpmullt_pair intrinsics, with the exception that their results are bitcast
4180// to a wider type.
4181 Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
4182 SmallVectorImpl<Value *> &Ops,
4183 unsigned BuiltinID) {
4184 // Splat scalar operand to vector (intrinsics with _n infix)
4185 if (TypeFlags.hasSplatOperand()) {
4186 unsigned OpNo = TypeFlags.getSplatOperand();
4187 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
4188 }
4189
4190 // The pair-wise function has a narrower overloaded type.
4191 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
4192 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
4193
4194 // Now bitcast to the wider result type.
4195 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
4196 return EmitSVEReinterpret(Call, Ty);
4197}
4198
4199 Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
4200 ArrayRef<Value *> Ops, unsigned BuiltinID) {
4201 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
4202 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
4203 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
4204}
4205
4206 Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
4207 SmallVectorImpl<Value *> &Ops,
4208 unsigned BuiltinID) {
4209 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
4210 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
4211 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
4212
4213 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
4214 Value *BasePtr = Ops[1];
4215
4216 // Apply the index operand if it was not omitted.
4217 if (Ops.size() > 3)
4218 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
4219
4220 Value *PrfOp = Ops.back();
4221
4222 Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
4223 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
4224}
4225
4226 Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
4227 llvm::Type *ReturnTy,
4228 SmallVectorImpl<Value *> &Ops,
4229 unsigned IntrinsicID,
4230 bool IsZExtReturn) {
4231 QualType LangPTy = E->getArg(1)->getType();
4232 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
4233 LangPTy->castAs<PointerType>()->getPointeeType());
4234
4235 // The Mfloat8 type is stored as a vector, so extra work is needed
4236 // to extract the scalar element type.
4237 if (MemEltTy->isVectorTy()) {
4238 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
4239 "Only <1 x i8> expected");
4240 MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
4241 }
4242
4243 // The vector type that is returned may be different from the
4244 // eventual type loaded from memory.
4245 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
4246 llvm::ScalableVectorType *MemoryTy = nullptr;
4247 llvm::ScalableVectorType *PredTy = nullptr;
4248 bool IsQuadLoad = false;
4249 switch (IntrinsicID) {
4250 case Intrinsic::aarch64_sve_ld1uwq:
4251 case Intrinsic::aarch64_sve_ld1udq:
4252 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
4253 PredTy = llvm::ScalableVectorType::get(
4254 llvm::Type::getInt1Ty(getLLVMContext()), 1);
4255 IsQuadLoad = true;
4256 break;
4257 default:
4258 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
4259 PredTy = MemoryTy;
4260 break;
4261 }
4262
4263 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
4264 Value *BasePtr = Ops[1];
4265
4266 // Does the load have an offset?
4267 if (Ops.size() > 2)
4268 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
4269
4270 Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
4271 auto *Load =
4272 cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
4273 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
4274 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
4275
4276 if (IsQuadLoad)
4277 return Load;
4278
4279 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
4280 : Builder.CreateSExt(Load, VectorTy);
4281}
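// Illustrative sketch, not part of the upstream file: the final zext/sext
// covers the extending-load builtins, e.g. (assuming <arm_sve.h>)
//   svint32_t f(svbool_t pg, const uint8_t *p) { return svld1ub_s32(pg, p); }
// which loads i8 elements under a <vscale x 4 x i1> predicate and zero-extends
// the <vscale x 4 x i8> result to <vscale x 4 x i32>.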
4282
4283 Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
4284 SmallVectorImpl<Value *> &Ops,
4285 unsigned IntrinsicID) {
4286 QualType LangPTy = E->getArg(1)->getType();
4287 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
4288 LangPTy->castAs<PointerType>()->getPointeeType());
4289
4290 // The Mfloat8 type is stored as a vector, so extra work is needed
4291 // to extract the scalar element type.
4292 if (MemEltTy->isVectorTy()) {
4293 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
4294 "Only <1 x i8> expected");
4295 MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
4296 }
4297
4298 // The vector type that is stored may be different from the
4299 // eventual type stored to memory.
4300 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
4301 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
4302
4303 auto PredTy = MemoryTy;
4304 auto AddrMemoryTy = MemoryTy;
4305 bool IsQuadStore = false;
4306
4307 switch (IntrinsicID) {
4308 case Intrinsic::aarch64_sve_st1wq:
4309 case Intrinsic::aarch64_sve_st1dq:
4310 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
4311 PredTy =
4312 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
4313 IsQuadStore = true;
4314 break;
4315 default:
4316 break;
4317 }
4318 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
4319 Value *BasePtr = Ops[1];
4320
4321 // Does the store have an offset?
4322 if (Ops.size() == 4)
4323 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
4324
4325 // Last value is always the data
4326 Value *Val =
4327 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
4328
4329 Function *F =
4330 CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
4331 auto *Store =
4332 cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
4333 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
4334 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
4335 return Store;
4336}
4337
4338 Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
4339 SmallVectorImpl<Value *> &Ops,
4340 unsigned IntID) {
4341 Ops[2] = EmitSVEPredicateCast(
4342 Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
4343
4344 SmallVector<Value *> NewOps;
4345 NewOps.push_back(Ops[2]);
4346
4347 llvm::Value *BasePtr = Ops[3];
4348 llvm::Value *RealSlice = Ops[1];
4349 // If the intrinsic contains the vnum parameter, multiply it by the vector
4350 // size in bytes.
4351 if (Ops.size() == 5) {
4352 Function *StreamingVectorLength =
4353 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd);
4354 llvm::Value *StreamingVectorLengthCall =
4355 Builder.CreateMul(Builder.CreateCall(StreamingVectorLength),
4356 llvm::ConstantInt::get(Int64Ty, 8), "svl",
4357 /* HasNUW */ true, /* HasNSW */ true);
4358 llvm::Value *Mulvl =
4359 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
4360 // The type of the ptr parameter is void *, so use Int8Ty here.
4361 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
4362 RealSlice = Builder.CreateZExt(RealSlice, Int64Ty);
4363 RealSlice = Builder.CreateAdd(RealSlice, Ops[4]);
4364 RealSlice = Builder.CreateTrunc(RealSlice, Int32Ty);
4365 }
4366 NewOps.push_back(BasePtr);
4367 NewOps.push_back(Ops[0]);
4368 NewOps.push_back(RealSlice);
4369 Function *F = CGM.getIntrinsic(IntID);
4370 return Builder.CreateCall(F, NewOps);
4371}
4372
4373 Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
4374 SmallVectorImpl<Value *> &Ops,
4375 unsigned IntID) {
4376 auto *VecTy = getSVEType(TypeFlags);
4377 Function *F = CGM.getIntrinsic(IntID, VecTy);
4378 if (TypeFlags.isReadZA())
4379 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
4380 else if (TypeFlags.isWriteZA())
4381 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
4382 return Builder.CreateCall(F, Ops);
4383}
4384
4385 Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
4386 SmallVectorImpl<Value *> &Ops,
4387 unsigned IntID) {
4388 // The svzero_za() intrinsic zeros the entire ZA tile and has no parameters.
4389 if (Ops.size() == 0)
4390 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
4391 Function *F = CGM.getIntrinsic(IntID, {});
4392 return Builder.CreateCall(F, Ops);
4393}
4394
4395 Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
4396 SmallVectorImpl<Value *> &Ops,
4397 unsigned IntID) {
4398 if (Ops.size() == 2)
4399 Ops.push_back(Builder.getInt32(0));
4400 else
4401 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
4402 Function *F = CGM.getIntrinsic(IntID, {});
4403 return Builder.CreateCall(F, Ops);
4404}
4405
4406// Limit the usage of scalable llvm IR generated by the ACLE by using the
4407// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
4408Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
4409 return Builder.CreateVectorSplat(
4410 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
4411}
4412
4413 Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
4414 if (auto *Ty = Scalar->getType(); Ty->isVectorTy()) {
4415#ifndef NDEBUG
4416 auto *VecTy = cast<llvm::VectorType>(Ty);
4417 ElementCount EC = VecTy->getElementCount();
4418 assert(EC.isScalar() && VecTy->getElementType() == Int8Ty &&
4419 "Only <1 x i8> expected");
4420#endif
4421 Scalar = Builder.CreateExtractElement(Scalar, uint64_t(0));
4422 }
4423 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
4424}
4425
4426 Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
4427 // FIXME: For big endian this needs an additional REV, or needs a separate
4428 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
4429 // instruction is defined as 'bitwise' equivalent from memory point of
4430 // view (when storing/reloading), whereas the svreinterpret builtin
4431 // implements bitwise equivalent cast from register point of view.
4432 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
4433
4434 if (auto *StructTy = dyn_cast<StructType>(Ty)) {
4435 Value *Tuple = llvm::PoisonValue::get(Ty);
4436
4437 for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
4438 Value *In = Builder.CreateExtractValue(Val, I);
4439 Value *Out = Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
4440 Tuple = Builder.CreateInsertValue(Tuple, Out, I);
4441 }
4442
4443 return Tuple;
4444 }
4445
4446 return Builder.CreateBitCast(Val, Ty);
4447}
4448
4449static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
4450 SmallVectorImpl<Value *> &Ops) {
4451 auto *SplatZero = Constant::getNullValue(Ty);
4452 Ops.insert(Ops.begin(), SplatZero);
4453}
4454
4455static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
4456 SmallVectorImpl<Value *> &Ops) {
4457 auto *SplatUndef = UndefValue::get(Ty);
4458 Ops.insert(Ops.begin(), SplatUndef);
4459}
4460
4461SmallVector<llvm::Type *, 2>
4462 CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
4463 llvm::Type *ResultType,
4464 ArrayRef<Value *> Ops) {
4465 if (TypeFlags.isOverloadNone())
4466 return {};
4467
4468 llvm::Type *DefaultType = getSVEType(TypeFlags);
4469
4470 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
4471 return {DefaultType, Ops[1]->getType()};
4472
4473 if (TypeFlags.isOverloadWhileRW())
4474 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
4475
4476 if (TypeFlags.isOverloadCvt())
4477 return {Ops[0]->getType(), Ops.back()->getType()};
4478
4479 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
4480 ResultType->isVectorTy())
4481 return {ResultType, Ops[1]->getType()};
4482
4483 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
4484 return {DefaultType};
4485}
4486
4487 Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
4488 ArrayRef<Value *> Ops) {
4489 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
4490 "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
4491 unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();
4492
4493 if (TypeFlags.isTupleSet())
4494 return Builder.CreateInsertValue(Ops[0], Ops[2], Idx);
4495 return Builder.CreateExtractValue(Ops[0], Idx);
4496}
4497
4498 Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
4499 llvm::Type *Ty,
4500 ArrayRef<Value *> Ops) {
4501 assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
4502
4503 Value *Tuple = llvm::PoisonValue::get(Ty);
4504 for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
4505 Tuple = Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
4506
4507 return Tuple;
4508}
4509
4510 void CodeGenFunction::GetAArch64SVEProcessedOperands(
4511 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
4512 SVETypeFlags TypeFlags) {
4513 // Find out if any arguments are required to be integer constant expressions.
4514 unsigned ICEArguments = 0;
4515 ASTContext::GetBuiltinTypeError Error;
4516 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4517 assert(Error == ASTContext::GE_None && "Should not codegen an error");
4518
4519 // Tuple set/get only requires one insert/extract vector, which is
4520 // created by EmitSVETupleSetOrGet.
4521 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
4522
4523 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
4524 bool IsICE = ICEArguments & (1 << i);
4525 Value *Arg = EmitScalarExpr(E->getArg(i));
4526
4527 if (IsICE) {
4528 // If this is required to be a constant, constant fold it so that we know
4529 // that the generated intrinsic gets a ConstantInt.
4530 std::optional<llvm::APSInt> Result =
4531 E->getArg(i)->getIntegerConstantExpr(getContext());
4532 assert(Result && "Expected argument to be a constant");
4533
4534 // Immediates for SVE llvm intrinsics are always 32bit. We can safely
4535 // truncate because the immediate has been range checked and no valid
4536 // immediate requires more than a handful of bits.
4537 *Result = Result->extOrTrunc(32);
4538 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
4539 continue;
4540 }
4541
4542 if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
4543 for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
4544 Ops.push_back(Builder.CreateExtractValue(Arg, I));
4545
4546 continue;
4547 }
4548
4549 Ops.push_back(Arg);
4550 }
4551}
4552
4553 Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
4554 const CallExpr *E) {
4555 llvm::Type *Ty = ConvertType(E->getType());
4556 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
4557 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
4558 Value *Val = EmitScalarExpr(E->getArg(0));
4559 return EmitSVEReinterpret(Val, Ty);
4560 }
4561
4562 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
4563 AArch64SVEIntrinsicsProvenSorted);
4564
4565 llvm::SmallVector<Value *, 4> Ops;
4566 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4567 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4568
4569 if (TypeFlags.isLoad())
4570 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
4571 TypeFlags.isZExtReturn());
4572 else if (TypeFlags.isStore())
4573 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
4574 else if (TypeFlags.isGatherLoad())
4575 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4576 else if (TypeFlags.isScatterStore())
4577 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4578 else if (TypeFlags.isPrefetch())
4579 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4580 else if (TypeFlags.isGatherPrefetch())
4581 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4582 else if (TypeFlags.isStructLoad())
4583 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4584 else if (TypeFlags.isStructStore())
4585 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4586 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
4587 return EmitSVETupleSetOrGet(TypeFlags, Ops);
4588 else if (TypeFlags.isTupleCreate())
4589 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
4590 else if (TypeFlags.isUndef())
4591 return UndefValue::get(Ty);
4592 else if (Builtin->LLVMIntrinsic != 0) {
4593 // Emit set FPMR for intrinsics that require it
4594 if (TypeFlags.setsFPMR())
4595 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
4596 Ops.pop_back_val());
4597 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
4598 InsertExplicitZeroOperand(Builder, Ty, Ops);
4599
4600 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
4601 InsertExplicitUndefOperand(Builder, Ty, Ops);
4602
4603 // Some ACLE builtins leave out the argument to specify the predicate
4604 // pattern, which is expected to be expanded to an SV_ALL pattern.
4605 if (TypeFlags.isAppendSVALL())
4606 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
4607 if (TypeFlags.isInsertOp1SVALL())
4608 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
4609
4610 // Predicates must match the main datatype.
4611 for (Value *&Op : Ops)
4612 if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
4613 if (PredTy->getElementType()->isIntegerTy(1))
4614 Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));
4615
4616 // Splat scalar operand to vector (intrinsics with _n infix)
4617 if (TypeFlags.hasSplatOperand()) {
4618 unsigned OpNo = TypeFlags.getSplatOperand();
4619 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
4620 }
4621
4622 if (TypeFlags.isReverseCompare())
4623 std::swap(Ops[1], Ops[2]);
4624 else if (TypeFlags.isReverseUSDOT())
4625 std::swap(Ops[1], Ops[2]);
4626 else if (TypeFlags.isReverseMergeAnyBinOp() &&
4627 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4628 std::swap(Ops[1], Ops[2]);
4629 else if (TypeFlags.isReverseMergeAnyAccOp() &&
4630 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4631 std::swap(Ops[1], Ops[3]);
4632
4633 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
4634 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
4635 llvm::Type *OpndTy = Ops[1]->getType();
4636 auto *SplatZero = Constant::getNullValue(OpndTy);
4637 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
4638 }
4639
4640 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
4641 getSVEOverloadTypes(TypeFlags, Ty, Ops));
4642 Value *Call = Builder.CreateCall(F, Ops);
4643
4644 if (Call->getType() == Ty)
4645 return Call;
4646
4647 // Predicate results must be converted to svbool_t.
4648 if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ty))
4649 return EmitSVEPredicateCast(Call, PredTy);
4650 if (auto PredTupleTy = dyn_cast<llvm::StructType>(Ty))
4651 return EmitSVEPredicateTupleCast(Call, PredTupleTy);
4652
4653 llvm_unreachable("unsupported element count!");
4654 }
4655
4656 switch (BuiltinID) {
4657 default:
4658 return nullptr;
4659
4660 case SVE::BI__builtin_sve_svreinterpret_b: {
4661 auto SVCountTy =
4662 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4663 Function *CastFromSVCountF =
4664 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4665 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
4666 }
4667 case SVE::BI__builtin_sve_svreinterpret_c: {
4668 auto SVCountTy =
4669 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4670 Function *CastToSVCountF =
4671 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4672 return Builder.CreateCall(CastToSVCountF, Ops[0]);
4673 }
4674
4675 case SVE::BI__builtin_sve_svpsel_lane_b8:
4676 case SVE::BI__builtin_sve_svpsel_lane_b16:
4677 case SVE::BI__builtin_sve_svpsel_lane_b32:
4678 case SVE::BI__builtin_sve_svpsel_lane_b64:
4679 case SVE::BI__builtin_sve_svpsel_lane_c8:
4680 case SVE::BI__builtin_sve_svpsel_lane_c16:
4681 case SVE::BI__builtin_sve_svpsel_lane_c32:
4682 case SVE::BI__builtin_sve_svpsel_lane_c64: {
4683 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
4684 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
4685 "aarch64.svcount")) &&
4686 "Unexpected TargetExtType");
4687 auto SVCountTy =
4688 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4689 Function *CastFromSVCountF =
4690 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4691 Function *CastToSVCountF =
4692 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4693
4694 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
4695 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
4696 llvm::Value *Ops0 =
4697 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
4698 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
4699 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
4700 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
4701 }
4702 case SVE::BI__builtin_sve_svmov_b_z: {
4703 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
4704 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4705 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4706 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
4707 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
4708 }
4709
4710 case SVE::BI__builtin_sve_svnot_b_z: {
4711 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
4712 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4713 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4714 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
4715 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
4716 }
4717
4718 case SVE::BI__builtin_sve_svmovlb_u16:
4719 case SVE::BI__builtin_sve_svmovlb_u32:
4720 case SVE::BI__builtin_sve_svmovlb_u64:
4721 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
4722
4723 case SVE::BI__builtin_sve_svmovlb_s16:
4724 case SVE::BI__builtin_sve_svmovlb_s32:
4725 case SVE::BI__builtin_sve_svmovlb_s64:
4726 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
4727
4728 case SVE::BI__builtin_sve_svmovlt_u16:
4729 case SVE::BI__builtin_sve_svmovlt_u32:
4730 case SVE::BI__builtin_sve_svmovlt_u64:
4731 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
4732
4733 case SVE::BI__builtin_sve_svmovlt_s16:
4734 case SVE::BI__builtin_sve_svmovlt_s32:
4735 case SVE::BI__builtin_sve_svmovlt_s64:
4736 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
4737
4738 case SVE::BI__builtin_sve_svpmullt_u16:
4739 case SVE::BI__builtin_sve_svpmullt_u64:
4740 case SVE::BI__builtin_sve_svpmullt_n_u16:
4741 case SVE::BI__builtin_sve_svpmullt_n_u64:
4742 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
4743
4744 case SVE::BI__builtin_sve_svpmullb_u16:
4745 case SVE::BI__builtin_sve_svpmullb_u64:
4746 case SVE::BI__builtin_sve_svpmullb_n_u16:
4747 case SVE::BI__builtin_sve_svpmullb_n_u64:
4748 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
4749
4750 case SVE::BI__builtin_sve_svdup_n_b8:
4751 case SVE::BI__builtin_sve_svdup_n_b16:
4752 case SVE::BI__builtin_sve_svdup_n_b32:
4753 case SVE::BI__builtin_sve_svdup_n_b64: {
4754 Value *CmpNE =
4755 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
4756 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
4757 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
4758 return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
4759 }
4760
4761 case SVE::BI__builtin_sve_svdupq_n_b8:
4762 case SVE::BI__builtin_sve_svdupq_n_b16:
4763 case SVE::BI__builtin_sve_svdupq_n_b32:
4764 case SVE::BI__builtin_sve_svdupq_n_b64:
4765 case SVE::BI__builtin_sve_svdupq_n_u8:
4766 case SVE::BI__builtin_sve_svdupq_n_s8:
4767 case SVE::BI__builtin_sve_svdupq_n_u64:
4768 case SVE::BI__builtin_sve_svdupq_n_f64:
4769 case SVE::BI__builtin_sve_svdupq_n_s64:
4770 case SVE::BI__builtin_sve_svdupq_n_u16:
4771 case SVE::BI__builtin_sve_svdupq_n_f16:
4772 case SVE::BI__builtin_sve_svdupq_n_bf16:
4773 case SVE::BI__builtin_sve_svdupq_n_s16:
4774 case SVE::BI__builtin_sve_svdupq_n_u32:
4775 case SVE::BI__builtin_sve_svdupq_n_f32:
4776 case SVE::BI__builtin_sve_svdupq_n_s32: {
4777 // These builtins are implemented by building a fixed-length vector from the
4778 // scalar operands and splatting it across the register with dupq_lane.
4779 unsigned NumOpnds = Ops.size();
4780
4781 bool IsBoolTy =
4782 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
4783
4784 // For svdupq_n_b* the element type is an integer of width 128/numelts,
4785 // so that the compare can use the width that is natural for the expected
4786 // number of predicate lanes.
4787 llvm::Type *EltTy = Ops[0]->getType();
4788 if (IsBoolTy)
4789 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
4790
4791 SmallVector<Value *, 16> VecOps;
4792 for (unsigned I = 0; I < NumOpnds; ++I)
4793 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
4794 Value *Vec = BuildVector(VecOps);
4795
4796 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
4797 Value *InsertSubVec = Builder.CreateInsertVector(
4798 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, uint64_t(0));
4799
4800 Function *F =
4801 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
4802 Value *DupQLane =
4803 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
4804
4805 if (!IsBoolTy)
4806 return DupQLane;
4807
4808 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4809 Value *Pred = EmitSVEAllTruePred(TypeFlags);
4810
4811 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
4812 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
4813 : Intrinsic::aarch64_sve_cmpne_wide,
4814 OverloadedTy);
4815 Value *Call = Builder.CreateCall(
4816 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
4817 return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
4818 }
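// Worked example (annotation, not part of the upstream file): for
// svdupq_n_b8 there are 16 boolean operands, so EltTy becomes i8
// (SVEBitsPerBlock / 16). The operands are zero-extended, packed into a
// <16 x i8> vector, inserted into <vscale x 16 x i8>, splatted with
// dupq_lane, and finally turned back into a predicate by the widening
// cmpne against zero emitted above.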
4819
4820 case SVE::BI__builtin_sve_svpfalse_b:
4821 return ConstantInt::getFalse(Ty);
4822
4823 case SVE::BI__builtin_sve_svpfalse_c: {
4824 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
4825 Function *CastToSVCountF =
4826 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
4827 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
4828 }
4829
4830 case SVE::BI__builtin_sve_svlen_bf16:
4831 case SVE::BI__builtin_sve_svlen_f16:
4832 case SVE::BI__builtin_sve_svlen_f32:
4833 case SVE::BI__builtin_sve_svlen_f64:
4834 case SVE::BI__builtin_sve_svlen_s8:
4835 case SVE::BI__builtin_sve_svlen_s16:
4836 case SVE::BI__builtin_sve_svlen_s32:
4837 case SVE::BI__builtin_sve_svlen_s64:
4838 case SVE::BI__builtin_sve_svlen_u8:
4839 case SVE::BI__builtin_sve_svlen_u16:
4840 case SVE::BI__builtin_sve_svlen_u32:
4841 case SVE::BI__builtin_sve_svlen_u64: {
4842 SVETypeFlags TF(Builtin->TypeModifier);
4843 return Builder.CreateElementCount(Ty, getSVEType(TF)->getElementCount());
4844 }
4845
4846 case SVE::BI__builtin_sve_svtbl2_u8:
4847 case SVE::BI__builtin_sve_svtbl2_s8:
4848 case SVE::BI__builtin_sve_svtbl2_u16:
4849 case SVE::BI__builtin_sve_svtbl2_s16:
4850 case SVE::BI__builtin_sve_svtbl2_u32:
4851 case SVE::BI__builtin_sve_svtbl2_s32:
4852 case SVE::BI__builtin_sve_svtbl2_u64:
4853 case SVE::BI__builtin_sve_svtbl2_s64:
4854 case SVE::BI__builtin_sve_svtbl2_f16:
4855 case SVE::BI__builtin_sve_svtbl2_bf16:
4856 case SVE::BI__builtin_sve_svtbl2_f32:
4857 case SVE::BI__builtin_sve_svtbl2_f64: {
4858 SVETypeFlags TF(Builtin->TypeModifier);
4859 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, getSVEType(TF));
4860 return Builder.CreateCall(F, Ops);
4861 }
4862
4863 case SVE::BI__builtin_sve_svset_neonq_s8:
4864 case SVE::BI__builtin_sve_svset_neonq_s16:
4865 case SVE::BI__builtin_sve_svset_neonq_s32:
4866 case SVE::BI__builtin_sve_svset_neonq_s64:
4867 case SVE::BI__builtin_sve_svset_neonq_u8:
4868 case SVE::BI__builtin_sve_svset_neonq_u16:
4869 case SVE::BI__builtin_sve_svset_neonq_u32:
4870 case SVE::BI__builtin_sve_svset_neonq_u64:
4871 case SVE::BI__builtin_sve_svset_neonq_f16:
4872 case SVE::BI__builtin_sve_svset_neonq_f32:
4873 case SVE::BI__builtin_sve_svset_neonq_f64:
4874 case SVE::BI__builtin_sve_svset_neonq_bf16: {
4875 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], uint64_t(0));
4876 }
4877
4878 case SVE::BI__builtin_sve_svget_neonq_s8:
4879 case SVE::BI__builtin_sve_svget_neonq_s16:
4880 case SVE::BI__builtin_sve_svget_neonq_s32:
4881 case SVE::BI__builtin_sve_svget_neonq_s64:
4882 case SVE::BI__builtin_sve_svget_neonq_u8:
4883 case SVE::BI__builtin_sve_svget_neonq_u16:
4884 case SVE::BI__builtin_sve_svget_neonq_u32:
4885 case SVE::BI__builtin_sve_svget_neonq_u64:
4886 case SVE::BI__builtin_sve_svget_neonq_f16:
4887 case SVE::BI__builtin_sve_svget_neonq_f32:
4888 case SVE::BI__builtin_sve_svget_neonq_f64:
4889 case SVE::BI__builtin_sve_svget_neonq_bf16: {
4890 return Builder.CreateExtractVector(Ty, Ops[0], uint64_t(0));
4891 }
4892
4893 case SVE::BI__builtin_sve_svdup_neonq_s8:
4894 case SVE::BI__builtin_sve_svdup_neonq_s16:
4895 case SVE::BI__builtin_sve_svdup_neonq_s32:
4896 case SVE::BI__builtin_sve_svdup_neonq_s64:
4897 case SVE::BI__builtin_sve_svdup_neonq_u8:
4898 case SVE::BI__builtin_sve_svdup_neonq_u16:
4899 case SVE::BI__builtin_sve_svdup_neonq_u32:
4900 case SVE::BI__builtin_sve_svdup_neonq_u64:
4901 case SVE::BI__builtin_sve_svdup_neonq_f16:
4902 case SVE::BI__builtin_sve_svdup_neonq_f32:
4903 case SVE::BI__builtin_sve_svdup_neonq_f64:
4904 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
4905 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
4906 uint64_t(0));
4907 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
4908 {Insert, Builder.getInt64(0)});
4909 }
4910 }
4911
4912 /// Should not happen
4913 return nullptr;
4914}
4915
4916static void swapCommutativeSMEOperands(unsigned BuiltinID,
4917 SmallVectorImpl<Value *> &Ops) {
4918 unsigned MultiVec;
4919 switch (BuiltinID) {
4920 default:
4921 return;
4922 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
4923 MultiVec = 1;
4924 break;
4925 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
4926 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
4927 MultiVec = 2;
4928 break;
4929 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
4930 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
4931 MultiVec = 4;
4932 break;
4933 }
4934
4935 if (MultiVec > 0)
4936 for (unsigned I = 0; I < MultiVec; ++I)
4937 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
4938}
4939
4940 Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
4941 const CallExpr *E) {
4942 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
4943 AArch64SMEIntrinsicsProvenSorted);
4944
4945 llvm::SmallVector<Value *, 4> Ops;
4946 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4947 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4948
4949 if (TypeFlags.isLoad() || TypeFlags.isStore())
4950 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4951 else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
4952 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4953 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
4954 BuiltinID == SME::BI__builtin_sme_svzero_za)
4955 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4956 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
4957 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
4958 BuiltinID == SME::BI__builtin_sme_svldr_za ||
4959 BuiltinID == SME::BI__builtin_sme_svstr_za)
4960 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4961
4962 // Emit set FPMR for intrinsics that require it
4963 if (TypeFlags.setsFPMR())
4964 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
4965 Ops.pop_back_val());
4966 // Handle builtins which require their multi-vector operands to be swapped
4967 swapCommutativeSMEOperands(BuiltinID, Ops);
4968
4969 auto isCntsBuiltin = [&]() {
4970 switch (BuiltinID) {
4971 default:
4972 return 0;
4973 case SME::BI__builtin_sme_svcntsb:
4974 return 8;
4975 case SME::BI__builtin_sme_svcntsh:
4976 return 4;
4977 case SME::BI__builtin_sme_svcntsw:
4978 return 2;
4979 }
4980 };
4981
4982 if (auto Mul = isCntsBuiltin()) {
4983 llvm::Value *Cntd =
4984 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd));
4985 return Builder.CreateMul(Cntd, llvm::ConstantInt::get(Int64Ty, Mul),
4986 "mulsvl", /* HasNUW */ true, /* HasNSW */ true);
4987 }
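// Worked example (annotation, not part of the upstream file): svcntsb(),
// svcntsh() and svcntsw() are all derived from cntsd here. With a 512-bit
// streaming vector length cntsd = 8, so svcntsb = 8 * 8 = 64 bytes,
// svcntsh = 8 * 4 = 32 halfwords and svcntsw = 8 * 2 = 16 words.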
4988
4989 // Should not happen!
4990 if (Builtin->LLVMIntrinsic == 0)
4991 return nullptr;
4992
4993 // Predicates must match the main datatype.
4994 for (Value *&Op : Ops)
4995 if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
4996 if (PredTy->getElementType()->isIntegerTy(1))
4997 Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));
4998
4999 Function *F =
5000 TypeFlags.isOverloadNone()
5001 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
5002 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
5003
5004 return Builder.CreateCall(F, Ops);
5005}
5006
5007/// Helper for the read/write/add/inc X18 builtins: read the X18 register and
5008/// return it as an i8 pointer.
5009 static llvm::Value *readX18AsPtr(CodeGenFunction &CGF) {
5010 LLVMContext &Context = CGF.CGM.getLLVMContext();
5011 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
5012 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5013 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5014 llvm::Function *F =
5015 CGF.CGM.getIntrinsic(Intrinsic::read_register, {CGF.Int64Ty});
5016 llvm::Value *X18 = CGF.Builder.CreateCall(F, Metadata);
5017 return CGF.Builder.CreateIntToPtr(X18, CGF.Int8PtrTy);
5018}
5019
5020 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
5021 const CallExpr *E,
5022 llvm::Triple::ArchType Arch) {
5023 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
5024 BuiltinID <= clang::AArch64::LastSVEBuiltin)
5025 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
5026
5027 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
5028 BuiltinID <= clang::AArch64::LastSMEBuiltin)
5029 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
5030
5031 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
5032 return EmitAArch64CpuSupports(E);
5033
5034 unsigned HintID = static_cast<unsigned>(-1);
5035 switch (BuiltinID) {
5036 default: break;
5037 case clang::AArch64::BI__builtin_arm_nop:
5038 HintID = 0;
5039 break;
5040 case clang::AArch64::BI__builtin_arm_yield:
5041 case clang::AArch64::BI__yield:
5042 HintID = 1;
5043 break;
5044 case clang::AArch64::BI__builtin_arm_wfe:
5045 case clang::AArch64::BI__wfe:
5046 HintID = 2;
5047 break;
5048 case clang::AArch64::BI__builtin_arm_wfi:
5049 case clang::AArch64::BI__wfi:
5050 HintID = 3;
5051 break;
5052 case clang::AArch64::BI__builtin_arm_sev:
5053 case clang::AArch64::BI__sev:
5054 HintID = 4;
5055 break;
5056 case clang::AArch64::BI__builtin_arm_sevl:
5057 case clang::AArch64::BI__sevl:
5058 HintID = 5;
5059 break;
5060 }
5061
5062 if (HintID != static_cast<unsigned>(-1)) {
5063 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
5064 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
5065 }
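// Illustrative mapping (annotation, not part of the upstream file): each of
// these builtins becomes a single call to llvm.aarch64.hint, which selects
// the corresponding hint instruction, e.g.
//   __builtin_arm_wfe();   // -> call void @llvm.aarch64.hint(i32 2)  ("wfe")
//   __builtin_arm_yield(); // -> call void @llvm.aarch64.hint(i32 1)  ("yield")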
5066
5067 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
5068 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
5069 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5070 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
5071 }
5072
5073 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
5074 // Create call to __arm_sme_state and store the results to the two pointers.
5075 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
5076 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
5077 false),
5078 "__arm_sme_state"));
5079 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
5080 "aarch64_pstate_sm_compatible");
5081 CI->setAttributes(Attrs);
5082 CI->setCallingConv(
5083 llvm::CallingConv::
5084 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
5085 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
5086 EmitPointerWithAlignment(E->getArg(0)));
5087 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
5088 EmitPointerWithAlignment(E->getArg(1)));
5089 }
5090
5091 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
5092 assert((getContext().getTypeSize(E->getType()) == 32) &&
5093 "rbit of unusual size!");
5094 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5095 return Builder.CreateCall(
5096 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5097 }
5098 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
5099 assert((getContext().getTypeSize(E->getType()) == 64) &&
5100 "rbit of unusual size!");
5101 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5102 return Builder.CreateCall(
5103 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5104 }
5105
5106 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
5107 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
5108 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5109 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
5110 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
5111 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
5112 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
5113 return Res;
5114 }
5115
5116 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
5117 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5118 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
5119 "cls");
5120 }
5121 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
5122 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5123 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
5124 "cls");
5125 }
5126
5127 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
5128 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
5129 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5130 llvm::Type *Ty = Arg->getType();
5131 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
5132 Arg, "frint32z");
5133 }
5134
5135 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
5136 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
5137 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5138 llvm::Type *Ty = Arg->getType();
5139 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
5140 Arg, "frint64z");
5141 }
5142
5143 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
5144 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
5145 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5146 llvm::Type *Ty = Arg->getType();
5147 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
5148 Arg, "frint32x");
5149 }
5150
5151 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
5152 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
5153 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5154 llvm::Type *Ty = Arg->getType();
5155 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
5156 Arg, "frint64x");
5157 }
5158
5159 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
5160 assert((getContext().getTypeSize(E->getType()) == 32) &&
5161 "__jcvt of unusual size!");
5162 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5163 return Builder.CreateCall(
5164 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
5165 }
5166
5167 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
5168 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
5169 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
5170 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
5171 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
5172 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
5173
5174 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
5175 // Load from the address via an LLVM intrinsic, receiving a
5176 // tuple of 8 i64 words, and store each one to ValPtr.
5177 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
5178 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
5179 llvm::Value *ToRet;
5180 for (size_t i = 0; i < 8; i++) {
5181 llvm::Value *ValOffsetPtr =
5182 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
5183 Address Addr =
5184 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
5185 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
5186 }
5187 return ToRet;
5188 } else {
5189 // Load 8 i64 words from ValPtr, and store them to the address
5190 // via an LLVM intrinsic.
5191 llvm::SmallVector<llvm::Value *, 9> Args;
5192 Args.push_back(MemAddr);
5193 for (size_t i = 0; i < 8; i++) {
5194 llvm::Value *ValOffsetPtr =
5195 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
5196 Address Addr =
5197 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
5198 Args.push_back(Builder.CreateLoad(Addr));
5199 }
5200
5201 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
5202 ? Intrinsic::aarch64_st64b
5203 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
5204 ? Intrinsic::aarch64_st64bv
5205 : Intrinsic::aarch64_st64bv0);
5206 Function *F = CGM.getIntrinsic(Intr);
5207 return Builder.CreateCall(F, Args);
5208 }
5209 }
5210
5211 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
5212 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
5213
5214 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
5215 ? Intrinsic::aarch64_rndr
5216 : Intrinsic::aarch64_rndrrs);
5217 Function *F = CGM.getIntrinsic(Intr);
5218 llvm::Value *Val = Builder.CreateCall(F);
5219 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
5220 Value *Status = Builder.CreateExtractValue(Val, 1);
5221
5222 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
5223 Builder.CreateStore(RandomValue, MemAddress);
5224 Status = Builder.CreateZExt(Status, Int32Ty);
5225 return Status;
5226 }
5227
5228 if (BuiltinID == clang::AArch64::BI__clear_cache) {
5229 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5230 const FunctionDecl *FD = E->getDirectCallee();
5231 Value *Ops[2];
5232 for (unsigned i = 0; i < 2; i++)
5233 Ops[i] = EmitScalarExpr(E->getArg(i));
5234 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5235 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5236 StringRef Name = FD->getName();
5237 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5238 }
5239
5240 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
5241 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
5242 getContext().getTypeSize(E->getType()) == 128) {
5243 Function *F =
5244 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
5245 ? Intrinsic::aarch64_ldaxp
5246 : Intrinsic::aarch64_ldxp);
5247
5248 Value *LdPtr = EmitScalarExpr(E->getArg(0));
5249 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
5250
5251 Value *Val0 = Builder.CreateExtractValue(Val, 1);
5252 Value *Val1 = Builder.CreateExtractValue(Val, 0);
5253 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5254 Val0 = Builder.CreateZExt(Val0, Int128Ty);
5255 Val1 = Builder.CreateZExt(Val1, Int128Ty);
5256
5257 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
5258 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5259 Val = Builder.CreateOr(Val, Val1);
5260 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5261 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
5262 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
5263 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5264
5265 QualType Ty = E->getType();
5266 llvm::Type *RealResTy = ConvertType(Ty);
5267 llvm::Type *IntTy =
5268 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
5269
5270 Function *F =
5271 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
5272 ? Intrinsic::aarch64_ldaxr
5273 : Intrinsic::aarch64_ldxr,
5274 DefaultPtrTy);
5275 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
5276 Val->addParamAttr(
5277 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
5278
5279 if (RealResTy->isPointerTy())
5280 return Builder.CreateIntToPtr(Val, RealResTy);
5281
5282 llvm::Type *IntResTy = llvm::IntegerType::get(
5283 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
5284 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
5285 RealResTy);
5286 }
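// For the 128-bit path above, the two i64 halves returned by ldxp/ldaxp are
// zero-extended to i128, one half is shifted left by 64 and OR'd with the
// other, and the result is bitcast to the user-visible type. Narrower loads
// go through ldxr/ldaxr with an elementtype attribute recording the in-memory
// integer width.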
5287
5288 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
5289 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
5290 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
5291 Function *F =
5292 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
5293 ? Intrinsic::aarch64_stlxp
5294 : Intrinsic::aarch64_stxp);
5295 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
5296
5297 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
5298 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
5299
5300 Tmp = Tmp.withElementType(STy);
5301 llvm::Value *Val = Builder.CreateLoad(Tmp);
5302
5303 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
5304 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
5305 Value *StPtr = EmitScalarExpr(E->getArg(1));
5306 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
5307 }
5308
5309 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
5310 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
5311 Value *StoreVal = EmitScalarExpr(E->getArg(0));
5312 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
5313
5314 QualType Ty = E->getArg(0)->getType();
5315 llvm::Type *StoreTy =
5316 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
5317
5318 if (StoreVal->getType()->isPointerTy())
5319 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
5320 else {
5321 llvm::Type *IntTy = llvm::IntegerType::get(
 5322 getLLVMContext(),
 5323 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
5324 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
5325 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
5326 }
5327
5328 Function *F =
5329 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
5330 ? Intrinsic::aarch64_stlxr
5331 : Intrinsic::aarch64_stxr,
5332 StoreAddr->getType());
5333 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
5334 CI->addParamAttr(
5335 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
5336 return CI;
5337 }
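// Scalar store-exclusive sketch: the value operand is normalized to i64
// (pointers via ptrtoint, other types via bitcast plus zero-extension) and
// handed to stxr/stlxr; the call's i32 result is the architectural
// store-exclusive status (0 on success).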
5338
5339 if (BuiltinID == clang::AArch64::BI__getReg) {
 5340 Expr::EvalResult Result;
 5341 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
5342 llvm_unreachable("Sema will ensure that the parameter is constant");
5343
5344 llvm::APSInt Value = Result.Val.getInt();
5345 LLVMContext &Context = CGM.getLLVMContext();
5346 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
5347
5348 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
5349 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5350 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5351
5352 llvm::Function *F =
5353 CGM.getIntrinsic(Intrinsic::read_register, {Int64Ty});
5354 return Builder.CreateCall(F, Metadata);
5355 }
5356
5357 if (BuiltinID == clang::AArch64::BI__break) {
 5358 Expr::EvalResult Result;
 5359 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
5360 llvm_unreachable("Sema will ensure that the parameter is constant");
5361
5362 llvm::Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
5363 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
5364 }
5365
5366 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
5367 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
5368 return Builder.CreateCall(F);
5369 }
5370
5371 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
5372 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
5373 llvm::SyncScope::SingleThread);
5374
5375 // CRC32
5376 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5377 switch (BuiltinID) {
5378 case clang::AArch64::BI__builtin_arm_crc32b:
5379 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
5380 case clang::AArch64::BI__builtin_arm_crc32cb:
5381 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
5382 case clang::AArch64::BI__builtin_arm_crc32h:
5383 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
5384 case clang::AArch64::BI__builtin_arm_crc32ch:
5385 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
5386 case clang::AArch64::BI__builtin_arm_crc32w:
5387 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
5388 case clang::AArch64::BI__builtin_arm_crc32cw:
5389 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
5390 case clang::AArch64::BI__builtin_arm_crc32d:
5391 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
5392 case clang::AArch64::BI__builtin_arm_crc32cd:
5393 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
5394 }
5395
5396 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5397 Value *Arg0 = EmitScalarExpr(E->getArg(0));
5398 Value *Arg1 = EmitScalarExpr(E->getArg(1));
5399 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5400
5401 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
5402 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
5403
5404 return Builder.CreateCall(F, {Arg0, Arg1});
5405 }
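// CRC usage sketch (illustrative): __builtin_arm_crc32b(crc, data) becomes a
// call to llvm.aarch64.crc32b with the data operand zero-extended (or bitcast)
// to the intrinsic's second parameter type; the *d variants map onto the
// crc32x/crc32cx intrinsics.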
5406
5407 // Memory Operations (MOPS)
5408 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
5409 Value *Dst = EmitScalarExpr(E->getArg(0));
5410 Value *Val = EmitScalarExpr(E->getArg(1));
5411 Value *Size = EmitScalarExpr(E->getArg(2));
5412 Val = Builder.CreateTrunc(Val, Int8Ty);
5413 Size = Builder.CreateIntCast(Size, Int64Ty, false);
5414 return Builder.CreateCall(
5415 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
5416 }
5417
5418 // Memory Tagging Extensions (MTE) Intrinsics
5419 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
5420 switch (BuiltinID) {
5421 case clang::AArch64::BI__builtin_arm_irg:
5422 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
5423 case clang::AArch64::BI__builtin_arm_addg:
5424 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
5425 case clang::AArch64::BI__builtin_arm_gmi:
5426 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
5427 case clang::AArch64::BI__builtin_arm_ldg:
5428 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
5429 case clang::AArch64::BI__builtin_arm_stg:
5430 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
5431 case clang::AArch64::BI__builtin_arm_subp:
5432 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
5433 }
5434
5435 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
5436 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
 5437 Value *Pointer = EmitScalarExpr(E->getArg(0));
 5438 Value *Mask = EmitScalarExpr(E->getArg(1));
5439
5440 Mask = Builder.CreateZExt(Mask, Int64Ty);
5441 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5442 {Pointer, Mask});
5443 }
5444 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
 5445 Value *Pointer = EmitScalarExpr(E->getArg(0));
 5446 Value *TagOffset = EmitScalarExpr(E->getArg(1));
5447
5448 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
5449 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5450 {Pointer, TagOffset});
5451 }
5452 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
 5453 Value *Pointer = EmitScalarExpr(E->getArg(0));
 5454 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
5455
5456 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
5457 return Builder.CreateCall(
5458 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
5459 }
 5460 // Although it is possible to supply a different return
 5461 // address (first arg) to this intrinsic, for now we set the
 5462 // return address to be the same as the input address.
5463 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
5464 Value *TagAddress = EmitScalarExpr(E->getArg(0));
5465 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5466 {TagAddress, TagAddress});
5467 }
 5468 // Although it is possible to supply a different tag (to set)
 5469 // to this intrinsic (as the first arg), for now we supply
 5470 // the tag that is in the input address arg (the common use case).
5471 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
5472 Value *TagAddress = EmitScalarExpr(E->getArg(0));
5473 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5474 {TagAddress, TagAddress});
5475 }
5476 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
5477 Value *PointerA = EmitScalarExpr(E->getArg(0));
5478 Value *PointerB = EmitScalarExpr(E->getArg(1));
5479 return Builder.CreateCall(
5480 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
5481 }
5482 }
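// MTE usage sketch (illustrative, assuming the ACLE spellings):
//   void *p = __builtin_arm_irg(ptr, mask);    // llvm.aarch64.irg
//   ptrdiff_t d = __builtin_arm_subp(a, b);    // llvm.aarch64.subp
// As the comments above note, ldg/stg currently reuse the tag address for
// both intrinsic operands.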
5483
5484 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5485 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
5486 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5487 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
5488 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
5489 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
5490 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
5491 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
5492
5493 SpecialRegisterAccessKind AccessKind = Write;
5494 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5495 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
5496 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5497 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
5498 AccessKind = VolatileRead;
5499
5500 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
5501 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
5502
5503 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5504 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
5505
5506 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5507 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
5508
5509 llvm::Type *ValueType;
5510 llvm::Type *RegisterType = Int64Ty;
5511 if (Is32Bit) {
5512 ValueType = Int32Ty;
5513 } else if (Is128Bit) {
5514 llvm::Type *Int128Ty =
5515 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
5516 ValueType = Int128Ty;
5517 RegisterType = Int128Ty;
5518 } else if (IsPointerBuiltin) {
5519 ValueType = VoidPtrTy;
5520 } else {
5521 ValueType = Int64Ty;
 5522 }
5523
5524 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
5525 AccessKind);
5526 }
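// Width selection above, in brief: the plain rsr/wsr forms read or write a
// 32-bit value through a 64-bit register, the 64-bit and pointer forms use
// i64, and the 128-bit forms switch both the value and the register type to
// i128 before deferring to EmitSpecialRegisterBuiltin.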
5527
5528 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5529 BuiltinID == clang::AArch64::BI_WriteStatusReg ||
5530 BuiltinID == clang::AArch64::BI__sys) {
5531 LLVMContext &Context = CGM.getLLVMContext();
5532
5533 unsigned SysReg =
5534 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
5535
5536 std::string SysRegStr;
5537 unsigned SysRegOp0 = (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5538 BuiltinID == clang::AArch64::BI_WriteStatusReg)
5539 ? ((1 << 1) | ((SysReg >> 14) & 1))
5540 : 1;
5541 llvm::raw_string_ostream(SysRegStr)
5542 << SysRegOp0 << ":" << ((SysReg >> 11) & 7) << ":"
5543 << ((SysReg >> 7) & 15) << ":" << ((SysReg >> 3) & 15) << ":"
5544 << (SysReg & 7);
5545
5546 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
5547 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5548 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5549
5550 llvm::Type *RegisterType = Int64Ty;
5551 llvm::Type *Types[] = { RegisterType };
5552
5553 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
5554 llvm::Function *F = CGM.getIntrinsic(Intrinsic::read_register, Types);
5555
5556 return Builder.CreateCall(F, Metadata);
5557 }
5558
5559 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
5560 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
5561 llvm::Value *Result = Builder.CreateCall(F, {Metadata, ArgValue});
5562 if (BuiltinID == clang::AArch64::BI__sys) {
5563 // Return 0 for convenience, even though MSVC returns some other undefined
5564 // value.
5565 Result = ConstantInt::get(Builder.getInt32Ty(), 0);
5566 }
5567 return Result;
5568 }
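// The metadata string built above spells the system register as
// "op0:op1:CRn:CRm:op2" fields decoded from the builtin's constant argument
// (field names per the AArch64 MRS/MSR encoding); for __sys, op0 is fixed to
// 1 and the builtin's result is forced to 0 for convenience.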
5569
5570 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
5571 llvm::Function *F =
5572 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
5573 return Builder.CreateCall(F);
5574 }
5575
5576 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
5577 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
5578 return Builder.CreateCall(F);
5579 }
5580
5581 if (BuiltinID == clang::AArch64::BI__mulh ||
5582 BuiltinID == clang::AArch64::BI__umulh) {
5583 llvm::Type *ResType = ConvertType(E->getType());
5584 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5585
5586 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
5587 Value *LHS =
5588 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
5589 Value *RHS =
5590 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
5591
5592 Value *MulResult, *HigherBits;
5593 if (IsSigned) {
5594 MulResult = Builder.CreateNSWMul(LHS, RHS);
5595 HigherBits = Builder.CreateAShr(MulResult, 64);
5596 } else {
5597 MulResult = Builder.CreateNUWMul(LHS, RHS);
5598 HigherBits = Builder.CreateLShr(MulResult, 64);
5599 }
5600 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
5601
5602 return HigherBits;
5603 }
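// High-multiply sketch: both operands are widened to i128, multiplied, and
// the top 64 bits are extracted with an arithmetic (signed) or logical
// (unsigned) right shift; e.g. __umulh(a, b) behaves like
// (unsigned __int128)a * b >> 64.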
5604
5605 if (BuiltinID == AArch64::BI__writex18byte ||
5606 BuiltinID == AArch64::BI__writex18word ||
5607 BuiltinID == AArch64::BI__writex18dword ||
5608 BuiltinID == AArch64::BI__writex18qword) {
5609 // Process the args first
5610 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5611 Value *DataArg = EmitScalarExpr(E->getArg(1));
5612
5613 // Read x18 as i8*
5614 llvm::Value *X18 = readX18AsPtr(*this);
5615
5616 // Store val at x18 + offset
5617 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5618 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5619 StoreInst *Store =
5620 Builder.CreateAlignedStore(DataArg, Ptr, CharUnits::One());
5621 return Store;
5622 }
5623
5624 if (BuiltinID == AArch64::BI__readx18byte ||
5625 BuiltinID == AArch64::BI__readx18word ||
5626 BuiltinID == AArch64::BI__readx18dword ||
5627 BuiltinID == AArch64::BI__readx18qword) {
5628 // Process the args first
5629 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5630
5631 // Read x18 as i8*
5632 llvm::Value *X18 = readX18AsPtr(*this);
5633
5634 // Load x18 + offset
5635 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5636 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5637 llvm::Type *IntTy = ConvertType(E->getType());
5638 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
5639 return Load;
5640 }
5641
5642 if (BuiltinID == AArch64::BI__addx18byte ||
5643 BuiltinID == AArch64::BI__addx18word ||
5644 BuiltinID == AArch64::BI__addx18dword ||
5645 BuiltinID == AArch64::BI__addx18qword ||
5646 BuiltinID == AArch64::BI__incx18byte ||
5647 BuiltinID == AArch64::BI__incx18word ||
5648 BuiltinID == AArch64::BI__incx18dword ||
5649 BuiltinID == AArch64::BI__incx18qword) {
5650 llvm::Type *IntTy;
5651 bool isIncrement;
5652 switch (BuiltinID) {
5653 case AArch64::BI__incx18byte:
5654 IntTy = Int8Ty;
5655 isIncrement = true;
5656 break;
5657 case AArch64::BI__incx18word:
5658 IntTy = Int16Ty;
5659 isIncrement = true;
5660 break;
5661 case AArch64::BI__incx18dword:
5662 IntTy = Int32Ty;
5663 isIncrement = true;
5664 break;
5665 case AArch64::BI__incx18qword:
5666 IntTy = Int64Ty;
5667 isIncrement = true;
5668 break;
5669 default:
5670 IntTy = ConvertType(E->getArg(1)->getType());
5671 isIncrement = false;
5672 break;
5673 }
5674 // Process the args first
5675 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5676 Value *ValToAdd =
5677 isIncrement ? ConstantInt::get(IntTy, 1) : EmitScalarExpr(E->getArg(1));
5678
5679 // Read x18 as i8*
5680 llvm::Value *X18 = readX18AsPtr(*this);
5681
5682 // Load x18 + offset
5683 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5684 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5685 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
5686
5687 // Add values
5688 Value *AddResult = Builder.CreateAdd(Load, ValToAdd);
5689
5690 // Store val at x18 + offset
5691 StoreInst *Store =
5692 Builder.CreateAlignedStore(AddResult, Ptr, CharUnits::One());
5693 return Store;
5694 }
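// The MSVC-style x18 accessors above share one pattern: read register x18 as
// a raw byte pointer, add the byte offset, then load/store the requested
// width at byte alignment. __incx18* is the __addx18* path with a constant 1
// of the element width.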
5695
5696 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
5697 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
5698 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
5699 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
5700 Value *Arg = EmitScalarExpr(E->getArg(0));
5701 llvm::Type *RetTy = ConvertType(E->getType());
5702 return Builder.CreateBitCast(Arg, RetTy);
5703 }
5704
5705 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5706 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5707 BuiltinID == AArch64::BI_CountLeadingZeros ||
5708 BuiltinID == AArch64::BI_CountLeadingZeros64) {
5709 Value *Arg = EmitScalarExpr(E->getArg(0));
5710 llvm::Type *ArgType = Arg->getType();
5711
5712 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5713 BuiltinID == AArch64::BI_CountLeadingOnes64)
5714 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
5715
5716 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
5717 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
5718
5719 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5720 BuiltinID == AArch64::BI_CountLeadingZeros64)
5721 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5722 return Result;
5723 }
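// _CountLeadingOnes* is lowered as llvm.ctlz of the bitwise-complemented
// input; the 64-bit variants truncate the result back to the 32-bit return
// type these MSVC-style builtins use.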
5724
5725 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
5726 BuiltinID == AArch64::BI_CountLeadingSigns64) {
5727 Value *Arg = EmitScalarExpr(E->getArg(0));
5728
5729 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
5730 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
5731 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
5732
5733 Value *Result = Builder.CreateCall(F, Arg, "cls");
5734 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
5735 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5736 return Result;
5737 }
5738
5739 if (BuiltinID == AArch64::BI_CountOneBits ||
5740 BuiltinID == AArch64::BI_CountOneBits64) {
5741 Value *ArgValue = EmitScalarExpr(E->getArg(0));
5742 llvm::Type *ArgType = ArgValue->getType();
5743 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
5744
5745 Value *Result = Builder.CreateCall(F, ArgValue);
5746 if (BuiltinID == AArch64::BI_CountOneBits64)
5747 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5748 return Result;
5749 }
5750
5751 if (BuiltinID == AArch64::BI__prefetch) {
 5752 Value *Address = EmitScalarExpr(E->getArg(0));
 5753 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
5754 Value *Locality = ConstantInt::get(Int32Ty, 3);
5755 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
5756 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
5757 return Builder.CreateCall(F, {Address, RW, Locality, Data});
5758 }
5759
5760 if (BuiltinID == AArch64::BI__hlt) {
5761 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
5762 Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
5763
5764 // Return 0 for convenience, even though MSVC returns some other undefined
5765 // value.
5766 return ConstantInt::get(Builder.getInt32Ty(), 0);
5767 }
5768
5769 if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
5770 return Builder.CreateFPTrunc(
5771 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
5772 Builder.getFloatTy()),
5773 Builder.getBFloatTy());
5774
5775 // Handle MSVC intrinsics before argument evaluation to prevent double
5776 // evaluation.
5777 if (std::optional<MSVCIntrin> MsvcIntId =
 5778 translateAarch64ToMsvcIntrin(BuiltinID))
 5779 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
5780
 5781 // Some intrinsics are equivalent; if they are, use the base intrinsic ID.
5782 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
5783 return P.first == BuiltinID;
5784 });
5785 if (It != end(NEONEquivalentIntrinsicMap))
5786 BuiltinID = It->second;
5787
5788 // Find out if any arguments are required to be integer constant
5789 // expressions.
5790 unsigned ICEArguments = 0;
 5791 ASTContext::GetBuiltinTypeError Error;
 5792 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5793 assert(Error == ASTContext::GE_None && "Should not codegen an error");
5794
 5795 llvm::SmallVector<Value *, 4> Ops;
 5796 Address PtrOp0 = Address::invalid();
5797 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
5798 if (i == 0) {
5799 switch (BuiltinID) {
5800 case NEON::BI__builtin_neon_vld1_v:
5801 case NEON::BI__builtin_neon_vld1q_v:
5802 case NEON::BI__builtin_neon_vld1_dup_v:
5803 case NEON::BI__builtin_neon_vld1q_dup_v:
5804 case NEON::BI__builtin_neon_vld1_lane_v:
5805 case NEON::BI__builtin_neon_vld1q_lane_v:
5806 case NEON::BI__builtin_neon_vst1_v:
5807 case NEON::BI__builtin_neon_vst1q_v:
5808 case NEON::BI__builtin_neon_vst1_lane_v:
5809 case NEON::BI__builtin_neon_vst1q_lane_v:
5810 case NEON::BI__builtin_neon_vldap1_lane_s64:
5811 case NEON::BI__builtin_neon_vldap1q_lane_s64:
5812 case NEON::BI__builtin_neon_vstl1_lane_s64:
5813 case NEON::BI__builtin_neon_vstl1q_lane_s64:
5814 // Get the alignment for the argument in addition to the value;
5815 // we'll use it later.
5816 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
5817 Ops.push_back(PtrOp0.emitRawPointer(*this));
5818 continue;
5819 }
5820 }
5821 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
5822 }
5823
5824 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
5825 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
5826 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5827
5828 if (Builtin) {
5829 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
 5830 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
 5831 assert(Result && "SISD intrinsic should have been handled");
5832 return Result;
5833 }
5834
5835 const Expr *Arg = E->getArg(E->getNumArgs()-1);
 5836 NeonTypeFlags Type(0);
 5837 if (std::optional<llvm::APSInt> Result =
 5838 Arg->getIntegerConstantExpr(getContext()))
 5839 // Determine the type of this overloaded NEON intrinsic.
 5840 Type = NeonTypeFlags(Result->getZExtValue());
5841
5842 bool usgn = Type.isUnsigned();
5843 bool quad = Type.isQuad();
5844
5845 // Handle non-overloaded intrinsics first.
5846 switch (BuiltinID) {
5847 default: break;
5848 case NEON::BI__builtin_neon_vabsh_f16:
5849 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5850 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
5851 case NEON::BI__builtin_neon_vaddq_p128: {
5852 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
5853 Ops.push_back(EmitScalarExpr(E->getArg(1)));
5854 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5855 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5856 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
5857 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5858 return Builder.CreateBitCast(Ops[0], Int128Ty);
5859 }
5860 case NEON::BI__builtin_neon_vldrq_p128: {
5861 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5862 Value *Ptr = EmitScalarExpr(E->getArg(0));
5863 return Builder.CreateAlignedLoad(Int128Ty, Ptr,
 5864 CharUnits::fromQuantity(16));
 5865 }
5866 case NEON::BI__builtin_neon_vstrq_p128: {
5867 Value *Ptr = Ops[0];
5868 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
5869 }
5870 case NEON::BI__builtin_neon_vcvts_f32_u32:
5871 case NEON::BI__builtin_neon_vcvtd_f64_u64:
5872 usgn = true;
5873 [[fallthrough]];
5874 case NEON::BI__builtin_neon_vcvts_f32_s32:
5875 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5876 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5877 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5878 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5879 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5880 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5881 if (usgn)
5882 return Builder.CreateUIToFP(Ops[0], FTy);
5883 return Builder.CreateSIToFP(Ops[0], FTy);
5884 }
5885 case NEON::BI__builtin_neon_vcvth_f16_u16:
5886 case NEON::BI__builtin_neon_vcvth_f16_u32:
5887 case NEON::BI__builtin_neon_vcvth_f16_u64:
5888 usgn = true;
5889 [[fallthrough]];
5890 case NEON::BI__builtin_neon_vcvth_f16_s16:
5891 case NEON::BI__builtin_neon_vcvth_f16_s32:
5892 case NEON::BI__builtin_neon_vcvth_f16_s64: {
5893 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5894 llvm::Type *FTy = HalfTy;
5895 llvm::Type *InTy;
5896 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
5897 InTy = Int64Ty;
5898 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
5899 InTy = Int32Ty;
5900 else
5901 InTy = Int16Ty;
5902 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5903 if (usgn)
5904 return Builder.CreateUIToFP(Ops[0], FTy);
5905 return Builder.CreateSIToFP(Ops[0], FTy);
5906 }
5907 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5908 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5909 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5910 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5911 case NEON::BI__builtin_neon_vcvth_u16_f16:
5912 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5913 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5914 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5915 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5916 case NEON::BI__builtin_neon_vcvth_s16_f16: {
5917 unsigned Int;
5918 llvm::Type *InTy = Int16Ty;
5919 llvm::Type* FTy = HalfTy;
5920 llvm::Type *Tys[2] = {InTy, FTy};
5921 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5922 switch (BuiltinID) {
5923 default: llvm_unreachable("missing builtin ID in switch!");
5924 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5925 Int = Intrinsic::aarch64_neon_fcvtau; break;
5926 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5927 Int = Intrinsic::aarch64_neon_fcvtmu; break;
5928 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5929 Int = Intrinsic::aarch64_neon_fcvtnu; break;
5930 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5931 Int = Intrinsic::aarch64_neon_fcvtpu; break;
5932 case NEON::BI__builtin_neon_vcvth_u16_f16:
5933 Int = Intrinsic::aarch64_neon_fcvtzu; break;
5934 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5935 Int = Intrinsic::aarch64_neon_fcvtas; break;
5936 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5937 Int = Intrinsic::aarch64_neon_fcvtms; break;
5938 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5939 Int = Intrinsic::aarch64_neon_fcvtns; break;
5940 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5941 Int = Intrinsic::aarch64_neon_fcvtps; break;
5942 case NEON::BI__builtin_neon_vcvth_s16_f16:
5943 Int = Intrinsic::aarch64_neon_fcvtzs; break;
5944 }
5945 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
5946 }
5947 case NEON::BI__builtin_neon_vcaleh_f16:
5948 case NEON::BI__builtin_neon_vcalth_f16:
5949 case NEON::BI__builtin_neon_vcageh_f16:
5950 case NEON::BI__builtin_neon_vcagth_f16: {
5951 unsigned Int;
5952 llvm::Type* InTy = Int32Ty;
5953 llvm::Type* FTy = HalfTy;
5954 llvm::Type *Tys[2] = {InTy, FTy};
5955 Ops.push_back(EmitScalarExpr(E->getArg(1)));
5956 switch (BuiltinID) {
5957 default: llvm_unreachable("missing builtin ID in switch!");
5958 case NEON::BI__builtin_neon_vcageh_f16:
5959 Int = Intrinsic::aarch64_neon_facge; break;
5960 case NEON::BI__builtin_neon_vcagth_f16:
5961 Int = Intrinsic::aarch64_neon_facgt; break;
5962 case NEON::BI__builtin_neon_vcaleh_f16:
5963 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
5964 case NEON::BI__builtin_neon_vcalth_f16:
5965 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
5966 }
5967 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
5968 return Builder.CreateTrunc(Ops[0], Int16Ty);
5969 }
5970 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5971 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
5972 unsigned Int;
5973 llvm::Type* InTy = Int32Ty;
5974 llvm::Type* FTy = HalfTy;
5975 llvm::Type *Tys[2] = {InTy, FTy};
5976 Ops.push_back(EmitScalarExpr(E->getArg(1)));
5977 switch (BuiltinID) {
5978 default: llvm_unreachable("missing builtin ID in switch!");
5979 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5980 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
5981 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
5982 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
5983 }
5984 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
5985 return Builder.CreateTrunc(Ops[0], Int16Ty);
5986 }
5987 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5988 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
5989 unsigned Int;
5990 llvm::Type* FTy = HalfTy;
5991 llvm::Type* InTy = Int32Ty;
5992 llvm::Type *Tys[2] = {FTy, InTy};
5993 Ops.push_back(EmitScalarExpr(E->getArg(1)));
5994 switch (BuiltinID) {
5995 default: llvm_unreachable("missing builtin ID in switch!");
5996 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5997 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
5998 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
5999 break;
6000 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
6001 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
6002 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
6003 break;
6004 }
6005 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
6006 }
6007 case NEON::BI__builtin_neon_vpaddd_s64: {
6008 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
6009 Value *Vec = EmitScalarExpr(E->getArg(0));
 6010 // The vector is v2i64, so make sure it's bitcast to that.
6011 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
6012 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
6013 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
6014 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
6015 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
 6016 // Pairwise addition of a v2i64 into a scalar i64.
6017 return Builder.CreateAdd(Op0, Op1, "vpaddd");
6018 }
6019 case NEON::BI__builtin_neon_vpaddd_f64: {
6020 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
6021 Value *Vec = EmitScalarExpr(E->getArg(0));
6022 // The vector is v2f64, so make sure it's bitcast to that.
6023 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
6024 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
6025 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
6026 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
6027 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
6028 // Pairwise addition of a v2f64 into a scalar f64.
6029 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
6030 }
6031 case NEON::BI__builtin_neon_vpadds_f32: {
6032 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
6033 Value *Vec = EmitScalarExpr(E->getArg(0));
6034 // The vector is v2f32, so make sure it's bitcast to that.
6035 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
6036 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
6037 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
6038 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
6039 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
6040 // Pairwise addition of a v2f32 into a scalar f32.
6041 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
6042 }
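// The three scalar pairwise-add cases above share one pattern: bitcast the
// operand to a two-element vector, extract lane 0 and lane 1, and emit a
// plain integer or floating-point add of the two lanes.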
6043 case NEON::BI__builtin_neon_vceqzd_s64:
6044 Ops.push_back(EmitScalarExpr(E->getArg(0)));
 6045 return EmitAArch64CompareBuiltinExpr(
 6046 Ops[0], ConvertType(E->getCallReturnType(getContext())),
 6047 ICmpInst::ICMP_EQ, "vceqz");
6048 case NEON::BI__builtin_neon_vceqzd_f64:
6049 case NEON::BI__builtin_neon_vceqzs_f32:
6050 case NEON::BI__builtin_neon_vceqzh_f16:
6051 Ops.push_back(EmitScalarExpr(E->getArg(0)));
 6052 return EmitAArch64CompareBuiltinExpr(
 6053 Ops[0], ConvertType(E->getCallReturnType(getContext())),
 6054 ICmpInst::FCMP_OEQ, "vceqz");
6055 case NEON::BI__builtin_neon_vcgezd_s64:
6056 Ops.push_back(EmitScalarExpr(E->getArg(0)));
 6057 return EmitAArch64CompareBuiltinExpr(
 6058 Ops[0], ConvertType(E->getCallReturnType(getContext())),
 6059 ICmpInst::ICMP_SGE, "vcgez");
6060 case NEON::BI__builtin_neon_vcgezd_f64:
6061 case NEON::BI__builtin_neon_vcgezs_f32:
6062 case NEON::BI__builtin_neon_vcgezh_f16:
6063 Ops.push_back(EmitScalarExpr(E->getArg(0)));
 6064 return EmitAArch64CompareBuiltinExpr(
 6065 Ops[0], ConvertType(E->getCallReturnType(getContext())),
 6066 ICmpInst::FCMP_OGE, "vcgez");
6067 case NEON::BI__builtin_neon_vclezd_s64:
6068 Ops.push_back(EmitScalarExpr(E->getArg(0)));
 6069 return EmitAArch64CompareBuiltinExpr(
 6070 Ops[0], ConvertType(E->getCallReturnType(getContext())),
 6071 ICmpInst::ICMP_SLE, "vclez");
6072 case NEON::BI__builtin_neon_vclezd_f64:
6073 case NEON::BI__builtin_neon_vclezs_f32:
6074 case NEON::BI__builtin_neon_vclezh_f16:
6075 Ops.push_back(EmitScalarExpr(E->getArg(0)));
 6076 return EmitAArch64CompareBuiltinExpr(
 6077 Ops[0], ConvertType(E->getCallReturnType(getContext())),
 6078 ICmpInst::FCMP_OLE, "vclez");
6079 case NEON::BI__builtin_neon_vcgtzd_s64:
6080 Ops.push_back(EmitScalarExpr(E->getArg(0)));
 6081 return EmitAArch64CompareBuiltinExpr(
 6082 Ops[0], ConvertType(E->getCallReturnType(getContext())),
 6083 ICmpInst::ICMP_SGT, "vcgtz");
6084 case NEON::BI__builtin_neon_vcgtzd_f64:
6085 case NEON::BI__builtin_neon_vcgtzs_f32:
6086 case NEON::BI__builtin_neon_vcgtzh_f16:
6087 Ops.push_back(EmitScalarExpr(E->getArg(0)));
 6088 return EmitAArch64CompareBuiltinExpr(
 6089 Ops[0], ConvertType(E->getCallReturnType(getContext())),
 6090 ICmpInst::FCMP_OGT, "vcgtz");
6091 case NEON::BI__builtin_neon_vcltzd_s64:
6092 Ops.push_back(EmitScalarExpr(E->getArg(0)));
 6093 return EmitAArch64CompareBuiltinExpr(
 6094 Ops[0], ConvertType(E->getCallReturnType(getContext())),
 6095 ICmpInst::ICMP_SLT, "vcltz");
6096
6097 case NEON::BI__builtin_neon_vcltzd_f64:
6098 case NEON::BI__builtin_neon_vcltzs_f32:
6099 case NEON::BI__builtin_neon_vcltzh_f16:
6100 Ops.push_back(EmitScalarExpr(E->getArg(0)));
 6101 return EmitAArch64CompareBuiltinExpr(
 6102 Ops[0], ConvertType(E->getCallReturnType(getContext())),
 6103 ICmpInst::FCMP_OLT, "vcltz");
6104
6105 case NEON::BI__builtin_neon_vceqzd_u64: {
6106 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6107 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6108 Ops[0] =
6109 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
6110 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
6111 }
6112 case NEON::BI__builtin_neon_vceqd_f64:
6113 case NEON::BI__builtin_neon_vcled_f64:
6114 case NEON::BI__builtin_neon_vcltd_f64:
6115 case NEON::BI__builtin_neon_vcged_f64:
6116 case NEON::BI__builtin_neon_vcgtd_f64: {
6117 llvm::CmpInst::Predicate P;
6118 switch (BuiltinID) {
6119 default: llvm_unreachable("missing builtin ID in switch!");
6120 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
6121 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
6122 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
6123 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
6124 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
6125 }
6126 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6127 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6128 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6129 if (P == llvm::FCmpInst::FCMP_OEQ)
6130 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6131 else
6132 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
6133 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
6134 }
6135 case NEON::BI__builtin_neon_vceqs_f32:
6136 case NEON::BI__builtin_neon_vcles_f32:
6137 case NEON::BI__builtin_neon_vclts_f32:
6138 case NEON::BI__builtin_neon_vcges_f32:
6139 case NEON::BI__builtin_neon_vcgts_f32: {
6140 llvm::CmpInst::Predicate P;
6141 switch (BuiltinID) {
6142 default: llvm_unreachable("missing builtin ID in switch!");
6143 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
6144 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
6145 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
6146 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
6147 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
6148 }
6149 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6150 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
6151 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
6152 if (P == llvm::FCmpInst::FCMP_OEQ)
6153 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6154 else
6155 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
6156 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
6157 }
6158 case NEON::BI__builtin_neon_vceqh_f16:
6159 case NEON::BI__builtin_neon_vcleh_f16:
6160 case NEON::BI__builtin_neon_vclth_f16:
6161 case NEON::BI__builtin_neon_vcgeh_f16:
6162 case NEON::BI__builtin_neon_vcgth_f16: {
6163 llvm::CmpInst::Predicate P;
6164 switch (BuiltinID) {
6165 default: llvm_unreachable("missing builtin ID in switch!");
6166 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
6167 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
6168 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
6169 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
6170 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
6171 }
6172 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6173 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
6174 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
6175 if (P == llvm::FCmpInst::FCMP_OEQ)
6176 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6177 else
6178 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
6179 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
6180 }
6181 case NEON::BI__builtin_neon_vceqd_s64:
6182 case NEON::BI__builtin_neon_vceqd_u64:
6183 case NEON::BI__builtin_neon_vcgtd_s64:
6184 case NEON::BI__builtin_neon_vcgtd_u64:
6185 case NEON::BI__builtin_neon_vcltd_s64:
6186 case NEON::BI__builtin_neon_vcltd_u64:
6187 case NEON::BI__builtin_neon_vcged_u64:
6188 case NEON::BI__builtin_neon_vcged_s64:
6189 case NEON::BI__builtin_neon_vcled_u64:
6190 case NEON::BI__builtin_neon_vcled_s64: {
6191 llvm::CmpInst::Predicate P;
6192 switch (BuiltinID) {
6193 default: llvm_unreachable("missing builtin ID in switch!");
6194 case NEON::BI__builtin_neon_vceqd_s64:
6195 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
6196 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
6197 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
6198 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
6199 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
6200 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
6201 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
6202 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
6203 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
6204 }
6205 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6206 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6207 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6208 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
6209 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
6210 }
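// The scalar integer compares above sign-extend the i1 result to i64 so that
// "true" becomes an all-ones value, matching the all-bits-set/all-bits-clear
// convention of the NEON scalar compare instructions.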
6211 case NEON::BI__builtin_neon_vtstd_s64:
6212 case NEON::BI__builtin_neon_vtstd_u64: {
6213 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6214 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6215 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6216 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
6217 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
6218 llvm::Constant::getNullValue(Int64Ty));
6219 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
6220 }
6221 case NEON::BI__builtin_neon_vset_lane_i8:
6222 case NEON::BI__builtin_neon_vset_lane_i16:
6223 case NEON::BI__builtin_neon_vset_lane_i32:
6224 case NEON::BI__builtin_neon_vset_lane_i64:
6225 case NEON::BI__builtin_neon_vset_lane_bf16:
6226 case NEON::BI__builtin_neon_vset_lane_f32:
6227 case NEON::BI__builtin_neon_vsetq_lane_i8:
6228 case NEON::BI__builtin_neon_vsetq_lane_i16:
6229 case NEON::BI__builtin_neon_vsetq_lane_i32:
6230 case NEON::BI__builtin_neon_vsetq_lane_i64:
6231 case NEON::BI__builtin_neon_vsetq_lane_bf16:
6232 case NEON::BI__builtin_neon_vsetq_lane_f32:
6233 Ops.push_back(EmitScalarExpr(E->getArg(2)));
6234 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6235 case NEON::BI__builtin_neon_vset_lane_f64:
6236 // The vector type needs a cast for the v1f64 variant.
6237 Ops[1] =
6238 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
6239 Ops.push_back(EmitScalarExpr(E->getArg(2)));
6240 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6241 case NEON::BI__builtin_neon_vset_lane_mf8:
6242 case NEON::BI__builtin_neon_vsetq_lane_mf8:
6243 Ops.push_back(EmitScalarExpr(E->getArg(2)));
 6244 // The input vector type needs a cast to a scalar type.
6245 Ops[0] =
6246 Builder.CreateBitCast(Ops[0], llvm::Type::getInt8Ty(getLLVMContext()));
6247 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6248 case NEON::BI__builtin_neon_vsetq_lane_f64:
6249 // The vector type needs a cast for the v2f64 variant.
6250 Ops[1] =
6251 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
6252 Ops.push_back(EmitScalarExpr(E->getArg(2)));
6253 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6254
6255 case NEON::BI__builtin_neon_vget_lane_i8:
6256 case NEON::BI__builtin_neon_vdupb_lane_i8:
6257 Ops[0] =
6258 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
6259 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6260 "vget_lane");
6261 case NEON::BI__builtin_neon_vgetq_lane_i8:
6262 case NEON::BI__builtin_neon_vdupb_laneq_i8:
6263 Ops[0] =
6264 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
6265 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6266 "vgetq_lane");
6267 case NEON::BI__builtin_neon_vget_lane_mf8:
6268 case NEON::BI__builtin_neon_vdupb_lane_mf8:
6269 case NEON::BI__builtin_neon_vgetq_lane_mf8:
6270 case NEON::BI__builtin_neon_vdupb_laneq_mf8:
6271 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6272 "vget_lane");
6273 case NEON::BI__builtin_neon_vget_lane_i16:
6274 case NEON::BI__builtin_neon_vduph_lane_i16:
6275 Ops[0] =
6276 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
6277 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6278 "vget_lane");
6279 case NEON::BI__builtin_neon_vgetq_lane_i16:
6280 case NEON::BI__builtin_neon_vduph_laneq_i16:
6281 Ops[0] =
6282 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
6283 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6284 "vgetq_lane");
6285 case NEON::BI__builtin_neon_vget_lane_i32:
6286 case NEON::BI__builtin_neon_vdups_lane_i32:
6287 Ops[0] =
6288 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
6289 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6290 "vget_lane");
6291 case NEON::BI__builtin_neon_vdups_lane_f32:
6292 Ops[0] =
6293 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
6294 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6295 "vdups_lane");
6296 case NEON::BI__builtin_neon_vgetq_lane_i32:
6297 case NEON::BI__builtin_neon_vdups_laneq_i32:
6298 Ops[0] =
6299 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
6300 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6301 "vgetq_lane");
6302 case NEON::BI__builtin_neon_vget_lane_i64:
6303 case NEON::BI__builtin_neon_vdupd_lane_i64:
6304 Ops[0] =
6305 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
6306 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6307 "vget_lane");
6308 case NEON::BI__builtin_neon_vdupd_lane_f64:
6309 Ops[0] =
6310 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
6311 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6312 "vdupd_lane");
6313 case NEON::BI__builtin_neon_vgetq_lane_i64:
6314 case NEON::BI__builtin_neon_vdupd_laneq_i64:
6315 Ops[0] =
6316 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
6317 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6318 "vgetq_lane");
6319 case NEON::BI__builtin_neon_vget_lane_f32:
6320 Ops[0] =
6321 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
6322 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6323 "vget_lane");
6324 case NEON::BI__builtin_neon_vget_lane_f64:
6325 Ops[0] =
6326 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
6327 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6328 "vget_lane");
6329 case NEON::BI__builtin_neon_vgetq_lane_f32:
6330 case NEON::BI__builtin_neon_vdups_laneq_f32:
6331 Ops[0] =
6332 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
6333 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6334 "vgetq_lane");
6335 case NEON::BI__builtin_neon_vgetq_lane_f64:
6336 case NEON::BI__builtin_neon_vdupd_laneq_f64:
6337 Ops[0] =
6338 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
6339 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6340 "vgetq_lane");
6341 case NEON::BI__builtin_neon_vaddh_f16:
6342 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6343 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
6344 case NEON::BI__builtin_neon_vsubh_f16:
6345 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6346 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
6347 case NEON::BI__builtin_neon_vmulh_f16:
6348 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6349 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
6350 case NEON::BI__builtin_neon_vdivh_f16:
6351 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6352 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
6353 case NEON::BI__builtin_neon_vfmah_f16:
6354 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
 6355 return emitCallMaybeConstrainedFPBuiltin(
 6356 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
6357 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
6358 case NEON::BI__builtin_neon_vfmsh_f16: {
6359 Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
6360
6361 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
 6362 return emitCallMaybeConstrainedFPBuiltin(
 6363 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
6364 {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
6365 }
6366 case NEON::BI__builtin_neon_vaddd_s64:
6367 case NEON::BI__builtin_neon_vaddd_u64:
6368 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
6369 case NEON::BI__builtin_neon_vsubd_s64:
6370 case NEON::BI__builtin_neon_vsubd_u64:
6371 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
6372 case NEON::BI__builtin_neon_vqdmlalh_s16:
6373 case NEON::BI__builtin_neon_vqdmlslh_s16: {
6374 SmallVector<Value *, 2> ProductOps;
6375 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
6376 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
6377 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
6378 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6379 ProductOps, "vqdmlXl");
6380 Constant *CI = ConstantInt::get(SizeTy, 0);
6381 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6382
6383 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
6384 ? Intrinsic::aarch64_neon_sqadd
6385 : Intrinsic::aarch64_neon_sqsub;
6386 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
6387 }
6388 case NEON::BI__builtin_neon_vqshlud_n_s64: {
6389 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6390 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
6391 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
6392 Ops, "vqshlu_n");
6393 }
6394 case NEON::BI__builtin_neon_vqshld_n_u64:
6395 case NEON::BI__builtin_neon_vqshld_n_s64: {
6396 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
6397 ? Intrinsic::aarch64_neon_uqshl
6398 : Intrinsic::aarch64_neon_sqshl;
6399 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6400 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
6401 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
6402 }
6403 case NEON::BI__builtin_neon_vrshrd_n_u64:
6404 case NEON::BI__builtin_neon_vrshrd_n_s64: {
6405 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
6406 ? Intrinsic::aarch64_neon_urshl
6407 : Intrinsic::aarch64_neon_srshl;
6408 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6409 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
6410 Ops[1] = ConstantInt::get(Int64Ty, -SV);
6411 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
6412 }
6413 case NEON::BI__builtin_neon_vrsrad_n_u64:
6414 case NEON::BI__builtin_neon_vrsrad_n_s64: {
6415 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
6416 ? Intrinsic::aarch64_neon_urshl
6417 : Intrinsic::aarch64_neon_srshl;
6418 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6419 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
6420 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
6421 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
6422 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
6423 }
6424 case NEON::BI__builtin_neon_vshld_n_s64:
6425 case NEON::BI__builtin_neon_vshld_n_u64: {
6426 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6427 return Builder.CreateShl(
6428 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
6429 }
6430 case NEON::BI__builtin_neon_vshrd_n_s64: {
6431 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6432 return Builder.CreateAShr(
6433 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6434 Amt->getZExtValue())),
6435 "shrd_n");
6436 }
6437 case NEON::BI__builtin_neon_vshrd_n_u64: {
6438 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6439 uint64_t ShiftAmt = Amt->getZExtValue();
6440 // Right-shifting an unsigned value by its size yields 0.
6441 if (ShiftAmt == 64)
6442 return ConstantInt::get(Int64Ty, 0);
6443 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
6444 "shrd_n");
6445 }
6446 case NEON::BI__builtin_neon_vsrad_n_s64: {
6447 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
6448 Ops[1] = Builder.CreateAShr(
6449 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6450 Amt->getZExtValue())),
6451 "shrd_n");
6452 return Builder.CreateAdd(Ops[0], Ops[1]);
6453 }
6454 case NEON::BI__builtin_neon_vsrad_n_u64: {
6455 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
6456 uint64_t ShiftAmt = Amt->getZExtValue();
6457 // Right-shifting an unsigned value by its size yields 0.
6458 // As Op + 0 = Op, return Ops[0] directly.
6459 if (ShiftAmt == 64)
6460 return Ops[0];
6461 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
6462 "shrd_n");
6463 return Builder.CreateAdd(Ops[0], Ops[1]);
6464 }
6465 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
6466 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
6467 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
6468 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
6469 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6470 "lane");
6471 SmallVector<Value *, 2> ProductOps;
6472 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
6473 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
6474 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
6475 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6476 ProductOps, "vqdmlXl");
6477 Constant *CI = ConstantInt::get(SizeTy, 0);
6478 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6479 Ops.pop_back();
6480
6481 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
6482 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
6483 ? Intrinsic::aarch64_neon_sqadd
6484 : Intrinsic::aarch64_neon_sqsub;
6485 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
6486 }
6487 case NEON::BI__builtin_neon_vqdmlals_s32:
6488 case NEON::BI__builtin_neon_vqdmlsls_s32: {
6489 SmallVector<Value *, 2> ProductOps;
6490 ProductOps.push_back(Ops[1]);
6491 ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
6492 Ops[1] =
6493 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6494 ProductOps, "vqdmlXl");
6495
6496 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
6497 ? Intrinsic::aarch64_neon_sqadd
6498 : Intrinsic::aarch64_neon_sqsub;
6499 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
6500 }
6501 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
6502 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
6503 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
6504 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
6505 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6506 "lane");
6507 SmallVector<Value *, 2> ProductOps;
6508 ProductOps.push_back(Ops[1]);
6509 ProductOps.push_back(Ops[2]);
6510 Ops[1] =
6511 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6512 ProductOps, "vqdmlXl");
6513 Ops.pop_back();
6514
6515 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
6516 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
6517 ? Intrinsic::aarch64_neon_sqadd
6518 : Intrinsic::aarch64_neon_sqsub;
6519 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
6520 }
6521 case NEON::BI__builtin_neon_vget_lane_bf16:
6522 case NEON::BI__builtin_neon_vduph_lane_bf16:
6523 case NEON::BI__builtin_neon_vduph_lane_f16: {
6524 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6525 "vget_lane");
6526 }
6527 case NEON::BI__builtin_neon_vgetq_lane_bf16:
6528 case NEON::BI__builtin_neon_vduph_laneq_bf16:
6529 case NEON::BI__builtin_neon_vduph_laneq_f16: {
6530 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6531 "vgetq_lane");
6532 }
6533 case NEON::BI__builtin_neon_vcvt_bf16_f32: {
6534 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6535 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6536 return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6537 }
6538 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
6539 SmallVector<int, 16> ConcatMask(8);
6540 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6541 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6542 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6543 llvm::Value *Trunc =
6544 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6545 return Builder.CreateShuffleVector(
6546 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
6547 }
6548 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
6549 SmallVector<int, 16> ConcatMask(8);
6550 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6551 SmallVector<int, 16> LoMask(4);
6552 std::iota(LoMask.begin(), LoMask.end(), 0);
6553 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6554 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6555 llvm::Type *V8BF16 = FixedVectorType::get(Builder.getBFloatTy(), 8);
6556 llvm::Value *Inactive = Builder.CreateShuffleVector(
6557 Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
6558 llvm::Value *Trunc =
6559 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
6560 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
6561 }
6562
6563 case clang::AArch64::BI_InterlockedAdd:
6564 case clang::AArch64::BI_InterlockedAdd_acq:
6565 case clang::AArch64::BI_InterlockedAdd_rel:
6566 case clang::AArch64::BI_InterlockedAdd_nf:
6567 case clang::AArch64::BI_InterlockedAdd64:
6568 case clang::AArch64::BI_InterlockedAdd64_acq:
6569 case clang::AArch64::BI_InterlockedAdd64_rel:
6570 case clang::AArch64::BI_InterlockedAdd64_nf: {
6571 Address DestAddr = CheckAtomicAlignment(*this, E);
6572 Value *Val = EmitScalarExpr(E->getArg(1));
6573 llvm::AtomicOrdering Ordering;
6574 switch (BuiltinID) {
6575 case clang::AArch64::BI_InterlockedAdd:
6576 case clang::AArch64::BI_InterlockedAdd64:
6577 Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
6578 break;
6579 case clang::AArch64::BI_InterlockedAdd_acq:
6580 case clang::AArch64::BI_InterlockedAdd64_acq:
6581 Ordering = llvm::AtomicOrdering::Acquire;
6582 break;
6583 case clang::AArch64::BI_InterlockedAdd_rel:
6584 case clang::AArch64::BI_InterlockedAdd64_rel:
6585 Ordering = llvm::AtomicOrdering::Release;
6586 break;
6587 case clang::AArch64::BI_InterlockedAdd_nf:
6588 case clang::AArch64::BI_InterlockedAdd64_nf:
6589 Ordering = llvm::AtomicOrdering::Monotonic;
6590 break;
6591 default:
6592 llvm_unreachable("missing builtin ID in switch!");
6593 }
6594 AtomicRMWInst *RMWI =
6595 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val, Ordering);
6596 return Builder.CreateAdd(RMWI, Val);
6597 }
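// atomicrmw add returns the value held before the addition, while the
// MSVC _InterlockedAdd family returns the updated value, hence the extra
// CreateAdd on the result.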
6598 }
6599
6600 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
6601 llvm::Type *Ty = VTy;
6602 if (!Ty)
6603 return nullptr;
6604
6605 // Not all intrinsics handled by the common case work for AArch64 yet, so only
6606 // defer to common code if it's been added to our special map.
6607 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
6608 AArch64SIMDIntrinsicsProvenSorted);
6609
6610 if (Builtin)
6611 return EmitCommonNeonBuiltinExpr(
6612 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
6613 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
6614 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
6615
6616 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
6617 return V;
6618
6619 unsigned Int;
6620 bool ExtractLow = false;
6621 bool ExtendLaneArg = false;
6622 switch (BuiltinID) {
6623 default: return nullptr;
6624 case NEON::BI__builtin_neon_vbsl_v:
6625 case NEON::BI__builtin_neon_vbslq_v: {
6626 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6627 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
6628 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
6629 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
6630
6631 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
6632 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
6633 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
6634 return Builder.CreateBitCast(Ops[0], Ty);
6635 }
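// vbsl has no dedicated intrinsic here; it is expanded to the bitwise
// select pattern (mask & a) | (~mask & b), which the backend can match
// back to BSL/BIT/BIF.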
6636 case NEON::BI__builtin_neon_vfma_lane_v:
6637 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6638 // The ARM builtins (and instructions) have the addend as the first
6639 // operand, but the 'fma' intrinsics have it last. Swap it around here.
6640 Value *Addend = Ops[0];
6641 Value *Multiplicand = Ops[1];
6642 Value *LaneSource = Ops[2];
6643 Ops[0] = Multiplicand;
6644 Ops[1] = LaneSource;
6645 Ops[2] = Addend;
6646
6647 // Now adjust things to handle the lane access.
6648 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
6649 ? llvm::FixedVectorType::get(VTy->getElementType(),
6650 VTy->getNumElements() / 2)
6651 : VTy;
6652 llvm::Constant *cst = cast<Constant>(Ops[3]);
6653 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
6654 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
6655 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
6656
6657 Ops.pop_back();
6658 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
6659 : Intrinsic::fma;
6660 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
6661 }
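// After the swap the operands are in llvm.fma order (a * b + c) and the
// lane operand has been splatted to a full vector, so a single (possibly
// constrained) fma intrinsic covers the whole vector.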
6662 case NEON::BI__builtin_neon_vfma_laneq_v: {
6663 auto *VTy = cast<llvm::FixedVectorType>(Ty);
6664 // v1f64 fma should be mapped to Neon scalar f64 fma
6665 if (VTy && VTy->getElementType() == DoubleTy) {
6666 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6667 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6668 llvm::FixedVectorType *VTy =
6669 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
6670 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
6671 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6672 Value *Result;
6673 Result = emitCallMaybeConstrainedFPBuiltin(
6674 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
6675 DoubleTy, {Ops[1], Ops[2], Ops[0]});
6676 return Builder.CreateBitCast(Result, Ty);
6677 }
6678 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6679 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6680
6681 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
6682 VTy->getNumElements() * 2);
6683 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
6684 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
6685 cast<ConstantInt>(Ops[3]));
6686 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6687
6688 return emitCallMaybeConstrainedFPBuiltin(
6689 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6690 {Ops[2], Ops[1], Ops[0]});
6691 }
6692 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6693 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6694 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6695
6696 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6697 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6698 return emitCallMaybeConstrainedFPBuiltin(
6699 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6700 {Ops[2], Ops[1], Ops[0]});
6701 }
6702 case NEON::BI__builtin_neon_vfmah_lane_f16:
6703 case NEON::BI__builtin_neon_vfmas_lane_f32:
6704 case NEON::BI__builtin_neon_vfmah_laneq_f16:
6705 case NEON::BI__builtin_neon_vfmas_laneq_f32:
6706 case NEON::BI__builtin_neon_vfmad_lane_f64:
6707 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6708 Ops.push_back(EmitScalarExpr(E->getArg(3)));
6709 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6710 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6711 return emitCallMaybeConstrainedFPBuiltin(
6712 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6713 {Ops[1], Ops[2], Ops[0]});
6714 }
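// The scalar lane variants extract the requested lane from the vector
// operand and emit a plain scalar fma (constrained when strict FP is in
// effect).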
6715 case NEON::BI__builtin_neon_vmull_v:
6716 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6717 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6718 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6719 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6720 case NEON::BI__builtin_neon_vmax_v:
6721 case NEON::BI__builtin_neon_vmaxq_v:
6722 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6723 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6724 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6725 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6726 case NEON::BI__builtin_neon_vmaxh_f16: {
6727 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6728 Int = Intrinsic::aarch64_neon_fmax;
6729 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
6730 }
6731 case NEON::BI__builtin_neon_vmin_v:
6732 case NEON::BI__builtin_neon_vminq_v:
6733 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6734 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6735 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6736 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
6737 case NEON::BI__builtin_neon_vminh_f16: {
6738 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6739 Int = Intrinsic::aarch64_neon_fmin;
6740 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
6741 }
6742 case NEON::BI__builtin_neon_vabd_v:
6743 case NEON::BI__builtin_neon_vabdq_v:
6744 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6745 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6746 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6747 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
6748 case NEON::BI__builtin_neon_vpadal_v:
6749 case NEON::BI__builtin_neon_vpadalq_v: {
6750 unsigned ArgElts = VTy->getNumElements();
6751 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6752 unsigned BitWidth = EltTy->getBitWidth();
6753 auto *ArgTy = llvm::FixedVectorType::get(
6754 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
6755 llvm::Type* Tys[2] = { VTy, ArgTy };
6756 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6757 SmallVector<llvm::Value *, 1> TmpOps;
6758 TmpOps.push_back(Ops[1]);
6759 Function *F = CGM.getIntrinsic(Int, Tys);
6760 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
6761 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6762 return Builder.CreateAdd(tmp, addend);
6763 }
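// vpadal is emitted as a pairwise widening add (saddlp/uaddlp) of the
// second operand followed by an ordinary vector add of the accumulator,
// rather than a single accumulating intrinsic.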
6764 case NEON::BI__builtin_neon_vpmin_v:
6765 case NEON::BI__builtin_neon_vpminq_v:
6766 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6767 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6768 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6769 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
6770 case NEON::BI__builtin_neon_vpmax_v:
6771 case NEON::BI__builtin_neon_vpmaxq_v:
6772 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6773 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6774 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6775 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
6776 case NEON::BI__builtin_neon_vminnm_v:
6777 case NEON::BI__builtin_neon_vminnmq_v:
6778 Int = Intrinsic::aarch64_neon_fminnm;
6779 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
6780 case NEON::BI__builtin_neon_vminnmh_f16:
6781 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6782 Int = Intrinsic::aarch64_neon_fminnm;
6783 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
6784 case NEON::BI__builtin_neon_vmaxnm_v:
6785 case NEON::BI__builtin_neon_vmaxnmq_v:
6786 Int = Intrinsic::aarch64_neon_fmaxnm;
6787 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
6788 case NEON::BI__builtin_neon_vmaxnmh_f16:
6789 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6790 Int = Intrinsic::aarch64_neon_fmaxnm;
6791 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
6792 case NEON::BI__builtin_neon_vrecpss_f32: {
6793 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6794 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6795 Ops, "vrecps");
6796 }
6797 case NEON::BI__builtin_neon_vrecpsd_f64:
6798 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6799 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6800 Ops, "vrecps");
6801 case NEON::BI__builtin_neon_vrecpsh_f16:
6802 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6803 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
6804 Ops, "vrecps");
6805 case NEON::BI__builtin_neon_vqshrun_n_v:
6806 Int = Intrinsic::aarch64_neon_sqshrun;
6807 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
6808 case NEON::BI__builtin_neon_vqrshrun_n_v:
6809 Int = Intrinsic::aarch64_neon_sqrshrun;
6810 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
6811 case NEON::BI__builtin_neon_vqshrn_n_v:
6812 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6813 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
6814 case NEON::BI__builtin_neon_vrshrn_n_v:
6815 Int = Intrinsic::aarch64_neon_rshrn;
6816 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
6817 case NEON::BI__builtin_neon_vqrshrn_n_v:
6818 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6819 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
6820 case NEON::BI__builtin_neon_vrndah_f16: {
6821 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6822 Int = Builder.getIsFPConstrained()
6823 ? Intrinsic::experimental_constrained_round
6824 : Intrinsic::round;
6825 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
6826 }
6827 case NEON::BI__builtin_neon_vrnda_v:
6828 case NEON::BI__builtin_neon_vrndaq_v: {
6829 Int = Builder.getIsFPConstrained()
6830 ? Intrinsic::experimental_constrained_round
6831 : Intrinsic::round;
6832 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
6833 }
6834 case NEON::BI__builtin_neon_vrndih_f16: {
6835 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6836 Int = Builder.getIsFPConstrained()
6837 ? Intrinsic::experimental_constrained_nearbyint
6838 : Intrinsic::nearbyint;
6839 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
6840 }
6841 case NEON::BI__builtin_neon_vrndmh_f16: {
6842 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6843 Int = Builder.getIsFPConstrained()
6844 ? Intrinsic::experimental_constrained_floor
6845 : Intrinsic::floor;
6846 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
6847 }
6848 case NEON::BI__builtin_neon_vrndm_v:
6849 case NEON::BI__builtin_neon_vrndmq_v: {
6850 Int = Builder.getIsFPConstrained()
6851 ? Intrinsic::experimental_constrained_floor
6852 : Intrinsic::floor;
6853 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6854 }
6855 case NEON::BI__builtin_neon_vrndnh_f16: {
6856 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6857 Int = Builder.getIsFPConstrained()
6858 ? Intrinsic::experimental_constrained_roundeven
6859 : Intrinsic::roundeven;
6860 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
6861 }
6862 case NEON::BI__builtin_neon_vrndn_v:
6863 case NEON::BI__builtin_neon_vrndnq_v: {
6864 Int = Builder.getIsFPConstrained()
6865 ? Intrinsic::experimental_constrained_roundeven
6866 : Intrinsic::roundeven;
6867 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6868 }
6869 case NEON::BI__builtin_neon_vrndns_f32: {
6870 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6871 Int = Builder.getIsFPConstrained()
6872 ? Intrinsic::experimental_constrained_roundeven
6873 : Intrinsic::roundeven;
6874 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
6875 }
6876 case NEON::BI__builtin_neon_vrndph_f16: {
6877 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6878 Int = Builder.getIsFPConstrained()
6879 ? Intrinsic::experimental_constrained_ceil
6880 : Intrinsic::ceil;
6881 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
6882 }
6883 case NEON::BI__builtin_neon_vrndp_v:
6884 case NEON::BI__builtin_neon_vrndpq_v: {
6885 Int = Builder.getIsFPConstrained()
6886 ? Intrinsic::experimental_constrained_ceil
6887 : Intrinsic::ceil;
6888 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6889 }
6890 case NEON::BI__builtin_neon_vrndxh_f16: {
6891 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6892 Int = Builder.getIsFPConstrained()
6893 ? Intrinsic::experimental_constrained_rint
6894 : Intrinsic::rint;
6895 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
6896 }
6897 case NEON::BI__builtin_neon_vrndx_v:
6898 case NEON::BI__builtin_neon_vrndxq_v: {
6899 Int = Builder.getIsFPConstrained()
6900 ? Intrinsic::experimental_constrained_rint
6901 : Intrinsic::rint;
6902 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6903 }
6904 case NEON::BI__builtin_neon_vrndh_f16: {
6905 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6906 Int = Builder.getIsFPConstrained()
6907 ? Intrinsic::experimental_constrained_trunc
6908 : Intrinsic::trunc;
6909 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
6910 }
6911 case NEON::BI__builtin_neon_vrnd32x_f32:
6912 case NEON::BI__builtin_neon_vrnd32xq_f32:
6913 case NEON::BI__builtin_neon_vrnd32x_f64:
6914 case NEON::BI__builtin_neon_vrnd32xq_f64: {
6915 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6916 Int = Intrinsic::aarch64_neon_frint32x;
6917 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
6918 }
6919 case NEON::BI__builtin_neon_vrnd32z_f32:
6920 case NEON::BI__builtin_neon_vrnd32zq_f32:
6921 case NEON::BI__builtin_neon_vrnd32z_f64:
6922 case NEON::BI__builtin_neon_vrnd32zq_f64: {
6923 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6924 Int = Intrinsic::aarch64_neon_frint32z;
6925 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
6926 }
6927 case NEON::BI__builtin_neon_vrnd64x_f32:
6928 case NEON::BI__builtin_neon_vrnd64xq_f32:
6929 case NEON::BI__builtin_neon_vrnd64x_f64:
6930 case NEON::BI__builtin_neon_vrnd64xq_f64: {
6931 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6932 Int = Intrinsic::aarch64_neon_frint64x;
6933 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
6934 }
6935 case NEON::BI__builtin_neon_vrnd64z_f32:
6936 case NEON::BI__builtin_neon_vrnd64zq_f32:
6937 case NEON::BI__builtin_neon_vrnd64z_f64:
6938 case NEON::BI__builtin_neon_vrnd64zq_f64: {
6939 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6940 Int = Intrinsic::aarch64_neon_frint64z;
6941 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
6942 }
6943 case NEON::BI__builtin_neon_vrnd_v:
6944 case NEON::BI__builtin_neon_vrndq_v: {
6945 Int = Builder.getIsFPConstrained()
6946 ? Intrinsic::experimental_constrained_trunc
6947 : Intrinsic::trunc;
6948 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
6949 }
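// Summary of the rounding builtins above: vrnda -> round, vrndi -> nearbyint,
// vrndm -> floor, vrndn -> roundeven, vrndp -> ceil, vrndx -> rint,
// vrnd -> trunc; each switches to its experimental_constrained_* form under
// strict FP.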
6950 case NEON::BI__builtin_neon_vcvt_f64_v:
6951 case NEON::BI__builtin_neon_vcvtq_f64_v:
6952 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6953 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6954 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6955 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6956 case NEON::BI__builtin_neon_vcvt_f64_f32: {
6957 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6958 "unexpected vcvt_f64_f32 builtin");
6959 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6960 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6961
6962 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
6963 }
6964 case NEON::BI__builtin_neon_vcvt_f32_f64: {
6965 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6966 "unexpected vcvt_f32_f64 builtin");
6967 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6968 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6969
6970 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
6971 }
6972 case NEON::BI__builtin_neon_vcvt_s32_v:
6973 case NEON::BI__builtin_neon_vcvt_u32_v:
6974 case NEON::BI__builtin_neon_vcvt_s64_v:
6975 case NEON::BI__builtin_neon_vcvt_u64_v:
6976 case NEON::BI__builtin_neon_vcvt_s16_f16:
6977 case NEON::BI__builtin_neon_vcvt_u16_f16:
6978 case NEON::BI__builtin_neon_vcvtq_s32_v:
6979 case NEON::BI__builtin_neon_vcvtq_u32_v:
6980 case NEON::BI__builtin_neon_vcvtq_s64_v:
6981 case NEON::BI__builtin_neon_vcvtq_u64_v:
6982 case NEON::BI__builtin_neon_vcvtq_s16_f16:
6983 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
6984 Int =
6985 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
6986 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
6987 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
6988 }
6989 case NEON::BI__builtin_neon_vcvta_s16_f16:
6990 case NEON::BI__builtin_neon_vcvta_u16_f16:
6991 case NEON::BI__builtin_neon_vcvta_s32_v:
6992 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
6993 case NEON::BI__builtin_neon_vcvtaq_s32_v:
6994 case NEON::BI__builtin_neon_vcvta_u32_v:
6995 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
6996 case NEON::BI__builtin_neon_vcvtaq_u32_v:
6997 case NEON::BI__builtin_neon_vcvta_s64_v:
6998 case NEON::BI__builtin_neon_vcvtaq_s64_v:
6999 case NEON::BI__builtin_neon_vcvta_u64_v:
7000 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
7001 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
7002 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7003 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
7004 }
7005 case NEON::BI__builtin_neon_vcvtm_s16_f16:
7006 case NEON::BI__builtin_neon_vcvtm_s32_v:
7007 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
7008 case NEON::BI__builtin_neon_vcvtmq_s32_v:
7009 case NEON::BI__builtin_neon_vcvtm_u16_f16:
7010 case NEON::BI__builtin_neon_vcvtm_u32_v:
7011 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
7012 case NEON::BI__builtin_neon_vcvtmq_u32_v:
7013 case NEON::BI__builtin_neon_vcvtm_s64_v:
7014 case NEON::BI__builtin_neon_vcvtmq_s64_v:
7015 case NEON::BI__builtin_neon_vcvtm_u64_v:
7016 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
7017 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
7018 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7019 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
7020 }
7021 case NEON::BI__builtin_neon_vcvtn_s16_f16:
7022 case NEON::BI__builtin_neon_vcvtn_s32_v:
7023 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
7024 case NEON::BI__builtin_neon_vcvtnq_s32_v:
7025 case NEON::BI__builtin_neon_vcvtn_u16_f16:
7026 case NEON::BI__builtin_neon_vcvtn_u32_v:
7027 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
7028 case NEON::BI__builtin_neon_vcvtnq_u32_v:
7029 case NEON::BI__builtin_neon_vcvtn_s64_v:
7030 case NEON::BI__builtin_neon_vcvtnq_s64_v:
7031 case NEON::BI__builtin_neon_vcvtn_u64_v:
7032 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
7033 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
7034 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7035 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
7036 }
7037 case NEON::BI__builtin_neon_vcvtp_s16_f16:
7038 case NEON::BI__builtin_neon_vcvtp_s32_v:
7039 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
7040 case NEON::BI__builtin_neon_vcvtpq_s32_v:
7041 case NEON::BI__builtin_neon_vcvtp_u16_f16:
7042 case NEON::BI__builtin_neon_vcvtp_u32_v:
7043 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
7044 case NEON::BI__builtin_neon_vcvtpq_u32_v:
7045 case NEON::BI__builtin_neon_vcvtp_s64_v:
7046 case NEON::BI__builtin_neon_vcvtpq_s64_v:
7047 case NEON::BI__builtin_neon_vcvtp_u64_v:
7048 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
7049 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
7050 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7051 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
7052 }
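// The vcvt family above differs only in rounding mode: vcvt uses
// fcvtzs/fcvtzu (toward zero), vcvta fcvtas/fcvtau (nearest, ties away),
// vcvtm fcvtms/fcvtmu (toward -inf), vcvtn fcvtns/fcvtnu (nearest even),
// and vcvtp fcvtps/fcvtpu (toward +inf).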
7053 case NEON::BI__builtin_neon_vmulx_v:
7054 case NEON::BI__builtin_neon_vmulxq_v: {
7055 Int = Intrinsic::aarch64_neon_fmulx;
7056 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
7057 }
7058 case NEON::BI__builtin_neon_vmulxh_lane_f16:
7059 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
7060 // vmulx_lane should be mapped to Neon scalar mulx after
7061 // extracting the scalar element
7062 Ops.push_back(EmitScalarExpr(E->getArg(2)));
7063 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
7064 Ops.pop_back();
7065 Int = Intrinsic::aarch64_neon_fmulx;
7066 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
7067 }
7068 case NEON::BI__builtin_neon_vmul_lane_v:
7069 case NEON::BI__builtin_neon_vmul_laneq_v: {
7070 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
7071 bool Quad = false;
7072 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
7073 Quad = true;
7074 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
7075 llvm::FixedVectorType *VTy =
7076 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
7077 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7078 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
7079 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
7080 return Builder.CreateBitCast(Result, Ty);
7081 }
7082 case NEON::BI__builtin_neon_vnegd_s64:
7083 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
7084 case NEON::BI__builtin_neon_vnegh_f16:
7085 return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
7086 case NEON::BI__builtin_neon_vpmaxnm_v:
7087 case NEON::BI__builtin_neon_vpmaxnmq_v: {
7088 Int = Intrinsic::aarch64_neon_fmaxnmp;
7089 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
7090 }
7091 case NEON::BI__builtin_neon_vpminnm_v:
7092 case NEON::BI__builtin_neon_vpminnmq_v: {
7093 Int = Intrinsic::aarch64_neon_fminnmp;
7094 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
7095 }
7096 case NEON::BI__builtin_neon_vsqrth_f16: {
7097 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7098 Int = Builder.getIsFPConstrained()
7099 ? Intrinsic::experimental_constrained_sqrt
7100 : Intrinsic::sqrt;
7101 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
7102 }
7103 case NEON::BI__builtin_neon_vsqrt_v:
7104 case NEON::BI__builtin_neon_vsqrtq_v: {
7105 Int = Builder.getIsFPConstrained()
7106 ? Intrinsic::experimental_constrained_sqrt
7107 : Intrinsic::sqrt;
7108 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7109 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
7110 }
7111 case NEON::BI__builtin_neon_vrbit_v:
7112 case NEON::BI__builtin_neon_vrbitq_v: {
7113 Int = Intrinsic::bitreverse;
7114 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
7115 }
7116 case NEON::BI__builtin_neon_vmaxv_f16: {
7117 Int = Intrinsic::aarch64_neon_fmaxv;
7118 Ty = HalfTy;
7119 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7120 llvm::Type *Tys[2] = { Ty, VTy };
7121 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7122 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7123 return Builder.CreateTrunc(Ops[0], HalfTy);
7124 }
7125 case NEON::BI__builtin_neon_vmaxvq_f16: {
7126 Int = Intrinsic::aarch64_neon_fmaxv;
7127 Ty = HalfTy;
7128 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7129 llvm::Type *Tys[2] = { Ty, VTy };
7130 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7131 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7132 return Builder.CreateTrunc(Ops[0], HalfTy);
7133 }
7134 case NEON::BI__builtin_neon_vminv_f16: {
7135 Int = Intrinsic::aarch64_neon_fminv;
7136 Ty = HalfTy;
7137 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7138 llvm::Type *Tys[2] = { Ty, VTy };
7139 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7140 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7141 return Builder.CreateTrunc(Ops[0], HalfTy);
7142 }
7143 case NEON::BI__builtin_neon_vminvq_f16: {
7144 Int = Intrinsic::aarch64_neon_fminv;
7145 Ty = HalfTy;
7146 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7147 llvm::Type *Tys[2] = { Ty, VTy };
7148 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7149 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7150 return Builder.CreateTrunc(Ops[0], HalfTy);
7151 }
7152 case NEON::BI__builtin_neon_vmaxnmv_f16: {
7153 Int = Intrinsic::aarch64_neon_fmaxnmv;
7154 Ty = HalfTy;
7155 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7156 llvm::Type *Tys[2] = { Ty, VTy };
7157 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7158 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
7159 return Builder.CreateTrunc(Ops[0], HalfTy);
7160 }
7161 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
7162 Int = Intrinsic::aarch64_neon_fmaxnmv;
7163 Ty = HalfTy;
7164 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7165 llvm::Type *Tys[2] = { Ty, VTy };
7166 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7167 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
7168 return Builder.CreateTrunc(Ops[0], HalfTy);
7169 }
7170 case NEON::BI__builtin_neon_vminnmv_f16: {
7171 Int = Intrinsic::aarch64_neon_fminnmv;
7172 Ty = HalfTy;
7173 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7174 llvm::Type *Tys[2] = { Ty, VTy };
7175 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7176 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
7177 return Builder.CreateTrunc(Ops[0], HalfTy);
7178 }
7179 case NEON::BI__builtin_neon_vminnmvq_f16: {
7180 Int = Intrinsic::aarch64_neon_fminnmv;
7181 Ty = HalfTy;
7182 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7183 llvm::Type *Tys[2] = { Ty, VTy };
7184 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7185 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
7186 return Builder.CreateTrunc(Ops[0], HalfTy);
7187 }
7188 case NEON::BI__builtin_neon_vmul_n_f64: {
7189 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
7190 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
7191 return Builder.CreateFMul(Ops[0], RHS);
7192 }
7193 case NEON::BI__builtin_neon_vaddlv_u8: {
7194 Int = Intrinsic::aarch64_neon_uaddlv;
7195 Ty = Int32Ty;
7196 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7197 llvm::Type *Tys[2] = { Ty, VTy };
7198 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7199 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7200 return Builder.CreateTrunc(Ops[0], Int16Ty);
7201 }
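// uaddlv over v8i8 produces an i32; the builtin returns a u16, so the
// result is truncated. The 16-bit element variants below return the i32
// directly.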
7202 case NEON::BI__builtin_neon_vaddlv_u16: {
7203 Int = Intrinsic::aarch64_neon_uaddlv;
7204 Ty = Int32Ty;
7205 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7206 llvm::Type *Tys[2] = { Ty, VTy };
7207 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7208 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7209 }
7210 case NEON::BI__builtin_neon_vaddlvq_u8: {
7211 Int = Intrinsic::aarch64_neon_uaddlv;
7212 Ty = Int32Ty;
7213 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7214 llvm::Type *Tys[2] = { Ty, VTy };
7215 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7216 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7217 return Builder.CreateTrunc(Ops[0], Int16Ty);
7218 }
7219 case NEON::BI__builtin_neon_vaddlvq_u16: {
7220 Int = Intrinsic::aarch64_neon_uaddlv;
7221 Ty = Int32Ty;
7222 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7223 llvm::Type *Tys[2] = { Ty, VTy };
7224 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7225 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7226 }
7227 case NEON::BI__builtin_neon_vaddlv_s8: {
7228 Int = Intrinsic::aarch64_neon_saddlv;
7229 Ty = Int32Ty;
7230 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7231 llvm::Type *Tys[2] = { Ty, VTy };
7232 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7233 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7234 return Builder.CreateTrunc(Ops[0], Int16Ty);
7235 }
7236 case NEON::BI__builtin_neon_vaddlv_s16: {
7237 Int = Intrinsic::aarch64_neon_saddlv;
7238 Ty = Int32Ty;
7239 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7240 llvm::Type *Tys[2] = { Ty, VTy };
7241 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7242 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7243 }
7244 case NEON::BI__builtin_neon_vaddlvq_s8: {
7245 Int = Intrinsic::aarch64_neon_saddlv;
7246 Ty = Int32Ty;
7247 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7248 llvm::Type *Tys[2] = { Ty, VTy };
7249 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7250 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7251 return Builder.CreateTrunc(Ops[0], Int16Ty);
7252 }
7253 case NEON::BI__builtin_neon_vaddlvq_s16: {
7254 Int = Intrinsic::aarch64_neon_saddlv;
7255 Ty = Int32Ty;
7256 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7257 llvm::Type *Tys[2] = { Ty, VTy };
7258 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7259 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7260 }
7261 case NEON::BI__builtin_neon_vsri_n_v:
7262 case NEON::BI__builtin_neon_vsriq_n_v: {
7263 Int = Intrinsic::aarch64_neon_vsri;
7264 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
7265 return EmitNeonCall(Intrin, Ops, "vsri_n");
7266 }
7267 case NEON::BI__builtin_neon_vsli_n_v:
7268 case NEON::BI__builtin_neon_vsliq_n_v: {
7269 Int = Intrinsic::aarch64_neon_vsli;
7270 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
7271 return EmitNeonCall(Intrin, Ops, "vsli_n");
7272 }
7273 case NEON::BI__builtin_neon_vsra_n_v:
7274 case NEON::BI__builtin_neon_vsraq_n_v:
7275 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7276 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
7277 return Builder.CreateAdd(Ops[0], Ops[1]);
7278 case NEON::BI__builtin_neon_vrsra_n_v:
7279 case NEON::BI__builtin_neon_vrsraq_n_v: {
7280 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
7281 SmallVector<llvm::Value *, 2> TmpOps;
7282 TmpOps.push_back(Ops[1]);
7283 TmpOps.push_back(Ops[2]);
7284 Function* F = CGM.getIntrinsic(Int, Ty);
7285 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
7286 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7287 return Builder.CreateAdd(Ops[0], tmp);
7288 }
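// Rounding shift-right and accumulate: the rounding right shift is
// expressed as srshl/urshl with a negated shift amount (the trailing
// 'true' tells EmitNeonCall to negate the immediate), and the shifted
// value is then added to the accumulator.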
7289 case NEON::BI__builtin_neon_vld1_v:
7290 case NEON::BI__builtin_neon_vld1q_v: {
7291 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
7292 }
7293 case NEON::BI__builtin_neon_vst1_v:
7294 case NEON::BI__builtin_neon_vst1q_v:
7295 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7296 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
7297 case NEON::BI__builtin_neon_vld1_lane_v:
7298 case NEON::BI__builtin_neon_vld1q_lane_v: {
7299 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7300 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
7301 PtrOp0.getAlignment());
7302 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
7303 }
7304 case NEON::BI__builtin_neon_vldap1_lane_s64:
7305 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
7306 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7307 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
7308 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
7309 LI->setAtomic(llvm::AtomicOrdering::Acquire);
7310 Ops[0] = LI;
7311 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
7312 }
7313 case NEON::BI__builtin_neon_vld1_dup_v:
7314 case NEON::BI__builtin_neon_vld1q_dup_v: {
7315 Value *V = PoisonValue::get(Ty);
7316 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
7317 PtrOp0.getAlignment());
7318 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
7319 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
7320 return EmitNeonSplat(Ops[0], CI);
7321 }
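// A dup load is a scalar load of one element, inserted into lane 0 of a
// poison vector and then splatted across all lanes.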
7322 case NEON::BI__builtin_neon_vst1_lane_v:
7323 case NEON::BI__builtin_neon_vst1q_lane_v:
7324 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7325 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
7326 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
7327 case NEON::BI__builtin_neon_vstl1_lane_s64:
7328 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
7329 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7330 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
7331 llvm::StoreInst *SI =
7332 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
7333 SI->setAtomic(llvm::AtomicOrdering::Release);
7334 return SI;
7335 }
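// vldap1_lane / vstl1_lane are the single-lane load-acquire and
// store-release forms; they differ from vld1_lane / vst1_lane only in the
// atomic ordering attached to the element load or store.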
7336 case NEON::BI__builtin_neon_vld2_v:
7337 case NEON::BI__builtin_neon_vld2q_v: {
7338 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
7339 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
7340 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
7341 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7342 }
7343 case NEON::BI__builtin_neon_vld3_v:
7344 case NEON::BI__builtin_neon_vld3q_v: {
7345 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
7346 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
7347 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
7348 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7349 }
7350 case NEON::BI__builtin_neon_vld4_v:
7351 case NEON::BI__builtin_neon_vld4q_v: {
7352 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
7353 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
7354 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
7355 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7356 }
7357 case NEON::BI__builtin_neon_vld2_dup_v:
7358 case NEON::BI__builtin_neon_vld2q_dup_v: {
7359 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
7360 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
7361 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
7362 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7363 }
7364 case NEON::BI__builtin_neon_vld3_dup_v:
7365 case NEON::BI__builtin_neon_vld3q_dup_v: {
7366 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
7367 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
7368 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
7369 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7370 }
7371 case NEON::BI__builtin_neon_vld4_dup_v:
7372 case NEON::BI__builtin_neon_vld4q_dup_v: {
7373 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
7374 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
7375 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
7376 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7377 }
7378 case NEON::BI__builtin_neon_vld2_lane_v:
7379 case NEON::BI__builtin_neon_vld2q_lane_v: {
7380 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7381 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
7382 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7383 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7384 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7385 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7386 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
7387 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7388 }
7389 case NEON::BI__builtin_neon_vld3_lane_v:
7390 case NEON::BI__builtin_neon_vld3q_lane_v: {
7391 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7392 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
7393 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7394 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7395 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7396 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7397 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7398 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
7399 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7400 }
7401 case NEON::BI__builtin_neon_vld4_lane_v:
7402 case NEON::BI__builtin_neon_vld4q_lane_v: {
7403 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7404 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
7405 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7406 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7407 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7408 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7409 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
7410 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
7411 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
7412 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7413 }
7414 case NEON::BI__builtin_neon_vst2_v:
7415 case NEON::BI__builtin_neon_vst2q_v: {
7416 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7417 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
7418 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
7419 Ops, "");
7420 }
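// The vst2/vst3/vst4 builtins take the pointer first, but the
// aarch64_neon_st* intrinsics take it last; std::rotate moves it to the
// end. The _lane variants below additionally zero-extend the lane index
// to i64.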
7421 case NEON::BI__builtin_neon_vst2_lane_v:
7422 case NEON::BI__builtin_neon_vst2q_lane_v: {
7423 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7424 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
7425 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7426 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
7427 Ops, "");
7428 }
7429 case NEON::BI__builtin_neon_vst3_v:
7430 case NEON::BI__builtin_neon_vst3q_v: {
7431 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7432 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7433 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
7434 Ops, "");
7435 }
7436 case NEON::BI__builtin_neon_vst3_lane_v:
7437 case NEON::BI__builtin_neon_vst3q_lane_v: {
7438 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7439 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7440 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7441 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
7442 Ops, "");
7443 }
7444 case NEON::BI__builtin_neon_vst4_v:
7445 case NEON::BI__builtin_neon_vst4q_v: {
7446 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7447 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7448 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
7449 Ops, "");
7450 }
7451 case NEON::BI__builtin_neon_vst4_lane_v:
7452 case NEON::BI__builtin_neon_vst4q_lane_v: {
7453 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7454 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7455 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
7456 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
7457 Ops, "");
7458 }
7459 case NEON::BI__builtin_neon_vtrn_v:
7460 case NEON::BI__builtin_neon_vtrnq_v: {
7461 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7462 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7463 Value *SV = nullptr;
7464
7465 for (unsigned vi = 0; vi != 2; ++vi) {
7466 SmallVector<int, 16> Indices;
7467 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7468 Indices.push_back(i+vi);
7469 Indices.push_back(i+e+vi);
7470 }
7471 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7472 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
7473 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7474 }
7475 return SV;
7476 }
7477 case NEON::BI__builtin_neon_vuzp_v:
7478 case NEON::BI__builtin_neon_vuzpq_v: {
7479 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7480 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7481 Value *SV = nullptr;
7482
7483 for (unsigned vi = 0; vi != 2; ++vi) {
7484 SmallVector<int, 16> Indices;
7485 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7486 Indices.push_back(2*i+vi);
7487
7488 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7489 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
7490 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7491 }
7492 return SV;
7493 }
7494 case NEON::BI__builtin_neon_vzip_v:
7495 case NEON::BI__builtin_neon_vzipq_v: {
7496 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7497 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7498 Value *SV = nullptr;
7499
7500 for (unsigned vi = 0; vi != 2; ++vi) {
7501 SmallVector<int, 16> Indices;
7502 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7503 Indices.push_back((i + vi*e) >> 1);
7504 Indices.push_back(((i + vi*e) >> 1)+e);
7505 }
7506 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7507 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
7508 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7509 }
7510 return SV;
7511 }
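// vtrn/vuzp/vzip return both result vectors through the pointer in Ops[0].
// For a 4-element vector the shuffle masks are: vtrn {0,4,2,6} and
// {1,5,3,7}, vuzp {0,2,4,6} and {1,3,5,7}, vzip {0,4,1,5} and {2,6,3,7}.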
7512 case NEON::BI__builtin_neon_vqtbl1q_v: {
7513 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
7514 Ops, "vtbl1");
7515 }
7516 case NEON::BI__builtin_neon_vqtbl2q_v: {
7517 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
7518 Ops, "vtbl2");
7519 }
7520 case NEON::BI__builtin_neon_vqtbl3q_v: {
7521 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
7522 Ops, "vtbl3");
7523 }
7524 case NEON::BI__builtin_neon_vqtbl4q_v: {
7525 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
7526 Ops, "vtbl4");
7527 }
7528 case NEON::BI__builtin_neon_vqtbx1q_v: {
7529 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
7530 Ops, "vtbx1");
7531 }
7532 case NEON::BI__builtin_neon_vqtbx2q_v: {
7533 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
7534 Ops, "vtbx2");
7535 }
7536 case NEON::BI__builtin_neon_vqtbx3q_v: {
7537 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
7538 Ops, "vtbx3");
7539 }
7540 case NEON::BI__builtin_neon_vqtbx4q_v: {
7541 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
7542 Ops, "vtbx4");
7543 }
7544 case NEON::BI__builtin_neon_vsqadd_v:
7545 case NEON::BI__builtin_neon_vsqaddq_v: {
7546 Int = Intrinsic::aarch64_neon_usqadd;
7547 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
7548 }
7549 case NEON::BI__builtin_neon_vuqadd_v:
7550 case NEON::BI__builtin_neon_vuqaddq_v: {
7551 Int = Intrinsic::aarch64_neon_suqadd;
7552 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
7553 }
7554
7555 case NEON::BI__builtin_neon_vluti2_laneq_mf8:
7556 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
7557 case NEON::BI__builtin_neon_vluti2_laneq_f16:
7558 case NEON::BI__builtin_neon_vluti2_laneq_p16:
7559 case NEON::BI__builtin_neon_vluti2_laneq_p8:
7560 case NEON::BI__builtin_neon_vluti2_laneq_s16:
7561 case NEON::BI__builtin_neon_vluti2_laneq_s8:
7562 case NEON::BI__builtin_neon_vluti2_laneq_u16:
7563 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
7564 Int = Intrinsic::aarch64_neon_vluti2_laneq;
7565 llvm::Type *Tys[2];
7566 Tys[0] = Ty;
7567 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7568 /*isQuad*/ false));
7569 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
7570 }
7571 case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
7572 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
7573 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
7574 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
7575 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
7576 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
7577 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
7578 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
7579 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
7580 Int = Intrinsic::aarch64_neon_vluti2_laneq;
7581 llvm::Type *Tys[2];
7582 Tys[0] = Ty;
7583 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7584 /*isQuad*/ true));
7585 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
7586 }
7587 case NEON::BI__builtin_neon_vluti2_lane_mf8:
7588 case NEON::BI__builtin_neon_vluti2_lane_bf16:
7589 case NEON::BI__builtin_neon_vluti2_lane_f16:
7590 case NEON::BI__builtin_neon_vluti2_lane_p16:
7591 case NEON::BI__builtin_neon_vluti2_lane_p8:
7592 case NEON::BI__builtin_neon_vluti2_lane_s16:
7593 case NEON::BI__builtin_neon_vluti2_lane_s8:
7594 case NEON::BI__builtin_neon_vluti2_lane_u16:
7595 case NEON::BI__builtin_neon_vluti2_lane_u8: {
7596 Int = Intrinsic::aarch64_neon_vluti2_lane;
7597 llvm::Type *Tys[2];
7598 Tys[0] = Ty;
7599 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7600 /*isQuad*/ false));
7601 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
7602 }
7603 case NEON::BI__builtin_neon_vluti2q_lane_mf8:
7604 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
7605 case NEON::BI__builtin_neon_vluti2q_lane_f16:
7606 case NEON::BI__builtin_neon_vluti2q_lane_p16:
7607 case NEON::BI__builtin_neon_vluti2q_lane_p8:
7608 case NEON::BI__builtin_neon_vluti2q_lane_s16:
7609 case NEON::BI__builtin_neon_vluti2q_lane_s8:
7610 case NEON::BI__builtin_neon_vluti2q_lane_u16:
7611 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
7612 Int = Intrinsic::aarch64_neon_vluti2_lane;
7613 llvm::Type *Tys[2];
7614 Tys[0] = Ty;
7615 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7616 /*isQuad*/ true));
7617 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
7618 }
7619 case NEON::BI__builtin_neon_vluti4q_lane_mf8:
7620 case NEON::BI__builtin_neon_vluti4q_lane_p8:
7621 case NEON::BI__builtin_neon_vluti4q_lane_s8:
7622 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
7623 Int = Intrinsic::aarch64_neon_vluti4q_lane;
7624 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane");
7625 }
7626 case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
7627 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
7628 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
7629 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
7630 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
7631 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq");
7632 }
7633 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
7634 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
7635 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
7636 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
7637 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
7638 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
7639 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
7640 }
7641 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
7642 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
7643 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
7644 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
7645 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
7646 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
7647 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
7648 }
7649 case NEON::BI__builtin_neon_vmmlaq_f16_mf8_fpm:
7650 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla,
7651 {llvm::FixedVectorType::get(HalfTy, 8),
7652 llvm::FixedVectorType::get(Int8Ty, 16)},
7653 Ops, E, "fmmla");
7654 case NEON::BI__builtin_neon_vmmlaq_f32_mf8_fpm:
7655 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla,
7656 {llvm::FixedVectorType::get(FloatTy, 4),
7657 llvm::FixedVectorType::get(Int8Ty, 16)},
7658 Ops, E, "fmmla");
7659 case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
7660 ExtractLow = true;
7661 [[fallthrough]];
7662 case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
7663 case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
7664 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
7665 llvm::FixedVectorType::get(BFloatTy, 8),
7666 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7667 case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
7668 ExtractLow = true;
7669 [[fallthrough]];
7670 case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
7671 case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
7672 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
7673 llvm::FixedVectorType::get(BFloatTy, 8),
7674 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7675 case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
7676 ExtractLow = true;
7677 [[fallthrough]];
7678 case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
7679 case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
7680 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
7681 llvm::FixedVectorType::get(HalfTy, 8),
7682 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7683 case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
7684 ExtractLow = true;
7685 [[fallthrough]];
7686 case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
7687 case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
7688 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
7689 llvm::FixedVectorType::get(HalfTy, 8),
7690 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7691 case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
7692 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7693 llvm::FixedVectorType::get(Int8Ty, 8),
7694 Ops[0]->getType(), false, Ops, E, "vfcvtn");
7695 case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
7696 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7697 llvm::FixedVectorType::get(Int8Ty, 8),
7698 llvm::FixedVectorType::get(HalfTy, 4), false, Ops,
7699 E, "vfcvtn");
7700 case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
7701 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7702 llvm::FixedVectorType::get(Int8Ty, 16),
7703 llvm::FixedVectorType::get(HalfTy, 8), false, Ops,
7704 E, "vfcvtn");
7705 case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
7706 llvm::Type *Ty = llvm::FixedVectorType::get(Int8Ty, 16);
7707 Ops[0] = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
7708 uint64_t(0));
7709 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn2, Ty,
7710 Ops[1]->getType(), false, Ops, E, "vfcvtn2");
7711 }
7712
7713 case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
7714 case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
7715 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2, false, HalfTy,
7716 Ops, E, "fdot2");
7717 case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
7718 case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
7719 ExtendLaneArg = true;
7720 [[fallthrough]];
7721 case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
7722 case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
7723 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2_lane,
7724 ExtendLaneArg, HalfTy, Ops, E, "fdot2_lane");
7725 case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
7726 case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
7727 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4, false,
7728 FloatTy, Ops, E, "fdot4");
7729 case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
7730 case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
7731 ExtendLaneArg = true;
7732 [[fallthrough]];
7733 case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
7734 case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
7735 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4_lane,
7736 ExtendLaneArg, FloatTy, Ops, E, "fdot4_lane");
7737
7738 case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
7739 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalb,
7740 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7741 "vmlal");
7742 case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
7743 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalt,
7744 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7745 "vmlal");
7746 case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
7747 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbb,
7748 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7749 "vmlall");
7750 case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
7751 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbt,
7752 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7753 "vmlall");
7754 case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
7755 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltb,
7756 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7757 "vmlall");
7758 case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
7759 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltt,
7760 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7761 "vmlall");
7762 case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
7763 ExtendLaneArg = true;
7764 [[fallthrough]];
7765 case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
7766 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalb_lane,
7767 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7768 case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
7769 ExtendLaneArg = true;
7770 [[fallthrough]];
7771 case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
7772 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalt_lane,
7773 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7774 case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
7775 ExtendLaneArg = true;
7776 [[fallthrough]];
7777 case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
7778 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbb_lane,
7779 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7780 case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
7781 ExtendLaneArg = true;
7782 [[fallthrough]];
7783 case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
7784 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbt_lane,
7785 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7786 case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
7787 ExtendLaneArg = true;
7788 [[fallthrough]];
7789 case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
7790 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltb_lane,
7791 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7792 case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
7793 ExtendLaneArg = true;
7794 [[fallthrough]];
7795 case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
7796 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltt_lane,
7797 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7798 case NEON::BI__builtin_neon_vamin_f16:
7799 case NEON::BI__builtin_neon_vaminq_f16:
7800 case NEON::BI__builtin_neon_vamin_f32:
7801 case NEON::BI__builtin_neon_vaminq_f32:
7802 case NEON::BI__builtin_neon_vaminq_f64: {
7803 Int = Intrinsic::aarch64_neon_famin;
7804 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
7805 }
7806 case NEON::BI__builtin_neon_vamax_f16:
7807 case NEON::BI__builtin_neon_vamaxq_f16:
7808 case NEON::BI__builtin_neon_vamax_f32:
7809 case NEON::BI__builtin_neon_vamaxq_f32:
7810 case NEON::BI__builtin_neon_vamaxq_f64: {
7811 Int = Intrinsic::aarch64_neon_famax;
7812 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
7813 }
7814 case NEON::BI__builtin_neon_vscale_f16:
7815 case NEON::BI__builtin_neon_vscaleq_f16:
7816 case NEON::BI__builtin_neon_vscale_f32:
7817 case NEON::BI__builtin_neon_vscaleq_f32:
7818 case NEON::BI__builtin_neon_vscaleq_f64: {
7819 Int = Intrinsic::aarch64_neon_fp8_fscale;
7820 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fscale");
7821 }
7822 }
7823}
7824
7825 Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
7826 const CallExpr *E) {
7827 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
7828 BuiltinID == BPF::BI__builtin_btf_type_id ||
7829 BuiltinID == BPF::BI__builtin_preserve_type_info ||
7830 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
7831 "unexpected BPF builtin");
7832
7833 // A sequence number, injected into IR builtin functions, to
7834 // prevent CSE, since the only difference between otherwise
7835 // identical calls may be the debuginfo metadata.
7836 static uint32_t BuiltinSeqNum;
7837
7838 switch (BuiltinID) {
7839 default:
7840 llvm_unreachable("Unexpected BPF builtin");
7841 case BPF::BI__builtin_preserve_field_info: {
7842 const Expr *Arg = E->getArg(0);
7843 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
7844
7845 if (!getDebugInfo()) {
7846 CGM.Error(E->getExprLoc(),
7847 "using __builtin_preserve_field_info() without -g");
7848 return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
7849 : EmitLValue(Arg).emitRawPointer(*this);
7850 }
7851
7852 // Enable underlying preserve_*_access_index() generation.
7853 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
7854 IsInPreservedAIRegion = true;
7855 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
7856 : EmitLValue(Arg).emitRawPointer(*this);
7857 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
7858
7859 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7860 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
7861
7862 // Build the IR for the preserve_field_info intrinsic.
7863 llvm::Function *FnGetFieldInfo = Intrinsic::getOrInsertDeclaration(
7864 &CGM.getModule(), Intrinsic::bpf_preserve_field_info,
7865 {FieldAddr->getType()});
7866 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
7867 }
7868 case BPF::BI__builtin_btf_type_id:
7869 case BPF::BI__builtin_preserve_type_info: {
7870 if (!getDebugInfo()) {
7871 CGM.Error(E->getExprLoc(), "using builtin function without -g");
7872 return nullptr;
7873 }
7874
7875 const Expr *Arg0 = E->getArg(0);
7876 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
7877 Arg0->getType(), Arg0->getExprLoc());
7878
7879 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7880 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
7881 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
7882
7883 llvm::Function *FnDecl;
7884 if (BuiltinID == BPF::BI__builtin_btf_type_id)
7885 FnDecl = Intrinsic::getOrInsertDeclaration(
7886 &CGM.getModule(), Intrinsic::bpf_btf_type_id, {});
7887 else
7888 FnDecl = Intrinsic::getOrInsertDeclaration(
7889 &CGM.getModule(), Intrinsic::bpf_preserve_type_info, {});
7890 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
7891 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
7892 return Fn;
7893 }
7894 case BPF::BI__builtin_preserve_enum_value: {
7895 if (!getDebugInfo()) {
7896 CGM.Error(E->getExprLoc(), "using builtin function without -g");
7897 return nullptr;
7898 }
7899
7900 const Expr *Arg0 = E->getArg(0);
7901 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
7902 Arg0->getType(), Arg0->getExprLoc());
7903
7904 // Find enumerator
7905 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
7906 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
7907 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
7908 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
7909
7910 auto InitVal = Enumerator->getInitVal();
7911 std::string InitValStr;
7912 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
7913 InitValStr = std::to_string(InitVal.getSExtValue());
7914 else
7915 InitValStr = std::to_string(InitVal.getZExtValue());
7916 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
7917 Value *EnumStrVal = Builder.CreateGlobalString(EnumStr);
7918
7919 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7920 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
7921 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
7922
7923 llvm::Function *IntrinsicFn = Intrinsic::getOrInsertDeclaration(
7924 &CGM.getModule(), Intrinsic::bpf_preserve_enum_value, {});
7925 CallInst *Fn =
7926 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
7927 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
7928 return Fn;
7929 }
7930 }
7931}
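// Illustrative sketch (not part of the original source): the
// __builtin_preserve_enum_value case above expects its first argument in the
// shape produced by libbpf's CO-RE helpers, i.e. a dereferenced cast of an
// enumerator, roughly:
//
//   enum pid_type { PIDTYPE_PID, PIDTYPE_TGID };
//   ... __builtin_preserve_enum_value(*(typeof(enum pid_type) *)PIDTYPE_PID,
//                                     /*relocation-kind flag*/ 1);
//
// which is why the code walks UnaryOperator -> CStyleCastExpr -> DeclRefExpr
// to recover the EnumConstantDecl and encodes "name:value" for the intrinsic.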
7932
7934Value *CodeGenFunction::BuildVector(ArrayRef<llvm::Value *> Ops) {
7935 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
7936 "Not a power-of-two sized vector!");
7937 bool AllConstants = true;
7938 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
7939 AllConstants &= isa<Constant>(Ops[i]);
7940
7941 // If this is a constant vector, create a ConstantVector.
7942 if (AllConstants) {
7943 SmallVector<llvm::Constant *, 16> CstOps;
7944 for (llvm::Value *Op : Ops)
7945 CstOps.push_back(cast<Constant>(Op));
7946 return llvm::ConstantVector::get(CstOps);
7947 }
7948
7949 // Otherwise, insertelement the values to build the vector.
7950 Value *Result = llvm::PoisonValue::get(
7951 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
7952
7953 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7954 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
7955
7956 return Result;
7957}
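// Illustrative sketch (not part of the original source): for non-constant
// operands, BuildVector emits a chain of insertelement instructions into a
// poison vector, e.g. for two i32 values:
//
//   %v0 = insertelement <2 x i32> poison, i32 %a, i64 0
//   %v1 = insertelement <2 x i32> %v0, i32 %b, i64 1
//
// whereas an all-constant operand list folds directly into a ConstantVector.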
7958
7959Value *CodeGenFunction::EmitAArch64CpuInit() {
7960 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
7961 llvm::FunctionCallee Func =
7962 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
7963 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
7964 cast<llvm::GlobalValue>(Func.getCallee())
7965 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
7966 return Builder.CreateCall(Func);
7967}
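// Illustrative sketch (not part of the original source): the call emitted here
// is simply
//
//   call void @__init_cpu_features_resolver()
//
// marked dso_local, so the FMV runtime populates __aarch64_cpu_features before
// any feature test emitted by EmitAArch64CpuSupports reads it.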
7968
7969Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
7970 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
7971 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
7972 llvm::SmallVector<StringRef, 8> Features;
7973 ArgStr.split(Features, "+");
7974 for (auto &Feature : Features) {
7975 Feature = Feature.trim();
7976 if (!llvm::AArch64::parseFMVExtension(Feature))
7977 return Builder.getFalse();
7978 if (Feature != "default")
7979 Features.push_back(Feature);
7980 }
7981 return EmitAArch64CpuSupports(Features);
7982}
7983
7984llvm::Value *
7985CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
7986 llvm::APInt FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
7987 Value *Result = Builder.getTrue();
7988 if (FeaturesMask != 0) {
7989 // Get features from structure in runtime library
7990 // struct {
7991 // unsigned long long features;
7992 // } __aarch64_cpu_features;
7993 llvm::Type *STy = llvm::StructType::get(Int64Ty);
7994 llvm::Constant *AArch64CPUFeatures =
7995 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
7996 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
7997 llvm::Value *CpuFeatures = Builder.CreateGEP(
7998 STy, AArch64CPUFeatures,
7999 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
8000 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
8001 CharUnits::fromQuantity(8));
8002 Value *Mask = Builder.getInt(FeaturesMask.trunc(64));
8003 Value *Bitset = Builder.CreateAnd(Features, Mask);
8004 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
8005 Result = Builder.CreateAnd(Result, Cmp);
8006 }
8007 return Result;
8008}
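// Illustrative sketch (not part of the original source): for a non-zero
// feature mask, the check above loads the runtime feature bitset and tests
// that every requested bit is set, roughly:
//
//   %features = load i64, ptr @__aarch64_cpu_features, align 8
//   %bits     = and i64 %features, <mask>   ; <mask> from getCpuSupportsMask()
//   %ok       = icmp eq i64 %bits, <mask>
//
// so the builtin evaluates to true only when all named FMV features are
// reported by the runtime.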