clang 22.0.0git
ARM.cpp
1//===---------- ARM.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ABIInfo.h"
14#include "CGBuiltin.h"
15#include "CGDebugInfo.h"
16#include "TargetInfo.h"
17#include "clang/Basic/TargetBuiltins.h"
18#include "llvm/IR/InlineAsm.h"
19#include "llvm/IR/IntrinsicsAArch64.h"
20#include "llvm/IR/IntrinsicsARM.h"
21#include "llvm/IR/IntrinsicsBPF.h"
22#include "llvm/TargetParser/AArch64TargetParser.h"
23
24#include <numeric>
25
26using namespace clang;
27using namespace CodeGen;
28using namespace llvm;
29
30static std::optional<CodeGenFunction::MSVCIntrin>
31translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
32 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
33 switch (BuiltinID) {
34 default:
35 return std::nullopt;
36 case clang::AArch64::BI_BitScanForward:
37 case clang::AArch64::BI_BitScanForward64:
38 return MSVCIntrin::_BitScanForward;
39 case clang::AArch64::BI_BitScanReverse:
40 case clang::AArch64::BI_BitScanReverse64:
41 return MSVCIntrin::_BitScanReverse;
42 case clang::AArch64::BI_InterlockedAnd64:
43 return MSVCIntrin::_InterlockedAnd;
44 case clang::AArch64::BI_InterlockedExchange64:
45 return MSVCIntrin::_InterlockedExchange;
46 case clang::AArch64::BI_InterlockedExchangeAdd64:
47 return MSVCIntrin::_InterlockedExchangeAdd;
48 case clang::AArch64::BI_InterlockedExchangeSub64:
49 return MSVCIntrin::_InterlockedExchangeSub;
50 case clang::AArch64::BI_InterlockedOr64:
51 return MSVCIntrin::_InterlockedOr;
52 case clang::AArch64::BI_InterlockedXor64:
53 return MSVCIntrin::_InterlockedXor;
54 case clang::AArch64::BI_InterlockedDecrement64:
55 return MSVCIntrin::_InterlockedDecrement;
56 case clang::AArch64::BI_InterlockedIncrement64:
57 return MSVCIntrin::_InterlockedIncrement;
58 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
59 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
60 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
61 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
62 return MSVCIntrin::_InterlockedExchangeAdd_acq;
63 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
64 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
65 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
66 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
67 return MSVCIntrin::_InterlockedExchangeAdd_rel;
68 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
69 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
70 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
71 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
72 return MSVCIntrin::_InterlockedExchangeAdd_nf;
73 case clang::AArch64::BI_InterlockedExchange8_acq:
74 case clang::AArch64::BI_InterlockedExchange16_acq:
75 case clang::AArch64::BI_InterlockedExchange_acq:
76 case clang::AArch64::BI_InterlockedExchange64_acq:
77 case clang::AArch64::BI_InterlockedExchangePointer_acq:
78 return MSVCIntrin::_InterlockedExchange_acq;
79 case clang::AArch64::BI_InterlockedExchange8_rel:
80 case clang::AArch64::BI_InterlockedExchange16_rel:
81 case clang::AArch64::BI_InterlockedExchange_rel:
82 case clang::AArch64::BI_InterlockedExchange64_rel:
83 case clang::AArch64::BI_InterlockedExchangePointer_rel:
84 return MSVCIntrin::_InterlockedExchange_rel;
85 case clang::AArch64::BI_InterlockedExchange8_nf:
86 case clang::AArch64::BI_InterlockedExchange16_nf:
87 case clang::AArch64::BI_InterlockedExchange_nf:
88 case clang::AArch64::BI_InterlockedExchange64_nf:
89 case clang::AArch64::BI_InterlockedExchangePointer_nf:
90 return MSVCIntrin::_InterlockedExchange_nf;
91 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
92 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
93 case clang::AArch64::BI_InterlockedCompareExchange_acq:
94 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
95 case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
96 return MSVCIntrin::_InterlockedCompareExchange_acq;
97 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
98 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
99 case clang::AArch64::BI_InterlockedCompareExchange_rel:
100 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
101 case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
102 return MSVCIntrin::_InterlockedCompareExchange_rel;
103 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
104 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
105 case clang::AArch64::BI_InterlockedCompareExchange_nf:
106 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
107 return MSVCIntrin::_InterlockedCompareExchange_nf;
108 case clang::AArch64::BI_InterlockedCompareExchange128:
109 return MSVCIntrin::_InterlockedCompareExchange128;
110 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
111 return MSVCIntrin::_InterlockedCompareExchange128_acq;
112 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
113 return MSVCIntrin::_InterlockedCompareExchange128_nf;
114 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
115 return MSVCIntrin::_InterlockedCompareExchange128_rel;
116 case clang::AArch64::BI_InterlockedOr8_acq:
117 case clang::AArch64::BI_InterlockedOr16_acq:
118 case clang::AArch64::BI_InterlockedOr_acq:
119 case clang::AArch64::BI_InterlockedOr64_acq:
120 return MSVCIntrin::_InterlockedOr_acq;
121 case clang::AArch64::BI_InterlockedOr8_rel:
122 case clang::AArch64::BI_InterlockedOr16_rel:
123 case clang::AArch64::BI_InterlockedOr_rel:
124 case clang::AArch64::BI_InterlockedOr64_rel:
125 return MSVCIntrin::_InterlockedOr_rel;
126 case clang::AArch64::BI_InterlockedOr8_nf:
127 case clang::AArch64::BI_InterlockedOr16_nf:
128 case clang::AArch64::BI_InterlockedOr_nf:
129 case clang::AArch64::BI_InterlockedOr64_nf:
130 return MSVCIntrin::_InterlockedOr_nf;
131 case clang::AArch64::BI_InterlockedXor8_acq:
132 case clang::AArch64::BI_InterlockedXor16_acq:
133 case clang::AArch64::BI_InterlockedXor_acq:
134 case clang::AArch64::BI_InterlockedXor64_acq:
135 return MSVCIntrin::_InterlockedXor_acq;
136 case clang::AArch64::BI_InterlockedXor8_rel:
137 case clang::AArch64::BI_InterlockedXor16_rel:
138 case clang::AArch64::BI_InterlockedXor_rel:
139 case clang::AArch64::BI_InterlockedXor64_rel:
140 return MSVCIntrin::_InterlockedXor_rel;
141 case clang::AArch64::BI_InterlockedXor8_nf:
142 case clang::AArch64::BI_InterlockedXor16_nf:
143 case clang::AArch64::BI_InterlockedXor_nf:
144 case clang::AArch64::BI_InterlockedXor64_nf:
145 return MSVCIntrin::_InterlockedXor_nf;
146 case clang::AArch64::BI_InterlockedAnd8_acq:
147 case clang::AArch64::BI_InterlockedAnd16_acq:
148 case clang::AArch64::BI_InterlockedAnd_acq:
149 case clang::AArch64::BI_InterlockedAnd64_acq:
150 return MSVCIntrin::_InterlockedAnd_acq;
151 case clang::AArch64::BI_InterlockedAnd8_rel:
152 case clang::AArch64::BI_InterlockedAnd16_rel:
153 case clang::AArch64::BI_InterlockedAnd_rel:
154 case clang::AArch64::BI_InterlockedAnd64_rel:
155 return MSVCIntrin::_InterlockedAnd_rel;
156 case clang::AArch64::BI_InterlockedAnd8_nf:
157 case clang::AArch64::BI_InterlockedAnd16_nf:
158 case clang::AArch64::BI_InterlockedAnd_nf:
159 case clang::AArch64::BI_InterlockedAnd64_nf:
160 return MSVCIntrin::_InterlockedAnd_nf;
161 case clang::AArch64::BI_InterlockedIncrement16_acq:
162 case clang::AArch64::BI_InterlockedIncrement_acq:
163 case clang::AArch64::BI_InterlockedIncrement64_acq:
164 return MSVCIntrin::_InterlockedIncrement_acq;
165 case clang::AArch64::BI_InterlockedIncrement16_rel:
166 case clang::AArch64::BI_InterlockedIncrement_rel:
167 case clang::AArch64::BI_InterlockedIncrement64_rel:
168 return MSVCIntrin::_InterlockedIncrement_rel;
169 case clang::AArch64::BI_InterlockedIncrement16_nf:
170 case clang::AArch64::BI_InterlockedIncrement_nf:
171 case clang::AArch64::BI_InterlockedIncrement64_nf:
172 return MSVCIntrin::_InterlockedIncrement_nf;
173 case clang::AArch64::BI_InterlockedDecrement16_acq:
174 case clang::AArch64::BI_InterlockedDecrement_acq:
175 case clang::AArch64::BI_InterlockedDecrement64_acq:
176 return MSVCIntrin::_InterlockedDecrement_acq;
177 case clang::AArch64::BI_InterlockedDecrement16_rel:
178 case clang::AArch64::BI_InterlockedDecrement_rel:
179 case clang::AArch64::BI_InterlockedDecrement64_rel:
180 return MSVCIntrin::_InterlockedDecrement_rel;
181 case clang::AArch64::BI_InterlockedDecrement16_nf:
182 case clang::AArch64::BI_InterlockedDecrement_nf:
183 case clang::AArch64::BI_InterlockedDecrement64_nf:
184 return MSVCIntrin::_InterlockedDecrement_nf;
185 }
186 llvm_unreachable("must return from switch");
187}
188
189static std::optional<CodeGenFunction::MSVCIntrin>
190translateArmToMsvcIntrin(unsigned BuiltinID) {
191 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
192 switch (BuiltinID) {
193 default:
194 return std::nullopt;
195 case clang::ARM::BI_BitScanForward:
196 case clang::ARM::BI_BitScanForward64:
197 return MSVCIntrin::_BitScanForward;
198 case clang::ARM::BI_BitScanReverse:
199 case clang::ARM::BI_BitScanReverse64:
200 return MSVCIntrin::_BitScanReverse;
201 case clang::ARM::BI_InterlockedAnd64:
202 return MSVCIntrin::_InterlockedAnd;
203 case clang::ARM::BI_InterlockedExchange64:
204 return MSVCIntrin::_InterlockedExchange;
205 case clang::ARM::BI_InterlockedExchangeAdd64:
206 return MSVCIntrin::_InterlockedExchangeAdd;
207 case clang::ARM::BI_InterlockedExchangeSub64:
208 return MSVCIntrin::_InterlockedExchangeSub;
209 case clang::ARM::BI_InterlockedOr64:
210 return MSVCIntrin::_InterlockedOr;
211 case clang::ARM::BI_InterlockedXor64:
212 return MSVCIntrin::_InterlockedXor;
213 case clang::ARM::BI_InterlockedDecrement64:
214 return MSVCIntrin::_InterlockedDecrement;
215 case clang::ARM::BI_InterlockedIncrement64:
216 return MSVCIntrin::_InterlockedIncrement;
217 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
218 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
219 case clang::ARM::BI_InterlockedExchangeAdd_acq:
220 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
221 return MSVCIntrin::_InterlockedExchangeAdd_acq;
222 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
223 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
224 case clang::ARM::BI_InterlockedExchangeAdd_rel:
225 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
226 return MSVCIntrin::_InterlockedExchangeAdd_rel;
227 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
228 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
229 case clang::ARM::BI_InterlockedExchangeAdd_nf:
230 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
231 return MSVCIntrin::_InterlockedExchangeAdd_nf;
232 case clang::ARM::BI_InterlockedExchange8_acq:
233 case clang::ARM::BI_InterlockedExchange16_acq:
234 case clang::ARM::BI_InterlockedExchange_acq:
235 case clang::ARM::BI_InterlockedExchange64_acq:
236 case clang::ARM::BI_InterlockedExchangePointer_acq:
237 return MSVCIntrin::_InterlockedExchange_acq;
238 case clang::ARM::BI_InterlockedExchange8_rel:
239 case clang::ARM::BI_InterlockedExchange16_rel:
240 case clang::ARM::BI_InterlockedExchange_rel:
241 case clang::ARM::BI_InterlockedExchange64_rel:
242 case clang::ARM::BI_InterlockedExchangePointer_rel:
243 return MSVCIntrin::_InterlockedExchange_rel;
244 case clang::ARM::BI_InterlockedExchange8_nf:
245 case clang::ARM::BI_InterlockedExchange16_nf:
246 case clang::ARM::BI_InterlockedExchange_nf:
247 case clang::ARM::BI_InterlockedExchange64_nf:
248 case clang::ARM::BI_InterlockedExchangePointer_nf:
249 return MSVCIntrin::_InterlockedExchange_nf;
250 case clang::ARM::BI_InterlockedCompareExchange8_acq:
251 case clang::ARM::BI_InterlockedCompareExchange16_acq:
252 case clang::ARM::BI_InterlockedCompareExchange_acq:
253 case clang::ARM::BI_InterlockedCompareExchange64_acq:
254 case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
255 return MSVCIntrin::_InterlockedCompareExchange_acq;
256 case clang::ARM::BI_InterlockedCompareExchange8_rel:
257 case clang::ARM::BI_InterlockedCompareExchange16_rel:
258 case clang::ARM::BI_InterlockedCompareExchange_rel:
259 case clang::ARM::BI_InterlockedCompareExchange64_rel:
260 case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
261 return MSVCIntrin::_InterlockedCompareExchange_rel;
262 case clang::ARM::BI_InterlockedCompareExchange8_nf:
263 case clang::ARM::BI_InterlockedCompareExchange16_nf:
264 case clang::ARM::BI_InterlockedCompareExchange_nf:
265 case clang::ARM::BI_InterlockedCompareExchange64_nf:
266 return MSVCIntrin::_InterlockedCompareExchange_nf;
267 case clang::ARM::BI_InterlockedOr8_acq:
268 case clang::ARM::BI_InterlockedOr16_acq:
269 case clang::ARM::BI_InterlockedOr_acq:
270 case clang::ARM::BI_InterlockedOr64_acq:
271 return MSVCIntrin::_InterlockedOr_acq;
272 case clang::ARM::BI_InterlockedOr8_rel:
273 case clang::ARM::BI_InterlockedOr16_rel:
274 case clang::ARM::BI_InterlockedOr_rel:
275 case clang::ARM::BI_InterlockedOr64_rel:
276 return MSVCIntrin::_InterlockedOr_rel;
277 case clang::ARM::BI_InterlockedOr8_nf:
278 case clang::ARM::BI_InterlockedOr16_nf:
279 case clang::ARM::BI_InterlockedOr_nf:
280 case clang::ARM::BI_InterlockedOr64_nf:
281 return MSVCIntrin::_InterlockedOr_nf;
282 case clang::ARM::BI_InterlockedXor8_acq:
283 case clang::ARM::BI_InterlockedXor16_acq:
284 case clang::ARM::BI_InterlockedXor_acq:
285 case clang::ARM::BI_InterlockedXor64_acq:
286 return MSVCIntrin::_InterlockedXor_acq;
287 case clang::ARM::BI_InterlockedXor8_rel:
288 case clang::ARM::BI_InterlockedXor16_rel:
289 case clang::ARM::BI_InterlockedXor_rel:
290 case clang::ARM::BI_InterlockedXor64_rel:
291 return MSVCIntrin::_InterlockedXor_rel;
292 case clang::ARM::BI_InterlockedXor8_nf:
293 case clang::ARM::BI_InterlockedXor16_nf:
294 case clang::ARM::BI_InterlockedXor_nf:
295 case clang::ARM::BI_InterlockedXor64_nf:
296 return MSVCIntrin::_InterlockedXor_nf;
297 case clang::ARM::BI_InterlockedAnd8_acq:
298 case clang::ARM::BI_InterlockedAnd16_acq:
299 case clang::ARM::BI_InterlockedAnd_acq:
300 case clang::ARM::BI_InterlockedAnd64_acq:
301 return MSVCIntrin::_InterlockedAnd_acq;
302 case clang::ARM::BI_InterlockedAnd8_rel:
303 case clang::ARM::BI_InterlockedAnd16_rel:
304 case clang::ARM::BI_InterlockedAnd_rel:
305 case clang::ARM::BI_InterlockedAnd64_rel:
306 return MSVCIntrin::_InterlockedAnd_rel;
307 case clang::ARM::BI_InterlockedAnd8_nf:
308 case clang::ARM::BI_InterlockedAnd16_nf:
309 case clang::ARM::BI_InterlockedAnd_nf:
310 case clang::ARM::BI_InterlockedAnd64_nf:
311 return MSVCIntrin::_InterlockedAnd_nf;
312 case clang::ARM::BI_InterlockedIncrement16_acq:
313 case clang::ARM::BI_InterlockedIncrement_acq:
314 case clang::ARM::BI_InterlockedIncrement64_acq:
315 return MSVCIntrin::_InterlockedIncrement_acq;
316 case clang::ARM::BI_InterlockedIncrement16_rel:
317 case clang::ARM::BI_InterlockedIncrement_rel:
318 case clang::ARM::BI_InterlockedIncrement64_rel:
319 return MSVCIntrin::_InterlockedIncrement_rel;
320 case clang::ARM::BI_InterlockedIncrement16_nf:
321 case clang::ARM::BI_InterlockedIncrement_nf:
322 case clang::ARM::BI_InterlockedIncrement64_nf:
323 return MSVCIntrin::_InterlockedIncrement_nf;
324 case clang::ARM::BI_InterlockedDecrement16_acq:
325 case clang::ARM::BI_InterlockedDecrement_acq:
326 case clang::ARM::BI_InterlockedDecrement64_acq:
327 return MSVCIntrin::_InterlockedDecrement_acq;
328 case clang::ARM::BI_InterlockedDecrement16_rel:
329 case clang::ARM::BI_InterlockedDecrement_rel:
330 case clang::ARM::BI_InterlockedDecrement64_rel:
331 return MSVCIntrin::_InterlockedDecrement_rel;
332 case clang::ARM::BI_InterlockedDecrement16_nf:
333 case clang::ARM::BI_InterlockedDecrement_nf:
334 case clang::ARM::BI_InterlockedDecrement64_nf:
335 return MSVCIntrin::_InterlockedDecrement_nf;
336 }
337 llvm_unreachable("must return from switch");
338}
339
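// Editor's note (illustration, not part of the original file): the two
// translators above are consumed early in the target builtin emitters. The
// MSVC-style builtins share one common emitter keyed on the MSVCIntrin enum,
// so a call site looks roughly like
//
//   if (std::optional<CodeGenFunction::MSVCIntrin> MsvcIntId =
//           translateArmToMsvcIntrin(BuiltinID))
//     return CGF.EmitMSVCBuiltinExpr(*MsvcIntId, E);
//
// (EmitMSVCBuiltinExpr is the shared emitter assumed here.) Builtin IDs that
// are not listed return std::nullopt and fall through to the NEON/ACLE paths.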
340// Emit an intrinsic where all operands are of the same type as the result.
341// Depending on mode, this may be a constrained floating-point intrinsic.
342static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
343 unsigned IntrinsicID,
344 unsigned ConstrainedIntrinsicID,
345 llvm::Type *Ty,
346 ArrayRef<Value *> Args) {
347 Function *F;
348 if (CGF.Builder.getIsFPConstrained())
349 F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
350 else
351 F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
352
353 if (CGF.Builder.getIsFPConstrained())
354 return CGF.Builder.CreateConstrainedFPCall(F, Args);
355 else
356 return CGF.Builder.CreateCall(F, Args);
357}
358
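// Editor's sketch (illustrative, not part of the original file): a minimal
// example of how the helper above is meant to be called. Under strict
// floating-point semantics the constrained intrinsic is selected, otherwise
// the plain one. The wrapper name and the sqrt/constrained-sqrt pairing are
// chosen purely for illustration.
static Value *emitFPSqrtExample(CodeGenFunction &CGF, llvm::Type *Ty,
                                ArrayRef<Value *> Ops) {
  // llvm.sqrt vs. llvm.experimental.constrained.sqrt, picked by FP mode.
  return emitCallMaybeConstrainedFPBuiltin(
      CGF, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt, Ty, Ops);
}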
359static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
360 NeonTypeFlags TypeFlags,
361 bool HasFastHalfType = true,
362 bool V1Ty = false,
363 bool AllowBFloatArgsAndRet = true) {
364 int IsQuad = TypeFlags.isQuad();
365 switch (TypeFlags.getEltType()) {
366 case NeonTypeFlags::Int8:
367 case NeonTypeFlags::Poly8:
368 case NeonTypeFlags::MFloat8:
369 return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
370 case NeonTypeFlags::Int16:
371 case NeonTypeFlags::Poly16:
372 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
373 case NeonTypeFlags::BFloat16:
374 if (AllowBFloatArgsAndRet)
375 return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
376 else
377 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
378 case NeonTypeFlags::Float16:
379 if (HasFastHalfType)
380 return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
381 else
382 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
383 case NeonTypeFlags::Int32:
384 return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
385 case NeonTypeFlags::Int64:
386 case NeonTypeFlags::Poly64:
387 return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
388 case NeonTypeFlags::Poly128:
389 // FIXME: i128 and f128 don't get full support in Clang and LLVM, and much
390 // of the i128/f128 API is missing, so we use v16i8 to represent poly128
391 // and rely on pattern matching.
392 return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
393 case NeonTypeFlags::Float32:
394 return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
395 case NeonTypeFlags::Float64:
396 return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
397 }
398 llvm_unreachable("Unknown vector element type!");
399}
400
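// Editor's note (illustration, not part of the original file): NeonTypeFlags
// packs the element kind plus the quad bit, and GetNeonType expands that into
// the corresponding LLVM vector type. For example, a Float32 element with the
// quad bit clear maps to <2 x float> and with the quad bit set to
// <4 x float>, because the lane count is computed as (2 << IsQuad); the
// integer cases above scale the same way (8 << IsQuad lanes of i8,
// 4 << IsQuad lanes of i16, and so on). Poly128 is the one exception: it is
// always modelled as <16 x i8> until i128 support improves.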
401static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
402 NeonTypeFlags IntTypeFlags) {
403 int IsQuad = IntTypeFlags.isQuad();
404 switch (IntTypeFlags.getEltType()) {
405 case NeonTypeFlags::Int16:
406 return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
407 case NeonTypeFlags::Int32:
408 return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
409 case NeonTypeFlags::Int64:
410 return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
411 default:
412 llvm_unreachable("Type can't be converted to floating-point!");
413 }
414}
415
416Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
417 const ElementCount &Count) {
418 Value *SV = llvm::ConstantVector::getSplat(Count, C);
419 return Builder.CreateShuffleVector(V, V, SV, "lane");
420}
421
422Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
423 ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
424 return EmitNeonSplat(V, C, EC);
425}
426
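// Editor's note (illustration, not part of the original file): EmitNeonSplat
// implements the "lane" forms of the intrinsics by broadcasting one lane of V
// with a shufflevector whose mask is the splatted lane constant, roughly:
//
//   %lane = shufflevector <4 x i32> %v, <4 x i32> %v,
//                         <4 x i32> <i32 1, i32 1, i32 1, i32 1>
//
// The two-argument overload splats to the source's own element count, while
// the Count overload lets callers produce a wider or narrower result.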
427Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
428 const char *name,
429 unsigned shift, bool rightshift) {
430 unsigned j = 0;
431 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
432 ai != ae; ++ai, ++j) {
433 if (F->isConstrainedFPIntrinsic())
434 if (ai->getType()->isMetadataTy())
435 continue;
436 if (shift > 0 && shift == j)
437 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
438 else
439 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
440 }
441
442 if (F->isConstrainedFPIntrinsic())
443 return Builder.CreateConstrainedFPCall(F, Ops, name);
444 else
445 return Builder.CreateCall(F, Ops, name);
446}
447
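// Editor's sketch (illustrative, not part of the original file): a typical
// call to EmitNeonCall from the NEON expansion code. Operand 1 is flagged as
// the shift amount, so it is expanded into a splatted shift vector before the
// call; the remaining operands are simply bit-cast to the intrinsic's
// parameter types. emitVrshrExample is a hypothetical wrapper; the intrinsic
// and name mirror the vrshr_n entries in the tables below.
static Value *emitVrshrExample(CodeGenFunction &CGF, Function *F,
                               SmallVectorImpl<Value *> &Ops) {
  return CGF.EmitNeonCall(F, Ops, "vrshr_n", /*shift=*/1, /*rightshift=*/true);
}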
448Value *CodeGenFunction::EmitFP8NeonCall(unsigned IID,
449 ArrayRef<llvm::Type *> Tys,
450 SmallVectorImpl<llvm::Value *> &Ops,
451 const CallExpr *E, const char *name) {
452 llvm::Value *FPM =
453 EmitScalarOrConstFoldImmArg(/* ICEArguments */ 0, E->getNumArgs() - 1, E);
454 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr), FPM);
455 return EmitNeonCall(CGM.getIntrinsic(IID, Tys), Ops, name);
456}
457
458llvm::Value *CodeGenFunction::EmitFP8NeonFDOTCall(
459 unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
460 SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
461
462 const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
463 RetTy->getPrimitiveSizeInBits();
464 llvm::Type *Tys[] = {llvm::FixedVectorType::get(RetTy, ElemCount),
465 Ops[1]->getType()};
466 if (ExtendLaneArg) {
467 auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
468 Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
469 uint64_t(0));
470 }
471 return EmitFP8NeonCall(IID, Tys, Ops, E, name);
472}
473
474llvm::Value *CodeGenFunction::EmitFP8NeonFMLACall(
475 unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
476 SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
477
478 if (ExtendLaneArg) {
479 auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
480 Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
481 uint64_t(0));
482 }
483 const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
484 RetTy->getPrimitiveSizeInBits();
485 return EmitFP8NeonCall(IID, {llvm::FixedVectorType::get(RetTy, ElemCount)},
486 Ops, E, name);
487}
488
489Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
490 bool neg) {
491 int SV = cast<ConstantInt>(V)->getSExtValue();
492 return ConstantInt::get(Ty, neg ? -SV : SV);
493}
494
495Value *CodeGenFunction::EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
496 llvm::Type *Ty1, bool Extract,
497 SmallVectorImpl<llvm::Value *> &Ops,
498 const CallExpr *E,
499 const char *name) {
500 llvm::Type *Tys[] = {Ty0, Ty1};
501 if (Extract) {
502 // Op[0] is mfloat8x16_t, but the intrinsic converts only the lower part of
503 // the vector.
504 Tys[1] = llvm::FixedVectorType::get(Int8Ty, 8);
505 Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], uint64_t(0));
506 }
507 return EmitFP8NeonCall(IID, Tys, Ops, E, name);
508}
509
510// Right-shift a vector by a constant.
511Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
512 llvm::Type *Ty, bool usgn,
513 const char *name) {
514 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
515
516 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
517 int EltSize = VTy->getScalarSizeInBits();
518
519 Vec = Builder.CreateBitCast(Vec, Ty);
520
521 // lshr/ashr are undefined when the shift amount is equal to the vector
522 // element size.
523 if (ShiftAmt == EltSize) {
524 if (usgn) {
525 // Right-shifting an unsigned value by its size yields 0.
526 return llvm::ConstantAggregateZero::get(VTy);
527 } else {
528 // Right-shifting a signed value by its size is equivalent
529 // to a shift of size-1.
530 --ShiftAmt;
531 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
532 }
533 }
534
535 Shift = EmitNeonShiftVector(Shift, Ty, false);
536 if (usgn)
537 return Builder.CreateLShr(Vec, Shift, name);
538 else
539 return Builder.CreateAShr(Vec, Shift, name);
540}
541
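// Editor's note (illustration, not part of the original file): the clamp
// above matters because an IR lshr/ashr by the full element width produces a
// poison value. For a signed <2 x i64> shifted right by 64, the helper
// therefore emits an ashr by 63, which still yields the architectural result
// (a vector of sign bits); the equivalent unsigned shift is folded directly
// to a zero vector.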
542enum {
543 AddRetType = (1 << 0),
544 Add1ArgType = (1 << 1),
545 Add2ArgTypes = (1 << 2),
546
547 VectorizeRetType = (1 << 3),
548 VectorizeArgTypes = (1 << 4),
549
550 InventFloatType = (1 << 5),
551 UnsignedAlts = (1 << 6),
552
553 Use64BitVectors = (1 << 7),
554 Use128BitVectors = (1 << 8),
555
556 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
557 VectorRet = AddRetType | VectorizeRetType,
558 VectorRetGetArgs01 =
559 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
560 FpCmpzModifiers =
561 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
562};
563
564namespace {
565struct ARMVectorIntrinsicInfo {
566 const char *NameHint;
567 unsigned BuiltinID;
568 unsigned LLVMIntrinsic;
569 unsigned AltLLVMIntrinsic;
570 unsigned TypeModifier;
571
572 bool operator<(unsigned RHSBuiltinID) const {
573 return BuiltinID < RHSBuiltinID;
574 }
575 bool operator<(const ARMVectorIntrinsicInfo &TE) const {
576 return BuiltinID < TE.BuiltinID;
577 }
578};
579} // end anonymous namespace
580
581#define NEONMAP0(NameBase) \
582 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
583
584#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
585 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, \
586 Intrinsic::LLVMIntrinsic, 0, TypeModifier }
587
588#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
589 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, \
590 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
591 TypeModifier }
592
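// Editor's note (illustration, not part of the original file): each NEONMAP*
// entry expands to one ARMVectorIntrinsicInfo record. For example,
//
//   NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts)
//
// becomes, roughly,
//
//   { "vabd_v", NEON::BI__builtin_neon_vabd_v,
//     Intrinsic::arm_neon_vabdu, Intrinsic::arm_neon_vabds,
//     Add1ArgType | UnsignedAlts }
//
// where, for UnsignedAlts entries, the first intrinsic is the unsigned
// flavour and the alternate the signed one. The tables are kept sorted by
// BuiltinID (that is what the operator< overloads above provide), so the
// lookup code can binary-search them instead of scanning linearly.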
593static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[] = {
594 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
595 NEONMAP0(splat_lane_v),
596 NEONMAP0(splat_laneq_v),
597 NEONMAP0(splatq_lane_v),
598 NEONMAP0(splatq_laneq_v),
599 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
600 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
601 NEONMAP1(vabs_v, arm_neon_vabs, 0),
602 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
603 NEONMAP0(vadd_v),
604 NEONMAP0(vaddhn_v),
605 NEONMAP0(vaddq_v),
606 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
607 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
608 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
609 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
610 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
611 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
612 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
613 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
614 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
615 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
616 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
617 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
618 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
619 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
620 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
621 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
622 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
623 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
624 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
625 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
626 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
627 NEONMAP1(vcage_v, arm_neon_vacge, 0),
628 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
629 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
630 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
631 NEONMAP1(vcale_v, arm_neon_vacge, 0),
632 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
633 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
634 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
635 NEONMAP0(vceqz_v),
636 NEONMAP0(vceqzq_v),
637 NEONMAP0(vcgez_v),
638 NEONMAP0(vcgezq_v),
639 NEONMAP0(vcgtz_v),
640 NEONMAP0(vcgtzq_v),
641 NEONMAP0(vclez_v),
642 NEONMAP0(vclezq_v),
643 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
644 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
645 NEONMAP0(vcltz_v),
646 NEONMAP0(vcltzq_v),
647 NEONMAP1(vclz_v, ctlz, Add1ArgType),
648 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
649 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
650 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
651 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
652 NEONMAP0(vcvt_f16_s16),
653 NEONMAP0(vcvt_f16_u16),
654 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
655 NEONMAP0(vcvt_f32_v),
656 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
657 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
658 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
659 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
660 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
661 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
662 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
663 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
664 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
665 NEONMAP0(vcvt_s16_f16),
666 NEONMAP0(vcvt_s32_v),
667 NEONMAP0(vcvt_s64_v),
668 NEONMAP0(vcvt_u16_f16),
669 NEONMAP0(vcvt_u32_v),
670 NEONMAP0(vcvt_u64_v),
671 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
672 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
673 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
674 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
675 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
676 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
677 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
678 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
679 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
680 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
681 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
682 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
683 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
684 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
685 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
686 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
687 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
688 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
689 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
690 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
691 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
692 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
693 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
694 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
695 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
696 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
697 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
698 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
699 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
700 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
701 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
702 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
703 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
704 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
705 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
706 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
707 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
708 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
709 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
710 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
711 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
712 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
713 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
714 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
715 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
716 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
717 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
718 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
719 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
720 NEONMAP0(vcvtq_f16_s16),
721 NEONMAP0(vcvtq_f16_u16),
722 NEONMAP0(vcvtq_f32_v),
723 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
724 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
725 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
726 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
727 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
728 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
729 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
730 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
731 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
732 NEONMAP0(vcvtq_s16_f16),
733 NEONMAP0(vcvtq_s32_v),
734 NEONMAP0(vcvtq_s64_v),
735 NEONMAP0(vcvtq_u16_f16),
736 NEONMAP0(vcvtq_u32_v),
737 NEONMAP0(vcvtq_u64_v),
738 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
739 NEONMAP1(vdot_u32, arm_neon_udot, 0),
740 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
741 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
742 NEONMAP0(vext_v),
743 NEONMAP0(vextq_v),
744 NEONMAP0(vfma_v),
745 NEONMAP0(vfmaq_v),
746 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
747 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
748 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
749 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
750 NEONMAP0(vld1_dup_v),
751 NEONMAP1(vld1_v, arm_neon_vld1, 0),
752 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
753 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
754 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
755 NEONMAP0(vld1q_dup_v),
756 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
757 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
758 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
759 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
760 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
761 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
762 NEONMAP1(vld2_v, arm_neon_vld2, 0),
763 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
764 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
765 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
766 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
767 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
768 NEONMAP1(vld3_v, arm_neon_vld3, 0),
769 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
770 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
771 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
772 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
773 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
774 NEONMAP1(vld4_v, arm_neon_vld4, 0),
775 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
776 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
777 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
778 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
779 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
780 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
781 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
782 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
783 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
784 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
785 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
786 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
787 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
788 NEONMAP0(vmovl_v),
789 NEONMAP0(vmovn_v),
790 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
791 NEONMAP0(vmull_v),
792 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
793 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
794 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
795 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
796 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
797 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
798 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
799 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
800 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
801 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
802 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
803 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
804 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
805 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
806 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
807 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
808 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
809 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
810 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
811 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
812 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
813 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
814 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
815 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
816 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
817 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
818 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
819 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
820 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
821 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
822 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
823 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
824 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
825 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
826 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
827 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
828 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
829 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
830 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
831 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
832 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
833 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
834 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
835 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
836 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
837 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
838 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
839 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
840 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
841 NEONMAP1(vrnd_v, trunc, Add1ArgType),
842 NEONMAP1(vrnda_v, round, Add1ArgType),
843 NEONMAP1(vrndaq_v, round, Add1ArgType),
844 NEONMAP0(vrndi_v),
845 NEONMAP0(vrndiq_v),
846 NEONMAP1(vrndm_v, floor, Add1ArgType),
847 NEONMAP1(vrndmq_v, floor, Add1ArgType),
848 NEONMAP1(vrndn_v, roundeven, Add1ArgType),
849 NEONMAP1(vrndnq_v, roundeven, Add1ArgType),
850 NEONMAP1(vrndp_v, ceil, Add1ArgType),
851 NEONMAP1(vrndpq_v, ceil, Add1ArgType),
852 NEONMAP1(vrndq_v, trunc, Add1ArgType),
853 NEONMAP1(vrndx_v, rint, Add1ArgType),
854 NEONMAP1(vrndxq_v, rint, Add1ArgType),
855 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
856 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
857 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
858 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
859 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
860 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
861 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
862 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
863 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
864 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
865 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
866 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
867 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
868 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
869 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
870 NEONMAP0(vshl_n_v),
871 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
872 NEONMAP0(vshll_n_v),
873 NEONMAP0(vshlq_n_v),
874 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
875 NEONMAP0(vshr_n_v),
876 NEONMAP0(vshrn_n_v),
877 NEONMAP0(vshrq_n_v),
878 NEONMAP1(vst1_v, arm_neon_vst1, 0),
879 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
880 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
881 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
882 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
883 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
884 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
885 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
886 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
887 NEONMAP1(vst2_v, arm_neon_vst2, 0),
888 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
889 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
890 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
891 NEONMAP1(vst3_v, arm_neon_vst3, 0),
892 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
893 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
894 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
895 NEONMAP1(vst4_v, arm_neon_vst4, 0),
896 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
897 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
898 NEONMAP0(vsubhn_v),
899 NEONMAP0(vtrn_v),
900 NEONMAP0(vtrnq_v),
901 NEONMAP0(vtst_v),
902 NEONMAP0(vtstq_v),
903 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
904 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
905 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
906 NEONMAP0(vuzp_v),
907 NEONMAP0(vuzpq_v),
908 NEONMAP0(vzip_v),
909 NEONMAP0(vzipq_v)
910};
911
912static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
913 NEONMAP0(splat_lane_v),
914 NEONMAP0(splat_laneq_v),
915 NEONMAP0(splatq_lane_v),
916 NEONMAP0(splatq_laneq_v),
917 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
918 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
919 NEONMAP0(vadd_v),
920 NEONMAP0(vaddhn_v),
921 NEONMAP0(vaddq_p128),
922 NEONMAP0(vaddq_v),
923 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
924 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
925 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
926 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
927 NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
928 NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
929 NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
930 NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
931 NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
932 NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
933 NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
934 NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
935 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
936 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
937 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
938 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
939 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
940 NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
941 NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
942 NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
943 NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
944 NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
945 NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
946 NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
947 NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
948 NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
949 NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
950 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
951 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
952 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
953 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
954 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
955 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
956 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
957 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
958 NEONMAP0(vceqz_v),
959 NEONMAP0(vceqzq_v),
960 NEONMAP0(vcgez_v),
961 NEONMAP0(vcgezq_v),
962 NEONMAP0(vcgtz_v),
963 NEONMAP0(vcgtzq_v),
964 NEONMAP0(vclez_v),
965 NEONMAP0(vclezq_v),
966 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
967 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
968 NEONMAP0(vcltz_v),
969 NEONMAP0(vcltzq_v),
970 NEONMAP1(vclz_v, ctlz, Add1ArgType),
971 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
972 NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
973 NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
974 NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
975 NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
976 NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
977 NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
978 NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
979 NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
980 NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
981 NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
982 NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
983 NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
984 NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
985 NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
986 NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
987 NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
988 NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
989 NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
990 NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
991 NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
992 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
993 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
994 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
995 NEONMAP0(vcvt_f16_s16),
996 NEONMAP0(vcvt_f16_u16),
997 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
998 NEONMAP0(vcvt_f32_v),
999 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
1000 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
1001 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1002 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1003 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
1004 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
1005 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
1006 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
1007 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
1008 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
1009 NEONMAP0(vcvtq_f16_s16),
1010 NEONMAP0(vcvtq_f16_u16),
1011 NEONMAP0(vcvtq_f32_v),
1012 NEONMAP0(vcvtq_high_bf16_f32),
1013 NEONMAP0(vcvtq_low_bf16_f32),
1014 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
1015 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
1016 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1017 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1018 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
1019 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
1020 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
1021 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
1022 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
1023 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
1024 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
1025 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
1026 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
1027 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
1028 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
1029 NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1030 NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1031 NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1032 NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1033 NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1034 NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1035 NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1036 NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1037 NEONMAP0(vext_v),
1038 NEONMAP0(vextq_v),
1039 NEONMAP0(vfma_v),
1040 NEONMAP0(vfmaq_v),
1041 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
1042 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
1043 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
1044 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
1045 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
1046 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
1047 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
1048 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
1049 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
1050 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
1051 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
1052 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
1053 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
1054 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
1055 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
1056 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
1057 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
1058 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
1059 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
1060 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
1061 NEONMAP0(vmovl_v),
1062 NEONMAP0(vmovn_v),
1063 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
1064 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
1065 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
1066 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
1067 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
1068 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
1069 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
1070 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
1071 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
1072 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
1073 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
1074 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
1075 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
1076 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
1077 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
1078 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
1079 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
1080 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
1081 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
1082 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
1083 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
1084 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
1085 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
1086 NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
1087 NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
1088 NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
1089 NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
1090 NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
1091 NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
1092 NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
1093 NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
1094 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
1095 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
1096 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
1097 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
1098 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
1099 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
1100 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
1101 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
1102 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
1103 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
1104 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
1105 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
1106 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
1107 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
1108 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
1109 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
1110 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
1111 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
1112 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
1113 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
1114 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
1115 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
1116 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
1117 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
1118 NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
1119 NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
1120 NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
1121 NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
1122 NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
1123 NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
1124 NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
1125 NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
1126 NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
1127 NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
1128 NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
1129 NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
1130 NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
1131 NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
1132 NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
1133 NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
1134 NEONMAP0(vrndi_v),
1135 NEONMAP0(vrndiq_v),
1136 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
1137 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
1138 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
1139 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
1140 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
1141 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
1142 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
1143 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
1144 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
1145 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
1146 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
1147 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
1148 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
1149 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
1150 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
1151 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
1152 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
1153 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
1154 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
1155 NEONMAP0(vshl_n_v),
1156 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
1157 NEONMAP0(vshll_n_v),
1158 NEONMAP0(vshlq_n_v),
1159 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
1160 NEONMAP0(vshr_n_v),
1161 NEONMAP0(vshrn_n_v),
1162 NEONMAP0(vshrq_n_v),
1163 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
1164 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
1165 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
1166 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
1167 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
1168 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
1169 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
1170 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
1171 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
1172 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
1173 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
1174 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
1175 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
1176 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
1177 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
1178 NEONMAP0(vsubhn_v),
1179 NEONMAP0(vtst_v),
1180 NEONMAP0(vtstq_v),
1181 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
1182 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
1183 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
1184 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
1185};
1186
1187static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
1188 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
1189 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
1190 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
1191 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
1192 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
1193 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
1194 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
1195 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
1196 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
1197 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1198 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
1199 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
1200 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
1201 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
1202 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1203 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1204 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
1205 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
1206 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
1207 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
1208 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
1209 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
1210 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
1211 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
1212 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1213 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1214 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1215 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1216 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1217 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1218 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1219 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1220 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1221 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1222 NEONMAP0(vcvth_bf16_f32),
1223 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1224 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1225 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1226 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1227 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1228 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1229 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1230 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1231 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1232 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1233 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1234 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1235 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1236 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1237 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1238 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1239 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1240 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1241 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
1242 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1243 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1244 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1245 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1246 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
1247 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
1248 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1249 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1250 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
1251 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
1252 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1253 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1254 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1255 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1256 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
1257 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
1258 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1259 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
1260 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
1261 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
1262 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
1263 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
1264 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
1265 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1266 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1267 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1268 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1269 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1270 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1271 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1272 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1273 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
1274 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1275 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
1276 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
1277 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
1278 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
1279 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
1280 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
1281 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
1282 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
1283 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
1284 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
1285 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
1286 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
1287 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
1288 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
1289 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
1290 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
1291 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
1292 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
1293 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
1294 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
1295 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
1296 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
1297 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
1298 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
1299 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
1300 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
1301 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
1302 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
1303 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
1304 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
1305 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
1306 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
1307 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
1308 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
1309 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
1310 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
1311 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
1312 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
1313 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
1314 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
1315 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
1316 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
1317 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
1318 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
1319 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
1320 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
1321 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
1322 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
1323 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
1324 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
1325 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
1326 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
1327 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1328 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1329 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1330 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1331 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
1332 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
1333 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1334 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1335 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1336 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1337 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
1338 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
1339 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
1340 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
1341 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
1342 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
1343 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
1344 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
1345 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
1346 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
1347 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
1348 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
1349 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
1350 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
1351 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
1352 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
1353 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
1354 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
1355 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
1356 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
1357 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
1358 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
1359 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
1360 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
1361 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
1362 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
1363 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
1364 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
1365 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
1366 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
1367 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
1368 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
1369 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
1370 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
1371 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
1372 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
1373 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
1374 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
1375 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
1376 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
1377 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
1378 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
1379 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
1380 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
1381 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
1382 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
1383 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
1384 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
1385 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
1386 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
1387 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
1388 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
1389 // FP16 scalar intrinsics go here.
1390 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
1391 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1392 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1393 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1394 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1395 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1396 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1397 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1398 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1399 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1400 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1401 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1402 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1403 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1404 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1405 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1406 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1407 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1408 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1409 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1410 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1411 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1412 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1413 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1414 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1415 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1416 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1417 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1418 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1419 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
1420 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
1421 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
1422 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
1423 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
1424};
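// Illustrative note: in this scalar ("SISD") table the TypeModifier flags
// describe how each scalar builtin is matched against a vector-flavoured LLVM
// intrinsic. For example, the vaddlvq_s32 entry (aarch64_neon_saddlv,
// AddRetType | Add1ArgType) adds both the scalar return type and the vector
// argument type as overload parameters, so the requested declaration is
// roughly:
//   declare i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32>)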
1425
1426// Some intrinsics are equivalent for codegen.
1427static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
1428 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
1429 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
1430 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
1431 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
1432 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
1433 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
1434 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
1435 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
1436 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
1437 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
1438 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
1439 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
1440 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
1441 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
1442 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
1443 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
1444 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
1445 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
1446 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
1447 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
1448 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
1449 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
1450 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
1451 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
1452 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
1453 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
1454 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
1455 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
1456 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
1457 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
1458 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
1459 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
1460 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
1461 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
1462 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
1463 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
1464 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
1465 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
1466 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
1467 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
1468 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
1469 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
1470 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
1471 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
1472 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
1473 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
1474 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
1475 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
1476 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
1477 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
1478 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
1479 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
1480 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
1481 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
1482 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
1483 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
1484 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
1485 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
1486 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
1487 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
1488 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
1489 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
1490 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
1491 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
1492 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
1493 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
1494 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
1495 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
1496 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
1497 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
1498 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
1499 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
1500 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
1501 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
1502 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
1503 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
1504 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
1505 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
1506 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
1507 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
1508 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
1509 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
1510 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
1511 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
1512 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
1513 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
1514 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
1515 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
1516 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
1517 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
1518 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
1519 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
1520 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
1521 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
1522 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
1523 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
1524 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
1525 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
1526 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
1527 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
1528 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
1529 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
1530 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
1531 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
1532 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
1533 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
1534 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
1535 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
1536 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
1537 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
1538 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
1539 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
1540 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
1541 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
1542 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
1543 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
1544 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
1545 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
1546 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
1547 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
1548 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
1549 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
1550 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
1551 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
1552 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
1553 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
1554 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
1555 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
1556 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
1557 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
1558 // arbitrary one to be handled as the canonical variation.
1559 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1560 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1561 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1562 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1563 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1564 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1565 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1566 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1567 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1568 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1569 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1570 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1571};
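// Illustrative sketch of how this table is meant to be used (the actual lookup
// appears later in this file; this minimal form is shown only for orientation):
//   for (const auto &Entry : NEONEquivalentIntrinsicMap)
//     if (Entry.first == BuiltinID) {
//       BuiltinID = Entry.second; // e.g. vabd_f16 is then emitted as vabd_v
//       break;
//     }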
1572
1573#undef NEONMAP0
1574#undef NEONMAP1
1575#undef NEONMAP2
1576
1577#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
1578 { \
1579 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
1580 TypeModifier \
1581 }
1582
1583#define SVEMAP2(NameBase, TypeModifier) \
1584 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
1585static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
1586#define GET_SVE_LLVM_INTRINSIC_MAP
1587#include "clang/Basic/arm_sve_builtin_cg.inc"
1588#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
1589#undef GET_SVE_LLVM_INTRINSIC_MAP
1590};
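// Illustrative note: the body of this table is generated by TableGen into
// arm_sve_builtin_cg.inc as a list of SVEMAP1/SVEMAP2 expansions. For a
// hypothetical builtin "svfoo_m" mapped to a hypothetical intrinsic
// "aarch64_sve_foo" (both names invented here purely for illustration),
// SVEMAP1(svfoo_m, aarch64_sve_foo, 0) would expand to:
//   { "svfoo_m", SVE::BI__builtin_sve_svfoo_m, Intrinsic::aarch64_sve_foo, 0, 0 }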
1591
1592#undef SVEMAP1
1593#undef SVEMAP2
1594
1595#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
1596 { \
1597 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
1598 TypeModifier \
1599 }
1600
1601#define SMEMAP2(NameBase, TypeModifier) \
1602 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
1603static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
1604#define GET_SME_LLVM_INTRINSIC_MAP
1605#include "clang/Basic/arm_sme_builtin_cg.inc"
1606#undef GET_SME_LLVM_INTRINSIC_MAP
1607};
1608
1609#undef SMEMAP1
1610#undef SMEMAP2
1611
1612static bool NEONSIMDIntrinsicsProvenSorted = false;
1613
1614static bool AArch64SIMDIntrinsicsProvenSorted = false;
1615static bool AArch64SISDIntrinsicsProvenSorted = false;
1616static bool AArch64SVEIntrinsicsProvenSorted = false;
1617static bool AArch64SMEIntrinsicsProvenSorted = false;
1618
1619static const ARMVectorIntrinsicInfo *
1620findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
1621 unsigned BuiltinID, bool &MapProvenSorted) {
1622
1623#ifndef NDEBUG
1624 if (!MapProvenSorted) {
1625 assert(llvm::is_sorted(IntrinsicMap));
1626 MapProvenSorted = true;
1627 }
1628#endif
1629
1630 const ARMVectorIntrinsicInfo *Builtin =
1631 llvm::lower_bound(IntrinsicMap, BuiltinID);
1632
1633 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
1634 return Builtin;
1635
1636 return nullptr;
1637}
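// Illustrative sketch of how the helper above is typically invoked: each table
// is paired with its matching *ProvenSorted flag declared above, e.g.
//   if (const ARMVectorIntrinsicInfo *Info = findARMVectorIntrinsicInMap(
//           AArch64SIMDIntrinsicMap, BuiltinID, AArch64SIMDIntrinsicsProvenSorted))
//     /* Info->LLVMIntrinsic and Info->TypeModifier drive the generic paths */;
// The llvm::lower_bound binary search is why every table must stay sorted by
// builtin ID, which the NDEBUG-only assert verifies once per map.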
1638
1639Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
1640 unsigned Modifier,
1641 llvm::Type *ArgType,
1642 const CallExpr *E) {
1643 int VectorSize = 0;
1644 if (Modifier & Use64BitVectors)
1645 VectorSize = 64;
1646 else if (Modifier & Use128BitVectors)
1647 VectorSize = 128;
1648
1649 // Return type.
1650 SmallVector<llvm::Type *, 3> Tys;
1651 if (Modifier & AddRetType) {
1652 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
1653 if (Modifier & VectorizeRetType)
1654 Ty = llvm::FixedVectorType::get(
1655 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
1656
1657 Tys.push_back(Ty);
1658 }
1659
1660 // Arguments.
1661 if (Modifier & VectorizeArgTypes) {
1662 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
1663 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
1664 }
1665
1666 if (Modifier & (Add1ArgType | Add2ArgTypes))
1667 Tys.push_back(ArgType);
1668
1669 if (Modifier & Add2ArgTypes)
1670 Tys.push_back(ArgType);
1671
1672 if (Modifier & InventFloatType)
1673 Tys.push_back(FloatTy);
1674
1675 return CGM.getIntrinsic(IntrinsicID, Tys);
1676}
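// Illustrative note: the Tys overload list built above is what picks the
// concrete intrinsic variant. For instance, the vqmovns_s32 entry uses
// aarch64_neon_sqxtn with VectorRet | Use64BitVectors, so the scalar i16
// return type is vectorized over 64 bits to <4 x i16> and the declaration
// requested from CGM is roughly:
//   declare <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32>)
// The SISD wrapper below then widens the scalar argument into a vector lane
// and extracts lane 0 of the result.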
1677
1678static Value *EmitCommonNeonSISDBuiltinExpr(
1679 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
1680 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
1681 unsigned BuiltinID = SISDInfo.BuiltinID;
1682 unsigned int Int = SISDInfo.LLVMIntrinsic;
1683 unsigned Modifier = SISDInfo.TypeModifier;
1684 const char *s = SISDInfo.NameHint;
1685
1686 switch (BuiltinID) {
1687 case NEON::BI__builtin_neon_vcled_s64:
1688 case NEON::BI__builtin_neon_vcled_u64:
1689 case NEON::BI__builtin_neon_vcles_f32:
1690 case NEON::BI__builtin_neon_vcled_f64:
1691 case NEON::BI__builtin_neon_vcltd_s64:
1692 case NEON::BI__builtin_neon_vcltd_u64:
1693 case NEON::BI__builtin_neon_vclts_f32:
1694 case NEON::BI__builtin_neon_vcltd_f64:
1695 case NEON::BI__builtin_neon_vcales_f32:
1696 case NEON::BI__builtin_neon_vcaled_f64:
1697 case NEON::BI__builtin_neon_vcalts_f32:
1698 case NEON::BI__builtin_neon_vcaltd_f64:
1699 // Only one direction of comparisons actually exists: cmle is a cmge
1700 // with swapped operands. The table gives us the right intrinsic but we
1701 // still need to do the swap.
1702 std::swap(Ops[0], Ops[1]);
1703 break;
1704 }
1705
1706 assert(Int && "Generic code assumes a valid intrinsic");
1707
1708 // Determine the type(s) of this overloaded AArch64 intrinsic.
1709 const Expr *Arg = E->getArg(0);
1710 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
1711 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
1712
1713 int j = 0;
1714 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
1715 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
1716 ai != ae; ++ai, ++j) {
1717 llvm::Type *ArgTy = ai->getType();
1718 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
1719 ArgTy->getPrimitiveSizeInBits())
1720 continue;
1721
1722 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
1723 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
1724 // it before inserting.
1725 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
1726 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
1727 Ops[j] =
1728 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
1729 }
1730
1731 Value *Result = CGF.EmitNeonCall(F, Ops, s);
1732 llvm::Type *ResultType = CGF.ConvertType(E->getType());
1733 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
1734 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
1735 return CGF.Builder.CreateExtractElement(Result, C0);
1736
1737 return CGF.Builder.CreateBitCast(Result, ResultType, s);
1738}
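// Illustrative sketch: for a scalar saturating add such as vqaddh_s16, the
// routine above promotes each i16 operand into lane 0 of a 64-bit vector,
// calls the vector intrinsic chosen from the SISD table, and extracts the
// scalar result again, giving IR roughly of the form:
//   %a.v = insertelement <4 x i16> poison, i16 %a, i64 0
//   %b.v = insertelement <4 x i16> poison, i16 %b, i64 0
//   %r.v = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %a.v, <4 x i16> %b.v)
//   %r = extractelement <4 x i16> %r.v, i64 0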
1739
1740Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
1741 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
1742 const char *NameHint, unsigned Modifier, const CallExpr *E,
1743 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
1744 llvm::Triple::ArchType Arch) {
1745 // Get the last argument, which specifies the vector type.
1746 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
1747 std::optional<llvm::APSInt> NeonTypeConst =
1748 Arg->getIntegerConstantExpr(getContext());
1749 if (!NeonTypeConst)
1750 return nullptr;
1751
1752 // Determine the type of this overloaded NEON intrinsic.
1753 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
1754 const bool Usgn = Type.isUnsigned();
1755 const bool Quad = Type.isQuad();
1756 const bool Floating = Type.isFloatingPoint();
1757 const bool HasFastHalfType = getTarget().hasFastHalfType();
1758 const bool AllowBFloatArgsAndRet =
1759 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
1760
1761 llvm::FixedVectorType *VTy =
1762 GetNeonType(this, Type, HasFastHalfType, false, AllowBFloatArgsAndRet);
1763 llvm::Type *Ty = VTy;
1764 if (!Ty)
1765 return nullptr;
1766
1767 auto getAlignmentValue32 = [&](Address addr) -> Value* {
1768 return Builder.getInt32(addr.getAlignment().getQuantity());
1769 };
1770
1771 unsigned Int = LLVMIntrinsic;
1772 if ((Modifier & UnsignedAlts) && !Usgn)
1773 Int = AltLLVMIntrinsic;
1774
1775 switch (BuiltinID) {
1776 default: break;
1777 case NEON::BI__builtin_neon_splat_lane_v:
1778 case NEON::BI__builtin_neon_splat_laneq_v:
1779 case NEON::BI__builtin_neon_splatq_lane_v:
1780 case NEON::BI__builtin_neon_splatq_laneq_v: {
1781 auto NumElements = VTy->getElementCount();
1782 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
1783 NumElements = NumElements * 2;
1784 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
1785 NumElements = NumElements.divideCoefficientBy(2);
1786
1787 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
1788 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
1789 }
1790 case NEON::BI__builtin_neon_vpadd_v:
1791 case NEON::BI__builtin_neon_vpaddq_v:
1792 // We don't allow fp/int overloading of intrinsics.
1793 if (VTy->getElementType()->isFloatingPointTy() &&
1794 Int == Intrinsic::aarch64_neon_addp)
1795 Int = Intrinsic::aarch64_neon_faddp;
1796 break;
1797 case NEON::BI__builtin_neon_vabs_v:
1798 case NEON::BI__builtin_neon_vabsq_v:
1799 if (VTy->getElementType()->isFloatingPointTy())
1800 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
1801 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
1802 case NEON::BI__builtin_neon_vadd_v:
1803 case NEON::BI__builtin_neon_vaddq_v: {
1804 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
1805 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
1806 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
1807 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
1808 return Builder.CreateBitCast(Ops[0], Ty);
1809 }
1810 case NEON::BI__builtin_neon_vaddhn_v: {
1811 llvm::FixedVectorType *SrcTy =
1812 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1813
1814 // %sum = add <4 x i32> %lhs, %rhs
1815 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1816 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
1817 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
1818
1819 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
1820 Constant *ShiftAmt =
1821 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
1822 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
1823
1824 // %res = trunc <4 x i32> %high to <4 x i16>
1825 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
1826 }
1827 case NEON::BI__builtin_neon_vcale_v:
1828 case NEON::BI__builtin_neon_vcaleq_v:
1829 case NEON::BI__builtin_neon_vcalt_v:
1830 case NEON::BI__builtin_neon_vcaltq_v:
1831 std::swap(Ops[0], Ops[1]);
1832 [[fallthrough]];
1833 case NEON::BI__builtin_neon_vcage_v:
1834 case NEON::BI__builtin_neon_vcageq_v:
1835 case NEON::BI__builtin_neon_vcagt_v:
1836 case NEON::BI__builtin_neon_vcagtq_v: {
1837 llvm::Type *Ty;
1838 switch (VTy->getScalarSizeInBits()) {
1839 default: llvm_unreachable("unexpected type");
1840 case 32:
1841 Ty = FloatTy;
1842 break;
1843 case 64:
1844 Ty = DoubleTy;
1845 break;
1846 case 16:
1847 Ty = HalfTy;
1848 break;
1849 }
1850 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
1851 llvm::Type *Tys[] = { VTy, VecFlt };
1852 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1853 return EmitNeonCall(F, Ops, NameHint);
1854 }
1855 case NEON::BI__builtin_neon_vceqz_v:
1856 case NEON::BI__builtin_neon_vceqzq_v:
1857 return EmitAArch64CompareBuiltinExpr(
1858 Ops[0], Ty, Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ, "vceqz");
1859 case NEON::BI__builtin_neon_vcgez_v:
1860 case NEON::BI__builtin_neon_vcgezq_v:
1861 return EmitAArch64CompareBuiltinExpr(
1862 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
1863 "vcgez");
1864 case NEON::BI__builtin_neon_vclez_v:
1865 case NEON::BI__builtin_neon_vclezq_v:
1866 return EmitAArch64CompareBuiltinExpr(
1867 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
1868 "vclez");
1869 case NEON::BI__builtin_neon_vcgtz_v:
1870 case NEON::BI__builtin_neon_vcgtzq_v:
1871 return EmitAArch64CompareBuiltinExpr(
1872 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
1873 "vcgtz");
1874 case NEON::BI__builtin_neon_vcltz_v:
1875 case NEON::BI__builtin_neon_vcltzq_v:
1876 return EmitAArch64CompareBuiltinExpr(
1877 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
1878 "vcltz");
1879 case NEON::BI__builtin_neon_vclz_v:
1880 case NEON::BI__builtin_neon_vclzq_v:
1881 // We generate a target-independent intrinsic, which needs a second argument
1882 // for whether or not clz of zero is undefined; on ARM it isn't.
1883 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
1884 break;
1885 case NEON::BI__builtin_neon_vcvt_f32_v:
1886 case NEON::BI__builtin_neon_vcvtq_f32_v:
1887 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1888 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
1889 HasFastHalfType);
1890 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1891 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1892 case NEON::BI__builtin_neon_vcvt_f16_s16:
1893 case NEON::BI__builtin_neon_vcvt_f16_u16:
1894 case NEON::BI__builtin_neon_vcvtq_f16_s16:
1895 case NEON::BI__builtin_neon_vcvtq_f16_u16:
1896 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1897 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
1898 HasFastHalfType);
1899 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1900 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1901 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
1902 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
1903 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
1904 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
1905 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
1906 Function *F = CGM.getIntrinsic(Int, Tys);
1907 return EmitNeonCall(F, Ops, "vcvt_n");
1908 }
1909 case NEON::BI__builtin_neon_vcvt_n_f32_v:
1910 case NEON::BI__builtin_neon_vcvt_n_f64_v:
1911 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
1912 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
1913 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
1914 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
1915 Function *F = CGM.getIntrinsic(Int, Tys);
1916 return EmitNeonCall(F, Ops, "vcvt_n");
1917 }
1918 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
1919 case NEON::BI__builtin_neon_vcvt_n_s32_v:
1920 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
1921 case NEON::BI__builtin_neon_vcvt_n_u32_v:
1922 case NEON::BI__builtin_neon_vcvt_n_s64_v:
1923 case NEON::BI__builtin_neon_vcvt_n_u64_v:
1924 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
1925 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
1926 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
1927 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
1928 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
1929 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
1930 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
1931 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1932 return EmitNeonCall(F, Ops, "vcvt_n");
1933 }
1934 case NEON::BI__builtin_neon_vcvt_s32_v:
1935 case NEON::BI__builtin_neon_vcvt_u32_v:
1936 case NEON::BI__builtin_neon_vcvt_s64_v:
1937 case NEON::BI__builtin_neon_vcvt_u64_v:
1938 case NEON::BI__builtin_neon_vcvt_s16_f16:
1939 case NEON::BI__builtin_neon_vcvt_u16_f16:
1940 case NEON::BI__builtin_neon_vcvtq_s32_v:
1941 case NEON::BI__builtin_neon_vcvtq_u32_v:
1942 case NEON::BI__builtin_neon_vcvtq_s64_v:
1943 case NEON::BI__builtin_neon_vcvtq_u64_v:
1944 case NEON::BI__builtin_neon_vcvtq_s16_f16:
1945 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
1946 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
1947 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
1948 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
1949 }
1950 case NEON::BI__builtin_neon_vcvta_s16_f16:
1951 case NEON::BI__builtin_neon_vcvta_s32_v:
1952 case NEON::BI__builtin_neon_vcvta_s64_v:
1953 case NEON::BI__builtin_neon_vcvta_u16_f16:
1954 case NEON::BI__builtin_neon_vcvta_u32_v:
1955 case NEON::BI__builtin_neon_vcvta_u64_v:
1956 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
1957 case NEON::BI__builtin_neon_vcvtaq_s32_v:
1958 case NEON::BI__builtin_neon_vcvtaq_s64_v:
1959 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
1960 case NEON::BI__builtin_neon_vcvtaq_u32_v:
1961 case NEON::BI__builtin_neon_vcvtaq_u64_v:
1962 case NEON::BI__builtin_neon_vcvtn_s16_f16:
1963 case NEON::BI__builtin_neon_vcvtn_s32_v:
1964 case NEON::BI__builtin_neon_vcvtn_s64_v:
1965 case NEON::BI__builtin_neon_vcvtn_u16_f16:
1966 case NEON::BI__builtin_neon_vcvtn_u32_v:
1967 case NEON::BI__builtin_neon_vcvtn_u64_v:
1968 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
1969 case NEON::BI__builtin_neon_vcvtnq_s32_v:
1970 case NEON::BI__builtin_neon_vcvtnq_s64_v:
1971 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
1972 case NEON::BI__builtin_neon_vcvtnq_u32_v:
1973 case NEON::BI__builtin_neon_vcvtnq_u64_v:
1974 case NEON::BI__builtin_neon_vcvtp_s16_f16:
1975 case NEON::BI__builtin_neon_vcvtp_s32_v:
1976 case NEON::BI__builtin_neon_vcvtp_s64_v:
1977 case NEON::BI__builtin_neon_vcvtp_u16_f16:
1978 case NEON::BI__builtin_neon_vcvtp_u32_v:
1979 case NEON::BI__builtin_neon_vcvtp_u64_v:
1980 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
1981 case NEON::BI__builtin_neon_vcvtpq_s32_v:
1982 case NEON::BI__builtin_neon_vcvtpq_s64_v:
1983 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
1984 case NEON::BI__builtin_neon_vcvtpq_u32_v:
1985 case NEON::BI__builtin_neon_vcvtpq_u64_v:
1986 case NEON::BI__builtin_neon_vcvtm_s16_f16:
1987 case NEON::BI__builtin_neon_vcvtm_s32_v:
1988 case NEON::BI__builtin_neon_vcvtm_s64_v:
1989 case NEON::BI__builtin_neon_vcvtm_u16_f16:
1990 case NEON::BI__builtin_neon_vcvtm_u32_v:
1991 case NEON::BI__builtin_neon_vcvtm_u64_v:
1992 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
1993 case NEON::BI__builtin_neon_vcvtmq_s32_v:
1994 case NEON::BI__builtin_neon_vcvtmq_s64_v:
1995 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
1996 case NEON::BI__builtin_neon_vcvtmq_u32_v:
1997 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
1998 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
1999 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
2000 }
2001 case NEON::BI__builtin_neon_vcvtx_f32_v: {
2002 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
2003 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
2004
2005 }
2006 case NEON::BI__builtin_neon_vext_v:
2007 case NEON::BI__builtin_neon_vextq_v: {
2008 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
2009 SmallVector<int, 16> Indices;
2010 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
2011 Indices.push_back(i+CV);
2012
2013 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2014 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2015 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
2016 }
2017 case NEON::BI__builtin_neon_vfma_v:
2018 case NEON::BI__builtin_neon_vfmaq_v: {
2019 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2020 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2021 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2022
2023 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
2024 return emitCallMaybeConstrainedFPBuiltin(
2025 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
2026 {Ops[1], Ops[2], Ops[0]});
2027 }
2028 case NEON::BI__builtin_neon_vld1_v:
2029 case NEON::BI__builtin_neon_vld1q_v: {
2030 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2031 Ops.push_back(getAlignmentValue32(PtrOp0));
2032 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
2033 }
2034 case NEON::BI__builtin_neon_vld1_x2_v:
2035 case NEON::BI__builtin_neon_vld1q_x2_v:
2036 case NEON::BI__builtin_neon_vld1_x3_v:
2037 case NEON::BI__builtin_neon_vld1q_x3_v:
2038 case NEON::BI__builtin_neon_vld1_x4_v:
2039 case NEON::BI__builtin_neon_vld1q_x4_v: {
2040 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
2041 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
2042 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
2043 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
2044 }
2045 case NEON::BI__builtin_neon_vld2_v:
2046 case NEON::BI__builtin_neon_vld2q_v:
2047 case NEON::BI__builtin_neon_vld3_v:
2048 case NEON::BI__builtin_neon_vld3q_v:
2049 case NEON::BI__builtin_neon_vld4_v:
2050 case NEON::BI__builtin_neon_vld4q_v:
2051 case NEON::BI__builtin_neon_vld2_dup_v:
2052 case NEON::BI__builtin_neon_vld2q_dup_v:
2053 case NEON::BI__builtin_neon_vld3_dup_v:
2054 case NEON::BI__builtin_neon_vld3q_dup_v:
2055 case NEON::BI__builtin_neon_vld4_dup_v:
2056 case NEON::BI__builtin_neon_vld4q_dup_v: {
2057 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2058 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
2059 Value *Align = getAlignmentValue32(PtrOp1);
2060 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
2061 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
2062 }
2063 case NEON::BI__builtin_neon_vld1_dup_v:
2064 case NEON::BI__builtin_neon_vld1q_dup_v: {
2065 Value *V = PoisonValue::get(Ty);
2066 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
2067 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
2068 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
2069 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
2070 return EmitNeonSplat(Ops[0], CI);
2071 }
2072 case NEON::BI__builtin_neon_vld2_lane_v:
2073 case NEON::BI__builtin_neon_vld2q_lane_v:
2074 case NEON::BI__builtin_neon_vld3_lane_v:
2075 case NEON::BI__builtin_neon_vld3q_lane_v:
2076 case NEON::BI__builtin_neon_vld4_lane_v:
2077 case NEON::BI__builtin_neon_vld4q_lane_v: {
2078 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2079 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
2080 for (unsigned I = 2; I < Ops.size() - 1; ++I)
2081 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
2082 Ops.push_back(getAlignmentValue32(PtrOp1));
2083 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
2084 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
2085 }
2086 case NEON::BI__builtin_neon_vmovl_v: {
2087 llvm::FixedVectorType *DTy =
2088 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
2089 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
2090 if (Usgn)
2091 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
2092 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
2093 }
2094 case NEON::BI__builtin_neon_vmovn_v: {
2095 llvm::FixedVectorType *QTy =
2096 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2097 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
2098 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
2099 }
2100 case NEON::BI__builtin_neon_vmull_v:
2101 // FIXME: the integer vmull operations could be emitted in terms of pure
2102 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
2103 // hoisting the exts outside loops. Until global ISel comes along that can
2104 // see through such movement, this leads to bad CodeGen. So we need an
2105 // intrinsic for now.
2106 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
2107 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
2108 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
2109 case NEON::BI__builtin_neon_vpadal_v:
2110 case NEON::BI__builtin_neon_vpadalq_v: {
2111 // The source operand type has twice as many elements of half the size.
2112 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2113 llvm::Type *EltTy =
2114 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
2115 auto *NarrowTy =
2116 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
2117 llvm::Type *Tys[2] = { Ty, NarrowTy };
2118 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
2119 }
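    // Illustrative note: for vpadal_s8 the accumulator/result is int16x4_t, so
    // Ty is <4 x i16>, EltTy becomes i8 and NarrowTy <8 x i8>; the chosen
    // intrinsic is overloaded on both types, e.g. on 32-bit ARM roughly:
    //   call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %acc, <8 x i8> %b)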
2120 case NEON::BI__builtin_neon_vpaddl_v:
2121 case NEON::BI__builtin_neon_vpaddlq_v: {
2122 // The source operand type has twice as many elements of half the size.
2123 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2124 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
2125 auto *NarrowTy =
2126 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
2127 llvm::Type *Tys[2] = { Ty, NarrowTy };
2128 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
2129 }
2130 case NEON::BI__builtin_neon_vqdmlal_v:
2131 case NEON::BI__builtin_neon_vqdmlsl_v: {
2132 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
2133 Ops[1] =
2134 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
2135 Ops.resize(2);
2136 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
2137 }
2138 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
2139 case NEON::BI__builtin_neon_vqdmulh_lane_v:
2140 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
2141 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
2142 auto *RTy = cast<llvm::FixedVectorType>(Ty);
2143 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
2144 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
2145 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
2146 RTy->getNumElements() * 2);
2147 llvm::Type *Tys[2] = {
2148 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
2149 /*isQuad*/ false))};
2150 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
2151 }
2152 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
2153 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
2154 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
2155 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
2156 llvm::Type *Tys[2] = {
2157 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
2158 /*isQuad*/ true))};
2159 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
2160 }
2161 case NEON::BI__builtin_neon_vqshl_n_v:
2162 case NEON::BI__builtin_neon_vqshlq_n_v:
2163 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
2164 1, false);
2165 case NEON::BI__builtin_neon_vqshlu_n_v:
2166 case NEON::BI__builtin_neon_vqshluq_n_v:
2167 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
2168 1, false);
2169 case NEON::BI__builtin_neon_vrecpe_v:
2170 case NEON::BI__builtin_neon_vrecpeq_v:
2171 case NEON::BI__builtin_neon_vrsqrte_v:
2172 case NEON::BI__builtin_neon_vrsqrteq_v:
2173 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
2174 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
2175 case NEON::BI__builtin_neon_vrndi_v:
2176 case NEON::BI__builtin_neon_vrndiq_v:
2177 Int = Builder.getIsFPConstrained()
2178 ? Intrinsic::experimental_constrained_nearbyint
2179 : Intrinsic::nearbyint;
2180 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
2181 case NEON::BI__builtin_neon_vrshr_n_v:
2182 case NEON::BI__builtin_neon_vrshrq_n_v:
2183 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
2184 1, true);
2185 case NEON::BI__builtin_neon_vsha512hq_u64:
2186 case NEON::BI__builtin_neon_vsha512h2q_u64:
2187 case NEON::BI__builtin_neon_vsha512su0q_u64:
2188 case NEON::BI__builtin_neon_vsha512su1q_u64: {
2189 Function *F = CGM.getIntrinsic(Int);
2190 return EmitNeonCall(F, Ops, "");
2191 }
2192 case NEON::BI__builtin_neon_vshl_n_v:
2193 case NEON::BI__builtin_neon_vshlq_n_v:
2194 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
2195 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
2196 "vshl_n");
2197 case NEON::BI__builtin_neon_vshll_n_v: {
2198 llvm::FixedVectorType *SrcTy =
2199 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
2200 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2201 if (Usgn)
2202 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
2203 else
2204 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
2205 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
2206 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
2207 }
2208 case NEON::BI__builtin_neon_vshrn_n_v: {
2209 llvm::FixedVectorType *SrcTy =
2210 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2211 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2212 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
2213 if (Usgn)
2214 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
2215 else
2216 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
2217 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
2218 }
2219 case NEON::BI__builtin_neon_vshr_n_v:
2220 case NEON::BI__builtin_neon_vshrq_n_v:
2221 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
2222 case NEON::BI__builtin_neon_vst1_v:
2223 case NEON::BI__builtin_neon_vst1q_v:
2224 case NEON::BI__builtin_neon_vst2_v:
2225 case NEON::BI__builtin_neon_vst2q_v:
2226 case NEON::BI__builtin_neon_vst3_v:
2227 case NEON::BI__builtin_neon_vst3q_v:
2228 case NEON::BI__builtin_neon_vst4_v:
2229 case NEON::BI__builtin_neon_vst4q_v:
2230 case NEON::BI__builtin_neon_vst2_lane_v:
2231 case NEON::BI__builtin_neon_vst2q_lane_v:
2232 case NEON::BI__builtin_neon_vst3_lane_v:
2233 case NEON::BI__builtin_neon_vst3q_lane_v:
2234 case NEON::BI__builtin_neon_vst4_lane_v:
2235 case NEON::BI__builtin_neon_vst4q_lane_v: {
2236 llvm::Type *Tys[] = {Int8PtrTy, Ty};
2237 Ops.push_back(getAlignmentValue32(PtrOp0));
2238 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
2239 }
2240 case NEON::BI__builtin_neon_vsm3partw1q_u32:
2241 case NEON::BI__builtin_neon_vsm3partw2q_u32:
2242 case NEON::BI__builtin_neon_vsm3ss1q_u32:
2243 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
2244 case NEON::BI__builtin_neon_vsm4eq_u32: {
2245 Function *F = CGM.getIntrinsic(Int);
2246 return EmitNeonCall(F, Ops, "");
2247 }
2248 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
2249 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
2250 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
2251 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
2252 Function *F = CGM.getIntrinsic(Int);
2253 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
2254 return EmitNeonCall(F, Ops, "");
2255 }
2256 case NEON::BI__builtin_neon_vst1_x2_v:
2257 case NEON::BI__builtin_neon_vst1q_x2_v:
2258 case NEON::BI__builtin_neon_vst1_x3_v:
2259 case NEON::BI__builtin_neon_vst1q_x3_v:
2260 case NEON::BI__builtin_neon_vst1_x4_v:
2261 case NEON::BI__builtin_neon_vst1q_x4_v: {
2262 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
2263 // in AArch64 it comes last. We may want to stick to one or the other.
2264 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
2265 Arch == llvm::Triple::aarch64_32) {
2266 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
2267 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
2268 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
2269 }
2270 llvm::Type *Tys[2] = {UnqualPtrTy, VTy};
2271 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
2272 }
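    // Illustrative note for the TODO above (intrinsic shapes shown as an
    // assumption inferred from the respective .td definitions): for
    // vst1q_s32_x2 the two targets emit calls roughly like
    //   AArch64: call void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32>, <4 x i32>, ptr)
    //   AArch32: call void @llvm.arm.neon.vst1x2.p0.v4i32(ptr, <4 x i32>, <4 x i32>)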
2273 case NEON::BI__builtin_neon_vsubhn_v: {
2274 llvm::FixedVectorType *SrcTy =
2275 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2276
2277 // %sum = add <4 x i32> %lhs, %rhs
2278 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2279 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
2280 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
2281
2282 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
2283 Constant *ShiftAmt =
2284 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
2285 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
2286
2287 // %res = trunc <4 x i32> %high to <4 x i16>
2288 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
2289 }
2290 case NEON::BI__builtin_neon_vtrn_v:
2291 case NEON::BI__builtin_neon_vtrnq_v: {
2292 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2293 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2294 Value *SV = nullptr;
2295
2296 for (unsigned vi = 0; vi != 2; ++vi) {
2297 SmallVector<int, 16> Indices;
2298 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2299 Indices.push_back(i+vi);
2300 Indices.push_back(i+e+vi);
2301 }
2302 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
2303 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
2304 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
2305 }
2306 return SV;
2307 }
2308 case NEON::BI__builtin_neon_vtst_v:
2309 case NEON::BI__builtin_neon_vtstq_v: {
2310 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2311 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2312 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
2313 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
2314 ConstantAggregateZero::get(Ty));
2315 return Builder.CreateSExt(Ops[0], Ty, "vtst");
2316 }
2317 case NEON::BI__builtin_neon_vuzp_v:
2318 case NEON::BI__builtin_neon_vuzpq_v: {
2319 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2320 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2321 Value *SV = nullptr;
2322
2323 for (unsigned vi = 0; vi != 2; ++vi) {
2324 SmallVector<int, 16> Indices;
2325 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
2326 Indices.push_back(2*i+vi);
2327
2328 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
2329 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
2330 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
2331 }
2332 return SV;
2333 }
2334 case NEON::BI__builtin_neon_vxarq_u64: {
2335 Function *F = CGM.getIntrinsic(Int);
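    // The rotate-amount immediate is passed to the XAR intrinsic as an i64.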
2336 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
2337 return EmitNeonCall(F, Ops, "");
2338 }
2339 case NEON::BI__builtin_neon_vzip_v:
2340 case NEON::BI__builtin_neon_vzipq_v: {
2341 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2342 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2343 Value *SV = nullptr;
2344
2345 for (unsigned vi = 0; vi != 2; ++vi) {
2346 SmallVector<int, 16> Indices;
2347 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2348 Indices.push_back((i + vi*e) >> 1);
2349 Indices.push_back(((i + vi*e) >> 1)+e);
2350 }
2351 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
2352 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
2353 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
2354 }
2355 return SV;
2356 }
2357 case NEON::BI__builtin_neon_vdot_s32:
2358 case NEON::BI__builtin_neon_vdot_u32:
2359 case NEON::BI__builtin_neon_vdotq_s32:
2360 case NEON::BI__builtin_neon_vdotq_u32: {
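    // Dot-product intrinsics are overloaded on both the accumulator vector type
    // and the i8 input vector type.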
2361 auto *InputTy =
2362 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2363 llvm::Type *Tys[2] = { Ty, InputTy };
2364 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
2365 }
2366 case NEON::BI__builtin_neon_vfmlal_low_f16:
2367 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
2368 auto *InputTy =
2369 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2370 llvm::Type *Tys[2] = { Ty, InputTy };
2371 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
2372 }
2373 case NEON::BI__builtin_neon_vfmlsl_low_f16:
2374 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
2375 auto *InputTy =
2376 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2377 llvm::Type *Tys[2] = { Ty, InputTy };
2378 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
2379 }
2380 case NEON::BI__builtin_neon_vfmlal_high_f16:
2381 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
2382 auto *InputTy =
2383 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2384 llvm::Type *Tys[2] = { Ty, InputTy };
2385 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
2386 }
2387 case NEON::BI__builtin_neon_vfmlsl_high_f16:
2388 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
2389 auto *InputTy =
2390 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2391 llvm::Type *Tys[2] = { Ty, InputTy };
2392 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
2393 }
2394 case NEON::BI__builtin_neon_vmmlaq_s32:
2395 case NEON::BI__builtin_neon_vmmlaq_u32: {
2396 auto *InputTy =
2397 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2398 llvm::Type *Tys[2] = { Ty, InputTy };
2399 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
2400 }
2401 case NEON::BI__builtin_neon_vusmmlaq_s32: {
2402 auto *InputTy =
2403 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2404 llvm::Type *Tys[2] = { Ty, InputTy };
2405 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
2406 }
2407 case NEON::BI__builtin_neon_vusdot_s32:
2408 case NEON::BI__builtin_neon_vusdotq_s32: {
2409 auto *InputTy =
2410 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2411 llvm::Type *Tys[2] = { Ty, InputTy };
2412 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
2413 }
2414 case NEON::BI__builtin_neon_vbfdot_f32:
2415 case NEON::BI__builtin_neon_vbfdotq_f32: {
2416 llvm::Type *InputTy =
2417 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
2418 llvm::Type *Tys[2] = { Ty, InputTy };
2419 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
2420 }
2421 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
2422 llvm::Type *Tys[1] = { Ty };
2423 Function *F = CGM.getIntrinsic(Int, Tys);
2424 return EmitNeonCall(F, Ops, "vcvtfp2bf");
2425 }
2426
2427 }
2428
2429 assert(Int && "Expected valid intrinsic number");
2430
2431 // Determine the type(s) of this overloaded AArch64 intrinsic.
2432 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
2433
2434 Value *Result = EmitNeonCall(F, Ops, NameHint);
2435 llvm::Type *ResultType = ConvertType(E->getType());
2436 // Cast the AArch64 intrinsic's one-element vector result back to the
2437 // scalar type expected by the builtin.
2438 return Builder.CreateBitCast(Result, ResultType, NameHint);
2439}
2440
2441Value *
2442CodeGenFunction::EmitAArch64CompareBuiltinExpr(Value *Op, llvm::Type *Ty,
2443 const CmpInst::Predicate Pred,
2444 const Twine &Name) {
2445
2446 if (isa<FixedVectorType>(Ty)) {
2447 // Vector types are cast to i8 vectors. Recover original type.
2448 Op = Builder.CreateBitCast(Op, Ty);
2449 }
2450
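  // Equality uses a quiet floating-point compare; the remaining FP predicates
  // use the signaling form (CreateFCmpS).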
2451 if (CmpInst::isFPPredicate(Pred)) {
2452 if (Pred == CmpInst::FCMP_OEQ)
2453 Op = Builder.CreateFCmp(Pred, Op, Constant::getNullValue(Op->getType()));
2454 else
2455 Op = Builder.CreateFCmpS(Pred, Op, Constant::getNullValue(Op->getType()));
2456 } else {
2457 Op = Builder.CreateICmp(Pred, Op, Constant::getNullValue(Op->getType()));
2458 }
2459
2460 llvm::Type *ResTy = Ty;
2461 if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
2462 ResTy = FixedVectorType::get(
2463 IntegerType::get(getLLVMContext(), VTy->getScalarSizeInBits()),
2464 VTy->getNumElements());
2465
2466 return Builder.CreateSExt(Op, ResTy, Name);
2467}
2468
2469static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
2470 Value *ExtOp, Value *IndexOp,
2471 llvm::Type *ResTy, unsigned IntID,
2472 const char *Name) {
2473 SmallVector<Value *, 2> TblOps;
2474 if (ExtOp)
2475 TblOps.push_back(ExtOp);
2476
2477 // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
2478 SmallVector<int, 16> Indices;
2479 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
2480 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
2481 Indices.push_back(2*i);
2482 Indices.push_back(2*i+1);
2483 }
2484
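  // Concatenate the 64-bit table operands pairwise into the 128-bit tables
  // expected by the AArch64 TBL/TBX intrinsics.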
2485 int PairPos = 0, End = Ops.size() - 1;
2486 while (PairPos < End) {
2487 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
2488 Ops[PairPos+1], Indices,
2489 Name));
2490 PairPos += 2;
2491 }
2492
2493 // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
2494 // of the last 128-bit lookup table with zero.
2495 if (PairPos == End) {
2496 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
2497 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
2498 ZeroTbl, Indices, Name));
2499 }
2500
2501 Function *TblF;
2502 TblOps.push_back(IndexOp);
2503 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
2504
2505 return CGF.EmitNeonCall(TblF, TblOps, Name);
2506}
2507
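// Map the ARM hint builtins (nop, yield, wfe, wfi, sev, sevl) to the immediate
// operand of the llvm.arm.hint intrinsic; returns null for any other builtin.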
2508Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
2509 unsigned Value;
2510 switch (BuiltinID) {
2511 default:
2512 return nullptr;
2513 case clang::ARM::BI__builtin_arm_nop:
2514 Value = 0;
2515 break;
2516 case clang::ARM::BI__builtin_arm_yield:
2517 case clang::ARM::BI__yield:
2518 Value = 1;
2519 break;
2520 case clang::ARM::BI__builtin_arm_wfe:
2521 case clang::ARM::BI__wfe:
2522 Value = 2;
2523 break;
2524 case clang::ARM::BI__builtin_arm_wfi:
2525 case clang::ARM::BI__wfi:
2526 Value = 3;
2527 break;
2528 case clang::ARM::BI__builtin_arm_sev:
2529 case clang::ARM::BI__sev:
2530 Value = 4;
2531 break;
2532 case clang::ARM::BI__builtin_arm_sevl:
2533 case clang::ARM::BI__sevl:
2534 Value = 5;
2535 break;
2536 }
2537
2538 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
2539 llvm::ConstantInt::get(Int32Ty, Value));
2540}
2541
2542enum SpecialRegisterAccessKind {
2543 NormalRead,
2544 VolatileRead,
2545 Write,
2546};
2547
2548// Generates the IR for the read/write special register builtin.
2549// ValueType is the type of the value that is to be written or read, and
2550// RegisterType is the type of the register being written to or read from.
2551static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
2552 const CallExpr *E,
2553 llvm::Type *RegisterType,
2554 llvm::Type *ValueType,
2555 SpecialRegisterAccessKind AccessKind,
2556 StringRef SysReg = "") {
2557 // The read and write register intrinsics only support 32-, 64- and 128-bit operations.
2558 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
2559 RegisterType->isIntegerTy(128)) &&
2560 "Unsupported size for register.");
2561
2562 CodeGen::CGBuilderTy &Builder = CGF.Builder;
2563 CodeGen::CodeGenModule &CGM = CGF.CGM;
2564 LLVMContext &Context = CGM.getLLVMContext();
2565
2566 if (SysReg.empty()) {
2567 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
2568 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
2569 }
2570
2571 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
2572 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
2573 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
2574
2575 llvm::Type *Types[] = { RegisterType };
2576
2577 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
2578 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
2579 && "Can't fit 64-bit value in 32-bit register");
2580
2581 if (AccessKind != Write) {
2582 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
2583 llvm::Function *F = CGM.getIntrinsic(
2584 AccessKind == VolatileRead ? Intrinsic::read_volatile_register
2585 : Intrinsic::read_register,
2586 Types);
2587 llvm::Value *Call = Builder.CreateCall(F, Metadata);
2588
2589 if (MixedTypes)
2590 // Read into 64 bit register and then truncate result to 32 bit.
2591 return Builder.CreateTrunc(Call, ValueType);
2592
2593 if (ValueType->isPointerTy())
2594 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
2595 return Builder.CreateIntToPtr(Call, ValueType);
2596
2597 return Call;
2598 }
2599
2600 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
2601 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
2602 if (MixedTypes) {
2603 // Extend 32 bit write value to 64 bit to pass to write.
2604 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
2605 return Builder.CreateCall(F, { Metadata, ArgValue });
2606 }
2607
2608 if (ValueType->isPointerTy()) {
2609 // Have VoidPtrTy ArgValue but want to return an i32/i64.
2610 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
2611 return Builder.CreateCall(F, { Metadata, ArgValue });
2612 }
2613
2614 return Builder.CreateCall(F, { Metadata, ArgValue });
2615}
2616
2617/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
2618/// argument that specifies the vector type.
2619static bool HasExtraNeonArgument(unsigned BuiltinID) {
2620 switch (BuiltinID) {
2621 default: break;
2622 case NEON::BI__builtin_neon_vget_lane_i8:
2623 case NEON::BI__builtin_neon_vget_lane_i16:
2624 case NEON::BI__builtin_neon_vget_lane_bf16:
2625 case NEON::BI__builtin_neon_vget_lane_i32:
2626 case NEON::BI__builtin_neon_vget_lane_i64:
2627 case NEON::BI__builtin_neon_vget_lane_mf8:
2628 case NEON::BI__builtin_neon_vget_lane_f32:
2629 case NEON::BI__builtin_neon_vgetq_lane_i8:
2630 case NEON::BI__builtin_neon_vgetq_lane_i16:
2631 case NEON::BI__builtin_neon_vgetq_lane_bf16:
2632 case NEON::BI__builtin_neon_vgetq_lane_i32:
2633 case NEON::BI__builtin_neon_vgetq_lane_i64:
2634 case NEON::BI__builtin_neon_vgetq_lane_mf8:
2635 case NEON::BI__builtin_neon_vgetq_lane_f32:
2636 case NEON::BI__builtin_neon_vduph_lane_bf16:
2637 case NEON::BI__builtin_neon_vduph_laneq_bf16:
2638 case NEON::BI__builtin_neon_vset_lane_i8:
2639 case NEON::BI__builtin_neon_vset_lane_mf8:
2640 case NEON::BI__builtin_neon_vset_lane_i16:
2641 case NEON::BI__builtin_neon_vset_lane_bf16:
2642 case NEON::BI__builtin_neon_vset_lane_i32:
2643 case NEON::BI__builtin_neon_vset_lane_i64:
2644 case NEON::BI__builtin_neon_vset_lane_f32:
2645 case NEON::BI__builtin_neon_vsetq_lane_i8:
2646 case NEON::BI__builtin_neon_vsetq_lane_mf8:
2647 case NEON::BI__builtin_neon_vsetq_lane_i16:
2648 case NEON::BI__builtin_neon_vsetq_lane_bf16:
2649 case NEON::BI__builtin_neon_vsetq_lane_i32:
2650 case NEON::BI__builtin_neon_vsetq_lane_i64:
2651 case NEON::BI__builtin_neon_vsetq_lane_f32:
2652 case NEON::BI__builtin_neon_vsha1h_u32:
2653 case NEON::BI__builtin_neon_vsha1cq_u32:
2654 case NEON::BI__builtin_neon_vsha1pq_u32:
2655 case NEON::BI__builtin_neon_vsha1mq_u32:
2656 case NEON::BI__builtin_neon_vcvth_bf16_f32:
2657 case clang::ARM::BI_MoveToCoprocessor:
2658 case clang::ARM::BI_MoveToCoprocessor2:
2659 return false;
2660 }
2661 return true;
2662}
2663
2664Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
2665 const CallExpr *E,
2666 ReturnValueSlot ReturnValue,
2667 llvm::Triple::ArchType Arch) {
2668 if (auto Hint = GetValueForARMHint(BuiltinID))
2669 return Hint;
2670
2671 if (BuiltinID == clang::ARM::BI__emit) {
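    // __emit encodes an arbitrary instruction: a 16-bit .inst.n directive when
    // targeting Thumb, a 32-bit .inst directive otherwise.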
2672 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
2673 llvm::FunctionType *FTy =
2674 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
2675
2676 Expr::EvalResult Result;
2677 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
2678 llvm_unreachable("Sema will ensure that the parameter is constant");
2679
2680 llvm::APSInt Value = Result.Val.getInt();
2681 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
2682
2683 llvm::InlineAsm *Emit =
2684 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
2685 /*hasSideEffects=*/true)
2686 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
2687 /*hasSideEffects=*/true);
2688
2689 return Builder.CreateCall(Emit);
2690 }
2691
2692 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
2693 Value *Option = EmitScalarExpr(E->getArg(0));
2694 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
2695 }
2696
2697 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
2698 Value *Address = EmitScalarExpr(E->getArg(0));
2699 Value *RW = EmitScalarExpr(E->getArg(1));
2700 Value *IsData = EmitScalarExpr(E->getArg(2));
2701
2702 // Locality is not supported on the ARM target.
2703 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
2704
2705 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
2706 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
2707 }
2708
2709 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
2710 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2711 return Builder.CreateCall(
2712 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
2713 }
2714
2715 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
2716 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
2717 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2718 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
2719 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
2720 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
2721 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
2722 return Res;
2723 }
2724
2725
2726 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
2727 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2728 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
2729 }
2730 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
2731 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2732 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
2733 "cls");
2734 }
2735
2736 if (BuiltinID == clang::ARM::BI__clear_cache) {
2737 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
2738 const FunctionDecl *FD = E->getDirectCallee();
2739 Value *Ops[2];
2740 for (unsigned i = 0; i < 2; i++)
2741 Ops[i] = EmitScalarExpr(E->getArg(i));
2742 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
2743 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
2744 StringRef Name = FD->getName();
2745 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
2746 }
2747
2748 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
2749 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
2750 Function *F;
2751
2752 switch (BuiltinID) {
2753 default: llvm_unreachable("unexpected builtin");
2754 case clang::ARM::BI__builtin_arm_mcrr:
2755 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
2756 break;
2757 case clang::ARM::BI__builtin_arm_mcrr2:
2758 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
2759 break;
2760 }
2761
2762 // The MCRR{2} instruction has 5 operands, but
2763 // the intrinsic has only 4 because Rt and Rt2
2764 // are represented as a single unsigned 64-bit
2765 // integer in the intrinsic definition, while
2766 // internally they are represented as two
2767 // 32-bit integers.
2768
2769 Value *Coproc = EmitScalarExpr(E->getArg(0));
2770 Value *Opc1 = EmitScalarExpr(E->getArg(1));
2771 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
2772 Value *CRm = EmitScalarExpr(E->getArg(3));
2773
2774 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
2775 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
2776 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
2777 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
2778
2779 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
2780 }
2781
2782 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
2783 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
2784 Function *F;
2785
2786 switch (BuiltinID) {
2787 default: llvm_unreachable("unexpected builtin");
2788 case clang::ARM::BI__builtin_arm_mrrc:
2789 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
2790 break;
2791 case clang::ARM::BI__builtin_arm_mrrc2:
2792 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
2793 break;
2794 }
2795
2796 Value *Coproc = EmitScalarExpr(E->getArg(0));
2797 Value *Opc1 = EmitScalarExpr(E->getArg(1));
2798 Value *CRm = EmitScalarExpr(E->getArg(2));
2799 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
2800
2801 // Returns an unsigned 64 bit integer, represented
2802 // as two 32 bit integers.
2803
2804 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
2805 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
2806 Rt = Builder.CreateZExt(Rt, Int64Ty);
2807 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
2808
2809 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
2810 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
2811 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
2812
2813 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
2814 }
2815
2816 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
2817 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2818 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
2819 getContext().getTypeSize(E->getType()) == 64) ||
2820 BuiltinID == clang::ARM::BI__ldrexd) {
2821 Function *F;
2822
2823 switch (BuiltinID) {
2824 default: llvm_unreachable("unexpected builtin");
2825 case clang::ARM::BI__builtin_arm_ldaex:
2826 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
2827 break;
2828 case clang::ARM::BI__builtin_arm_ldrexd:
2829 case clang::ARM::BI__builtin_arm_ldrex:
2830 case clang::ARM::BI__ldrexd:
2831 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
2832 break;
2833 }
2834
2835 Value *LdPtr = EmitScalarExpr(E->getArg(0));
2836 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
2837
2838 Value *Val0 = Builder.CreateExtractValue(Val, 1);
2839 Value *Val1 = Builder.CreateExtractValue(Val, 0);
2840 Val0 = Builder.CreateZExt(Val0, Int64Ty);
2841 Val1 = Builder.CreateZExt(Val1, Int64Ty);
2842
2843 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
2844 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
2845 Val = Builder.CreateOr(Val, Val1);
2846 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
2847 }
2848
2849 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2850 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
2851 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
2852
2853 QualType Ty = E->getType();
2854 llvm::Type *RealResTy = ConvertType(Ty);
2855 llvm::Type *IntTy =
2856 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
2857
2858 Function *F = CGM.getIntrinsic(
2859 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
2860 : Intrinsic::arm_ldrex,
2861 UnqualPtrTy);
2862 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
2863 Val->addParamAttr(
2864 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
2865
2866 if (RealResTy->isPointerTy())
2867 return Builder.CreateIntToPtr(Val, RealResTy);
2868 else {
2869 llvm::Type *IntResTy = llvm::IntegerType::get(
2870 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
2871 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
2872 RealResTy);
2873 }
2874 }
2875
2876 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
2877 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
2878 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
2879 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
2880 Function *F = CGM.getIntrinsic(
2881 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
2882 : Intrinsic::arm_strexd);
2883 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
2884
2885 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
2886 Value *Val = EmitScalarExpr(E->getArg(0));
2887 Builder.CreateStore(Val, Tmp);
2888
2889 Address LdPtr = Tmp.withElementType(STy);
2890 Val = Builder.CreateLoad(LdPtr);
2891
2892 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
2893 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
2894 Value *StPtr = EmitScalarExpr(E->getArg(1));
2895 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
2896 }
2897
2898 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
2899 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
2900 Value *StoreVal = EmitScalarExpr(E->getArg(0));
2901 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
2902
2903 QualType Ty = E->getArg(0)->getType();
2904 llvm::Type *StoreTy =
2905 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
2906
2907 if (StoreVal->getType()->isPointerTy())
2908 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
2909 else {
2910 llvm::Type *IntTy = llvm::IntegerType::get(
2911 getLLVMContext(),
2912 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
2913 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
2914 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
2915 }
2916
2917 Function *F = CGM.getIntrinsic(
2918 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
2919 : Intrinsic::arm_strex,
2920 StoreAddr->getType());
2921
2922 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
2923 CI->addParamAttr(
2924 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
2925 return CI;
2926 }
2927
2928 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
2929 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
2930 return Builder.CreateCall(F);
2931 }
2932
2933 // CRC32
2934 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
2935 switch (BuiltinID) {
2936 case clang::ARM::BI__builtin_arm_crc32b:
2937 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
2938 case clang::ARM::BI__builtin_arm_crc32cb:
2939 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
2940 case clang::ARM::BI__builtin_arm_crc32h:
2941 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
2942 case clang::ARM::BI__builtin_arm_crc32ch:
2943 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
2944 case clang::ARM::BI__builtin_arm_crc32w:
2945 case clang::ARM::BI__builtin_arm_crc32d:
2946 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
2947 case clang::ARM::BI__builtin_arm_crc32cw:
2948 case clang::ARM::BI__builtin_arm_crc32cd:
2949 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
2950 }
2951
2952 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
2953 Value *Arg0 = EmitScalarExpr(E->getArg(0));
2954 Value *Arg1 = EmitScalarExpr(E->getArg(1));
2955
2956 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
2957 // intrinsics, hence we need different codegen for these cases.
2958 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
2959 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
2960 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
2961 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
2962 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
2963 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
2964
2965 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
2966 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
2967 return Builder.CreateCall(F, {Res, Arg1b});
2968 } else {
2969 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
2970
2971 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
2972 return Builder.CreateCall(F, {Arg0, Arg1});
2973 }
2974 }
2975
2976 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
2977 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2978 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
2979 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
2980 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
2981 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
2982
2983 SpecialRegisterAccessKind AccessKind = Write;
2984 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
2985 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2986 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
2987 AccessKind = VolatileRead;
2988
2989 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
2990 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
2991
2992 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2993 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
2994
2995 llvm::Type *ValueType;
2996 llvm::Type *RegisterType;
2997 if (IsPointerBuiltin) {
2998 ValueType = VoidPtrTy;
2999 RegisterType = Int32Ty;
3000 } else if (Is64Bit) {
3001 ValueType = RegisterType = Int64Ty;
3002 } else {
3003 ValueType = RegisterType = Int32Ty;
3004 }
3005
3006 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
3007 AccessKind);
3008 }
3009
3010 if (BuiltinID == ARM::BI__builtin_sponentry) {
3011 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
3012 return Builder.CreateCall(F);
3013 }
3014
3015 // Handle MSVC intrinsics before argument evaluation to prevent double
3016 // evaluation.
3017 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
3018 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
3019
3020 // Deal with MVE builtins
3021 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
3022 return Result;
3023 // Handle CDE builtins
3024 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
3025 return Result;
3026
3027 // Some intrinsics are equivalent; if so, use the base intrinsic ID.
3028 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
3029 return P.first == BuiltinID;
3030 });
3031 if (It != end(NEONEquivalentIntrinsicMap))
3032 BuiltinID = It->second;
3033
3034 // Find out if any arguments are required to be integer constant
3035 // expressions.
3036 unsigned ICEArguments = 0;
3037 ASTContext::GetBuiltinTypeError Error;
3038 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3039 assert(Error == ASTContext::GE_None && "Should not codegen an error");
3040
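  // Returns the pointee alignment as an i32, used as the explicit alignment
  // operand of the arm.neon vld/vst intrinsics below.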
3041 auto getAlignmentValue32 = [&](Address addr) -> Value* {
3042 return Builder.getInt32(addr.getAlignment().getQuantity());
3043 };
3044
3045 Address PtrOp0 = Address::invalid();
3046 Address PtrOp1 = Address::invalid();
3047 SmallVector<Value*, 4> Ops;
3048 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
3049 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
3050 for (unsigned i = 0, e = NumArgs; i != e; i++) {
3051 if (i == 0) {
3052 switch (BuiltinID) {
3053 case NEON::BI__builtin_neon_vld1_v:
3054 case NEON::BI__builtin_neon_vld1q_v:
3055 case NEON::BI__builtin_neon_vld1q_lane_v:
3056 case NEON::BI__builtin_neon_vld1_lane_v:
3057 case NEON::BI__builtin_neon_vld1_dup_v:
3058 case NEON::BI__builtin_neon_vld1q_dup_v:
3059 case NEON::BI__builtin_neon_vst1_v:
3060 case NEON::BI__builtin_neon_vst1q_v:
3061 case NEON::BI__builtin_neon_vst1q_lane_v:
3062 case NEON::BI__builtin_neon_vst1_lane_v:
3063 case NEON::BI__builtin_neon_vst2_v:
3064 case NEON::BI__builtin_neon_vst2q_v:
3065 case NEON::BI__builtin_neon_vst2_lane_v:
3066 case NEON::BI__builtin_neon_vst2q_lane_v:
3067 case NEON::BI__builtin_neon_vst3_v:
3068 case NEON::BI__builtin_neon_vst3q_v:
3069 case NEON::BI__builtin_neon_vst3_lane_v:
3070 case NEON::BI__builtin_neon_vst3q_lane_v:
3071 case NEON::BI__builtin_neon_vst4_v:
3072 case NEON::BI__builtin_neon_vst4q_v:
3073 case NEON::BI__builtin_neon_vst4_lane_v:
3074 case NEON::BI__builtin_neon_vst4q_lane_v:
3075 // Get the alignment for the argument in addition to the value;
3076 // we'll use it later.
3077 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
3078 Ops.push_back(PtrOp0.emitRawPointer(*this));
3079 continue;
3080 }
3081 }
3082 if (i == 1) {
3083 switch (BuiltinID) {
3084 case NEON::BI__builtin_neon_vld2_v:
3085 case NEON::BI__builtin_neon_vld2q_v:
3086 case NEON::BI__builtin_neon_vld3_v:
3087 case NEON::BI__builtin_neon_vld3q_v:
3088 case NEON::BI__builtin_neon_vld4_v:
3089 case NEON::BI__builtin_neon_vld4q_v:
3090 case NEON::BI__builtin_neon_vld2_lane_v:
3091 case NEON::BI__builtin_neon_vld2q_lane_v:
3092 case NEON::BI__builtin_neon_vld3_lane_v:
3093 case NEON::BI__builtin_neon_vld3q_lane_v:
3094 case NEON::BI__builtin_neon_vld4_lane_v:
3095 case NEON::BI__builtin_neon_vld4q_lane_v:
3096 case NEON::BI__builtin_neon_vld2_dup_v:
3097 case NEON::BI__builtin_neon_vld2q_dup_v:
3098 case NEON::BI__builtin_neon_vld3_dup_v:
3099 case NEON::BI__builtin_neon_vld3q_dup_v:
3100 case NEON::BI__builtin_neon_vld4_dup_v:
3101 case NEON::BI__builtin_neon_vld4q_dup_v:
3102 // Get the alignment for the argument in addition to the value;
3103 // we'll use it later.
3104 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
3105 Ops.push_back(PtrOp1.emitRawPointer(*this));
3106 continue;
3107 }
3108 }
3109
3110 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
3111 }
3112
3113 switch (BuiltinID) {
3114 default: break;
3115
3116 case NEON::BI__builtin_neon_vget_lane_i8:
3117 case NEON::BI__builtin_neon_vget_lane_i16:
3118 case NEON::BI__builtin_neon_vget_lane_i32:
3119 case NEON::BI__builtin_neon_vget_lane_i64:
3120 case NEON::BI__builtin_neon_vget_lane_bf16:
3121 case NEON::BI__builtin_neon_vget_lane_f32:
3122 case NEON::BI__builtin_neon_vgetq_lane_i8:
3123 case NEON::BI__builtin_neon_vgetq_lane_i16:
3124 case NEON::BI__builtin_neon_vgetq_lane_i32:
3125 case NEON::BI__builtin_neon_vgetq_lane_i64:
3126 case NEON::BI__builtin_neon_vgetq_lane_bf16:
3127 case NEON::BI__builtin_neon_vgetq_lane_f32:
3128 case NEON::BI__builtin_neon_vduph_lane_bf16:
3129 case NEON::BI__builtin_neon_vduph_laneq_bf16:
3130 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
3131
3132 case NEON::BI__builtin_neon_vrndns_f32: {
3133 Value *Arg = EmitScalarExpr(E->getArg(0));
3134 llvm::Type *Tys[] = {Arg->getType()};
3135 Function *F = CGM.getIntrinsic(Intrinsic::roundeven, Tys);
3136 return Builder.CreateCall(F, {Arg}, "vrndn"); }
3137
3138 case NEON::BI__builtin_neon_vset_lane_i8:
3139 case NEON::BI__builtin_neon_vset_lane_i16:
3140 case NEON::BI__builtin_neon_vset_lane_i32:
3141 case NEON::BI__builtin_neon_vset_lane_i64:
3142 case NEON::BI__builtin_neon_vset_lane_bf16:
3143 case NEON::BI__builtin_neon_vset_lane_f32:
3144 case NEON::BI__builtin_neon_vsetq_lane_i8:
3145 case NEON::BI__builtin_neon_vsetq_lane_i16:
3146 case NEON::BI__builtin_neon_vsetq_lane_i32:
3147 case NEON::BI__builtin_neon_vsetq_lane_i64:
3148 case NEON::BI__builtin_neon_vsetq_lane_bf16:
3149 case NEON::BI__builtin_neon_vsetq_lane_f32:
3150 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
3151
3152 case NEON::BI__builtin_neon_vsha1h_u32:
3153 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
3154 "vsha1h");
3155 case NEON::BI__builtin_neon_vsha1cq_u32:
3156 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
3157 "vsha1h");
3158 case NEON::BI__builtin_neon_vsha1pq_u32:
3159 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
3160 "vsha1h");
3161 case NEON::BI__builtin_neon_vsha1mq_u32:
3162 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
3163 "vsha1h");
3164
3165 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
3166 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
3167 "vcvtbfp2bf");
3168 }
3169
3170 // The ARM _MoveToCoprocessor builtins put the input register value as
3171 // the first argument, but the LLVM intrinsic expects it as the third one.
3172 case clang::ARM::BI_MoveToCoprocessor:
3173 case clang::ARM::BI_MoveToCoprocessor2: {
3174 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
3175 ? Intrinsic::arm_mcr
3176 : Intrinsic::arm_mcr2);
3177 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
3178 Ops[3], Ops[4], Ops[5]});
3179 }
3180 }
3181
3182 // Get the last argument, which specifies the vector type.
3183 assert(HasExtraArg);
3184 const Expr *Arg = E->getArg(E->getNumArgs()-1);
3185 std::optional<llvm::APSInt> Result =
3186 Arg->getIntegerConstantExpr(getContext());
3187 if (!Result)
3188 return nullptr;
3189
3190 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
3191 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
3192 // Determine the overloaded type of this builtin.
3193 llvm::Type *Ty;
3194 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
3195 Ty = FloatTy;
3196 else
3197 Ty = DoubleTy;
3198
3199 // Determine whether this is an unsigned conversion or not.
3200 bool usgn = Result->getZExtValue() == 1;
3201 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
3202
3203 // Call the appropriate intrinsic.
3204 Function *F = CGM.getIntrinsic(Int, Ty);
3205 return Builder.CreateCall(F, Ops, "vcvtr");
3206 }
3207
3208 // Determine the type of this overloaded NEON intrinsic.
3209 NeonTypeFlags Type = Result->getZExtValue();
3210 bool usgn = Type.isUnsigned();
3211 bool rightShift = false;
3212
3213 llvm::FixedVectorType *VTy =
3214 GetNeonType(this, Type, getTarget().hasFastHalfType(), false,
3215 getTarget().hasBFloat16Type());
3216 llvm::Type *Ty = VTy;
3217 if (!Ty)
3218 return nullptr;
3219
3220 // Many NEON builtins have identical semantics and uses in ARM and
3221 // AArch64. Emit these in a single function.
3222 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
3223 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
3224 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
3225 if (Builtin)
3226 return EmitCommonNeonBuiltinExpr(
3227 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
3228 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
3229
3230 unsigned Int;
3231 switch (BuiltinID) {
3232 default: return nullptr;
3233 case NEON::BI__builtin_neon_vld1q_lane_v:
3234 // Handle 64-bit integer elements as a special case. Use shuffles of
3235 // one-element vectors to avoid poor code for i64 in the backend.
3236 if (VTy->getElementType()->isIntegerTy(64)) {
3237 // Extract the other lane.
3238 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3239 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
3240 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
3241 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
3242 // Load the value as a one-element vector.
3243 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
3244 llvm::Type *Tys[] = {Ty, Int8PtrTy};
3245 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
3246 Value *Align = getAlignmentValue32(PtrOp0);
3247 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
3248 // Combine them.
3249 int Indices[] = {1 - Lane, Lane};
3250 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
3251 }
3252 [[fallthrough]];
3253 case NEON::BI__builtin_neon_vld1_lane_v: {
3254 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3255 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
3256 Value *Ld = Builder.CreateLoad(PtrOp0);
3257 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
3258 }
3259 case NEON::BI__builtin_neon_vqrshrn_n_v:
3260 Int =
3261 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
3262 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
3263 1, true);
3264 case NEON::BI__builtin_neon_vqrshrun_n_v:
3265 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
3266 Ops, "vqrshrun_n", 1, true);
3267 case NEON::BI__builtin_neon_vqshrn_n_v:
3268 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
3269 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
3270 1, true);
3271 case NEON::BI__builtin_neon_vqshrun_n_v:
3272 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
3273 Ops, "vqshrun_n", 1, true);
3274 case NEON::BI__builtin_neon_vrecpe_v:
3275 case NEON::BI__builtin_neon_vrecpeq_v:
3276 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
3277 Ops, "vrecpe");
3278 case NEON::BI__builtin_neon_vrshrn_n_v:
3279 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
3280 Ops, "vrshrn_n", 1, true);
3281 case NEON::BI__builtin_neon_vrsra_n_v:
3282 case NEON::BI__builtin_neon_vrsraq_n_v:
3283 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3284 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3285 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
3286 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
3287 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
3288 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
3289 case NEON::BI__builtin_neon_vsri_n_v:
3290 case NEON::BI__builtin_neon_vsriq_n_v:
3291 rightShift = true;
3292 [[fallthrough]];
3293 case NEON::BI__builtin_neon_vsli_n_v:
3294 case NEON::BI__builtin_neon_vsliq_n_v:
3295 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
3296 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
3297 Ops, "vsli_n");
3298 case NEON::BI__builtin_neon_vsra_n_v:
3299 case NEON::BI__builtin_neon_vsraq_n_v:
3300 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3301 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
3302 return Builder.CreateAdd(Ops[0], Ops[1]);
3303 case NEON::BI__builtin_neon_vst1q_lane_v:
3304 // Handle 64-bit integer elements as a special case. Use a shuffle to get
3305 // a one-element vector and avoid poor code for i64 in the backend.
3306 if (VTy->getElementType()->isIntegerTy(64)) {
3307 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3308 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
3309 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
3310 Ops[2] = getAlignmentValue32(PtrOp0);
3311 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
3312 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
3313 Tys), Ops);
3314 }
3315 [[fallthrough]];
3316 case NEON::BI__builtin_neon_vst1_lane_v: {
3317 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3318 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
3319 return Builder.CreateStore(Ops[1],
3320 PtrOp0.withElementType(Ops[1]->getType()));
3321 }
3322 case NEON::BI__builtin_neon_vtbl1_v:
3323 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
3324 Ops, "vtbl1");
3325 case NEON::BI__builtin_neon_vtbl2_v:
3326 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
3327 Ops, "vtbl2");
3328 case NEON::BI__builtin_neon_vtbl3_v:
3329 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
3330 Ops, "vtbl3");
3331 case NEON::BI__builtin_neon_vtbl4_v:
3332 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
3333 Ops, "vtbl4");
3334 case NEON::BI__builtin_neon_vtbx1_v:
3335 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
3336 Ops, "vtbx1");
3337 case NEON::BI__builtin_neon_vtbx2_v:
3338 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
3339 Ops, "vtbx2");
3340 case NEON::BI__builtin_neon_vtbx3_v:
3341 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
3342 Ops, "vtbx3");
3343 case NEON::BI__builtin_neon_vtbx4_v:
3344 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
3345 Ops, "vtbx4");
3346 }
3347}
3348
3349template<typename Integer>
3350static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
3351 return E->getIntegerConstantExpr(Context)->getExtValue();
3352}
3353
3354static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
3355 llvm::Type *T, bool Unsigned) {
3356 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
3357 // which finds it convenient to specify signed/unsigned as a boolean flag.
3358 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
3359}
3360
3361static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
3362 uint32_t Shift, bool Unsigned) {
3363 // MVE helper function for integer shift right. This must handle signed vs
3364 // unsigned, and also deal specially with the case where the shift count is
3365 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
3366 // undefined behavior, but in MVE it's legal, so we must convert it to code
3367 // that is not undefined in IR.
3368 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
3369 ->getElementType()
3370 ->getPrimitiveSizeInBits();
3371 if (Shift == LaneBits) {
3372 // An unsigned shift of the full lane size always generates zero, so we can
3373 // simply emit a zero vector. A signed shift of the full lane size does the
3374 // same thing as shifting by one bit fewer.
3375 if (Unsigned)
3376 return llvm::Constant::getNullValue(V->getType());
3377 else
3378 --Shift;
3379 }
3380 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
3381}
3382
3383static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
3384 // MVE-specific helper function for a vector splat, which infers the element
3385 // count of the output vector by knowing that MVE vectors are all 128 bits
3386 // wide.
3387 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
3388 return Builder.CreateVectorSplat(Elements, V);
3389}
3390
3391static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
3392 CodeGenFunction *CGF,
3393 llvm::Value *V,
3394 llvm::Type *DestType) {
3395 // Convert one MVE vector type into another by reinterpreting its in-register
3396 // format.
3397 //
3398 // On little-endian targets this is identical to a bitcast (which
3399 // reinterprets the memory format). On big-endian targets they're not
3400 // necessarily the same, because the register and memory formats map to each
3401 // other differently depending on the lane size.
3402 //
3403 // We generate a bitcast whenever we can (if we're little-endian, or if the
3404 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
3405 // that performs the different kind of reinterpretation.
3406 if (CGF->getTarget().isBigEndian() &&
3407 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
3408 return Builder.CreateCall(
3409 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
3410 {DestType, V->getType()}),
3411 V);
3412 } else {
3413 return Builder.CreateBitCast(V, DestType);
3414 }
3415}
3416
3417static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
3418 // Make a shufflevector that extracts every other element of a vector (evens
3419 // or odds, as desired).
3420 SmallVector<int, 16> Indices;
3421 unsigned InputElements =
3422 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
3423 for (unsigned i = 0; i < InputElements; i += 2)
3424 Indices.push_back(i + Odd);
3425 return Builder.CreateShuffleVector(V, Indices);
3426}
3427
3428static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
3429 llvm::Value *V1) {
3430 // Make a shufflevector that interleaves two vectors element by element.
3431 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
3432 SmallVector<int, 16> Indices;
3433 unsigned InputElements =
3434 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
3435 for (unsigned i = 0; i < InputElements; i++) {
3436 Indices.push_back(i);
3437 Indices.push_back(i + InputElements);
3438 }
3439 return Builder.CreateShuffleVector(V0, V1, Indices);
3440}
3441
3442template<unsigned HighBit, unsigned OtherBits>
3443static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
3444 // MVE-specific helper function to make a vector splat of a constant such as
3445 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
3446 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
3447 unsigned LaneBits = T->getPrimitiveSizeInBits();
3448 uint32_t Value = HighBit << (LaneBits - 1);
3449 if (OtherBits)
3450 Value |= (1UL << (LaneBits - 1)) - 1;
3451 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
3452 return ARMMVEVectorSplat(Builder, Lane);
3453}
3454
3455static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
3456 llvm::Value *V,
3457 unsigned ReverseWidth) {
3458 // MVE-specific helper function which reverses the elements of a
3459 // vector within every (ReverseWidth)-bit collection of lanes.
3460 SmallVector<int, 16> Indices;
3461 unsigned LaneSize = V->getType()->getScalarSizeInBits();
3462 unsigned Elements = 128 / LaneSize;
3463 unsigned Mask = ReverseWidth / LaneSize - 1;
3464 for (unsigned i = 0; i < Elements; i++)
3465 Indices.push_back(i ^ Mask);
3466 return Builder.CreateShuffleVector(V, Indices);
3467}
3468
3469Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
3470 const CallExpr *E,
3471 ReturnValueSlot ReturnValue,
3472 llvm::Triple::ArchType Arch) {
3473 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
3474 Intrinsic::ID IRIntr;
3475 unsigned NumVectors;
3476
3477 // Code autogenerated by Tablegen will handle all the simple builtins.
3478 switch (BuiltinID) {
3479 #include "clang/Basic/arm_mve_builtin_cg.inc"
3480
3481 // If we didn't match an MVE builtin id at all, go back to the
3482 // main EmitARMBuiltinExpr.
3483 default:
3484 return nullptr;
3485 }
3486
3487 // Anything that breaks from that switch is an MVE builtin that
3488 // needs handwritten code to generate.
3489
3490 switch (CustomCodeGenType) {
3491
3492 case CustomCodeGen::VLD24: {
3493 llvm::SmallVector<Value *, 4> Ops;
3494 llvm::SmallVector<llvm::Type *, 4> Tys;
3495
3496 auto MvecCType = E->getType();
3497 auto MvecLType = ConvertType(MvecCType);
3498 assert(MvecLType->isStructTy() &&
3499 "Return type for vld[24]q should be a struct");
3500 assert(MvecLType->getStructNumElements() == 1 &&
3501 "Return-type struct for vld[24]q should have one element");
3502 auto MvecLTypeInner = MvecLType->getStructElementType(0);
3503 assert(MvecLTypeInner->isArrayTy() &&
3504 "Return-type struct for vld[24]q should contain an array");
3505 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3506 "Array member of return-type struct vld[24]q has wrong length");
3507 auto VecLType = MvecLTypeInner->getArrayElementType();
3508
3509 Tys.push_back(VecLType);
3510
3511 auto Addr = E->getArg(0);
3512 Ops.push_back(EmitScalarExpr(Addr));
3513 Tys.push_back(ConvertType(Addr->getType()));
3514
3515 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
3516 Value *LoadResult = Builder.CreateCall(F, Ops);
3517 Value *MvecOut = PoisonValue::get(MvecLType);
3518 for (unsigned i = 0; i < NumVectors; ++i) {
3519 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
3520 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
3521 }
3522
3523 if (ReturnValue.isNull())
3524 return MvecOut;
3525 else
3526 return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
3527 }
3528
3529 case CustomCodeGen::VST24: {
3530 llvm::SmallVector<Value *, 4> Ops;
3531 llvm::SmallVector<llvm::Type *, 4> Tys;
3532
3533 auto Addr = E->getArg(0);
3534 Ops.push_back(EmitScalarExpr(Addr));
3535 Tys.push_back(ConvertType(Addr->getType()));
3536
3537 auto MvecCType = E->getArg(1)->getType();
3538 auto MvecLType = ConvertType(MvecCType);
3539 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
3540 assert(MvecLType->getStructNumElements() == 1 &&
3541 "Data-type struct for vst2q should have one element");
3542 auto MvecLTypeInner = MvecLType->getStructElementType(0);
3543 assert(MvecLTypeInner->isArrayTy() &&
3544 "Data-type struct for vst2q should contain an array");
3545 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3546 "Array member of data-type struct for vst[24]q has wrong length");
3547 auto VecLType = MvecLTypeInner->getArrayElementType();
3548
3549 Tys.push_back(VecLType);
3550
3551 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
3552 EmitAggExpr(E->getArg(1), MvecSlot);
3553 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
3554 for (unsigned i = 0; i < NumVectors; i++)
3555 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
3556
3557 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
3558 Value *ToReturn = nullptr;
3559 for (unsigned i = 0; i < NumVectors; i++) {
3560 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
3561 ToReturn = Builder.CreateCall(F, Ops);
3562 Ops.pop_back();
3563 }
3564 return ToReturn;
3565 }
3566 }
3567 llvm_unreachable("unknown custom codegen type.");
3568}
3569
3570Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
3571 const CallExpr *E,
3572 ReturnValueSlot ReturnValue,
3573 llvm::Triple::ArchType Arch) {
3574 switch (BuiltinID) {
3575 default:
3576 return nullptr;
3577#include "clang/Basic/arm_cde_builtin_cg.inc"
3578 }
3579}
3580
3581static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
3582 const CallExpr *E,
3583 SmallVectorImpl<Value *> &Ops,
3584 llvm::Triple::ArchType Arch) {
3585 unsigned int Int = 0;
3586 const char *s = nullptr;
3587
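  // First weed out anything that is not a TBL/TBX builtin; everything that
  // breaks out of this switch is handled by the common code below.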
3588 switch (BuiltinID) {
3589 default:
3590 return nullptr;
3591 case NEON::BI__builtin_neon_vtbl1_v:
3592 case NEON::BI__builtin_neon_vqtbl1_v:
3593 case NEON::BI__builtin_neon_vqtbl1q_v:
3594 case NEON::BI__builtin_neon_vtbl2_v:
3595 case NEON::BI__builtin_neon_vqtbl2_v:
3596 case NEON::BI__builtin_neon_vqtbl2q_v:
3597 case NEON::BI__builtin_neon_vtbl3_v:
3598 case NEON::BI__builtin_neon_vqtbl3_v:
3599 case NEON::BI__builtin_neon_vqtbl3q_v:
3600 case NEON::BI__builtin_neon_vtbl4_v:
3601 case NEON::BI__builtin_neon_vqtbl4_v:
3602 case NEON::BI__builtin_neon_vqtbl4q_v:
3603 break;
3604 case NEON::BI__builtin_neon_vtbx1_v:
3605 case NEON::BI__builtin_neon_vqtbx1_v:
3606 case NEON::BI__builtin_neon_vqtbx1q_v:
3607 case NEON::BI__builtin_neon_vtbx2_v:
3608 case NEON::BI__builtin_neon_vqtbx2_v:
3609 case NEON::BI__builtin_neon_vqtbx2q_v:
3610 case NEON::BI__builtin_neon_vtbx3_v:
3611 case NEON::BI__builtin_neon_vqtbx3_v:
3612 case NEON::BI__builtin_neon_vqtbx3q_v:
3613 case NEON::BI__builtin_neon_vtbx4_v:
3614 case NEON::BI__builtin_neon_vqtbx4_v:
3615 case NEON::BI__builtin_neon_vqtbx4q_v:
3616 break;
3617 }
3618
3619 assert(E->getNumArgs() >= 3);
3620
3621 // Get the last argument, which specifies the vector type.
3622 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3623 std::optional<llvm::APSInt> Result =
3624 Arg->getIntegerConstantExpr(CGF.getContext());
3625 if (!Result)
3626 return nullptr;
3627
3628 // Determine the type of this overloaded NEON intrinsic.
3629 NeonTypeFlags Type = Result->getZExtValue();
3630 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
3631 if (!Ty)
3632 return nullptr;
3633
3634 CodeGen::CGBuilderTy &Builder = CGF.Builder;
3635
3636 // AArch64 scalar builtins are not overloaded; they do not have an extra
3637 // argument that specifies the vector type, so we need to handle each case.
3638 switch (BuiltinID) {
3639 case NEON::BI__builtin_neon_vtbl1_v: {
3640 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
3641 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3642 }
3643 case NEON::BI__builtin_neon_vtbl2_v: {
3644 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
3645 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3646 }
3647 case NEON::BI__builtin_neon_vtbl3_v: {
3648 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
3649 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3650 }
3651 case NEON::BI__builtin_neon_vtbl4_v: {
3652 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
3653 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3654 }
3655 case NEON::BI__builtin_neon_vtbx1_v: {
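    // vtbx1 on AArch64: do a TBL1 lookup, then keep the original destination
    // lane wherever the index is out of range for a single 64-bit table (>= 8).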
3656 Value *TblRes =
3657 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
3658 Intrinsic::aarch64_neon_tbl1, "vtbl1");
3659
3660 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
3661 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
3662 CmpRes = Builder.CreateSExt(CmpRes, Ty);
3663
3664 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
3665 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
3666 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
3667 }
3668 case NEON::BI__builtin_neon_vtbx2_v: {
3669 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
3670 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
3671 }
3672 case NEON::BI__builtin_neon_vtbx3_v: {
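    // vtbx3 on AArch64: do a TBL2 lookup over the packed tables, then keep the
    // original destination lane for indices that are out of range (>= 24).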
3673 Value *TblRes =
3674 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
3675 Intrinsic::aarch64_neon_tbl2, "vtbl2");
3676
3677 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
3678 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
3679 TwentyFourV);
3680 CmpRes = Builder.CreateSExt(CmpRes, Ty);
3681
3682 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
3683 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
3684 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
3685 }
3686 case NEON::BI__builtin_neon_vtbx4_v: {
3687 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
3688 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
3689 }
3690 case NEON::BI__builtin_neon_vqtbl1_v:
3691 case NEON::BI__builtin_neon_vqtbl1q_v:
3692 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
3693 case NEON::BI__builtin_neon_vqtbl2_v:
3694 case NEON::BI__builtin_neon_vqtbl2q_v: {
3695 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
3696 case NEON::BI__builtin_neon_vqtbl3_v:
3697 case NEON::BI__builtin_neon_vqtbl3q_v:
3698 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
3699 case NEON::BI__builtin_neon_vqtbl4_v:
3700 case NEON::BI__builtin_neon_vqtbl4q_v:
3701 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
3702 case NEON::BI__builtin_neon_vqtbx1_v:
3703 case NEON::BI__builtin_neon_vqtbx1q_v:
3704 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
3705 case NEON::BI__builtin_neon_vqtbx2_v:
3706 case NEON::BI__builtin_neon_vqtbx2q_v:
3707 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
3708 case NEON::BI__builtin_neon_vqtbx3_v:
3709 case NEON::BI__builtin_neon_vqtbx3q_v:
3710 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
3711 case NEON::BI__builtin_neon_vqtbx4_v:
3712 case NEON::BI__builtin_neon_vqtbx4q_v:
3713 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
3714 }
3715 }
3716
3717 if (!Int)
3718 return nullptr;
3719
3720 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
3721 return CGF.EmitNeonCall(F, Ops, s);
3722}
3723
3724Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
3725 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
3726 Op = Builder.CreateBitCast(Op, Int16Ty);
3727 Value *V = PoisonValue::get(VTy);
3728 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3729 Op = Builder.CreateInsertElement(V, Op, CI);
3730 return Op;
3731}
3732
3733/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
3734/// access builtin. Only required if it can't be inferred from the base pointer
3735/// operand.
3736llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
3737 switch (TypeFlags.getMemEltType()) {
3738 case SVETypeFlags::MemEltTyDefault:
3739 return getEltType(TypeFlags);
3740 case SVETypeFlags::MemEltTyInt8:
3741 return Builder.getInt8Ty();
3742 case SVETypeFlags::MemEltTyInt16:
3743 return Builder.getInt16Ty();
3744 case SVETypeFlags::MemEltTyInt32:
3745 return Builder.getInt32Ty();
3746 case SVETypeFlags::MemEltTyInt64:
3747 return Builder.getInt64Ty();
3748 }
3749 llvm_unreachable("Unknown MemEltType");
3750}
3751
3752llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
3753 switch (TypeFlags.getEltType()) {
3754 default:
3755 llvm_unreachable("Invalid SVETypeFlag!");
3756
3757 case SVETypeFlags::EltTyMFloat8:
3758 case SVETypeFlags::EltTyInt8:
3759 return Builder.getInt8Ty();
3760 case SVETypeFlags::EltTyInt16:
3761 return Builder.getInt16Ty();
3762 case SVETypeFlags::EltTyInt32:
3763 return Builder.getInt32Ty();
3764 case SVETypeFlags::EltTyInt64:
3765 return Builder.getInt64Ty();
3766 case SVETypeFlags::EltTyInt128:
3767 return Builder.getInt128Ty();
3768
3769 case SVETypeFlags::EltTyFloat16:
3770 return Builder.getHalfTy();
3771 case SVETypeFlags::EltTyFloat32:
3772 return Builder.getFloatTy();
3773 case SVETypeFlags::EltTyFloat64:
3774 return Builder.getDoubleTy();
3775
3776 case SVETypeFlags::EltTyBFloat16:
3777 return Builder.getBFloatTy();
3778
3779 case SVETypeFlags::EltTyBool8:
3780 case SVETypeFlags::EltTyBool16:
3781 case SVETypeFlags::EltTyBool32:
3782 case SVETypeFlags::EltTyBool64:
3783 return Builder.getInt1Ty();
3784 }
3785}
3786
3787// Return the llvm predicate vector type corresponding to the specified element
3788// TypeFlags.
3789llvm::ScalableVectorType *
3790CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
3791 switch (TypeFlags.getEltType()) {
3792 default: llvm_unreachable("Unhandled SVETypeFlag!");
3793
3794 case SVETypeFlags::EltTyInt8:
3795 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3796 case SVETypeFlags::EltTyInt16:
3797 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3798 case SVETypeFlags::EltTyInt32:
3799 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3800 case SVETypeFlags::EltTyInt64:
3801 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3802
3803 case SVETypeFlags::EltTyBFloat16:
3804 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3805 case SVETypeFlags::EltTyFloat16:
3806 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3807 case SVETypeFlags::EltTyFloat32:
3808 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3809 case SVETypeFlags::EltTyFloat64:
3810 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3811
3812 case SVETypeFlags::EltTyBool8:
3813 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3814 case SVETypeFlags::EltTyBool16:
3815 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3816 case SVETypeFlags::EltTyBool32:
3817 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3818 case SVETypeFlags::EltTyBool64:
3819 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3820 }
3821}
3822
3823// Return the llvm vector type corresponding to the specified element TypeFlags.
3824llvm::ScalableVectorType *
3825CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
3826 switch (TypeFlags.getEltType()) {
3827 default:
3828 llvm_unreachable("Invalid SVETypeFlag!");
3829
3830 case SVETypeFlags::EltTyInt8:
3831 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
3832 case SVETypeFlags::EltTyInt16:
3833 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
3834 case SVETypeFlags::EltTyInt32:
3835 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
3836 case SVETypeFlags::EltTyInt64:
3837 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
3838
3839 case SVETypeFlags::EltTyMFloat8:
3840 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
3841 case SVETypeFlags::EltTyFloat16:
3842 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
3843 case SVETypeFlags::EltTyBFloat16:
3844 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
3845 case SVETypeFlags::EltTyFloat32:
3846 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
3847 case SVETypeFlags::EltTyFloat64:
3848 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
3849
3850 case SVETypeFlags::EltTyBool8:
3851 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3852 case SVETypeFlags::EltTyBool16:
3853 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3854 case SVETypeFlags::EltTyBool32:
3855 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3856 case SVETypeFlags::EltTyBool64:
3857 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3858 }
3859}
3860
3861llvm::Value *
3862CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
3863 Function *Ptrue =
3864 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
3865 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
3866}
3867
3868constexpr unsigned SVEBitsPerBlock = 128;
3869
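// Returns the SVE vector type that packs EltTy into one 128-bit block, e.g.
// <vscale x 4 x i32> for a 32-bit element type.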
3870static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
3871 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
3872 return llvm::ScalableVectorType::get(EltTy, NumElts);
3873}
3874
3875// Reinterpret the input predicate so that it can be used to correctly isolate
3876// the elements of the specified datatype.
3877Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
3878 llvm::ScalableVectorType *VTy) {
3879
3880 if (isa<TargetExtType>(Pred->getType()) &&
3881 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
3882 return Pred;
3883
3884 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
3885 if (Pred->getType() == RTy)
3886 return Pred;
3887
3888 unsigned IntID;
3889 llvm::Type *IntrinsicTy;
3890 switch (VTy->getMinNumElements()) {
3891 default:
3892 llvm_unreachable("unsupported element count!");
3893 case 1:
3894 case 2:
3895 case 4:
3896 case 8:
3897 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
3898 IntrinsicTy = RTy;
3899 break;
3900 case 16:
3901 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
3902 IntrinsicTy = Pred->getType();
3903 break;
3904 }
3905
3906 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
3907 Value *C = Builder.CreateCall(F, Pred);
3908 assert(C->getType() == RTy && "Unexpected return type!");
3909 return C;
3910}
3911
3912Value *CodeGenFunction::EmitSVEPredicateTupleCast(Value *PredTuple,
3913 llvm::StructType *Ty) {
3914 if (PredTuple->getType() == Ty)
3915 return PredTuple;
3916
3917 Value *Ret = llvm::PoisonValue::get(Ty);
3918 for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
3919 Value *Pred = Builder.CreateExtractValue(PredTuple, I);
3920 Pred = EmitSVEPredicateCast(
3921 Pred, cast<llvm::ScalableVectorType>(Ty->getTypeAtIndex(I)));
3922 Ret = Builder.CreateInsertValue(Ret, Pred, I);
3923 }
3924
3925 return Ret;
3926}
3927
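// Emits an SVE gather load. As the comments below explain, the "scalar base,
// vector offset" forms (e.g. svld1_gather_s64offset_s64) are overloaded only
// on the result type, while the "vector base" forms also encode the type of
// the base vector and get a zero offset appended when the ACLE form has none.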
3928Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
3929 SmallVectorImpl<Value *> &Ops,
3930 unsigned IntID) {
3931 auto *ResultTy = getSVEType(TypeFlags);
3932 auto *OverloadedTy =
3933 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
3934
3935 Function *F = nullptr;
3936 if (Ops[1]->getType()->isVectorTy())
3937 // This is the "vector base, scalar offset" case. In order to uniquely
3938 // map this built-in to an LLVM IR intrinsic, we need both the return type
3939 // and the type of the vector base.
3940 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
3941 else
3942 // This is the "scalar base, vector offset" case. The type of the offset
3943 // is encoded in the name of the intrinsic. We only need to specify the
3944 // return type in order to uniquely map this built-in to an LLVM IR
3945 // intrinsic.
3946 F = CGM.getIntrinsic(IntID, OverloadedTy);
3947
3948 // At the ACLE level there's only one predicate type, svbool_t, which is
3949 // mapped to <n x 16 x i1>. However, this might be incompatible with the
3950 // actual type being loaded. For example, when loading doubles (i64) the
3951 // predicate should be <n x 2 x i1> instead. At the IR level the type of
3952 // the predicate and the data being loaded must match. Cast to the type
3953 // expected by the intrinsic. The intrinsic itself should be defined in
3954 // a way that enforces relations between parameter types.
3955 Ops[0] = EmitSVEPredicateCast(
3956 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
3957
3958 // Pass 0 when the offset is missing. This can only be applied when using
3959 // the "vector base" addressing mode for which ACLE allows no offset. The
3960 // corresponding LLVM IR always requires an offset.
3961 if (Ops.size() == 2) {
3962 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
3963 Ops.push_back(ConstantInt::get(Int64Ty, 0));
3964 }
3965
3966 // For "vector base, scalar index" scale the index so that it becomes a
3967 // scalar offset.
3968 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
3969 unsigned BytesPerElt =
3970 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
3971 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
3972 }
3973
3974 Value *Call = Builder.CreateCall(F, Ops);
3975
3976 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
3977 // other cases it's folded into a nop.
3978 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
3979 : Builder.CreateSExt(Call, ResultTy);
3980}
3981
3982Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
3983 SmallVectorImpl<Value *> &Ops,
3984 unsigned IntID) {
3985 auto *SrcDataTy = getSVEType(TypeFlags);
3986 auto *OverloadedTy =
3987 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
3988
3989 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
3990 // it's the first argument. Move it accordingly.
3991 Ops.insert(Ops.begin(), Ops.pop_back_val());
3992
3993 Function *F = nullptr;
3994 if (Ops[2]->getType()->isVectorTy())
3995 // This is the "vector base, scalar offset" case. In order to uniquely
3996 // map this built-in to an LLVM IR intrinsic, we need both the return type
3997 // and the type of the vector base.
3998 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
3999 else
4000 // This is the "scalar base, vector offset" case. The type of the offset
4001 // is encoded in the name of the intrinsic. We only need to specify the
4002 // return type in order to uniquely map this built-in to an LLVM IR
4003 // intrinsic.
4004 F = CGM.getIntrinsic(IntID, OverloadedTy);
4005
4006 // Pass 0 when the offset is missing. This can only be applied when using
4007 // the "vector base" addressing mode for which ACLE allows no offset. The
4008 // corresponding LLVM IR always requires an offset.
4009 if (Ops.size() == 3) {
4010 assert(Ops[2]->getType()->isVectorTy() && "Scalar base requires an offset");
4011 Ops.push_back(ConstantInt::get(Int64Ty, 0));
4012 }
4013
4014 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
4015 // folded into a nop.
4016 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
4017
4018 // At the ACLE level there's only one predicate type, svbool_t, which is
4019 // mapped to <n x 16 x i1>. However, this might be incompatible with the
4020 // actual type being stored. For example, when storing doubles (i64) the
4021 // predicate should be <n x 2 x i1> instead. At the IR level the type of
4022 // the predicate and the data being stored must match. Cast to the type
4023 // expected by the intrinsic. The intrinsic itself should be defined in
4024 // a way that enforces relations between parameter types.
4025 Ops[1] = EmitSVEPredicateCast(
4026 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
4027
4028 // For "vector base, scalar index" scale the index so that it becomes a
4029 // scalar offset.
4030 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
4031 unsigned BytesPerElt =
4032 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
4033 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
4034 }
4035
4036 return Builder.CreateCall(F, Ops);
4037}
4038
4039Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
4040 SmallVectorImpl<Value *> &Ops,
4041 unsigned IntID) {
4042 // The gather prefetches are overloaded on the vector input - this can either
4043 // be the vector of base addresses or vector of offsets.
4044 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
4045 if (!OverloadedTy)
4046 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
4047
4048 // Cast the predicate from svbool_t to the right number of elements.
4049 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
4050
4051 // vector + imm addressing modes
4052 if (Ops[1]->getType()->isVectorTy()) {
4053 if (Ops.size() == 3) {
4054 // Pass 0 for 'vector+imm' when the index is omitted.
4055 Ops.push_back(ConstantInt::get(Int64Ty, 0));
4056
4057 // The sv_prfop is the last operand in the builtin and IR intrinsic.
4058 std::swap(Ops[2], Ops[3]);
4059 } else {
4060 // Index needs to be passed as scaled offset.
4061 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
4062 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
4063 if (BytesPerElt > 1)
4064 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
4065 }
4066 }
4067
4068 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
4069 return Builder.CreateCall(F, Ops);
4070}
4071
4072Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
4073 SmallVectorImpl<Value *> &Ops,
4074 unsigned IntID) {
4075 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
4076 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
4077 Value *BasePtr = Ops[1];
4078
4079 // Does the load have an offset?
4080 if (Ops.size() > 2)
4081 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
4082
4083 Function *F = CGM.getIntrinsic(IntID, {VTy});
4084 return Builder.CreateCall(F, {Predicate, BasePtr});
4085}
4086
4087Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
4088 SmallVectorImpl<Value *> &Ops,
4089 unsigned IntID) {
4090 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
4091
4092 unsigned N;
4093 switch (IntID) {
4094 case Intrinsic::aarch64_sve_st2:
4095 case Intrinsic::aarch64_sve_st1_pn_x2:
4096 case Intrinsic::aarch64_sve_stnt1_pn_x2:
4097 case Intrinsic::aarch64_sve_st2q:
4098 N = 2;
4099 break;
4100 case Intrinsic::aarch64_sve_st3:
4101 case Intrinsic::aarch64_sve_st3q:
4102 N = 3;
4103 break;
4104 case Intrinsic::aarch64_sve_st4:
4105 case Intrinsic::aarch64_sve_st1_pn_x4:
4106 case Intrinsic::aarch64_sve_stnt1_pn_x4:
4107 case Intrinsic::aarch64_sve_st4q:
4108 N = 4;
4109 break;
4110 default:
4111 llvm_unreachable("unknown intrinsic!");
4112 }
4113
4114 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
4115 Value *BasePtr = Ops[1];
4116
4117 // Does the store have an offset?
4118 if (Ops.size() > (2 + N))
4119 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
4120
4121 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
4122 // need to break up the tuple vector.
4123 SmallVector<Value *, 5> Operands;
4124 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
4125 Operands.push_back(Ops[I]);
4126 Operands.append({Predicate, BasePtr});
4127 Function *F = CGM.getIntrinsic(IntID, { VTy });
4128
4129 return Builder.CreateCall(F, Operands);
4130}
4131
4132// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
4133// svpmullt_pair intrinsics, with the exception that their results are bitcast
4134// to a wider type.
4135Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
4136 SmallVectorImpl<Value *> &Ops,
4137 unsigned BuiltinID) {
4138 // Splat scalar operand to vector (intrinsics with _n infix)
4139 if (TypeFlags.hasSplatOperand()) {
4140 unsigned OpNo = TypeFlags.getSplatOperand();
4141 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
4142 }
4143
4144 // The pair-wise function has a narrower overloaded type.
4145 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
4146 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
4147
4148 // Now bitcast to the wider result type.
4149 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
4150 return EmitSVEReinterpret(Call, Ty);
4151}
4152
4153Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
4154 ArrayRef<Value *> Ops, unsigned BuiltinID) {
4155 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
4156 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
4157 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
4158}
4159
4160Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
4161 SmallVectorImpl<Value *> &Ops,
4162 unsigned BuiltinID) {
4163 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
4164 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
4165 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
4166
4167 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
4168 Value *BasePtr = Ops[1];
4169
4170 // Fold the index operand into the base pointer if it was not omitted.
4171 if (Ops.size() > 3)
4172 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
4173
4174 Value *PrfOp = Ops.back();
4175
4176 Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
4177 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
4178}
4179
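// Emits an SVE predicated (masked) load. For extending loads such as
// svld1ub_u32 the data is loaded as <vscale x 4 x i8> and then zero- or
// sign-extended to the full <vscale x 4 x i32> result; for non-extending
// loads the final extension folds away because the types already match.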
4180Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
4181 llvm::Type *ReturnTy,
4182 SmallVectorImpl<Value *> &Ops,
4183 unsigned IntrinsicID,
4184 bool IsZExtReturn) {
4185 QualType LangPTy = E->getArg(1)->getType();
4186 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
4187 LangPTy->castAs<PointerType>()->getPointeeType());
4188
4189 // The Mfloat8 type is stored as a vector, so extra work
4190 // to extract the scalar element type is necessary.
4191 if (MemEltTy->isVectorTy()) {
4192 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
4193 "Only <1 x i8> expected");
4194 MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
4195 }
4196
4197 // The vector type that is returned may be different from the
4198 // eventual type loaded from memory.
4199 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
4200 llvm::ScalableVectorType *MemoryTy = nullptr;
4201 llvm::ScalableVectorType *PredTy = nullptr;
4202 bool IsQuadLoad = false;
4203 switch (IntrinsicID) {
4204 case Intrinsic::aarch64_sve_ld1uwq:
4205 case Intrinsic::aarch64_sve_ld1udq:
4206 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
4207 PredTy = llvm::ScalableVectorType::get(
4208 llvm::Type::getInt1Ty(getLLVMContext()), 1);
4209 IsQuadLoad = true;
4210 break;
4211 default:
4212 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
4213 PredTy = MemoryTy;
4214 break;
4215 }
4216
4217 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
4218 Value *BasePtr = Ops[1];
4219
4220 // Does the load have an offset?
4221 if (Ops.size() > 2)
4222 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
4223
4224 Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
4225 auto *Load =
4226 cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
4227 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
4228 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
4229
4230 if (IsQuadLoad)
4231 return Load;
4232
4233 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
4234 : Builder.CreateSExt(Load, VectorTy);
4235}
4236
4237Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
4238 SmallVectorImpl<Value *> &Ops,
4239 unsigned IntrinsicID) {
4240 QualType LangPTy = E->getArg(1)->getType();
4241 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
4242 LangPTy->castAs<PointerType>()->getPointeeType());
4243
4244 // The Mfloat8 type is stored as a vector, so extra work
4245 // to extract the scalar element type is necessary.
4246 if (MemEltTy->isVectorTy()) {
4247 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
4248 "Only <1 x i8> expected");
4249 MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
4250 }
4251
4252 // The vector type that is stored may be different from the
4253 // eventual type stored to memory.
4254 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
4255 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
4256
4257 auto PredTy = MemoryTy;
4258 auto AddrMemoryTy = MemoryTy;
4259 bool IsQuadStore = false;
4260
4261 switch (IntrinsicID) {
4262 case Intrinsic::aarch64_sve_st1wq:
4263 case Intrinsic::aarch64_sve_st1dq:
4264 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
4265 PredTy =
4266 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
4267 IsQuadStore = true;
4268 break;
4269 default:
4270 break;
4271 }
4272 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
4273 Value *BasePtr = Ops[1];
4274
4275 // Does the store have an offset?
4276 if (Ops.size() == 4)
4277 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
4278
4279 // Last value is always the data
4280 Value *Val =
4281 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
4282
4283 Function *F =
4284 CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
4285 auto *Store =
4286 cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
4287 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
4288 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
4289 return Store;
4290}
4291
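// Emits the SME ZA ld1/st1 builtins. For the _vnum forms the byte offset is
// vnum multiplied by the streaming vector length in bytes (aarch64.sme.cntsb),
// and the same vnum is also added to the tile slice index.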
4292Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
4293 SmallVectorImpl<Value *> &Ops,
4294 unsigned IntID) {
4295 Ops[2] = EmitSVEPredicateCast(
4296 Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
4297
4298 SmallVector<Value *> NewOps;
4299 NewOps.push_back(Ops[2]);
4300
4301 llvm::Value *BasePtr = Ops[3];
4302 llvm::Value *RealSlice = Ops[1];
4303 // If the intrinsic contains the vnum parameter, multiply it with the vector
4304 // size in bytes.
4305 if (Ops.size() == 5) {
4306 Function *StreamingVectorLength =
4307 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
4308 llvm::Value *StreamingVectorLengthCall =
4309 Builder.CreateCall(StreamingVectorLength);
4310 llvm::Value *Mulvl =
4311 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
4312 // The type of the ptr parameter is void *, so use Int8Ty here.
4313 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
4314 RealSlice = Builder.CreateZExt(RealSlice, Int64Ty);
4315 RealSlice = Builder.CreateAdd(RealSlice, Ops[4]);
4316 RealSlice = Builder.CreateTrunc(RealSlice, Int32Ty);
4317 }
4318 NewOps.push_back(BasePtr);
4319 NewOps.push_back(Ops[0]);
4320 NewOps.push_back(RealSlice);
4321 Function *F = CGM.getIntrinsic(IntID);
4322 return Builder.CreateCall(F, NewOps);
4323}
4324
4325Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
4326 SmallVectorImpl<Value *> &Ops,
4327 unsigned IntID) {
4328 auto *VecTy = getSVEType(TypeFlags);
4329 Function *F = CGM.getIntrinsic(IntID, VecTy);
4330 if (TypeFlags.isReadZA())
4331 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
4332 else if (TypeFlags.isWriteZA())
4333 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
4334 return Builder.CreateCall(F, Ops);
4335}
4336
4337Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
4338 SmallVectorImpl<Value *> &Ops,
4339 unsigned IntID) {
4340 // The svzero_za() intrinsic zeros the entire ZA array and has no parameters.
4341 if (Ops.size() == 0)
4342 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
4343 Function *F = CGM.getIntrinsic(IntID, {});
4344 return Builder.CreateCall(F, Ops);
4345}
4346
4347Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
4348 SmallVectorImpl<Value *> &Ops,
4349 unsigned IntID) {
4350 if (Ops.size() == 2)
4351 Ops.push_back(Builder.getInt32(0));
4352 else
4353 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
4354 Function *F = CGM.getIntrinsic(IntID, {});
4355 return Builder.CreateCall(F, Ops);
4356}
4357
4358// Splat the scalar operand across every lane of the given scalable vector
4359// type (used, among other places, for the _n forms of the ACLE builtins).
4360Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
4361 return Builder.CreateVectorSplat(
4362 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
4363}
4364
4365Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
4366 if (auto *Ty = Scalar->getType(); Ty->isVectorTy()) {
4367#ifndef NDEBUG
4368 auto *VecTy = cast<llvm::VectorType>(Ty);
4369 ElementCount EC = VecTy->getElementCount();
4370 assert(EC.isScalar() && VecTy->getElementType() == Int8Ty &&
4371 "Only <1 x i8> expected");
4372#endif
4373 Scalar = Builder.CreateExtractElement(Scalar, uint64_t(0));
4374 }
4375 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
4376}
4377
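// Bitwise reinterpret between SVE vector types, e.g. svuint16_t
// (<vscale x 8 x i16>) to svuint8_t (<vscale x 16 x i8>); tuple types are
// handled by bitcasting each struct member individually.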
4378Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
4379 // FIXME: For big endian this needs an additional REV, or needs a separate
4380 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
4381 // instruction is defined as 'bitwise' equivalent from memory point of
4382 // view (when storing/reloading), whereas the svreinterpret builtin
4383 // implements bitwise equivalent cast from register point of view.
4384 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
4385
4386 if (auto *StructTy = dyn_cast<StructType>(Ty)) {
4387 Value *Tuple = llvm::PoisonValue::get(Ty);
4388
4389 for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
4390 Value *In = Builder.CreateExtractValue(Val, I);
4391 Value *Out = Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
4392 Tuple = Builder.CreateInsertValue(Tuple, Out, I);
4393 }
4394
4395 return Tuple;
4396 }
4397
4398 return Builder.CreateBitCast(Val, Ty);
4399}
4400
4401static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
4402 SmallVectorImpl<Value *> &Ops) {
4403 auto *SplatZero = Constant::getNullValue(Ty);
4404 Ops.insert(Ops.begin(), SplatZero);
4405}
4406
4407static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
4408 SmallVectorImpl<Value *> &Ops) {
4409 auto *SplatUndef = UndefValue::get(Ty);
4410 Ops.insert(Ops.begin(), SplatUndef);
4411}
4412
4413SmallVector<llvm::Type *, 2>
4414CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
4415 llvm::Type *ResultType,
4416 ArrayRef<Value *> Ops) {
4417 if (TypeFlags.isOverloadNone())
4418 return {};
4419
4420 llvm::Type *DefaultType = getSVEType(TypeFlags);
4421
4422 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
4423 return {DefaultType, Ops[1]->getType()};
4424
4425 if (TypeFlags.isOverloadWhileRW())
4426 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
4427
4428 if (TypeFlags.isOverloadCvt())
4429 return {Ops[0]->getType(), Ops.back()->getType()};
4430
4431 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
4432 ResultType->isVectorTy())
4433 return {ResultType, Ops[1]->getType()};
4434
4435 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
4436 return {DefaultType};
4437}
4438
4439Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
4440 ArrayRef<Value *> Ops) {
4441 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
4442 "Expects TypeFlags.isTupleSet() or TypeFlags.isTupleGet()");
4443 unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();
4444
4445 if (TypeFlags.isTupleSet())
4446 return Builder.CreateInsertValue(Ops[0], Ops[2], Idx);
4447 return Builder.CreateExtractValue(Ops[0], Idx);
4448}
4449
4450Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
4451 llvm::Type *Ty,
4452 ArrayRef<Value *> Ops) {
4453 assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
4454
4455 Value *Tuple = llvm::PoisonValue::get(Ty);
4456 for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
4457 Tuple = Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
4458
4459 return Tuple;
4460}
4461
4462void CodeGenFunction::GetAArch64SVEProcessedOperands(
4463 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
4464 SVETypeFlags TypeFlags) {
4465 // Find out if any arguments are required to be integer constant expressions.
4466 unsigned ICEArguments = 0;
4467 ASTContext::GetBuiltinTypeError Error;
4468 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4469 assert(Error == ASTContext::GE_None && "Should not codegen an error");
4470
4471 // Tuple set/get only requires one insert/extract vector, which is
4472 // created by EmitSVETupleSetOrGet.
4473 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
4474
4475 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
4476 bool IsICE = ICEArguments & (1 << i);
4477 Value *Arg = EmitScalarExpr(E->getArg(i));
4478
4479 if (IsICE) {
4480 // If this is required to be a constant, constant fold it so that we know
4481 // that the generated intrinsic gets a ConstantInt.
4482 std::optional<llvm::APSInt> Result =
4483 E->getArg(i)->getIntegerConstantExpr(getContext());
4484 assert(Result && "Expected argument to be a constant");
4485
4486 // Immediates for SVE llvm intrinsics are always 32bit. We can safely
4487 // truncate because the immediate has been range checked and no valid
4488 // immediate requires more than a handful of bits.
4489 *Result = Result->extOrTrunc(32);
4490 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
4491 continue;
4492 }
4493
4494 if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
4495 for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
4496 Ops.push_back(Builder.CreateExtractValue(Arg, I));
4497
4498 continue;
4499 }
4500
4501 Ops.push_back(Arg);
4502 }
4503}
4504
4505Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
4506 const CallExpr *E) {
4507 llvm::Type *Ty = ConvertType(E->getType());
4508 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
4509 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
4510 Value *Val = EmitScalarExpr(E->getArg(0));
4511 return EmitSVEReinterpret(Val, Ty);
4512 }
4513
4516
4518 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4519 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4520
4521 if (TypeFlags.isLoad())
4522 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
4523 TypeFlags.isZExtReturn());
4524 else if (TypeFlags.isStore())
4525 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
4526 else if (TypeFlags.isGatherLoad())
4527 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4528 else if (TypeFlags.isScatterStore())
4529 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4530 else if (TypeFlags.isPrefetch())
4531 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4532 else if (TypeFlags.isGatherPrefetch())
4533 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4534 else if (TypeFlags.isStructLoad())
4535 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4536 else if (TypeFlags.isStructStore())
4537 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4538 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
4539 return EmitSVETupleSetOrGet(TypeFlags, Ops);
4540 else if (TypeFlags.isTupleCreate())
4541 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
4542 else if (TypeFlags.isUndef())
4543 return UndefValue::get(Ty);
4544 else if (Builtin->LLVMIntrinsic != 0) {
4545 // Emit set FPMR for intrinsics that require it
4546 if (TypeFlags.setsFPMR())
4547 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
4548 Ops.pop_back_val());
4549 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
4550 InsertExplicitZeroOperand(Builder, Ty, Ops);
4551
4552 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
4553 InsertExplicitUndefOperand(Builder, Ty, Ops);
4554
4555 // Some ACLE builtins leave out the argument to specify the predicate
4556 // pattern, which is expected to be expanded to an SV_ALL pattern.
4557 if (TypeFlags.isAppendSVALL())
4558 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
4559 if (TypeFlags.isInsertOp1SVALL())
4560 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
4561
4562 // Predicates must match the main datatype.
4563 for (Value *&Op : Ops)
4564 if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
4565 if (PredTy->getElementType()->isIntegerTy(1))
4566 Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));
4567
4568 // Splat scalar operand to vector (intrinsics with _n infix)
4569 if (TypeFlags.hasSplatOperand()) {
4570 unsigned OpNo = TypeFlags.getSplatOperand();
4571 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
4572 }
4573
4574 if (TypeFlags.isReverseCompare())
4575 std::swap(Ops[1], Ops[2]);
4576 else if (TypeFlags.isReverseUSDOT())
4577 std::swap(Ops[1], Ops[2]);
4578 else if (TypeFlags.isReverseMergeAnyBinOp() &&
4579 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4580 std::swap(Ops[1], Ops[2]);
4581 else if (TypeFlags.isReverseMergeAnyAccOp() &&
4582 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4583 std::swap(Ops[1], Ops[3]);
4584
4585 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
4586 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
4587 llvm::Type *OpndTy = Ops[1]->getType();
4588 auto *SplatZero = Constant::getNullValue(OpndTy);
4589 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
4590 }
4591
4592 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
4593 getSVEOverloadTypes(TypeFlags, Ty, Ops));
4594 Value *Call = Builder.CreateCall(F, Ops);
4595
4596 if (Call->getType() == Ty)
4597 return Call;
4598
4599 // Predicate results must be converted to svbool_t.
4600 if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ty))
4601 return EmitSVEPredicateCast(Call, PredTy);
4602 if (auto PredTupleTy = dyn_cast<llvm::StructType>(Ty))
4603 return EmitSVEPredicateTupleCast(Call, PredTupleTy);
4604
4605 llvm_unreachable("unsupported element count!");
4606 }
4607
4608 switch (BuiltinID) {
4609 default:
4610 return nullptr;
4611
4612 case SVE::BI__builtin_sve_svreinterpret_b: {
4613 auto SVCountTy =
4614 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4615 Function *CastFromSVCountF =
4616 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4617 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
4618 }
4619 case SVE::BI__builtin_sve_svreinterpret_c: {
4620 auto SVCountTy =
4621 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4622 Function *CastToSVCountF =
4623 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4624 return Builder.CreateCall(CastToSVCountF, Ops[0]);
4625 }
4626
4627 case SVE::BI__builtin_sve_svpsel_lane_b8:
4628 case SVE::BI__builtin_sve_svpsel_lane_b16:
4629 case SVE::BI__builtin_sve_svpsel_lane_b32:
4630 case SVE::BI__builtin_sve_svpsel_lane_b64:
4631 case SVE::BI__builtin_sve_svpsel_lane_c8:
4632 case SVE::BI__builtin_sve_svpsel_lane_c16:
4633 case SVE::BI__builtin_sve_svpsel_lane_c32:
4634 case SVE::BI__builtin_sve_svpsel_lane_c64: {
4635 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
4636 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
4637 "aarch64.svcount")) &&
4638 "Unexpected TargetExtType");
4639 auto SVCountTy =
4640 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4641 Function *CastFromSVCountF =
4642 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4643 Function *CastToSVCountF =
4644 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4645
4646 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
4647 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
4648 llvm::Value *Ops0 =
4649 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
4650 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
4651 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
4652 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
4653 }
4654 case SVE::BI__builtin_sve_svmov_b_z: {
4655 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
4656 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4657 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4658 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
4659 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
4660 }
4661
4662 case SVE::BI__builtin_sve_svnot_b_z: {
4663 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
4664 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4665 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4666 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
4667 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
4668 }
4669
4670 case SVE::BI__builtin_sve_svmovlb_u16:
4671 case SVE::BI__builtin_sve_svmovlb_u32:
4672 case SVE::BI__builtin_sve_svmovlb_u64:
4673 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
4674
4675 case SVE::BI__builtin_sve_svmovlb_s16:
4676 case SVE::BI__builtin_sve_svmovlb_s32:
4677 case SVE::BI__builtin_sve_svmovlb_s64:
4678 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
4679
4680 case SVE::BI__builtin_sve_svmovlt_u16:
4681 case SVE::BI__builtin_sve_svmovlt_u32:
4682 case SVE::BI__builtin_sve_svmovlt_u64:
4683 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
4684
4685 case SVE::BI__builtin_sve_svmovlt_s16:
4686 case SVE::BI__builtin_sve_svmovlt_s32:
4687 case SVE::BI__builtin_sve_svmovlt_s64:
4688 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
4689
4690 case SVE::BI__builtin_sve_svpmullt_u16:
4691 case SVE::BI__builtin_sve_svpmullt_u64:
4692 case SVE::BI__builtin_sve_svpmullt_n_u16:
4693 case SVE::BI__builtin_sve_svpmullt_n_u64:
4694 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
4695
4696 case SVE::BI__builtin_sve_svpmullb_u16:
4697 case SVE::BI__builtin_sve_svpmullb_u64:
4698 case SVE::BI__builtin_sve_svpmullb_n_u16:
4699 case SVE::BI__builtin_sve_svpmullb_n_u64:
4700 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
4701
4702 case SVE::BI__builtin_sve_svdup_n_b8:
4703 case SVE::BI__builtin_sve_svdup_n_b16:
4704 case SVE::BI__builtin_sve_svdup_n_b32:
4705 case SVE::BI__builtin_sve_svdup_n_b64: {
4706 Value *CmpNE =
4707 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
4708 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
4709 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
4710 return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
4711 }
4712
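 // The svdupq_n_* cases below build one 128-bit quadword from the scalar
 // arguments and replicate it across the whole vector with dupq_lane; for the
 // boolean forms each argument is first widened to a 128/numelts-bit lane and
 // the replicated vector is turned back into a predicate with a cmpne against
 // zero, as the comments inside the case describe.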
4713 case SVE::BI__builtin_sve_svdupq_n_b8:
4714 case SVE::BI__builtin_sve_svdupq_n_b16:
4715 case SVE::BI__builtin_sve_svdupq_n_b32:
4716 case SVE::BI__builtin_sve_svdupq_n_b64:
4717 case SVE::BI__builtin_sve_svdupq_n_u8:
4718 case SVE::BI__builtin_sve_svdupq_n_s8:
4719 case SVE::BI__builtin_sve_svdupq_n_u64:
4720 case SVE::BI__builtin_sve_svdupq_n_f64:
4721 case SVE::BI__builtin_sve_svdupq_n_s64:
4722 case SVE::BI__builtin_sve_svdupq_n_u16:
4723 case SVE::BI__builtin_sve_svdupq_n_f16:
4724 case SVE::BI__builtin_sve_svdupq_n_bf16:
4725 case SVE::BI__builtin_sve_svdupq_n_s16:
4726 case SVE::BI__builtin_sve_svdupq_n_u32:
4727 case SVE::BI__builtin_sve_svdupq_n_f32:
4728 case SVE::BI__builtin_sve_svdupq_n_s32: {
4729 // These builtins are implemented by building a fixed-length vector from the
4730 // scalar operands and replicating it across the vector with dupq_lane.
4731 unsigned NumOpnds = Ops.size();
4732
4733 bool IsBoolTy =
4734 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
4735
4736 // For svdupq_n_b* the element type is an integer of width 128/numelts,
4737 // so that the compare can use the width that is natural for the expected
4738 // number of predicate lanes.
4739 llvm::Type *EltTy = Ops[0]->getType();
4740 if (IsBoolTy)
4741 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
4742
4744 for (unsigned I = 0; I < NumOpnds; ++I)
4745 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
4746 Value *Vec = BuildVector(VecOps);
4747
4748 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
4749 Value *InsertSubVec = Builder.CreateInsertVector(
4750 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, uint64_t(0));
4751
4752 Function *F =
4753 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
4754 Value *DupQLane =
4755 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
4756
4757 if (!IsBoolTy)
4758 return DupQLane;
4759
4760 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4761 Value *Pred = EmitSVEAllTruePred(TypeFlags);
4762
4763 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
4764 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
4765 : Intrinsic::aarch64_sve_cmpne_wide,
4766 OverloadedTy);
4767 Value *Call = Builder.CreateCall(
4768 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
4769 return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
4770 }
4771
4772 case SVE::BI__builtin_sve_svpfalse_b:
4773 return ConstantInt::getFalse(Ty);
4774
4775 case SVE::BI__builtin_sve_svpfalse_c: {
4776 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
4777 Function *CastToSVCountF =
4778 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
4779 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
4780 }
4781
4782 case SVE::BI__builtin_sve_svlen_bf16:
4783 case SVE::BI__builtin_sve_svlen_f16:
4784 case SVE::BI__builtin_sve_svlen_f32:
4785 case SVE::BI__builtin_sve_svlen_f64:
4786 case SVE::BI__builtin_sve_svlen_s8:
4787 case SVE::BI__builtin_sve_svlen_s16:
4788 case SVE::BI__builtin_sve_svlen_s32:
4789 case SVE::BI__builtin_sve_svlen_s64:
4790 case SVE::BI__builtin_sve_svlen_u8:
4791 case SVE::BI__builtin_sve_svlen_u16:
4792 case SVE::BI__builtin_sve_svlen_u32:
4793 case SVE::BI__builtin_sve_svlen_u64: {
4794 SVETypeFlags TF(Builtin->TypeModifier);
4795 return Builder.CreateElementCount(Ty, getSVEType(TF)->getElementCount());
4796 }
4797
4798 case SVE::BI__builtin_sve_svtbl2_u8:
4799 case SVE::BI__builtin_sve_svtbl2_s8:
4800 case SVE::BI__builtin_sve_svtbl2_u16:
4801 case SVE::BI__builtin_sve_svtbl2_s16:
4802 case SVE::BI__builtin_sve_svtbl2_u32:
4803 case SVE::BI__builtin_sve_svtbl2_s32:
4804 case SVE::BI__builtin_sve_svtbl2_u64:
4805 case SVE::BI__builtin_sve_svtbl2_s64:
4806 case SVE::BI__builtin_sve_svtbl2_f16:
4807 case SVE::BI__builtin_sve_svtbl2_bf16:
4808 case SVE::BI__builtin_sve_svtbl2_f32:
4809 case SVE::BI__builtin_sve_svtbl2_f64: {
4810 SVETypeFlags TF(Builtin->TypeModifier);
4811 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, getSVEType(TF));
4812 return Builder.CreateCall(F, Ops);
4813 }
4814
4815 case SVE::BI__builtin_sve_svset_neonq_s8:
4816 case SVE::BI__builtin_sve_svset_neonq_s16:
4817 case SVE::BI__builtin_sve_svset_neonq_s32:
4818 case SVE::BI__builtin_sve_svset_neonq_s64:
4819 case SVE::BI__builtin_sve_svset_neonq_u8:
4820 case SVE::BI__builtin_sve_svset_neonq_u16:
4821 case SVE::BI__builtin_sve_svset_neonq_u32:
4822 case SVE::BI__builtin_sve_svset_neonq_u64:
4823 case SVE::BI__builtin_sve_svset_neonq_f16:
4824 case SVE::BI__builtin_sve_svset_neonq_f32:
4825 case SVE::BI__builtin_sve_svset_neonq_f64:
4826 case SVE::BI__builtin_sve_svset_neonq_bf16: {
4827 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], uint64_t(0));
4828 }
4829
4830 case SVE::BI__builtin_sve_svget_neonq_s8:
4831 case SVE::BI__builtin_sve_svget_neonq_s16:
4832 case SVE::BI__builtin_sve_svget_neonq_s32:
4833 case SVE::BI__builtin_sve_svget_neonq_s64:
4834 case SVE::BI__builtin_sve_svget_neonq_u8:
4835 case SVE::BI__builtin_sve_svget_neonq_u16:
4836 case SVE::BI__builtin_sve_svget_neonq_u32:
4837 case SVE::BI__builtin_sve_svget_neonq_u64:
4838 case SVE::BI__builtin_sve_svget_neonq_f16:
4839 case SVE::BI__builtin_sve_svget_neonq_f32:
4840 case SVE::BI__builtin_sve_svget_neonq_f64:
4841 case SVE::BI__builtin_sve_svget_neonq_bf16: {
4842 return Builder.CreateExtractVector(Ty, Ops[0], uint64_t(0));
4843 }
4844
4845 case SVE::BI__builtin_sve_svdup_neonq_s8:
4846 case SVE::BI__builtin_sve_svdup_neonq_s16:
4847 case SVE::BI__builtin_sve_svdup_neonq_s32:
4848 case SVE::BI__builtin_sve_svdup_neonq_s64:
4849 case SVE::BI__builtin_sve_svdup_neonq_u8:
4850 case SVE::BI__builtin_sve_svdup_neonq_u16:
4851 case SVE::BI__builtin_sve_svdup_neonq_u32:
4852 case SVE::BI__builtin_sve_svdup_neonq_u64:
4853 case SVE::BI__builtin_sve_svdup_neonq_f16:
4854 case SVE::BI__builtin_sve_svdup_neonq_f32:
4855 case SVE::BI__builtin_sve_svdup_neonq_f64:
4856 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
4857 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
4858 uint64_t(0));
4859 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
4860 {Insert, Builder.getInt64(0)});
4861 }
4862 }
4863
4864 /// Should not happen
4865 return nullptr;
4866}
4867
4868static void swapCommutativeSMEOperands(unsigned BuiltinID,
4870 unsigned MultiVec;
4871 switch (BuiltinID) {
4872 default:
4873 return;
4874 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
4875 MultiVec = 1;
4876 break;
4877 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
4878 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
4879 MultiVec = 2;
4880 break;
4881 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
4882 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
4883 MultiVec = 4;
4884 break;
4885 }
4886
4887 if (MultiVec > 0)
4888 for (unsigned I = 0; I < MultiVec; ++I)
4889 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
4890}
4891
4892Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
4893 const CallExpr *E) {
4896
4898 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4899 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4900
4901 if (TypeFlags.isLoad() || TypeFlags.isStore())
4902 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4903 else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
4904 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4905 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
4906 BuiltinID == SME::BI__builtin_sme_svzero_za)
4907 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4908 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
4909 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
4910 BuiltinID == SME::BI__builtin_sme_svldr_za ||
4911 BuiltinID == SME::BI__builtin_sme_svstr_za)
4912 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4913
4914 // Emit set FPMR for intrinsics that require it
4915 if (TypeFlags.setsFPMR())
4916 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
4917 Ops.pop_back_val());
4918 // Handle builtins which require their multi-vector operands to be swapped
4919 swapCommutativeSMEOperands(BuiltinID, Ops);
4920
4921 // Should not happen!
4922 if (Builtin->LLVMIntrinsic == 0)
4923 return nullptr;
4924
4925 // Predicates must match the main datatype.
4926 for (Value *&Op : Ops)
4927 if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
4928 if (PredTy->getElementType()->isIntegerTy(1))
4929 Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));
4930
4931 Function *F =
4932 TypeFlags.isOverloadNone()
4933 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
4934 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
4935
4936 return Builder.CreateCall(F, Ops);
4937}
4938
4939/// Helper for the read/write/add/inc X18 builtins: read the X18 register and
4940/// return it as an i8 pointer.
4942 LLVMContext &Context = CGF.CGM.getLLVMContext();
4943 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
4944 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4945 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4946 llvm::Function *F =
4947 CGF.CGM.getIntrinsic(Intrinsic::read_register, {CGF.Int64Ty});
4948 llvm::Value *X18 = CGF.Builder.CreateCall(F, Metadata);
4949 return CGF.Builder.CreateIntToPtr(X18, CGF.Int8PtrTy);
4950}
4951
4952Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
4953 const CallExpr *E,
4954 llvm::Triple::ArchType Arch) {
4955 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
4956 BuiltinID <= clang::AArch64::LastSVEBuiltin)
4957 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
4958
4959 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
4960 BuiltinID <= clang::AArch64::LastSMEBuiltin)
4961 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
4962
4963 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
4964 return EmitAArch64CpuSupports(E);
4965
4966 unsigned HintID = static_cast<unsigned>(-1);
4967 switch (BuiltinID) {
4968 default: break;
4969 case clang::AArch64::BI__builtin_arm_nop:
4970 HintID = 0;
4971 break;
4972 case clang::AArch64::BI__builtin_arm_yield:
4973 case clang::AArch64::BI__yield:
4974 HintID = 1;
4975 break;
4976 case clang::AArch64::BI__builtin_arm_wfe:
4977 case clang::AArch64::BI__wfe:
4978 HintID = 2;
4979 break;
4980 case clang::AArch64::BI__builtin_arm_wfi:
4981 case clang::AArch64::BI__wfi:
4982 HintID = 3;
4983 break;
4984 case clang::AArch64::BI__builtin_arm_sev:
4985 case clang::AArch64::BI__sev:
4986 HintID = 4;
4987 break;
4988 case clang::AArch64::BI__builtin_arm_sevl:
4989 case clang::AArch64::BI__sevl:
4990 HintID = 5;
4991 break;
4992 }
4993
4994 if (HintID != static_cast<unsigned>(-1)) {
4995 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
4996 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
4997 }
4998
4999 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
5000 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
5001 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5002 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
5003 }
5004
5005 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
5006 // Create call to __arm_sme_state and store the results to the two pointers.
5007 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
5008 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
5009 false),
5010 "__arm_sme_state"));
5011 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
5012 "aarch64_pstate_sm_compatible");
5013 CI->setAttributes(Attrs);
5014 CI->setCallingConv(
5015 llvm::CallingConv::
5016 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
5017 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
5018 EmitPointerWithAlignment(E->getArg(0)));
5019 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
5020 EmitPointerWithAlignment(E->getArg(1)));
5021 }
5022
5023 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
5024 assert((getContext().getTypeSize(E->getType()) == 32) &&
5025 "rbit of unusual size!");
5026 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5027 return Builder.CreateCall(
5028 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5029 }
5030 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
5031 assert((getContext().getTypeSize(E->getType()) == 64) &&
5032 "rbit of unusual size!");
5033 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5034 return Builder.CreateCall(
5035 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5036 }
5037
5038 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
5039 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
5040 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5041 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
5042 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
5043 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
5044 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
5045 return Res;
5046 }
5047
5048 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
5049 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5050 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
5051 "cls");
5052 }
5053 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
5054 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5055 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
5056 "cls");
5057 }
5058
5059 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
5060 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
5061 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5062 llvm::Type *Ty = Arg->getType();
5063 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
5064 Arg, "frint32z");
5065 }
5066
5067 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
5068 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
5069 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5070 llvm::Type *Ty = Arg->getType();
5071 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
5072 Arg, "frint64z");
5073 }
5074
5075 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
5076 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
5077 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5078 llvm::Type *Ty = Arg->getType();
5079 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
5080 Arg, "frint32x");
5081 }
5082
5083 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
5084 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
5085 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5086 llvm::Type *Ty = Arg->getType();
5087 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
5088 Arg, "frint64x");
5089 }
5090
5091 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
5092 assert((getContext().getTypeSize(E->getType()) == 32) &&
5093 "__jcvt of unusual size!");
5094 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5095 return Builder.CreateCall(
5096 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
5097 }
5098
5099 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
5100 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
5101 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
5102 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
5103 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
5104 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
5105
5106 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
5107 // Load from the address via an LLVM intrinsic, receiving a
5108 // tuple of 8 i64 words, and store each one to ValPtr.
5109 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
5110 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
5111 llvm::Value *ToRet;
5112 for (size_t i = 0; i < 8; i++) {
5113 llvm::Value *ValOffsetPtr =
5114 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
5115 Address Addr =
5116 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
5117 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
5118 }
5119 return ToRet;
5120 } else {
5121 // Load 8 i64 words from ValPtr, and store them to the address
5122 // via an LLVM intrinsic.
5123 SmallVector<llvm::Value *, 9> Args;
5124 Args.push_back(MemAddr);
5125 for (size_t i = 0; i < 8; i++) {
5126 llvm::Value *ValOffsetPtr =
5127 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
5128 Address Addr =
5129 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
5130 Args.push_back(Builder.CreateLoad(Addr));
5131 }
5132
5133 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
5134 ? Intrinsic::aarch64_st64b
5135 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
5136 ? Intrinsic::aarch64_st64bv
5137 : Intrinsic::aarch64_st64bv0);
5138 Function *F = CGM.getIntrinsic(Intr);
5139 return Builder.CreateCall(F, Args);
5140 }
5141 }
5142
5143 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
5144 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
5145
5146 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
5147 ? Intrinsic::aarch64_rndr
5148 : Intrinsic::aarch64_rndrrs);
5149 Function *F = CGM.getIntrinsic(Intr);
5150 llvm::Value *Val = Builder.CreateCall(F);
5151 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
5152 Value *Status = Builder.CreateExtractValue(Val, 1);
5153
5154 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
5155 Builder.CreateStore(RandomValue, MemAddress);
5156 Status = Builder.CreateZExt(Status, Int32Ty);
5157 return Status;
5158 }
5159
5160 if (BuiltinID == clang::AArch64::BI__clear_cache) {
5161 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5162 const FunctionDecl *FD = E->getDirectCallee();
5163 Value *Ops[2];
5164 for (unsigned i = 0; i < 2; i++)
5165 Ops[i] = EmitScalarExpr(E->getArg(i));
5166 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5167 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5168 StringRef Name = FD->getName();
5169 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5170 }
5171
5172 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
5173 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
5174 getContext().getTypeSize(E->getType()) == 128) {
5175 Function *F =
5176 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
5177 ? Intrinsic::aarch64_ldaxp
5178 : Intrinsic::aarch64_ldxp);
5179
5180 Value *LdPtr = EmitScalarExpr(E->getArg(0));
5181 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
5182
5183 Value *Val0 = Builder.CreateExtractValue(Val, 1);
5184 Value *Val1 = Builder.CreateExtractValue(Val, 0);
5185 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5186 Val0 = Builder.CreateZExt(Val0, Int128Ty);
5187 Val1 = Builder.CreateZExt(Val1, Int128Ty);
5188
5189 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
5190 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5191 Val = Builder.CreateOr(Val, Val1);
5192 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5193 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
5194 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
5195 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5196
5197 QualType Ty = E->getType();
5198 llvm::Type *RealResTy = ConvertType(Ty);
5199 llvm::Type *IntTy =
5200 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
5201
5202 Function *F =
5203 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
5204 ? Intrinsic::aarch64_ldaxr
5205 : Intrinsic::aarch64_ldxr,
5206 UnqualPtrTy);
5207 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
5208 Val->addParamAttr(
5209 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
5210
5211 if (RealResTy->isPointerTy())
5212 return Builder.CreateIntToPtr(Val, RealResTy);
5213
5214 llvm::Type *IntResTy = llvm::IntegerType::get(
5215 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
5216 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
5217 RealResTy);
5218 }
5219
5220 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
5221 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
5222 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
5223 Function *F =
5224 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
5225 ? Intrinsic::aarch64_stlxp
5226 : Intrinsic::aarch64_stxp);
5227 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
5228
5229 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
5230 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
5231
5232 Tmp = Tmp.withElementType(STy);
5233 llvm::Value *Val = Builder.CreateLoad(Tmp);
5234
5235 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
5236 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
5237 Value *StPtr = EmitScalarExpr(E->getArg(1));
5238 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
5239 }
5240
5241 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
5242 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
5243 Value *StoreVal = EmitScalarExpr(E->getArg(0));
5244 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
5245
5246 QualType Ty = E->getArg(0)->getType();
5247 llvm::Type *StoreTy =
5248 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
5249
5250 if (StoreVal->getType()->isPointerTy())
5251 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
5252 else {
5253 llvm::Type *IntTy = llvm::IntegerType::get(
5254 getLLVMContext(),
5255 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
5256 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
5257 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
5258 }
5259
5260 Function *F =
5261 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
5262 ? Intrinsic::aarch64_stlxr
5263 : Intrinsic::aarch64_stxr,
5264 StoreAddr->getType());
5265 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
5266 CI->addParamAttr(
5267 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
5268 return CI;
5269 }
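// The store-exclusive side mirrors this: a 16-byte __builtin_arm_strex/_stlex
// value is spilled to a temporary, split into two i64 words, and passed to
// @llvm.aarch64.stxp/@llvm.aarch64.stlxp, while e.g.
//   int failed = __builtin_arm_strex(v, (int *)p);
// should become a single @llvm.aarch64.stxr call (with an elementtype
// annotation on the pointer operand) whose i32 status result is returned
// unchanged.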
5270
5271 if (BuiltinID == clang::AArch64::BI__getReg) {
5272 Expr::EvalResult Result;
5273 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
5274 llvm_unreachable("Sema will ensure that the parameter is constant");
5275
5276 llvm::APSInt Value = Result.Val.getInt();
5277 LLVMContext &Context = CGM.getLLVMContext();
5278 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
5279
5280 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
5281 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5282 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5283
5284 llvm::Function *F =
5285 CGM.getIntrinsic(Intrinsic::read_register, {Int64Ty});
5286 return Builder.CreateCall(F, Metadata);
5287 }
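// Sketch of the register read above: the constant argument selects the name
// ("sp" for 31, otherwise "xN"), so __getReg(29) is expected to emit roughly
//   call i64 @llvm.read_register.i64(metadata !"x29")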
5288
5289 if (BuiltinID == clang::AArch64::BI__break) {
5290 Expr::EvalResult Result;
5291 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
5292 llvm_unreachable("Sema will ensure that the parameter is constant");
5293
5294 llvm::Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
5295 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
5296 }
5297
5298 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
5299 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
5300 return Builder.CreateCall(F);
5301 }
5302
5303 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
5304 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
5305 llvm::SyncScope::SingleThread);
5306
5307 // CRC32
5308 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5309 switch (BuiltinID) {
5310 case clang::AArch64::BI__builtin_arm_crc32b:
5311 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
5312 case clang::AArch64::BI__builtin_arm_crc32cb:
5313 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
5314 case clang::AArch64::BI__builtin_arm_crc32h:
5315 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
5316 case clang::AArch64::BI__builtin_arm_crc32ch:
5317 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
5318 case clang::AArch64::BI__builtin_arm_crc32w:
5319 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
5320 case clang::AArch64::BI__builtin_arm_crc32cw:
5321 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
5322 case clang::AArch64::BI__builtin_arm_crc32d:
5323 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
5324 case clang::AArch64::BI__builtin_arm_crc32cd:
5325 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
5326 }
5327
5328 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5329 Value *Arg0 = EmitScalarExpr(E->getArg(0));
5330 Value *Arg1 = EmitScalarExpr(E->getArg(1));
5331 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5332
5333 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
5334 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
5335
5336 return Builder.CreateCall(F, {Arg0, Arg1});
5337 }
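// For the CRC32 family the accumulator stays i32 and the data operand is
// widened to whatever the selected intrinsic expects; e.g. a call such as
// __builtin_arm_crc32b(acc, byte) should zero-extend the byte to i32 before
// calling @llvm.aarch64.crc32b, and the "d" variants map onto the
// crc32x/crc32cx intrinsics, which take an i64 data argument.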
5338
5339 // Memory Operations (MOPS)
5340 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
5341 Value *Dst = EmitScalarExpr(E->getArg(0));
5342 Value *Val = EmitScalarExpr(E->getArg(1));
5343 Value *Size = EmitScalarExpr(E->getArg(2));
5344 Val = Builder.CreateTrunc(Val, Int8Ty);
5345 Size = Builder.CreateIntCast(Size, Int64Ty, false);
5346 return Builder.CreateCall(
5347 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
5348 }
5349
5350 // Memory Tagging Extensions (MTE) Intrinsics
5351 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
5352 switch (BuiltinID) {
5353 case clang::AArch64::BI__builtin_arm_irg:
5354 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
5355 case clang::AArch64::BI__builtin_arm_addg:
5356 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
5357 case clang::AArch64::BI__builtin_arm_gmi:
5358 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
5359 case clang::AArch64::BI__builtin_arm_ldg:
5360 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
5361 case clang::AArch64::BI__builtin_arm_stg:
5362 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
5363 case clang::AArch64::BI__builtin_arm_subp:
5364 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
5365 }
5366
5367 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
5368 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
5369 Value *Pointer = EmitScalarExpr(E->getArg(0));
5370 Value *Mask = EmitScalarExpr(E->getArg(1));
5371
5372 Mask = Builder.CreateZExt(Mask, Int64Ty);
5373 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5374 {Pointer, Mask});
5375 }
5376 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
5377 Value *Pointer = EmitScalarExpr(E->getArg(0));
5378 Value *TagOffset = EmitScalarExpr(E->getArg(1));
5379
5380 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
5381 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5382 {Pointer, TagOffset});
5383 }
5384 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
5385 Value *Pointer = EmitScalarExpr(E->getArg(0));
5386 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
5387
5388 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
5389 return Builder.CreateCall(
5390 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
5391 }
5392 // Although it is possible to supply a different return
5393 // address (first arg) to this intrinsic, for now we set the
5394 // return address to be the same as the input address.
5395 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
5396 Value *TagAddress = EmitScalarExpr(E->getArg(0));
5397 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5398 {TagAddress, TagAddress});
5399 }
5400 // Although it is possible to supply a different tag (to set)
5401 // to this intrinsic (as first arg), for now we supply
5402 // the tag that is in the input address arg (the common use case).
5403 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
5404 Value *TagAddress = EmitScalarExpr(E->getArg(0));
5405 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5406 {TagAddress, TagAddress});
5407 }
5408 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
5409 Value *PointerA = EmitScalarExpr(E->getArg(0));
5410 Value *PointerB = EmitScalarExpr(E->getArg(1));
5411 return Builder.CreateCall(
5412 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
5413 }
5414 }
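// A minimal sketch of the MTE mapping above, assuming some int *p:
//   int *q = __builtin_arm_irg(p, 0);  // -> @llvm.aarch64.irg(p, i64 0)
//   __builtin_arm_stg(q);              // -> @llvm.aarch64.stg(q, q)
// i.e. ldg and stg pass the same tagged pointer for both intrinsic operands,
// as the comments above note.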
5415
5416 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5417 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
5418 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5419 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
5420 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
5421 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
5422 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
5423 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
5424
5425 SpecialRegisterAccessKind AccessKind = Write;
5426 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5427 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
5428 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5429 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
5430 AccessKind = VolatileRead;
5431
5432 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
5433 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
5434
5435 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5436 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
5437
5438 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5439 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
5440
5441 llvm::Type *ValueType;
5442 llvm::Type *RegisterType = Int64Ty;
5443 if (Is32Bit) {
5444 ValueType = Int32Ty;
5445 } else if (Is128Bit) {
5446 llvm::Type *Int128Ty =
5447 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
5448 ValueType = Int128Ty;
5449 RegisterType = Int128Ty;
5450 } else if (IsPointerBuiltin) {
5451 ValueType = VoidPtrTy;
5452 } else {
5453 ValueType = Int64Ty;
5454 }
5455
5456 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
5457 AccessKind);
5458 }
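// Put differently, the suffix selects the width handled here: rsr/wsr use a
// 32-bit value in a 64-bit register, rsr64/wsr64 use i64, rsr128/wsr128 use
// i128, and rsrp/wsrp treat the value as a pointer. A read such as
//   unsigned long v = __builtin_arm_rsr64("tpidr_el0");
// is expected to become a volatile register-read intrinsic of that register.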
5459
5460 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5461 BuiltinID == clang::AArch64::BI_WriteStatusReg ||
5462 BuiltinID == clang::AArch64::BI__sys) {
5463 LLVMContext &Context = CGM.getLLVMContext();
5464
5465 unsigned SysReg =
5466 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
5467
5468 std::string SysRegStr;
5469 unsigned SysRegOp0 = (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5470 BuiltinID == clang::AArch64::BI_WriteStatusReg)
5471 ? ((1 << 1) | ((SysReg >> 14) & 1))
5472 : 1;
5473 llvm::raw_string_ostream(SysRegStr)
5474 << SysRegOp0 << ":" << ((SysReg >> 11) & 7) << ":"
5475 << ((SysReg >> 7) & 15) << ":" << ((SysReg >> 3) & 15) << ":"
5476 << (SysReg & 7);
5477
5478 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
5479 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5480 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5481
5482 llvm::Type *RegisterType = Int64Ty;
5483 llvm::Type *Types[] = { RegisterType };
5484
5485 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
5486 llvm::Function *F = CGM.getIntrinsic(Intrinsic::read_register, Types);
5487
5488 return Builder.CreateCall(F, Metadata);
5489 }
5490
5491 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
5492 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
5493 llvm::Value *Result = Builder.CreateCall(F, {Metadata, ArgValue});
5494 if (BuiltinID == clang::AArch64::BI__sys) {
5495 // Return 0 for convenience, even though MSVC returns some other undefined
5496 // value.
5497 Result = ConstantInt::get(Builder.getInt32Ty(), 0);
5498 }
5499 return Result;
5500 }
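// The packed MSVC register encoding is unpacked above into an
// "op0:op1:CRn:CRm:op2" string; for example the ARM64_SYSREG(3, 3, 13, 0, 2)
// encoding of TPIDR_EL0 should decode to "3:3:13:0:2", while __sys always
// substitutes op0 == 1. Reads then go through @llvm.read_register.i64, writes
// through @llvm.write_register.i64, and __sys returns a constant 0 for
// convenience.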
5501
5502 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
5503 llvm::Function *F =
5504 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
5505 return Builder.CreateCall(F);
5506 }
5507
5508 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
5509 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
5510 return Builder.CreateCall(F);
5511 }
5512
5513 if (BuiltinID == clang::AArch64::BI__mulh ||
5514 BuiltinID == clang::AArch64::BI__umulh) {
5515 llvm::Type *ResType = ConvertType(E->getType());
5516 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5517
5518 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
5519 Value *LHS =
5520 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
5521 Value *RHS =
5522 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
5523
5524 Value *MulResult, *HigherBits;
5525 if (IsSigned) {
5526 MulResult = Builder.CreateNSWMul(LHS, RHS);
5527 HigherBits = Builder.CreateAShr(MulResult, 64);
5528 } else {
5529 MulResult = Builder.CreateNUWMul(LHS, RHS);
5530 HigherBits = Builder.CreateLShr(MulResult, 64);
5531 }
5532 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
5533
5534 return HigherBits;
5535 }
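// A sketch of the high-multiply above: __umulh(a, b) zero-extends both
// operands to i128, multiplies, shifts right by 64, and truncates, i.e. it
// yields the upper 64 bits of the full 128-bit product; __mulh does the same
// with sign extension and an arithmetic shift.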
5536
5537 if (BuiltinID == AArch64::BI__writex18byte ||
5538 BuiltinID == AArch64::BI__writex18word ||
5539 BuiltinID == AArch64::BI__writex18dword ||
5540 BuiltinID == AArch64::BI__writex18qword) {
5541 // Process the args first
5542 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5543 Value *DataArg = EmitScalarExpr(E->getArg(1));
5544
5545 // Read x18 as i8*
5546 llvm::Value *X18 = readX18AsPtr(*this);
5547
5548 // Store val at x18 + offset
5549 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5550 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5551 StoreInst *Store =
5552 Builder.CreateAlignedStore(DataArg, Ptr, CharUnits::One());
5553 return Store;
5554 }
5555
5556 if (BuiltinID == AArch64::BI__readx18byte ||
5557 BuiltinID == AArch64::BI__readx18word ||
5558 BuiltinID == AArch64::BI__readx18dword ||
5559 BuiltinID == AArch64::BI__readx18qword) {
5560 // Process the args first
5561 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5562
5563 // Read x18 as i8*
5564 llvm::Value *X18 = readX18AsPtr(*this);
5565
5566 // Load x18 + offset
5567 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5568 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5569 llvm::Type *IntTy = ConvertType(E->getType());
5570 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
5571 return Load;
5572 }
5573
5574 if (BuiltinID == AArch64::BI__addx18byte ||
5575 BuiltinID == AArch64::BI__addx18word ||
5576 BuiltinID == AArch64::BI__addx18dword ||
5577 BuiltinID == AArch64::BI__addx18qword ||
5578 BuiltinID == AArch64::BI__incx18byte ||
5579 BuiltinID == AArch64::BI__incx18word ||
5580 BuiltinID == AArch64::BI__incx18dword ||
5581 BuiltinID == AArch64::BI__incx18qword) {
5582 llvm::Type *IntTy;
5583 bool isIncrement;
5584 switch (BuiltinID) {
5585 case AArch64::BI__incx18byte:
5586 IntTy = Int8Ty;
5587 isIncrement = true;
5588 break;
5589 case AArch64::BI__incx18word:
5590 IntTy = Int16Ty;
5591 isIncrement = true;
5592 break;
5593 case AArch64::BI__incx18dword:
5594 IntTy = Int32Ty;
5595 isIncrement = true;
5596 break;
5597 case AArch64::BI__incx18qword:
5598 IntTy = Int64Ty;
5599 isIncrement = true;
5600 break;
5601 default:
5602 IntTy = ConvertType(E->getArg(1)->getType());
5603 isIncrement = false;
5604 break;
5605 }
5606 // Process the args first
5607 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5608 Value *ValToAdd =
5609 isIncrement ? ConstantInt::get(IntTy, 1) : EmitScalarExpr(E->getArg(1));
5610
5611 // Read x18 as i8*
5612 llvm::Value *X18 = readX18AsPtr(*this);
5613
5614 // Load x18 + offset
5615 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5616 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5617 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
5618
5619 // Add values
5620 Value *AddResult = Builder.CreateAdd(Load, ValToAdd);
5621
5622 // Store val at x18 + offset
5623 StoreInst *Store =
5624 Builder.CreateAlignedStore(AddResult, Ptr, CharUnits::One());
5625 return Store;
5626 }
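// All of the x18 accessors above share one shape: read the platform register
// x18 as a base pointer, add the byte offset, and then load, store, or
// read-modify-write a value of the width implied by the suffix, always with
// byte alignment. For example, __incx18dword(8) is expected to load the i32
// at x18+8, add 1, and store it back.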
5627
5628 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
5629 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
5630 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
5631 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
5632 Value *Arg = EmitScalarExpr(E->getArg(0));
5633 llvm::Type *RetTy = ConvertType(E->getType());
5634 return Builder.CreateBitCast(Arg, RetTy);
5635 }
5636
5637 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5638 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5639 BuiltinID == AArch64::BI_CountLeadingZeros ||
5640 BuiltinID == AArch64::BI_CountLeadingZeros64) {
5641 Value *Arg = EmitScalarExpr(E->getArg(0));
5642 llvm::Type *ArgType = Arg->getType();
5643
5644 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5645 BuiltinID == AArch64::BI_CountLeadingOnes64)
5646 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
5647
5648 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
5649 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
5650
5651 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5652 BuiltinID == AArch64::BI_CountLeadingZeros64)
5653 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5654 return Result;
5655 }
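// For example, _CountLeadingOnes(x) is emitted as ctlz(~x), and
// _CountLeadingZeros64(x) as a 64-bit ctlz whose result is truncated to the
// 32-bit return type; the is_zero_poison flag is always false here, so a zero
// input simply yields the full bit width.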
5656
5657 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
5658 BuiltinID == AArch64::BI_CountLeadingSigns64) {
5659 Value *Arg = EmitScalarExpr(E->getArg(0));
5660
5661 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
5662 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
5663 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
5664
5665 Value *Result = Builder.CreateCall(F, Arg, "cls");
5666 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
5667 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5668 return Result;
5669 }
5670
5671 if (BuiltinID == AArch64::BI_CountOneBits ||
5672 BuiltinID == AArch64::BI_CountOneBits64) {
5673 Value *ArgValue = EmitScalarExpr(E->getArg(0));
5674 llvm::Type *ArgType = ArgValue->getType();
5675 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
5676
5677 Value *Result = Builder.CreateCall(F, ArgValue);
5678 if (BuiltinID == AArch64::BI_CountOneBits64)
5679 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5680 return Result;
5681 }
5682
5683 if (BuiltinID == AArch64::BI__prefetch) {
5684 Value *Address = EmitScalarExpr(E->getArg(0));
5685 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
5686 Value *Locality = ConstantInt::get(Int32Ty, 3);
5687 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
5688 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
5689 return Builder.CreateCall(F, {Address, RW, Locality, Data});
5690 }
5691
5692 if (BuiltinID == AArch64::BI__hlt) {
5693 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
5694 Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
5695
5696 // Return 0 for convenience, even though MSVC returns some other undefined
5697 // value.
5698 return ConstantInt::get(Builder.getInt32Ty(), 0);
5699 }
5700
5701 if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
5702 return Builder.CreateFPTrunc(
5703 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
5704 Builder.getFloatTy()),
5705 Builder.getBFloatTy());
5706
5707 // Handle MSVC intrinsics before argument evaluation to prevent double
5708 // evaluation.
5709 if (std::optional<MSVCIntrin> MsvcIntId =
5710 translateAarch64ToMsvcIntrin(BuiltinID))
5711 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
5712
5713 // Some intrinsics are equivalent - if they are, use the base intrinsic ID.
5714 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
5715 return P.first == BuiltinID;
5716 });
5717 if (It != end(NEONEquivalentIntrinsicMap))
5718 BuiltinID = It->second;
5719
5720 // Find out if any arguments are required to be integer constant
5721 // expressions.
5722 unsigned ICEArguments = 0;
5723 ASTContext::GetBuiltinTypeError Error;
5724 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5725 assert(Error == ASTContext::GE_None && "Should not codegen an error");
5726
5727 llvm::SmallVector<Value*, 4> Ops;
5728 Address PtrOp0 = Address::invalid();
5729 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
5730 if (i == 0) {
5731 switch (BuiltinID) {
5732 case NEON::BI__builtin_neon_vld1_v:
5733 case NEON::BI__builtin_neon_vld1q_v:
5734 case NEON::BI__builtin_neon_vld1_dup_v:
5735 case NEON::BI__builtin_neon_vld1q_dup_v:
5736 case NEON::BI__builtin_neon_vld1_lane_v:
5737 case NEON::BI__builtin_neon_vld1q_lane_v:
5738 case NEON::BI__builtin_neon_vst1_v:
5739 case NEON::BI__builtin_neon_vst1q_v:
5740 case NEON::BI__builtin_neon_vst1_lane_v:
5741 case NEON::BI__builtin_neon_vst1q_lane_v:
5742 case NEON::BI__builtin_neon_vldap1_lane_s64:
5743 case NEON::BI__builtin_neon_vldap1q_lane_s64:
5744 case NEON::BI__builtin_neon_vstl1_lane_s64:
5745 case NEON::BI__builtin_neon_vstl1q_lane_s64:
5746 // Get the alignment for the argument in addition to the value;
5747 // we'll use it later.
5748 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
5749 Ops.push_back(PtrOp0.emitRawPointer(*this));
5750 continue;
5751 }
5752 }
5753 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
5754 }
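// At this point Ops holds every argument except the last one: for SISD
// builtins that final argument is pushed just below, and for the overloaded
// NEON builtins it is the constant type-flags immediate that selects the
// element type. PtrOp0 additionally records the natural alignment of the
// pointer argument for the vld1/vst1-style builtins listed above.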
5755
5756 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
5757 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
5758 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5759
5760 if (Builtin) {
5761 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
5762 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
5763 assert(Result && "SISD intrinsic should have been handled");
5764 return Result;
5765 }
5766
5767 const Expr *Arg = E->getArg(E->getNumArgs()-1);
5768 NeonTypeFlags Type(0);
5769 if (std::optional<llvm::APSInt> Result =
5770 Arg->getIntegerConstantExpr(getContext()))
5771 // Determine the type of this overloaded NEON intrinsic.
5772 Type = NeonTypeFlags(Result->getZExtValue());
5773
5774 bool usgn = Type.isUnsigned();
5775 bool quad = Type.isQuad();
5776
5777 // Handle non-overloaded intrinsics first.
5778 switch (BuiltinID) {
5779 default: break;
5780 case NEON::BI__builtin_neon_vabsh_f16:
5781 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5782 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
5783 case NEON::BI__builtin_neon_vaddq_p128: {
5784 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
5785 Ops.push_back(EmitScalarExpr(E->getArg(1)));
5786 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5787 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5788 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
5789 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5790 return Builder.CreateBitCast(Ops[0], Int128Ty);
5791 }
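// (Addition of 128-bit polynomials over GF(2) is just XOR, which is why
// vaddq_p128 reduces to a single xor of the two i128-typed operands.)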
5792 case NEON::BI__builtin_neon_vldrq_p128: {
5793 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5794 Value *Ptr = EmitScalarExpr(E->getArg(0));
5795 return Builder.CreateAlignedLoad(Int128Ty, Ptr,
5796 CharUnits::fromQuantity(16));
5797 }
5798 case NEON::BI__builtin_neon_vstrq_p128: {
5799 Value *Ptr = Ops[0];
5800 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
5801 }
5802 case NEON::BI__builtin_neon_vcvts_f32_u32:
5803 case NEON::BI__builtin_neon_vcvtd_f64_u64:
5804 usgn = true;
5805 [[fallthrough]];
5806 case NEON::BI__builtin_neon_vcvts_f32_s32:
5807 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5808 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5809 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5810 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5811 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5812 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5813 if (usgn)
5814 return Builder.CreateUIToFP(Ops[0], FTy);
5815 return Builder.CreateSIToFP(Ops[0], FTy);
5816 }
5817 case NEON::BI__builtin_neon_vcvth_f16_u16:
5818 case NEON::BI__builtin_neon_vcvth_f16_u32:
5819 case NEON::BI__builtin_neon_vcvth_f16_u64:
5820 usgn = true;
5821 [[fallthrough]];
5822 case NEON::BI__builtin_neon_vcvth_f16_s16:
5823 case NEON::BI__builtin_neon_vcvth_f16_s32:
5824 case NEON::BI__builtin_neon_vcvth_f16_s64: {
5825 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5826 llvm::Type *FTy = HalfTy;
5827 llvm::Type *InTy;
5828 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
5829 InTy = Int64Ty;
5830 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
5831 InTy = Int32Ty;
5832 else
5833 InTy = Int16Ty;
5834 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5835 if (usgn)
5836 return Builder.CreateUIToFP(Ops[0], FTy);
5837 return Builder.CreateSIToFP(Ops[0], FTy);
5838 }
5839 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5840 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5841 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5842 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5843 case NEON::BI__builtin_neon_vcvth_u16_f16:
5844 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5845 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5846 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5847 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5848 case NEON::BI__builtin_neon_vcvth_s16_f16: {
5849 unsigned Int;
5850 llvm::Type* InTy = Int32Ty;
5851 llvm::Type* FTy = HalfTy;
5852 llvm::Type *Tys[2] = {InTy, FTy};
5853 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5854 switch (BuiltinID) {
5855 default: llvm_unreachable("missing builtin ID in switch!");
5856 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5857 Int = Intrinsic::aarch64_neon_fcvtau; break;
5858 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5859 Int = Intrinsic::aarch64_neon_fcvtmu; break;
5860 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5861 Int = Intrinsic::aarch64_neon_fcvtnu; break;
5862 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5863 Int = Intrinsic::aarch64_neon_fcvtpu; break;
5864 case NEON::BI__builtin_neon_vcvth_u16_f16:
5865 Int = Intrinsic::aarch64_neon_fcvtzu; break;
5866 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5867 Int = Intrinsic::aarch64_neon_fcvtas; break;
5868 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5869 Int = Intrinsic::aarch64_neon_fcvtms; break;
5870 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5871 Int = Intrinsic::aarch64_neon_fcvtns; break;
5872 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5873 Int = Intrinsic::aarch64_neon_fcvtps; break;
5874 case NEON::BI__builtin_neon_vcvth_s16_f16:
5875 Int = Intrinsic::aarch64_neon_fcvtzs; break;
5876 }
5877 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
5878 return Builder.CreateTrunc(Ops[0], Int16Ty);
5879 }
5880 case NEON::BI__builtin_neon_vcaleh_f16:
5881 case NEON::BI__builtin_neon_vcalth_f16:
5882 case NEON::BI__builtin_neon_vcageh_f16:
5883 case NEON::BI__builtin_neon_vcagth_f16: {
5884 unsigned Int;
5885 llvm::Type* InTy = Int32Ty;
5886 llvm::Type* FTy = HalfTy;
5887 llvm::Type *Tys[2] = {InTy, FTy};
5888 Ops.push_back(EmitScalarExpr(E->getArg(1)));
5889 switch (BuiltinID) {
5890 default: llvm_unreachable("missing builtin ID in switch!");
5891 case NEON::BI__builtin_neon_vcageh_f16:
5892 Int = Intrinsic::aarch64_neon_facge; break;
5893 case NEON::BI__builtin_neon_vcagth_f16:
5894 Int = Intrinsic::aarch64_neon_facgt; break;
5895 case NEON::BI__builtin_neon_vcaleh_f16:
5896 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
5897 case NEON::BI__builtin_neon_vcalth_f16:
5898 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
5899 }
5900 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
5901 return Builder.CreateTrunc(Ops[0], Int16Ty);
5902 }
5903 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5904 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
5905 unsigned Int;
5906 llvm::Type* InTy = Int32Ty;
5907 llvm::Type* FTy = HalfTy;
5908 llvm::Type *Tys[2] = {InTy, FTy};
5909 Ops.push_back(EmitScalarExpr(E->getArg(1)));
5910 switch (BuiltinID) {
5911 default: llvm_unreachable("missing builtin ID in switch!");
5912 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5913 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
5914 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
5915 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
5916 }
5917 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
5918 return Builder.CreateTrunc(Ops[0], Int16Ty);
5919 }
5920 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5921 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
5922 unsigned Int;
5923 llvm::Type* FTy = HalfTy;
5924 llvm::Type* InTy = Int32Ty;
5925 llvm::Type *Tys[2] = {FTy, InTy};
5926 Ops.push_back(EmitScalarExpr(E->getArg(1)));
5927 switch (BuiltinID) {
5928 default: llvm_unreachable("missing builtin ID in switch!");
5929 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5930 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
5931 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
5932 break;
5933 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
5934 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
5935 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
5936 break;
5937 }
5938 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
5939 }
5940 case NEON::BI__builtin_neon_vpaddd_s64: {
5941 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
5942 Value *Vec = EmitScalarExpr(E->getArg(0));
5943 // The vector is v2i64, so make sure it's bitcast to that.
5944 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
5945 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5946 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5947 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5948 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5949 // Pairwise addition of a v2i64 into a scalar i64.
5950 return Builder.CreateAdd(Op0, Op1, "vpaddd");
5951 }
5952 case NEON::BI__builtin_neon_vpaddd_f64: {
5953 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
5954 Value *Vec = EmitScalarExpr(E->getArg(0));
5955 // The vector is v2f64, so make sure it's bitcast to that.
5956 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
5957 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5958 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5959 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5960 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5961 // Pairwise addition of a v2f64 into a scalar f64.
5962 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5963 }
5964 case NEON::BI__builtin_neon_vpadds_f32: {
5965 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
5966 Value *Vec = EmitScalarExpr(E->getArg(0));
5967 // The vector is v2f32, so make sure it's bitcast to that.
5968 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
5969 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5970 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5971 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5972 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5973 // Pairwise addition of a v2f32 into a scalar f32.
5974 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5975 }
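// Each of the scalar pairwise-add builtins above extracts lane 0 and lane 1
// of its two-element vector argument and adds them directly, e.g.
//   float32_t r = vpadds_f32(v);  // roughly r = v[0] + v[1]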
5976 case NEON::BI__builtin_neon_vceqzd_s64:
5977 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5978 return EmitAArch64CompareBuiltinExpr(
5979 Ops[0], ConvertType(E->getCallReturnType(getContext())),
5980 ICmpInst::ICMP_EQ, "vceqz");
5981 case NEON::BI__builtin_neon_vceqzd_f64:
5982 case NEON::BI__builtin_neon_vceqzs_f32:
5983 case NEON::BI__builtin_neon_vceqzh_f16:
5984 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5985 return EmitAArch64CompareBuiltinExpr(
5986 Ops[0], ConvertType(E->getCallReturnType(getContext())),
5987 ICmpInst::FCMP_OEQ, "vceqz");
5988 case NEON::BI__builtin_neon_vcgezd_s64:
5989 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5990 return EmitAArch64CompareBuiltinExpr(
5991 Ops[0], ConvertType(E->getCallReturnType(getContext())),
5992 ICmpInst::ICMP_SGE, "vcgez");
5993 case NEON::BI__builtin_neon_vcgezd_f64:
5994 case NEON::BI__builtin_neon_vcgezs_f32:
5995 case NEON::BI__builtin_neon_vcgezh_f16:
5996 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5997 return EmitAArch64CompareBuiltinExpr(
5998 Ops[0], ConvertType(E->getCallReturnType(getContext())),
5999 ICmpInst::FCMP_OGE, "vcgez");
6000 case NEON::BI__builtin_neon_vclezd_s64:
6001 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6002 return EmitAArch64CompareBuiltinExpr(
6003 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6004 ICmpInst::ICMP_SLE, "vclez");
6005 case NEON::BI__builtin_neon_vclezd_f64:
6006 case NEON::BI__builtin_neon_vclezs_f32:
6007 case NEON::BI__builtin_neon_vclezh_f16:
6008 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6009 return EmitAArch64CompareBuiltinExpr(
6010 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6011 ICmpInst::FCMP_OLE, "vclez");
6012 case NEON::BI__builtin_neon_vcgtzd_s64:
6013 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6014 return EmitAArch64CompareBuiltinExpr(
6015 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6016 ICmpInst::ICMP_SGT, "vcgtz");
6017 case NEON::BI__builtin_neon_vcgtzd_f64:
6018 case NEON::BI__builtin_neon_vcgtzs_f32:
6019 case NEON::BI__builtin_neon_vcgtzh_f16:
6020 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6021 return EmitAArch64CompareBuiltinExpr(
6022 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6023 ICmpInst::FCMP_OGT, "vcgtz");
6024 case NEON::BI__builtin_neon_vcltzd_s64:
6025 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6026 return EmitAArch64CompareBuiltinExpr(
6027 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6028 ICmpInst::ICMP_SLT, "vcltz");
6029
6030 case NEON::BI__builtin_neon_vcltzd_f64:
6031 case NEON::BI__builtin_neon_vcltzs_f32:
6032 case NEON::BI__builtin_neon_vcltzh_f16:
6033 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6034 return EmitAArch64CompareBuiltinExpr(
6035 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6036 ICmpInst::FCMP_OLT, "vcltz");
6037
6038 case NEON::BI__builtin_neon_vceqzd_u64: {
6039 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6040 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6041 Ops[0] =
6042 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
6043 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
6044 }
6045 case NEON::BI__builtin_neon_vceqd_f64:
6046 case NEON::BI__builtin_neon_vcled_f64:
6047 case NEON::BI__builtin_neon_vcltd_f64:
6048 case NEON::BI__builtin_neon_vcged_f64:
6049 case NEON::BI__builtin_neon_vcgtd_f64: {
6050 llvm::CmpInst::Predicate P;
6051 switch (BuiltinID) {
6052 default: llvm_unreachable("missing builtin ID in switch!");
6053 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
6054 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
6055 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
6056 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
6057 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
6058 }
6059 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6060 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6061 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6062 if (P == llvm::FCmpInst::FCMP_OEQ)
6063 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6064 else
6065 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
6066 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
6067 }
6068 case NEON::BI__builtin_neon_vceqs_f32:
6069 case NEON::BI__builtin_neon_vcles_f32:
6070 case NEON::BI__builtin_neon_vclts_f32:
6071 case NEON::BI__builtin_neon_vcges_f32:
6072 case NEON::BI__builtin_neon_vcgts_f32: {
6073 llvm::CmpInst::Predicate P;
6074 switch (BuiltinID) {
6075 default: llvm_unreachable("missing builtin ID in switch!");
6076 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
6077 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
6078 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
6079 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
6080 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
6081 }
6082 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6083 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
6084 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
6085 if (P == llvm::FCmpInst::FCMP_OEQ)
6086 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6087 else
6088 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
6089 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
6090 }
6091 case NEON::BI__builtin_neon_vceqh_f16:
6092 case NEON::BI__builtin_neon_vcleh_f16:
6093 case NEON::BI__builtin_neon_vclth_f16:
6094 case NEON::BI__builtin_neon_vcgeh_f16:
6095 case NEON::BI__builtin_neon_vcgth_f16: {
6096 llvm::CmpInst::Predicate P;
6097 switch (BuiltinID) {
6098 default: llvm_unreachable("missing builtin ID in switch!");
6099 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
6100 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
6101 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
6102 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
6103 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
6104 }
6105 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6106 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
6107 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
6108 if (P == llvm::FCmpInst::FCMP_OEQ)
6109 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6110 else
6111 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
6112 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
6113 }
6114 case NEON::BI__builtin_neon_vceqd_s64:
6115 case NEON::BI__builtin_neon_vceqd_u64:
6116 case NEON::BI__builtin_neon_vcgtd_s64:
6117 case NEON::BI__builtin_neon_vcgtd_u64:
6118 case NEON::BI__builtin_neon_vcltd_s64:
6119 case NEON::BI__builtin_neon_vcltd_u64:
6120 case NEON::BI__builtin_neon_vcged_u64:
6121 case NEON::BI__builtin_neon_vcged_s64:
6122 case NEON::BI__builtin_neon_vcled_u64:
6123 case NEON::BI__builtin_neon_vcled_s64: {
6124 llvm::CmpInst::Predicate P;
6125 switch (BuiltinID) {
6126 default: llvm_unreachable("missing builtin ID in switch!");
6127 case NEON::BI__builtin_neon_vceqd_s64:
6128 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
6129 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
6130 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
6131 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
6132 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
6133 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
6134 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
6135 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
6136 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
6137 }
6138 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6139 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6140 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6141 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
6142 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
6143 }
6144 case NEON::BI__builtin_neon_vtstd_s64:
6145 case NEON::BI__builtin_neon_vtstd_u64: {
6146 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6147 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6148 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6149 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
6150 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
6151 llvm::Constant::getNullValue(Int64Ty));
6152 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
6153 }
6154 case NEON::BI__builtin_neon_vset_lane_i8:
6155 case NEON::BI__builtin_neon_vset_lane_i16:
6156 case NEON::BI__builtin_neon_vset_lane_i32:
6157 case NEON::BI__builtin_neon_vset_lane_i64:
6158 case NEON::BI__builtin_neon_vset_lane_bf16:
6159 case NEON::BI__builtin_neon_vset_lane_f32:
6160 case NEON::BI__builtin_neon_vsetq_lane_i8:
6161 case NEON::BI__builtin_neon_vsetq_lane_i16:
6162 case NEON::BI__builtin_neon_vsetq_lane_i32:
6163 case NEON::BI__builtin_neon_vsetq_lane_i64:
6164 case NEON::BI__builtin_neon_vsetq_lane_bf16:
6165 case NEON::BI__builtin_neon_vsetq_lane_f32:
6166 Ops.push_back(EmitScalarExpr(E->getArg(2)));
6167 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6168 case NEON::BI__builtin_neon_vset_lane_f64:
6169 // The vector type needs a cast for the v1f64 variant.
6170 Ops[1] =
6171 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
6172 Ops.push_back(EmitScalarExpr(E->getArg(2)));
6173 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6174 case NEON::BI__builtin_neon_vset_lane_mf8:
6175 case NEON::BI__builtin_neon_vsetq_lane_mf8:
6176 Ops.push_back(EmitScalarExpr(E->getArg(2)));
6177 // The input vector type needs a cast to scalar type.
6178 Ops[0] =
6179 Builder.CreateBitCast(Ops[0], llvm::Type::getInt8Ty(getLLVMContext()));
6180 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6181 case NEON::BI__builtin_neon_vsetq_lane_f64:
6182 // The vector type needs a cast for the v2f64 variant.
6183 Ops[1] =
6184 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
6185 Ops.push_back(EmitScalarExpr(E->getArg(2)));
6186 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6187
6188 case NEON::BI__builtin_neon_vget_lane_i8:
6189 case NEON::BI__builtin_neon_vdupb_lane_i8:
6190 Ops[0] =
6191 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
6192 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6193 "vget_lane");
6194 case NEON::BI__builtin_neon_vgetq_lane_i8:
6195 case NEON::BI__builtin_neon_vdupb_laneq_i8:
6196 Ops[0] =
6197 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
6198 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6199 "vgetq_lane");
6200 case NEON::BI__builtin_neon_vget_lane_mf8:
6201 case NEON::BI__builtin_neon_vdupb_lane_mf8:
6202 case NEON::BI__builtin_neon_vgetq_lane_mf8:
6203 case NEON::BI__builtin_neon_vdupb_laneq_mf8:
6204 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6205 "vget_lane");
6206 case NEON::BI__builtin_neon_vget_lane_i16:
6207 case NEON::BI__builtin_neon_vduph_lane_i16:
6208 Ops[0] =
6209 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
6210 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6211 "vget_lane");
6212 case NEON::BI__builtin_neon_vgetq_lane_i16:
6213 case NEON::BI__builtin_neon_vduph_laneq_i16:
6214 Ops[0] =
6215 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
6216 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6217 "vgetq_lane");
6218 case NEON::BI__builtin_neon_vget_lane_i32:
6219 case NEON::BI__builtin_neon_vdups_lane_i32:
6220 Ops[0] =
6221 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
6222 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6223 "vget_lane");
6224 case NEON::BI__builtin_neon_vdups_lane_f32:
6225 Ops[0] =
6226 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
6227 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6228 "vdups_lane");
6229 case NEON::BI__builtin_neon_vgetq_lane_i32:
6230 case NEON::BI__builtin_neon_vdups_laneq_i32:
6231 Ops[0] =
6232 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
6233 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6234 "vgetq_lane");
6235 case NEON::BI__builtin_neon_vget_lane_i64:
6236 case NEON::BI__builtin_neon_vdupd_lane_i64:
6237 Ops[0] =
6238 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
6239 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6240 "vget_lane");
6241 case NEON::BI__builtin_neon_vdupd_lane_f64:
6242 Ops[0] =
6243 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
6244 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6245 "vdupd_lane");
6246 case NEON::BI__builtin_neon_vgetq_lane_i64:
6247 case NEON::BI__builtin_neon_vdupd_laneq_i64:
6248 Ops[0] =
6249 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
6250 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6251 "vgetq_lane");
6252 case NEON::BI__builtin_neon_vget_lane_f32:
6253 Ops[0] =
6254 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
6255 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6256 "vget_lane");
6257 case NEON::BI__builtin_neon_vget_lane_f64:
6258 Ops[0] =
6259 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
6260 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6261 "vget_lane");
6262 case NEON::BI__builtin_neon_vgetq_lane_f32:
6263 case NEON::BI__builtin_neon_vdups_laneq_f32:
6264 Ops[0] =
6265 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
6266 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6267 "vgetq_lane");
6268 case NEON::BI__builtin_neon_vgetq_lane_f64:
6269 case NEON::BI__builtin_neon_vdupd_laneq_f64:
6270 Ops[0] =
6271 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
6272 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6273 "vgetq_lane");
6274 case NEON::BI__builtin_neon_vaddh_f16:
6275 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6276 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
6277 case NEON::BI__builtin_neon_vsubh_f16:
6278 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6279 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
6280 case NEON::BI__builtin_neon_vmulh_f16:
6281 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6282 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
6283 case NEON::BI__builtin_neon_vdivh_f16:
6284 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6285 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
6286 case NEON::BI__builtin_neon_vfmah_f16:
6287 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
6288 return emitCallMaybeConstrainedFPBuiltin(
6289 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
6290 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
6291 case NEON::BI__builtin_neon_vfmsh_f16: {
6292 Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
6293
6294 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
6295 return emitCallMaybeConstrainedFPBuiltin(
6296 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
6297 {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
6298 }
6299 case NEON::BI__builtin_neon_vaddd_s64:
6300 case NEON::BI__builtin_neon_vaddd_u64:
6301 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
6302 case NEON::BI__builtin_neon_vsubd_s64:
6303 case NEON::BI__builtin_neon_vsubd_u64:
6304 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
6305 case NEON::BI__builtin_neon_vqdmlalh_s16:
6306 case NEON::BI__builtin_neon_vqdmlslh_s16: {
6307 SmallVector<Value *, 2> ProductOps;
6308 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
6309 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
6310 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
6311 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6312 ProductOps, "vqdmlXl");
6313 Constant *CI = ConstantInt::get(SizeTy, 0);
6314 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6315
6316 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
6317 ? Intrinsic::aarch64_neon_sqadd
6318 : Intrinsic::aarch64_neon_sqsub;
6319 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
6320 }
6321 case NEON::BI__builtin_neon_vqshlud_n_s64: {
6322 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6323 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
6324 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
6325 Ops, "vqshlu_n");
6326 }
6327 case NEON::BI__builtin_neon_vqshld_n_u64:
6328 case NEON::BI__builtin_neon_vqshld_n_s64: {
6329 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
6330 ? Intrinsic::aarch64_neon_uqshl
6331 : Intrinsic::aarch64_neon_sqshl;
6332 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6333 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
6334 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
6335 }
6336 case NEON::BI__builtin_neon_vrshrd_n_u64:
6337 case NEON::BI__builtin_neon_vrshrd_n_s64: {
6338 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
6339 ? Intrinsic::aarch64_neon_urshl
6340 : Intrinsic::aarch64_neon_srshl;
6341 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6342 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
6343 Ops[1] = ConstantInt::get(Int64Ty, -SV);
6344 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
6345 }
6346 case NEON::BI__builtin_neon_vrsrad_n_u64:
6347 case NEON::BI__builtin_neon_vrsrad_n_s64: {
6348 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
6349 ? Intrinsic::aarch64_neon_urshl
6350 : Intrinsic::aarch64_neon_srshl;
6351 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6352 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
6353 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
6354 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
6355 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
6356 }
6357 case NEON::BI__builtin_neon_vshld_n_s64:
6358 case NEON::BI__builtin_neon_vshld_n_u64: {
6359 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6360 return Builder.CreateShl(
6361 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
6362 }
6363 case NEON::BI__builtin_neon_vshrd_n_s64: {
6364 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6365 return Builder.CreateAShr(
6366 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6367 Amt->getZExtValue())),
6368 "shrd_n");
6369 }
6370 case NEON::BI__builtin_neon_vshrd_n_u64: {
6371 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6372 uint64_t ShiftAmt = Amt->getZExtValue();
6373 // Right-shifting an unsigned value by its size yields 0.
6374 if (ShiftAmt == 64)
6375 return ConstantInt::get(Int64Ty, 0);
6376 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
6377 "shrd_n");
6378 }
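// Note the asymmetry above: vshrd_n_u64 permits a shift amount of 64, which
// cannot be expressed as an LLVM lshr, so it is folded to the constant 0,
// while the signed vshrd_n_s64 clamps the amount to 63 so the sign bit is
// still replicated across the result.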
6379 case NEON::BI__builtin_neon_vsrad_n_s64: {
6380 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
6381 Ops[1] = Builder.CreateAShr(
6382 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6383 Amt->getZExtValue())),
6384 "shrd_n");
6385 return Builder.CreateAdd(Ops[0], Ops[1]);
6386 }
6387 case NEON::BI__builtin_neon_vsrad_n_u64: {
6388 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
6389 uint64_t ShiftAmt = Amt->getZExtValue();
6390 // Right-shifting an unsigned value by its size yields 0.
6391 // As Op + 0 = Op, return Ops[0] directly.
6392 if (ShiftAmt == 64)
6393 return Ops[0];
6394 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
6395 "shrd_n");
6396 return Builder.CreateAdd(Ops[0], Ops[1]);
6397 }
6398 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
6399 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
6400 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
6401 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
6402 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6403 "lane");
6404 SmallVector<Value *, 2> ProductOps;
6405 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
6406 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
6407 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
6408 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6409 ProductOps, "vqdmlXl");
6410 Constant *CI = ConstantInt::get(SizeTy, 0);
6411 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6412 Ops.pop_back();
6413
6414 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
6415 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
6416 ? Intrinsic::aarch64_neon_sqadd
6417 : Intrinsic::aarch64_neon_sqsub;
6418 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
6419 }
6420 case NEON::BI__builtin_neon_vqdmlals_s32:
6421 case NEON::BI__builtin_neon_vqdmlsls_s32: {
6422 SmallVector<Value *, 2> ProductOps;
6423 ProductOps.push_back(Ops[1]);
6424 ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
6425 Ops[1] =
6426 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6427 ProductOps, "vqdmlXl");
6428
6429 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
6430 ? Intrinsic::aarch64_neon_sqadd
6431 : Intrinsic::aarch64_neon_sqsub;
6432 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
6433 }
6434 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
6435 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
6436 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
6437 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
6438 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6439 "lane");
6440 SmallVector<Value *, 2> ProductOps;
6441 ProductOps.push_back(Ops[1]);
6442 ProductOps.push_back(Ops[2]);
6443 Ops[1] =
6444 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6445 ProductOps, "vqdmlXl");
6446 Ops.pop_back();
6447
6448 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
6449 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
6450 ? Intrinsic::aarch64_neon_sqadd
6451 : Intrinsic::aarch64_neon_sqsub;
6452 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
6453 }
6454 case NEON::BI__builtin_neon_vget_lane_bf16:
6455 case NEON::BI__builtin_neon_vduph_lane_bf16:
6456 case NEON::BI__builtin_neon_vduph_lane_f16: {
6457 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6458 "vget_lane");
6459 }
6460 case NEON::BI__builtin_neon_vgetq_lane_bf16:
6461 case NEON::BI__builtin_neon_vduph_laneq_bf16:
6462 case NEON::BI__builtin_neon_vduph_laneq_f16: {
6463 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6464 "vgetq_lane");
6465 }
6466 case NEON::BI__builtin_neon_vcvt_bf16_f32: {
6467 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6468 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6469 return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6470 }
6471 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
6472 SmallVector<int, 16> ConcatMask(8);
6473 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6474 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6475 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6476 llvm::Value *Trunc =
6477 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6478 return Builder.CreateShuffleVector(
6479 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
6480 }
6481 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
6482 SmallVector<int, 16> ConcatMask(8);
6483 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6484 SmallVector<int, 16> LoMask(4);
6485 std::iota(LoMask.begin(), LoMask.end(), 0);
6486 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6487 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6488 llvm::Type *V8BF16 = FixedVectorType::get(Builder.getBFloatTy(), 8);
6489 llvm::Value *Inactive = Builder.CreateShuffleVector(
6490 Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
6491 llvm::Value *Trunc =
6492 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
6493 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
6494 }
6495
6496 case clang::AArch64::BI_InterlockedAdd:
6497 case clang::AArch64::BI_InterlockedAdd_acq:
6498 case clang::AArch64::BI_InterlockedAdd_rel:
6499 case clang::AArch64::BI_InterlockedAdd_nf:
6500 case clang::AArch64::BI_InterlockedAdd64:
6501 case clang::AArch64::BI_InterlockedAdd64_acq:
6502 case clang::AArch64::BI_InterlockedAdd64_rel:
6503 case clang::AArch64::BI_InterlockedAdd64_nf: {
6504 Address DestAddr = CheckAtomicAlignment(*this, E);
6505 Value *Val = EmitScalarExpr(E->getArg(1));
6506 llvm::AtomicOrdering Ordering;
6507 switch (BuiltinID) {
6508 case clang::AArch64::BI_InterlockedAdd:
6509 case clang::AArch64::BI_InterlockedAdd64:
6510 Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
6511 break;
6512 case clang::AArch64::BI_InterlockedAdd_acq:
6513 case clang::AArch64::BI_InterlockedAdd64_acq:
6514 Ordering = llvm::AtomicOrdering::Acquire;
6515 break;
6516 case clang::AArch64::BI_InterlockedAdd_rel:
6517 case clang::AArch64::BI_InterlockedAdd64_rel:
6518 Ordering = llvm::AtomicOrdering::Release;
6519 break;
6520 case clang::AArch64::BI_InterlockedAdd_nf:
6521 case clang::AArch64::BI_InterlockedAdd64_nf:
6522 Ordering = llvm::AtomicOrdering::Monotonic;
6523 break;
6524 default:
6525 llvm_unreachable("missing builtin ID in switch!");
6526 }
6527 AtomicRMWInst *RMWI =
6528 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val, Ordering);
6529 return Builder.CreateAdd(RMWI, Val);
6530 }
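// atomicrmw add returns the value the memory held before the update, whereas
// MSVC's _InterlockedAdd family returns the updated value, hence the extra
// CreateAdd of the operand onto the RMW result above.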
6531 }
6532
6533 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
6534 llvm::Type *Ty = VTy;
6535 if (!Ty)
6536 return nullptr;
6537
6538 // Not all intrinsics handled by the common case work for AArch64 yet, so only
6539 // defer to common code if it's been added to our special map.
6540 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
6541 AArch64SIMDIntrinsicsProvenSorted);
6542
6543 if (Builtin)
6544 return EmitCommonNeonBuiltinExpr(
6545 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
6546 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
6547 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
6548
6549 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
6550 return V;
6551
6552 unsigned Int;
6553 bool ExtractLow = false;
6554 bool ExtendLaneArg = false;
6555 switch (BuiltinID) {
6556 default: return nullptr;
6557 case NEON::BI__builtin_neon_vbsl_v:
6558 case NEON::BI__builtin_neon_vbslq_v: {
6559 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6560 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
6561 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
6562 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
6563
6564 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
6565 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
6566 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
6567 return Builder.CreateBitCast(Ops[0], Ty);
6568 }
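// vbsl implements the bitwise select (mask & a) | (~mask & b) purely in the
// integer domain, so all three operands are bitcast to an integer vector
// first and the result is bitcast back to the original element type.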
6569 case NEON::BI__builtin_neon_vfma_lane_v:
6570 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6571 // The ARM builtins (and instructions) have the addend as the first
6572 // operand, but the 'fma' intrinsics have it last. Swap it around here.
6573 Value *Addend = Ops[0];
6574 Value *Multiplicand = Ops[1];
6575 Value *LaneSource = Ops[2];
6576 Ops[0] = Multiplicand;
6577 Ops[1] = LaneSource;
6578 Ops[2] = Addend;
6579
6580 // Now adjust things to handle the lane access.
6581 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
6582 ? llvm::FixedVectorType::get(VTy->getElementType(),
6583 VTy->getNumElements() / 2)
6584 : VTy;
6585 llvm::Constant *cst = cast<Constant>(Ops[3]);
6586 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
6587 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
6588 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
6589
6590 Ops.pop_back();
6591 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
6592 : Intrinsic::fma;
6593 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
6594 }
6595 case NEON::BI__builtin_neon_vfma_laneq_v: {
6596 auto *VTy = cast<llvm::FixedVectorType>(Ty);
6597 // v1f64 fma should be mapped to Neon scalar f64 fma
6598 if (VTy && VTy->getElementType() == DoubleTy) {
6599 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6600 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6601 llvm::FixedVectorType *VTy =
6602 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
6603 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
6604 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6605 Value *Result;
6606 Result = emitCallMaybeConstrainedFPBuiltin(
6607 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
6608 DoubleTy, {Ops[1], Ops[2], Ops[0]});
6609 return Builder.CreateBitCast(Result, Ty);
6610 }
6611 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6612 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6613
6614 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
6615 VTy->getNumElements() * 2);
6616 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
6617 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
6618 cast<ConstantInt>(Ops[3]));
6619 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6620 
6621 return emitCallMaybeConstrainedFPBuiltin(
6622 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6623 {Ops[2], Ops[1], Ops[0]});
6624 }
6625 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6626 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6627 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6628
6629 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6630 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6631 return emitCallMaybeConstrainedFPBuiltin(
6632 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6633 {Ops[2], Ops[1], Ops[0]});
6634 }
6635 case NEON::BI__builtin_neon_vfmah_lane_f16:
6636 case NEON::BI__builtin_neon_vfmas_lane_f32:
6637 case NEON::BI__builtin_neon_vfmah_laneq_f16:
6638 case NEON::BI__builtin_neon_vfmas_laneq_f32:
6639 case NEON::BI__builtin_neon_vfmad_lane_f64:
6640 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6641 Ops.push_back(EmitScalarExpr(E->getArg(3)));
6642 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6643 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6644 return emitCallMaybeConstrainedFPBuiltin(
6645 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6646 {Ops[1], Ops[2], Ops[0]});
6647 }
6648 case NEON::BI__builtin_neon_vmull_v:
6649 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6650 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6651 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6652 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6653 case NEON::BI__builtin_neon_vmax_v:
6654 case NEON::BI__builtin_neon_vmaxq_v:
6655 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6656 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6657 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6658 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6659 case NEON::BI__builtin_neon_vmaxh_f16: {
6660 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6661 Int = Intrinsic::aarch64_neon_fmax;
6662 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
6663 }
6664 case NEON::BI__builtin_neon_vmin_v:
6665 case NEON::BI__builtin_neon_vminq_v:
6666 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6667 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6668 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6669 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
6670 case NEON::BI__builtin_neon_vminh_f16: {
6671 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6672 Int = Intrinsic::aarch64_neon_fmin;
6673 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
6674 }
6675 case NEON::BI__builtin_neon_vabd_v:
6676 case NEON::BI__builtin_neon_vabdq_v:
6677 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6678 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6679 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6680 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
6681 case NEON::BI__builtin_neon_vpadal_v:
6682 case NEON::BI__builtin_neon_vpadalq_v: {
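// vpadal is pairwise add-long and accumulate: widen and pairwise-add the
// second operand with [su]addlp, then add the accumulator on top.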
6683 unsigned ArgElts = VTy->getNumElements();
6684 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6685 unsigned BitWidth = EltTy->getBitWidth();
6686 auto *ArgTy = llvm::FixedVectorType::get(
6687 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
6688 llvm::Type* Tys[2] = { VTy, ArgTy };
6689 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6690 SmallVector<llvm::Value*, 1> TmpOps;
6691 TmpOps.push_back(Ops[1]);
6692 Function *F = CGM.getIntrinsic(Int, Tys);
6693 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
6694 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6695 return Builder.CreateAdd(tmp, addend);
6696 }
6697 case NEON::BI__builtin_neon_vpmin_v:
6698 case NEON::BI__builtin_neon_vpminq_v:
6699 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6700 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6701 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6702 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
6703 case NEON::BI__builtin_neon_vpmax_v:
6704 case NEON::BI__builtin_neon_vpmaxq_v:
6705 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6706 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6707 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6708 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
6709 case NEON::BI__builtin_neon_vminnm_v:
6710 case NEON::BI__builtin_neon_vminnmq_v:
6711 Int = Intrinsic::aarch64_neon_fminnm;
6712 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
6713 case NEON::BI__builtin_neon_vminnmh_f16:
6714 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6715 Int = Intrinsic::aarch64_neon_fminnm;
6716 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
6717 case NEON::BI__builtin_neon_vmaxnm_v:
6718 case NEON::BI__builtin_neon_vmaxnmq_v:
6719 Int = Intrinsic::aarch64_neon_fmaxnm;
6720 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
6721 case NEON::BI__builtin_neon_vmaxnmh_f16:
6722 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6723 Int = Intrinsic::aarch64_neon_fmaxnm;
6724 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
6725 case NEON::BI__builtin_neon_vrecpss_f32: {
6726 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6727 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6728 Ops, "vrecps");
6729 }
6730 case NEON::BI__builtin_neon_vrecpsd_f64:
6731 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6732 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6733 Ops, "vrecps");
6734 case NEON::BI__builtin_neon_vrecpsh_f16:
6735 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6736 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
6737 Ops, "vrecps");
6738 case NEON::BI__builtin_neon_vqshrun_n_v:
6739 Int = Intrinsic::aarch64_neon_sqshrun;
6740 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
6741 case NEON::BI__builtin_neon_vqrshrun_n_v:
6742 Int = Intrinsic::aarch64_neon_sqrshrun;
6743 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
6744 case NEON::BI__builtin_neon_vqshrn_n_v:
6745 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6746 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
6747 case NEON::BI__builtin_neon_vrshrn_n_v:
6748 Int = Intrinsic::aarch64_neon_rshrn;
6749 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
6750 case NEON::BI__builtin_neon_vqrshrn_n_v:
6751 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6752 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
6753 case NEON::BI__builtin_neon_vrndah_f16: {
6754 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6755 Int = Builder.getIsFPConstrained()
6756 ? Intrinsic::experimental_constrained_round
6757 : Intrinsic::round;
6758 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
6759 }
6760 case NEON::BI__builtin_neon_vrnda_v:
6761 case NEON::BI__builtin_neon_vrndaq_v: {
6762 Int = Builder.getIsFPConstrained()
6763 ? Intrinsic::experimental_constrained_round
6764 : Intrinsic::round;
6765 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
6766 }
6767 case NEON::BI__builtin_neon_vrndih_f16: {
6768 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6769 Int = Builder.getIsFPConstrained()
6770 ? Intrinsic::experimental_constrained_nearbyint
6771 : Intrinsic::nearbyint;
6772 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
6773 }
6774 case NEON::BI__builtin_neon_vrndmh_f16: {
6775 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6776 Int = Builder.getIsFPConstrained()
6777 ? Intrinsic::experimental_constrained_floor
6778 : Intrinsic::floor;
6779 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
6780 }
6781 case NEON::BI__builtin_neon_vrndm_v:
6782 case NEON::BI__builtin_neon_vrndmq_v: {
6783 Int = Builder.getIsFPConstrained()
6784 ? Intrinsic::experimental_constrained_floor
6785 : Intrinsic::floor;
6786 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6787 }
6788 case NEON::BI__builtin_neon_vrndnh_f16: {
6789 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6790 Int = Builder.getIsFPConstrained()
6791 ? Intrinsic::experimental_constrained_roundeven
6792 : Intrinsic::roundeven;
6793 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
6794 }
6795 case NEON::BI__builtin_neon_vrndn_v:
6796 case NEON::BI__builtin_neon_vrndnq_v: {
6797 Int = Builder.getIsFPConstrained()
6798 ? Intrinsic::experimental_constrained_roundeven
6799 : Intrinsic::roundeven;
6800 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6801 }
6802 case NEON::BI__builtin_neon_vrndns_f32: {
6803 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6804 Int = Builder.getIsFPConstrained()
6805 ? Intrinsic::experimental_constrained_roundeven
6806 : Intrinsic::roundeven;
6807 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
6808 }
6809 case NEON::BI__builtin_neon_vrndph_f16: {
6810 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6811 Int = Builder.getIsFPConstrained()
6812 ? Intrinsic::experimental_constrained_ceil
6813 : Intrinsic::ceil;
6814 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
6815 }
6816 case NEON::BI__builtin_neon_vrndp_v:
6817 case NEON::BI__builtin_neon_vrndpq_v: {
6818 Int = Builder.getIsFPConstrained()
6819 ? Intrinsic::experimental_constrained_ceil
6820 : Intrinsic::ceil;
6821 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6822 }
6823 case NEON::BI__builtin_neon_vrndxh_f16: {
6824 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6825 Int = Builder.getIsFPConstrained()
6826 ? Intrinsic::experimental_constrained_rint
6827 : Intrinsic::rint;
6828 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
6829 }
6830 case NEON::BI__builtin_neon_vrndx_v:
6831 case NEON::BI__builtin_neon_vrndxq_v: {
6832 Int = Builder.getIsFPConstrained()
6833 ? Intrinsic::experimental_constrained_rint
6834 : Intrinsic::rint;
6835 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6836 }
6837 case NEON::BI__builtin_neon_vrndh_f16: {
6838 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6839 Int = Builder.getIsFPConstrained()
6840 ? Intrinsic::experimental_constrained_trunc
6841 : Intrinsic::trunc;
6842 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
6843 }
6844 case NEON::BI__builtin_neon_vrnd32x_f32:
6845 case NEON::BI__builtin_neon_vrnd32xq_f32:
6846 case NEON::BI__builtin_neon_vrnd32x_f64:
6847 case NEON::BI__builtin_neon_vrnd32xq_f64: {
6848 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6849 Int = Intrinsic::aarch64_neon_frint32x;
6850 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
6851 }
6852 case NEON::BI__builtin_neon_vrnd32z_f32:
6853 case NEON::BI__builtin_neon_vrnd32zq_f32:
6854 case NEON::BI__builtin_neon_vrnd32z_f64:
6855 case NEON::BI__builtin_neon_vrnd32zq_f64: {
6856 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6857 Int = Intrinsic::aarch64_neon_frint32z;
6858 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
6859 }
6860 case NEON::BI__builtin_neon_vrnd64x_f32:
6861 case NEON::BI__builtin_neon_vrnd64xq_f32:
6862 case NEON::BI__builtin_neon_vrnd64x_f64:
6863 case NEON::BI__builtin_neon_vrnd64xq_f64: {
6864 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6865 Int = Intrinsic::aarch64_neon_frint64x;
6866 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
6867 }
6868 case NEON::BI__builtin_neon_vrnd64z_f32:
6869 case NEON::BI__builtin_neon_vrnd64zq_f32:
6870 case NEON::BI__builtin_neon_vrnd64z_f64:
6871 case NEON::BI__builtin_neon_vrnd64zq_f64: {
6872 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6873 Int = Intrinsic::aarch64_neon_frint64z;
6874 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
6875 }
6876 case NEON::BI__builtin_neon_vrnd_v:
6877 case NEON::BI__builtin_neon_vrndq_v: {
6878 Int = Builder.getIsFPConstrained()
6879 ? Intrinsic::experimental_constrained_trunc
6880 : Intrinsic::trunc;
6881 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
6882 }
6883 case NEON::BI__builtin_neon_vcvt_f64_v:
6884 case NEON::BI__builtin_neon_vcvtq_f64_v:
6885 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6886 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6887 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6888 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6889 case NEON::BI__builtin_neon_vcvt_f64_f32: {
6890 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6891 "unexpected vcvt_f64_f32 builtin");
6892 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6893 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6894
6895 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
6896 }
6897 case NEON::BI__builtin_neon_vcvt_f32_f64: {
6898 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6899 "unexpected vcvt_f32_f64 builtin");
6900 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6901 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6902
6903 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
6904 }
6905 case NEON::BI__builtin_neon_vcvt_s32_v:
6906 case NEON::BI__builtin_neon_vcvt_u32_v:
6907 case NEON::BI__builtin_neon_vcvt_s64_v:
6908 case NEON::BI__builtin_neon_vcvt_u64_v:
6909 case NEON::BI__builtin_neon_vcvt_s16_f16:
6910 case NEON::BI__builtin_neon_vcvt_u16_f16:
6911 case NEON::BI__builtin_neon_vcvtq_s32_v:
6912 case NEON::BI__builtin_neon_vcvtq_u32_v:
6913 case NEON::BI__builtin_neon_vcvtq_s64_v:
6914 case NEON::BI__builtin_neon_vcvtq_u64_v:
6915 case NEON::BI__builtin_neon_vcvtq_s16_f16:
6916 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
6917 Int =
6918 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
6919 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
6920 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
6921 }
6922 case NEON::BI__builtin_neon_vcvta_s16_f16:
6923 case NEON::BI__builtin_neon_vcvta_u16_f16:
6924 case NEON::BI__builtin_neon_vcvta_s32_v:
6925 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
6926 case NEON::BI__builtin_neon_vcvtaq_s32_v:
6927 case NEON::BI__builtin_neon_vcvta_u32_v:
6928 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
6929 case NEON::BI__builtin_neon_vcvtaq_u32_v:
6930 case NEON::BI__builtin_neon_vcvta_s64_v:
6931 case NEON::BI__builtin_neon_vcvtaq_s64_v:
6932 case NEON::BI__builtin_neon_vcvta_u64_v:
6933 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6934 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6935 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6936 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
6937 }
6938 case NEON::BI__builtin_neon_vcvtm_s16_f16:
6939 case NEON::BI__builtin_neon_vcvtm_s32_v:
6940 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
6941 case NEON::BI__builtin_neon_vcvtmq_s32_v:
6942 case NEON::BI__builtin_neon_vcvtm_u16_f16:
6943 case NEON::BI__builtin_neon_vcvtm_u32_v:
6944 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
6945 case NEON::BI__builtin_neon_vcvtmq_u32_v:
6946 case NEON::BI__builtin_neon_vcvtm_s64_v:
6947 case NEON::BI__builtin_neon_vcvtmq_s64_v:
6948 case NEON::BI__builtin_neon_vcvtm_u64_v:
6949 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6950 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6951 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6952 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
6953 }
6954 case NEON::BI__builtin_neon_vcvtn_s16_f16:
6955 case NEON::BI__builtin_neon_vcvtn_s32_v:
6956 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
6957 case NEON::BI__builtin_neon_vcvtnq_s32_v:
6958 case NEON::BI__builtin_neon_vcvtn_u16_f16:
6959 case NEON::BI__builtin_neon_vcvtn_u32_v:
6960 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
6961 case NEON::BI__builtin_neon_vcvtnq_u32_v:
6962 case NEON::BI__builtin_neon_vcvtn_s64_v:
6963 case NEON::BI__builtin_neon_vcvtnq_s64_v:
6964 case NEON::BI__builtin_neon_vcvtn_u64_v:
6965 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6966 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6967 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6968 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
6969 }
6970 case NEON::BI__builtin_neon_vcvtp_s16_f16:
6971 case NEON::BI__builtin_neon_vcvtp_s32_v:
6972 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
6973 case NEON::BI__builtin_neon_vcvtpq_s32_v:
6974 case NEON::BI__builtin_neon_vcvtp_u16_f16:
6975 case NEON::BI__builtin_neon_vcvtp_u32_v:
6976 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
6977 case NEON::BI__builtin_neon_vcvtpq_u32_v:
6978 case NEON::BI__builtin_neon_vcvtp_s64_v:
6979 case NEON::BI__builtin_neon_vcvtpq_s64_v:
6980 case NEON::BI__builtin_neon_vcvtp_u64_v:
6981 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6982 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6983 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6984 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
6985 }
6986 case NEON::BI__builtin_neon_vmulx_v:
6987 case NEON::BI__builtin_neon_vmulxq_v: {
6988 Int = Intrinsic::aarch64_neon_fmulx;
6989 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
6990 }
6991 case NEON::BI__builtin_neon_vmulxh_lane_f16:
6992 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
6993 // vmulx_lane should be mapped to Neon scalar mulx after
6994 // extracting the scalar element
6995 Ops.push_back(EmitScalarExpr(E->getArg(2)));
6996 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6997 Ops.pop_back();
6998 Int = Intrinsic::aarch64_neon_fmulx;
6999 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
7000 }
7001 case NEON::BI__builtin_neon_vmul_lane_v:
7002 case NEON::BI__builtin_neon_vmul_laneq_v: {
7003 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
7004 bool Quad = false;
7005 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
7006 Quad = true;
7007 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
7008 llvm::FixedVectorType *VTy =
7009 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
7010 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7011 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
7012 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
7013 return Builder.CreateBitCast(Result, Ty);
7014 }
7015 case NEON::BI__builtin_neon_vnegd_s64:
7016 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
7017 case NEON::BI__builtin_neon_vnegh_f16:
7018 return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
7019 case NEON::BI__builtin_neon_vpmaxnm_v:
7020 case NEON::BI__builtin_neon_vpmaxnmq_v: {
7021 Int = Intrinsic::aarch64_neon_fmaxnmp;
7022 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
7023 }
7024 case NEON::BI__builtin_neon_vpminnm_v:
7025 case NEON::BI__builtin_neon_vpminnmq_v: {
7026 Int = Intrinsic::aarch64_neon_fminnmp;
7027 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
7028 }
7029 case NEON::BI__builtin_neon_vsqrth_f16: {
7030 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7031 Int = Builder.getIsFPConstrained()
7032 ? Intrinsic::experimental_constrained_sqrt
7033 : Intrinsic::sqrt;
7034 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
7035 }
7036 case NEON::BI__builtin_neon_vsqrt_v:
7037 case NEON::BI__builtin_neon_vsqrtq_v: {
7038 Int = Builder.getIsFPConstrained()
7039 ? Intrinsic::experimental_constrained_sqrt
7040 : Intrinsic::sqrt;
7041 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7042 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
7043 }
7044 case NEON::BI__builtin_neon_vrbit_v:
7045 case NEON::BI__builtin_neon_vrbitq_v: {
7046 Int = Intrinsic::bitreverse;
7047 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
7048 }
7049 case NEON::BI__builtin_neon_vaddv_u8:
7050 // FIXME: These are handled by the AArch64 scalar code.
7051 usgn = true;
7052 [[fallthrough]];
7053 case NEON::BI__builtin_neon_vaddv_s8: {
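// The across-vector reduction intrinsic returns an i32, so the result is
// truncated back to the 8-bit element type below.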
7054 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7055 Ty = Int32Ty;
7056 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7057 llvm::Type *Tys[2] = { Ty, VTy };
7058 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7059 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
7060 return Builder.CreateTrunc(Ops[0], Int8Ty);
7061 }
7062 case NEON::BI__builtin_neon_vaddv_u16:
7063 usgn = true;
7064 [[fallthrough]];
7065 case NEON::BI__builtin_neon_vaddv_s16: {
7066 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7067 Ty = Int32Ty;
7068 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7069 llvm::Type *Tys[2] = { Ty, VTy };
7070 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7071 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
7072 return Builder.CreateTrunc(Ops[0], Int16Ty);
7073 }
7074 case NEON::BI__builtin_neon_vaddvq_u8:
7075 usgn = true;
7076 [[fallthrough]];
7077 case NEON::BI__builtin_neon_vaddvq_s8: {
7078 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7079 Ty = Int32Ty;
7080 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7081 llvm::Type *Tys[2] = { Ty, VTy };
7082 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7083 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
7084 return Builder.CreateTrunc(Ops[0], Int8Ty);
7085 }
7086 case NEON::BI__builtin_neon_vaddvq_u16:
7087 usgn = true;
7088 [[fallthrough]];
7089 case NEON::BI__builtin_neon_vaddvq_s16: {
7090 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7091 Ty = Int32Ty;
7092 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7093 llvm::Type *Tys[2] = { Ty, VTy };
7094 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7095 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
7096 return Builder.CreateTrunc(Ops[0], Int16Ty);
7097 }
7098 case NEON::BI__builtin_neon_vmaxv_u8: {
7099 Int = Intrinsic::aarch64_neon_umaxv;
7100 Ty = Int32Ty;
7101 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7102 llvm::Type *Tys[2] = { Ty, VTy };
7103 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7104 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7105 return Builder.CreateTrunc(Ops[0], Int8Ty);
7106 }
7107 case NEON::BI__builtin_neon_vmaxv_u16: {
7108 Int = Intrinsic::aarch64_neon_umaxv;
7109 Ty = Int32Ty;
7110 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7111 llvm::Type *Tys[2] = { Ty, VTy };
7112 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7113 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7114 return Builder.CreateTrunc(Ops[0], Int16Ty);
7115 }
7116 case NEON::BI__builtin_neon_vmaxvq_u8: {
7117 Int = Intrinsic::aarch64_neon_umaxv;
7118 Ty = Int32Ty;
7119 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7120 llvm::Type *Tys[2] = { Ty, VTy };
7121 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7122 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7123 return Builder.CreateTrunc(Ops[0], Int8Ty);
7124 }
7125 case NEON::BI__builtin_neon_vmaxvq_u16: {
7126 Int = Intrinsic::aarch64_neon_umaxv;
7127 Ty = Int32Ty;
7128 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7129 llvm::Type *Tys[2] = { Ty, VTy };
7130 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7131 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7132 return Builder.CreateTrunc(Ops[0], Int16Ty);
7133 }
7134 case NEON::BI__builtin_neon_vmaxv_s8: {
7135 Int = Intrinsic::aarch64_neon_smaxv;
7136 Ty = Int32Ty;
7137 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7138 llvm::Type *Tys[2] = { Ty, VTy };
7139 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7140 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7141 return Builder.CreateTrunc(Ops[0], Int8Ty);
7142 }
7143 case NEON::BI__builtin_neon_vmaxv_s16: {
7144 Int = Intrinsic::aarch64_neon_smaxv;
7145 Ty = Int32Ty;
7146 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7147 llvm::Type *Tys[2] = { Ty, VTy };
7148 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7149 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7150 return Builder.CreateTrunc(Ops[0], Int16Ty);
7151 }
7152 case NEON::BI__builtin_neon_vmaxvq_s8: {
7153 Int = Intrinsic::aarch64_neon_smaxv;
7154 Ty = Int32Ty;
7155 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7156 llvm::Type *Tys[2] = { Ty, VTy };
7157 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7158 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7159 return Builder.CreateTrunc(Ops[0], Int8Ty);
7160 }
7161 case NEON::BI__builtin_neon_vmaxvq_s16: {
7162 Int = Intrinsic::aarch64_neon_smaxv;
7163 Ty = Int32Ty;
7164 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7165 llvm::Type *Tys[2] = { Ty, VTy };
7166 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7167 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7168 return Builder.CreateTrunc(Ops[0], Int16Ty);
7169 }
7170 case NEON::BI__builtin_neon_vmaxv_f16: {
7171 Int = Intrinsic::aarch64_neon_fmaxv;
7172 Ty = HalfTy;
7173 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7174 llvm::Type *Tys[2] = { Ty, VTy };
7175 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7176 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7177 return Builder.CreateTrunc(Ops[0], HalfTy);
7178 }
7179 case NEON::BI__builtin_neon_vmaxvq_f16: {
7180 Int = Intrinsic::aarch64_neon_fmaxv;
7181 Ty = HalfTy;
7182 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7183 llvm::Type *Tys[2] = { Ty, VTy };
7184 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7185 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7186 return Builder.CreateTrunc(Ops[0], HalfTy);
7187 }
7188 case NEON::BI__builtin_neon_vminv_u8: {
7189 Int = Intrinsic::aarch64_neon_uminv;
7190 Ty = Int32Ty;
7191 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7192 llvm::Type *Tys[2] = { Ty, VTy };
7193 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7194 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7195 return Builder.CreateTrunc(Ops[0], Int8Ty);
7196 }
7197 case NEON::BI__builtin_neon_vminv_u16: {
7198 Int = Intrinsic::aarch64_neon_uminv;
7199 Ty = Int32Ty;
7200 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7201 llvm::Type *Tys[2] = { Ty, VTy };
7202 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7203 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7204 return Builder.CreateTrunc(Ops[0], Int16Ty);
7205 }
7206 case NEON::BI__builtin_neon_vminvq_u8: {
7207 Int = Intrinsic::aarch64_neon_uminv;
7208 Ty = Int32Ty;
7209 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7210 llvm::Type *Tys[2] = { Ty, VTy };
7211 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7212 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7213 return Builder.CreateTrunc(Ops[0], Int8Ty);
7214 }
7215 case NEON::BI__builtin_neon_vminvq_u16: {
7216 Int = Intrinsic::aarch64_neon_uminv;
7217 Ty = Int32Ty;
7218 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7219 llvm::Type *Tys[2] = { Ty, VTy };
7220 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7221 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7222 return Builder.CreateTrunc(Ops[0], Int16Ty);
7223 }
7224 case NEON::BI__builtin_neon_vminv_s8: {
7225 Int = Intrinsic::aarch64_neon_sminv;
7226 Ty = Int32Ty;
7227 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7228 llvm::Type *Tys[2] = { Ty, VTy };
7229 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7230 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7231 return Builder.CreateTrunc(Ops[0], Int8Ty);
7232 }
7233 case NEON::BI__builtin_neon_vminv_s16: {
7234 Int = Intrinsic::aarch64_neon_sminv;
7235 Ty = Int32Ty;
7236 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7237 llvm::Type *Tys[2] = { Ty, VTy };
7238 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7239 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7240 return Builder.CreateTrunc(Ops[0], Int16Ty);
7241 }
7242 case NEON::BI__builtin_neon_vminvq_s8: {
7243 Int = Intrinsic::aarch64_neon_sminv;
7244 Ty = Int32Ty;
7245 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7246 llvm::Type *Tys[2] = { Ty, VTy };
7247 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7248 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7249 return Builder.CreateTrunc(Ops[0], Int8Ty);
7250 }
7251 case NEON::BI__builtin_neon_vminvq_s16: {
7252 Int = Intrinsic::aarch64_neon_sminv;
7253 Ty = Int32Ty;
7254 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7255 llvm::Type *Tys[2] = { Ty, VTy };
7256 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7257 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7258 return Builder.CreateTrunc(Ops[0], Int16Ty);
7259 }
7260 case NEON::BI__builtin_neon_vminv_f16: {
7261 Int = Intrinsic::aarch64_neon_fminv;
7262 Ty = HalfTy;
7263 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7264 llvm::Type *Tys[2] = { Ty, VTy };
7265 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7266 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7267 return Builder.CreateTrunc(Ops[0], HalfTy);
7268 }
7269 case NEON::BI__builtin_neon_vminvq_f16: {
7270 Int = Intrinsic::aarch64_neon_fminv;
7271 Ty = HalfTy;
7272 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7273 llvm::Type *Tys[2] = { Ty, VTy };
7274 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7275 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7276 return Builder.CreateTrunc(Ops[0], HalfTy);
7277 }
7278 case NEON::BI__builtin_neon_vmaxnmv_f16: {
7279 Int = Intrinsic::aarch64_neon_fmaxnmv;
7280 Ty = HalfTy;
7281 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7282 llvm::Type *Tys[2] = { Ty, VTy };
7283 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7284 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
7285 return Builder.CreateTrunc(Ops[0], HalfTy);
7286 }
7287 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
7288 Int = Intrinsic::aarch64_neon_fmaxnmv;
7289 Ty = HalfTy;
7290 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7291 llvm::Type *Tys[2] = { Ty, VTy };
7292 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7293 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
7294 return Builder.CreateTrunc(Ops[0], HalfTy);
7295 }
7296 case NEON::BI__builtin_neon_vminnmv_f16: {
7297 Int = Intrinsic::aarch64_neon_fminnmv;
7298 Ty = HalfTy;
7299 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7300 llvm::Type *Tys[2] = { Ty, VTy };
7301 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7302 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
7303 return Builder.CreateTrunc(Ops[0], HalfTy);
7304 }
7305 case NEON::BI__builtin_neon_vminnmvq_f16: {
7306 Int = Intrinsic::aarch64_neon_fminnmv;
7307 Ty = HalfTy;
7308 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7309 llvm::Type *Tys[2] = { Ty, VTy };
7310 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7311 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
7312 return Builder.CreateTrunc(Ops[0], HalfTy);
7313 }
7314 case NEON::BI__builtin_neon_vmul_n_f64: {
7315 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
7316 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
7317 return Builder.CreateFMul(Ops[0], RHS);
7318 }
7319 case NEON::BI__builtin_neon_vaddlv_u8: {
7320 Int = Intrinsic::aarch64_neon_uaddlv;
7321 Ty = Int32Ty;
7322 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7323 llvm::Type *Tys[2] = { Ty, VTy };
7324 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7325 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7326 return Builder.CreateTrunc(Ops[0], Int16Ty);
7327 }
7328 case NEON::BI__builtin_neon_vaddlv_u16: {
7329 Int = Intrinsic::aarch64_neon_uaddlv;
7330 Ty = Int32Ty;
7331 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7332 llvm::Type *Tys[2] = { Ty, VTy };
7333 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7334 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7335 }
7336 case NEON::BI__builtin_neon_vaddlvq_u8: {
7337 Int = Intrinsic::aarch64_neon_uaddlv;
7338 Ty = Int32Ty;
7339 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7340 llvm::Type *Tys[2] = { Ty, VTy };
7341 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7342 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7343 return Builder.CreateTrunc(Ops[0], Int16Ty);
7344 }
7345 case NEON::BI__builtin_neon_vaddlvq_u16: {
7346 Int = Intrinsic::aarch64_neon_uaddlv;
7347 Ty = Int32Ty;
7348 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7349 llvm::Type *Tys[2] = { Ty, VTy };
7350 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7351 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7352 }
7353 case NEON::BI__builtin_neon_vaddlv_s8: {
7354 Int = Intrinsic::aarch64_neon_saddlv;
7355 Ty = Int32Ty;
7356 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7357 llvm::Type *Tys[2] = { Ty, VTy };
7358 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7359 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7360 return Builder.CreateTrunc(Ops[0], Int16Ty);
7361 }
7362 case NEON::BI__builtin_neon_vaddlv_s16: {
7363 Int = Intrinsic::aarch64_neon_saddlv;
7364 Ty = Int32Ty;
7365 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7366 llvm::Type *Tys[2] = { Ty, VTy };
7367 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7368 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7369 }
7370 case NEON::BI__builtin_neon_vaddlvq_s8: {
7371 Int = Intrinsic::aarch64_neon_saddlv;
7372 Ty = Int32Ty;
7373 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7374 llvm::Type *Tys[2] = { Ty, VTy };
7375 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7376 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7377 return Builder.CreateTrunc(Ops[0], Int16Ty);
7378 }
7379 case NEON::BI__builtin_neon_vaddlvq_s16: {
7380 Int = Intrinsic::aarch64_neon_saddlv;
7381 Ty = Int32Ty;
7382 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7383 llvm::Type *Tys[2] = { Ty, VTy };
7384 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7385 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7386 }
7387 case NEON::BI__builtin_neon_vsri_n_v:
7388 case NEON::BI__builtin_neon_vsriq_n_v: {
7389 Int = Intrinsic::aarch64_neon_vsri;
7390 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
7391 return EmitNeonCall(Intrin, Ops, "vsri_n");
7392 }
7393 case NEON::BI__builtin_neon_vsli_n_v:
7394 case NEON::BI__builtin_neon_vsliq_n_v: {
7395 Int = Intrinsic::aarch64_neon_vsli;
7396 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
7397 return EmitNeonCall(Intrin, Ops, "vsli_n");
7398 }
7399 case NEON::BI__builtin_neon_vsra_n_v:
7400 case NEON::BI__builtin_neon_vsraq_n_v:
7401 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7402 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
7403 return Builder.CreateAdd(Ops[0], Ops[1]);
7404 case NEON::BI__builtin_neon_vrsra_n_v:
7405 case NEON::BI__builtin_neon_vrsraq_n_v: {
7406 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
7407 SmallVector<llvm::Value*,2> TmpOps;
7408 TmpOps.push_back(Ops[1]);
7409 TmpOps.push_back(Ops[2]);
7410 Function* F = CGM.getIntrinsic(Int, Ty);
7411 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
7412 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7413 return Builder.CreateAdd(Ops[0], tmp);
7414 }
7415 case NEON::BI__builtin_neon_vld1_v:
7416 case NEON::BI__builtin_neon_vld1q_v: {
7417 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
7418 }
7419 case NEON::BI__builtin_neon_vst1_v:
7420 case NEON::BI__builtin_neon_vst1q_v:
7421 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7422 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
7423 case NEON::BI__builtin_neon_vld1_lane_v:
7424 case NEON::BI__builtin_neon_vld1q_lane_v: {
7425 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7426 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
7427 PtrOp0.getAlignment());
7428 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
7429 }
7430 case NEON::BI__builtin_neon_vldap1_lane_s64:
7431 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
7432 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7433 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
7434 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
7435 LI->setAtomic(llvm::AtomicOrdering::Acquire);
7436 Ops[0] = LI;
7437 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
7438 }
7439 case NEON::BI__builtin_neon_vld1_dup_v:
7440 case NEON::BI__builtin_neon_vld1q_dup_v: {
7441 Value *V = PoisonValue::get(Ty);
7442 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
7443 PtrOp0.getAlignment());
7444 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
7445 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
7446 return EmitNeonSplat(Ops[0], CI);
7447 }
7448 case NEON::BI__builtin_neon_vst1_lane_v:
7449 case NEON::BI__builtin_neon_vst1q_lane_v:
7450 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7451 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
7452 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
7453 case NEON::BI__builtin_neon_vstl1_lane_s64:
7454 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
7455 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7456 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
7457 llvm::StoreInst *SI =
7458 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
7459 SI->setAtomic(llvm::AtomicOrdering::Release);
7460 return SI;
7461 }
7462 case NEON::BI__builtin_neon_vld2_v:
7463 case NEON::BI__builtin_neon_vld2q_v: {
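// The ld2 intrinsic returns a two-vector struct, which is stored whole
// through the result pointer passed as the first builtin argument.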
7464 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7465 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
7466 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
7467 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7468 }
7469 case NEON::BI__builtin_neon_vld3_v:
7470 case NEON::BI__builtin_neon_vld3q_v: {
7471 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7472 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
7473 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
7474 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7475 }
7476 case NEON::BI__builtin_neon_vld4_v:
7477 case NEON::BI__builtin_neon_vld4q_v: {
7478 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7479 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
7480 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
7481 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7482 }
7483 case NEON::BI__builtin_neon_vld2_dup_v:
7484 case NEON::BI__builtin_neon_vld2q_dup_v: {
7485 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7486 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
7487 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
7488 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7489 }
7490 case NEON::BI__builtin_neon_vld3_dup_v:
7491 case NEON::BI__builtin_neon_vld3q_dup_v: {
7492 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7493 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
7494 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
7495 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7496 }
7497 case NEON::BI__builtin_neon_vld4_dup_v:
7498 case NEON::BI__builtin_neon_vld4q_dup_v: {
7499 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7500 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
7501 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
7502 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7503 }
7504 case NEON::BI__builtin_neon_vld2_lane_v:
7505 case NEON::BI__builtin_neon_vld2q_lane_v: {
7506 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7507 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
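// Rotate the source pointer behind the vectors and the lane index so the
// operands match the ld2lane intrinsic's (vectors, lane, pointer) order.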
7508 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7509 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7510 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7511 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7512 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
7513 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7514 }
7515 case NEON::BI__builtin_neon_vld3_lane_v:
7516 case NEON::BI__builtin_neon_vld3q_lane_v: {
7517 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7518 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
7519 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7520 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7521 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7522 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7523 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7524 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
7525 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7526 }
7527 case NEON::BI__builtin_neon_vld4_lane_v:
7528 case NEON::BI__builtin_neon_vld4q_lane_v: {
7529 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7530 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
7531 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7532 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7533 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7534 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7535 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
7536 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
7537 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
7538 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7539 }
7540 case NEON::BI__builtin_neon_vst2_v:
7541 case NEON::BI__builtin_neon_vst2q_v: {
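// The builtin takes the pointer first, but the st2 intrinsic expects it
// last; rotate it to the end of the operand list.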
7542 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7543 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
7544 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
7545 Ops, "");
7546 }
7547 case NEON::BI__builtin_neon_vst2_lane_v:
7548 case NEON::BI__builtin_neon_vst2q_lane_v: {
7549 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7550 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
7551 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7552 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
7553 Ops, "");
7554 }
7555 case NEON::BI__builtin_neon_vst3_v:
7556 case NEON::BI__builtin_neon_vst3q_v: {
7557 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7558 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7559 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
7560 Ops, "");
7561 }
7562 case NEON::BI__builtin_neon_vst3_lane_v:
7563 case NEON::BI__builtin_neon_vst3q_lane_v: {
7564 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7565 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7566 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7567 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
7568 Ops, "");
7569 }
7570 case NEON::BI__builtin_neon_vst4_v:
7571 case NEON::BI__builtin_neon_vst4q_v: {
7572 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7573 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7574 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
7575 Ops, "");
7576 }
7577 case NEON::BI__builtin_neon_vst4_lane_v:
7578 case NEON::BI__builtin_neon_vst4q_lane_v: {
7579 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7580 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7581 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
7582 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
7583 Ops, "");
7584 }
7585 case NEON::BI__builtin_neon_vtrn_v:
7586 case NEON::BI__builtin_neon_vtrnq_v: {
7587 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7588 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7589 Value *SV = nullptr;
7590
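// Each iteration builds one half of the transpose with a shuffle and
// stores it to result slot vi.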
7591 for (unsigned vi = 0; vi != 2; ++vi) {
7592 SmallVector<int, 16> Indices;
7593 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7594 Indices.push_back(i+vi);
7595 Indices.push_back(i+e+vi);
7596 }
7597 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7598 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
7599 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7600 }
7601 return SV;
7602 }
7603 case NEON::BI__builtin_neon_vuzp_v:
7604 case NEON::BI__builtin_neon_vuzpq_v: {
7605 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7606 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7607 Value *SV = nullptr;
7608
7609 for (unsigned vi = 0; vi != 2; ++vi) {
7610 SmallVector<int, 16> Indices;
7611 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7612 Indices.push_back(2*i+vi);
7613
7614 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7615 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
7616 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7617 }
7618 return SV;
7619 }
7620 case NEON::BI__builtin_neon_vzip_v:
7621 case NEON::BI__builtin_neon_vzipq_v: {
7622 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7623 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7624 Value *SV = nullptr;
7625
7626 for (unsigned vi = 0; vi != 2; ++vi) {
7627 SmallVector<int, 16> Indices;
7628 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7629 Indices.push_back((i + vi*e) >> 1);
7630 Indices.push_back(((i + vi*e) >> 1)+e);
7631 }
7632 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7633 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
7634 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7635 }
7636 return SV;
7637 }
7638 case NEON::BI__builtin_neon_vqtbl1q_v: {
7639 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
7640 Ops, "vtbl1");
7641 }
7642 case NEON::BI__builtin_neon_vqtbl2q_v: {
7643 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
7644 Ops, "vtbl2");
7645 }
7646 case NEON::BI__builtin_neon_vqtbl3q_v: {
7647 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
7648 Ops, "vtbl3");
7649 }
7650 case NEON::BI__builtin_neon_vqtbl4q_v: {
7651 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
7652 Ops, "vtbl4");
7653 }
7654 case NEON::BI__builtin_neon_vqtbx1q_v: {
7655 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
7656 Ops, "vtbx1");
7657 }
7658 case NEON::BI__builtin_neon_vqtbx2q_v: {
7659 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
7660 Ops, "vtbx2");
7661 }
7662 case NEON::BI__builtin_neon_vqtbx3q_v: {
7663 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
7664 Ops, "vtbx3");
7665 }
7666 case NEON::BI__builtin_neon_vqtbx4q_v: {
7667 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
7668 Ops, "vtbx4");
7669 }
7670 case NEON::BI__builtin_neon_vsqadd_v:
7671 case NEON::BI__builtin_neon_vsqaddq_v: {
7672 Int = Intrinsic::aarch64_neon_usqadd;
7673 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
7674 }
7675 case NEON::BI__builtin_neon_vuqadd_v:
7676 case NEON::BI__builtin_neon_vuqaddq_v: {
7677 Int = Intrinsic::aarch64_neon_suqadd;
7678 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
7679 }
7680
7681 case NEON::BI__builtin_neon_vluti2_laneq_mf8:
7682 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
7683 case NEON::BI__builtin_neon_vluti2_laneq_f16:
7684 case NEON::BI__builtin_neon_vluti2_laneq_p16:
7685 case NEON::BI__builtin_neon_vluti2_laneq_p8:
7686 case NEON::BI__builtin_neon_vluti2_laneq_s16:
7687 case NEON::BI__builtin_neon_vluti2_laneq_s8:
7688 case NEON::BI__builtin_neon_vluti2_laneq_u16:
7689 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
7690 Int = Intrinsic::aarch64_neon_vluti2_laneq;
7691 llvm::Type *Tys[2];
7692 Tys[0] = Ty;
7693 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7694 /*isQuad*/ false));
7695 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
7696 }
7697 case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
7698 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
7699 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
7700 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
7701 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
7702 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
7703 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
7704 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
7705 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
7706 Int = Intrinsic::aarch64_neon_vluti2_laneq;
7707 llvm::Type *Tys[2];
7708 Tys[0] = Ty;
7709 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7710 /*isQuad*/ true));
7711 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
7712 }
7713 case NEON::BI__builtin_neon_vluti2_lane_mf8:
7714 case NEON::BI__builtin_neon_vluti2_lane_bf16:
7715 case NEON::BI__builtin_neon_vluti2_lane_f16:
7716 case NEON::BI__builtin_neon_vluti2_lane_p16:
7717 case NEON::BI__builtin_neon_vluti2_lane_p8:
7718 case NEON::BI__builtin_neon_vluti2_lane_s16:
7719 case NEON::BI__builtin_neon_vluti2_lane_s8:
7720 case NEON::BI__builtin_neon_vluti2_lane_u16:
7721 case NEON::BI__builtin_neon_vluti2_lane_u8: {
7722 Int = Intrinsic::aarch64_neon_vluti2_lane;
7723 llvm::Type *Tys[2];
7724 Tys[0] = Ty;
7725 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7726 /*isQuad*/ false));
7727 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
7728 }
7729 case NEON::BI__builtin_neon_vluti2q_lane_mf8:
7730 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
7731 case NEON::BI__builtin_neon_vluti2q_lane_f16:
7732 case NEON::BI__builtin_neon_vluti2q_lane_p16:
7733 case NEON::BI__builtin_neon_vluti2q_lane_p8:
7734 case NEON::BI__builtin_neon_vluti2q_lane_s16:
7735 case NEON::BI__builtin_neon_vluti2q_lane_s8:
7736 case NEON::BI__builtin_neon_vluti2q_lane_u16:
7737 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
7738 Int = Intrinsic::aarch64_neon_vluti2_lane;
7739 llvm::Type *Tys[2];
7740 Tys[0] = Ty;
7741 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7742 /*isQuad*/ true));
7743 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
7744 }
7745 case NEON::BI__builtin_neon_vluti4q_lane_mf8:
7746 case NEON::BI__builtin_neon_vluti4q_lane_p8:
7747 case NEON::BI__builtin_neon_vluti4q_lane_s8:
7748 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
7749 Int = Intrinsic::aarch64_neon_vluti4q_lane;
7750 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane");
7751 }
7752 case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
7753 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
7754 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
7755 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
7756 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
7757 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq");
7758 }
7759 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
7760 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
7761 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
7762 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
7763 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
7764 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
7765 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
7766 }
7767 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
7768 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
7769 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
7770 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
7771 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
7772 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
7773 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
7774 }
7775 case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
7776 ExtractLow = true;
7777 LLVM_FALLTHROUGH;
7778 case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
7779 case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
7780 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
7781 llvm::FixedVectorType::get(BFloatTy, 8),
7782 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7783 case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
7784 ExtractLow = true;
7785 LLVM_FALLTHROUGH;
7786 case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
7787 case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
7788 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
7789 llvm::FixedVectorType::get(BFloatTy, 8),
7790 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7791 case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
7792 ExtractLow = true;
7793 LLVM_FALLTHROUGH;
7794 case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
7795 case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
7796 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
7797 llvm::FixedVectorType::get(HalfTy, 8),
7798 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7799 case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
7800 ExtractLow = true;
7801 LLVM_FALLTHROUGH;
7802 case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
7803 case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
7804 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
7805 llvm::FixedVectorType::get(HalfTy, 8),
7806 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7807 case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
7808 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7809 llvm::FixedVectorType::get(Int8Ty, 8),
7810 Ops[0]->getType(), false, Ops, E, "vfcvtn");
7811 case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
7812 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7813 llvm::FixedVectorType::get(Int8Ty, 8),
7814 llvm::FixedVectorType::get(HalfTy, 4), false, Ops,
7815 E, "vfcvtn");
7816 case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
7817 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7818 llvm::FixedVectorType::get(Int8Ty, 16),
7819 llvm::FixedVectorType::get(HalfTy, 8), false, Ops,
7820 E, "vfcvtn");
7821 case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
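// Insert the existing low half into a poison 16 x i8 vector so fcvtn2 can
// fill the high half with the converted values.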
7822 llvm::Type *Ty = llvm::FixedVectorType::get(Int8Ty, 16);
7823 Ops[0] = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
7824 uint64_t(0));
7825 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn2, Ty,
7826 Ops[1]->getType(), false, Ops, E, "vfcvtn2");
7827 }
7828
7829 case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
7830 case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
7831 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2, false, HalfTy,
7832 Ops, E, "fdot2");
7833 case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
7834 case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
7835 ExtendLaneArg = true;
7836 LLVM_FALLTHROUGH;
7837 case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
7838 case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
7839 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2_lane,
7840 ExtendLaneArg, HalfTy, Ops, E, "fdot2_lane");
7841 case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
7842 case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
7843 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4, false,
7844 FloatTy, Ops, E, "fdot4");
7845 case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
7846 case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
7847 ExtendLaneArg = true;
7848 LLVM_FALLTHROUGH;
7849 case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
7850 case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
7851 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4_lane,
7852 ExtendLaneArg, FloatTy, Ops, E, "fdot4_lane");
7853
7854 case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
7855 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalb,
7856 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7857 "vmlal");
7858 case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
7859 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalt,
7860 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7861 "vmlal");
7862 case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
7863 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbb,
7864 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7865 "vmlall");
7866 case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
7867 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbt,
7868 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7869 "vmlall");
7870 case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
7871 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltb,
7872 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7873 "vmlall");
7874 case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
7875 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltt,
7876 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7877 "vmlall");
7878 case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
7879 ExtendLaneArg = true;
7880 LLVM_FALLTHROUGH;
7881 case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
7882 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalb_lane,
7883 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7884 case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
7885 ExtendLaneArg = true;
7886 LLVM_FALLTHROUGH;
7887 case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
7888 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalt_lane,
7889 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7890 case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
7891 ExtendLaneArg = true;
7892 LLVM_FALLTHROUGH;
7893 case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
7894 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbb_lane,
7895 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7896 case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
7897 ExtendLaneArg = true;
7898 LLVM_FALLTHROUGH;
7899 case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
7900 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbt_lane,
7901 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7902 case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
7903 ExtendLaneArg = true;
7904 LLVM_FALLTHROUGH;
7905 case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
7906 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltb_lane,
7907 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7908 case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
7909 ExtendLaneArg = true;
7910 LLVM_FALLTHROUGH;
7911 case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
7912 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltt_lane,
7913 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7914 case NEON::BI__builtin_neon_vamin_f16:
7915 case NEON::BI__builtin_neon_vaminq_f16:
7916 case NEON::BI__builtin_neon_vamin_f32:
7917 case NEON::BI__builtin_neon_vaminq_f32:
7918 case NEON::BI__builtin_neon_vaminq_f64: {
7919 Int = Intrinsic::aarch64_neon_famin;
7920 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
7921 }
7922 case NEON::BI__builtin_neon_vamax_f16:
7923 case NEON::BI__builtin_neon_vamaxq_f16:
7924 case NEON::BI__builtin_neon_vamax_f32:
7925 case NEON::BI__builtin_neon_vamaxq_f32:
7926 case NEON::BI__builtin_neon_vamaxq_f64: {
7927 Int = Intrinsic::aarch64_neon_famax;
7928 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
7929 }
7930 case NEON::BI__builtin_neon_vscale_f16:
7931 case NEON::BI__builtin_neon_vscaleq_f16:
7932 case NEON::BI__builtin_neon_vscale_f32:
7933 case NEON::BI__builtin_neon_vscaleq_f32:
7934 case NEON::BI__builtin_neon_vscaleq_f64: {
7935 Int = Intrinsic::aarch64_neon_fp8_fscale;
7936 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fscale");
7937 }
7938 }
7939}
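For orientation, a minimal usage sketch of the famin/famax lowering handled above. It assumes an AArch64 toolchain where FEAT_FAMINMAX and the corresponding arm_neon.h intrinsic vaminq_f32 are available; the function name is illustrative and not taken from this file.

#include <arm_neon.h>

// Hypothetical caller: the builtin behind vaminq_f32 reaches the famin case
// above, which emits the llvm.aarch64.neon.famin intrinsic overloaded at the
// operand vector type (<4 x float> here).
float32x4_t absolute_minimum(float32x4_t a, float32x4_t b) {
  return vaminq_f32(a, b);
}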
7940
7941Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
7942 const CallExpr *E) {
7943 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
7944 BuiltinID == BPF::BI__builtin_btf_type_id ||
7945 BuiltinID == BPF::BI__builtin_preserve_type_info ||
7946 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
7947 "unexpected BPF builtin");
7948
7949  // A sequence number, injected into IR builtin functions, to
7950  // prevent CSE when the only difference between otherwise identical
7951  // calls is their debuginfo metadata.
7952 static uint32_t BuiltinSeqNum;
7953
7954 switch (BuiltinID) {
7955 default:
7956 llvm_unreachable("Unexpected BPF builtin");
7957 case BPF::BI__builtin_preserve_field_info: {
7958 const Expr *Arg = E->getArg(0);
7959 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
7960
7961 if (!getDebugInfo()) {
7962 CGM.Error(E->getExprLoc(),
7963 "using __builtin_preserve_field_info() without -g");
7964 return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
7965 : EmitLValue(Arg).emitRawPointer(*this);
7966 }
7967
7968 // Enable underlying preserve_*_access_index() generation.
7969 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
7970 IsInPreservedAIRegion = true;
7971 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
7972 : EmitLValue(Arg).emitRawPointer(*this);
7973 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
7974
7975 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7976 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
7977
7978    // Build the IR for the preserve_field_info intrinsic.
7979 llvm::Function *FnGetFieldInfo = Intrinsic::getOrInsertDeclaration(
7980 &CGM.getModule(), Intrinsic::bpf_preserve_field_info,
7981 {FieldAddr->getType()});
7982 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
7983 }
7984 case BPF::BI__builtin_btf_type_id:
7985 case BPF::BI__builtin_preserve_type_info: {
7986 if (!getDebugInfo()) {
7987 CGM.Error(E->getExprLoc(), "using builtin function without -g");
7988 return nullptr;
7989 }
7990
7991 const Expr *Arg0 = E->getArg(0);
7992 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
7993 Arg0->getType(), Arg0->getExprLoc());
7994
7995 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7996 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
7997 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
7998
7999 llvm::Function *FnDecl;
8000 if (BuiltinID == BPF::BI__builtin_btf_type_id)
8001 FnDecl = Intrinsic::getOrInsertDeclaration(
8002 &CGM.getModule(), Intrinsic::bpf_btf_type_id, {});
8003 else
8004 FnDecl = Intrinsic::getOrInsertDeclaration(
8005 &CGM.getModule(), Intrinsic::bpf_preserve_type_info, {});
8006 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
8007 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
8008 return Fn;
8009 }
8010 case BPF::BI__builtin_preserve_enum_value: {
8011 if (!getDebugInfo()) {
8012 CGM.Error(E->getExprLoc(), "using builtin function without -g");
8013 return nullptr;
8014 }
8015
8016 const Expr *Arg0 = E->getArg(0);
8017 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
8018 Arg0->getType(), Arg0->getExprLoc());
8019
8020 // Find enumerator
8021 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
8022 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
8023 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
8024 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
8025
8026 auto InitVal = Enumerator->getInitVal();
8027 std::string InitValStr;
8028 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
8029 InitValStr = std::to_string(InitVal.getSExtValue());
8030 else
8031 InitValStr = std::to_string(InitVal.getZExtValue());
8032 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
8033 Value *EnumStrVal = Builder.CreateGlobalString(EnumStr);
8034
8035 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
8036 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
8037 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
8038
8039 llvm::Function *IntrinsicFn = Intrinsic::getOrInsertDeclaration(
8040 &CGM.getModule(), Intrinsic::bpf_preserve_enum_value, {});
8041 CallInst *Fn =
8042 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
8043 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
8044 return Fn;
8045 }
8046 }
8047}
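A hedged source-level sketch of the first BPF builtin handled above; the struct, field, and function names are illustrative and not taken from this file. With the bpf target and -g, the call lowers to llvm.bpf.preserve.field.info on the preserved field address.

// Hypothetical CO-RE example: info kind 0 requests the field's byte offset,
// and the returned value is relocated by the BPF loader at load time.
struct pkt { int len; };

unsigned field_byte_offset(struct pkt *p) {
  return __builtin_preserve_field_info(p->len, 0);
}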
8048
8050Value *CodeGenFunction::BuildVector(ArrayRef<llvm::Value *> Ops) {
8051 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
8052 "Not a power-of-two sized vector!");
8053 bool AllConstants = true;
8054 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
8055 AllConstants &= isa<Constant>(Ops[i]);
8056
8057 // If this is a constant vector, create a ConstantVector.
8058 if (AllConstants) {
8059    SmallVector<llvm::Constant *, 16> CstOps;
8060 for (llvm::Value *Op : Ops)
8061 CstOps.push_back(cast<Constant>(Op));
8062 return llvm::ConstantVector::get(CstOps);
8063 }
8064
8065 // Otherwise, insertelement the values to build the vector.
8066 Value *Result = llvm::PoisonValue::get(
8067 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
8068
8069 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
8070 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
8071
8072 return Result;
8073}
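As a sketch, the non-constant path above produces a chain of insertelement instructions into a poison vector; the value names and i32 element type below are illustrative, not verbatim compiler output.

// %v0 = insertelement <4 x i32> poison, i32 %a, i64 0
// %v1 = insertelement <4 x i32> %v0,    i32 %b, i64 1
// %v2 = insertelement <4 x i32> %v1,    i32 %c, i64 2
// %v3 = insertelement <4 x i32> %v2,    i32 %d, i64 3
// If every operand is already a Constant, the loop is bypassed and a single
// ConstantVector is returned instead.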
8074
8075Value *CodeGenFunction::EmitAArch64CpuInit() {
8076 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
8077 llvm::FunctionCallee Func =
8078 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
8079 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
8080 cast<llvm::GlobalValue>(Func.getCallee())
8081 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
8082 return Builder.CreateCall(Func);
8083}
8084
8085Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
8086 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
8087 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
8088  llvm::SmallVector<StringRef, 8> Features;
8089 ArgStr.split(Features, "+");
8090 for (auto &Feature : Features) {
8091 Feature = Feature.trim();
8092 if (!llvm::AArch64::parseFMVExtension(Feature))
8093 return Builder.getFalse();
8094 if (Feature != "default")
8095 Features.push_back(Feature);
8096 }
8097 return EmitAArch64CpuSupports(Features);
8098}
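A hedged usage sketch of the string form handled above; the feature names are assumed to be valid FMV extension names and are not taken from this file. Features are joined with '+', and any token that fails parseFMVExtension makes the builtin fold to false at compile time.

// Hypothetical AArch64 example:
int have_sve2_and_bf16(void) {
  return __builtin_cpu_supports("sve2+bf16");
}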
8099
8100llvm::Value *
8101CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
8102 llvm::APInt FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
8103 Value *Result = Builder.getTrue();
8104 if (FeaturesMask != 0) {
8105 // Get features from structure in runtime library
8106 // struct {
8107 // unsigned long long features;
8108 // } __aarch64_cpu_features;
8109 llvm::Type *STy = llvm::StructType::get(Int64Ty);
8110 llvm::Constant *AArch64CPUFeatures =
8111 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
8112 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
8113 llvm::Value *CpuFeatures = Builder.CreateGEP(
8114 STy, AArch64CPUFeatures,
8115 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
8116 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
8117                                                 CharUnits::fromQuantity(8));
8118 Value *Mask = Builder.getInt(FeaturesMask.trunc(64));
8119 Value *Bitset = Builder.CreateAnd(Features, Mask);
8120 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
8121 Result = Builder.CreateAnd(Result, Cmp);
8122 }
8123 return Result;
8124}
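Roughly, the mask test above materializes as the following IR shape; this is a sketch under the assumption of a single accumulated check, not verbatim output.

// %features = load i64, ptr @__aarch64_cpu_features, align 8
// %bits     = and i64 %features, <mask>
// %ok       = icmp eq i64 %bits, <mask>
// Result is the AND of %ok with any previously accumulated condition.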