//===---------- ARM.cpp - Emit LLVM Code for builtins ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "ABIInfo.h"
#include "CGBuiltin.h"
#include "CGDebugInfo.h"
#include "TargetInfo.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsBPF.h"
#include "llvm/TargetParser/AArch64TargetParser.h"

#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm;
using namespace clang::aarch64;
31
32static std::optional<CodeGenFunction::MSVCIntrin>
33translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
34 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
35 switch (BuiltinID) {
36 default:
37 return std::nullopt;
38 case clang::AArch64::BI_BitScanForward:
39 case clang::AArch64::BI_BitScanForward64:
40 return MSVCIntrin::_BitScanForward;
41 case clang::AArch64::BI_BitScanReverse:
42 case clang::AArch64::BI_BitScanReverse64:
43 return MSVCIntrin::_BitScanReverse;
44 case clang::AArch64::BI_InterlockedAnd64:
45 return MSVCIntrin::_InterlockedAnd;
46 case clang::AArch64::BI_InterlockedExchange64:
47 return MSVCIntrin::_InterlockedExchange;
48 case clang::AArch64::BI_InterlockedExchangeAdd64:
49 return MSVCIntrin::_InterlockedExchangeAdd;
50 case clang::AArch64::BI_InterlockedExchangeSub64:
51 return MSVCIntrin::_InterlockedExchangeSub;
52 case clang::AArch64::BI_InterlockedOr64:
53 return MSVCIntrin::_InterlockedOr;
54 case clang::AArch64::BI_InterlockedXor64:
55 return MSVCIntrin::_InterlockedXor;
56 case clang::AArch64::BI_InterlockedDecrement64:
57 return MSVCIntrin::_InterlockedDecrement;
58 case clang::AArch64::BI_InterlockedIncrement64:
59 return MSVCIntrin::_InterlockedIncrement;
60 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
61 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
62 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
63 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
64 return MSVCIntrin::_InterlockedExchangeAdd_acq;
65 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
66 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
67 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
68 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
69 return MSVCIntrin::_InterlockedExchangeAdd_rel;
70 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
71 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
72 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
73 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
74 return MSVCIntrin::_InterlockedExchangeAdd_nf;
75 case clang::AArch64::BI_InterlockedExchange8_acq:
76 case clang::AArch64::BI_InterlockedExchange16_acq:
77 case clang::AArch64::BI_InterlockedExchange_acq:
78 case clang::AArch64::BI_InterlockedExchange64_acq:
79 case clang::AArch64::BI_InterlockedExchangePointer_acq:
80 return MSVCIntrin::_InterlockedExchange_acq;
81 case clang::AArch64::BI_InterlockedExchange8_rel:
82 case clang::AArch64::BI_InterlockedExchange16_rel:
83 case clang::AArch64::BI_InterlockedExchange_rel:
84 case clang::AArch64::BI_InterlockedExchange64_rel:
85 case clang::AArch64::BI_InterlockedExchangePointer_rel:
86 return MSVCIntrin::_InterlockedExchange_rel;
87 case clang::AArch64::BI_InterlockedExchange8_nf:
88 case clang::AArch64::BI_InterlockedExchange16_nf:
89 case clang::AArch64::BI_InterlockedExchange_nf:
90 case clang::AArch64::BI_InterlockedExchange64_nf:
91 case clang::AArch64::BI_InterlockedExchangePointer_nf:
92 return MSVCIntrin::_InterlockedExchange_nf;
93 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
94 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
95 case clang::AArch64::BI_InterlockedCompareExchange_acq:
96 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
97 case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
98 return MSVCIntrin::_InterlockedCompareExchange_acq;
99 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
100 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
101 case clang::AArch64::BI_InterlockedCompareExchange_rel:
102 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
103 case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
104 return MSVCIntrin::_InterlockedCompareExchange_rel;
105 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
106 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
107 case clang::AArch64::BI_InterlockedCompareExchange_nf:
108 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
109 return MSVCIntrin::_InterlockedCompareExchange_nf;
110 case clang::AArch64::BI_InterlockedCompareExchange128:
111 return MSVCIntrin::_InterlockedCompareExchange128;
112 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
113 return MSVCIntrin::_InterlockedCompareExchange128_acq;
114 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
115 return MSVCIntrin::_InterlockedCompareExchange128_nf;
116 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
117 return MSVCIntrin::_InterlockedCompareExchange128_rel;
118 case clang::AArch64::BI_InterlockedOr8_acq:
119 case clang::AArch64::BI_InterlockedOr16_acq:
120 case clang::AArch64::BI_InterlockedOr_acq:
121 case clang::AArch64::BI_InterlockedOr64_acq:
122 return MSVCIntrin::_InterlockedOr_acq;
123 case clang::AArch64::BI_InterlockedOr8_rel:
124 case clang::AArch64::BI_InterlockedOr16_rel:
125 case clang::AArch64::BI_InterlockedOr_rel:
126 case clang::AArch64::BI_InterlockedOr64_rel:
127 return MSVCIntrin::_InterlockedOr_rel;
128 case clang::AArch64::BI_InterlockedOr8_nf:
129 case clang::AArch64::BI_InterlockedOr16_nf:
130 case clang::AArch64::BI_InterlockedOr_nf:
131 case clang::AArch64::BI_InterlockedOr64_nf:
132 return MSVCIntrin::_InterlockedOr_nf;
133 case clang::AArch64::BI_InterlockedXor8_acq:
134 case clang::AArch64::BI_InterlockedXor16_acq:
135 case clang::AArch64::BI_InterlockedXor_acq:
136 case clang::AArch64::BI_InterlockedXor64_acq:
137 return MSVCIntrin::_InterlockedXor_acq;
138 case clang::AArch64::BI_InterlockedXor8_rel:
139 case clang::AArch64::BI_InterlockedXor16_rel:
140 case clang::AArch64::BI_InterlockedXor_rel:
141 case clang::AArch64::BI_InterlockedXor64_rel:
142 return MSVCIntrin::_InterlockedXor_rel;
143 case clang::AArch64::BI_InterlockedXor8_nf:
144 case clang::AArch64::BI_InterlockedXor16_nf:
145 case clang::AArch64::BI_InterlockedXor_nf:
146 case clang::AArch64::BI_InterlockedXor64_nf:
147 return MSVCIntrin::_InterlockedXor_nf;
148 case clang::AArch64::BI_InterlockedAnd8_acq:
149 case clang::AArch64::BI_InterlockedAnd16_acq:
150 case clang::AArch64::BI_InterlockedAnd_acq:
151 case clang::AArch64::BI_InterlockedAnd64_acq:
152 return MSVCIntrin::_InterlockedAnd_acq;
153 case clang::AArch64::BI_InterlockedAnd8_rel:
154 case clang::AArch64::BI_InterlockedAnd16_rel:
155 case clang::AArch64::BI_InterlockedAnd_rel:
156 case clang::AArch64::BI_InterlockedAnd64_rel:
157 return MSVCIntrin::_InterlockedAnd_rel;
158 case clang::AArch64::BI_InterlockedAnd8_nf:
159 case clang::AArch64::BI_InterlockedAnd16_nf:
160 case clang::AArch64::BI_InterlockedAnd_nf:
161 case clang::AArch64::BI_InterlockedAnd64_nf:
162 return MSVCIntrin::_InterlockedAnd_nf;
163 case clang::AArch64::BI_InterlockedIncrement16_acq:
164 case clang::AArch64::BI_InterlockedIncrement_acq:
165 case clang::AArch64::BI_InterlockedIncrement64_acq:
166 return MSVCIntrin::_InterlockedIncrement_acq;
167 case clang::AArch64::BI_InterlockedIncrement16_rel:
168 case clang::AArch64::BI_InterlockedIncrement_rel:
169 case clang::AArch64::BI_InterlockedIncrement64_rel:
170 return MSVCIntrin::_InterlockedIncrement_rel;
171 case clang::AArch64::BI_InterlockedIncrement16_nf:
172 case clang::AArch64::BI_InterlockedIncrement_nf:
173 case clang::AArch64::BI_InterlockedIncrement64_nf:
174 return MSVCIntrin::_InterlockedIncrement_nf;
175 case clang::AArch64::BI_InterlockedDecrement16_acq:
176 case clang::AArch64::BI_InterlockedDecrement_acq:
177 case clang::AArch64::BI_InterlockedDecrement64_acq:
178 return MSVCIntrin::_InterlockedDecrement_acq;
179 case clang::AArch64::BI_InterlockedDecrement16_rel:
180 case clang::AArch64::BI_InterlockedDecrement_rel:
181 case clang::AArch64::BI_InterlockedDecrement64_rel:
182 return MSVCIntrin::_InterlockedDecrement_rel;
183 case clang::AArch64::BI_InterlockedDecrement16_nf:
184 case clang::AArch64::BI_InterlockedDecrement_nf:
185 case clang::AArch64::BI_InterlockedDecrement64_nf:
186 return MSVCIntrin::_InterlockedDecrement_nf;
187 }
188 llvm_unreachable("must return from switch");
189}
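// Illustrative note (not in the original source): the 8/16/32/64-bit variants
// of each _Interlocked* family collapse onto a single MSVCIntrin handler
// because EmitMSVCBuiltinExpr derives the operation width from the argument
// type. For example, _InterlockedExchangeAdd64_acq is emitted as an
// `atomicrmw add ... acquire` on i64, just as the 32-bit form is on i32.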

static std::optional<CodeGenFunction::MSVCIntrin>
translateArmToMsvcIntrin(unsigned BuiltinID) {
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  switch (BuiltinID) {
  default:
    return std::nullopt;
  case clang::ARM::BI_BitScanForward:
  case clang::ARM::BI_BitScanForward64:
    return MSVCIntrin::_BitScanForward;
  case clang::ARM::BI_BitScanReverse:
  case clang::ARM::BI_BitScanReverse64:
    return MSVCIntrin::_BitScanReverse;
  case clang::ARM::BI_InterlockedAnd64:
    return MSVCIntrin::_InterlockedAnd;
  case clang::ARM::BI_InterlockedExchange64:
    return MSVCIntrin::_InterlockedExchange;
  case clang::ARM::BI_InterlockedExchangeAdd64:
    return MSVCIntrin::_InterlockedExchangeAdd;
  case clang::ARM::BI_InterlockedExchangeSub64:
    return MSVCIntrin::_InterlockedExchangeSub;
  case clang::ARM::BI_InterlockedOr64:
    return MSVCIntrin::_InterlockedOr;
  case clang::ARM::BI_InterlockedXor64:
    return MSVCIntrin::_InterlockedXor;
  case clang::ARM::BI_InterlockedDecrement64:
    return MSVCIntrin::_InterlockedDecrement;
  case clang::ARM::BI_InterlockedIncrement64:
    return MSVCIntrin::_InterlockedIncrement;
  case clang::ARM::BI_InterlockedExchangeAdd8_acq:
  case clang::ARM::BI_InterlockedExchangeAdd16_acq:
  case clang::ARM::BI_InterlockedExchangeAdd_acq:
  case clang::ARM::BI_InterlockedExchangeAdd64_acq:
    return MSVCIntrin::_InterlockedExchangeAdd_acq;
  case clang::ARM::BI_InterlockedExchangeAdd8_rel:
  case clang::ARM::BI_InterlockedExchangeAdd16_rel:
  case clang::ARM::BI_InterlockedExchangeAdd_rel:
  case clang::ARM::BI_InterlockedExchangeAdd64_rel:
    return MSVCIntrin::_InterlockedExchangeAdd_rel;
  case clang::ARM::BI_InterlockedExchangeAdd8_nf:
  case clang::ARM::BI_InterlockedExchangeAdd16_nf:
  case clang::ARM::BI_InterlockedExchangeAdd_nf:
  case clang::ARM::BI_InterlockedExchangeAdd64_nf:
    return MSVCIntrin::_InterlockedExchangeAdd_nf;
  case clang::ARM::BI_InterlockedExchange8_acq:
  case clang::ARM::BI_InterlockedExchange16_acq:
  case clang::ARM::BI_InterlockedExchange_acq:
  case clang::ARM::BI_InterlockedExchange64_acq:
  case clang::ARM::BI_InterlockedExchangePointer_acq:
    return MSVCIntrin::_InterlockedExchange_acq;
  case clang::ARM::BI_InterlockedExchange8_rel:
  case clang::ARM::BI_InterlockedExchange16_rel:
  case clang::ARM::BI_InterlockedExchange_rel:
  case clang::ARM::BI_InterlockedExchange64_rel:
  case clang::ARM::BI_InterlockedExchangePointer_rel:
    return MSVCIntrin::_InterlockedExchange_rel;
  case clang::ARM::BI_InterlockedExchange8_nf:
  case clang::ARM::BI_InterlockedExchange16_nf:
  case clang::ARM::BI_InterlockedExchange_nf:
  case clang::ARM::BI_InterlockedExchange64_nf:
  case clang::ARM::BI_InterlockedExchangePointer_nf:
    return MSVCIntrin::_InterlockedExchange_nf;
  case clang::ARM::BI_InterlockedCompareExchange8_acq:
  case clang::ARM::BI_InterlockedCompareExchange16_acq:
  case clang::ARM::BI_InterlockedCompareExchange_acq:
  case clang::ARM::BI_InterlockedCompareExchange64_acq:
  case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
    return MSVCIntrin::_InterlockedCompareExchange_acq;
  case clang::ARM::BI_InterlockedCompareExchange8_rel:
  case clang::ARM::BI_InterlockedCompareExchange16_rel:
  case clang::ARM::BI_InterlockedCompareExchange_rel:
  case clang::ARM::BI_InterlockedCompareExchange64_rel:
  case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
    return MSVCIntrin::_InterlockedCompareExchange_rel;
  case clang::ARM::BI_InterlockedCompareExchange8_nf:
  case clang::ARM::BI_InterlockedCompareExchange16_nf:
  case clang::ARM::BI_InterlockedCompareExchange_nf:
  case clang::ARM::BI_InterlockedCompareExchange64_nf:
    return MSVCIntrin::_InterlockedCompareExchange_nf;
  case clang::ARM::BI_InterlockedOr8_acq:
  case clang::ARM::BI_InterlockedOr16_acq:
  case clang::ARM::BI_InterlockedOr_acq:
  case clang::ARM::BI_InterlockedOr64_acq:
    return MSVCIntrin::_InterlockedOr_acq;
  case clang::ARM::BI_InterlockedOr8_rel:
  case clang::ARM::BI_InterlockedOr16_rel:
  case clang::ARM::BI_InterlockedOr_rel:
  case clang::ARM::BI_InterlockedOr64_rel:
    return MSVCIntrin::_InterlockedOr_rel;
  case clang::ARM::BI_InterlockedOr8_nf:
  case clang::ARM::BI_InterlockedOr16_nf:
  case clang::ARM::BI_InterlockedOr_nf:
  case clang::ARM::BI_InterlockedOr64_nf:
    return MSVCIntrin::_InterlockedOr_nf;
  case clang::ARM::BI_InterlockedXor8_acq:
  case clang::ARM::BI_InterlockedXor16_acq:
  case clang::ARM::BI_InterlockedXor_acq:
  case clang::ARM::BI_InterlockedXor64_acq:
    return MSVCIntrin::_InterlockedXor_acq;
  case clang::ARM::BI_InterlockedXor8_rel:
  case clang::ARM::BI_InterlockedXor16_rel:
  case clang::ARM::BI_InterlockedXor_rel:
  case clang::ARM::BI_InterlockedXor64_rel:
    return MSVCIntrin::_InterlockedXor_rel;
  case clang::ARM::BI_InterlockedXor8_nf:
  case clang::ARM::BI_InterlockedXor16_nf:
  case clang::ARM::BI_InterlockedXor_nf:
  case clang::ARM::BI_InterlockedXor64_nf:
    return MSVCIntrin::_InterlockedXor_nf;
  case clang::ARM::BI_InterlockedAnd8_acq:
  case clang::ARM::BI_InterlockedAnd16_acq:
  case clang::ARM::BI_InterlockedAnd_acq:
  case clang::ARM::BI_InterlockedAnd64_acq:
    return MSVCIntrin::_InterlockedAnd_acq;
  case clang::ARM::BI_InterlockedAnd8_rel:
  case clang::ARM::BI_InterlockedAnd16_rel:
  case clang::ARM::BI_InterlockedAnd_rel:
  case clang::ARM::BI_InterlockedAnd64_rel:
    return MSVCIntrin::_InterlockedAnd_rel;
  case clang::ARM::BI_InterlockedAnd8_nf:
  case clang::ARM::BI_InterlockedAnd16_nf:
  case clang::ARM::BI_InterlockedAnd_nf:
  case clang::ARM::BI_InterlockedAnd64_nf:
    return MSVCIntrin::_InterlockedAnd_nf;
  case clang::ARM::BI_InterlockedIncrement16_acq:
  case clang::ARM::BI_InterlockedIncrement_acq:
  case clang::ARM::BI_InterlockedIncrement64_acq:
    return MSVCIntrin::_InterlockedIncrement_acq;
  case clang::ARM::BI_InterlockedIncrement16_rel:
  case clang::ARM::BI_InterlockedIncrement_rel:
  case clang::ARM::BI_InterlockedIncrement64_rel:
    return MSVCIntrin::_InterlockedIncrement_rel;
  case clang::ARM::BI_InterlockedIncrement16_nf:
  case clang::ARM::BI_InterlockedIncrement_nf:
  case clang::ARM::BI_InterlockedIncrement64_nf:
    return MSVCIntrin::_InterlockedIncrement_nf;
  case clang::ARM::BI_InterlockedDecrement16_acq:
  case clang::ARM::BI_InterlockedDecrement_acq:
  case clang::ARM::BI_InterlockedDecrement64_acq:
    return MSVCIntrin::_InterlockedDecrement_acq;
  case clang::ARM::BI_InterlockedDecrement16_rel:
  case clang::ARM::BI_InterlockedDecrement_rel:
  case clang::ARM::BI_InterlockedDecrement64_rel:
    return MSVCIntrin::_InterlockedDecrement_rel;
  case clang::ARM::BI_InterlockedDecrement16_nf:
  case clang::ARM::BI_InterlockedDecrement_nf:
  case clang::ARM::BI_InterlockedDecrement64_nf:
    return MSVCIntrin::_InterlockedDecrement_nf;
  }
  llvm_unreachable("must return from switch");
}

// Emit an intrinsic where all operands are of the same type as the result.
// Depending on mode, this may be a constrained floating-point intrinsic.
static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                                unsigned IntrinsicID,
                                                unsigned ConstrainedIntrinsicID,
                                                llvm::Type *Ty,
                                                ArrayRef<Value *> Args) {
  Function *F;
  if (CGF.Builder.getIsFPConstrained())
    F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
  else
    F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);

  if (CGF.Builder.getIsFPConstrained())
    return CGF.Builder.CreateConstrainedFPCall(F, Args);

  return CGF.Builder.CreateCall(F, Args);
}
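// Illustrative example (not in the original source): with IntrinsicID =
// Intrinsic::ceil and ConstrainedIntrinsicID =
// Intrinsic::experimental_constrained_ceil, emitting under strict FP produces
//   call <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x float> %x, ...)
// where CreateConstrainedFPCall appends the current rounding-mode and
// exception-behavior metadata operands; otherwise a plain @llvm.ceil.v4f32
// call is emitted.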

static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
                                          NeonTypeFlags TypeFlags,
                                          bool HasFastHalfType = true,
                                          bool V1Ty = false,
                                          bool AllowBFloatArgsAndRet = true) {
  int IsQuad = TypeFlags.isQuad();
  switch (TypeFlags.getEltType()) {
  case NeonTypeFlags::Int8:
  case NeonTypeFlags::Poly8:
  case NeonTypeFlags::MFloat8:
    return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
  case NeonTypeFlags::Int16:
  case NeonTypeFlags::Poly16:
    return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::BFloat16:
    if (AllowBFloatArgsAndRet)
      return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
    return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::Float16:
    if (HasFastHalfType)
      return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
    return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::Int32:
    return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
  case NeonTypeFlags::Int64:
  case NeonTypeFlags::Poly64:
    return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
  case NeonTypeFlags::Poly128:
    // FIXME: i128 and f128 aren't fully supported in Clang and LLVM, and much
    // of the i128/f128 API is missing, so use v16i8 to represent poly128 and
    // rely on pattern matching.
    return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
  case NeonTypeFlags::Float32:
    return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
  case NeonTypeFlags::Float64:
    return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
  }
  llvm_unreachable("Unknown vector element type!");
}
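// Illustrative mapping: NeonTypeFlags with element type Int8 and the quad bit
// set yields <16 x i8>; Float32 without the quad bit yields <2 x float>; and
// V1Ty forces a single-element vector such as <1 x i64>.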

static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
                                          NeonTypeFlags IntTypeFlags) {
  int IsQuad = IntTypeFlags.isQuad();
  switch (IntTypeFlags.getEltType()) {
  case NeonTypeFlags::Int16:
    return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
  case NeonTypeFlags::Int32:
    return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
  case NeonTypeFlags::Int64:
    return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
  default:
    llvm_unreachable("Type can't be converted to floating-point!");
  }
}

Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
                                      const ElementCount &Count) {
  Value *SV = llvm::ConstantVector::getSplat(Count, C);
  return Builder.CreateShuffleVector(V, V, SV, "lane");
}
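// Illustrative example: splatting lane 1 of a <4 x i32> value uses the mask
// <i32 1, i32 1, i32 1, i32 1>, i.e.
//   %lane = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// which broadcasts element 1 into every result lane.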

Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
  ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
  return EmitNeonSplat(V, C, EC);
}

Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
                                     const char *name,
                                     unsigned shift, bool rightshift) {
  unsigned j = 0;
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j) {
    if (F->isConstrainedFPIntrinsic())
      if (ai->getType()->isMetadataTy())
        continue;
    if (shift > 0 && shift == j)
      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
    else
      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
  }

  if (F->isConstrainedFPIntrinsic())
    return Builder.CreateConstrainedFPCall(F, Ops, name);
  return Builder.CreateCall(F, Ops, name);
}
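// Illustrative note: for an immediate-shift builtin such as vshl_n, callers
// pass the index of the shift operand in `shift`; that operand is splatted to
// a constant vector via EmitNeonShiftVector (negated first for right shifts),
// while every other operand is simply bitcast to the parameter type the
// intrinsic expects.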

llvm::Value *CodeGenFunction::EmitFP8NeonCall(unsigned IID,
                                              ArrayRef<llvm::Type *> Tys,
                                              SmallVectorImpl<Value *> &Ops,
                                              const CallExpr *E, const char *name) {
  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
                     Ops.pop_back_val());
  return EmitNeonCall(CGM.getIntrinsic(IID, Tys), Ops, name);
}

llvm::Value *CodeGenFunction::EmitFP8NeonFDOTCall(
    unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
    SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {

  const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
                             RetTy->getPrimitiveSizeInBits();
  llvm::Type *Tys[] = {llvm::FixedVectorType::get(RetTy, ElemCount),
                       Ops[1]->getType()};
  if (ExtendLaneArg) {
    auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
    Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
                                        uint64_t(0));
  }
  return EmitFP8NeonCall(IID, Tys, Ops, E, name);
}

llvm::Value *CodeGenFunction::EmitFP8NeonFMLACall(
    unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
    SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {

  if (ExtendLaneArg) {
    auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
    Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
                                        uint64_t(0));
  }
  const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
                             RetTy->getPrimitiveSizeInBits();
  return EmitFP8NeonCall(IID, {llvm::FixedVectorType::get(RetTy, ElemCount)},
                         Ops, E, name);
}

Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
                                            bool neg) {
  int SV = cast<ConstantInt>(V)->getSExtValue();
  return ConstantInt::getSigned(Ty, neg ? -SV : SV);
}

Value *CodeGenFunction::EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
                                           llvm::Type *Ty1, bool Extract,
                                           SmallVectorImpl<llvm::Value *> &Ops,
                                           const CallExpr *E,
                                           const char *name) {
  llvm::Type *Tys[] = {Ty0, Ty1};
  if (Extract) {
    // Op[0] is mfloat8x16_t, but the intrinsic converts only the lower part of
    // the vector.
    Tys[1] = llvm::FixedVectorType::get(Int8Ty, 8);
    Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], uint64_t(0));
  }
  return EmitFP8NeonCall(IID, Tys, Ops, E, name);
}

// Right-shift a vector by a constant.
Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
                                          llvm::Type *Ty, bool usgn,
                                          const char *name) {
  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);

  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
  int EltSize = VTy->getScalarSizeInBits();

  Vec = Builder.CreateBitCast(Vec, Ty);

  // lshr/ashr are undefined when the shift amount is equal to the vector
  // element size.
  if (ShiftAmt == EltSize) {
    if (usgn) {
      // Right-shifting an unsigned value by its size yields 0.
      return llvm::ConstantAggregateZero::get(VTy);
    } else {
      // Right-shifting a signed value by its size is equivalent
      // to a shift of size-1.
      --ShiftAmt;
      Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
    }
  }

  Shift = EmitNeonShiftVector(Shift, Ty, false);
  if (usgn)
    return Builder.CreateLShr(Vec, Shift, name);
  return Builder.CreateAShr(Vec, Shift, name);
}
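// Illustrative example: vshrq_n_s32(v, 32) cannot be emitted as `ashr ... 32`
// (a shift equal to the bit width is poison in LLVM IR), so it is lowered as
// `ashr ... 31`, which gives the same result for signed values; the unsigned
// vshrq_n_u32(v, 32) folds directly to zeroinitializer.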

// clang-format off
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
  NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
  NEONMAP0(splat_lane_v),
  NEONMAP0(splat_laneq_v),
  NEONMAP0(splatq_lane_v),
  NEONMAP0(splatq_laneq_v),
  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vabs_v, arm_neon_vabs, 0),
  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
  NEONMAP0(vadd_v),
  NEONMAP0(vaddhn_v),
  NEONMAP0(vaddq_v),
  NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
  NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
  NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
  NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
  NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
  NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
  NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
  NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
  NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcage_v, arm_neon_vacge, 0),
  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcale_v, arm_neon_vacge, 0),
  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
  NEONMAP0(vceqz_v),
  NEONMAP0(vceqzq_v),
  NEONMAP0(vcgez_v),
  NEONMAP0(vcgezq_v),
  NEONMAP0(vcgtz_v),
  NEONMAP0(vcgtzq_v),
  NEONMAP0(vclez_v),
  NEONMAP0(vclezq_v),
  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
  NEONMAP0(vcltz_v),
  NEONMAP0(vcltzq_v),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
  NEONMAP0(vcvt_f16_s16),
  NEONMAP0(vcvt_f16_u16),
  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvt_s16_f16),
  NEONMAP0(vcvt_s32_v),
  NEONMAP0(vcvt_s64_v),
  NEONMAP0(vcvt_u16_f16),
  NEONMAP0(vcvt_u32_v),
  NEONMAP0(vcvt_u64_v),
  NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
  NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP0(vcvtq_f16_s16),
  NEONMAP0(vcvtq_f16_u16),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_s16_f16),
  NEONMAP0(vcvtq_s32_v),
  NEONMAP0(vcvtq_s64_v),
  NEONMAP0(vcvtq_u16_f16),
  NEONMAP0(vcvtq_u32_v),
  NEONMAP0(vcvtq_u64_v),
  NEONMAP1(vdot_s32, arm_neon_sdot, 0),
  NEONMAP1(vdot_u32, arm_neon_udot, 0),
  NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
  NEONMAP1(vdotq_u32, arm_neon_udot, 0),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP0(vld1_dup_v),
  NEONMAP1(vld1_v, arm_neon_vld1, 0),
  NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
  NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
  NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
  NEONMAP0(vld1q_dup_v),
  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
  NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
  NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
  NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
  NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2_v, arm_neon_vld2, 0),
  NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
  NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3_v, arm_neon_vld3, 0),
  NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
  NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4_v, arm_neon_vld4, 0),
  NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
  NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP0(vmull_v),
  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vrnd_v, trunc, Add1ArgType),
  NEONMAP1(vrnda_v, round, Add1ArgType),
  NEONMAP1(vrndaq_v, round, Add1ArgType),
  NEONMAP0(vrndi_v),
  NEONMAP0(vrndiq_v),
  NEONMAP1(vrndm_v, floor, Add1ArgType),
  NEONMAP1(vrndmq_v, floor, Add1ArgType),
  NEONMAP1(vrndn_v, roundeven, Add1ArgType),
  NEONMAP1(vrndnq_v, roundeven, Add1ArgType),
  NEONMAP1(vrndp_v, ceil, Add1ArgType),
  NEONMAP1(vrndpq_v, ceil, Add1ArgType),
  NEONMAP1(vrndq_v, trunc, Add1ArgType),
  NEONMAP1(vrndx_v, rint, Add1ArgType),
  NEONMAP1(vrndxq_v, rint, Add1ArgType),
  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
  NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
  NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
  NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
  NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
  NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP1(vst1_v, arm_neon_vst1, 0),
  NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
  NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
  NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
  NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
  NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2_v, arm_neon_vst2, 0),
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3_v, arm_neon_vst3, 0),
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4_v, arm_neon_vst4, 0),
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtrn_v),
  NEONMAP0(vtrnq_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
  NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
  NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
  NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
  NEONMAP0(vuzp_v),
  NEONMAP0(vuzpq_v),
  NEONMAP0(vzip_v),
  NEONMAP0(vzipq_v)
};

// clang-format on
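// Illustrative expansion (assuming the NEONMAP1 macro parallels the SVEMAP1
// and SMEMAP1 macros defined later in this file):
//   NEONMAP1(vabs_v, arm_neon_vabs, 0)
// becomes roughly
//   { "vabs_v", NEON::BI__builtin_neon_vabs_v, Intrinsic::arm_neon_vabs, 0, 0 }
// i.e. a name hint, the Clang builtin ID, the LLVM intrinsic, an alternate
// (unsigned) intrinsic of 0, and a type-modifier mask.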

// Some intrinsics are equivalent for codegen.
static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
  { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
  { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
  { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
  { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
  { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
  { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
  { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
  { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
  { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
  { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
  { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
  { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
  { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
  { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
  { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
  { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
  { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
  { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
  { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
  { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
  { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
  { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
  { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
  { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
  { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
  { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
  { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
  { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
  { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
  { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
  { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
  { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
  { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
  { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
  { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
  { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
  { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
  { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
  { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
  { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
  { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
  { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
  { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
  { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
  { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
  { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
  { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
  { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
  { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
  { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
  { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
  { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
  { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
  { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
  { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
  { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
  { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
  { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
  { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
  { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
  { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
  { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
  { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
  { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
  { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
  { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
  { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
  { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
  { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
  { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
  { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
  { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
  { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
  { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
  { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
  { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
  { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
  { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
  { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
  { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
  { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
  { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
  { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
  { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
  { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
  { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
  { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
  { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
  { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
  { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
  { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
  { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
  { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
  { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
  { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
  { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
  { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
  { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
  { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
  { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
  { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
  { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
  { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
  { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
  { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
  { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
  { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
  { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
  { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
  { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
  { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
  { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
  { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
  { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
  { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
  { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
  { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
  { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
  { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
  { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
  { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
  { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
  { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
  { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
  { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
  { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
  { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
  { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
  // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
  // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
  // arbitrary one to be handled as the canonical variant.
  { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
  { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
  { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
  { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
  { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
  { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
  { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
  { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
  { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
  { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
  { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
  { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
};

#undef NEONMAP0
#undef NEONMAP1
#undef NEONMAP2

#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier)                         \
  {                                                                            \
    #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0,   \
    TypeModifier                                                               \
  }

#define SVEMAP2(NameBase, TypeModifier)                                        \
  { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
#define GET_SVE_LLVM_INTRINSIC_MAP
#include "clang/Basic/arm_sve_builtin_cg.inc"
#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
#undef GET_SVE_LLVM_INTRINSIC_MAP
};

#undef SVEMAP1
#undef SVEMAP2

#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier)                         \
  {                                                                            \
    #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0,   \
    TypeModifier                                                               \
  }

#define SMEMAP2(NameBase, TypeModifier)                                        \
  { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
#define GET_SME_LLVM_INTRINSIC_MAP
#include "clang/Basic/arm_sme_builtin_cg.inc"
#undef GET_SME_LLVM_INTRINSIC_MAP
};

#undef SMEMAP1
#undef SMEMAP2

static bool NEONSIMDIntrinsicsProvenSorted = false;

static bool AArch64SIMDIntrinsicsProvenSorted = false;
static bool AArch64SISDIntrinsicsProvenSorted = false;
static bool AArch64SVEIntrinsicsProvenSorted = false;
static bool AArch64SMEIntrinsicsProvenSorted = false;

// Check whether the builtin `BuiltinID` is present in `IntrinsicMap`; if so,
// return the corresponding info struct.
static const ARMVectorIntrinsicInfo *
findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
                            unsigned BuiltinID, bool &MapProvenSorted) {

#ifndef NDEBUG
  if (!MapProvenSorted) {
    assert(llvm::is_sorted(IntrinsicMap));
    MapProvenSorted = true;
  }
#endif

  const ARMVectorIntrinsicInfo *Builtin =
      llvm::lower_bound(IntrinsicMap, BuiltinID);

  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
    return Builtin;

  return nullptr;
}
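// Usage sketch (illustrative): callers pair each table with its ProvenSorted
// flag, e.g.
//   findARMVectorIntrinsicInMap(ARMSIMDIntrinsicMap, BuiltinID,
//                               NEONSIMDIntrinsicsProvenSorted);
// The one-time is_sorted assertion makes the binary search in lower_bound
// safe in debug builds without paying for the check on every lookup.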

Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
                                                   unsigned Modifier,
                                                   llvm::Type *ArgType,
                                                   const CallExpr *E) {
  int VectorSize = 0;
  if (Modifier & Use64BitVectors)
    VectorSize = 64;
  else if (Modifier & Use128BitVectors)
    VectorSize = 128;

  // Return type.
  SmallVector<llvm::Type *, 3> Tys;
  if (Modifier & AddRetType) {
    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
    if (Modifier & VectorizeRetType)
      Ty = llvm::FixedVectorType::get(
          Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);

    Tys.push_back(Ty);
  }

  // Arguments.
  if (Modifier & VectorizeArgTypes) {
    int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
    ArgType = llvm::FixedVectorType::get(ArgType, Elts);
  }

  if (Modifier & (Add1ArgType | Add2ArgTypes))
    Tys.push_back(ArgType);

  if (Modifier & Add2ArgTypes)
    Tys.push_back(ArgType);

  if (Modifier & InventFloatType)
    Tys.push_back(FloatTy);

  return CGM.getIntrinsic(IntrinsicID, Tys);
}
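// Illustrative example: a map entry with Modifier = AddRetType | Add1ArgType
// resolves the overloaded intrinsic with Tys = {ReturnType, ArgType}, so a
// builtin returning i32 and taking <4 x i16> would select the
// llvm.<intrinsic>.i32.v4i16 overload.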

//===----------------------------------------------------------------------===//
// Emit-helpers
//===----------------------------------------------------------------------===//
static Value *EmitCommonNeonSISDBuiltinExpr(
    CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
    SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
  assert(SISDInfo.LLVMIntrinsic && "Generic code assumes a valid intrinsic");

  switch (SISDInfo.BuiltinID) {
  case NEON::BI__builtin_neon_vcled_s64:
  case NEON::BI__builtin_neon_vcled_u64:
  case NEON::BI__builtin_neon_vcles_f32:
  case NEON::BI__builtin_neon_vcled_f64:
  case NEON::BI__builtin_neon_vcltd_s64:
  case NEON::BI__builtin_neon_vcltd_u64:
  case NEON::BI__builtin_neon_vclts_f32:
  case NEON::BI__builtin_neon_vcltd_f64:
  case NEON::BI__builtin_neon_vcales_f32:
  case NEON::BI__builtin_neon_vcaled_f64:
  case NEON::BI__builtin_neon_vcalts_f32:
  case NEON::BI__builtin_neon_vcaltd_f64:
    // Only one direction of comparison intrinsics actually exists; cmle is
    // a cmge with swapped operands. The table gives us the right intrinsic,
    // but we still need to do the swap.
    std::swap(Ops[0], Ops[1]);
    break;
  }

  // Use fptosi.sat/fptoui.sat unless under strict FP.
  unsigned LLVMIntrinsic = SISDInfo.LLVMIntrinsic;
  if (!CGF.Builder.getIsFPConstrained()) {
    if (LLVMIntrinsic == Intrinsic::aarch64_neon_fcvtzs)
      LLVMIntrinsic = Intrinsic::fptosi_sat;
    else if (LLVMIntrinsic == Intrinsic::aarch64_neon_fcvtzu)
      LLVMIntrinsic = Intrinsic::fptoui_sat;
  }
  llvm::Type *ArgTy = CGF.ConvertType(E->getArg(0)->getType());
  Function *F = CGF.LookupNeonLLVMIntrinsic(LLVMIntrinsic,
                                            SISDInfo.TypeModifier, ArgTy, E);

  int j = 0;
  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j) {
    llvm::Type *ArgTy = ai->getType();
    if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
        ArgTy->getPrimitiveSizeInBits())
      continue;
    assert(
        ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy() &&
        "Expecting vector LLVM intrinsic type and scalar Clang builtin type!");

    // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
    // it before inserting.
    Ops[j] = CGF.Builder.CreateTruncOrBitCast(
        Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
    Ops[j] =
        CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
  }

  Value *Result = CGF.EmitNeonCall(F, Ops, SISDInfo.NameHint);
  llvm::Type *ResultType = CGF.ConvertType(E->getType());
  if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
      Result->getType()->getPrimitiveSizeInBits().getFixedValue())
    return CGF.Builder.CreateExtractElement(Result, C0);

  return CGF.Builder.CreateBitCast(Result, ResultType, SISDInfo.NameHint);
}
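// Illustrative walk-through: for vcled_s64(a, b) the table maps to the cmge
// intrinsic, the switch above swaps a and b, the scalar i64 operands are
// inserted into lane 0 of <1 x i64> vectors to match the vector-typed
// intrinsic, and the scalar result is extracted from lane 0 again at the end.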
1183
1185 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
1186 const char *NameHint, unsigned Modifier, const CallExpr *E,
1187 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
1188 llvm::Triple::ArchType Arch) {
1189
1190 // Extract the trailing immediate argument that encodes the type discriminator
1191 // for this overloaded intrinsic.
1192 // TODO: Move to the parent code that takes care of argument processing.
1193 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
1194 std::optional<llvm::APSInt> NeonTypeConst =
1196 if (!NeonTypeConst)
1197 return nullptr;
1198
1199 // Determine the type of this overloaded NEON intrinsic.
1200 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
1201 const bool Usgn = Type.isUnsigned();
1202 const bool Quad = Type.isQuad();
1203 const bool Floating = Type.isFloatingPoint();
1204 const bool HasFastHalfType = getTarget().hasFastHalfType();
1205 const bool AllowBFloatArgsAndRet =
1206 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
1207
1208 llvm::FixedVectorType *VTy =
1209 GetNeonType(this, Type, HasFastHalfType, false, AllowBFloatArgsAndRet);
1210 llvm::Type *Ty = VTy;
1211 if (!Ty)
1212 return nullptr;
1213
1214 auto getAlignmentValue32 = [&](Address addr) -> Value* {
1215 return Builder.getInt32(addr.getAlignment().getQuantity());
1216 };
1217
1218 unsigned Int = LLVMIntrinsic;
1219 if ((Modifier & UnsignedAlts) && !Usgn)
1220 Int = AltLLVMIntrinsic;
1221
1222 switch (BuiltinID) {
1223 default: break;
1224 case NEON::BI__builtin_neon_splat_lane_v:
1225 case NEON::BI__builtin_neon_splat_laneq_v:
1226 case NEON::BI__builtin_neon_splatq_lane_v:
1227 case NEON::BI__builtin_neon_splatq_laneq_v: {
1228 auto NumElements = VTy->getElementCount();
1229 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
1230 NumElements = NumElements * 2;
1231 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
1232 NumElements = NumElements.divideCoefficientBy(2);
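 // splatq_lane reads a lane of a 64-bit vector but produces a 128-bit
 // result, so the element count doubles; splat_laneq is the inverse
 // (128-bit source, 64-bit result), so it is halved.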
1233
1234 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
1235 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
1236 }
1237 case NEON::BI__builtin_neon_vpadd_v:
1238 case NEON::BI__builtin_neon_vpaddq_v:
1239 // We don't allow fp/int overloading of intrinsics.
1240 if (VTy->getElementType()->isFloatingPointTy() &&
1241 Int == Intrinsic::aarch64_neon_addp)
1242 Int = Intrinsic::aarch64_neon_faddp;
1243 break;
1244 case NEON::BI__builtin_neon_vabs_v:
1245 case NEON::BI__builtin_neon_vabsq_v:
1246 if (VTy->getElementType()->isFloatingPointTy())
1247 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
1248 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
1249 case NEON::BI__builtin_neon_vadd_v:
1250 case NEON::BI__builtin_neon_vaddq_v: {
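 // This path handles polynomial element types: polynomial addition in
 // GF(2)[x] is carry-less, i.e. a bitwise XOR of the operands viewed as
 // byte vectors.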
1251 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
1252 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
1253 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
1254 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
1255 return Builder.CreateBitCast(Ops[0], Ty);
1256 }
1257 case NEON::BI__builtin_neon_vaddhn_v: {
1258 llvm::FixedVectorType *SrcTy =
1259 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1260
1261 // %sum = add <4 x i32> %lhs, %rhs
1262 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1263 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
1264 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
1265
1266 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
1267 Constant *ShiftAmt =
1268 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
1269 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
1270
1271 // %res = trunc <4 x i32> %high to <4 x i16>
1272 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
1273 }
1274 case NEON::BI__builtin_neon_vcale_v:
1275 case NEON::BI__builtin_neon_vcaleq_v:
1276 case NEON::BI__builtin_neon_vcalt_v:
1277 case NEON::BI__builtin_neon_vcaltq_v:
1278 std::swap(Ops[0], Ops[1]);
1279 [[fallthrough]];
1280 case NEON::BI__builtin_neon_vcage_v:
1281 case NEON::BI__builtin_neon_vcageq_v:
1282 case NEON::BI__builtin_neon_vcagt_v:
1283 case NEON::BI__builtin_neon_vcagtq_v: {
1284 llvm::Type *Ty;
1285 switch (VTy->getScalarSizeInBits()) {
1286 default: llvm_unreachable("unexpected type");
1287 case 32:
1288 Ty = FloatTy;
1289 break;
1290 case 64:
1291 Ty = DoubleTy;
1292 break;
1293 case 16:
1294 Ty = HalfTy;
1295 break;
1296 }
1297 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
1298 llvm::Type *Tys[] = { VTy, VecFlt };
1299 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1300 return EmitNeonCall(F, Ops, NameHint);
1301 }
1302 case NEON::BI__builtin_neon_vceqz_v:
1303 case NEON::BI__builtin_neon_vceqzq_v:
1304 return EmitAArch64CompareBuiltinExpr(
1305 Ops[0], Ty, Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ, "vceqz");
1306 case NEON::BI__builtin_neon_vcgez_v:
1307 case NEON::BI__builtin_neon_vcgezq_v:
1308 return EmitAArch64CompareBuiltinExpr(
1309 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
1310 "vcgez");
1311 case NEON::BI__builtin_neon_vclez_v:
1312 case NEON::BI__builtin_neon_vclezq_v:
1313 return EmitAArch64CompareBuiltinExpr(
1314 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
1315 "vclez");
1316 case NEON::BI__builtin_neon_vcgtz_v:
1317 case NEON::BI__builtin_neon_vcgtzq_v:
1318 return EmitAArch64CompareBuiltinExpr(
1319 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
1320 "vcgtz");
1321 case NEON::BI__builtin_neon_vcltz_v:
1322 case NEON::BI__builtin_neon_vcltzq_v:
1323 return EmitAArch64CompareBuiltinExpr(
1324 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
1325 "vcltz");
1326 case NEON::BI__builtin_neon_vclz_v:
1327 case NEON::BI__builtin_neon_vclzq_v:
1328 // We generate a target-independent intrinsic, which needs a second argument
1329 // indicating whether or not clz of zero is undefined; on ARM it isn't.
1330 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
1331 break;
1332 case NEON::BI__builtin_neon_vcvt_f32_v:
1333 case NEON::BI__builtin_neon_vcvtq_f32_v:
1334 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1335 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
1336 HasFastHalfType);
1337 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1338 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1339 case NEON::BI__builtin_neon_vcvt_f16_s16:
1340 case NEON::BI__builtin_neon_vcvt_f16_u16:
1341 case NEON::BI__builtin_neon_vcvtq_f16_s16:
1342 case NEON::BI__builtin_neon_vcvtq_f16_u16:
1343 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1344 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
1345 HasFastHalfType);
1346 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1347 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1348 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
1349 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
1350 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
1351 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
1352 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
1353 Function *F = CGM.getIntrinsic(Int, Tys);
1354 return EmitNeonCall(F, Ops, "vcvt_n");
1355 }
1356 case NEON::BI__builtin_neon_vcvt_n_f32_v:
1357 case NEON::BI__builtin_neon_vcvt_n_f64_v:
1358 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
1359 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
1360 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
1361 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
1362 Function *F = CGM.getIntrinsic(Int, Tys);
1363 return EmitNeonCall(F, Ops, "vcvt_n");
1364 }
1365 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
1366 case NEON::BI__builtin_neon_vcvt_n_s32_v:
1367 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
1368 case NEON::BI__builtin_neon_vcvt_n_u32_v:
1369 case NEON::BI__builtin_neon_vcvt_n_s64_v:
1370 case NEON::BI__builtin_neon_vcvt_n_u64_v:
1371 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
1372 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
1373 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
1374 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
1375 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
1376 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
1377 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
1378 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1379 return EmitNeonCall(F, Ops, "vcvt_n");
1380 }
1381 case NEON::BI__builtin_neon_vcvt_s32_v:
1382 case NEON::BI__builtin_neon_vcvt_u32_v:
1383 case NEON::BI__builtin_neon_vcvt_s64_v:
1384 case NEON::BI__builtin_neon_vcvt_u64_v:
1385 case NEON::BI__builtin_neon_vcvt_s16_f16:
1386 case NEON::BI__builtin_neon_vcvt_u16_f16:
1387 case NEON::BI__builtin_neon_vcvtq_s32_v:
1388 case NEON::BI__builtin_neon_vcvtq_u32_v:
1389 case NEON::BI__builtin_neon_vcvtq_s64_v:
1390 case NEON::BI__builtin_neon_vcvtq_u64_v:
1391 case NEON::BI__builtin_neon_vcvtq_s16_f16:
1392 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
1393 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
1394 if (Int) {
1395 // AArch64: use fptosi.sat/fptoui.sat unless under strict FP.
1396 if (!Builder.getIsFPConstrained())
1397 Int = Usgn ? Intrinsic::fptoui_sat : Intrinsic::fptosi_sat;
1398 llvm::Type *Tys[2] = {Ty, Ops[0]->getType()};
1399 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
1400 }
1401 // FIXME: ARM uses plain fptoui/fptosi which have UB on out-of-range
1402 // values. These should also use saturating intrinsics.
1403 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
1404 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
1405 }
1406 case NEON::BI__builtin_neon_vcvta_s16_f16:
1407 case NEON::BI__builtin_neon_vcvta_s32_v:
1408 case NEON::BI__builtin_neon_vcvta_s64_v:
1409 case NEON::BI__builtin_neon_vcvta_u16_f16:
1410 case NEON::BI__builtin_neon_vcvta_u32_v:
1411 case NEON::BI__builtin_neon_vcvta_u64_v:
1412 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
1413 case NEON::BI__builtin_neon_vcvtaq_s32_v:
1414 case NEON::BI__builtin_neon_vcvtaq_s64_v:
1415 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
1416 case NEON::BI__builtin_neon_vcvtaq_u32_v:
1417 case NEON::BI__builtin_neon_vcvtaq_u64_v:
1418 case NEON::BI__builtin_neon_vcvtn_s16_f16:
1419 case NEON::BI__builtin_neon_vcvtn_s32_v:
1420 case NEON::BI__builtin_neon_vcvtn_s64_v:
1421 case NEON::BI__builtin_neon_vcvtn_u16_f16:
1422 case NEON::BI__builtin_neon_vcvtn_u32_v:
1423 case NEON::BI__builtin_neon_vcvtn_u64_v:
1424 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
1425 case NEON::BI__builtin_neon_vcvtnq_s32_v:
1426 case NEON::BI__builtin_neon_vcvtnq_s64_v:
1427 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
1428 case NEON::BI__builtin_neon_vcvtnq_u32_v:
1429 case NEON::BI__builtin_neon_vcvtnq_u64_v:
1430 case NEON::BI__builtin_neon_vcvtp_s16_f16:
1431 case NEON::BI__builtin_neon_vcvtp_s32_v:
1432 case NEON::BI__builtin_neon_vcvtp_s64_v:
1433 case NEON::BI__builtin_neon_vcvtp_u16_f16:
1434 case NEON::BI__builtin_neon_vcvtp_u32_v:
1435 case NEON::BI__builtin_neon_vcvtp_u64_v:
1436 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
1437 case NEON::BI__builtin_neon_vcvtpq_s32_v:
1438 case NEON::BI__builtin_neon_vcvtpq_s64_v:
1439 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
1440 case NEON::BI__builtin_neon_vcvtpq_u32_v:
1441 case NEON::BI__builtin_neon_vcvtpq_u64_v:
1442 case NEON::BI__builtin_neon_vcvtm_s16_f16:
1443 case NEON::BI__builtin_neon_vcvtm_s32_v:
1444 case NEON::BI__builtin_neon_vcvtm_s64_v:
1445 case NEON::BI__builtin_neon_vcvtm_u16_f16:
1446 case NEON::BI__builtin_neon_vcvtm_u32_v:
1447 case NEON::BI__builtin_neon_vcvtm_u64_v:
1448 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
1449 case NEON::BI__builtin_neon_vcvtmq_s32_v:
1450 case NEON::BI__builtin_neon_vcvtmq_s64_v:
1451 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
1452 case NEON::BI__builtin_neon_vcvtmq_u32_v:
1453 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
1454 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
1455 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
1456 }
1457 case NEON::BI__builtin_neon_vcvtx_f32_v: {
1458 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
1459 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
1460
1461 }
1462 case NEON::BI__builtin_neon_vext_v:
1463 case NEON::BI__builtin_neon_vextq_v: {
1464 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
1465 SmallVector<int, 16> Indices;
1466 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1467 Indices.push_back(i+CV);
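 // For example, vext(a, b, 3) on <8 x i8> yields Indices = {3,...,10},
 // selecting a[3..7] followed by b[0..2] from the concatenation a ++ b.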
1468
1469 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1470 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1471 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
1472 }
1473 case NEON::BI__builtin_neon_vfma_v:
1474 case NEON::BI__builtin_neon_vfmaq_v: {
1475 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1476 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1477 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1478
1479 // The NEON intrinsic puts the accumulator first, unlike LLVM's fma, which takes it last.
1480 return emitCallMaybeConstrainedFPBuiltin(
1481 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
1482 {Ops[1], Ops[2], Ops[0]});
1483 }
1484 case NEON::BI__builtin_neon_vld1_v:
1485 case NEON::BI__builtin_neon_vld1q_v: {
1486 llvm::Type *Tys[] = {Ty, Int8PtrTy};
1487 Ops.push_back(getAlignmentValue32(PtrOp0));
1488 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
1489 }
1490 case NEON::BI__builtin_neon_vld1_x2_v:
1491 case NEON::BI__builtin_neon_vld1q_x2_v:
1492 case NEON::BI__builtin_neon_vld1_x3_v:
1493 case NEON::BI__builtin_neon_vld1q_x3_v:
1494 case NEON::BI__builtin_neon_vld1_x4_v:
1495 case NEON::BI__builtin_neon_vld1q_x4_v: {
1496 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
1497 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1498 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
1499 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
1500 }
1501 case NEON::BI__builtin_neon_vld2_v:
1502 case NEON::BI__builtin_neon_vld2q_v:
1503 case NEON::BI__builtin_neon_vld3_v:
1504 case NEON::BI__builtin_neon_vld3q_v:
1505 case NEON::BI__builtin_neon_vld4_v:
1506 case NEON::BI__builtin_neon_vld4q_v:
1507 case NEON::BI__builtin_neon_vld2_dup_v:
1508 case NEON::BI__builtin_neon_vld2q_dup_v:
1509 case NEON::BI__builtin_neon_vld3_dup_v:
1510 case NEON::BI__builtin_neon_vld3q_dup_v:
1511 case NEON::BI__builtin_neon_vld4_dup_v:
1512 case NEON::BI__builtin_neon_vld4q_dup_v: {
1513 llvm::Type *Tys[] = {Ty, Int8PtrTy};
1514 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1515 Value *Align = getAlignmentValue32(PtrOp1);
1516 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
1517 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
1518 }
1519 case NEON::BI__builtin_neon_vld1_dup_v:
1520 case NEON::BI__builtin_neon_vld1q_dup_v: {
1521 Value *V = PoisonValue::get(Ty);
1522 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
1523 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
1524 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
1525 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
1526 return EmitNeonSplat(Ops[0], CI);
1527 }
1528 case NEON::BI__builtin_neon_vld2_lane_v:
1529 case NEON::BI__builtin_neon_vld2q_lane_v:
1530 case NEON::BI__builtin_neon_vld3_lane_v:
1531 case NEON::BI__builtin_neon_vld3q_lane_v:
1532 case NEON::BI__builtin_neon_vld4_lane_v:
1533 case NEON::BI__builtin_neon_vld4q_lane_v: {
1534 llvm::Type *Tys[] = {Ty, Int8PtrTy};
1535 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1536 for (unsigned I = 2; I < Ops.size() - 1; ++I)
1537 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
1538 Ops.push_back(getAlignmentValue32(PtrOp1));
1539 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
1540 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
1541 }
1542 case NEON::BI__builtin_neon_vmovl_v: {
1543 llvm::FixedVectorType *DTy =
1544 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
1545 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
1546 if (Usgn)
1547 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
1548 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
1549 }
1550 case NEON::BI__builtin_neon_vmovn_v: {
1551 llvm::FixedVectorType *QTy =
1552 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1553 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
1554 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
1555 }
1556 case NEON::BI__builtin_neon_vmull_v:
1557 // FIXME: the integer vmull operations could be emitted in terms of pure
1558 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
1559 // hoisting the exts outside loops. Until GlobalISel comes along and can
1560 // see through such movement, this leads to bad CodeGen. So we need an
1561 // intrinsic for now.
1562 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
1563 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
1564 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
1565 case NEON::BI__builtin_neon_vpadal_v:
1566 case NEON::BI__builtin_neon_vpadalq_v: {
1567 // The source operand type has twice as many elements of half the size.
1568 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
1569 llvm::Type *EltTy =
1570 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
1571 auto *NarrowTy =
1572 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
1573 llvm::Type *Tys[2] = { Ty, NarrowTy };
1574 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
1575 }
1576 case NEON::BI__builtin_neon_vpaddl_v:
1577 case NEON::BI__builtin_neon_vpaddlq_v: {
1578 // The source operand type has twice as many elements of half the size.
1579 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
1580 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
1581 auto *NarrowTy =
1582 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
1583 llvm::Type *Tys[2] = { Ty, NarrowTy };
1584 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
1585 }
1586 case NEON::BI__builtin_neon_vqdmlal_v:
1587 case NEON::BI__builtin_neon_vqdmlsl_v: {
1588 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
1589 Ops[1] =
1590 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
1591 Ops.resize(2);
1592 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
1593 }
1594 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
1595 case NEON::BI__builtin_neon_vqdmulh_lane_v:
1596 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
1597 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
1598 auto *RTy = cast<llvm::FixedVectorType>(Ty);
1599 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
1600 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
1601 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
1602 RTy->getNumElements() * 2);
1603 llvm::Type *Tys[2] = {
1604 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
1605 /*isQuad*/ false))};
1606 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
1607 }
1608 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
1609 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
1610 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
1611 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
1612 llvm::Type *Tys[2] = {
1613 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
1614 /*isQuad*/ true))};
1615 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
1616 }
1617 case NEON::BI__builtin_neon_vqshl_n_v:
1618 case NEON::BI__builtin_neon_vqshlq_n_v:
1619 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
1620 1, false);
1621 case NEON::BI__builtin_neon_vqshlu_n_v:
1622 case NEON::BI__builtin_neon_vqshluq_n_v:
1623 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
1624 1, false);
1625 case NEON::BI__builtin_neon_vrecpe_v:
1626 case NEON::BI__builtin_neon_vrecpeq_v:
1627 case NEON::BI__builtin_neon_vrsqrte_v:
1628 case NEON::BI__builtin_neon_vrsqrteq_v:
1629 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
1630 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
1631 case NEON::BI__builtin_neon_vrndi_v:
1632 case NEON::BI__builtin_neon_vrndiq_v:
1633 Int = Builder.getIsFPConstrained()
1634 ? Intrinsic::experimental_constrained_nearbyint
1635 : Intrinsic::nearbyint;
1636 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
1637 case NEON::BI__builtin_neon_vrshr_n_v:
1638 case NEON::BI__builtin_neon_vrshrq_n_v:
1639 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
1640 1, true);
1641 case NEON::BI__builtin_neon_vsha512hq_u64:
1642 case NEON::BI__builtin_neon_vsha512h2q_u64:
1643 case NEON::BI__builtin_neon_vsha512su0q_u64:
1644 case NEON::BI__builtin_neon_vsha512su1q_u64: {
1645 Function *F = CGM.getIntrinsic(Int);
1646 return EmitNeonCall(F, Ops, "");
1647 }
1648 case NEON::BI__builtin_neon_vshl_n_v:
1649 case NEON::BI__builtin_neon_vshlq_n_v:
1650 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
1651 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
1652 "vshl_n");
1653 case NEON::BI__builtin_neon_vshll_n_v: {
1654 llvm::FixedVectorType *SrcTy =
1655 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
1656 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1657 if (Usgn)
1658 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
1659 else
1660 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
1661 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
1662 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
1663 }
1664 case NEON::BI__builtin_neon_vshrn_n_v: {
1665 llvm::FixedVectorType *SrcTy =
1666 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1667 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1668 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
1669 if (Usgn)
1670 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
1671 else
1672 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
1673 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
1674 }
1675 case NEON::BI__builtin_neon_vshr_n_v:
1676 case NEON::BI__builtin_neon_vshrq_n_v:
1677 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
1678 case NEON::BI__builtin_neon_vst1_v:
1679 case NEON::BI__builtin_neon_vst1q_v:
1680 case NEON::BI__builtin_neon_vst2_v:
1681 case NEON::BI__builtin_neon_vst2q_v:
1682 case NEON::BI__builtin_neon_vst3_v:
1683 case NEON::BI__builtin_neon_vst3q_v:
1684 case NEON::BI__builtin_neon_vst4_v:
1685 case NEON::BI__builtin_neon_vst4q_v:
1686 case NEON::BI__builtin_neon_vst2_lane_v:
1687 case NEON::BI__builtin_neon_vst2q_lane_v:
1688 case NEON::BI__builtin_neon_vst3_lane_v:
1689 case NEON::BI__builtin_neon_vst3q_lane_v:
1690 case NEON::BI__builtin_neon_vst4_lane_v:
1691 case NEON::BI__builtin_neon_vst4q_lane_v: {
1692 llvm::Type *Tys[] = {Int8PtrTy, Ty};
1693 Ops.push_back(getAlignmentValue32(PtrOp0));
1694 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
1695 }
1696 case NEON::BI__builtin_neon_vsm3partw1q_u32:
1697 case NEON::BI__builtin_neon_vsm3partw2q_u32:
1698 case NEON::BI__builtin_neon_vsm3ss1q_u32:
1699 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
1700 case NEON::BI__builtin_neon_vsm4eq_u32: {
1701 Function *F = CGM.getIntrinsic(Int);
1702 return EmitNeonCall(F, Ops, "");
1703 }
1704 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
1705 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
1706 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
1707 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
1708 Function *F = CGM.getIntrinsic(Int);
1709 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
1710 return EmitNeonCall(F, Ops, "");
1711 }
1712 case NEON::BI__builtin_neon_vst1_x2_v:
1713 case NEON::BI__builtin_neon_vst1q_x2_v:
1714 case NEON::BI__builtin_neon_vst1_x3_v:
1715 case NEON::BI__builtin_neon_vst1q_x3_v:
1716 case NEON::BI__builtin_neon_vst1_x4_v:
1717 case NEON::BI__builtin_neon_vst1q_x4_v: {
1718 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
1719 // in AArch64 it comes last. We may want to stick to one or the other.
1720 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
1721 Arch == llvm::Triple::aarch64_32) {
1722 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
1723 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
1724 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
1725 }
1726 llvm::Type *Tys[2] = {DefaultPtrTy, VTy};
1727 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
1728 }
1729 case NEON::BI__builtin_neon_vsubhn_v: {
1730 llvm::FixedVectorType *SrcTy =
1731 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1732
1733 // %diff = sub <4 x i32> %lhs, %rhs
1734 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1735 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
1736 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
1737
1738 // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
1739 Constant *ShiftAmt =
1740 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
1741 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
1742
1743 // %res = trunc <4 x i32> %high to <4 x i16>
1744 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
1745 }
1746 case NEON::BI__builtin_neon_vtrn_v:
1747 case NEON::BI__builtin_neon_vtrnq_v: {
1748 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1749 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1750 Value *SV = nullptr;
1751
1752 for (unsigned vi = 0; vi != 2; ++vi) {
1753 SmallVector<int, 16> Indices;
1754 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1755 Indices.push_back(i+vi);
1756 Indices.push_back(i+e+vi);
1757 }
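 // Pass vi == 0 gathers the even lanes of both inputs (a0,b0,a2,b2,...),
 // and vi == 1 the odd lanes; together they form the two transpose results.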
1758 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
1759 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
1760 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
1761 }
1762 return SV;
1763 }
1764 case NEON::BI__builtin_neon_vtst_v:
1765 case NEON::BI__builtin_neon_vtstq_v: {
1766 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1767 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1768 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
1769 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
1770 ConstantAggregateZero::get(Ty));
1771 return Builder.CreateSExt(Ops[0], Ty, "vtst");
1772 }
1773 case NEON::BI__builtin_neon_vuzp_v:
1774 case NEON::BI__builtin_neon_vuzpq_v: {
1775 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1776 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1777 Value *SV = nullptr;
1778
1779 for (unsigned vi = 0; vi != 2; ++vi) {
1780 SmallVector<int, 16> Indices;
1781 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1782 Indices.push_back(2*i+vi);
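 // Pass vi == 0 selects the even-indexed elements of the concatenation
 // a ++ b (a0,a2,...,b0,b2,...), and vi == 1 the odd-indexed ones.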
1783
1784 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
1785 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
1786 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
1787 }
1788 return SV;
1789 }
1790 case NEON::BI__builtin_neon_vxarq_u64: {
1791 Function *F = CGM.getIntrinsic(Int);
1792 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
1793 return EmitNeonCall(F, Ops, "");
1794 }
1795 case NEON::BI__builtin_neon_vzip_v:
1796 case NEON::BI__builtin_neon_vzipq_v: {
1797 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1798 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1799 Value *SV = nullptr;
1800
1801 for (unsigned vi = 0; vi != 2; ++vi) {
1802 SmallVector<int, 16> Indices;
1803 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1804 Indices.push_back((i + vi*e) >> 1);
1805 Indices.push_back(((i + vi*e) >> 1)+e);
1806 }
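 // Pass vi == 0 interleaves the low halves of the two inputs
 // (a0,b0,a1,b1,...), and vi == 1 the high halves.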
1807 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
1808 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
1809 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
1810 }
1811 return SV;
1812 }
1813 case NEON::BI__builtin_neon_vdot_s32:
1814 case NEON::BI__builtin_neon_vdot_u32:
1815 case NEON::BI__builtin_neon_vdotq_s32:
1816 case NEON::BI__builtin_neon_vdotq_u32: {
1817 auto *InputTy =
1818 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1819 llvm::Type *Tys[2] = { Ty, InputTy };
1820 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
1821 }
1822 case NEON::BI__builtin_neon_vfmlal_low_f16:
1823 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
1824 auto *InputTy =
1825 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1826 llvm::Type *Tys[2] = { Ty, InputTy };
1827 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
1828 }
1829 case NEON::BI__builtin_neon_vfmlsl_low_f16:
1830 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
1831 auto *InputTy =
1832 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1833 llvm::Type *Tys[2] = { Ty, InputTy };
1834 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
1835 }
1836 case NEON::BI__builtin_neon_vfmlal_high_f16:
1837 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
1838 auto *InputTy =
1839 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1840 llvm::Type *Tys[2] = { Ty, InputTy };
1841 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
1842 }
1843 case NEON::BI__builtin_neon_vfmlsl_high_f16:
1844 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
1845 auto *InputTy =
1846 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1847 llvm::Type *Tys[2] = { Ty, InputTy };
1848 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
1849 }
1850 case NEON::BI__builtin_neon_vmmlaq_s32:
1851 case NEON::BI__builtin_neon_vmmlaq_u32: {
1852 auto *InputTy =
1853 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1854 llvm::Type *Tys[2] = { Ty, InputTy };
1855 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
1856 }
1857 case NEON::BI__builtin_neon_vusmmlaq_s32: {
1858 auto *InputTy =
1859 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1860 llvm::Type *Tys[2] = { Ty, InputTy };
1861 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
1862 }
1863 case NEON::BI__builtin_neon_vusdot_s32:
1864 case NEON::BI__builtin_neon_vusdotq_s32: {
1865 auto *InputTy =
1866 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1867 llvm::Type *Tys[2] = { Ty, InputTy };
1868 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
1869 }
1870 case NEON::BI__builtin_neon_vbfdot_f32:
1871 case NEON::BI__builtin_neon_vbfdotq_f32: {
1872 llvm::Type *InputTy =
1873 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
1874 llvm::Type *Tys[2] = { Ty, InputTy };
1875 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
1876 }
1877 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
1878 llvm::Type *Tys[1] = { Ty };
1879 Function *F = CGM.getIntrinsic(Int, Tys);
1880 return EmitNeonCall(F, Ops, "vcvtfp2bf");
1881 }
1882
1883 }
1884
1885 assert(Int && "Expected valid intrinsic number");
1886
1887 // Determine the type(s) of this overloaded AArch64 intrinsic.
1888 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
1889
1890 Value *Result = EmitNeonCall(F, Ops, NameHint);
1891 llvm::Type *ResultType = ConvertType(E->getType());
1892 // Cast the AArch64 intrinsic's one-element vector result back to the
1893 // scalar type that the builtin expects.
1894 return Builder.CreateBitCast(Result, ResultType, NameHint);
1895}
1896
1897Value *
1898CodeGenFunction::EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty,
1899 const CmpInst::Predicate Pred,
1900 const Twine &Name) {
1901
1902 if (isa<FixedVectorType>(Ty)) {
1903 // Vector types are cast to i8 vectors. Recover original type.
1904 Op = Builder.CreateBitCast(Op, Ty);
1905 }
1906
1907 Constant *zero = Constant::getNullValue(Op->getType());
1908
1909 if (CmpInst::isFPPredicate(Pred)) {
1910 if (Pred == CmpInst::FCMP_OEQ)
1911 Op = Builder.CreateFCmp(Pred, Op, zero);
1912 else
1913 Op = Builder.CreateFCmpS(Pred, Op, zero);
1914 } else {
1915 Op = Builder.CreateICmp(Pred, Op, zero);
1916 }
1917
1918 llvm::Type *ResTy = Ty;
1919 if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
1920 ResTy = FixedVectorType::get(
1921 IntegerType::get(getLLVMContext(), VTy->getScalarSizeInBits()),
1922 VTy->getNumElements());
1923
1924 return Builder.CreateSExt(Op, ResTy, Name);
1925}
1926
1927static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
1928 Value *ExtOp, Value *IndexOp,
1929 llvm::Type *ResTy, unsigned IntID,
1930 const char *Name) {
1931 SmallVector<Value *, 2> TblOps;
1932 if (ExtOp)
1933 TblOps.push_back(ExtOp);
1934
1935 // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
1936 SmallVector<int, 16> Indices;
1937 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
1938 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
1939 Indices.push_back(2*i);
1940 Indices.push_back(2*i+1);
1941 }
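 // With these indices the shuffle simply concatenates a pair of 64-bit
 // tables: lane k of the result is lane k of (lo ++ hi), building one
 // 128-bit table register from two 64-bit halves.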
1942
1943 int PairPos = 0, End = Ops.size() - 1;
1944 while (PairPos < End) {
1945 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
1946 Ops[PairPos+1], Indices,
1947 Name));
1948 PairPos += 2;
1949 }
1950
1951 // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
1952 // of the last 128-bit lookup table with zero.
1953 if (PairPos == End) {
1954 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
1955 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
1956 ZeroTbl, Indices, Name));
1957 }
1958
1959 Function *TblF;
1960 TblOps.push_back(IndexOp);
1961 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
1962
1963 return CGF.EmitNeonCall(TblF, TblOps, Name);
1964}
1965
1966Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
1967 unsigned Value;
1968 switch (BuiltinID) {
1969 default:
1970 return nullptr;
1971 case clang::ARM::BI__builtin_arm_nop:
1972 Value = 0;
1973 break;
1974 case clang::ARM::BI__builtin_arm_yield:
1975 case clang::ARM::BI__yield:
1976 Value = 1;
1977 break;
1978 case clang::ARM::BI__builtin_arm_wfe:
1979 case clang::ARM::BI__wfe:
1980 Value = 2;
1981 break;
1982 case clang::ARM::BI__builtin_arm_wfi:
1983 case clang::ARM::BI__wfi:
1984 Value = 3;
1985 break;
1986 case clang::ARM::BI__builtin_arm_sev:
1987 case clang::ARM::BI__sev:
1988 Value = 4;
1989 break;
1990 case clang::ARM::BI__builtin_arm_sevl:
1991 case clang::ARM::BI__sevl:
1992 Value = 5;
1993 break;
1994 }
1995
1996 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
1997 llvm::ConstantInt::get(Int32Ty, Value));
1998}
1999
2000enum SpecialRegisterAccessKind {
2001 NormalRead,
2002 VolatileRead,
2003 Write,
2004};
2005
2006// Generates the IR for a read/write special register builtin. ValueType is
2007// the type of the value that is to be written or read, and RegisterType is
2008// the type of the register being written to or read from.
2009static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
2010 const CallExpr *E,
2011 llvm::Type *RegisterType,
2012 llvm::Type *ValueType,
2013 SpecialRegisterAccessKind AccessKind,
2014 StringRef SysReg = "") {
2015 // Read and write register intrinsics only support 32-, 64- and 128-bit operations.
2016 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
2017 RegisterType->isIntegerTy(128)) &&
2018 "Unsupported size for register.");
2019
2020 CodeGen::CGBuilderTy &Builder = CGF.Builder;
2021 CodeGen::CodeGenModule &CGM = CGF.CGM;
2022 LLVMContext &Context = CGM.getLLVMContext();
2023
2024 if (SysReg.empty()) {
2025 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
2026 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
2027 }
2028
2029 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
2030 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
2031 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
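 // The register name travels as metadata. Illustratively, a volatile read of
 // "cpsr" becomes something like:
 //   %0 = call i32 @llvm.read_volatile_register.i32(metadata !0)
 // where !0 = !{!"cpsr"}; writes go through @llvm.write_register the same way.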
2032
2033 llvm::Type *Types[] = { RegisterType };
2034
2035 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
2036 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
2037 && "Can't fit 64-bit value in 32-bit register");
2038
2039 if (AccessKind != Write) {
2040 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
2041 llvm::Function *F = CGM.getIntrinsic(
2042 AccessKind == VolatileRead ? Intrinsic::read_volatile_register
2043 : Intrinsic::read_register,
2044 Types);
2045 llvm::Value *Call = Builder.CreateCall(F, Metadata);
2046
2047 if (MixedTypes)
2048 // Read into 64 bit register and then truncate result to 32 bit.
2049 return Builder.CreateTrunc(Call, ValueType);
2050
2051 if (ValueType->isPointerTy())
2052 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
2053 return Builder.CreateIntToPtr(Call, ValueType);
2054
2055 return Call;
2056 }
2057
2058 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
2059 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
2060 if (MixedTypes) {
2061 // Extend 32 bit write value to 64 bit to pass to write.
2062 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
2063 return Builder.CreateCall(F, { Metadata, ArgValue });
2064 }
2065
2066 if (ValueType->isPointerTy()) {
2067 // Have VoidPtrTy ArgValue but want to return an i32/i64.
2068 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
2069 return Builder.CreateCall(F, { Metadata, ArgValue });
2070 }
2071
2072 return Builder.CreateCall(F, { Metadata, ArgValue });
2073}
2074
2075static Value *EmitRangePrefetchBuiltin(CodeGenFunction &CGF, unsigned BuiltinID,
2076 const CallExpr *E) {
2077 CodeGen::CGBuilderTy &Builder = CGF.Builder;
2078 CodeGen::CodeGenModule &CGM = CGF.CGM;
2079 SmallVector<Value *, 4> Ops;
2080
2081 auto getIntArg = [&](unsigned ArgNo) {
2082 Expr::EvalResult Result;
2083 if (!E->getArg(ArgNo)->EvaluateAsInt(Result, CGM.getContext()))
2084 llvm_unreachable("Expected constant argument to range prefetch.");
2085 return Result.Val.getInt().getExtValue();
2086 };
2087
2088 Ops.push_back(CGF.EmitScalarExpr(E->getArg(0))); /*Addr*/
2089 Ops.push_back(CGF.EmitScalarExpr(E->getArg(1))); /*Access Kind*/
2090 Ops.push_back(CGF.EmitScalarExpr(E->getArg(2))); /*Policy*/
2091
2092 if (BuiltinID == clang::AArch64::BI__builtin_arm_range_prefetch_x) {
2093 auto Length = getIntArg(3);
2094 auto Count = getIntArg(4) - 1;
2095 auto Stride = getIntArg(5);
2096 auto Distance = getIntArg(6);
2097
2098 // Map ReuseDistance given in bytes to four bits representing decreasing
2099 // powers of two in the range 512MiB (0b0001) to 32KiB (0b1111). Values
2100 // are rounded up to the nearest power of 2, starting at 32KiB. Any value
2101 // over the maximum is represented by 0 (distance not known).
2102 if (Distance > 0) {
2103 Distance = llvm::Log2_32_Ceil(Distance);
2104 if (Distance < 15)
2105 Distance = 15;
2106 else if (Distance > 29)
2107 Distance = 0;
2108 else
2109 Distance = 30 - Distance;
2110 }
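 // For example, a ReuseDistance of 1MiB gives Log2_32_Ceil(2^20) == 20,
 // which lies in [15, 29] and is encoded as 30 - 20 == 10 (0b1010).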
2111
2112 uint64_t Mask22 = (1ULL << 22) - 1;
2113 uint64_t Mask16 = (1ULL << 16) - 1;
2114 uint64_t Metadata = (Distance << 60) | ((Stride & Mask22) << 38) |
2115 ((Count & Mask16) << 22) | (Length & Mask22);
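 // Packed layout: bits [21:0] hold Length, bits [37:22] the count minus one,
 // bits [59:38] Stride, and bits [63:60] the ReuseDistance encoding above.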
2116
2117 Ops.push_back(llvm::ConstantInt::get(Builder.getInt64Ty(), Metadata));
2118 } else
2119 Ops.push_back(CGF.EmitScalarExpr(E->getArg(3)));
2120
2121 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_range_prefetch),
2122 Ops);
2123}
2124
2125/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
2126/// argument that specifies the vector type. The additional argument is meant
2127/// for Sema checking (see `CheckNeonBuiltinFunctionCall`) and this function
2128/// should be kept consistent with the logic in Sema.
2129/// TODO: Make this return false for SISD builtins.
2130static bool HasExtraNeonArgument(unsigned BuiltinID) {
2131 // Required by the headers included below, but not in this particular
2132 // function.
2133 [[maybe_unused]] int PtrArgNum = -1;
2134 [[maybe_unused]] bool HasConstPtr = false;
2135
2136 // The mask encodes the type. We don't care about the actual value;
2137 // we just check whether it has been set.
2138 uint64_t mask = 0;
2139 switch (BuiltinID) {
2140#define GET_NEON_OVERLOAD_CHECK
2141#include "clang/Basic/arm_fp16.inc"
2142#include "clang/Basic/arm_neon.inc"
2143#undef GET_NEON_OVERLOAD_CHECK
2144 // Non-NEON builtins for controlling VFP that take an extra argument to
2145 // discriminate the type.
2146 case ARM::BI__builtin_arm_vcvtr_f:
2147 case ARM::BI__builtin_arm_vcvtr_d:
2148 mask = 1;
2149 }
2150
2151 if (mask)
2152 return true;
2153
2154 return false;
2155}
2156
2157Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
2158 const CallExpr *E,
2159 ReturnValueSlot ReturnValue,
2160 llvm::Triple::ArchType Arch) {
2161 if (auto Hint = GetValueForARMHint(BuiltinID))
2162 return Hint;
2163
2164 if (BuiltinID == clang::ARM::BI__emit) {
2165 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
2166 llvm::FunctionType *FTy =
2167 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
2168
2169 Expr::EvalResult Result;
2170 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
2171 llvm_unreachable("Sema will ensure that the parameter is constant");
2172
2173 llvm::APSInt Value = Result.Val.getInt();
2174 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
2175
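 // For illustration: on a Thumb target, __emit(0x4770) expands to the inline
 // asm ".inst.n 0x4770", which is the encoding of "bx lr".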
2176 llvm::InlineAsm *Emit =
2177 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
2178 /*hasSideEffects=*/true)
2179 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
2180 /*hasSideEffects=*/true);
2181
2182 return Builder.CreateCall(Emit);
2183 }
2184
2185 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
2186 Value *Option = EmitScalarExpr(E->getArg(0));
2187 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
2188 }
2189
2190 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
2191 Value *Address = EmitScalarExpr(E->getArg(0));
2192 Value *RW = EmitScalarExpr(E->getArg(1));
2193 Value *IsData = EmitScalarExpr(E->getArg(2));
2194
2195 // Locality is not supported on the ARM target; use the maximum locality.
2196 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
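 // Thus __builtin_arm_prefetch(p, rw, data) lowers to
 // llvm.prefetch(p, rw, /*locality=*/3, data).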
2197
2198 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
2199 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
2200 }
2201
2202 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
2203 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2204 return Builder.CreateCall(
2205 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
2206 }
2207
2208 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
2209 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
2210 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2211 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
2212 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
2213 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
2214 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
2215 return Res;
2216 }
2217
2218
2219 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
2220 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2221 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
2222 }
2223 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
2224 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2225 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
2226 "cls");
2227 }
2228
2229 if (BuiltinID == clang::ARM::BI__clear_cache) {
2230 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
2231 const FunctionDecl *FD = E->getDirectCallee();
2232 Value *Ops[2];
2233 for (unsigned i = 0; i < 2; i++)
2234 Ops[i] = EmitScalarExpr(E->getArg(i));
2235 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
2236 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
2237 StringRef Name = FD->getName();
2238 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
2239 }
2240
2241 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
2242 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
2243 Function *F;
2244
2245 switch (BuiltinID) {
2246 default: llvm_unreachable("unexpected builtin");
2247 case clang::ARM::BI__builtin_arm_mcrr:
2248 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
2249 break;
2250 case clang::ARM::BI__builtin_arm_mcrr2:
2251 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
2252 break;
2253 }
2254
2255 // The MCRR{2} instruction has 5 operands, but
2256 // the intrinsic has only 4 because Rt and Rt2
2257 // are represented as a single unsigned 64-bit
2258 // integer in the intrinsic definition, while
2259 // internally they are handled as two 32-bit
2260 // integers.
2261
2262 Value *Coproc = EmitScalarExpr(E->getArg(0));
2263 Value *Opc1 = EmitScalarExpr(E->getArg(1));
2264 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
2265 Value *CRm = EmitScalarExpr(E->getArg(3));
2266
2267 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
2268 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
2269 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
2270 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
2271
2272 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
2273 }
2274
2275 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
2276 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
2277 Function *F;
2278
2279 switch (BuiltinID) {
2280 default: llvm_unreachable("unexpected builtin");
2281 case clang::ARM::BI__builtin_arm_mrrc:
2282 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
2283 break;
2284 case clang::ARM::BI__builtin_arm_mrrc2:
2285 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
2286 break;
2287 }
2288
2289 Value *Coproc = EmitScalarExpr(E->getArg(0));
2290 Value *Opc1 = EmitScalarExpr(E->getArg(1));
2291 Value *CRm = EmitScalarExpr(E->getArg(2));
2292 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
2293
2294 // The intrinsic returns the unsigned 64-bit value as two
2295 // 32-bit halves, which are reassembled below.
2296
2297 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
2298 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
2299 Rt = Builder.CreateZExt(Rt, Int64Ty);
2300 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
2301
2302 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
2303 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
2304 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
2305
2306 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
2307 }
2308
2309 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
2310 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2311 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
2312 getContext().getTypeSize(E->getType()) == 64) ||
2313 BuiltinID == clang::ARM::BI__ldrexd) {
2314 Function *F;
2315
2316 switch (BuiltinID) {
2317 default: llvm_unreachable("unexpected builtin");
2318 case clang::ARM::BI__builtin_arm_ldaex:
2319 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
2320 break;
2321 case clang::ARM::BI__builtin_arm_ldrexd:
2322 case clang::ARM::BI__builtin_arm_ldrex:
2323 case clang::ARM::BI__ldrexd:
2324 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
2325 break;
2326 }
2327
2328 Value *LdPtr = EmitScalarExpr(E->getArg(0));
2329 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
2330
2331 Value *Val0 = Builder.CreateExtractValue(Val, 1);
2332 Value *Val1 = Builder.CreateExtractValue(Val, 0);
2333 Val0 = Builder.CreateZExt(Val0, Int64Ty);
2334 Val1 = Builder.CreateZExt(Val1, Int64Ty);
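 // Struct element 1 supplies the high word and element 0 the low word; the
 // i64 result is reassembled as (Val0 << 32) | Val1.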
2335
2336 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
2337 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
2338 Val = Builder.CreateOr(Val, Val1);
2339 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
2340 }
2341
2342 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2343 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
2344 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
2345
2346 QualType Ty = E->getType();
2347 llvm::Type *RealResTy = ConvertType(Ty);
2348 llvm::Type *IntTy =
2349 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
2350
2351 Function *F = CGM.getIntrinsic(
2352 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
2353 : Intrinsic::arm_ldrex,
2354 DefaultPtrTy);
2355 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
2356 Val->addParamAttr(
2357 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
2358
2359 if (RealResTy->isPointerTy())
2360 return Builder.CreateIntToPtr(Val, RealResTy);
2361 else {
2362 llvm::Type *IntResTy = llvm::IntegerType::get(
2363 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
2364 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
2365 RealResTy);
2366 }
2367 }
2368
2369 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
2370 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
2371 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
2372 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
2373 Function *F = CGM.getIntrinsic(
2374 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
2375 : Intrinsic::arm_strexd);
2376 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
2377
2378 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
2379 Value *Val = EmitScalarExpr(E->getArg(0));
2380 Builder.CreateStore(Val, Tmp);
2381
2382 Address LdPtr = Tmp.withElementType(STy);
2383 Val = Builder.CreateLoad(LdPtr);
2384
2385 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
2386 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
2387 Value *StPtr = EmitScalarExpr(E->getArg(1));
2388 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
2389 }
2390
2391 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
2392 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
2393 Value *StoreVal = EmitScalarExpr(E->getArg(0));
2394 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
2395
2396 QualType Ty = E->getArg(0)->getType();
2397 llvm::Type *StoreTy =
2398 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
2399
2400 if (StoreVal->getType()->isPointerTy())
2401 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
2402 else {
2403 llvm::Type *IntTy = llvm::IntegerType::get(
2404 getLLVMContext(),
2405 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
2406 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
2407 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
2408 }
2409
2410 Function *F = CGM.getIntrinsic(
2411 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
2412 : Intrinsic::arm_strex,
2413 StoreAddr->getType());
2414
2415 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
2416 CI->addParamAttr(
2417 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
2418 return CI;
2419 }
2420
2421 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
2422 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
2423 return Builder.CreateCall(F);
2424 }
2425
2426 // CRC32
2427 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
2428 switch (BuiltinID) {
2429 case clang::ARM::BI__builtin_arm_crc32b:
2430 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
2431 case clang::ARM::BI__builtin_arm_crc32cb:
2432 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
2433 case clang::ARM::BI__builtin_arm_crc32h:
2434 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
2435 case clang::ARM::BI__builtin_arm_crc32ch:
2436 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
2437 case clang::ARM::BI__builtin_arm_crc32w:
2438 case clang::ARM::BI__builtin_arm_crc32d:
2439 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
2440 case clang::ARM::BI__builtin_arm_crc32cw:
2441 case clang::ARM::BI__builtin_arm_crc32cd:
2442 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
2443 }
2444
2445 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
2446 Value *Arg0 = EmitScalarExpr(E->getArg(0));
2447 Value *Arg1 = EmitScalarExpr(E->getArg(1));
2448
2449 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
2450 // intrinsics, hence we need different codegen for these cases.
2451 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
2452 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
2453 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
2454 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
2455 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
2456 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
2457
2458 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
2459 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
2460 return Builder.CreateCall(F, {Res, Arg1b});
2461 } else {
2462 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
2463
2464 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
2465 return Builder.CreateCall(F, {Arg0, Arg1});
2466 }
2467 }
2468
2469 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
2470 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2471 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
2472 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
2473 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
2474 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
2475
2476 SpecialRegisterAccessKind AccessKind = Write;
2477 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
2478 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2479 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
2480 AccessKind = VolatileRead;
2481
2482 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
2483 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
2484
2485 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2486 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
2487
2488 llvm::Type *ValueType;
2489 llvm::Type *RegisterType;
2490 if (IsPointerBuiltin) {
2491 ValueType = VoidPtrTy;
2492 RegisterType = Int32Ty;
2493 } else if (Is64Bit) {
2494 ValueType = RegisterType = Int64Ty;
2495 } else {
2496 ValueType = RegisterType = Int32Ty;
2497 }
2498
2499 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
2500 AccessKind);
2501 }
2502
2503 if (BuiltinID == ARM::BI__builtin_sponentry) {
2504 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
2505 return Builder.CreateCall(F);
2506 }
2507
2508 // Handle MSVC intrinsics before argument evaluation to prevent double
2509 // evaluation.
2510 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
2511 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
2512
2513 // Deal with MVE builtins
2514 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
2515 return Result;
2516 // Handle CDE builtins
2517 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
2518 return Result;
2519
2520 // Some intrinsics are equivalent; if so, use the base intrinsic ID.
2521 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
2522 return P.first == BuiltinID;
2523 });
2524 if (It != end(NEONEquivalentIntrinsicMap))
2525 BuiltinID = It->second;
2526
2527 // Find out if any arguments are required to be integer constant
2528 // expressions.
2529 unsigned ICEArguments = 0;
2530 ASTContext::GetBuiltinTypeError Error;
2531 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2532 assert(Error == ASTContext::GE_None && "Should not codegen an error");
2533
2534 auto getAlignmentValue32 = [&](Address addr) -> Value* {
2535 return Builder.getInt32(addr.getAlignment().getQuantity());
2536 };
2537
2538 Address PtrOp0 = Address::invalid();
2539 Address PtrOp1 = Address::invalid();
2540 SmallVector<Value *, 4> Ops;
2541 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
2542 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
2543 for (unsigned i = 0, e = NumArgs; i != e; i++) {
2544 if (i == 0) {
2545 switch (BuiltinID) {
2546 case NEON::BI__builtin_neon_vld1_v:
2547 case NEON::BI__builtin_neon_vld1q_v:
2548 case NEON::BI__builtin_neon_vld1q_lane_v:
2549 case NEON::BI__builtin_neon_vld1_lane_v:
2550 case NEON::BI__builtin_neon_vld1_dup_v:
2551 case NEON::BI__builtin_neon_vld1q_dup_v:
2552 case NEON::BI__builtin_neon_vst1_v:
2553 case NEON::BI__builtin_neon_vst1q_v:
2554 case NEON::BI__builtin_neon_vst1q_lane_v:
2555 case NEON::BI__builtin_neon_vst1_lane_v:
2556 case NEON::BI__builtin_neon_vst2_v:
2557 case NEON::BI__builtin_neon_vst2q_v:
2558 case NEON::BI__builtin_neon_vst2_lane_v:
2559 case NEON::BI__builtin_neon_vst2q_lane_v:
2560 case NEON::BI__builtin_neon_vst3_v:
2561 case NEON::BI__builtin_neon_vst3q_v:
2562 case NEON::BI__builtin_neon_vst3_lane_v:
2563 case NEON::BI__builtin_neon_vst3q_lane_v:
2564 case NEON::BI__builtin_neon_vst4_v:
2565 case NEON::BI__builtin_neon_vst4q_v:
2566 case NEON::BI__builtin_neon_vst4_lane_v:
2567 case NEON::BI__builtin_neon_vst4q_lane_v:
2568 // Get the alignment for the argument in addition to the value;
2569 // we'll use it later.
2570 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
2571 Ops.push_back(PtrOp0.emitRawPointer(*this));
2572 continue;
2573 }
2574 }
2575 if (i == 1) {
2576 switch (BuiltinID) {
2577 case NEON::BI__builtin_neon_vld2_v:
2578 case NEON::BI__builtin_neon_vld2q_v:
2579 case NEON::BI__builtin_neon_vld3_v:
2580 case NEON::BI__builtin_neon_vld3q_v:
2581 case NEON::BI__builtin_neon_vld4_v:
2582 case NEON::BI__builtin_neon_vld4q_v:
2583 case NEON::BI__builtin_neon_vld2_lane_v:
2584 case NEON::BI__builtin_neon_vld2q_lane_v:
2585 case NEON::BI__builtin_neon_vld3_lane_v:
2586 case NEON::BI__builtin_neon_vld3q_lane_v:
2587 case NEON::BI__builtin_neon_vld4_lane_v:
2588 case NEON::BI__builtin_neon_vld4q_lane_v:
2589 case NEON::BI__builtin_neon_vld2_dup_v:
2590 case NEON::BI__builtin_neon_vld2q_dup_v:
2591 case NEON::BI__builtin_neon_vld3_dup_v:
2592 case NEON::BI__builtin_neon_vld3q_dup_v:
2593 case NEON::BI__builtin_neon_vld4_dup_v:
2594 case NEON::BI__builtin_neon_vld4q_dup_v:
2595 // Get the alignment for the argument in addition to the value;
2596 // we'll use it later.
2597 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
2598 Ops.push_back(PtrOp1.emitRawPointer(*this));
2599 continue;
2600 }
2601 }
2602
2603 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
2604 }
2605
2606 switch (BuiltinID) {
2607 default: break;
2608
2609 case NEON::BI__builtin_neon_vget_lane_i8:
2610 case NEON::BI__builtin_neon_vget_lane_i16:
2611 case NEON::BI__builtin_neon_vget_lane_i32:
2612 case NEON::BI__builtin_neon_vget_lane_i64:
2613 case NEON::BI__builtin_neon_vget_lane_bf16:
2614 case NEON::BI__builtin_neon_vget_lane_f32:
2615 case NEON::BI__builtin_neon_vgetq_lane_i8:
2616 case NEON::BI__builtin_neon_vgetq_lane_i16:
2617 case NEON::BI__builtin_neon_vgetq_lane_i32:
2618 case NEON::BI__builtin_neon_vgetq_lane_i64:
2619 case NEON::BI__builtin_neon_vgetq_lane_bf16:
2620 case NEON::BI__builtin_neon_vgetq_lane_f32:
2621 case NEON::BI__builtin_neon_vduph_lane_bf16:
2622 case NEON::BI__builtin_neon_vduph_laneq_bf16:
2623 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
2624
2625 case NEON::BI__builtin_neon_vrndns_f32: {
2626 Value *Arg = EmitScalarExpr(E->getArg(0));
2627 llvm::Type *Tys[] = {Arg->getType()};
2628 Function *F = CGM.getIntrinsic(Intrinsic::roundeven, Tys);
2629 return Builder.CreateCall(F, {Arg}, "vrndn"); }
2630
2631 case NEON::BI__builtin_neon_vset_lane_i8:
2632 case NEON::BI__builtin_neon_vset_lane_i16:
2633 case NEON::BI__builtin_neon_vset_lane_i32:
2634 case NEON::BI__builtin_neon_vset_lane_i64:
2635 case NEON::BI__builtin_neon_vset_lane_bf16:
2636 case NEON::BI__builtin_neon_vset_lane_f32:
2637 case NEON::BI__builtin_neon_vsetq_lane_i8:
2638 case NEON::BI__builtin_neon_vsetq_lane_i16:
2639 case NEON::BI__builtin_neon_vsetq_lane_i32:
2640 case NEON::BI__builtin_neon_vsetq_lane_i64:
2641 case NEON::BI__builtin_neon_vsetq_lane_bf16:
2642 case NEON::BI__builtin_neon_vsetq_lane_f32:
2643 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
2644
2645 case NEON::BI__builtin_neon_vsha1h_u32:
2646 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
2647 "vsha1h");
2648 case NEON::BI__builtin_neon_vsha1cq_u32:
2649 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
2650 "vsha1c");
2651 case NEON::BI__builtin_neon_vsha1pq_u32:
2652 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
2653 "vsha1p");
2654 case NEON::BI__builtin_neon_vsha1mq_u32:
2655 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
2656 "vsha1m");
2657
2658 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
2659 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
2660 "vcvtbfp2bf");
2661 }
2662
2663 // The ARM _MoveToCoprocessor builtins put the input register value as
2664 // the first argument, but the LLVM intrinsic expects it as the third one.
2665 case clang::ARM::BI_MoveToCoprocessor:
2666 case clang::ARM::BI_MoveToCoprocessor2: {
2667 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
2668 ? Intrinsic::arm_mcr
2669 : Intrinsic::arm_mcr2);
2670 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
2671 Ops[3], Ops[4], Ops[5]});
2672 }
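// For example (illustrative values): _MoveToCoprocessor(V, 15, 0, 7, 10, 5)
// becomes @llvm.arm.mcr(i32 15, i32 0, i32 V, i32 7, i32 10, i32 5); the
// register value moves from argument slot 0 to operand slot 2.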
2673 }
2674
2675 // Get the last argument, which specifies the vector type.
2676 assert(HasExtraArg);
2677 const Expr *Arg = E->getArg(E->getNumArgs()-1);
2678 std::optional<llvm::APSInt> Result =
2679 Arg->getIntegerConstantExpr(getContext());
2680 if (!Result)
2681 return nullptr;
2682
2683 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
2684 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
2685 // Determine the overloaded type of this builtin.
2686 llvm::Type *Ty;
2687 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
2688 Ty = FloatTy;
2689 else
2690 Ty = DoubleTy;
2691
2692 // Determine whether this is an unsigned conversion or not.
2693 bool usgn = Result->getZExtValue() == 1;
2694 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
2695
2696 // Call the appropriate intrinsic.
2697 Function *F = CGM.getIntrinsic(Int, Ty);
2698 return Builder.CreateCall(F, Ops, "vcvtr");
2699 }
2700
2701 // Determine the type of this overloaded NEON intrinsic.
2702 NeonTypeFlags Type = Result->getZExtValue();
2703 bool usgn = Type.isUnsigned();
2704 bool rightShift = false;
2705
2706 llvm::FixedVectorType *VTy =
2707 GetNeonType(this, Type, getTarget().hasFastHalfType(), false,
2708 getTarget().hasBFloat16Type());
2709 llvm::Type *Ty = VTy;
2710 if (!Ty)
2711 return nullptr;
2712
2713 // Many NEON builtins have identical semantics and uses in ARM and
2714 // AArch64. Emit these in a single function.
2715 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
2716 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
2717 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
2718 if (Builtin)
2719 return EmitCommonNeonBuiltinExpr(
2720 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
2721 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
2722
2723 unsigned Int;
2724 switch (BuiltinID) {
2725 default: return nullptr;
2726 case NEON::BI__builtin_neon_vld1q_lane_v:
2727 // Handle 64-bit integer elements as a special case. Use shuffles of
2728 // one-element vectors to avoid poor code for i64 in the backend.
2729 if (VTy->getElementType()->isIntegerTy(64)) {
2730 // Extract the other lane.
2731 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2732 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
2733 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
2734 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
2735 // Load the value as a one-element vector.
2736 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
2737 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2738 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
2739 Value *Align = getAlignmentValue32(PtrOp0);
2740 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
2741 // Combine them.
2742 int Indices[] = {1 - Lane, Lane};
2743 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
2744 }
2745 [[fallthrough]];
2746 case NEON::BI__builtin_neon_vld1_lane_v: {
2747 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2748 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
2749 Value *Ld = Builder.CreateLoad(PtrOp0);
2750 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
2751 }
2752 case NEON::BI__builtin_neon_vqrshrn_n_v:
2753 Int =
2754 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
2755 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
2756 1, true);
2757 case NEON::BI__builtin_neon_vqrshrun_n_v:
2758 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
2759 Ops, "vqrshrun_n", 1, true);
2760 case NEON::BI__builtin_neon_vqshrn_n_v:
2761 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
2762 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
2763 1, true);
2764 case NEON::BI__builtin_neon_vqshrun_n_v:
2765 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
2766 Ops, "vqshrun_n", 1, true);
2767 case NEON::BI__builtin_neon_vrecpe_v:
2768 case NEON::BI__builtin_neon_vrecpeq_v:
2769 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
2770 Ops, "vrecpe");
2771 case NEON::BI__builtin_neon_vrshrn_n_v:
2772 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
2773 Ops, "vrshrn_n", 1, true);
2774 case NEON::BI__builtin_neon_vrsra_n_v:
2775 case NEON::BI__builtin_neon_vrsraq_n_v:
2776 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2777 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2778 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
2779 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
2780 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
2781 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
2782 case NEON::BI__builtin_neon_vsri_n_v:
2783 case NEON::BI__builtin_neon_vsriq_n_v:
2784 rightShift = true;
2785 [[fallthrough]];
2786 case NEON::BI__builtin_neon_vsli_n_v:
2787 case NEON::BI__builtin_neon_vsliq_n_v:
2788 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
2789 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
2790 Ops, "vsli_n");
2791 case NEON::BI__builtin_neon_vsra_n_v:
2792 case NEON::BI__builtin_neon_vsraq_n_v:
2793 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2794 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
2795 return Builder.CreateAdd(Ops[0], Ops[1]);
2796 case NEON::BI__builtin_neon_vst1q_lane_v:
2797 // Handle 64-bit integer elements as a special case. Use a shuffle to get
2798 // a one-element vector and avoid poor code for i64 in the backend.
2799 if (VTy->getElementType()->isIntegerTy(64)) {
2800 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2801 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
2802 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
2803 Ops[2] = getAlignmentValue32(PtrOp0);
2804 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
2805 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
2806 Tys), Ops);
2807 }
2808 [[fallthrough]];
2809 case NEON::BI__builtin_neon_vst1_lane_v: {
2810 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2811 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
2812 return Builder.CreateStore(Ops[1],
2813 PtrOp0.withElementType(Ops[1]->getType()));
2814 }
2815 case NEON::BI__builtin_neon_vtbl1_v:
2816 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
2817 Ops, "vtbl1");
2818 case NEON::BI__builtin_neon_vtbl2_v:
2819 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
2820 Ops, "vtbl2");
2821 case NEON::BI__builtin_neon_vtbl3_v:
2822 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
2823 Ops, "vtbl3");
2824 case NEON::BI__builtin_neon_vtbl4_v:
2825 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
2826 Ops, "vtbl4");
2827 case NEON::BI__builtin_neon_vtbx1_v:
2828 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
2829 Ops, "vtbx1");
2830 case NEON::BI__builtin_neon_vtbx2_v:
2831 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
2832 Ops, "vtbx2");
2833 case NEON::BI__builtin_neon_vtbx3_v:
2834 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
2835 Ops, "vtbx3");
2836 case NEON::BI__builtin_neon_vtbx4_v:
2837 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
2838 Ops, "vtbx4");
2839 }
2840}
2841
2842template<typename Integer>
2843static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
2844 return E->getIntegerConstantExpr(Context)->getExtValue();
2845}
2846
2847static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
2848 llvm::Type *T, bool Unsigned) {
2849 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
2850 // which finds it convenient to specify signed/unsigned as a boolean flag.
2851 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
2852}
2853
2854static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
2855 uint32_t Shift, bool Unsigned) {
2856 // MVE helper function for integer shift right. This must handle signed vs
2857 // unsigned, and also deal specially with the case where the shift count is
2858 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
2859 // undefined behavior, but in MVE it's legal, so we must convert it to code
2860 // that is not undefined in IR.
2861 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
2862 ->getElementType()
2863 ->getPrimitiveSizeInBits();
2864 if (Shift == LaneBits) {
2865 // An unsigned shift of the full lane size always generates zero, so we can
2866 // simply emit a zero vector. A signed shift of the full lane size does the
2867 // same thing as shifting by one bit fewer.
2868 if (Unsigned)
2869 return llvm::Constant::getNullValue(V->getType());
2870 else
2871 --Shift;
2872 }
2873 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
2874}
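// A minimal scalar model of the lowering above (an illustrative sketch,
// not part of the original file), assuming a 32-bit lane and an
// arithmetic right shift for signed values. A shift by the full lane
// width is undefined in C++ just as in LLVM IR, so it is rewritten first.
static int exampleMveShrScalar(int V, unsigned Shift, bool Unsigned) {
  const unsigned LaneBits = 32;
  if (Shift == LaneBits) {
    if (Unsigned)
      return 0; // a full-width unsigned shift always produces zero
    --Shift;    // a full-width signed shift acts like a shift by LaneBits-1
  }
  return Unsigned ? int(unsigned(V) >> Shift) : (V >> Shift);
}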
2875
2876static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
2877 // MVE-specific helper function for a vector splat, which infers the element
2878 // count of the output vector by knowing that MVE vectors are all 128 bits
2879 // wide.
2880 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
2881 return Builder.CreateVectorSplat(Elements, V);
2882}
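// E.g. (illustrative): splatting an i16 scalar yields 128 / 16 = 8 lanes
// and an i32 scalar yields 4 lanes; MVE vectors have no other total width.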
2883
2884static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
2885 CodeGenFunction *CGF,
2886 llvm::Value *V,
2887 llvm::Type *DestType) {
2888 // Convert one MVE vector type into another by reinterpreting its in-register
2889 // format.
2890 //
2891 // On little-endian targets, this is identical to a bitcast (which
2892 // reinterprets the memory format). On big-endian targets the two are not
2893 // necessarily the same, because the register and memory formats map to
2894 // each other differently depending on the lane size.
2895 //
2896 // We generate a bitcast whenever we can (if we're little-endian, or if the
2897 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
2898 // that performs the different kind of reinterpretation.
2899 if (CGF->getTarget().isBigEndian() &&
2900 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
2901 return Builder.CreateCall(
2902 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
2903 {DestType, V->getType()}),
2904 V);
2905 } else {
2906 return Builder.CreateBitCast(V, DestType);
2907 }
2908}
2909
2910static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
2911 // Make a shufflevector that extracts every other element of a vector (evens
2912 // or odds, as desired).
2913 SmallVector<int, 16> Indices;
2914 unsigned InputElements =
2915 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
2916 for (unsigned i = 0; i < InputElements; i += 2)
2917 Indices.push_back(i + Odd);
2918 return Builder.CreateShuffleVector(V, Indices);
2919}
2920
2921static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
2922 llvm::Value *V1) {
2923 // Make a shufflevector that interleaves two vectors element by element.
2924 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
2925 SmallVector<int, 16> Indices;
2926 unsigned InputElements =
2927 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
2928 for (unsigned i = 0; i < InputElements; i++) {
2929 Indices.push_back(i);
2930 Indices.push_back(i + InputElements);
2931 }
2932 return Builder.CreateShuffleVector(V0, V1, Indices);
2933}
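// A minimal sketch (illustrative, not part of the original file) of the
// shuffle masks the two helpers above construct. For 4-element inputs:
// unzip evens -> {0, 2}; unzip odds -> {1, 3}; zip -> {0, 4, 1, 5, 2, 6,
// 3, 7}, where an index >= 4 selects a lane of the second vector, per the
// shufflevector convention.
static SmallVector<int, 16> exampleUnzipMask(unsigned NumElts, bool Odd) {
  SmallVector<int, 16> Indices;
  for (unsigned i = 0; i < NumElts; i += 2)
    Indices.push_back(i + Odd); // every other lane, starting at 0 or 1
  return Indices;
}
static SmallVector<int, 16> exampleZipMask(unsigned NumElts) {
  SmallVector<int, 16> Indices;
  for (unsigned i = 0; i < NumElts; i++) {
    Indices.push_back(i);           // lane i of the first vector
    Indices.push_back(i + NumElts); // lane i of the second vector
  }
  return Indices;
}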
2934
2935template<unsigned HighBit, unsigned OtherBits>
2936static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
2937 // MVE-specific helper function to make a vector splat of a constant such as
2938 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
2939 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
2940 unsigned LaneBits = T->getPrimitiveSizeInBits();
2941 uint32_t Value = HighBit << (LaneBits - 1);
2942 if (OtherBits)
2943 Value |= (1UL << (LaneBits - 1)) - 1;
2944 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
2945 return ARMMVEVectorSplat(Builder, Lane);
2946}
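// Worked example (illustrative) for 16-bit lanes: HighBit=1, OtherBits=0
// gives 0x8000 (the INT16_MIN bit pattern); HighBit=0, OtherBits=1 gives
// 0x7fff (INT16_MAX); HighBit=1, OtherBits=1 gives 0xffff (UINT16_MAX).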
2947
2948static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
2949 llvm::Value *V,
2950 unsigned ReverseWidth) {
2951 // MVE-specific helper function which reverses the elements of a
2952 // vector within every (ReverseWidth)-bit collection of lanes.
2953 SmallVector<int, 16> Indices;
2954 unsigned LaneSize = V->getType()->getScalarSizeInBits();
2955 unsigned Elements = 128 / LaneSize;
2956 unsigned Mask = ReverseWidth / LaneSize - 1;
2957 for (unsigned i = 0; i < Elements; i++)
2958 Indices.push_back(i ^ Mask);
2959 return Builder.CreateShuffleVector(V, Indices);
2960}
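// Worked example (illustrative): for 8-bit lanes and ReverseWidth == 32,
// Mask = 32/8 - 1 = 3 and the indices are i ^ 3, i.e.
// {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}, reversing the bytes within
// each 32-bit group as a VREV32.8 would.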
2961
2962static llvm::Value *ARMMVECreateSIToFP(CGBuilderTy &Builder,
2963 CodeGenFunction *CGF, llvm::Value *V,
2964 llvm::Type *Ty) {
2965 return Builder.CreateCall(
2966 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_fp_int, {Ty, V->getType()}),
2967 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 0)});
2968}
2969
2970static llvm::Value *ARMMVECreateUIToFP(CGBuilderTy &Builder,
2971 CodeGenFunction *CGF, llvm::Value *V,
2972 llvm::Type *Ty) {
2973 return Builder.CreateCall(
2974 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_fp_int, {Ty, V->getType()}),
2975 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)});
2976}
2977
2978static llvm::Value *ARMMVECreateFPToSI(CGBuilderTy &Builder,
2979 CodeGenFunction *CGF, llvm::Value *V,
2980 llvm::Type *Ty) {
2981 return Builder.CreateCall(
2982 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_int_fp, {Ty, V->getType()}),
2983 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 0)});
2984}
2985
2986static llvm::Value *ARMMVECreateFPToUI(CGBuilderTy &Builder,
2987 CodeGenFunction *CGF, llvm::Value *V,
2988 llvm::Type *Ty) {
2989 return Builder.CreateCall(
2990 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_int_fp, {Ty, V->getType()}),
2991 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)});
2992}
2993
2994Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
2995 const CallExpr *E,
2996 ReturnValueSlot ReturnValue,
2997 llvm::Triple::ArchType Arch) {
2998 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
2999 Intrinsic::ID IRIntr;
3000 unsigned NumVectors;
3001
3002 // Code autogenerated by Tablegen will handle all the simple builtins.
3003 switch (BuiltinID) {
3004 #include "clang/Basic/arm_mve_builtin_cg.inc"
3005
3006 // If we didn't match an MVE builtin id at all, go back to the
3007 // main EmitARMBuiltinExpr.
3008 default:
3009 return nullptr;
3010 }
3011
3012 // Anything that breaks from that switch is an MVE builtin that
3013 // needs handwritten code to generate.
3014
3015 switch (CustomCodeGenType) {
3016
3017 case CustomCodeGen::VLD24: {
3018 llvm::SmallVector<Value *, 4> Ops;
3019 llvm::SmallVector<llvm::Type *, 4> Tys;
3020
3021 auto MvecCType = E->getType();
3022 auto MvecLType = ConvertType(MvecCType);
3023 assert(MvecLType->isStructTy() &&
3024 "Return type for vld[24]q should be a struct");
3025 assert(MvecLType->getStructNumElements() == 1 &&
3026 "Return-type struct for vld[24]q should have one element");
3027 auto MvecLTypeInner = MvecLType->getStructElementType(0);
3028 assert(MvecLTypeInner->isArrayTy() &&
3029 "Return-type struct for vld[24]q should contain an array");
3030 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3031 "Array member of return-type struct vld[24]q has wrong length");
3032 auto VecLType = MvecLTypeInner->getArrayElementType();
3033
3034 Tys.push_back(VecLType);
3035
3036 auto Addr = E->getArg(0);
3037 Ops.push_back(EmitScalarExpr(Addr));
3038 Tys.push_back(ConvertType(Addr->getType()));
3039
3040 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
3041 Value *LoadResult = Builder.CreateCall(F, Ops);
3042 Value *MvecOut = PoisonValue::get(MvecLType);
3043 for (unsigned i = 0; i < NumVectors; ++i) {
3044 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
3045 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
3046 }
3047
3048 if (ReturnValue.isNull())
3049 return MvecOut;
3050 else
3051 return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
3052 }
3053
3054 case CustomCodeGen::VST24: {
3055 llvm::SmallVector<Value *, 4> Ops;
3056 llvm::SmallVector<llvm::Type *, 4> Tys;
3057
3058 auto Addr = E->getArg(0);
3059 Ops.push_back(EmitScalarExpr(Addr));
3060 Tys.push_back(ConvertType(Addr->getType()));
3061
3062 auto MvecCType = E->getArg(1)->getType();
3063 auto MvecLType = ConvertType(MvecCType);
3064 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
3065 assert(MvecLType->getStructNumElements() == 1 &&
3066 "Data-type struct for vst2q should have one element");
3067 auto MvecLTypeInner = MvecLType->getStructElementType(0);
3068 assert(MvecLTypeInner->isArrayTy() &&
3069 "Data-type struct for vst2q should contain an array");
3070 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3071 "Array member of return-type struct vld[24]q has wrong length");
3072 auto VecLType = MvecLTypeInner->getArrayElementType();
3073
3074 Tys.push_back(VecLType);
3075
3076 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
3077 EmitAggExpr(E->getArg(1), MvecSlot);
3078 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
3079 for (unsigned i = 0; i < NumVectors; i++)
3080 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
3081
3082 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
3083 Value *ToReturn = nullptr;
3084 for (unsigned i = 0; i < NumVectors; i++) {
3085 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
3086 ToReturn = Builder.CreateCall(F, Ops);
3087 Ops.pop_back();
3088 }
3089 return ToReturn;
3090 }
3091 }
3092 llvm_unreachable("unknown custom codegen type.");
3093}
3094
3095Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
3096 const CallExpr *E,
3097 ReturnValueSlot ReturnValue,
3098 llvm::Triple::ArchType Arch) {
3099 switch (BuiltinID) {
3100 default:
3101 return nullptr;
3102#include "clang/Basic/arm_cde_builtin_cg.inc"
3103 }
3104}
3105
3106static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
3107 const CallExpr *E,
3108 SmallVectorImpl<Value *> &Ops,
3109 llvm::Triple::ArchType Arch) {
3110 unsigned int Int = 0;
3111 const char *s = nullptr;
3112
3113 switch (BuiltinID) {
3114 default:
3115 return nullptr;
3116 case NEON::BI__builtin_neon_vtbl1_v:
3117 case NEON::BI__builtin_neon_vqtbl1_v:
3118 case NEON::BI__builtin_neon_vqtbl1q_v:
3119 case NEON::BI__builtin_neon_vtbl2_v:
3120 case NEON::BI__builtin_neon_vqtbl2_v:
3121 case NEON::BI__builtin_neon_vqtbl2q_v:
3122 case NEON::BI__builtin_neon_vtbl3_v:
3123 case NEON::BI__builtin_neon_vqtbl3_v:
3124 case NEON::BI__builtin_neon_vqtbl3q_v:
3125 case NEON::BI__builtin_neon_vtbl4_v:
3126 case NEON::BI__builtin_neon_vqtbl4_v:
3127 case NEON::BI__builtin_neon_vqtbl4q_v:
3128 break;
3129 case NEON::BI__builtin_neon_vtbx1_v:
3130 case NEON::BI__builtin_neon_vqtbx1_v:
3131 case NEON::BI__builtin_neon_vqtbx1q_v:
3132 case NEON::BI__builtin_neon_vtbx2_v:
3133 case NEON::BI__builtin_neon_vqtbx2_v:
3134 case NEON::BI__builtin_neon_vqtbx2q_v:
3135 case NEON::BI__builtin_neon_vtbx3_v:
3136 case NEON::BI__builtin_neon_vqtbx3_v:
3137 case NEON::BI__builtin_neon_vqtbx3q_v:
3138 case NEON::BI__builtin_neon_vtbx4_v:
3139 case NEON::BI__builtin_neon_vqtbx4_v:
3140 case NEON::BI__builtin_neon_vqtbx4q_v:
3141 break;
3142 }
3143
3144 assert(E->getNumArgs() >= 3);
3145
3146 // Get the last argument, which specifies the vector type.
3147 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3148 std::optional<llvm::APSInt> Result =
3149 Arg->getIntegerConstantExpr(CGF.getContext());
3150 if (!Result)
3151 return nullptr;
3152
3153 // Determine the type of this overloaded NEON intrinsic.
3154 NeonTypeFlags Type = Result->getZExtValue();
3155 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
3156 if (!Ty)
3157 return nullptr;
3158
3159 CodeGen::CGBuilderTy &Builder = CGF.Builder;
3160
3161 // AArch64 scalar builtins are not overloaded; they do not have an extra
3162 // argument that specifies the vector type, so we need to handle each case.
3163 switch (BuiltinID) {
3164 case NEON::BI__builtin_neon_vtbl1_v: {
3165 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
3166 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3167 }
3168 case NEON::BI__builtin_neon_vtbl2_v: {
3169 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
3170 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3171 }
3172 case NEON::BI__builtin_neon_vtbl3_v: {
3173 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
3174 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3175 }
3176 case NEON::BI__builtin_neon_vtbl4_v: {
3177 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
3178 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3179 }
3180 case NEON::BI__builtin_neon_vtbx1_v: {
3181 Value *TblRes =
3182 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
3183 Intrinsic::aarch64_neon_tbl1, "vtbl1");
3184
3185 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
3186 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
3187 CmpRes = Builder.CreateSExt(CmpRes, Ty);
3188
3189 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
3190 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
3191 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
3192 }
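// Note (illustrative): vtbx1 has no single AArch64 table instruction for a
// one-register table, so it is emulated above; lanes whose index is in
// [0, 8) take the tbl1 result, and lanes with an out-of-range index keep
// the original destination element.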
3193 case NEON::BI__builtin_neon_vtbx2_v: {
3194 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
3195 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
3196 }
3197 case NEON::BI__builtin_neon_vtbx3_v: {
3198 Value *TblRes =
3199 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
3200 Intrinsic::aarch64_neon_tbl2, "vtbl2");
3201
3202 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
3203 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
3204 TwentyFourV);
3205 CmpRes = Builder.CreateSExt(CmpRes, Ty);
3206
3207 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
3208 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
3209 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
3210 }
3211 case NEON::BI__builtin_neon_vtbx4_v: {
3212 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
3213 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
3214 }
3215 case NEON::BI__builtin_neon_vqtbl1_v:
3216 case NEON::BI__builtin_neon_vqtbl1q_v:
3217 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
3218 case NEON::BI__builtin_neon_vqtbl2_v:
3219 case NEON::BI__builtin_neon_vqtbl2q_v:
3220 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
3221 case NEON::BI__builtin_neon_vqtbl3_v:
3222 case NEON::BI__builtin_neon_vqtbl3q_v:
3223 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
3224 case NEON::BI__builtin_neon_vqtbl4_v:
3225 case NEON::BI__builtin_neon_vqtbl4q_v:
3226 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
3227 case NEON::BI__builtin_neon_vqtbx1_v:
3228 case NEON::BI__builtin_neon_vqtbx1q_v:
3229 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
3230 case NEON::BI__builtin_neon_vqtbx2_v:
3231 case NEON::BI__builtin_neon_vqtbx2q_v:
3232 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
3233 case NEON::BI__builtin_neon_vqtbx3_v:
3234 case NEON::BI__builtin_neon_vqtbx3q_v:
3235 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
3236 case NEON::BI__builtin_neon_vqtbx4_v:
3237 case NEON::BI__builtin_neon_vqtbx4q_v:
3238 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
3240 }
3241
3242 if (!Int)
3243 return nullptr;
3244
3245 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
3246 return CGF.EmitNeonCall(F, Ops, s);
3247}
3248
3249Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
3250 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
3251 Op = Builder.CreateBitCast(Op, Int16Ty);
3252 Value *V = PoisonValue::get(VTy);
3253 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3254 Op = Builder.CreateInsertElement(V, Op, CI);
3255 return Op;
3256}
3257
3258/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
3259/// access builtin. Only required if it can't be inferred from the base pointer
3260/// operand.
3261llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
3262 switch (TypeFlags.getMemEltType()) {
3263 case SVETypeFlags::MemEltTyDefault:
3264 return getEltType(TypeFlags);
3265 case SVETypeFlags::MemEltTyInt8:
3266 return Builder.getInt8Ty();
3267 case SVETypeFlags::MemEltTyInt16:
3268 return Builder.getInt16Ty();
3269 case SVETypeFlags::MemEltTyInt32:
3270 return Builder.getInt32Ty();
3271 case SVETypeFlags::MemEltTyInt64:
3272 return Builder.getInt64Ty();
3273 }
3274 llvm_unreachable("Unknown MemEltType");
3275}
3276
3277llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
3278 switch (TypeFlags.getEltType()) {
3279 default:
3280 llvm_unreachable("Invalid SVETypeFlag!");
3281
3282 case SVETypeFlags::EltTyMFloat8:
3283 case SVETypeFlags::EltTyInt8:
3284 return Builder.getInt8Ty();
3285 case SVETypeFlags::EltTyInt16:
3286 return Builder.getInt16Ty();
3287 case SVETypeFlags::EltTyInt32:
3288 return Builder.getInt32Ty();
3289 case SVETypeFlags::EltTyInt64:
3290 return Builder.getInt64Ty();
3291 case SVETypeFlags::EltTyInt128:
3292 return Builder.getInt128Ty();
3293
3294 case SVETypeFlags::EltTyFloat16:
3295 return Builder.getHalfTy();
3296 case SVETypeFlags::EltTyFloat32:
3297 return Builder.getFloatTy();
3298 case SVETypeFlags::EltTyFloat64:
3299 return Builder.getDoubleTy();
3300
3301 case SVETypeFlags::EltTyBFloat16:
3302 return Builder.getBFloatTy();
3303
3304 case SVETypeFlags::EltTyBool8:
3305 case SVETypeFlags::EltTyBool16:
3306 case SVETypeFlags::EltTyBool32:
3307 case SVETypeFlags::EltTyBool64:
3308 return Builder.getInt1Ty();
3309 }
3310}
3311
3312// Return the llvm predicate vector type corresponding to the element type
3313// specified by TypeFlags.
3314llvm::ScalableVectorType *
3315CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
3316 switch (TypeFlags.getEltType()) {
3317 default: llvm_unreachable("Unhandled SVETypeFlag!");
3318
3319 case SVETypeFlags::EltTyInt8:
3320 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3321 case SVETypeFlags::EltTyInt16:
3322 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3323 case SVETypeFlags::EltTyInt32:
3324 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3325 case SVETypeFlags::EltTyInt64:
3326 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3327
3328 case SVETypeFlags::EltTyBFloat16:
3329 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3330 case SVETypeFlags::EltTyFloat16:
3331 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3332 case SVETypeFlags::EltTyFloat32:
3333 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3334 case SVETypeFlags::EltTyFloat64:
3335 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3336
3337 case SVETypeFlags::EltTyBool8:
3338 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3339 case SVETypeFlags::EltTyBool16:
3340 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3341 case SVETypeFlags::EltTyBool32:
3342 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3343 case SVETypeFlags::EltTyBool64:
3344 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3345 }
3346}
3347
3348// Return the llvm vector type corresponding to the element type specified by TypeFlags.
3349llvm::ScalableVectorType *
3350CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
3351 switch (TypeFlags.getEltType()) {
3352 default:
3353 llvm_unreachable("Invalid SVETypeFlag!");
3354
3355 case SVETypeFlags::EltTyInt8:
3356 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
3357 case SVETypeFlags::EltTyInt16:
3358 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
3359 case SVETypeFlags::EltTyInt32:
3360 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
3361 case SVETypeFlags::EltTyInt64:
3362 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
3363
3364 case SVETypeFlags::EltTyMFloat8:
3365 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
3366 case SVETypeFlags::EltTyFloat16:
3367 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
3368 case SVETypeFlags::EltTyBFloat16:
3369 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
3370 case SVETypeFlags::EltTyFloat32:
3371 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
3372 case SVETypeFlags::EltTyFloat64:
3373 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
3374
3375 case SVETypeFlags::EltTyBool8:
3376 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3377 case SVETypeFlags::EltTyBool16:
3378 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3379 case SVETypeFlags::EltTyBool32:
3380 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3381 case SVETypeFlags::EltTyBool64:
3382 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3383 }
3384}
3385
3386llvm::Value *
3387CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
3388 Function *Ptrue =
3389 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
3390 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
3391}
3392
3393constexpr unsigned SVEBitsPerBlock = 128;
3394
3395static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
3396 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
3397 return llvm::ScalableVectorType::get(EltTy, NumElts);
3398}
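// E.g. (illustrative): i8 maps to <vscale x 16 x i8>, i16 to
// <vscale x 8 x i16> and double to <vscale x 2 x double>, since
// 128 / EltBits lanes fit in one 128-bit SVE granule.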
3399
3400// Reinterpret the input predicate so that it can be used to correctly isolate
3401// the elements of the specified datatype.
3402Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
3403 llvm::ScalableVectorType *VTy) {
3404
3405 if (isa<TargetExtType>(Pred->getType()) &&
3406 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
3407 return Pred;
3408
3409 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
3410 if (Pred->getType() == RTy)
3411 return Pred;
3412
3413 unsigned IntID;
3414 llvm::Type *IntrinsicTy;
3415 switch (VTy->getMinNumElements()) {
3416 default:
3417 llvm_unreachable("unsupported element count!");
3418 case 1:
3419 case 2:
3420 case 4:
3421 case 8:
3422 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
3423 IntrinsicTy = RTy;
3424 break;
3425 case 16:
3426 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
3427 IntrinsicTy = Pred->getType();
3428 break;
3429 }
3430
3431 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
3432 Value *C = Builder.CreateCall(F, Pred);
3433 assert(C->getType() == RTy && "Unexpected return type!");
3434 return C;
3435}
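// For example (illustrative IR): casting an svbool_t predicate for use
// with <vscale x 2 x double> data emits
//   %p = call <vscale x 2 x i1>
//        @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
// while widening back to svbool_t uses convert.to.svbool instead.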
3436
3437Value *CodeGenFunction::EmitSVEPredicateTupleCast(Value *PredTuple,
3438 llvm::StructType *Ty) {
3439 if (PredTuple->getType() == Ty)
3440 return PredTuple;
3441
3442 Value *Ret = llvm::PoisonValue::get(Ty);
3443 for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
3444 Value *Pred = Builder.CreateExtractValue(PredTuple, I);
3445 Pred = EmitSVEPredicateCast(
3446 Pred, cast<llvm::ScalableVectorType>(Ty->getTypeAtIndex(I)));
3447 Ret = Builder.CreateInsertValue(Ret, Pred, I);
3448 }
3449
3450 return Ret;
3451}
3452
3453Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
3454 SmallVectorImpl<Value *> &Ops,
3455 unsigned IntID) {
3456 auto *ResultTy = getSVEType(TypeFlags);
3457 auto *OverloadedTy =
3458 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
3459 Function *F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
3460
3461 // At the ACLE level there's only one predicate type, svbool_t, which is
3462 // mapped to <n x 16 x i1>. However, this might be incompatible with the
3463 // actual type being loaded. For example, when loading doubles (i64) the
3464 // predicate should be <n x 2 x i1> instead. At the IR level the type of
3465 // the predicate and the data being loaded must match. Cast to the type
3466 // expected by the intrinsic. The intrinsic itself should be defined in
3467 // a way that enforces relations between parameter types.
3468 Ops[0] = EmitSVEPredicateCast(
3469 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
3470
3471 // Pass 0 when the offset is missing. This can only be applied when using
3472 // the "vector base" addressing mode for which ACLE allows no offset. The
3473 // corresponding LLVM IR always requires an offset.
3474 if (Ops.size() == 2) {
3475 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
3476 Ops.push_back(ConstantInt::get(Int64Ty, 0));
3477 }
3478
3479 // For "vector base, scalar index" scale the index so that it becomes a
3480 // scalar offset.
3481 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
3482 unsigned BytesPerElt =
3483 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
3484 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
3485 }
3486
3487 Value *Call = Builder.CreateCall(F, Ops);
3488
3489 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
3490 // other cases it's folded into a nop.
3491 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
3492 : Builder.CreateSExt(Call, ResultTy);
3493}
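// E.g. (illustrative): a gather of i64 elements in the "vector base,
// scalar index" form shifts the index left by log2(8) = 3 above, turning
// an element index into the byte offset the LLVM intrinsic expects.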
3494
3495Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
3496 SmallVectorImpl<Value *> &Ops,
3497 unsigned IntID) {
3498 auto *SrcDataTy = getSVEType(TypeFlags);
3499 auto *OverloadedTy =
3500 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
3501
3502 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
3503 // it's the first argument. Move it accordingly.
3504 Ops.insert(Ops.begin(), Ops.pop_back_val());
3505
3506 Function *F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
3507
3508 // Pass 0 when the offset is missing. This can only be applied when using
3509 // the "vector base" addressing mode for which ACLE allows no offset. The
3510 // corresponding LLVM IR always requires an offset.
3511 if (Ops.size() == 3) {
3512 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
3513 Ops.push_back(ConstantInt::get(Int64Ty, 0));
3514 }
3515
3516 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
3517 // folded into a nop.
3518 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
3519
3520 // At the ACLE level there's only one predicate type, svbool_t, which is
3521 // mapped to <n x 16 x i1>. However, this might be incompatible with the
3522 // actual type being stored. For example, when storing doubles (i64) the
3523 // predicate should be <n x 2 x i1> instead. At the IR level the type of
3524 // the predicate and the data being stored must match. Cast to the type
3525 // expected by the intrinsic. The intrinsic itself should be defined in
3526 // a way that enforces relations between parameter types.
3527 Ops[1] = EmitSVEPredicateCast(
3528 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
3529
3530 // For "vector base, scalar index" scale the index so that it becomes a
3531 // scalar offset.
3532 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
3533 unsigned BytesPerElt =
3534 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
3535 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
3536 }
3537
3538 return Builder.CreateCall(F, Ops);
3539}
3540
3541Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
3542 SmallVectorImpl<Value *> &Ops,
3543 unsigned IntID) {
3544 // The gather prefetches are overloaded on the vector input - this can either
3545 // be the vector of base addresses or vector of offsets.
3546 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
3547 if (!OverloadedTy)
3548 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
3549
3550 // Cast the predicate from svbool_t to the right number of elements.
3551 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
3552
3553 // vector + imm addressing modes
3554 if (Ops[1]->getType()->isVectorTy()) {
3555 if (Ops.size() == 3) {
3556 // Pass 0 for 'vector+imm' when the index is omitted.
3557 Ops.push_back(ConstantInt::get(Int64Ty, 0));
3558
3559 // The sv_prfop is the last operand in the builtin and IR intrinsic.
3560 std::swap(Ops[2], Ops[3]);
3561 } else {
3562 // Index needs to be passed as scaled offset.
3563 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
3564 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
3565 if (BytesPerElt > 1)
3566 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
3567 }
3568
3569 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
3570 return Builder.CreateCall(F, Ops);
3571 }
3572
3573 Function *F = CGM.getIntrinsic(IntID, {Ops[1]->getType(), OverloadedTy});
3574 return Builder.CreateCall(F, Ops);
3575}
3576
3577Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
3578 SmallVectorImpl<Value *> &Ops,
3579 unsigned IntID) {
3580 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
3581 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
3582 Value *BasePtr = Ops[1];
3583
3584 // Does the load have an offset?
3585 if (Ops.size() > 2)
3586 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
3587
3588 Function *F = CGM.getIntrinsic(IntID, {VTy, BasePtr->getType()});
3589 return Builder.CreateCall(F, {Predicate, BasePtr});
3590}
3591
3592Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
3593 SmallVectorImpl<Value *> &Ops,
3594 unsigned IntID) {
3595 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
3596
3597 unsigned N;
3598 switch (IntID) {
3599 case Intrinsic::aarch64_sve_st2:
3600 case Intrinsic::aarch64_sve_st1_pn_x2:
3601 case Intrinsic::aarch64_sve_stnt1_pn_x2:
3602 case Intrinsic::aarch64_sve_st2q:
3603 N = 2;
3604 break;
3605 case Intrinsic::aarch64_sve_st3:
3606 case Intrinsic::aarch64_sve_st3q:
3607 N = 3;
3608 break;
3609 case Intrinsic::aarch64_sve_st4:
3610 case Intrinsic::aarch64_sve_st1_pn_x4:
3611 case Intrinsic::aarch64_sve_stnt1_pn_x4:
3612 case Intrinsic::aarch64_sve_st4q:
3613 N = 4;
3614 break;
3615 default:
3616 llvm_unreachable("unknown intrinsic!");
3617 }
3618
3619 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
3620 Value *BasePtr = Ops[1];
3621
3622 // Does the store have an offset?
3623 if (Ops.size() > (2 + N))
3624 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
3625
3626 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
3627 // need to break up the tuple vector.
3628 SmallVector<llvm::Value *, 5> Operands;
3629 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
3630 Operands.push_back(Ops[I]);
3631 Operands.append({Predicate, BasePtr});
3632 Function *F = CGM.getIntrinsic(IntID, {VTy, BasePtr->getType()});
3633
3634 return Builder.CreateCall(F, Operands);
3635}
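// E.g. (illustrative): an svst2 call ends up as
//   llvm.aarch64.sve.st2(<part0>, <part1>, <predicate>, <base pointer>)
// once the tuple has been split into its part vectors above.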
3636
3637// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
3638// svpmullt_pair intrinsics, with the exception that their results are bitcast
3639// to a wider type.
3640Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
3641 SmallVectorImpl<Value *> &Ops,
3642 unsigned BuiltinID) {
3643 // Splat scalar operand to vector (intrinsics with _n infix)
3644 if (TypeFlags.hasSplatOperand()) {
3645 unsigned OpNo = TypeFlags.getSplatOperand();
3646 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
3647 }
3648
3649 // The pair-wise function has a narrower overloaded type.
3650 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
3651 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
3652
3653 // Now bitcast to the wider result type.
3654 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
3655 return EmitSVEReinterpret(Call, Ty);
3656}
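// E.g. (illustrative): svpmullb_u16 calls the pairwise intrinsic on its
// <vscale x 16 x i8> operands and then reinterprets the result as
// <vscale x 8 x i16>, the wider type the ACLE builtin returns.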
3657
3658Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
3659 ArrayRef<Value *> Ops, unsigned BuiltinID) {
3660 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
3661 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
3662 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
3663}
3664
3665Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
3666 SmallVectorImpl<Value *> &Ops,
3667 unsigned BuiltinID) {
3668 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
3669 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
3670 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
3671
3672 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
3673 Value *BasePtr = Ops[1];
3674
3675 // Apply the index operand if it is not omitted.
3676 if (Ops.size() > 3)
3677 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
3678
3679 Value *PrfOp = Ops.back();
3680
3681 llvm::Type *Tys[2] = {Predicate->getType(), BasePtr->getType()};
3682 Function *F = CGM.getIntrinsic(BuiltinID, Tys);
3683 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
3684}
3685
3686Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
3687 llvm::Type *ReturnTy,
3688 SmallVectorImpl<Value *> &Ops,
3689 unsigned IntrinsicID,
3690 bool IsZExtReturn) {
3691 QualType LangPTy = E->getArg(1)->getType();
3692 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
3693 LangPTy->castAs<PointerType>()->getPointeeType());
3694
3695 // The Mfloat8 type is stored as a vector, so extra work
3696 // to extract the scalar element type is necessary.
3697 if (MemEltTy->isVectorTy()) {
3698 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
3699 "Only <1 x i8> expected");
3700 MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
3701 }
3702
3703 // The vector type that is returned may be different from the
3704 // eventual type loaded from memory.
3705 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
3706 llvm::ScalableVectorType *MemoryTy = nullptr;
3707 llvm::ScalableVectorType *PredTy = nullptr;
3708 bool IsQuadLoad = false;
3709 switch (IntrinsicID) {
3710 case Intrinsic::aarch64_sve_ld1uwq:
3711 case Intrinsic::aarch64_sve_ld1udq:
3712 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
3713 PredTy = llvm::ScalableVectorType::get(
3714 llvm::Type::getInt1Ty(getLLVMContext()), 1);
3715 IsQuadLoad = true;
3716 break;
3717 default:
3718 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
3719 PredTy = MemoryTy;
3720 break;
3721 }
3722
3723 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
3724 Value *BasePtr = Ops[1];
3725
3726 // Does the load have an offset?
3727 if (Ops.size() > 2)
3728 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
3729
3730 llvm::Type *Tys[2] = {IsQuadLoad ? VectorTy : MemoryTy, BasePtr->getType()};
3731 Function *F = CGM.getIntrinsic(IntrinsicID, Tys);
3732 auto *Load = Builder.CreateCall(F, {Predicate, BasePtr});
3733 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
3734 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
3735
3736 if (IsQuadLoad)
3737 return Load;
3738
3739 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
3740 : Builder.CreateSExt(Load, VectorTy);
3741}
3742
3743Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
3744 SmallVectorImpl<Value *> &Ops,
3745 unsigned IntrinsicID) {
3746 QualType LangPTy = E->getArg(1)->getType();
3747 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
3748 LangPTy->castAs<PointerType>()->getPointeeType());
3749
3750 // The Mfloat8 type is stored as a vector, so extra work
3751 // to extract the scalar element type is necessary.
3752 if (MemEltTy->isVectorTy()) {
3753 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
3754 "Only <1 x i8> expected");
3755 MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
3756 }
3757
3758 // The vector type that is stored may be different from the
3759 // eventual type stored to memory.
3760 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
3761 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
3762
3763 auto PredTy = MemoryTy;
3764 auto AddrMemoryTy = MemoryTy;
3765 bool IsQuadStore = false;
3766
3767 switch (IntrinsicID) {
3768 case Intrinsic::aarch64_sve_st1wq:
3769 case Intrinsic::aarch64_sve_st1dq:
3770 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
3771 PredTy =
3772 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
3773 IsQuadStore = true;
3774 break;
3775 default:
3776 break;
3777 }
3778 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
3779 Value *BasePtr = Ops[1];
3780
3781 // Does the store have an offset?
3782 if (Ops.size() == 4)
3783 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
3784
3785 // Last value is always the data
3786 Value *Val =
3787 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
3788
3789 llvm::Type *Tys[2] = {IsQuadStore ? VectorTy : MemoryTy, BasePtr->getType()};
3790 Function *F = CGM.getIntrinsic(IntrinsicID, Tys);
3791 auto *Store = Builder.CreateCall(F, {Val, Predicate, BasePtr});
3792 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
3793 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
3794 return Store;
3795}
3796
3797Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
3798 SmallVectorImpl<Value *> &Ops,
3799 unsigned IntID) {
3800 Ops[2] = EmitSVEPredicateCast(
3801 Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
3802
3803 SmallVector<Value *> NewOps;
3804 NewOps.push_back(Ops[2]);
3805
3806 llvm::Value *BasePtr = Ops[3];
3807 llvm::Value *RealSlice = Ops[1];
3808 // If the intrinsic contains the vnum parameter, multiply it by the vector
3809 // size in bytes.
3810 if (Ops.size() == 5) {
3811 Function *StreamingVectorLength =
3812 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd);
3813 llvm::Value *StreamingVectorLengthCall =
3814 Builder.CreateMul(Builder.CreateCall(StreamingVectorLength),
3815 llvm::ConstantInt::get(Int64Ty, 8), "svl",
3816 /* HasNUW */ true, /* HasNSW */ true);
3817 llvm::Value *Mulvl =
3818 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
3819 // The type of the ptr parameter is void *, so use Int8Ty here.
3820 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
3821 RealSlice = Builder.CreateZExt(RealSlice, Int64Ty);
3822 RealSlice = Builder.CreateAdd(RealSlice, Ops[4]);
3823 RealSlice = Builder.CreateTrunc(RealSlice, Int32Ty);
3824 }
3825 NewOps.push_back(BasePtr);
3826 NewOps.push_back(Ops[0]);
3827 NewOps.push_back(RealSlice);
3828 Function *F = CGM.getIntrinsic(IntID, BasePtr->getType());
3829 return Builder.CreateCall(F, NewOps);
3830}
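// E.g. (illustrative): in the vnum form, the byte offset added to the base
// pointer is vnum * (cntsd() * 8), i.e. vnum times the streaming vector
// length in bytes, and the tile slice index is advanced by vnum as well.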
3831
3832Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
3833 SmallVectorImpl<Value *> &Ops,
3834 unsigned IntID) {
3835 auto *VecTy = getSVEType(TypeFlags);
3836 Function *F = CGM.getIntrinsic(IntID, VecTy);
3837 if (TypeFlags.isReadZA())
3838 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
3839 else if (TypeFlags.isWriteZA())
3840 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
3841 return Builder.CreateCall(F, Ops);
3842}
3843
3844Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
3845 SmallVectorImpl<Value *> &Ops,
3846 unsigned IntID) {
3847 // The svzero_za() intrinsic zeros the entire ZA tile and has no parameters.
3848 if (Ops.size() == 0)
3849 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
3850 Function *F = CGM.getIntrinsic(IntID, {});
3851 return Builder.CreateCall(F, Ops);
3852}
3853
3854Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
3855 SmallVectorImpl<Value *> &Ops,
3856 unsigned IntID) {
3857 if (Ops.size() == 2)
3858 Ops.push_back(Builder.getInt32(0));
3859 else
3860 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
3861 Function *F = CGM.getIntrinsic(IntID, Ops[1]->getType());
3862 return Builder.CreateCall(F, Ops);
3863}
3864
3865// Limit the usage of scalable llvm IR generated by the ACLE by using the
3866// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
3867Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
3868 return Builder.CreateVectorSplat(
3869 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
3870}
3871
3872Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
3873 if (auto *Ty = Scalar->getType(); Ty->isVectorTy()) {
3874#ifndef NDEBUG
3875 auto *VecTy = cast<llvm::VectorType>(Ty);
3876 ElementCount EC = VecTy->getElementCount();
3877 assert(EC.isScalar() && VecTy->getElementType() == Int8Ty &&
3878 "Only <1 x i8> expected");
3879#endif
3880 Scalar = Builder.CreateExtractElement(Scalar, uint64_t(0));
3881 }
3882 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
3883}
3884
3885Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
3886 // FIXME: For big endian this needs an additional REV, or needs a separate
3887 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
3888 // instruction is defined as 'bitwise' equivalent from memory point of
3889 // view (when storing/reloading), whereas the svreinterpret builtin
3890 // implements bitwise equivalent cast from register point of view.
3891 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
3892
3893 if (auto *StructTy = dyn_cast<StructType>(Ty)) {
3894 Value *Tuple = llvm::PoisonValue::get(Ty);
3895
3896 for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
3897 Value *In = Builder.CreateExtractValue(Val, I);
3898 Value *Out = Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
3899 Tuple = Builder.CreateInsertValue(Tuple, Out, I);
3900 }
3901
3902 return Tuple;
3903 }
3904
3905 return Builder.CreateBitCast(Val, Ty);
3906}
3907
3908static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
3909 SmallVectorImpl<Value *> &Ops) {
3910 auto *SplatZero = Constant::getNullValue(Ty);
3911 Ops.insert(Ops.begin(), SplatZero);
3912}
3913
3914static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
3915 SmallVectorImpl<Value *> &Ops) {
3916 auto *SplatUndef = UndefValue::get(Ty);
3917 Ops.insert(Ops.begin(), SplatUndef);
3918}
3919
3920SmallVector<llvm::Type *, 2>
3921CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
3922 llvm::Type *ResultType,
3923 ArrayRef<Value *> Ops) {
3924 if (TypeFlags.isOverloadNone())
3925 return {};
3926
3927 llvm::Type *DefaultType = getSVEType(TypeFlags);
3928
3929 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
3930 return {DefaultType, Ops[1]->getType()};
3931
3932 if (TypeFlags.isOverloadWhileRW())
3933 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
3934
3935 if (TypeFlags.isOverloadFirstandLast())
3936 return {Ops[0]->getType(), Ops.back()->getType()};
3937
3938 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
3939 ResultType->isVectorTy())
3940 return {ResultType, Ops[1]->getType()};
3941
3942 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
3943 return {DefaultType};
3944}
3945
3946Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
3947 ArrayRef<Value *> Ops) {
3948 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
3949 "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
3950 unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();
3951
3952 if (TypeFlags.isTupleSet())
3953 return Builder.CreateInsertValue(Ops[0], Ops[2], Idx);
3954 return Builder.CreateExtractValue(Ops[0], Idx);
3955}
3956
3957Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
3958 llvm::Type *Ty,
3959 ArrayRef<Value *> Ops) {
3960 assert(TypeFlags.isTupleCreate() && "Expects TypleFlag isTupleCreate");
3961
3962 Value *Tuple = llvm::PoisonValue::get(Ty);
3963 for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
3964 Tuple = Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
3965
3966 return Tuple;
3967}
3968
3969void CodeGenFunction::GetAArch64SVEProcessedOperands(
3970 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
3971 SVETypeFlags TypeFlags) {
3972 // Find out if any arguments are required to be integer constant expressions.
3973 unsigned ICEArguments = 0;
3974 ASTContext::GetBuiltinTypeError Error;
3975 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3976 assert(Error == ASTContext::GE_None && "Should not codegen an error");
3977
3978 // Tuple set/get only requires one insert/extract vector, which is
3979 // created by EmitSVETupleSetOrGet.
3980 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
3981
3982 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
3983 bool IsICE = ICEArguments & (1 << i);
3984 Value *Arg = EmitScalarExpr(E->getArg(i));
3985
3986 if (IsICE) {
3987 // If this is required to be a constant, constant fold it so that we know
3988 // that the generated intrinsic gets a ConstantInt.
3989 std::optional<llvm::APSInt> Result =
3990 E->getArg(i)->getIntegerConstantExpr(getContext());
3991 assert(Result && "Expected argument to be a constant");
3992
3993 // Immediates for SVE llvm intrinsics are always 32-bit. We can safely
3994 // truncate because the immediate has been range checked and no valid
3995 // immediate requires more than a handful of bits.
3996 *Result = Result->extOrTrunc(32);
3997 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
3998 continue;
3999 }
4000
4001 if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
4002 for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
4003 Ops.push_back(Builder.CreateExtractValue(Arg, I));
4004
4005 continue;
4006 }
4007
4008 Ops.push_back(Arg);
4009 }
4010}
4011
4012Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
4013 const CallExpr *E) {
4014 llvm::Type *Ty = ConvertType(E->getType());
4015 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
4016 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
4017 Value *Val = EmitScalarExpr(E->getArg(0));
4018 return EmitSVEReinterpret(Val, Ty);
4019 }
4020
4021 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
4022 AArch64SVEIntrinsicsProvenSorted);
4023
4024 llvm::SmallVector<Value *, 4> Ops;
4025 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4026 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4027
4028 if (TypeFlags.isLoad())
4029 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
4030 TypeFlags.isZExtReturn());
4031 if (TypeFlags.isStore())
4032 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
4033 if (TypeFlags.isGatherLoad())
4034 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4035 if (TypeFlags.isScatterStore())
4036 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4037 if (TypeFlags.isPrefetch())
4038 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4039 if (TypeFlags.isGatherPrefetch())
4040 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4041 if (TypeFlags.isStructLoad())
4042 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4043 if (TypeFlags.isStructStore())
4044 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4045 if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
4046 return EmitSVETupleSetOrGet(TypeFlags, Ops);
4047 if (TypeFlags.isTupleCreate())
4048 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
4049 if (TypeFlags.isUndef())
4050 return UndefValue::get(Ty);
4051
4052 // Handle built-ins for which there is a corresponding LLVM Intrinsic.
4053 // -------------------------------------------------------------------
4054 if (Builtin->LLVMIntrinsic != 0) {
4055 // Set FPMR for intrinsics that require it.
4056 if (TypeFlags.setsFPMR())
4057 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
4058 Ops.pop_back_val());
4059 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
4060 InsertExplicitZeroOperand(Builder, getSVEType(TypeFlags), Ops);
4061
4062 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
4063 InsertExplicitUndefOperand(Builder, getSVEType(TypeFlags), Ops);
4064
4065 // Some ACLE builtins leave out the argument to specify the predicate
4066 // pattern, which is expected to be expanded to an SV_ALL pattern.
4067 if (TypeFlags.isAppendSVALL())
4068 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
4069 if (TypeFlags.isInsertOp1SVALL())
4070 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
4071
4072 // Predicates must match the main datatype.
4073 for (Value *&Op : Ops)
4074 if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
4075 if (PredTy->getElementType()->isIntegerTy(1))
4076 Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));
4077
4078 // Splat scalar operand to vector (intrinsics with _n infix)
4079 if (TypeFlags.hasSplatOperand()) {
4080 unsigned OpNo = TypeFlags.getSplatOperand();
4081 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
4082 }
4083
4084 if (TypeFlags.isReverseCompare())
4085 std::swap(Ops[1], Ops[2]);
4086 else if (TypeFlags.isReverseUSDOT())
4087 std::swap(Ops[1], Ops[2]);
4088 else if (TypeFlags.isReverseMergeAnyBinOp() &&
4089 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4090 std::swap(Ops[1], Ops[2]);
4091 else if (TypeFlags.isReverseMergeAnyAccOp() &&
4092 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4093 std::swap(Ops[1], Ops[3]);
4094
4095 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
4096 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
4097 llvm::Type *OpndTy = Ops[1]->getType();
4098 auto *SplatZero = Constant::getNullValue(OpndTy);
4099 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
4100 }
4101
4102 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
4103 getSVEOverloadTypes(TypeFlags, Ty, Ops));
4104 Value *Call = Builder.CreateCall(F, Ops);
4105
4106 if (Call->getType() == Ty)
4107 return Call;
4108
4109 // Predicate results must be converted to svbool_t.
4110 if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ty))
4111 return EmitSVEPredicateCast(Call, PredTy);
4112 if (auto PredTupleTy = dyn_cast<llvm::StructType>(Ty))
4113 return EmitSVEPredicateTupleCast(Call, PredTupleTy);
4114
4115 llvm_unreachable("unsupported element count!");
4116 }
4117
4118 switch (BuiltinID) {
4119 default:
4120 return nullptr;
4121
4122 case SVE::BI__builtin_sve_svreinterpret_b: {
4123 auto SVCountTy =
4124 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4125 Function *CastFromSVCountF =
4126 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4127 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
4128 }
4129 case SVE::BI__builtin_sve_svreinterpret_c: {
4130 auto SVCountTy =
4131 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4132 Function *CastToSVCountF =
4133 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4134 return Builder.CreateCall(CastToSVCountF, Ops[0]);
4135 }
4136
4137 case SVE::BI__builtin_sve_svpsel_lane_b8:
4138 case SVE::BI__builtin_sve_svpsel_lane_b16:
4139 case SVE::BI__builtin_sve_svpsel_lane_b32:
4140 case SVE::BI__builtin_sve_svpsel_lane_b64:
4141 case SVE::BI__builtin_sve_svpsel_lane_c8:
4142 case SVE::BI__builtin_sve_svpsel_lane_c16:
4143 case SVE::BI__builtin_sve_svpsel_lane_c32:
4144 case SVE::BI__builtin_sve_svpsel_lane_c64: {
4145 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
4146 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
4147 "aarch64.svcount")) &&
4148 "Unexpected TargetExtType");
4149 auto SVCountTy =
4150 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4151 Function *CastFromSVCountF =
4152 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4153 Function *CastToSVCountF =
4154 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4155
4156 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
4157 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
4158 llvm::Value *Ops0 =
4159 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
4160 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
4161 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
4162 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
4163 }
4164 case SVE::BI__builtin_sve_svmov_b_z: {
4165 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
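// (AND of op with itself leaves op unchanged, so the predicated AND yields
// exactly op's active lanes.)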
4166 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4167 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4168 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
4169 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
4170 }
4171
4172 case SVE::BI__builtin_sve_svnot_b_z: {
4173 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
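// (In the active lanes pg is true, so op EOR pg computes !op: a predicated
// logical NOT.)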
4174 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4175 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4176 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
4177 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
4178 }
4179
4180 case SVE::BI__builtin_sve_svmovlb_u16:
4181 case SVE::BI__builtin_sve_svmovlb_u32:
4182 case SVE::BI__builtin_sve_svmovlb_u64:
4183 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
4184
4185 case SVE::BI__builtin_sve_svmovlb_s16:
4186 case SVE::BI__builtin_sve_svmovlb_s32:
4187 case SVE::BI__builtin_sve_svmovlb_s64:
4188 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
4189
4190 case SVE::BI__builtin_sve_svmovlt_u16:
4191 case SVE::BI__builtin_sve_svmovlt_u32:
4192 case SVE::BI__builtin_sve_svmovlt_u64:
4193 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
4194
4195 case SVE::BI__builtin_sve_svmovlt_s16:
4196 case SVE::BI__builtin_sve_svmovlt_s32:
4197 case SVE::BI__builtin_sve_svmovlt_s64:
4198 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
4199
4200 case SVE::BI__builtin_sve_svpmullt_u16:
4201 case SVE::BI__builtin_sve_svpmullt_u64:
4202 case SVE::BI__builtin_sve_svpmullt_n_u16:
4203 case SVE::BI__builtin_sve_svpmullt_n_u64:
4204 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
4205
4206 case SVE::BI__builtin_sve_svpmullb_u16:
4207 case SVE::BI__builtin_sve_svpmullb_u64:
4208 case SVE::BI__builtin_sve_svpmullb_n_u16:
4209 case SVE::BI__builtin_sve_svpmullb_n_u64:
4210 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
4211
4212 case SVE::BI__builtin_sve_svdup_n_b8:
4213 case SVE::BI__builtin_sve_svdup_n_b16:
4214 case SVE::BI__builtin_sve_svdup_n_b32:
4215 case SVE::BI__builtin_sve_svdup_n_b64: {
4216 Value *CmpNE =
4217 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
4218 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
4219 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
4220 return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
4221 }
4222
4223 case SVE::BI__builtin_sve_svdupq_n_b8:
4224 case SVE::BI__builtin_sve_svdupq_n_b16:
4225 case SVE::BI__builtin_sve_svdupq_n_b32:
4226 case SVE::BI__builtin_sve_svdupq_n_b64:
4227 case SVE::BI__builtin_sve_svdupq_n_u8:
4228 case SVE::BI__builtin_sve_svdupq_n_s8:
4229 case SVE::BI__builtin_sve_svdupq_n_u64:
4230 case SVE::BI__builtin_sve_svdupq_n_f64:
4231 case SVE::BI__builtin_sve_svdupq_n_s64:
4232 case SVE::BI__builtin_sve_svdupq_n_u16:
4233 case SVE::BI__builtin_sve_svdupq_n_f16:
4234 case SVE::BI__builtin_sve_svdupq_n_bf16:
4235 case SVE::BI__builtin_sve_svdupq_n_s16:
4236 case SVE::BI__builtin_sve_svdupq_n_u32:
4237 case SVE::BI__builtin_sve_svdupq_n_f32:
4238 case SVE::BI__builtin_sve_svdupq_n_s32: {
4239 // These builtins are implemented by building a fixed vector from the scalar
4240 // operands and broadcasting its 128-bit segment with dupq_lane.
4241 unsigned NumOpnds = Ops.size();
4242
4243 bool IsBoolTy =
4244 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
4245
4246 // For svdupq_n_b* the element type is an integer of width 128/numelts,
4247 // so that the compare can use the width that is natural for the expected
4248 // number of predicate lanes.
4249 llvm::Type *EltTy = Ops[0]->getType();
4250 if (IsBoolTy)
4251 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
4252
4253 SmallVector<llvm::Value *, 16> VecOps;
4254 for (unsigned I = 0; I < NumOpnds; ++I)
4255 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
4256 Value *Vec = BuildVector(VecOps);
4257
4258 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
4259 Value *InsertSubVec = Builder.CreateInsertVector(
4260 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, uint64_t(0));
4261
4262 Function *F =
4263 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
4264 Value *DupQLane =
4265 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
4266
4267 if (!IsBoolTy)
4268 return DupQLane;
4269
4270 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4271 Value *Pred = EmitSVEAllTruePred(TypeFlags);
4272
4273 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
4274 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
4275 : Intrinsic::aarch64_sve_cmpne_wide,
4276 OverloadedTy);
4277 Value *Call = Builder.CreateCall(
4278 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
4279 return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
4280 }
4281
4282 case SVE::BI__builtin_sve_svpfalse_b:
4283 return ConstantInt::getFalse(Ty);
4284
4285 case SVE::BI__builtin_sve_svpfalse_c: {
4286 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
4287 Function *CastToSVCountF =
4288 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
4289 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
4290 }
4291
4292 case SVE::BI__builtin_sve_svlen_bf16:
4293 case SVE::BI__builtin_sve_svlen_f16:
4294 case SVE::BI__builtin_sve_svlen_f32:
4295 case SVE::BI__builtin_sve_svlen_f64:
4296 case SVE::BI__builtin_sve_svlen_s8:
4297 case SVE::BI__builtin_sve_svlen_s16:
4298 case SVE::BI__builtin_sve_svlen_s32:
4299 case SVE::BI__builtin_sve_svlen_s64:
4300 case SVE::BI__builtin_sve_svlen_u8:
4301 case SVE::BI__builtin_sve_svlen_u16:
4302 case SVE::BI__builtin_sve_svlen_u32:
4303 case SVE::BI__builtin_sve_svlen_u64: {
4304 SVETypeFlags TF(Builtin->TypeModifier);
4305 return Builder.CreateElementCount(Ty, getSVEType(TF)->getElementCount());
4306 }
4307
4308 case SVE::BI__builtin_sve_svtbl2_u8:
4309 case SVE::BI__builtin_sve_svtbl2_s8:
4310 case SVE::BI__builtin_sve_svtbl2_u16:
4311 case SVE::BI__builtin_sve_svtbl2_s16:
4312 case SVE::BI__builtin_sve_svtbl2_u32:
4313 case SVE::BI__builtin_sve_svtbl2_s32:
4314 case SVE::BI__builtin_sve_svtbl2_u64:
4315 case SVE::BI__builtin_sve_svtbl2_s64:
4316 case SVE::BI__builtin_sve_svtbl2_f16:
4317 case SVE::BI__builtin_sve_svtbl2_bf16:
4318 case SVE::BI__builtin_sve_svtbl2_f32:
4319 case SVE::BI__builtin_sve_svtbl2_f64: {
4320 SVETypeFlags TF(Builtin->TypeModifier);
4321 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, getSVEType(TF));
4322 return Builder.CreateCall(F, Ops);
4323 }
4324
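// The *_neonq builtins move a 128-bit NEON vector into or out of the low
// 128 bits of an SVE vector; an insert/extract index of 0 addresses that
// bottom segment.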
4325 case SVE::BI__builtin_sve_svset_neonq_s8:
4326 case SVE::BI__builtin_sve_svset_neonq_s16:
4327 case SVE::BI__builtin_sve_svset_neonq_s32:
4328 case SVE::BI__builtin_sve_svset_neonq_s64:
4329 case SVE::BI__builtin_sve_svset_neonq_u8:
4330 case SVE::BI__builtin_sve_svset_neonq_u16:
4331 case SVE::BI__builtin_sve_svset_neonq_u32:
4332 case SVE::BI__builtin_sve_svset_neonq_u64:
4333 case SVE::BI__builtin_sve_svset_neonq_f16:
4334 case SVE::BI__builtin_sve_svset_neonq_f32:
4335 case SVE::BI__builtin_sve_svset_neonq_f64:
4336 case SVE::BI__builtin_sve_svset_neonq_bf16: {
4337 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], uint64_t(0));
4338 }
4339
4340 case SVE::BI__builtin_sve_svget_neonq_s8:
4341 case SVE::BI__builtin_sve_svget_neonq_s16:
4342 case SVE::BI__builtin_sve_svget_neonq_s32:
4343 case SVE::BI__builtin_sve_svget_neonq_s64:
4344 case SVE::BI__builtin_sve_svget_neonq_u8:
4345 case SVE::BI__builtin_sve_svget_neonq_u16:
4346 case SVE::BI__builtin_sve_svget_neonq_u32:
4347 case SVE::BI__builtin_sve_svget_neonq_u64:
4348 case SVE::BI__builtin_sve_svget_neonq_f16:
4349 case SVE::BI__builtin_sve_svget_neonq_f32:
4350 case SVE::BI__builtin_sve_svget_neonq_f64:
4351 case SVE::BI__builtin_sve_svget_neonq_bf16: {
4352 return Builder.CreateExtractVector(Ty, Ops[0], uint64_t(0));
4353 }
4354
4355 case SVE::BI__builtin_sve_svdup_neonq_s8:
4356 case SVE::BI__builtin_sve_svdup_neonq_s16:
4357 case SVE::BI__builtin_sve_svdup_neonq_s32:
4358 case SVE::BI__builtin_sve_svdup_neonq_s64:
4359 case SVE::BI__builtin_sve_svdup_neonq_u8:
4360 case SVE::BI__builtin_sve_svdup_neonq_u16:
4361 case SVE::BI__builtin_sve_svdup_neonq_u32:
4362 case SVE::BI__builtin_sve_svdup_neonq_u64:
4363 case SVE::BI__builtin_sve_svdup_neonq_f16:
4364 case SVE::BI__builtin_sve_svdup_neonq_f32:
4365 case SVE::BI__builtin_sve_svdup_neonq_f64:
4366 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
4367 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
4368 uint64_t(0));
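// dupq_lane with index 0 then broadcasts this low 128-bit segment across
// the whole scalable vector.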
4369 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
4370 {Insert, Builder.getInt64(0)});
4371 }
4372 }
4373
4374 // Should not happen.
4375 return nullptr;
4376}
4377
4378 static void swapCommutativeSMEOperands(unsigned BuiltinID,
4379 SmallVectorImpl<Value *> &Ops) {
4380 unsigned MultiVec;
4381 switch (BuiltinID) {
4382 default:
4383 return;
4384 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
4385 MultiVec = 1;
4386 break;
4387 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
4388 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
4389 MultiVec = 2;
4390 break;
4391 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
4392 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
4393 MultiVec = 4;
4394 break;
4395 }
4396
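// (E.g. for a _vg1x2 builtin, MultiVec == 2 and Ops[1..2] are swapped with
// Ops[3..4], exchanging the two multi-vector operand groups.)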
4397 if (MultiVec > 0)
4398 for (unsigned I = 0; I < MultiVec; ++I)
4399 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
4400}
4401
4402 Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
4403 const CallExpr *E) {
4404 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
4405 AArch64SMEIntrinsicsProvenSorted);
4406
4407 llvm::SmallVector<Value *, 4> Ops;
4408 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4409 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4410
4411 if (TypeFlags.isLoad() || TypeFlags.isStore())
4412 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4413 if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
4414 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4415 if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
4416 BuiltinID == SME::BI__builtin_sme_svzero_za)
4417 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4418 if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
4419 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
4420 BuiltinID == SME::BI__builtin_sme_svldr_za ||
4421 BuiltinID == SME::BI__builtin_sme_svstr_za)
4422 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4423
4424 // Emit a call to set FPMR for intrinsics that require it.
4425 if (TypeFlags.setsFPMR())
4426 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
4427 Ops.pop_back_val());
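// Note that pop_back_val() also removes the FPMR operand from Ops, so it is
// not passed on to the main intrinsic call below.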
4428 // Handle builtins which require their multi-vector operands to be swapped
4429 swapCommutativeSMEOperands(BuiltinID, Ops);
4430
4431 auto isCntsBuiltin = [&]() {
4432 switch (BuiltinID) {
4433 default:
4434 return 0;
4435 case SME::BI__builtin_sme_svcntsb:
4436 return 8;
4437 case SME::BI__builtin_sme_svcntsh:
4438 return 4;
4439 case SME::BI__builtin_sme_svcntsw:
4440 return 2;
4441 }
4442 };
4443
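// (I.e. svcntsb() = cntsd * 8: the streaming vector length in bytes is eight
// times its length in doublewords; likewise *4 for halfwords and *2 for
// words.)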
4444 if (auto Mul = isCntsBuiltin()) {
4445 llvm::Value *Cntd =
4446 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd));
4447 return Builder.CreateMul(Cntd, llvm::ConstantInt::get(Int64Ty, Mul),
4448 "mulsvl", /* HasNUW */ true, /* HasNSW */ true);
4449 }
4450
4451 // Should not happen!
4452 if (Builtin->LLVMIntrinsic == 0)
4453 return nullptr;
4454
4455 // Predicates must match the main datatype.
4456 for (Value *&Op : Ops)
4457 if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
4458 if (PredTy->getElementType()->isIntegerTy(1))
4459 Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));
4460
4461 if (BuiltinID == SME::BI__builtin_sme_svldr_zt ||
4462 BuiltinID == SME::BI__builtin_sme_svstr_zt) {
4463 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic, Ops[1]->getType());
4464 return Builder.CreateCall(F, Ops);
4465 }
4466
4467 Function *F =
4468 TypeFlags.isOverloadNone()
4469 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
4470 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
4471
4472 return Builder.CreateCall(F, Ops);
4473}
4474
4475/// Helper for the read/write/add/inc X18 builtins: read the X18 register and
4476/// return it as an i8 pointer.
4478 LLVMContext &Context = CGF.CGM.getLLVMContext();
4479 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
4480 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4481 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4482 llvm::Function *F =
4483 CGF.CGM.getIntrinsic(Intrinsic::read_register, {CGF.Int64Ty});
4484 llvm::Value *X18 = CGF.Builder.CreateCall(F, Metadata);
4485 return CGF.Builder.CreateIntToPtr(X18, CGF.Int8PtrTy);
4486}
4487
4488 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
4489 const CallExpr *E,
4490 llvm::Triple::ArchType Arch) {
4491 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
4492 BuiltinID <= clang::AArch64::LastSVEBuiltin)
4493 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
4494
4495 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
4496 BuiltinID <= clang::AArch64::LastSMEBuiltin)
4497 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
4498
4499 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
4500 return EmitAArch64CpuSupports(E);
4501
4502 unsigned HintID = static_cast<unsigned>(-1);
4503 switch (BuiltinID) {
4504 default: break;
4505 case clang::AArch64::BI__builtin_arm_nop:
4506 HintID = 0;
4507 break;
4508 case clang::AArch64::BI__builtin_arm_yield:
4509 case clang::AArch64::BI__yield:
4510 HintID = 1;
4511 break;
4512 case clang::AArch64::BI__builtin_arm_wfe:
4513 case clang::AArch64::BI__wfe:
4514 HintID = 2;
4515 break;
4516 case clang::AArch64::BI__builtin_arm_wfi:
4517 case clang::AArch64::BI__wfi:
4518 HintID = 3;
4519 break;
4520 case clang::AArch64::BI__builtin_arm_sev:
4521 case clang::AArch64::BI__sev:
4522 HintID = 4;
4523 break;
4524 case clang::AArch64::BI__builtin_arm_sevl:
4525 case clang::AArch64::BI__sevl:
4526 HintID = 5;
4527 break;
4528 }
4529
4530 if (HintID != static_cast<unsigned>(-1)) {
4531 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
4532 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
4533 }
4534
4535 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
4536 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
4537 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4538 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
4539 }
4540
4541 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
4542 // Create call to __arm_sme_state and store the results to the two pointers.
4543 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
4544 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
4545 false),
4546 "__arm_sme_state"));
4547 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
4548 "aarch64_pstate_sm_compatible");
4549 CI->setAttributes(Attrs);
4550 CI->setCallingConv(
4551 llvm::CallingConv::
4552 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
4553 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
4554 EmitPointerWithAlignment(E->getArg(0)));
4555 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
4556 EmitPointerWithAlignment(E->getArg(1)));
4557 }
4558
4559 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
4560 assert((getContext().getTypeSize(E->getType()) == 32) &&
4561 "rbit of unusual size!");
4562 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4563 return Builder.CreateCall(
4564 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4565 }
4566 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
4567 assert((getContext().getTypeSize(E->getType()) == 64) &&
4568 "rbit of unusual size!");
4569 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4570 return Builder.CreateCall(
4571 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4572 }
4573
4574 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
4575 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
4576 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4577 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
4578 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
4579 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
4580 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
4581 return Res;
4582 }
4583
4584 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
4585 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4586 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
4587 "cls");
4588 }
4589 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
4590 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4591 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
4592 "cls");
4593 }
4594
4595 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
4596 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
4597 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4598 llvm::Type *Ty = Arg->getType();
4599 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
4600 Arg, "frint32z");
4601 }
4602
4603 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
4604 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
4605 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4606 llvm::Type *Ty = Arg->getType();
4607 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
4608 Arg, "frint64z");
4609 }
4610
4611 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
4612 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
4613 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4614 llvm::Type *Ty = Arg->getType();
4615 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
4616 Arg, "frint32x");
4617 }
4618
4619 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
4620 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
4621 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4622 llvm::Type *Ty = Arg->getType();
4623 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
4624 Arg, "frint64x");
4625 }
4626
4627 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
4628 assert((getContext().getTypeSize(E->getType()) == 32) &&
4629 "__jcvt of unusual size!");
4630 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4631 return Builder.CreateCall(
4632 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
4633 }
4634
4635 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
4636 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
4637 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
4638 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
4639 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
4640 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
4641
4642 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
4643 // Load from the address via an LLVM intrinsic, receiving a
4644 // tuple of 8 i64 words, and store each one to ValPtr.
4645 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
4646 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
4647 llvm::Value *ToRet;
4648 for (size_t i = 0; i < 8; i++) {
4649 llvm::Value *ValOffsetPtr =
4650 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
4651 Address Addr =
4652 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
4653 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
4654 }
4655 return ToRet;
4656 }
4657
4658 // Load 8 i64 words from ValPtr, and store them to the address
4659 // via an LLVM intrinsic.
4660 SmallVector<llvm::Value *, 9> Args;
4661 Args.push_back(MemAddr);
4662 for (size_t i = 0; i < 8; i++) {
4663 llvm::Value *ValOffsetPtr =
4664 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
4665 Address Addr = Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
4666 Args.push_back(Builder.CreateLoad(Addr));
4667 }
4668
4669 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
4670 ? Intrinsic::aarch64_st64b
4671 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
4672 ? Intrinsic::aarch64_st64bv
4673 : Intrinsic::aarch64_st64bv0);
4674 Function *F = CGM.getIntrinsic(Intr);
4675 return Builder.CreateCall(F, Args);
4676 }
4677
4678 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
4679 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
4680
4681 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
4682 ? Intrinsic::aarch64_rndr
4683 : Intrinsic::aarch64_rndrrs);
4684 Function *F = CGM.getIntrinsic(Intr);
4685 llvm::Value *Val = Builder.CreateCall(F);
4686 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
4687 Value *Status = Builder.CreateExtractValue(Val, 1);
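// The second element of the returned pair is the status flag; it is widened
// from i1 to the i32 result below.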
4688
4689 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
4690 Builder.CreateStore(RandomValue, MemAddress);
4691 Status = Builder.CreateZExt(Status, Int32Ty);
4692 return Status;
4693 }
4694
4695 if (BuiltinID == clang::AArch64::BI__clear_cache) {
4696 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4697 const FunctionDecl *FD = E->getDirectCallee();
4698 Value *Ops[2];
4699 for (unsigned i = 0; i < 2; i++)
4700 Ops[i] = EmitScalarExpr(E->getArg(i));
4701 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4702 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4703 StringRef Name = FD->getName();
4704 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4705 }
4706
4707 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
4708 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
4709 getContext().getTypeSize(E->getType()) == 128) {
4710 Function *F =
4711 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
4712 ? Intrinsic::aarch64_ldaxp
4713 : Intrinsic::aarch64_ldxp);
4714
4715 Value *LdPtr = EmitScalarExpr(E->getArg(0));
4716 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
4717
4718 Value *Val0 = Builder.CreateExtractValue(Val, 1);
4719 Value *Val1 = Builder.CreateExtractValue(Val, 0);
4720 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
4721 Val0 = Builder.CreateZExt(Val0, Int128Ty);
4722 Val1 = Builder.CreateZExt(Val1, Int128Ty);
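// Combine the two 64-bit halves into one i128: (Val0 << 64) | Val1.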
4723
4724 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
4725 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4726 Val = Builder.CreateOr(Val, Val1);
4727 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4728 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
4729 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
4730 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4731
4732 QualType Ty = E->getType();
4733 llvm::Type *RealResTy = ConvertType(Ty);
4734 llvm::Type *IntTy =
4735 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
4736
4737 Function *F =
4738 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
4739 ? Intrinsic::aarch64_ldaxr
4740 : Intrinsic::aarch64_ldxr,
4741 DefaultPtrTy);
4742 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
4743 Val->addParamAttr(
4744 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
4745
4746 if (RealResTy->isPointerTy())
4747 return Builder.CreateIntToPtr(Val, RealResTy);
4748
4749 llvm::Type *IntResTy = llvm::IntegerType::get(
4750 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
4751 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
4752 RealResTy);
4753 }
4754
4755 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
4756 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
4757 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
4758 Function *F =
4759 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
4760 ? Intrinsic::aarch64_stlxp
4761 : Intrinsic::aarch64_stxp);
4762 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
4763
4764 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4765 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
4766
4767 Tmp = Tmp.withElementType(STy);
4768 llvm::Value *Val = Builder.CreateLoad(Tmp);
4769
4770 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4771 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4772 Value *StPtr = EmitScalarExpr(E->getArg(1));
4773 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
4774 }
4775
4776 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
4777 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
4778 Value *StoreVal = EmitScalarExpr(E->getArg(0));
4779 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4780
4781 QualType Ty = E->getArg(0)->getType();
4782 llvm::Type *StoreTy =
4783 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
4784
4785 if (StoreVal->getType()->isPointerTy())
4786 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
4787 else {
4788 llvm::Type *IntTy = llvm::IntegerType::get(
4789 getLLVMContext(),
4790 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
4791 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
4792 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
4793 }
4794
4795 Function *F =
4796 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
4797 ? Intrinsic::aarch64_stlxr
4798 : Intrinsic::aarch64_stxr,
4799 StoreAddr->getType());
4800 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
4801 CI->addParamAttr(
4802 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
4803 return CI;
4804 }
4805
4806 if (BuiltinID == clang::AArch64::BI__getReg) {
4807 Expr::EvalResult Result;
4808 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
4809 llvm_unreachable("Sema will ensure that the parameter is constant");
4810
4811 llvm::APSInt Value = Result.Val.getInt();
4812 LLVMContext &Context = CGM.getLLVMContext();
4813 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
4814
4815 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
4816 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4817 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4818
4819 llvm::Function *F =
4820 CGM.getIntrinsic(Intrinsic::read_register, {Int64Ty});
4821 return Builder.CreateCall(F, Metadata);
4822 }
4823
4824 if (BuiltinID == clang::AArch64::BI__break) {
4825 Expr::EvalResult Result;
4826 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
4827 llvm_unreachable("Sema will ensure that the parameter is constant");
4828
4829 llvm::Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
4830 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
4831 }
4832
4833 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
4834 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
4835 return Builder.CreateCall(F);
4836 }
4837
4838 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
4839 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
4840 llvm::SyncScope::SingleThread);
4841
4842 // CRC32
4843 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4844 switch (BuiltinID) {
4845 case clang::AArch64::BI__builtin_arm_crc32b:
4846 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
4847 case clang::AArch64::BI__builtin_arm_crc32cb:
4848 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
4849 case clang::AArch64::BI__builtin_arm_crc32h:
4850 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
4851 case clang::AArch64::BI__builtin_arm_crc32ch:
4852 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
4853 case clang::AArch64::BI__builtin_arm_crc32w:
4854 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
4855 case clang::AArch64::BI__builtin_arm_crc32cw:
4856 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
4857 case clang::AArch64::BI__builtin_arm_crc32d:
4858 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
4859 case clang::AArch64::BI__builtin_arm_crc32cd:
4860 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
4861 }
4862
4863 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4864 Value *Arg0 = EmitScalarExpr(E->getArg(0));
4865 Value *Arg1 = EmitScalarExpr(E->getArg(1));
4866 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4867
4868 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
4869 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
4870
4871 return Builder.CreateCall(F, {Arg0, Arg1});
4872 }
4873
4874 // Memory Operations (MOPS)
4875 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
4876 Value *Dst = EmitScalarExpr(E->getArg(0));
4877 Value *Val = EmitScalarExpr(E->getArg(1));
4878 Value *Size = EmitScalarExpr(E->getArg(2));
4879 Val = Builder.CreateTrunc(Val, Int8Ty);
4880 Size = Builder.CreateIntCast(Size, Int64Ty, false);
4881 return Builder.CreateCall(
4882 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
4883 }
4884
4885 if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch ||
4886 BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x)
4887 return EmitRangePrefetchBuiltin(*this, BuiltinID, E);
4888
4889 // Memory Tagging Extensions (MTE) Intrinsics
4890 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
4891 switch (BuiltinID) {
4892 case clang::AArch64::BI__builtin_arm_irg:
4893 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
4894 case clang::AArch64::BI__builtin_arm_addg:
4895 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
4896 case clang::AArch64::BI__builtin_arm_gmi:
4897 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
4898 case clang::AArch64::BI__builtin_arm_ldg:
4899 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
4900 case clang::AArch64::BI__builtin_arm_stg:
4901 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
4902 case clang::AArch64::BI__builtin_arm_subp:
4903 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
4904 }
4905
4906 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
4907 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
4908 Value *Pointer = EmitScalarExpr(E->getArg(0));
4909 Value *Mask = EmitScalarExpr(E->getArg(1));
4910
4911 Mask = Builder.CreateZExt(Mask, Int64Ty);
4912 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
4913 {Pointer, Mask});
4914 }
4915 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
4916 Value *Pointer = EmitScalarExpr(E->getArg(0));
4917 Value *TagOffset = EmitScalarExpr(E->getArg(1));
4918
4919 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
4920 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
4921 {Pointer, TagOffset});
4922 }
4923 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
4924 Value *Pointer = EmitScalarExpr(E->getArg(0));
4925 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
4926
4927 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
4928 return Builder.CreateCall(
4929 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
4930 }
4931 // Although it is possible to supply a different return
4932 // address (first arg) to this intrinsic, for now we set
4933 // the return address to be the same as the input address.
4934 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
4935 Value *TagAddress = EmitScalarExpr(E->getArg(0));
4936 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
4937 {TagAddress, TagAddress});
4938 }
4939 // Although it is possible to supply a different tag (to set)
4940 // to this intrinsic (as first arg), for now we supply
4941 // the tag that is in the input address argument (the common use case).
4942 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
4943 Value *TagAddress = EmitScalarExpr(E->getArg(0));
4944 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
4945 {TagAddress, TagAddress});
4946 }
4947 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
4948 Value *PointerA = EmitScalarExpr(E->getArg(0));
4949 Value *PointerB = EmitScalarExpr(E->getArg(1));
4950 return Builder.CreateCall(
4951 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
4952 }
4953 }
4954
4955 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
4956 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
4957 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
4958 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
4959 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
4960 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
4961 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
4962 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
4963
4964 SpecialRegisterAccessKind AccessKind = Write;
4965 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
4966 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
4967 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
4968 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
4969 AccessKind = VolatileRead;
4970
4971 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
4972 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
4973
4974 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
4975 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
4976
4977 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
4978 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
4979
4980 llvm::Type *ValueType;
4981 llvm::Type *RegisterType = Int64Ty;
4982 if (Is32Bit) {
4983 ValueType = Int32Ty;
4984 } else if (Is128Bit) {
4985 llvm::Type *Int128Ty =
4986 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
4987 ValueType = Int128Ty;
4988 RegisterType = Int128Ty;
4989 } else if (IsPointerBuiltin) {
4990 ValueType = VoidPtrTy;
4991 } else {
4992 ValueType = Int64Ty;
4993 }
4994
4995 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
4996 AccessKind);
4997 }
4998
4999 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5000 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
5001 LLVMContext &Context = CGM.getLLVMContext();
5002
5003 unsigned SysReg =
5004 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
5005
5006 std::string SysRegStr;
5007 llvm::raw_string_ostream(SysRegStr)
5008 << (0b10 | SysReg >> 14) << ":" << ((SysReg >> 11) & 7) << ":"
5009 << ((SysReg >> 7) & 15) << ":" << ((SysReg >> 3) & 15) << ":"
5010 << (SysReg & 7);
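// (Illustrative: for TPIDR_EL0, encoded as op0=3 op1=3 CRn=13 CRm=0 op2=2,
// this produces the register string "3:3:13:0:2".)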
5011
5012 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
5013 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5014 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5015
5016 llvm::Type *RegisterType = Int64Ty;
5017 llvm::Type *Types[] = { RegisterType };
5018
5019 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
5020 llvm::Function *F = CGM.getIntrinsic(Intrinsic::read_register, Types);
5021
5022 return Builder.CreateCall(F, Metadata);
5023 }
5024
5025 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
5026 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
5027 llvm::Value *Result = Builder.CreateCall(F, {Metadata, ArgValue});
5028
5029 return Result;
5030 }
5031
5032 if (BuiltinID == clang::AArch64::BI__sys) {
5033 unsigned SysReg =
5034 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
5035 const unsigned Op1 = SysReg >> 11;
5036 const unsigned CRn = (SysReg >> 7) & 0xf;
5037 const unsigned CRm = (SysReg >> 3) & 0xf;
5038 const unsigned Op2 = SysReg & 0x7;
5039
5040 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_sys),
5041 {Builder.getInt32(Op1), Builder.getInt32(CRn),
5042 Builder.getInt32(CRm), Builder.getInt32(Op2),
5043 EmitScalarExpr(E->getArg(1))});
5044
5045 // Return 0 for convenience, even though MSVC returns some other undefined
5046 // value.
5047 return ConstantInt::get(Builder.getInt32Ty(), 0);
5048 }
5049
5050 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
5051 llvm::Function *F =
5052 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
5053 return Builder.CreateCall(F);
5054 }
5055
5056 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
5057 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
5058 return Builder.CreateCall(F);
5059 }
5060
5061 if (BuiltinID == clang::AArch64::BI__mulh ||
5062 BuiltinID == clang::AArch64::BI__umulh) {
5063 llvm::Type *ResType = ConvertType(E->getType());
5064 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5065
5066 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
5067 Value *LHS =
5068 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
5069 Value *RHS =
5070 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
5071
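// I.e. __mulh/__umulh return the high 64 bits of the full 128-bit product
// of their 64-bit operands.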
5072 Value *MulResult, *HigherBits;
5073 if (IsSigned) {
5074 MulResult = Builder.CreateNSWMul(LHS, RHS);
5075 HigherBits = Builder.CreateAShr(MulResult, 64);
5076 } else {
5077 MulResult = Builder.CreateNUWMul(LHS, RHS);
5078 HigherBits = Builder.CreateLShr(MulResult, 64);
5079 }
5080 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
5081
5082 return HigherBits;
5083 }
5084
5085 if (BuiltinID == AArch64::BI__writex18byte ||
5086 BuiltinID == AArch64::BI__writex18word ||
5087 BuiltinID == AArch64::BI__writex18dword ||
5088 BuiltinID == AArch64::BI__writex18qword) {
5089 // Process the args first
5090 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5091 Value *DataArg = EmitScalarExpr(E->getArg(1));
5092
5093 // Read x18 as i8*
5094 llvm::Value *X18 = readX18AsPtr(*this);
5095
5096 // Store val at x18 + offset
5097 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5098 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5099 StoreInst *Store =
5100 Builder.CreateAlignedStore(DataArg, Ptr, CharUnits::One());
5101 return Store;
5102 }
5103
5104 if (BuiltinID == AArch64::BI__readx18byte ||
5105 BuiltinID == AArch64::BI__readx18word ||
5106 BuiltinID == AArch64::BI__readx18dword ||
5107 BuiltinID == AArch64::BI__readx18qword) {
5108 // Process the args first
5109 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5110
5111 // Read x18 as i8*
5112 llvm::Value *X18 = readX18AsPtr(*this);
5113
5114 // Load x18 + offset
5115 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5116 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5117 llvm::Type *IntTy = ConvertType(E->getType());
5118 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
5119 return Load;
5120 }
5121
5122 if (BuiltinID == AArch64::BI__addx18byte ||
5123 BuiltinID == AArch64::BI__addx18word ||
5124 BuiltinID == AArch64::BI__addx18dword ||
5125 BuiltinID == AArch64::BI__addx18qword ||
5126 BuiltinID == AArch64::BI__incx18byte ||
5127 BuiltinID == AArch64::BI__incx18word ||
5128 BuiltinID == AArch64::BI__incx18dword ||
5129 BuiltinID == AArch64::BI__incx18qword) {
5130 llvm::Type *IntTy;
5131 bool isIncrement;
5132 switch (BuiltinID) {
5133 case AArch64::BI__incx18byte:
5134 IntTy = Int8Ty;
5135 isIncrement = true;
5136 break;
5137 case AArch64::BI__incx18word:
5138 IntTy = Int16Ty;
5139 isIncrement = true;
5140 break;
5141 case AArch64::BI__incx18dword:
5142 IntTy = Int32Ty;
5143 isIncrement = true;
5144 break;
5145 case AArch64::BI__incx18qword:
5146 IntTy = Int64Ty;
5147 isIncrement = true;
5148 break;
5149 default:
5150 IntTy = ConvertType(E->getArg(1)->getType());
5151 isIncrement = false;
5152 break;
5153 }
5154 // Process the args first
5155 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5156 Value *ValToAdd =
5157 isIncrement ? ConstantInt::get(IntTy, 1) : EmitScalarExpr(E->getArg(1));
5158
5159 // Read x18 as i8*
5160 llvm::Value *X18 = readX18AsPtr(*this);
5161
5162 // Load x18 + offset
5163 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5164 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5165 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
5166
5167 // Add values
5168 Value *AddResult = Builder.CreateAdd(Load, ValToAdd);
5169
5170 // Store val at x18 + offset
5171 StoreInst *Store =
5172 Builder.CreateAlignedStore(AddResult, Ptr, CharUnits::One());
5173 return Store;
5174 }
5175
5176 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
5177 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
5178 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
5179 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
5180 Value *Arg = EmitScalarExpr(E->getArg(0));
5181 llvm::Type *RetTy = ConvertType(E->getType());
5182 return Builder.CreateBitCast(Arg, RetTy);
5183 }
5184
5185 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5186 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5187 BuiltinID == AArch64::BI_CountLeadingZeros ||
5188 BuiltinID == AArch64::BI_CountLeadingZeros64) {
5189 Value *Arg = EmitScalarExpr(E->getArg(0));
5190 llvm::Type *ArgType = Arg->getType();
5191
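// _CountLeadingOnes* is lowered as ctlz(~x): the leading ones of x are the
// leading zeros of its complement.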
5192 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5193 BuiltinID == AArch64::BI_CountLeadingOnes64)
5194 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
5195
5196 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
5197 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
5198
5199 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5200 BuiltinID == AArch64::BI_CountLeadingZeros64)
5201 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5202 return Result;
5203 }
5204
5205 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
5206 BuiltinID == AArch64::BI_CountLeadingSigns64) {
5207 Value *Arg = EmitScalarExpr(E->getArg(0));
5208
5209 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
5210 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
5211 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
5212
5213 Value *Result = Builder.CreateCall(F, Arg, "cls");
5214 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
5215 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5216 return Result;
5217 }
5218
5219 if (BuiltinID == AArch64::BI_CountOneBits ||
5220 BuiltinID == AArch64::BI_CountOneBits64) {
5221 Value *ArgValue = EmitScalarExpr(E->getArg(0));
5222 llvm::Type *ArgType = ArgValue->getType();
5223 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
5224
5225 Value *Result = Builder.CreateCall(F, ArgValue);
5226 if (BuiltinID == AArch64::BI_CountOneBits64)
5227 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5228 return Result;
5229 }
5230
5231 if (BuiltinID == AArch64::BI__prefetch) {
5232 Value *Address = EmitScalarExpr(E->getArg(0));
5233 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
5234 Value *Locality = ConstantInt::get(Int32Ty, 3);
5235 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
5236 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
5237 return Builder.CreateCall(F, {Address, RW, Locality, Data});
5238 }
5239
5240 if (BuiltinID == AArch64::BI__hlt) {
5241 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
5242 Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
5243
5244 // Return 0 for convenience, even though MSVC returns some other undefined
5245 // value.
5246 return ConstantInt::get(Builder.getInt32Ty(), 0);
5247 }
5248
5249 if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
5250 return Builder.CreateFPTrunc(
5251 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
5252 Builder.getFloatTy()),
5253 Builder.getBFloatTy());
5254
5255 // Handle MSVC intrinsics before argument evaluation to prevent double
5256 // evaluation.
5257 if (std::optional<MSVCIntrin> MsvcIntId =
5258 translateAarch64ToMsvcIntrin(BuiltinID))
5259 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
5260
5261 // Some intrinsics are equivalent - if they are, use the base intrinsic ID.
5262 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
5263 return P.first == BuiltinID;
5264 });
5265 if (It != end(NEONEquivalentIntrinsicMap))
5266 BuiltinID = It->second;
5267
5268 // Check whether this is an SISD builtin.
5269 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
5270 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
5271 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5272 bool IsSISD = (Builtin != nullptr);
5273
5274 // Find out if any arguments are required to be integer constant
5275 // expressions.
5276 unsigned ICEArguments = 0;
5277 ASTContext::GetBuiltinTypeError Error;
5278 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5279 assert(Error == ASTContext::GE_None && "Should not codegen an error");
5280
5281 llvm::SmallVector<Value *, 4> Ops;
5282 Address PtrOp0 = Address::invalid();
5283 // Note the assumption that SISD intrinsics do not contain extra arguments.
5284 // TODO: Fold this into a single function call instead of, effectively, two
5285 // separate checks.
5286 bool HasExtraArg = !IsSISD && HasExtraNeonArgument(BuiltinID);
5287 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
5288 for (unsigned i = 0, e = NumArgs; i != e; i++) {
5289 if (i == 0) {
5290 switch (BuiltinID) {
5291 case NEON::BI__builtin_neon_vld1_v:
5292 case NEON::BI__builtin_neon_vld1q_v:
5293 case NEON::BI__builtin_neon_vld1_dup_v:
5294 case NEON::BI__builtin_neon_vld1q_dup_v:
5295 case NEON::BI__builtin_neon_vld1_lane_v:
5296 case NEON::BI__builtin_neon_vld1q_lane_v:
5297 case NEON::BI__builtin_neon_vst1_v:
5298 case NEON::BI__builtin_neon_vst1q_v:
5299 case NEON::BI__builtin_neon_vst1_lane_v:
5300 case NEON::BI__builtin_neon_vst1q_lane_v:
5301 case NEON::BI__builtin_neon_vldap1_lane_s64:
5302 case NEON::BI__builtin_neon_vldap1q_lane_s64:
5303 case NEON::BI__builtin_neon_vstl1_lane_s64:
5304 case NEON::BI__builtin_neon_vstl1q_lane_s64:
5305 // Get the alignment for the argument in addition to the value;
5306 // we'll use it later.
5307 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
5308 Ops.push_back(PtrOp0.emitRawPointer(*this));
5309 continue;
5310 }
5311 }
5312 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
5313 }
5314
5315 if (Builtin) {
5316 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
5317 assert(Result && "SISD intrinsic should have been handled");
5318 return Result;
5319 }
5320
5321 const Expr *Arg = E->getArg(E->getNumArgs()-1);
5322 NeonTypeFlags Type(0);
5323 if (std::optional<llvm::APSInt> Result =
5324 Arg->getIntegerConstantExpr(getContext()))
5325 // Determine the type of this overloaded NEON intrinsic.
5326 Type = NeonTypeFlags(Result->getZExtValue());
5327
5328 bool usgn = Type.isUnsigned();
5329 bool quad = Type.isQuad();
5330 unsigned Int;
5331
5332 // Not all intrinsics handled by the common case work for AArch64 yet, so only
5333 // defer to common code if it's been added to our special map.
5334 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
5335 AArch64SIMDIntrinsicsProvenSorted);
5336
5337 if (Builtin)
5338 return EmitCommonNeonBuiltinExpr(
5339 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
5340 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
5341 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
5342
5343 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
5344 return V;
5345
5346 // Handle non-overloaded intrinsics first.
5347 switch (BuiltinID) {
5348 default: break;
5349 case NEON::BI__builtin_neon_vabsh_f16:
5350 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
5351 case NEON::BI__builtin_neon_vaddq_p128: {
5352 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
5353 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5354 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
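// Polynomial (GF(2)) addition is carry-less, so it reduces to a plain XOR.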
5355 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
5356 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5357 return Builder.CreateBitCast(Ops[0], Int128Ty);
5358 }
5359 case NEON::BI__builtin_neon_vldrq_p128: {
5360 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5361 return Builder.CreateAlignedLoad(Int128Ty, Ops[0],
5362 CharUnits::fromQuantity(16));
5363 }
5364 case NEON::BI__builtin_neon_vstrq_p128: {
5365 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5366 }
5367 case NEON::BI__builtin_neon_vcvts_f32_u32:
5368 case NEON::BI__builtin_neon_vcvtd_f64_u64:
5369 usgn = true;
5370 [[fallthrough]];
5371 case NEON::BI__builtin_neon_vcvts_f32_s32:
5372 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5373 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5374 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5375 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5376 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5377 if (usgn)
5378 return Builder.CreateUIToFP(Ops[0], FTy);
5379 return Builder.CreateSIToFP(Ops[0], FTy);
5380 }
5381 case NEON::BI__builtin_neon_vcvth_f16_u16:
5382 case NEON::BI__builtin_neon_vcvth_f16_u32:
5383 case NEON::BI__builtin_neon_vcvth_f16_u64:
5384 usgn = true;
5385 [[fallthrough]];
5386 case NEON::BI__builtin_neon_vcvth_f16_s16:
5387 case NEON::BI__builtin_neon_vcvth_f16_s32:
5388 case NEON::BI__builtin_neon_vcvth_f16_s64: {
5389 llvm::Type *FTy = HalfTy;
5390 llvm::Type *InTy;
5391 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
5392 InTy = Int64Ty;
5393 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
5394 InTy = Int32Ty;
5395 else
5396 InTy = Int16Ty;
5397 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5398 if (usgn)
5399 return Builder.CreateUIToFP(Ops[0], FTy);
5400 return Builder.CreateSIToFP(Ops[0], FTy);
5401 }
5402 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5403 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5404 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5405 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5406 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5407 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5408 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5409 case NEON::BI__builtin_neon_vcvtph_s16_f16: {
5410 llvm::Type *InTy = Int16Ty;
5411 llvm::Type* FTy = HalfTy;
5412 llvm::Type *Tys[2] = {InTy, FTy};
5413 switch (BuiltinID) {
5414 default: llvm_unreachable("missing builtin ID in switch!");
5415 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5416 Int = Intrinsic::aarch64_neon_fcvtau; break;
5417 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5418 Int = Intrinsic::aarch64_neon_fcvtmu; break;
5419 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5420 Int = Intrinsic::aarch64_neon_fcvtnu; break;
5421 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5422 Int = Intrinsic::aarch64_neon_fcvtpu; break;
5423 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5424 Int = Intrinsic::aarch64_neon_fcvtas; break;
5425 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5426 Int = Intrinsic::aarch64_neon_fcvtms; break;
5427 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5428 Int = Intrinsic::aarch64_neon_fcvtns; break;
5429 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5430 Int = Intrinsic::aarch64_neon_fcvtps; break;
5431 }
5432 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
5433 }
5434 case NEON::BI__builtin_neon_vcaleh_f16:
5435 case NEON::BI__builtin_neon_vcalth_f16:
5436 case NEON::BI__builtin_neon_vcageh_f16:
5437 case NEON::BI__builtin_neon_vcagth_f16: {
5438 llvm::Type* InTy = Int32Ty;
5439 llvm::Type* FTy = HalfTy;
5440 llvm::Type *Tys[2] = {InTy, FTy};
5441 switch (BuiltinID) {
5442 default: llvm_unreachable("missing builtin ID in switch!");
5443 case NEON::BI__builtin_neon_vcageh_f16:
5444 Int = Intrinsic::aarch64_neon_facge; break;
5445 case NEON::BI__builtin_neon_vcagth_f16:
5446 Int = Intrinsic::aarch64_neon_facgt; break;
5447 case NEON::BI__builtin_neon_vcaleh_f16:
5448 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
5449 case NEON::BI__builtin_neon_vcalth_f16:
5450 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
5451 }
5452 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
5453 return Builder.CreateTrunc(Ops[0], Int16Ty);
5454 }
5455 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5456 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
5457 llvm::Type* InTy = Int32Ty;
5458 llvm::Type* FTy = HalfTy;
5459 llvm::Type *Tys[2] = {InTy, FTy};
5460 switch (BuiltinID) {
5461 default: llvm_unreachable("missing builtin ID in switch!");
5462 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5463 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
5464 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
5465 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
5466 }
5467 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
5468 return Builder.CreateTrunc(Ops[0], Int16Ty);
5469 }
5470 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5471 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
5472 llvm::Type* FTy = HalfTy;
5473 llvm::Type* InTy = Int32Ty;
5474 llvm::Type *Tys[2] = {FTy, InTy};
5475 switch (BuiltinID) {
5476 default: llvm_unreachable("missing builtin ID in switch!");
5477 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5478 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
5479 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
5480 break;
5481 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
5482 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
5483 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
5484 break;
5485 }
5486 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
5487 }
5488 case NEON::BI__builtin_neon_vpaddd_s64: {
5489 // TODO: Isn't this handled by
5490 // EmitCommonNeonSISDBuiltinExpr?
5491 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
5492 // The vector is v2i64, so make sure it's bitcast to that.
5493 Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2i64");
5494 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5495 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5496 Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
5497 Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
5498 // Pairwise addition of a v2i64 into a scalar i64.
5499 return Builder.CreateAdd(Op0, Op1, "vpaddd");
5500 }
5501 case NEON::BI__builtin_neon_vpaddd_f64: {
5502 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
5503 // The vector is v2f64, so make sure it's bitcast to that.
5504 Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2f64");
5505 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5506 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5507 Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
5508 Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
5509 // Pairwise addition of a v2f64 into a scalar f64.
5510 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5511 }
5512 case NEON::BI__builtin_neon_vpadds_f32: {
5513 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
5514 // The vector is v2f32, so make sure it's bitcast to that.
5515 Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2f32");
5516 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5517 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5518 Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
5519 Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
5520 // Pairwise addition of a v2f32 into a scalar f32.
5521 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5522 }
5523 case NEON::BI__builtin_neon_vceqzd_s64:
5524 return EmitAArch64CompareBuiltinExpr(
5525 Ops[0], ConvertType(E->getCallReturnType(getContext())),
5526 ICmpInst::ICMP_EQ, "vceqz");
5527 case NEON::BI__builtin_neon_vceqzd_f64:
5528 case NEON::BI__builtin_neon_vceqzs_f32:
5529 case NEON::BI__builtin_neon_vceqzh_f16:
5530 return EmitAArch64CompareBuiltinExpr(
5531 Ops[0], ConvertType(E->getCallReturnType(getContext())),
5532 ICmpInst::FCMP_OEQ, "vceqz");
5533 case NEON::BI__builtin_neon_vcgezd_s64:
5534 return EmitAArch64CompareBuiltinExpr(
5535 Ops[0], ConvertType(E->getCallReturnType(getContext())),
5536 ICmpInst::ICMP_SGE, "vcgez");
5537 case NEON::BI__builtin_neon_vcgezd_f64:
5538 case NEON::BI__builtin_neon_vcgezs_f32:
5539 case NEON::BI__builtin_neon_vcgezh_f16:
5540 return EmitAArch64CompareBuiltinExpr(
5541 Ops[0], ConvertType(E->getCallReturnType(getContext())),
5542 ICmpInst::FCMP_OGE, "vcgez");
5543 case NEON::BI__builtin_neon_vclezd_s64:
5544 return EmitAArch64CompareBuiltinExpr(
5545 Ops[0], ConvertType(E->getCallReturnType(getContext())),
5546 ICmpInst::ICMP_SLE, "vclez");
5547 case NEON::BI__builtin_neon_vclezd_f64:
5548 case NEON::BI__builtin_neon_vclezs_f32:
5549 case NEON::BI__builtin_neon_vclezh_f16:
5550 return EmitAArch64CompareBuiltinExpr(
5551 Ops[0], ConvertType(E->getCallReturnType(getContext())),
5552 ICmpInst::FCMP_OLE, "vclez");
5553 case NEON::BI__builtin_neon_vcgtzd_s64:
5554 return EmitAArch64CompareBuiltinExpr(
5555 Ops[0], ConvertType(E->getCallReturnType(getContext())),
5556 ICmpInst::ICMP_SGT, "vcgtz");
5557 case NEON::BI__builtin_neon_vcgtzd_f64:
5558 case NEON::BI__builtin_neon_vcgtzs_f32:
5559 case NEON::BI__builtin_neon_vcgtzh_f16:
5560 return EmitAArch64CompareBuiltinExpr(
5561 Ops[0], ConvertType(E->getCallReturnType(getContext())),
5562 ICmpInst::FCMP_OGT, "vcgtz");
5563 case NEON::BI__builtin_neon_vcltzd_s64:
5564 return EmitAArch64CompareBuiltinExpr(
5565 Ops[0], ConvertType(E->getCallReturnType(getContext())),
5566 ICmpInst::ICMP_SLT, "vcltz");
5567
5568 case NEON::BI__builtin_neon_vcltzd_f64:
5569 case NEON::BI__builtin_neon_vcltzs_f32:
5570 case NEON::BI__builtin_neon_vcltzh_f16:
5571 return EmitAArch64CompareBuiltinExpr(
5572 Ops[0], ConvertType(E->getCallReturnType(getContext())),
5573 ICmpInst::FCMP_OLT, "vcltz");
5574
5575 case NEON::BI__builtin_neon_vceqzd_u64: {
5576 return EmitAArch64CompareBuiltinExpr(
5577 Ops[0], ConvertType(E->getCallReturnType(getContext())),
5578 ICmpInst::ICMP_EQ, "vceqzd");
5579 }
5580 case NEON::BI__builtin_neon_vceqd_f64:
5581 case NEON::BI__builtin_neon_vcled_f64:
5582 case NEON::BI__builtin_neon_vcltd_f64:
5583 case NEON::BI__builtin_neon_vcged_f64:
5584 case NEON::BI__builtin_neon_vcgtd_f64: {
5585 llvm::CmpInst::Predicate P;
5586 switch (BuiltinID) {
5587 default: llvm_unreachable("missing builtin ID in switch!");
5588 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
5589 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
5590 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
5591 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
5592 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
5593 }
5594 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5595 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
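// OEQ uses a quiet compare; the other ordering predicates use a signaling
// compare (FCmpS), matching IEEE 754, where <, <=, >, >= signal on quiet
// NaN inputs but equality does not.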
5596 if (P == llvm::FCmpInst::FCMP_OEQ)
5597 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5598 else
5599 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
5600 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
5601 }
5602 case NEON::BI__builtin_neon_vceqs_f32:
5603 case NEON::BI__builtin_neon_vcles_f32:
5604 case NEON::BI__builtin_neon_vclts_f32:
5605 case NEON::BI__builtin_neon_vcges_f32:
5606 case NEON::BI__builtin_neon_vcgts_f32: {
5607 llvm::CmpInst::Predicate P;
5608 switch (BuiltinID) {
5609 default: llvm_unreachable("missing builtin ID in switch!");
5610 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
5611 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
5612 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
5613 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
5614 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
5615 }
5616 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
5617 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
5618 if (P == llvm::FCmpInst::FCMP_OEQ)
5619 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5620 else
5621 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
5622 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
5623 }
5624 case NEON::BI__builtin_neon_vceqh_f16:
5625 case NEON::BI__builtin_neon_vcleh_f16:
5626 case NEON::BI__builtin_neon_vclth_f16:
5627 case NEON::BI__builtin_neon_vcgeh_f16:
5628 case NEON::BI__builtin_neon_vcgth_f16: {
5629 llvm::CmpInst::Predicate P;
5630 switch (BuiltinID) {
5631 default: llvm_unreachable("missing builtin ID in switch!");
5632 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
5633 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
5634 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
5635 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
5636 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
5637 }
5638 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
5639 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
5640 if (P == llvm::FCmpInst::FCMP_OEQ)
5641 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5642 else
5643 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
5644 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
5645 }
5646 case NEON::BI__builtin_neon_vceqd_s64:
5647 case NEON::BI__builtin_neon_vceqd_u64:
5648 case NEON::BI__builtin_neon_vcgtd_s64:
5649 case NEON::BI__builtin_neon_vcgtd_u64:
5650 case NEON::BI__builtin_neon_vcltd_s64:
5651 case NEON::BI__builtin_neon_vcltd_u64:
5652 case NEON::BI__builtin_neon_vcged_u64:
5653 case NEON::BI__builtin_neon_vcged_s64:
5654 case NEON::BI__builtin_neon_vcled_u64:
5655 case NEON::BI__builtin_neon_vcled_s64: {
5656 llvm::CmpInst::Predicate P;
5657 switch (BuiltinID) {
5658 default: llvm_unreachable("missing builtin ID in switch!");
5659 case NEON::BI__builtin_neon_vceqd_s64:
5660 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
5661 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
5662 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
5663 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
5664 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
5665 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
5666 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
5667 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
5668 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
5669 }
5670 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5671 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5672 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
5673 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
5674 }
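// --- Editorial sketch (assumes <arm_neon.h>): the scalar integer compares
// sign-extend the i1 result into an all-ones/all-zero 64-bit mask:
//   uint64_t m = vcgtd_s64(-1, 1);   // signed ICMP_SGT is false -> m == 0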
5675 case NEON::BI__builtin_neon_vnegd_s64:
5676 return Builder.CreateNeg(Ops[0], "vnegd");
5677 case NEON::BI__builtin_neon_vnegh_f16:
5678 return Builder.CreateFNeg(Ops[0], "vnegh");
5679 case NEON::BI__builtin_neon_vtstd_s64:
5680 case NEON::BI__builtin_neon_vtstd_u64: {
5681 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5682 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5683 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
5684 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5685 llvm::Constant::getNullValue(Int64Ty));
5686 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
5687 }
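// --- Editorial sketch (assumes <arm_neon.h>): vtst is a bitwise test,
// yielding all-ones iff the AND of its operands is non-zero:
//   uint64_t m = vtstd_u64(0xA, 0x6);   // 0xA & 0x6 == 0x2 -> m == ~0ULL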
5688 case NEON::BI__builtin_neon_vset_lane_i8:
5689 case NEON::BI__builtin_neon_vset_lane_i16:
5690 case NEON::BI__builtin_neon_vset_lane_i32:
5691 case NEON::BI__builtin_neon_vset_lane_i64:
5692 case NEON::BI__builtin_neon_vset_lane_bf16:
5693 case NEON::BI__builtin_neon_vset_lane_f32:
5694 case NEON::BI__builtin_neon_vsetq_lane_i8:
5695 case NEON::BI__builtin_neon_vsetq_lane_i16:
5696 case NEON::BI__builtin_neon_vsetq_lane_i32:
5697 case NEON::BI__builtin_neon_vsetq_lane_i64:
5698 case NEON::BI__builtin_neon_vsetq_lane_bf16:
5699 case NEON::BI__builtin_neon_vsetq_lane_f32:
5700 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5701 case NEON::BI__builtin_neon_vset_lane_f64:
5702 // The vector type needs a cast for the v1f64 variant.
5703 Ops[1] =
5704 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
5705 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5706 case NEON::BI__builtin_neon_vset_lane_mf8:
5707 case NEON::BI__builtin_neon_vsetq_lane_mf8:
5708 // The input vector type needs a cast to scalar type.
5709 Ops[0] =
5710 Builder.CreateBitCast(Ops[0], llvm::Type::getInt8Ty(getLLVMContext()));
5711 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5712 case NEON::BI__builtin_neon_vsetq_lane_f64:
5713 // The vector type needs a cast for the v2f64 variant.
5714 Ops[1] =
5715 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
5716 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
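// --- Editorial sketch (assumes <arm_neon.h>): every vset_lane case is one
// insertelement; only the f64/mf8 variants need the explicit bitcasts above:
//   float64x2_t v = vdupq_n_f64(0.0);
//   v = vsetq_lane_f64(3.5, v, 1);   // insertelement <2 x double>, lane 1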
5717
5718 case NEON::BI__builtin_neon_vget_lane_i8:
5719 case NEON::BI__builtin_neon_vdupb_lane_i8:
5720 Ops[0] =
5721 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
5722 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5723 case NEON::BI__builtin_neon_vgetq_lane_i8:
5724 case NEON::BI__builtin_neon_vdupb_laneq_i8:
5725 Ops[0] =
5726 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
5727 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5728 case NEON::BI__builtin_neon_vget_lane_mf8:
5729 case NEON::BI__builtin_neon_vdupb_lane_mf8:
5730 case NEON::BI__builtin_neon_vgetq_lane_mf8:
5731 case NEON::BI__builtin_neon_vdupb_laneq_mf8:
5732 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5733 case NEON::BI__builtin_neon_vget_lane_i16:
5734 case NEON::BI__builtin_neon_vduph_lane_i16:
5735 Ops[0] =
5736 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
5737 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5738 case NEON::BI__builtin_neon_vgetq_lane_i16:
5739 case NEON::BI__builtin_neon_vduph_laneq_i16:
5740 Ops[0] =
5741 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
5742 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5743 case NEON::BI__builtin_neon_vget_lane_i32:
5744 case NEON::BI__builtin_neon_vdups_lane_i32:
5745 Ops[0] =
5746 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
5747 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5748 case NEON::BI__builtin_neon_vdups_lane_f32:
5749 Ops[0] =
5750 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
5751 return Builder.CreateExtractElement(Ops[0], Ops[1], "vdups_lane");
5752 case NEON::BI__builtin_neon_vgetq_lane_i32:
5753 case NEON::BI__builtin_neon_vdups_laneq_i32:
5754 Ops[0] =
5755 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
5756 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5757 case NEON::BI__builtin_neon_vget_lane_i64:
5758 case NEON::BI__builtin_neon_vdupd_lane_i64:
5759 Ops[0] =
5760 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
5761 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5762 case NEON::BI__builtin_neon_vdupd_lane_f64:
5763 Ops[0] =
5764 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
5765 return Builder.CreateExtractElement(Ops[0], Ops[1], "vdupd_lane");
5766 case NEON::BI__builtin_neon_vgetq_lane_i64:
5767 case NEON::BI__builtin_neon_vdupd_laneq_i64:
5768 Ops[0] =
5769 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
5770 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5771 case NEON::BI__builtin_neon_vget_lane_f32:
5772 Ops[0] =
5773 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
5774 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5775 case NEON::BI__builtin_neon_vget_lane_f64:
5776 Ops[0] =
5777 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
5778 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5779 case NEON::BI__builtin_neon_vgetq_lane_f32:
5780 case NEON::BI__builtin_neon_vdups_laneq_f32:
5781 Ops[0] =
5782 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
5783 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5784 case NEON::BI__builtin_neon_vgetq_lane_f64:
5785 case NEON::BI__builtin_neon_vdupd_laneq_f64:
5786 Ops[0] =
5787 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
5788 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
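// --- Editorial sketch (assumes <arm_neon.h>): the vget/vdup lane cases all
// lower to a single extractelement after the bitcast to the right vector type:
//   float64x2_t v = {1.0, 2.0};
//   float64_t x = vgetq_lane_f64(v, 1);   // extractelement -> 2.0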
5789 case NEON::BI__builtin_neon_vaddh_f16:
5790 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
5791 case NEON::BI__builtin_neon_vsubh_f16:
5792 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
5793 case NEON::BI__builtin_neon_vmulh_f16:
5794 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
5795 case NEON::BI__builtin_neon_vdivh_f16:
5796 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
5797 case NEON::BI__builtin_neon_vfmah_f16:
5798 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
5799 return emitCallMaybeConstrainedFPBuiltin(
5800 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
5801 {Ops[1], Ops[2], Ops[0]});
5802 case NEON::BI__builtin_neon_vfmsh_f16: {
5803 Value *Neg = Builder.CreateFNeg(Ops[1], "vsubh");
5804
5805 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
5806 return emitCallMaybeConstrainedFPBuiltin(
5807 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
5808 {Neg, Ops[2], Ops[0]});
5809 }
5810 case NEON::BI__builtin_neon_vaddd_s64:
5811 case NEON::BI__builtin_neon_vaddd_u64:
5812 return Builder.CreateAdd(Ops[0], Ops[1], "vaddd");
5813 case NEON::BI__builtin_neon_vsubd_s64:
5814 case NEON::BI__builtin_neon_vsubd_u64:
5815 return Builder.CreateSub(Ops[0], Ops[1], "vsubd");
5816 case NEON::BI__builtin_neon_vqdmlalh_s16:
5817 case NEON::BI__builtin_neon_vqdmlslh_s16: {
5818 SmallVector<Value *, 2> ProductOps;
5819 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5820 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
5821 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
5822 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5823 ProductOps, "vqdmlXl");
5824 Constant *CI = ConstantInt::get(SizeTy, 0);
5825 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5826
5827 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
5828 ? Intrinsic::aarch64_neon_sqadd
5829 : Intrinsic::aarch64_neon_sqsub;
5830 // Drop the 2nd multiplication argument before the accumulation
5831 Ops.pop_back();
5832 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
5833 }
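// --- Editorial note (hedged): there is no scalar 16-bit sqdmull, so the
// operands are wrapped into v4i16 vectors, multiplied, and lane 0 extracted
// before the saturating accumulate. Assuming <arm_neon.h> semantics:
//   int32_t r = vqdmlalh_s16(acc, a, b);   // sat(acc + sat(2 * a * b))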
5834 case NEON::BI__builtin_neon_vqshlud_n_s64: {
5835 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5836 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
5837 Ops, "vqshlu_n");
5838 }
5839 case NEON::BI__builtin_neon_vqshld_n_u64:
5840 case NEON::BI__builtin_neon_vqshld_n_s64: {
5841 Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
5842 ? Intrinsic::aarch64_neon_uqshl
5843 : Intrinsic::aarch64_neon_sqshl;
5844 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5845 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
5846 }
5847 case NEON::BI__builtin_neon_vrshrd_n_u64:
5848 case NEON::BI__builtin_neon_vrshrd_n_s64: {
5849 Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
5850 ? Intrinsic::aarch64_neon_urshl
5851 : Intrinsic::aarch64_neon_srshl;
5852 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
5853 Ops[1] = ConstantInt::get(Int64Ty, -SV);
5854 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
5855 }
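// --- Editorial note (hedged): AArch64 exposes only rounding *left* shifts
// (srshl/urshl) at this level, so a rounding right shift by N is emitted as
// a left shift by -N. Assuming <arm_neon.h> semantics:
//   int64_t r = vrshrd_n_s64(5, 1);   // (5 + 1) >> 1 == 3, rounds to nearest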
5856 case NEON::BI__builtin_neon_vrsrad_n_u64:
5857 case NEON::BI__builtin_neon_vrsrad_n_s64: {
5858 Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
5859 ? Intrinsic::aarch64_neon_urshl
5860 : Intrinsic::aarch64_neon_srshl;
5861 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5862 Ops[2] = Builder.CreateNeg(Ops[2]);
5863 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
5864 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
5865 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
5866 }
5867 case NEON::BI__builtin_neon_vshld_n_s64:
5868 case NEON::BI__builtin_neon_vshld_n_u64: {
5869 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[1]);
5870 return Builder.CreateShl(
5871 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
5872 }
5873 case NEON::BI__builtin_neon_vshrd_n_s64: {
5874 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[1]);
5875 return Builder.CreateAShr(
5876 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5877 Amt->getZExtValue())),
5878 "shrd_n");
5879 }
5880 case NEON::BI__builtin_neon_vshrd_n_u64: {
5881 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[1]);
5882 uint64_t ShiftAmt = Amt->getZExtValue();
5883 // Right-shifting an unsigned value by its size yields 0.
5884 if (ShiftAmt == 64)
5885 return ConstantInt::get(Int64Ty, 0);
5886 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
5887 "shrd_n");
5888 }
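// --- Editorial note: the immediate-shift cases above avoid shift amounts
// equal to the bit width, which would be poison in LLVM IR:
//   vshrd_n_s64(x, 64)   // clamped: emitted as ashr x, 63 (sign fill)
//   vshrd_n_u64(x, 64)   // folded: emitted as the constant i64 0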
5889 case NEON::BI__builtin_neon_vsrad_n_s64: {
5890 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[2]);
5891 Ops[1] = Builder.CreateAShr(
5892 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5893 Amt->getZExtValue())),
5894 "shrd_n");
5895 return Builder.CreateAdd(Ops[0], Ops[1]);
5896 }
5897 case NEON::BI__builtin_neon_vsrad_n_u64: {
5898 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[2]);
5899 uint64_t ShiftAmt = Amt->getZExtValue();
5900 // Right-shifting an unsigned value by its size yields 0.
5901 // As Op + 0 = Op, return Ops[0] directly.
5902 if (ShiftAmt == 64)
5903 return Ops[0];
5904 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
5905 "shrd_n");
5906 return Builder.CreateAdd(Ops[0], Ops[1]);
5907 }
5908 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
5909 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
5910 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
5911 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
5912 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "lane");
5913 SmallVector<Value *, 2> ProductOps;
5914 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5915 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
5916 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
5917 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5918 ProductOps, "vqdmlXl");
5919 Constant *CI = ConstantInt::get(SizeTy, 0);
5920 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5921 // Drop lane-selection and the corresponding vector argument (these have
5922 // already been used)
5923 Ops.pop_back_n(2);
5924
5925 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
5926 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
5927 ? Intrinsic::aarch64_neon_sqadd
5928 : Intrinsic::aarch64_neon_sqsub;
5929 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
5930 }
5931 case NEON::BI__builtin_neon_vqdmlals_s32:
5932 case NEON::BI__builtin_neon_vqdmlsls_s32: {
5933 SmallVector<Value *, 2> ProductOps;
5934 ProductOps.push_back(Ops[1]);
5935 ProductOps.push_back(Ops[2]);
5936 Ops[1] =
5937 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5938 ProductOps, "vqdmlXl");
5939
5940 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
5941 ? Intrinsic::aarch64_neon_sqadd
5942 : Intrinsic::aarch64_neon_sqsub;
5943 // Drop the 2nd multiplication argument before the accumulation
5944 Ops.pop_back();
5945 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
5946 }
5947 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
5948 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
5949 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
5950 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
5951 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "lane");
5952 SmallVector<Value *, 2> ProductOps;
5953 ProductOps.push_back(Ops[1]);
5954 ProductOps.push_back(Ops[2]);
5955 Ops[1] =
5956 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5957 ProductOps, "vqdmlXl");
5958 // Drop lane-selection and the corresponding vector argument (these have
5959 // already been used)
5960 Ops.pop_back_n(2);
5961
5962 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
5963 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
5964 ? Intrinsic::aarch64_neon_sqadd
5965 : Intrinsic::aarch64_neon_sqsub;
5966 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
5967 }
5968 case NEON::BI__builtin_neon_vget_lane_bf16:
5969 case NEON::BI__builtin_neon_vduph_lane_bf16:
5970 case NEON::BI__builtin_neon_vduph_lane_f16: {
5971 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5972 }
5973 case NEON::BI__builtin_neon_vgetq_lane_bf16:
5974 case NEON::BI__builtin_neon_vduph_laneq_bf16:
5975 case NEON::BI__builtin_neon_vduph_laneq_f16: {
5976 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5977 }
5978 case NEON::BI__builtin_neon_vcvt_bf16_f32: {
5979 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
5980 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
5981 return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
5982 }
5983 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
5984 SmallVector<int, 16> ConcatMask(8);
5985 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
5986 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
5987 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
5988 llvm::Value *Trunc =
5989 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
5990 return Builder.CreateShuffleVector(
5991 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
5992 }
5993 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
5994 SmallVector<int, 16> ConcatMask(8);
5995 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
5996 SmallVector<int, 16> LoMask(4);
5997 std::iota(LoMask.begin(), LoMask.end(), 0);
5998 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
5999 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6000 llvm::Type *V8BF16 = FixedVectorType::get(Builder.getBFloatTy(), 8);
6001 llvm::Value *Inactive = Builder.CreateShuffleVector(
6002 Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
6003 llvm::Value *Trunc =
6004 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
6005 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
6006 }
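// --- Editorial sketch (assumes <arm_neon.h>): the bf16 converts are an
// fptrunc plus shuffle concatenation; the _high form keeps the low half of
// its first operand and places the truncated lanes in the top half:
//   bfloat16x8_t r = vcvtq_high_bf16_f32(lo, v);
//   // r[0..3] == lo[0..3], r[4..7] == (bfloat16_t)v[0..3]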
6007
6008 case clang::AArch64::BI_InterlockedAdd:
6009 case clang::AArch64::BI_InterlockedAdd_acq:
6010 case clang::AArch64::BI_InterlockedAdd_rel:
6011 case clang::AArch64::BI_InterlockedAdd_nf:
6012 case clang::AArch64::BI_InterlockedAdd64:
6013 case clang::AArch64::BI_InterlockedAdd64_acq:
6014 case clang::AArch64::BI_InterlockedAdd64_rel:
6015 case clang::AArch64::BI_InterlockedAdd64_nf: {
6016 Address DestAddr = CheckAtomicAlignment(*this, E);
6017 Value *Val = Ops[1];
6018 llvm::AtomicOrdering Ordering;
6019 switch (BuiltinID) {
6020 case clang::AArch64::BI_InterlockedAdd:
6021 case clang::AArch64::BI_InterlockedAdd64:
6022 Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
6023 break;
6024 case clang::AArch64::BI_InterlockedAdd_acq:
6025 case clang::AArch64::BI_InterlockedAdd64_acq:
6026 Ordering = llvm::AtomicOrdering::Acquire;
6027 break;
6028 case clang::AArch64::BI_InterlockedAdd_rel:
6029 case clang::AArch64::BI_InterlockedAdd64_rel:
6030 Ordering = llvm::AtomicOrdering::Release;
6031 break;
6032 case clang::AArch64::BI_InterlockedAdd_nf:
6033 case clang::AArch64::BI_InterlockedAdd64_nf:
6034 Ordering = llvm::AtomicOrdering::Monotonic;
6035 break;
6036 default:
6037 llvm_unreachable("missing builtin ID in switch!");
6038 }
6039 AtomicRMWInst *RMWI =
6040 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val, Ordering);
6041 return Builder.CreateAdd(RMWI, Val);
6042 }
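// --- Editorial note: atomicrmw add returns the *old* value, but MSVC's
// _InterlockedAdd is specified to return the *new* value -- hence the extra
// CreateAdd on the result. Source-level view (assumes the MSVC intrinsic):
//   long v = 0;
//   long r = _InterlockedAdd(&v, 5);   // r == 5, the post-add value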
6043 }
6044
6045 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
6046 llvm::Type *Ty = VTy;
6047 if (!Ty)
6048 return nullptr;
6049
6050 bool ExtractLow = false;
6051 bool ExtendLaneArg = false;
6052 switch (BuiltinID) {
6053 default: return nullptr;
6054 case NEON::BI__builtin_neon_vbsl_v:
6055 case NEON::BI__builtin_neon_vbslq_v: {
6056 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6057 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
6058 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
6059 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
6060
6061 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
6062 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
6063 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
6064 return Builder.CreateBitCast(Ops[0], Ty);
6065 }
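// --- Editorial sketch (assumes <arm_neon.h>): vbsl is a per-bit select,
// computed above as (mask & a) | (~mask & b) on the integer view of the types:
//   uint8x8_t r = vbsl_u8(mask, a, b);   // bit of a where mask bit is 1,
//                                        // bit of b where it is 0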
6066 case NEON::BI__builtin_neon_vfma_lane_v:
6067 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6068 // The ARM builtins (and instructions) have the addend as the first
6069 // operand, but the 'fma' intrinsics have it last. Swap it around here.
6070 Value *Addend = Ops[0];
6071 Value *Multiplicand = Ops[1];
6072 Value *LaneSource = Ops[2];
6073 Ops[0] = Multiplicand;
6074 Ops[1] = LaneSource;
6075 Ops[2] = Addend;
6076
6077 // Now adjust things to handle the lane access.
6078 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
6079 ? llvm::FixedVectorType::get(VTy->getElementType(),
6080 VTy->getNumElements() / 2)
6081 : VTy;
6082 llvm::Constant *cst = cast<Constant>(Ops[3]);
6083 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
6084 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
6085 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
6086
6087 Ops.pop_back();
6088 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
6089 : Intrinsic::fma;
6090 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
6091 }
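// --- Editorial sketch (assumes <arm_neon.h>): the selected lane is splatted
// across the vector with a constant shuffle before the (possibly constrained)
// fma:
//   float32x2_t r = vfma_lane_f32(acc, x, v, 1);   // acc + x * splat(v[1])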
6092 case NEON::BI__builtin_neon_vfma_laneq_v: {
6093 auto *VTy = cast<llvm::FixedVectorType>(Ty);
6094 // v1f64 fma should be mapped to Neon scalar f64 fma
6095 if (VTy && VTy->getElementType() == DoubleTy) {
6096 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6097 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6098 llvm::FixedVectorType *VTy =
6099 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
6100 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
6101 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6102 Value *Result;
6103 Result = emitCallMaybeConstrainedFPBuiltin(
6104 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
6105 DoubleTy, {Ops[1], Ops[2], Ops[0]});
6106 return Builder.CreateBitCast(Result, Ty);
6107 }
6108 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6109 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6110
6111 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
6112 VTy->getNumElements() * 2);
6113 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
6114 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
6115 cast<ConstantInt>(Ops[3]));
6116 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6117
6118 return emitCallMaybeConstrainedFPBuiltin(
6119 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6120 {Ops[2], Ops[1], Ops[0]});
6121 }
6122 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6123 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6124 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6125
6126 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6127 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6128 return emitCallMaybeConstrainedFPBuiltin(
6129 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6130 {Ops[2], Ops[1], Ops[0]});
6131 }
6132 case NEON::BI__builtin_neon_vfmah_lane_f16:
6133 case NEON::BI__builtin_neon_vfmas_lane_f32:
6134 case NEON::BI__builtin_neon_vfmah_laneq_f16:
6135 case NEON::BI__builtin_neon_vfmas_laneq_f32:
6136 case NEON::BI__builtin_neon_vfmad_lane_f64:
6137 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6138 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6139 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6140 return emitCallMaybeConstrainedFPBuiltin(
6141 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6142 {Ops[1], Ops[2], Ops[0]});
6143 }
6144 case NEON::BI__builtin_neon_vmull_v:
6145 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6146 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6147 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6148 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6149 case NEON::BI__builtin_neon_vmax_v:
6150 case NEON::BI__builtin_neon_vmaxq_v:
6151 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6152 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6153 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6154 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6155 case NEON::BI__builtin_neon_vmaxh_f16: {
6156 Int = Intrinsic::aarch64_neon_fmax;
6157 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
6158 }
6159 case NEON::BI__builtin_neon_vmin_v:
6160 case NEON::BI__builtin_neon_vminq_v:
6161 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6162 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6163 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6164 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
6165 case NEON::BI__builtin_neon_vminh_f16: {
6166 Int = Intrinsic::aarch64_neon_fmin;
6167 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
6168 }
6169 case NEON::BI__builtin_neon_vabd_v:
6170 case NEON::BI__builtin_neon_vabdq_v:
6171 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6172 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6173 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6174 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
6175 case NEON::BI__builtin_neon_vpadal_v:
6176 case NEON::BI__builtin_neon_vpadalq_v: {
6177 unsigned ArgElts = VTy->getNumElements();
6178 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6179 unsigned BitWidth = EltTy->getBitWidth();
6180 auto *ArgTy = llvm::FixedVectorType::get(
6181 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
6182 llvm::Type* Tys[2] = { VTy, ArgTy };
6183 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6184 SmallVector<llvm::Value*, 1> TmpOps;
6185 TmpOps.push_back(Ops[1]);
6186 Function *F = CGM.getIntrinsic(Int, Tys);
6187 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
6188 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6189 return Builder.CreateAdd(tmp, addend);
6190 }
6191 case NEON::BI__builtin_neon_vpmin_v:
6192 case NEON::BI__builtin_neon_vpminq_v:
6193 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6194 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6195 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6196 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
6197 case NEON::BI__builtin_neon_vpmax_v:
6198 case NEON::BI__builtin_neon_vpmaxq_v:
6199 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6200 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6201 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6202 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
6203 case NEON::BI__builtin_neon_vminnm_v:
6204 case NEON::BI__builtin_neon_vminnmq_v:
6205 Int = Intrinsic::aarch64_neon_fminnm;
6206 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
6207 case NEON::BI__builtin_neon_vminnmh_f16:
6208 Int = Intrinsic::aarch64_neon_fminnm;
6209 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
6210 case NEON::BI__builtin_neon_vmaxnm_v:
6211 case NEON::BI__builtin_neon_vmaxnmq_v:
6212 Int = Intrinsic::aarch64_neon_fmaxnm;
6213 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
6214 case NEON::BI__builtin_neon_vmaxnmh_f16:
6215 Int = Intrinsic::aarch64_neon_fmaxnm;
6216 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
6217 case NEON::BI__builtin_neon_vrecpss_f32: {
6218 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6219 Ops, "vrecps");
6220 }
6221 case NEON::BI__builtin_neon_vrecpsd_f64:
6222 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6223 Ops, "vrecps");
6224 case NEON::BI__builtin_neon_vrecpsh_f16:
6225 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
6226 Ops, "vrecps");
6227 case NEON::BI__builtin_neon_vqshrun_n_v:
6228 Int = Intrinsic::aarch64_neon_sqshrun;
6229 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
6230 case NEON::BI__builtin_neon_vqrshrun_n_v:
6231 Int = Intrinsic::aarch64_neon_sqrshrun;
6232 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
6233 case NEON::BI__builtin_neon_vqshrn_n_v:
6234 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6235 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
6236 case NEON::BI__builtin_neon_vrshrn_n_v:
6237 Int = Intrinsic::aarch64_neon_rshrn;
6238 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
6239 case NEON::BI__builtin_neon_vqrshrn_n_v:
6240 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6241 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
6242 case NEON::BI__builtin_neon_vrndah_f16: {
6243 Int = Builder.getIsFPConstrained()
6244 ? Intrinsic::experimental_constrained_round
6245 : Intrinsic::round;
6246 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
6247 }
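// --- Editorial note (hedged): this constrained/unconstrained selection
// repeats for all the rounding cases below; under strict FP (e.g.
// -ffp-model=strict) the experimental.constrained variant is used so FP
// exception semantics are preserved. Assuming <arm_neon.h>:
//   float16_t r = vrndah_f16(x);   // llvm.round: ties round away from zero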
6248 case NEON::BI__builtin_neon_vrnda_v:
6249 case NEON::BI__builtin_neon_vrndaq_v: {
6250 Int = Builder.getIsFPConstrained()
6251 ? Intrinsic::experimental_constrained_round
6252 : Intrinsic::round;
6253 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
6254 }
6255 case NEON::BI__builtin_neon_vrndih_f16: {
6256 Int = Builder.getIsFPConstrained()
6257 ? Intrinsic::experimental_constrained_nearbyint
6258 : Intrinsic::nearbyint;
6259 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
6260 }
6261 case NEON::BI__builtin_neon_vrndmh_f16: {
6262 Int = Builder.getIsFPConstrained()
6263 ? Intrinsic::experimental_constrained_floor
6264 : Intrinsic::floor;
6265 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
6266 }
6267 case NEON::BI__builtin_neon_vrndm_v:
6268 case NEON::BI__builtin_neon_vrndmq_v: {
6269 Int = Builder.getIsFPConstrained()
6270 ? Intrinsic::experimental_constrained_floor
6271 : Intrinsic::floor;
6272 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6273 }
6274 case NEON::BI__builtin_neon_vrndnh_f16: {
6275 Int = Builder.getIsFPConstrained()
6276 ? Intrinsic::experimental_constrained_roundeven
6277 : Intrinsic::roundeven;
6278 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
6279 }
6280 case NEON::BI__builtin_neon_vrndn_v:
6281 case NEON::BI__builtin_neon_vrndnq_v: {
6282 Int = Builder.getIsFPConstrained()
6283 ? Intrinsic::experimental_constrained_roundeven
6284 : Intrinsic::roundeven;
6285 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6286 }
6287 case NEON::BI__builtin_neon_vrndns_f32: {
6288 Int = Builder.getIsFPConstrained()
6289 ? Intrinsic::experimental_constrained_roundeven
6290 : Intrinsic::roundeven;
6291 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
6292 }
6293 case NEON::BI__builtin_neon_vrndph_f16: {
6294 Int = Builder.getIsFPConstrained()
6295 ? Intrinsic::experimental_constrained_ceil
6296 : Intrinsic::ceil;
6297 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
6298 }
6299 case NEON::BI__builtin_neon_vrndp_v:
6300 case NEON::BI__builtin_neon_vrndpq_v: {
6301 Int = Builder.getIsFPConstrained()
6302 ? Intrinsic::experimental_constrained_ceil
6303 : Intrinsic::ceil;
6304 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6305 }
6306 case NEON::BI__builtin_neon_vrndxh_f16: {
6307 Int = Builder.getIsFPConstrained()
6308 ? Intrinsic::experimental_constrained_rint
6309 : Intrinsic::rint;
6310 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
6311 }
6312 case NEON::BI__builtin_neon_vrndx_v:
6313 case NEON::BI__builtin_neon_vrndxq_v: {
6314 Int = Builder.getIsFPConstrained()
6315 ? Intrinsic::experimental_constrained_rint
6316 : Intrinsic::rint;
6317 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6318 }
6319 case NEON::BI__builtin_neon_vrndh_f16: {
6320 Int = Builder.getIsFPConstrained()
6321 ? Intrinsic::experimental_constrained_trunc
6322 : Intrinsic::trunc;
6323 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
6324 }
6325 case NEON::BI__builtin_neon_vrnd32x_f32:
6326 case NEON::BI__builtin_neon_vrnd32xq_f32:
6327 case NEON::BI__builtin_neon_vrnd32x_f64:
6328 case NEON::BI__builtin_neon_vrnd32xq_f64: {
6329 Int = Intrinsic::aarch64_neon_frint32x;
6330 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
6331 }
6332 case NEON::BI__builtin_neon_vrnd32z_f32:
6333 case NEON::BI__builtin_neon_vrnd32zq_f32:
6334 case NEON::BI__builtin_neon_vrnd32z_f64:
6335 case NEON::BI__builtin_neon_vrnd32zq_f64: {
6336 Int = Intrinsic::aarch64_neon_frint32z;
6337 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
6338 }
6339 case NEON::BI__builtin_neon_vrnd64x_f32:
6340 case NEON::BI__builtin_neon_vrnd64xq_f32:
6341 case NEON::BI__builtin_neon_vrnd64x_f64:
6342 case NEON::BI__builtin_neon_vrnd64xq_f64: {
6343 Int = Intrinsic::aarch64_neon_frint64x;
6344 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
6345 }
6346 case NEON::BI__builtin_neon_vrnd64z_f32:
6347 case NEON::BI__builtin_neon_vrnd64zq_f32:
6348 case NEON::BI__builtin_neon_vrnd64z_f64:
6349 case NEON::BI__builtin_neon_vrnd64zq_f64: {
6350 Int = Intrinsic::aarch64_neon_frint64z;
6351 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
6352 }
6353 case NEON::BI__builtin_neon_vrnd_v:
6354 case NEON::BI__builtin_neon_vrndq_v: {
6355 Int = Builder.getIsFPConstrained()
6356 ? Intrinsic::experimental_constrained_trunc
6357 : Intrinsic::trunc;
6358 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
6359 }
6360 case NEON::BI__builtin_neon_vcvt_f64_v:
6361 case NEON::BI__builtin_neon_vcvtq_f64_v:
6362 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6363 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6364 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6365 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6366 case NEON::BI__builtin_neon_vcvt_f64_f32: {
6367 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6368 "unexpected vcvt_f64_f32 builtin");
6369 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6370 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6371
6372 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
6373 }
6374 case NEON::BI__builtin_neon_vcvt_f32_f64: {
6375 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6376 "unexpected vcvt_f32_f64 builtin");
6377 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6378 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6379
6380 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
6381 }
6382 case NEON::BI__builtin_neon_vcvta_s16_f16:
6383 case NEON::BI__builtin_neon_vcvta_u16_f16:
6384 case NEON::BI__builtin_neon_vcvta_s32_v:
6385 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
6386 case NEON::BI__builtin_neon_vcvtaq_s32_v:
6387 case NEON::BI__builtin_neon_vcvta_u32_v:
6388 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
6389 case NEON::BI__builtin_neon_vcvtaq_u32_v:
6390 case NEON::BI__builtin_neon_vcvta_s64_v:
6391 case NEON::BI__builtin_neon_vcvtaq_s64_v:
6392 case NEON::BI__builtin_neon_vcvta_u64_v:
6393 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6394 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6395 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6396 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
6397 }
6398 case NEON::BI__builtin_neon_vcvtm_s16_f16:
6399 case NEON::BI__builtin_neon_vcvtm_s32_v:
6400 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
6401 case NEON::BI__builtin_neon_vcvtmq_s32_v:
6402 case NEON::BI__builtin_neon_vcvtm_u16_f16:
6403 case NEON::BI__builtin_neon_vcvtm_u32_v:
6404 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
6405 case NEON::BI__builtin_neon_vcvtmq_u32_v:
6406 case NEON::BI__builtin_neon_vcvtm_s64_v:
6407 case NEON::BI__builtin_neon_vcvtmq_s64_v:
6408 case NEON::BI__builtin_neon_vcvtm_u64_v:
6409 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6410 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6411 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6412 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
6413 }
6414 case NEON::BI__builtin_neon_vcvtn_s16_f16:
6415 case NEON::BI__builtin_neon_vcvtn_s32_v:
6416 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
6417 case NEON::BI__builtin_neon_vcvtnq_s32_v:
6418 case NEON::BI__builtin_neon_vcvtn_u16_f16:
6419 case NEON::BI__builtin_neon_vcvtn_u32_v:
6420 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
6421 case NEON::BI__builtin_neon_vcvtnq_u32_v:
6422 case NEON::BI__builtin_neon_vcvtn_s64_v:
6423 case NEON::BI__builtin_neon_vcvtnq_s64_v:
6424 case NEON::BI__builtin_neon_vcvtn_u64_v:
6425 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6426 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6427 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6428 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
6429 }
6430 case NEON::BI__builtin_neon_vcvtp_s16_f16:
6431 case NEON::BI__builtin_neon_vcvtp_s32_v:
6432 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
6433 case NEON::BI__builtin_neon_vcvtpq_s32_v:
6434 case NEON::BI__builtin_neon_vcvtp_u16_f16:
6435 case NEON::BI__builtin_neon_vcvtp_u32_v:
6436 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
6437 case NEON::BI__builtin_neon_vcvtpq_u32_v:
6438 case NEON::BI__builtin_neon_vcvtp_s64_v:
6439 case NEON::BI__builtin_neon_vcvtpq_s64_v:
6440 case NEON::BI__builtin_neon_vcvtp_u64_v:
6441 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6442 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6443 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6444 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
6445 }
6446 case NEON::BI__builtin_neon_vmulx_v:
6447 case NEON::BI__builtin_neon_vmulxq_v: {
6448 Int = Intrinsic::aarch64_neon_fmulx;
6449 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
6450 }
6451 case NEON::BI__builtin_neon_vmulxh_lane_f16:
6452 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
6453 // vmulx_lane should be mapped to Neon scalar mulx after
6454 // extracting the scalar element
6455 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6456 Ops.pop_back();
6457 Int = Intrinsic::aarch64_neon_fmulx;
6458 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
6459 }
6460 case NEON::BI__builtin_neon_vmul_lane_v:
6461 case NEON::BI__builtin_neon_vmul_laneq_v: {
6462 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
6463 bool Quad = false;
6464 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
6465 Quad = true;
6466 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6467 llvm::FixedVectorType *VTy =
6468 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
6469 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6470 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6471 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
6472 return Builder.CreateBitCast(Result, Ty);
6473 }
6474 case NEON::BI__builtin_neon_vpmaxnm_v:
6475 case NEON::BI__builtin_neon_vpmaxnmq_v: {
6476 Int = Intrinsic::aarch64_neon_fmaxnmp;
6477 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
6478 }
6479 case NEON::BI__builtin_neon_vpminnm_v:
6480 case NEON::BI__builtin_neon_vpminnmq_v: {
6481 Int = Intrinsic::aarch64_neon_fminnmp;
6482 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
6483 }
6484 case NEON::BI__builtin_neon_vsqrth_f16: {
6485 Int = Builder.getIsFPConstrained()
6486 ? Intrinsic::experimental_constrained_sqrt
6487 : Intrinsic::sqrt;
6488 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
6489 }
6490 case NEON::BI__builtin_neon_vsqrt_v:
6491 case NEON::BI__builtin_neon_vsqrtq_v: {
6492 Int = Builder.getIsFPConstrained()
6493 ? Intrinsic::experimental_constrained_sqrt
6494 : Intrinsic::sqrt;
6495 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6496 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
6497 }
6498 case NEON::BI__builtin_neon_vrbit_v:
6499 case NEON::BI__builtin_neon_vrbitq_v: {
6500 Int = Intrinsic::bitreverse;
6501 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
6502 }
6503 case NEON::BI__builtin_neon_vmaxv_f16: {
6504 Int = Intrinsic::aarch64_neon_fmaxv;
6505 Ty = HalfTy;
6506 VTy = llvm::FixedVectorType::get(HalfTy, 4);
6507 llvm::Type *Tys[2] = {Ty, VTy};
6508 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6509 return Builder.CreateTrunc(Ops[0], HalfTy);
6510 }
6511 case NEON::BI__builtin_neon_vmaxvq_f16: {
6512 Int = Intrinsic::aarch64_neon_fmaxv;
6513 Ty = HalfTy;
6514 VTy = llvm::FixedVectorType::get(HalfTy, 8);
6515 llvm::Type *Tys[2] = {Ty, VTy};
6516 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6517 return Builder.CreateTrunc(Ops[0], HalfTy);
6518 }
6519 case NEON::BI__builtin_neon_vminv_f16: {
6520 Int = Intrinsic::aarch64_neon_fminv;
6521 Ty = HalfTy;
6522 VTy = llvm::FixedVectorType::get(HalfTy, 4);
6523 llvm::Type *Tys[2] = {Ty, VTy};
6524 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6525 return Builder.CreateTrunc(Ops[0], HalfTy);
6526 }
6527 case NEON::BI__builtin_neon_vminvq_f16: {
6528 Int = Intrinsic::aarch64_neon_fminv;
6529 Ty = HalfTy;
6530 VTy = llvm::FixedVectorType::get(HalfTy, 8);
6531 llvm::Type *Tys[2] = {Ty, VTy};
6532 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6533 return Builder.CreateTrunc(Ops[0], HalfTy);
6534 }
6535 case NEON::BI__builtin_neon_vmaxnmv_f16: {
6536 Int = Intrinsic::aarch64_neon_fmaxnmv;
6537 Ty = HalfTy;
6538 VTy = llvm::FixedVectorType::get(HalfTy, 4);
6539 llvm::Type *Tys[2] = {Ty, VTy};
6540 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
6541 return Builder.CreateTrunc(Ops[0], HalfTy);
6542 }
6543 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
6544 Int = Intrinsic::aarch64_neon_fmaxnmv;
6545 Ty = HalfTy;
6546 VTy = llvm::FixedVectorType::get(HalfTy, 8);
6547 llvm::Type *Tys[2] = {Ty, VTy};
6548 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
6549 return Builder.CreateTrunc(Ops[0], HalfTy);
6550 }
6551 case NEON::BI__builtin_neon_vminnmv_f16: {
6552 Int = Intrinsic::aarch64_neon_fminnmv;
6553 Ty = HalfTy;
6554 VTy = llvm::FixedVectorType::get(HalfTy, 4);
6555 llvm::Type *Tys[2] = {Ty, VTy};
6556 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
6557 return Builder.CreateTrunc(Ops[0], HalfTy);
6558 }
6559 case NEON::BI__builtin_neon_vminnmvq_f16: {
6560 Int = Intrinsic::aarch64_neon_fminnmv;
6561 Ty = HalfTy;
6562 VTy = llvm::FixedVectorType::get(HalfTy, 8);
6563 llvm::Type *Tys[2] = {Ty, VTy};
6564 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
6565 return Builder.CreateTrunc(Ops[0], HalfTy);
6566 }
6567 case NEON::BI__builtin_neon_vmul_n_f64: {
6568 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6569 Value *RHS = Builder.CreateBitCast(Ops[1], DoubleTy);
6570 return Builder.CreateFMul(Ops[0], RHS);
6571 }
6572 case NEON::BI__builtin_neon_vaddlv_u8: {
6573 Int = Intrinsic::aarch64_neon_uaddlv;
6574 Ty = Int32Ty;
6575 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
6576 llvm::Type *Tys[2] = {Ty, VTy};
6577 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6578 return Builder.CreateTrunc(Ops[0], Int16Ty);
6579 }
6580 case NEON::BI__builtin_neon_vaddlv_u16: {
6581 Int = Intrinsic::aarch64_neon_uaddlv;
6582 Ty = Int32Ty;
6583 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
6584 llvm::Type *Tys[2] = {Ty, VTy};
6585 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6586 }
6587 case NEON::BI__builtin_neon_vaddlvq_u8: {
6588 Int = Intrinsic::aarch64_neon_uaddlv;
6589 Ty = Int32Ty;
6590 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
6591 llvm::Type *Tys[2] = {Ty, VTy};
6592 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6593 return Builder.CreateTrunc(Ops[0], Int16Ty);
6594 }
6595 case NEON::BI__builtin_neon_vaddlvq_u16: {
6596 Int = Intrinsic::aarch64_neon_uaddlv;
6597 Ty = Int32Ty;
6598 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
6599 llvm::Type *Tys[2] = {Ty, VTy};
6600 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6601 }
6602 case NEON::BI__builtin_neon_vaddlv_s8: {
6603 Int = Intrinsic::aarch64_neon_saddlv;
6604 Ty = Int32Ty;
6605 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
6606 llvm::Type *Tys[2] = {Ty, VTy};
6607 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6608 return Builder.CreateTrunc(Ops[0], Int16Ty);
6609 }
6610 case NEON::BI__builtin_neon_vaddlv_s16: {
6611 Int = Intrinsic::aarch64_neon_saddlv;
6612 Ty = Int32Ty;
6613 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
6614 llvm::Type *Tys[2] = {Ty, VTy};
6615 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6616 }
6617 case NEON::BI__builtin_neon_vaddlvq_s8: {
6618 Int = Intrinsic::aarch64_neon_saddlv;
6619 Ty = Int32Ty;
6620 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
6621 llvm::Type *Tys[2] = {Ty, VTy};
6622 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6623 return Builder.CreateTrunc(Ops[0], Int16Ty);
6624 }
6625 case NEON::BI__builtin_neon_vaddlvq_s16: {
6626 Int = Intrinsic::aarch64_neon_saddlv;
6627 Ty = Int32Ty;
6628 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
6629 llvm::Type *Tys[2] = {Ty, VTy};
6630 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6631 }
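// --- Editorial sketch (assumes <arm_neon.h>): the 8-bit reductions get an
// i32 from the intrinsic and truncate to the i16 the builtin returns:
//   uint8x8_t v = vdup_n_u8(3);
//   uint16_t s = vaddlv_u8(v);   // widened sum of all 8 lanes -> 24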
6632 case NEON::BI__builtin_neon_vsri_n_v:
6633 case NEON::BI__builtin_neon_vsriq_n_v: {
6634 Int = Intrinsic::aarch64_neon_vsri;
6635 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6636 return EmitNeonCall(Intrin, Ops, "vsri_n");
6637 }
6638 case NEON::BI__builtin_neon_vsli_n_v:
6639 case NEON::BI__builtin_neon_vsliq_n_v: {
6640 Int = Intrinsic::aarch64_neon_vsli;
6641 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6642 return EmitNeonCall(Intrin, Ops, "vsli_n");
6643 }
6644 case NEON::BI__builtin_neon_vsra_n_v:
6645 case NEON::BI__builtin_neon_vsraq_n_v:
6646 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6647 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6648 return Builder.CreateAdd(Ops[0], Ops[1]);
6649 case NEON::BI__builtin_neon_vrsra_n_v:
6650 case NEON::BI__builtin_neon_vrsraq_n_v: {
6651 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6652 SmallVector<llvm::Value*, 2> TmpOps;
6653 TmpOps.push_back(Ops[1]);
6654 TmpOps.push_back(Ops[2]);
6655 Function* F = CGM.getIntrinsic(Int, Ty);
6656 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
6657 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
6658 return Builder.CreateAdd(Ops[0], tmp);
6659 }
6660 case NEON::BI__builtin_neon_vld1_v:
6661 case NEON::BI__builtin_neon_vld1q_v: {
6662 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
6663 }
6664 case NEON::BI__builtin_neon_vst1_v:
6665 case NEON::BI__builtin_neon_vst1q_v:
6666 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6667 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
6668 case NEON::BI__builtin_neon_vld1_lane_v:
6669 case NEON::BI__builtin_neon_vld1q_lane_v: {
6670 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6671 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
6672 PtrOp0.getAlignment());
6673 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
6674 }
6675 case NEON::BI__builtin_neon_vldap1_lane_s64:
6676 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
6677 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6678 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
6679 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
6680 LI->setAtomic(llvm::AtomicOrdering::Acquire);
6681 Ops[0] = LI;
6682 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
6683 }
6684 case NEON::BI__builtin_neon_vld1_dup_v:
6685 case NEON::BI__builtin_neon_vld1q_dup_v: {
6686 Value *V = PoisonValue::get(Ty);
6687 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
6688 PtrOp0.getAlignment());
6689 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
6690 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
6691 return EmitNeonSplat(Ops[0], CI);
6692 }
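// --- Editorial sketch (assumes <arm_neon.h>): load-and-duplicate is one
// scalar load, an insert into lane 0, then a zero-mask splat shuffle:
//   float32x4_t v = vld1q_dup_f32(p);   // every lane == *p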
6693 case NEON::BI__builtin_neon_vst1_lane_v:
6694 case NEON::BI__builtin_neon_vst1q_lane_v:
6695 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6696 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6697 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
6698 case NEON::BI__builtin_neon_vstl1_lane_s64:
6699 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
6700 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6701 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6702 llvm::StoreInst *SI =
6703 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
6704 SI->setAtomic(llvm::AtomicOrdering::Release);
6705 return SI;
6706 }
6707 case NEON::BI__builtin_neon_vld2_v:
6708 case NEON::BI__builtin_neon_vld2q_v: {
6709 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6710 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
6711 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6712 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6713 }
6714 case NEON::BI__builtin_neon_vld3_v:
6715 case NEON::BI__builtin_neon_vld3q_v: {
6716 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6717 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
6718 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6719 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6720 }
6721 case NEON::BI__builtin_neon_vld4_v:
6722 case NEON::BI__builtin_neon_vld4q_v: {
6723 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6724 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
6725 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6726 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6727 }
6728 case NEON::BI__builtin_neon_vld2_dup_v:
6729 case NEON::BI__builtin_neon_vld2q_dup_v: {
6730 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6731 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
6732 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6733 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6734 }
6735 case NEON::BI__builtin_neon_vld3_dup_v:
6736 case NEON::BI__builtin_neon_vld3q_dup_v: {
6737 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6738 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
6739 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6740 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6741 }
6742 case NEON::BI__builtin_neon_vld4_dup_v:
6743 case NEON::BI__builtin_neon_vld4q_dup_v: {
6744 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6745 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
6746 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6747 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6748 }
6749 case NEON::BI__builtin_neon_vld2_lane_v:
6750 case NEON::BI__builtin_neon_vld2q_lane_v: {
6751 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6752 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
6753 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
6754 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6755 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6756 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6757 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
6758 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6759 }
6760 case NEON::BI__builtin_neon_vld3_lane_v:
6761 case NEON::BI__builtin_neon_vld3q_lane_v: {
6762 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6763 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
6764 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
6765 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6766 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6767 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6768 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6769 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
6770 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6771 }
6772 case NEON::BI__builtin_neon_vld4_lane_v:
6773 case NEON::BI__builtin_neon_vld4q_lane_v: {
6774 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6775 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
6776 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
6777 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6778 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6779 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6780 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
6781 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
6782 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
6783 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6784 }
6785 case NEON::BI__builtin_neon_vst2_v:
6786 case NEON::BI__builtin_neon_vst2q_v: {
6787 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6788 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
6789 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
6790 Ops, "");
6791 }
6792 case NEON::BI__builtin_neon_vst2_lane_v:
6793 case NEON::BI__builtin_neon_vst2q_lane_v: {
6794 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6795 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
6796 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6797 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
6798 Ops, "");
6799 }
6800 case NEON::BI__builtin_neon_vst3_v:
6801 case NEON::BI__builtin_neon_vst3q_v: {
6802 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6803 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6804 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
6805 Ops, "");
6806 }
6807 case NEON::BI__builtin_neon_vst3_lane_v:
6808 case NEON::BI__builtin_neon_vst3q_lane_v: {
6809 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6810 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6811 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6812 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
6813 Ops, "");
6814 }
6815 case NEON::BI__builtin_neon_vst4_v:
6816 case NEON::BI__builtin_neon_vst4q_v: {
6817 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6818 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6819 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
6820 Ops, "");
6821 }
6822 case NEON::BI__builtin_neon_vst4_lane_v:
6823 case NEON::BI__builtin_neon_vst4q_lane_v: {
6824 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6825 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6826 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
6827 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
6828 Ops, "");
6829 }
6830 case NEON::BI__builtin_neon_vtrn_v:
6831 case NEON::BI__builtin_neon_vtrnq_v: {
6832 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6833 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6834 Value *SV = nullptr;
6835
6836 for (unsigned vi = 0; vi != 2; ++vi) {
6837 SmallVector<int, 16> Indices;
6838 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6839 Indices.push_back(i+vi);
6840 Indices.push_back(i+e+vi);
6841 }
6842 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6843 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
6844 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6845 }
6846 return SV;
6847 }
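// --- Editorial note (hedged): vtrn stores both transposed halves through the
// result pointer in Ops[0]; for a 4-lane vector the two masks computed above
// are <0,4,2,6> and <1,5,3,7>. Assuming <arm_neon.h>:
//   int32x2x2_t r = vtrn_s32(a, b);   // r.val[0]={a0,b0}, r.val[1]={a1,b1}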
6848 case NEON::BI__builtin_neon_vuzp_v:
6849 case NEON::BI__builtin_neon_vuzpq_v: {
6850 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6851 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6852 Value *SV = nullptr;
6853
6854 for (unsigned vi = 0; vi != 2; ++vi) {
6855 SmallVector<int, 16> Indices;
6856 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
6857 Indices.push_back(2*i+vi);
6858
6859 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6860 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
6861 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6862 }
6863 return SV;
6864 }
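// Likewise for vuzp: with e == 4 the masks are <0,2,4,6> (even lanes, UZP1)
// for vi == 0 and <1,3,5,7> (odd lanes, UZP2) for vi == 1.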
6865 case NEON::BI__builtin_neon_vzip_v:
6866 case NEON::BI__builtin_neon_vzipq_v: {
6867 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6868 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6869 Value *SV = nullptr;
6870
6871 for (unsigned vi = 0; vi != 2; ++vi) {
6872 SmallVector<int, 16> Indices;
6873 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6874 Indices.push_back((i + vi*e) >> 1);
6875 Indices.push_back(((i + vi*e) >> 1)+e);
6876 }
6877 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6878 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
6879 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6880 }
6881 return SV;
6882 }
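// And for vzip: with e == 4 the masks are <0,4,1,5> (ZIP1) for vi == 0 and
// <2,6,3,7> (ZIP2) for vi == 1, interleaving the low and high halves of the
// two inputs respectively.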
6883 case NEON::BI__builtin_neon_vqtbl1q_v: {
6884 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
6885 Ops, "vtbl1");
6886 }
6887 case NEON::BI__builtin_neon_vqtbl2q_v: {
6888 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
6889 Ops, "vtbl2");
6890 }
6891 case NEON::BI__builtin_neon_vqtbl3q_v: {
6892 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
6893 Ops, "vtbl3");
6894 }
6895 case NEON::BI__builtin_neon_vqtbl4q_v: {
6896 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
6897 Ops, "vtbl4");
6898 }
6899 case NEON::BI__builtin_neon_vqtbx1q_v: {
6900 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
6901 Ops, "vtbx1");
6902 }
6903 case NEON::BI__builtin_neon_vqtbx2q_v: {
6904 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
6905 Ops, "vtbx2");
6906 }
6907 case NEON::BI__builtin_neon_vqtbx3q_v: {
6908 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
6909 Ops, "vtbx3");
6910 }
6911 case NEON::BI__builtin_neon_vqtbx4q_v: {
6912 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
6913 Ops, "vtbx4");
6914 }
6915 case NEON::BI__builtin_neon_vsqadd_v:
6916 case NEON::BI__builtin_neon_vsqaddq_v: {
6917 Int = Intrinsic::aarch64_neon_usqadd;
6918 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
6919 }
6920 case NEON::BI__builtin_neon_vuqadd_v:
6921 case NEON::BI__builtin_neon_vuqaddq_v: {
6922 Int = Intrinsic::aarch64_neon_suqadd;
6923 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
6924 }
6925
6926 case NEON::BI__builtin_neon_vluti2_laneq_mf8:
6927 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
6928 case NEON::BI__builtin_neon_vluti2_laneq_f16:
6929 case NEON::BI__builtin_neon_vluti2_laneq_p16:
6930 case NEON::BI__builtin_neon_vluti2_laneq_p8:
6931 case NEON::BI__builtin_neon_vluti2_laneq_s16:
6932 case NEON::BI__builtin_neon_vluti2_laneq_s8:
6933 case NEON::BI__builtin_neon_vluti2_laneq_u16:
6934 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
6935 Int = Intrinsic::aarch64_neon_vluti2_laneq;
6936 llvm::Type *Tys[2];
6937 Tys[0] = Ty;
6938 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
6939 /*isQuad*/ false));
6940 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
6941 }
6942 case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
6943 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
6944 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
6945 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
6946 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
6947 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
6948 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
6949 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
6950 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
6951 Int = Intrinsic::aarch64_neon_vluti2_laneq;
6952 llvm::Type *Tys[2];
6953 Tys[0] = Ty;
6954 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
6955 /*isQuad*/ true));
6956 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
6957 }
6958 case NEON::BI__builtin_neon_vluti2_lane_mf8:
6959 case NEON::BI__builtin_neon_vluti2_lane_bf16:
6960 case NEON::BI__builtin_neon_vluti2_lane_f16:
6961 case NEON::BI__builtin_neon_vluti2_lane_p16:
6962 case NEON::BI__builtin_neon_vluti2_lane_p8:
6963 case NEON::BI__builtin_neon_vluti2_lane_s16:
6964 case NEON::BI__builtin_neon_vluti2_lane_s8:
6965 case NEON::BI__builtin_neon_vluti2_lane_u16:
6966 case NEON::BI__builtin_neon_vluti2_lane_u8: {
6967 Int = Intrinsic::aarch64_neon_vluti2_lane;
6968 llvm::Type *Tys[2];
6969 Tys[0] = Ty;
6970 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
6971 /*isQuad*/ false));
6972 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
6973 }
6974 case NEON::BI__builtin_neon_vluti2q_lane_mf8:
6975 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
6976 case NEON::BI__builtin_neon_vluti2q_lane_f16:
6977 case NEON::BI__builtin_neon_vluti2q_lane_p16:
6978 case NEON::BI__builtin_neon_vluti2q_lane_p8:
6979 case NEON::BI__builtin_neon_vluti2q_lane_s16:
6980 case NEON::BI__builtin_neon_vluti2q_lane_s8:
6981 case NEON::BI__builtin_neon_vluti2q_lane_u16:
6982 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
6983 Int = Intrinsic::aarch64_neon_vluti2_lane;
6984 llvm::Type *Tys[2];
6985 Tys[0] = Ty;
6986 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
6987 /*isQuad*/ true));
6988 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
6989 }
6990 case NEON::BI__builtin_neon_vluti4q_lane_mf8:
6991 case NEON::BI__builtin_neon_vluti4q_lane_p8:
6992 case NEON::BI__builtin_neon_vluti4q_lane_s8:
6993 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
6994 Int = Intrinsic::aarch64_neon_vluti4q_lane;
6995 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane");
6996 }
6997 case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
6998 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
6999 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
7000 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
7001 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
7002 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq");
7003 }
7004 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
7005 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
7006 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
7007 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
7008 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
7009 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
7010 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
7011 }
7012 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
7013 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
7014 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
7015 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
7016 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
7017 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
7018 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
7019 }
7020 case NEON::BI__builtin_neon_vmmlaq_f16_mf8_fpm:
7021 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla,
7022 {llvm::FixedVectorType::get(HalfTy, 8),
7023 llvm::FixedVectorType::get(Int8Ty, 16)},
7024 Ops, E, "fmmla");
7025 case NEON::BI__builtin_neon_vmmlaq_f32_mf8_fpm:
7026 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla,
7027 {llvm::FixedVectorType::get(FloatTy, 4),
7028 llvm::FixedVectorType::get(Int8Ty, 16)},
7029 Ops, E, "fmmla");
7030 case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
7031 ExtractLow = true;
7032 [[fallthrough]];
7033 case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
7034 case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
7035 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
7036 llvm::FixedVectorType::get(BFloatTy, 8),
7037 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7038 case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
7039 ExtractLow = true;
7040 [[fallthrough]];
7041 case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
7042 case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
7043 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
7044 llvm::FixedVectorType::get(BFloatTy, 8),
7045 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7046 case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
7047 ExtractLow = true;
7048 [[fallthrough]];
7049 case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
7050 case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
7051 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
7052 llvm::FixedVectorType::get(HalfTy, 8),
7053 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7054 case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
7055 ExtractLow = true;
7056 [[fallthrough]];
7057 case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
7058 case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
7059 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
7060 llvm::FixedVectorType::get(HalfTy, 8),
7061 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7062 case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
7063 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7064 llvm::FixedVectorType::get(Int8Ty, 8),
7065 Ops[0]->getType(), false, Ops, E, "vfcvtn");
7066 case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
7067 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7068 llvm::FixedVectorType::get(Int8Ty, 8),
7069 llvm::FixedVectorType::get(HalfTy, 4), false, Ops,
7070 E, "vfcvtn");
7071 case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
7072 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7073 llvm::FixedVectorType::get(Int8Ty, 16),
7074 llvm::FixedVectorType::get(HalfTy, 8), false, Ops,
7075 E, "vfcvtn");
7076 case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
7077 llvm::Type *Ty = llvm::FixedVectorType::get(Int8Ty, 16);
7078 Ops[0] = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
7079 uint64_t(0));
7080 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn2, Ty,
7081 Ops[1]->getType(), false, Ops, E, "vfcvtn2");
7082 }
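// vcvt_high first widens the existing <8 x i8> low half into a <16 x i8>
// value whose upper eight lanes are poison, so the fcvtn2 intrinsic can
// populate that upper half while preserving the lower one.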
7083
7084 case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
7085 case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
7086 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2, false, HalfTy,
7087 Ops, E, "fdot2");
7088 case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
7089 case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
7090 ExtendLaneArg = true;
7091 [[fallthrough]];
7092 case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
7093 case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
7094 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2_lane,
7095 ExtendLaneArg, HalfTy, Ops, E, "fdot2_lane");
7096 case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
7097 case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
7098 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4, false,
7099 FloatTy, Ops, E, "fdot4");
7100 case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
7101 case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
7102 ExtendLaneArg = true;
7103 [[fallthrough]];
7104 case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
7105 case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
7106 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4_lane,
7107 ExtendLaneArg, FloatTy, Ops, E, "fdot4_lane");
7108
7109 case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
7110 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalb,
7111 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7112 "vmlal");
7113 case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
7114 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalt,
7115 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7116 "vmlal");
7117 case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
7118 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbb,
7119 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7120 "vmlall");
7121 case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
7122 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbt,
7123 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7124 "vmlall");
7125 case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
7126 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltb,
7127 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7128 "vmlall");
7129 case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
7130 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltt,
7131 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7132 "vmlall");
7133 case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
7134 ExtendLaneArg = true;
7135 [[fallthrough]];
7136 case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
7137 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalb_lane,
7138 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7139 case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
7140 ExtendLaneArg = true;
7141 [[fallthrough]];
7142 case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
7143 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalt_lane,
7144 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7145 case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
7146 ExtendLaneArg = true;
7147 [[fallthrough]];
7148 case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
7149 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbb_lane,
7150 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7151 case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
7152 ExtendLaneArg = true;
7153 [[fallthrough]];
7154 case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
7155 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbt_lane,
7156 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7157 case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
7158 ExtendLaneArg = true;
7159 [[fallthrough]];
7160 case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
7161 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltb_lane,
7162 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7163 case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
7164 ExtendLaneArg = true;
7165 [[fallthrough]];
7166 case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
7167 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltt_lane,
7168 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7169 case NEON::BI__builtin_neon_vamin_f16:
7170 case NEON::BI__builtin_neon_vaminq_f16:
7171 case NEON::BI__builtin_neon_vamin_f32:
7172 case NEON::BI__builtin_neon_vaminq_f32:
7173 case NEON::BI__builtin_neon_vaminq_f64: {
7174 Int = Intrinsic::aarch64_neon_famin;
7175 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
7176 }
7177 case NEON::BI__builtin_neon_vamax_f16:
7178 case NEON::BI__builtin_neon_vamaxq_f16:
7179 case NEON::BI__builtin_neon_vamax_f32:
7180 case NEON::BI__builtin_neon_vamaxq_f32:
7181 case NEON::BI__builtin_neon_vamaxq_f64: {
7182 Int = Intrinsic::aarch64_neon_famax;
7183 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
7184 }
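// As an illustrative sketch (value names chosen for exposition), a call such
// as vamaxq_f32(a, b) lowers through EmitNeonCall to a single intrinsic call:
//   %famax = call <4 x float> @llvm.aarch64.neon.famax.v4f32(
//                <4 x float> %a, <4 x float> %b)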
7185 case NEON::BI__builtin_neon_vscale_f16:
7186 case NEON::BI__builtin_neon_vscaleq_f16:
7187 case NEON::BI__builtin_neon_vscale_f32:
7188 case NEON::BI__builtin_neon_vscaleq_f32:
7189 case NEON::BI__builtin_neon_vscaleq_f64: {
7190 Int = Intrinsic::aarch64_neon_fp8_fscale;
7191 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fscale");
7192 }
7193 }
7194}
7195
7196Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
7197 const CallExpr *E) {
7198 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
7199 BuiltinID == BPF::BI__builtin_btf_type_id ||
7200 BuiltinID == BPF::BI__builtin_preserve_type_info ||
7201 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
7202 "unexpected BPF builtin");
7203
7204 // A sequence number, injected into IR builtin functions, to
7205 // prevent CSE when the only difference between otherwise identical
7206 // calls is the debuginfo metadata.
7207 static uint32_t BuiltinSeqNum;
7208
7209 switch (BuiltinID) {
7210 default:
7211 llvm_unreachable("Unexpected BPF builtin");
7212 case BPF::BI__builtin_preserve_field_info: {
7213 const Expr *Arg = E->getArg(0);
7214 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
7215
7216 if (!getDebugInfo()) {
7217 CGM.Error(E->getExprLoc(),
7218 "using __builtin_preserve_field_info() without -g");
7219 return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
7220 : EmitLValue(Arg).emitRawPointer(*this);
7221 }
7222
7223 // Enable underlying preserve_*_access_index() generation.
7224 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
7225 IsInPreservedAIRegion = true;
7226 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
7227 : EmitLValue(Arg).emitRawPointer(*this);
7228 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
7229
7230 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7231 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
7232
7233 // Build the IR for the preserve_field_info intrinsic.
7234 llvm::Function *FnGetFieldInfo = Intrinsic::getOrInsertDeclaration(
7235 &CGM.getModule(), Intrinsic::bpf_preserve_field_info,
7236 {FieldAddr->getType()});
7237 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
7238 }
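// Illustrative lowering (the kind value 0, byte offset, is chosen only for
// exposition): a call such as
//   __builtin_preserve_field_info(s->f, 0)
// becomes
//   %info = call i32 @llvm.bpf.preserve.field.info.p0(ptr %f.addr, i64 0)
// where %f.addr was emitted inside the preserve_*_access_index region above.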
7239 case BPF::BI__builtin_btf_type_id:
7240 case BPF::BI__builtin_preserve_type_info: {
7241 if (!getDebugInfo()) {
7242 CGM.Error(E->getExprLoc(), "using builtin function without -g");
7243 return nullptr;
7244 }
7245
7246 const Expr *Arg0 = E->getArg(0);
7247 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
7248 Arg0->getType(), Arg0->getExprLoc());
7249
7250 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7251 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
7252 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
7253
7254 llvm::Function *FnDecl;
7255 if (BuiltinID == BPF::BI__builtin_btf_type_id)
7256 FnDecl = Intrinsic::getOrInsertDeclaration(
7257 &CGM.getModule(), Intrinsic::bpf_btf_type_id, {});
7258 else
7259 FnDecl = Intrinsic::getOrInsertDeclaration(
7260 &CGM.getModule(), Intrinsic::bpf_preserve_type_info, {});
7261 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
7262 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
7263 return Fn;
7264 }
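// Illustrative lowering: __builtin_btf_type_id(v, 0) becomes roughly
//   %id = call i64 @llvm.bpf.btf.type.id(i32 <seq>, i64 0),
//             !preserve.access.index !n
// where <seq> is the CSE-defeating sequence number and !n refers to the
// standalone debug info created for the argument's type.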
7265 case BPF::BI__builtin_preserve_enum_value: {
7266 if (!getDebugInfo()) {
7267 CGM.Error(E->getExprLoc(), "using builtin function without -g");
7268 return nullptr;
7269 }
7270
7271 const Expr *Arg0 = E->getArg(0);
7272 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
7273 Arg0->getType(), Arg0->getExprLoc());
7274
7275 // Find enumerator
7276 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
7277 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
7278 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
7279 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
7280
7281 auto InitVal = Enumerator->getInitVal();
7282 std::string InitValStr;
7283 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
7284 InitValStr = std::to_string(InitVal.getSExtValue());
7285 else
7286 InitValStr = std::to_string(InitVal.getZExtValue());
7287 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
7288 Value *EnumStrVal = Builder.CreateGlobalString(EnumStr);
7289
7290 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7291 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
7292 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
7293
7294 llvm::Function *IntrinsicFn = Intrinsic::getOrInsertDeclaration(
7295 &CGM.getModule(), Intrinsic::bpf_preserve_enum_value, {});
7296 CallInst *Fn =
7297 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
7298 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
7299 return Fn;
7300 }
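// Illustrative encoding: with enum { KIND_A = 3 }, the enumerator is emitted
// as the global string "KIND_A:3" and passed to
// llvm.bpf.preserve.enum.value along with the sequence number and the flag,
// again tagged with !preserve.access.index metadata.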
7301 }
7302}
7303
7305Value *CodeGenFunction::BuildVector(ArrayRef<llvm::Value *> Ops) {
7306 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
7307 "Not a power-of-two sized vector!");
7308 bool AllConstants = true;
7309 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
7310 AllConstants &= isa<Constant>(Ops[i]);
7311
7312 // If this is a constant vector, create a ConstantVector.
7313 if (AllConstants) {
7314 SmallVector<llvm::Constant *, 16> CstOps;
7315 for (llvm::Value *Op : Ops)
7316 CstOps.push_back(cast<Constant>(Op));
7317 return llvm::ConstantVector::get(CstOps);
7318 }
7319
7320 // Otherwise, insertelement the values to build the vector.
7321 Value *Result = llvm::PoisonValue::get(
7322 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
7323
7324 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7325 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
7326
7327 return Result;
7328}
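// Usage sketch: given four constant i32 operands, BuildVector folds to a
// single ConstantVector (e.g. <4 x i32> <i32 0, i32 1, i32 2, i32 3>); if
// any operand is not a Constant, it instead emits a chain of insertelement
// instructions starting from a poison vector.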
7329
7330Value *CodeGenFunction::EmitAArch64CpuInit() {
7331 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
7332 llvm::FunctionCallee Func =
7333 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
7334 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
7335 cast<llvm::GlobalValue>(Func.getCallee())
7336 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
7337 return Builder.CreateCall(Func);
7338}
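// __init_cpu_features_resolver is the runtime hook (provided by
// compiler-rt's CPU-model support code) that populates the
// __aarch64_cpu_features structure read by EmitAArch64CpuSupports below;
// it is declared here as a plain void() runtime function.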
7339
7340Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
7341 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
7342 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
7343 llvm::SmallVector<StringRef, 8> Features;
7344 ArgStr.split(Features, "+");
7345 for (auto &Feature : Features) {
7346 Feature = Feature.trim();
7347 if (!llvm::AArch64::parseFMVExtension(Feature))
7348 return Builder.getFalse();
7349 if (Feature != "default")
7350 Features.push_back(Feature);
7351 }
7352 return EmitAArch64CpuSupports(Features);
7353}
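// Source-level illustration: __builtin_cpu_supports("sve2+fp16") splits its
// argument into {"sve2", "fp16"}; any token that parseFMVExtension rejects
// folds the whole expression to false, and the recognized features feed the
// mask-based check below.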
7354
7355llvm::Value *
7356CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
7357 llvm::APInt FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
7358 Value *Result = Builder.getTrue();
7359 if (FeaturesMask != 0) {
7360 // Get features from structure in runtime library
7361 // struct {
7362 // unsigned long long features;
7363 // } __aarch64_cpu_features;
7364 llvm::Type *STy = llvm::StructType::get(Int64Ty);
7365 llvm::Constant *AArch64CPUFeatures =
7366 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
7367 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
7368 llvm::Value *CpuFeatures = Builder.CreateGEP(
7369 STy, AArch64CPUFeatures,
7370 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
7371 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
7372 CharUnits::fromQuantity(8));
7373 Value *Mask = Builder.getInt(FeaturesMask.trunc(64));
7374 Value *Bitset = Builder.CreateAnd(Features, Mask);
7375 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
7376 Result = Builder.CreateAnd(Result, Cmp);
7377 }
7378 return Result;
7379}