//===---------- ARM.cpp - Emit LLVM Code for builtins ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "ABIInfo.h"
#include "CGBuiltin.h"
#include "CGDebugInfo.h"
#include "TargetInfo.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsBPF.h"
#include "llvm/TargetParser/AArch64TargetParser.h"

#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm;
using namespace clang::aarch64;

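// Map an AArch64 MSVC-compatible builtin onto the generic MSVCIntrin code so
// that all targets can share a single emission path for these intrinsics.
// Returns std::nullopt when the builtin needs no such translation.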
static std::optional<CodeGenFunction::MSVCIntrin>
translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  switch (BuiltinID) {
  default:
    return std::nullopt;
  case clang::AArch64::BI_BitScanForward:
  case clang::AArch64::BI_BitScanForward64:
    return MSVCIntrin::_BitScanForward;
  case clang::AArch64::BI_BitScanReverse:
  case clang::AArch64::BI_BitScanReverse64:
    return MSVCIntrin::_BitScanReverse;
  case clang::AArch64::BI_InterlockedAnd64:
    return MSVCIntrin::_InterlockedAnd;
  case clang::AArch64::BI_InterlockedExchange64:
    return MSVCIntrin::_InterlockedExchange;
  case clang::AArch64::BI_InterlockedExchangeAdd64:
    return MSVCIntrin::_InterlockedExchangeAdd;
  case clang::AArch64::BI_InterlockedExchangeSub64:
    return MSVCIntrin::_InterlockedExchangeSub;
  case clang::AArch64::BI_InterlockedOr64:
    return MSVCIntrin::_InterlockedOr;
  case clang::AArch64::BI_InterlockedXor64:
    return MSVCIntrin::_InterlockedXor;
  case clang::AArch64::BI_InterlockedDecrement64:
    return MSVCIntrin::_InterlockedDecrement;
  case clang::AArch64::BI_InterlockedIncrement64:
    return MSVCIntrin::_InterlockedIncrement;
  case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
    return MSVCIntrin::_InterlockedExchangeAdd_acq;
  case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
    return MSVCIntrin::_InterlockedExchangeAdd_rel;
  case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
    return MSVCIntrin::_InterlockedExchangeAdd_nf;
  case clang::AArch64::BI_InterlockedExchange8_acq:
  case clang::AArch64::BI_InterlockedExchange16_acq:
  case clang::AArch64::BI_InterlockedExchange_acq:
  case clang::AArch64::BI_InterlockedExchange64_acq:
  case clang::AArch64::BI_InterlockedExchangePointer_acq:
    return MSVCIntrin::_InterlockedExchange_acq;
  case clang::AArch64::BI_InterlockedExchange8_rel:
  case clang::AArch64::BI_InterlockedExchange16_rel:
  case clang::AArch64::BI_InterlockedExchange_rel:
  case clang::AArch64::BI_InterlockedExchange64_rel:
  case clang::AArch64::BI_InterlockedExchangePointer_rel:
    return MSVCIntrin::_InterlockedExchange_rel;
  case clang::AArch64::BI_InterlockedExchange8_nf:
  case clang::AArch64::BI_InterlockedExchange16_nf:
  case clang::AArch64::BI_InterlockedExchange_nf:
  case clang::AArch64::BI_InterlockedExchange64_nf:
  case clang::AArch64::BI_InterlockedExchangePointer_nf:
    return MSVCIntrin::_InterlockedExchange_nf;
  case clang::AArch64::BI_InterlockedCompareExchange8_acq:
  case clang::AArch64::BI_InterlockedCompareExchange16_acq:
  case clang::AArch64::BI_InterlockedCompareExchange_acq:
  case clang::AArch64::BI_InterlockedCompareExchange64_acq:
  case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
    return MSVCIntrin::_InterlockedCompareExchange_acq;
  case clang::AArch64::BI_InterlockedCompareExchange8_rel:
  case clang::AArch64::BI_InterlockedCompareExchange16_rel:
  case clang::AArch64::BI_InterlockedCompareExchange_rel:
  case clang::AArch64::BI_InterlockedCompareExchange64_rel:
  case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
    return MSVCIntrin::_InterlockedCompareExchange_rel;
  case clang::AArch64::BI_InterlockedCompareExchange8_nf:
  case clang::AArch64::BI_InterlockedCompareExchange16_nf:
  case clang::AArch64::BI_InterlockedCompareExchange_nf:
  case clang::AArch64::BI_InterlockedCompareExchange64_nf:
    return MSVCIntrin::_InterlockedCompareExchange_nf;
  case clang::AArch64::BI_InterlockedCompareExchange128:
    return MSVCIntrin::_InterlockedCompareExchange128;
  case clang::AArch64::BI_InterlockedCompareExchange128_acq:
    return MSVCIntrin::_InterlockedCompareExchange128_acq;
  case clang::AArch64::BI_InterlockedCompareExchange128_nf:
    return MSVCIntrin::_InterlockedCompareExchange128_nf;
  case clang::AArch64::BI_InterlockedCompareExchange128_rel:
    return MSVCIntrin::_InterlockedCompareExchange128_rel;
  case clang::AArch64::BI_InterlockedOr8_acq:
  case clang::AArch64::BI_InterlockedOr16_acq:
  case clang::AArch64::BI_InterlockedOr_acq:
  case clang::AArch64::BI_InterlockedOr64_acq:
    return MSVCIntrin::_InterlockedOr_acq;
  case clang::AArch64::BI_InterlockedOr8_rel:
  case clang::AArch64::BI_InterlockedOr16_rel:
  case clang::AArch64::BI_InterlockedOr_rel:
  case clang::AArch64::BI_InterlockedOr64_rel:
    return MSVCIntrin::_InterlockedOr_rel;
  case clang::AArch64::BI_InterlockedOr8_nf:
  case clang::AArch64::BI_InterlockedOr16_nf:
  case clang::AArch64::BI_InterlockedOr_nf:
  case clang::AArch64::BI_InterlockedOr64_nf:
    return MSVCIntrin::_InterlockedOr_nf;
  case clang::AArch64::BI_InterlockedXor8_acq:
  case clang::AArch64::BI_InterlockedXor16_acq:
  case clang::AArch64::BI_InterlockedXor_acq:
  case clang::AArch64::BI_InterlockedXor64_acq:
    return MSVCIntrin::_InterlockedXor_acq;
  case clang::AArch64::BI_InterlockedXor8_rel:
  case clang::AArch64::BI_InterlockedXor16_rel:
  case clang::AArch64::BI_InterlockedXor_rel:
  case clang::AArch64::BI_InterlockedXor64_rel:
    return MSVCIntrin::_InterlockedXor_rel;
  case clang::AArch64::BI_InterlockedXor8_nf:
  case clang::AArch64::BI_InterlockedXor16_nf:
  case clang::AArch64::BI_InterlockedXor_nf:
  case clang::AArch64::BI_InterlockedXor64_nf:
    return MSVCIntrin::_InterlockedXor_nf;
  case clang::AArch64::BI_InterlockedAnd8_acq:
  case clang::AArch64::BI_InterlockedAnd16_acq:
  case clang::AArch64::BI_InterlockedAnd_acq:
  case clang::AArch64::BI_InterlockedAnd64_acq:
    return MSVCIntrin::_InterlockedAnd_acq;
  case clang::AArch64::BI_InterlockedAnd8_rel:
  case clang::AArch64::BI_InterlockedAnd16_rel:
  case clang::AArch64::BI_InterlockedAnd_rel:
  case clang::AArch64::BI_InterlockedAnd64_rel:
    return MSVCIntrin::_InterlockedAnd_rel;
  case clang::AArch64::BI_InterlockedAnd8_nf:
  case clang::AArch64::BI_InterlockedAnd16_nf:
  case clang::AArch64::BI_InterlockedAnd_nf:
  case clang::AArch64::BI_InterlockedAnd64_nf:
    return MSVCIntrin::_InterlockedAnd_nf;
  case clang::AArch64::BI_InterlockedIncrement16_acq:
  case clang::AArch64::BI_InterlockedIncrement_acq:
  case clang::AArch64::BI_InterlockedIncrement64_acq:
    return MSVCIntrin::_InterlockedIncrement_acq;
  case clang::AArch64::BI_InterlockedIncrement16_rel:
  case clang::AArch64::BI_InterlockedIncrement_rel:
  case clang::AArch64::BI_InterlockedIncrement64_rel:
    return MSVCIntrin::_InterlockedIncrement_rel;
  case clang::AArch64::BI_InterlockedIncrement16_nf:
  case clang::AArch64::BI_InterlockedIncrement_nf:
  case clang::AArch64::BI_InterlockedIncrement64_nf:
    return MSVCIntrin::_InterlockedIncrement_nf;
  case clang::AArch64::BI_InterlockedDecrement16_acq:
  case clang::AArch64::BI_InterlockedDecrement_acq:
  case clang::AArch64::BI_InterlockedDecrement64_acq:
    return MSVCIntrin::_InterlockedDecrement_acq;
  case clang::AArch64::BI_InterlockedDecrement16_rel:
  case clang::AArch64::BI_InterlockedDecrement_rel:
  case clang::AArch64::BI_InterlockedDecrement64_rel:
    return MSVCIntrin::_InterlockedDecrement_rel;
  case clang::AArch64::BI_InterlockedDecrement16_nf:
  case clang::AArch64::BI_InterlockedDecrement_nf:
  case clang::AArch64::BI_InterlockedDecrement64_nf:
    return MSVCIntrin::_InterlockedDecrement_nf;
  }
  llvm_unreachable("must return from switch");
}

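// The ARM counterpart of translateAarch64ToMsvcIntrin: map an ARM
// MSVC-compatible builtin onto the generic MSVCIntrin code.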
static std::optional<CodeGenFunction::MSVCIntrin>
translateArmToMsvcIntrin(unsigned BuiltinID) {
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  switch (BuiltinID) {
  default:
    return std::nullopt;
  case clang::ARM::BI_BitScanForward:
  case clang::ARM::BI_BitScanForward64:
    return MSVCIntrin::_BitScanForward;
  case clang::ARM::BI_BitScanReverse:
  case clang::ARM::BI_BitScanReverse64:
    return MSVCIntrin::_BitScanReverse;
  case clang::ARM::BI_InterlockedAnd64:
    return MSVCIntrin::_InterlockedAnd;
  case clang::ARM::BI_InterlockedExchange64:
    return MSVCIntrin::_InterlockedExchange;
  case clang::ARM::BI_InterlockedExchangeAdd64:
    return MSVCIntrin::_InterlockedExchangeAdd;
  case clang::ARM::BI_InterlockedExchangeSub64:
    return MSVCIntrin::_InterlockedExchangeSub;
  case clang::ARM::BI_InterlockedOr64:
    return MSVCIntrin::_InterlockedOr;
  case clang::ARM::BI_InterlockedXor64:
    return MSVCIntrin::_InterlockedXor;
  case clang::ARM::BI_InterlockedDecrement64:
    return MSVCIntrin::_InterlockedDecrement;
  case clang::ARM::BI_InterlockedIncrement64:
    return MSVCIntrin::_InterlockedIncrement;
  case clang::ARM::BI_InterlockedExchangeAdd8_acq:
  case clang::ARM::BI_InterlockedExchangeAdd16_acq:
  case clang::ARM::BI_InterlockedExchangeAdd_acq:
  case clang::ARM::BI_InterlockedExchangeAdd64_acq:
    return MSVCIntrin::_InterlockedExchangeAdd_acq;
  case clang::ARM::BI_InterlockedExchangeAdd8_rel:
  case clang::ARM::BI_InterlockedExchangeAdd16_rel:
  case clang::ARM::BI_InterlockedExchangeAdd_rel:
  case clang::ARM::BI_InterlockedExchangeAdd64_rel:
    return MSVCIntrin::_InterlockedExchangeAdd_rel;
  case clang::ARM::BI_InterlockedExchangeAdd8_nf:
  case clang::ARM::BI_InterlockedExchangeAdd16_nf:
  case clang::ARM::BI_InterlockedExchangeAdd_nf:
  case clang::ARM::BI_InterlockedExchangeAdd64_nf:
    return MSVCIntrin::_InterlockedExchangeAdd_nf;
  case clang::ARM::BI_InterlockedExchange8_acq:
  case clang::ARM::BI_InterlockedExchange16_acq:
  case clang::ARM::BI_InterlockedExchange_acq:
  case clang::ARM::BI_InterlockedExchange64_acq:
  case clang::ARM::BI_InterlockedExchangePointer_acq:
    return MSVCIntrin::_InterlockedExchange_acq;
  case clang::ARM::BI_InterlockedExchange8_rel:
  case clang::ARM::BI_InterlockedExchange16_rel:
  case clang::ARM::BI_InterlockedExchange_rel:
  case clang::ARM::BI_InterlockedExchange64_rel:
  case clang::ARM::BI_InterlockedExchangePointer_rel:
    return MSVCIntrin::_InterlockedExchange_rel;
  case clang::ARM::BI_InterlockedExchange8_nf:
  case clang::ARM::BI_InterlockedExchange16_nf:
  case clang::ARM::BI_InterlockedExchange_nf:
  case clang::ARM::BI_InterlockedExchange64_nf:
  case clang::ARM::BI_InterlockedExchangePointer_nf:
    return MSVCIntrin::_InterlockedExchange_nf;
  case clang::ARM::BI_InterlockedCompareExchange8_acq:
  case clang::ARM::BI_InterlockedCompareExchange16_acq:
  case clang::ARM::BI_InterlockedCompareExchange_acq:
  case clang::ARM::BI_InterlockedCompareExchange64_acq:
  case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
    return MSVCIntrin::_InterlockedCompareExchange_acq;
  case clang::ARM::BI_InterlockedCompareExchange8_rel:
  case clang::ARM::BI_InterlockedCompareExchange16_rel:
  case clang::ARM::BI_InterlockedCompareExchange_rel:
  case clang::ARM::BI_InterlockedCompareExchange64_rel:
  case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
    return MSVCIntrin::_InterlockedCompareExchange_rel;
  case clang::ARM::BI_InterlockedCompareExchange8_nf:
  case clang::ARM::BI_InterlockedCompareExchange16_nf:
  case clang::ARM::BI_InterlockedCompareExchange_nf:
  case clang::ARM::BI_InterlockedCompareExchange64_nf:
    return MSVCIntrin::_InterlockedCompareExchange_nf;
  case clang::ARM::BI_InterlockedOr8_acq:
  case clang::ARM::BI_InterlockedOr16_acq:
  case clang::ARM::BI_InterlockedOr_acq:
  case clang::ARM::BI_InterlockedOr64_acq:
    return MSVCIntrin::_InterlockedOr_acq;
  case clang::ARM::BI_InterlockedOr8_rel:
  case clang::ARM::BI_InterlockedOr16_rel:
  case clang::ARM::BI_InterlockedOr_rel:
  case clang::ARM::BI_InterlockedOr64_rel:
    return MSVCIntrin::_InterlockedOr_rel;
  case clang::ARM::BI_InterlockedOr8_nf:
  case clang::ARM::BI_InterlockedOr16_nf:
  case clang::ARM::BI_InterlockedOr_nf:
  case clang::ARM::BI_InterlockedOr64_nf:
    return MSVCIntrin::_InterlockedOr_nf;
  case clang::ARM::BI_InterlockedXor8_acq:
  case clang::ARM::BI_InterlockedXor16_acq:
  case clang::ARM::BI_InterlockedXor_acq:
  case clang::ARM::BI_InterlockedXor64_acq:
    return MSVCIntrin::_InterlockedXor_acq;
  case clang::ARM::BI_InterlockedXor8_rel:
  case clang::ARM::BI_InterlockedXor16_rel:
  case clang::ARM::BI_InterlockedXor_rel:
  case clang::ARM::BI_InterlockedXor64_rel:
    return MSVCIntrin::_InterlockedXor_rel;
  case clang::ARM::BI_InterlockedXor8_nf:
  case clang::ARM::BI_InterlockedXor16_nf:
  case clang::ARM::BI_InterlockedXor_nf:
  case clang::ARM::BI_InterlockedXor64_nf:
    return MSVCIntrin::_InterlockedXor_nf;
  case clang::ARM::BI_InterlockedAnd8_acq:
  case clang::ARM::BI_InterlockedAnd16_acq:
  case clang::ARM::BI_InterlockedAnd_acq:
  case clang::ARM::BI_InterlockedAnd64_acq:
    return MSVCIntrin::_InterlockedAnd_acq;
  case clang::ARM::BI_InterlockedAnd8_rel:
  case clang::ARM::BI_InterlockedAnd16_rel:
  case clang::ARM::BI_InterlockedAnd_rel:
  case clang::ARM::BI_InterlockedAnd64_rel:
    return MSVCIntrin::_InterlockedAnd_rel;
  case clang::ARM::BI_InterlockedAnd8_nf:
  case clang::ARM::BI_InterlockedAnd16_nf:
  case clang::ARM::BI_InterlockedAnd_nf:
  case clang::ARM::BI_InterlockedAnd64_nf:
    return MSVCIntrin::_InterlockedAnd_nf;
  case clang::ARM::BI_InterlockedIncrement16_acq:
  case clang::ARM::BI_InterlockedIncrement_acq:
  case clang::ARM::BI_InterlockedIncrement64_acq:
    return MSVCIntrin::_InterlockedIncrement_acq;
  case clang::ARM::BI_InterlockedIncrement16_rel:
  case clang::ARM::BI_InterlockedIncrement_rel:
  case clang::ARM::BI_InterlockedIncrement64_rel:
    return MSVCIntrin::_InterlockedIncrement_rel;
  case clang::ARM::BI_InterlockedIncrement16_nf:
  case clang::ARM::BI_InterlockedIncrement_nf:
  case clang::ARM::BI_InterlockedIncrement64_nf:
    return MSVCIntrin::_InterlockedIncrement_nf;
  case clang::ARM::BI_InterlockedDecrement16_acq:
  case clang::ARM::BI_InterlockedDecrement_acq:
  case clang::ARM::BI_InterlockedDecrement64_acq:
    return MSVCIntrin::_InterlockedDecrement_acq;
  case clang::ARM::BI_InterlockedDecrement16_rel:
  case clang::ARM::BI_InterlockedDecrement_rel:
  case clang::ARM::BI_InterlockedDecrement64_rel:
    return MSVCIntrin::_InterlockedDecrement_rel;
  case clang::ARM::BI_InterlockedDecrement16_nf:
  case clang::ARM::BI_InterlockedDecrement_nf:
  case clang::ARM::BI_InterlockedDecrement64_nf:
    return MSVCIntrin::_InterlockedDecrement_nf;
  }
  llvm_unreachable("must return from switch");
}

// Emit an intrinsic where all operands are of the same type as the result.
// Depending on mode, this may be a constrained floating-point intrinsic.
static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                                unsigned IntrinsicID,
                                                unsigned ConstrainedIntrinsicID,
                                                llvm::Type *Ty,
                                                ArrayRef<Value *> Args) {
  Function *F;
  if (CGF.Builder.getIsFPConstrained())
    F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
  else
    F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);

  if (CGF.Builder.getIsFPConstrained())
    return CGF.Builder.CreateConstrainedFPCall(F, Args);

  return CGF.Builder.CreateCall(F, Args);
}

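// Build the fixed-width vector type for a NEON builtin from its type flags.
// For example, element type Int32 with the quad bit set yields <4 x i32>;
// without the quad bit, <2 x i32>.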
static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
                                          NeonTypeFlags TypeFlags,
                                          bool HasFastHalfType = true,
                                          bool V1Ty = false,
                                          bool AllowBFloatArgsAndRet = true) {
  int IsQuad = TypeFlags.isQuad();
  switch (TypeFlags.getEltType()) {
  case NeonTypeFlags::Int8:
  case NeonTypeFlags::Poly8:
  case NeonTypeFlags::MFloat8:
    return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
  case NeonTypeFlags::Int16:
  case NeonTypeFlags::Poly16:
    return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::BFloat16:
    if (AllowBFloatArgsAndRet)
      return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
    return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::Float16:
    if (HasFastHalfType)
      return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
    return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::Int32:
    return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
  case NeonTypeFlags::Int64:
  case NeonTypeFlags::Poly64:
    return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
  case NeonTypeFlags::Poly128:
    // FIXME: i128 and f128 are not fully supported in Clang and LLVM, and a
    // lot of the i128/f128 API is missing, so we use v16i8 to represent
    // poly128 and let it get pattern matched.
    return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
  case NeonTypeFlags::Float32:
    return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
  case NeonTypeFlags::Float64:
    return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
  }
  llvm_unreachable("Unknown vector element type!");
}

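// Return the floating-point vector type with the same lane count as the given
// integer NEON type, e.g. <4 x i32> (Int32, quad) maps to <4 x float>.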
static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
                                          NeonTypeFlags IntTypeFlags) {
  int IsQuad = IntTypeFlags.isQuad();
  switch (IntTypeFlags.getEltType()) {
  case NeonTypeFlags::Int16:
    return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
  case NeonTypeFlags::Int32:
    return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
  case NeonTypeFlags::Int64:
    return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
  default:
    llvm_unreachable("Type can't be converted to floating-point!");
  }
}

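// Broadcast lane C of V to all Count lanes; the splat is emitted as a
// shufflevector whose constant mask repeats the same lane index.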
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
                                      const ElementCount &Count) {
  Value *SV = llvm::ConstantVector::getSplat(Count, C);
  return Builder.CreateShuffleVector(V, V, SV, "lane");
}

Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
  ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
  return EmitNeonSplat(V, C, EC);
}

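// Emit a call to a NEON intrinsic, bitcasting each operand to the parameter
// type the intrinsic expects; the operand at position `shift` (if nonzero) is
// instead materialized as a splat shift-amount vector.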
Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value *> &Ops,
                                     const char *name,
                                     unsigned shift, bool rightshift) {
  unsigned j = 0;
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j) {
    if (F->isConstrainedFPIntrinsic())
      if (ai->getType()->isMetadataTy())
        continue;
    if (shift > 0 && shift == j)
      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
    else
      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
  }

  if (F->isConstrainedFPIntrinsic())
    return Builder.CreateConstrainedFPCall(F, Ops, name);
  return Builder.CreateCall(F, Ops, name);
}

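// FP8 NEON intrinsics carry the FPMR value as a trailing operand: pop it off
// Ops and write it to the FPMR register before emitting the call proper.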
Value *CodeGenFunction::EmitFP8NeonCall(unsigned IID,
                                        ArrayRef<llvm::Type *> Tys,
                                        SmallVectorImpl<Value *> &Ops,
                                        const CallExpr *E, const char *name) {
  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
                     Ops.pop_back_val());
  return EmitNeonCall(CGM.getIntrinsic(IID, Tys), Ops, name);
}

llvm::Value *CodeGenFunction::EmitFP8NeonFDOTCall(
    unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
    SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {

  const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
                             RetTy->getPrimitiveSizeInBits();
  llvm::Type *Tys[] = {llvm::FixedVectorType::get(RetTy, ElemCount),
                       Ops[1]->getType()};
  if (ExtendLaneArg) {
    auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
    Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
                                        uint64_t(0));
  }
  return EmitFP8NeonCall(IID, Tys, Ops, E, name);
}

llvm::Value *CodeGenFunction::EmitFP8NeonFMLACall(
    unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
    SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {

  if (ExtendLaneArg) {
    auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
    Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
                                        uint64_t(0));
  }
  const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
                             RetTy->getPrimitiveSizeInBits();
  return EmitFP8NeonCall(IID, {llvm::FixedVectorType::get(RetTy, ElemCount)},
                         Ops, E, name);
}

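// Materialize an immediate shift count as a splat vector constant of the
// given type, negating it first when it will be used as a right shift.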
Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
                                            bool neg) {
  int SV = cast<ConstantInt>(V)->getSExtValue();
  return ConstantInt::getSigned(Ty, neg ? -SV : SV);
}

Value *CodeGenFunction::EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
                                           llvm::Type *Ty1, bool Extract,
                                           SmallVectorImpl<llvm::Value *> &Ops,
                                           const CallExpr *E,
                                           const char *name) {
  llvm::Type *Tys[] = {Ty0, Ty1};
  if (Extract) {
    // Op[0] is mfloat8x16_t, but the intrinsic converts only the lower part of
    // the vector.
    Tys[1] = llvm::FixedVectorType::get(Int8Ty, 8);
    Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], uint64_t(0));
  }
  return EmitFP8NeonCall(IID, Tys, Ops, E, name);
}

// Right-shift a vector by a constant.
Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
                                          llvm::Type *Ty, bool usgn,
                                          const char *name) {
  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);

  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
  int EltSize = VTy->getScalarSizeInBits();

  Vec = Builder.CreateBitCast(Vec, Ty);

  // lshr/ashr are undefined when the shift amount is equal to the vector
  // element size.
  if (ShiftAmt == EltSize) {
    if (usgn) {
      // Right-shifting an unsigned value by its size yields 0.
      return llvm::ConstantAggregateZero::get(VTy);
    } else {
      // Right-shifting a signed value by its size is equivalent
      // to a shift of size-1.
      --ShiftAmt;
      Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
    }
  }

  Shift = EmitNeonShiftVector(Shift, Ty, false);
  if (usgn)
    return Builder.CreateLShr(Vec, Shift, name);
  return Builder.CreateAShr(Vec, Shift, name);
}

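// Map from ARM NEON builtins to LLVM IR intrinsics. NEONMAP0 entries have no
// direct intrinsic and are lowered by hand in EmitCommonNeonBuiltinExpr;
// NEONMAP1/NEONMAP2 entries name one intrinsic (or an unsigned/signed pair)
// together with type-modifier flags such as Add1ArgType and UnsignedAlts.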
// clang-format off
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[] = {
  NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
  NEONMAP0(splat_lane_v),
  NEONMAP0(splat_laneq_v),
  NEONMAP0(splatq_lane_v),
  NEONMAP0(splatq_laneq_v),
  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vabs_v, arm_neon_vabs, 0),
  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
  NEONMAP0(vadd_v),
  NEONMAP0(vaddhn_v),
  NEONMAP0(vaddq_v),
  NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
  NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
  NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
  NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
  NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
  NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
  NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
  NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
  NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcage_v, arm_neon_vacge, 0),
  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcale_v, arm_neon_vacge, 0),
  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
  NEONMAP0(vceqz_v),
  NEONMAP0(vceqzq_v),
  NEONMAP0(vcgez_v),
  NEONMAP0(vcgezq_v),
  NEONMAP0(vcgtz_v),
  NEONMAP0(vcgtzq_v),
  NEONMAP0(vclez_v),
  NEONMAP0(vclezq_v),
  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
  NEONMAP0(vcltz_v),
  NEONMAP0(vcltzq_v),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
  NEONMAP0(vcvt_f16_s16),
  NEONMAP0(vcvt_f16_u16),
  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvt_s16_f16),
  NEONMAP0(vcvt_s32_v),
  NEONMAP0(vcvt_s64_v),
  NEONMAP0(vcvt_u16_f16),
  NEONMAP0(vcvt_u32_v),
  NEONMAP0(vcvt_u64_v),
  NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
  NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP0(vcvtq_f16_s16),
  NEONMAP0(vcvtq_f16_u16),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_s16_f16),
  NEONMAP0(vcvtq_s32_v),
  NEONMAP0(vcvtq_s64_v),
  NEONMAP0(vcvtq_u16_f16),
  NEONMAP0(vcvtq_u32_v),
  NEONMAP0(vcvtq_u64_v),
  NEONMAP1(vdot_s32, arm_neon_sdot, 0),
  NEONMAP1(vdot_u32, arm_neon_udot, 0),
  NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
  NEONMAP1(vdotq_u32, arm_neon_udot, 0),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP0(vld1_dup_v),
  NEONMAP1(vld1_v, arm_neon_vld1, 0),
  NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
  NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
  NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
  NEONMAP0(vld1q_dup_v),
  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
  NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
  NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
  NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
  NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2_v, arm_neon_vld2, 0),
  NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
  NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3_v, arm_neon_vld3, 0),
  NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
  NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4_v, arm_neon_vld4, 0),
  NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
  NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP0(vmull_v),
  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vrnd_v, trunc, Add1ArgType),
  NEONMAP1(vrnda_v, round, Add1ArgType),
  NEONMAP1(vrndaq_v, round, Add1ArgType),
  NEONMAP0(vrndi_v),
  NEONMAP0(vrndiq_v),
  NEONMAP1(vrndm_v, floor, Add1ArgType),
  NEONMAP1(vrndmq_v, floor, Add1ArgType),
  NEONMAP1(vrndn_v, roundeven, Add1ArgType),
  NEONMAP1(vrndnq_v, roundeven, Add1ArgType),
  NEONMAP1(vrndp_v, ceil, Add1ArgType),
  NEONMAP1(vrndpq_v, ceil, Add1ArgType),
  NEONMAP1(vrndq_v, trunc, Add1ArgType),
  NEONMAP1(vrndx_v, rint, Add1ArgType),
  NEONMAP1(vrndxq_v, rint, Add1ArgType),
  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
  NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
  NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
  NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
  NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
  NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP1(vst1_v, arm_neon_vst1, 0),
  NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
  NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
  NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
  NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
  NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2_v, arm_neon_vst2, 0),
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3_v, arm_neon_vst3, 0),
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4_v, arm_neon_vst4, 0),
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtrn_v),
  NEONMAP0(vtrnq_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
  NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
  NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
  NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
  NEONMAP0(vuzp_v),
  NEONMAP0(vuzpq_v),
  NEONMAP0(vzip_v),
  NEONMAP0(vzipq_v)
};

// clang-format on

// Some intrinsics are equivalent for codegen.
static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
  { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
  { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
  { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
  { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
  { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
  { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
  { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
  { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
  { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
  { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
  { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
  { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
  { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
  { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
  { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
  { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
  { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
  { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
  { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
  { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
  { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
  { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
  { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
  { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
  { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
  { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
  { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
  { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
  { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
  { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
  { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
  { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
  { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
  { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
  { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
  { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
  { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
  { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
  { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
  { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
  { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
  { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
  { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
  { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
  { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
  { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
  { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
  { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
  { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
  { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
  { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
  { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
  { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
  { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
  { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
  { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
  { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
  { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
  { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
  { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
  { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
  { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
  { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
  { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
  { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
  { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
  { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
  { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
  { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
  { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
  { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
  { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
  { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
  { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
  { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
  { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
  { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
  { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
  { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
  { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
  { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
  { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
  { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
  { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
  { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
  { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
  { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
  { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
  { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
  { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
  { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
  { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
  { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
  { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
  { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
  { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
  { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
  { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
  { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
  { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
  { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
  { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
  { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
  { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
  { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
  { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
  { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
  { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
  { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
  { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
  { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
  { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
  { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
  { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
  { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
  { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
  { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
  { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
  { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
  { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
  { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
  { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
  { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
  { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
  { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
  { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
  { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
  { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
  // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
  // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
  // arbitrary one to be handled as the canonical variation.
  { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
  { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
  { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
  { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
  { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
  { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
  { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
  { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
  { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
  { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
  { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
  { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
};

#undef NEONMAP0
#undef NEONMAP1
#undef NEONMAP2

#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { \
    #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
        TypeModifier \
  }

#define SVEMAP2(NameBase, TypeModifier) \
  { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
#define GET_SVE_LLVM_INTRINSIC_MAP
#include "clang/Basic/arm_sve_builtin_cg.inc"
#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
#undef GET_SVE_LLVM_INTRINSIC_MAP
};

#undef SVEMAP1
#undef SVEMAP2

#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { \
    #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
        TypeModifier \
  }

#define SMEMAP2(NameBase, TypeModifier) \
  { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
#define GET_SME_LLVM_INTRINSIC_MAP
#include "clang/Basic/arm_sme_builtin_cg.inc"
#undef GET_SME_LLVM_INTRINSIC_MAP
};

#undef SMEMAP1
#undef SMEMAP2

static bool NEONSIMDIntrinsicsProvenSorted = false;

static bool AArch64SIMDIntrinsicsProvenSorted = false;
static bool AArch64SISDIntrinsicsProvenSorted = false;
static bool AArch64SVEIntrinsicsProvenSorted = false;
static bool AArch64SMEIntrinsicsProvenSorted = false;

// Check whether the builtin `BuiltinID` is present in `IntrinsicMap`; if so,
// return the corresponding info struct.
static const ARMVectorIntrinsicInfo *
findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
                            unsigned BuiltinID, bool &MapProvenSorted) {

#ifndef NDEBUG
  if (!MapProvenSorted) {
    assert(llvm::is_sorted(IntrinsicMap));
    MapProvenSorted = true;
  }
#endif

  const ARMVectorIntrinsicInfo *Builtin =
      llvm::lower_bound(IntrinsicMap, BuiltinID);

  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
    return Builtin;

  return nullptr;
}

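// Resolve the LLVM intrinsic for a NEON builtin, assembling the overload type
// list (return type and argument types) that the Modifier flags request.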
Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
                                                   unsigned Modifier,
                                                   llvm::Type *ArgType,
                                                   const CallExpr *E) {
  int VectorSize = 0;
  if (Modifier & Use64BitVectors)
    VectorSize = 64;
  else if (Modifier & Use128BitVectors)
    VectorSize = 128;

  // Return type.
  SmallVector<llvm::Type *, 3> Tys;
  if (Modifier & AddRetType) {
    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
    if (Modifier & VectorizeRetType)
      Ty = llvm::FixedVectorType::get(
          Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);

    Tys.push_back(Ty);
  }

  // Arguments.
  if (Modifier & VectorizeArgTypes) {
    int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
    ArgType = llvm::FixedVectorType::get(ArgType, Elts);
  }

  if (Modifier & (Add1ArgType | Add2ArgTypes))
    Tys.push_back(ArgType);

  if (Modifier & Add2ArgTypes)
    Tys.push_back(ArgType);

  if (Modifier & InventFloatType)
    Tys.push_back(FloatTy);

  return CGM.getIntrinsic(IntrinsicID, Tys);
}

//===----------------------------------------------------------------------===//
// Emit-helpers
//===----------------------------------------------------------------------===//
static Value *EmitCommonNeonSISDBuiltinExpr(
    CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
    SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
  assert(SISDInfo.LLVMIntrinsic && "Generic code assumes a valid intrinsic");

  switch (SISDInfo.BuiltinID) {
  case NEON::BI__builtin_neon_vcled_s64:
  case NEON::BI__builtin_neon_vcled_u64:
  case NEON::BI__builtin_neon_vcles_f32:
  case NEON::BI__builtin_neon_vcled_f64:
  case NEON::BI__builtin_neon_vcltd_s64:
  case NEON::BI__builtin_neon_vcltd_u64:
  case NEON::BI__builtin_neon_vclts_f32:
  case NEON::BI__builtin_neon_vcltd_f64:
  case NEON::BI__builtin_neon_vcales_f32:
  case NEON::BI__builtin_neon_vcaled_f64:
  case NEON::BI__builtin_neon_vcalts_f32:
  case NEON::BI__builtin_neon_vcaltd_f64:
    // Only one direction of comparisons actually exists: cmle is really a
    // cmge with swapped operands. The table gives us the right intrinsic, but
    // we still need to do the swap.
    std::swap(Ops[0], Ops[1]);
    break;
  }

  // Determine the type(s) of this overloaded AArch64 intrinsic.
  llvm::Type *ArgTy = CGF.ConvertType(E->getArg(0)->getType());
  Function *F = CGF.LookupNeonLLVMIntrinsic(SISDInfo.LLVMIntrinsic,
                                            SISDInfo.TypeModifier, ArgTy, E);

  int j = 0;
  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j) {
    llvm::Type *ArgTy = ai->getType();
    if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
        ArgTy->getPrimitiveSizeInBits())
      continue;
    assert(
        ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy() &&
        "Expecting vector LLVM intrinsic type and scalar Clang builtin type!");

    // The constant argument to an _n_ intrinsic always has Int32Ty, so
    // truncate it before inserting.
    Ops[j] = CGF.Builder.CreateTruncOrBitCast(
        Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
    Ops[j] =
        CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
  }

  Value *Result = CGF.EmitNeonCall(F, Ops, SISDInfo.NameHint);
  llvm::Type *ResultType = CGF.ConvertType(E->getType());
  if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
      Result->getType()->getPrimitiveSizeInBits().getFixedValue())
    return CGF.Builder.CreateExtractElement(Result, C0);

  return CGF.Builder.CreateBitCast(Result, ResultType, SISDInfo.NameHint);
}

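// Shared NEON lowering used by both the ARM and AArch64 paths: decode the
// trailing type-discriminator immediate, build the overloaded vector type,
// then either emit the mapped intrinsic or fall into the hand-written cases
// in the switch below.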
Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
    unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
    const char *NameHint, unsigned Modifier, const CallExpr *E,
    SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
    llvm::Triple::ArchType Arch) {

  // Extract the trailing immediate argument that encodes the type
  // discriminator for this overloaded intrinsic.
  // TODO: Move to the parent code that takes care of argument processing.
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  std::optional<llvm::APSInt> NeonTypeConst =
      Arg->getIntegerConstantExpr(getContext());
  if (!NeonTypeConst)
    return nullptr;

1192 // Determine the type of this overloaded NEON intrinsic.
1193 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
1194 const bool Usgn = Type.isUnsigned();
1195 const bool Quad = Type.isQuad();
1196 const bool Floating = Type.isFloatingPoint();
1197 const bool HasFastHalfType = getTarget().hasFastHalfType();
1198 const bool AllowBFloatArgsAndRet =
1199 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
1200
1201 llvm::FixedVectorType *VTy =
1202 GetNeonType(this, Type, HasFastHalfType, false, AllowBFloatArgsAndRet);
1203 llvm::Type *Ty = VTy;
1204 if (!Ty)
1205 return nullptr;
1206
1207 auto getAlignmentValue32 = [&](Address addr) -> Value* {
1208 return Builder.getInt32(addr.getAlignment().getQuantity());
1209 };
1210
1211 unsigned Int = LLVMIntrinsic;
1212 if ((Modifier & UnsignedAlts) && !Usgn)
1213 Int = AltLLVMIntrinsic;
1214
1215 switch (BuiltinID) {
1216 default: break;
1217 case NEON::BI__builtin_neon_splat_lane_v:
1218 case NEON::BI__builtin_neon_splat_laneq_v:
1219 case NEON::BI__builtin_neon_splatq_lane_v:
1220 case NEON::BI__builtin_neon_splatq_laneq_v: {
1221 auto NumElements = VTy->getElementCount();
1222 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
1223 NumElements = NumElements * 2;
1224 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
1225 NumElements = NumElements.divideCoefficientBy(2);
1226
1227 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
1228 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
1229 }
1230 case NEON::BI__builtin_neon_vpadd_v:
1231 case NEON::BI__builtin_neon_vpaddq_v:
1232 // We don't allow fp/int overloading of intrinsics.
1233 if (VTy->getElementType()->isFloatingPointTy() &&
1234 Int == Intrinsic::aarch64_neon_addp)
1235 Int = Intrinsic::aarch64_neon_faddp;
1236 break;
1237 case NEON::BI__builtin_neon_vabs_v:
1238 case NEON::BI__builtin_neon_vabsq_v:
1239 if (VTy->getElementType()->isFloatingPointTy())
1240 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
1241 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
1242 case NEON::BI__builtin_neon_vadd_v:
1243 case NEON::BI__builtin_neon_vaddq_v: {
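// This case is reached for the polynomial vadd variants, where NEON
// "addition" is carry-less (GF(2)) addition, i.e. a lane-wise XOR over the
// byte vector.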
1244 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
1245 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
1246 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
1247 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
1248 return Builder.CreateBitCast(Ops[0], Ty);
1249 }
1250 case NEON::BI__builtin_neon_vaddhn_v: {
1251 llvm::FixedVectorType *SrcTy =
1252 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1253
1254 // %sum = add <4 x i32> %lhs, %rhs
1255 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1256 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
1257 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
1258
1259 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
1260 Constant *ShiftAmt =
1261 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
1262 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
1263
1264 // %res = trunc <4 x i32> %high to <4 x i16>
1265 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
1266 }
1267 case NEON::BI__builtin_neon_vcale_v:
1268 case NEON::BI__builtin_neon_vcaleq_v:
1269 case NEON::BI__builtin_neon_vcalt_v:
1270 case NEON::BI__builtin_neon_vcaltq_v:
1271 std::swap(Ops[0], Ops[1]);
1272 [[fallthrough]];
1273 case NEON::BI__builtin_neon_vcage_v:
1274 case NEON::BI__builtin_neon_vcageq_v:
1275 case NEON::BI__builtin_neon_vcagt_v:
1276 case NEON::BI__builtin_neon_vcagtq_v: {
1277 llvm::Type *Ty;
1278 switch (VTy->getScalarSizeInBits()) {
1279 default: llvm_unreachable("unexpected type");
1280 case 32:
1281 Ty = FloatTy;
1282 break;
1283 case 64:
1284 Ty = DoubleTy;
1285 break;
1286 case 16:
1287 Ty = HalfTy;
1288 break;
1289 }
1290 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
1291 llvm::Type *Tys[] = { VTy, VecFlt };
1292 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1293 return EmitNeonCall(F, Ops, NameHint);
1294 }
1295 case NEON::BI__builtin_neon_vceqz_v:
1296 case NEON::BI__builtin_neon_vceqzq_v:
1297 return EmitAArch64CompareBuiltinExpr(
1298 Ops[0], Ty, Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ, "vceqz");
1299 case NEON::BI__builtin_neon_vcgez_v:
1300 case NEON::BI__builtin_neon_vcgezq_v:
1301 return EmitAArch64CompareBuiltinExpr(
1302 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
1303 "vcgez");
1304 case NEON::BI__builtin_neon_vclez_v:
1305 case NEON::BI__builtin_neon_vclezq_v:
1306 return EmitAArch64CompareBuiltinExpr(
1307 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
1308 "vclez");
1309 case NEON::BI__builtin_neon_vcgtz_v:
1310 case NEON::BI__builtin_neon_vcgtzq_v:
1311 return EmitAArch64CompareBuiltinExpr(
1312 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
1313 "vcgtz");
1314 case NEON::BI__builtin_neon_vcltz_v:
1315 case NEON::BI__builtin_neon_vcltzq_v:
1316 return EmitAArch64CompareBuiltinExpr(
1317 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
1318 "vcltz");
1319 case NEON::BI__builtin_neon_vclz_v:
1320 case NEON::BI__builtin_neon_vclzq_v:
1321 // We generate a target-independent intrinsic, which needs a second argument
1322 // indicating whether or not clz of zero is undefined; on ARM it isn't.
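// For example, on ARM a v2i32 vclz lowers to
//   call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
// since CLZ of zero is well defined there (it returns the bit width).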
1323 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
1324 break;
1325 case NEON::BI__builtin_neon_vcvt_f32_v:
1326 case NEON::BI__builtin_neon_vcvtq_f32_v:
1327 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1328 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
1329 HasFastHalfType);
1330 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1331 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1332 case NEON::BI__builtin_neon_vcvt_f16_s16:
1333 case NEON::BI__builtin_neon_vcvt_f16_u16:
1334 case NEON::BI__builtin_neon_vcvtq_f16_s16:
1335 case NEON::BI__builtin_neon_vcvtq_f16_u16:
1336 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1337 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
1338 HasFastHalfType);
1339 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1340 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1341 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
1342 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
1343 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
1344 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
1345 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
1346 Function *F = CGM.getIntrinsic(Int, Tys);
1347 return EmitNeonCall(F, Ops, "vcvt_n");
1348 }
1349 case NEON::BI__builtin_neon_vcvt_n_f32_v:
1350 case NEON::BI__builtin_neon_vcvt_n_f64_v:
1351 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
1352 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
1353 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
1354 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
1355 Function *F = CGM.getIntrinsic(Int, Tys);
1356 return EmitNeonCall(F, Ops, "vcvt_n");
1357 }
1358 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
1359 case NEON::BI__builtin_neon_vcvt_n_s32_v:
1360 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
1361 case NEON::BI__builtin_neon_vcvt_n_u32_v:
1362 case NEON::BI__builtin_neon_vcvt_n_s64_v:
1363 case NEON::BI__builtin_neon_vcvt_n_u64_v:
1364 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
1365 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
1366 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
1367 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
1368 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
1369 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
1370 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
1371 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1372 return EmitNeonCall(F, Ops, "vcvt_n");
1373 }
1374 case NEON::BI__builtin_neon_vcvt_s32_v:
1375 case NEON::BI__builtin_neon_vcvt_u32_v:
1376 case NEON::BI__builtin_neon_vcvt_s64_v:
1377 case NEON::BI__builtin_neon_vcvt_u64_v:
1378 case NEON::BI__builtin_neon_vcvt_s16_f16:
1379 case NEON::BI__builtin_neon_vcvt_u16_f16:
1380 case NEON::BI__builtin_neon_vcvtq_s32_v:
1381 case NEON::BI__builtin_neon_vcvtq_u32_v:
1382 case NEON::BI__builtin_neon_vcvtq_s64_v:
1383 case NEON::BI__builtin_neon_vcvtq_u64_v:
1384 case NEON::BI__builtin_neon_vcvtq_s16_f16:
1385 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
1386 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
1387 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
1388 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
1389 }
1390 case NEON::BI__builtin_neon_vcvta_s16_f16:
1391 case NEON::BI__builtin_neon_vcvta_s32_v:
1392 case NEON::BI__builtin_neon_vcvta_s64_v:
1393 case NEON::BI__builtin_neon_vcvta_u16_f16:
1394 case NEON::BI__builtin_neon_vcvta_u32_v:
1395 case NEON::BI__builtin_neon_vcvta_u64_v:
1396 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
1397 case NEON::BI__builtin_neon_vcvtaq_s32_v:
1398 case NEON::BI__builtin_neon_vcvtaq_s64_v:
1399 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
1400 case NEON::BI__builtin_neon_vcvtaq_u32_v:
1401 case NEON::BI__builtin_neon_vcvtaq_u64_v:
1402 case NEON::BI__builtin_neon_vcvtn_s16_f16:
1403 case NEON::BI__builtin_neon_vcvtn_s32_v:
1404 case NEON::BI__builtin_neon_vcvtn_s64_v:
1405 case NEON::BI__builtin_neon_vcvtn_u16_f16:
1406 case NEON::BI__builtin_neon_vcvtn_u32_v:
1407 case NEON::BI__builtin_neon_vcvtn_u64_v:
1408 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
1409 case NEON::BI__builtin_neon_vcvtnq_s32_v:
1410 case NEON::BI__builtin_neon_vcvtnq_s64_v:
1411 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
1412 case NEON::BI__builtin_neon_vcvtnq_u32_v:
1413 case NEON::BI__builtin_neon_vcvtnq_u64_v:
1414 case NEON::BI__builtin_neon_vcvtp_s16_f16:
1415 case NEON::BI__builtin_neon_vcvtp_s32_v:
1416 case NEON::BI__builtin_neon_vcvtp_s64_v:
1417 case NEON::BI__builtin_neon_vcvtp_u16_f16:
1418 case NEON::BI__builtin_neon_vcvtp_u32_v:
1419 case NEON::BI__builtin_neon_vcvtp_u64_v:
1420 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
1421 case NEON::BI__builtin_neon_vcvtpq_s32_v:
1422 case NEON::BI__builtin_neon_vcvtpq_s64_v:
1423 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
1424 case NEON::BI__builtin_neon_vcvtpq_u32_v:
1425 case NEON::BI__builtin_neon_vcvtpq_u64_v:
1426 case NEON::BI__builtin_neon_vcvtm_s16_f16:
1427 case NEON::BI__builtin_neon_vcvtm_s32_v:
1428 case NEON::BI__builtin_neon_vcvtm_s64_v:
1429 case NEON::BI__builtin_neon_vcvtm_u16_f16:
1430 case NEON::BI__builtin_neon_vcvtm_u32_v:
1431 case NEON::BI__builtin_neon_vcvtm_u64_v:
1432 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
1433 case NEON::BI__builtin_neon_vcvtmq_s32_v:
1434 case NEON::BI__builtin_neon_vcvtmq_s64_v:
1435 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
1436 case NEON::BI__builtin_neon_vcvtmq_u32_v:
1437 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
1438 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
1439 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
1440 }
1441 case NEON::BI__builtin_neon_vcvtx_f32_v: {
1442 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
1443 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
1444
1445 }
1446 case NEON::BI__builtin_neon_vext_v:
1447 case NEON::BI__builtin_neon_vextq_v: {
1448 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
1449 SmallVector<int, 16> Indices;
1450 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1451 Indices.push_back(i+CV);
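// e.g. for 8 lanes and CV == 3 this builds the mask <3,4,5,6,7,8,9,10>,
// selecting across the concatenation of the two inputs.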
1452
1453 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1454 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1455 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
1456 }
1457 case NEON::BI__builtin_neon_vfma_v:
1458 case NEON::BI__builtin_neon_vfmaq_v: {
1459 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1460 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1461 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1462
1463 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
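// i.e. vfma(a, b, c) computes a + b*c, so the call below is fma(b, c, a).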
1464 return emitCallMaybeConstrainedFPBuiltin(
1465 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
1466 {Ops[1], Ops[2], Ops[0]});
1467 }
1468 case NEON::BI__builtin_neon_vld1_v:
1469 case NEON::BI__builtin_neon_vld1q_v: {
1470 llvm::Type *Tys[] = {Ty, Int8PtrTy};
1471 Ops.push_back(getAlignmentValue32(PtrOp0));
1472 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
1473 }
1474 case NEON::BI__builtin_neon_vld1_x2_v:
1475 case NEON::BI__builtin_neon_vld1q_x2_v:
1476 case NEON::BI__builtin_neon_vld1_x3_v:
1477 case NEON::BI__builtin_neon_vld1q_x3_v:
1478 case NEON::BI__builtin_neon_vld1_x4_v:
1479 case NEON::BI__builtin_neon_vld1q_x4_v: {
1480 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
1481 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1482 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
1483 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
1484 }
1485 case NEON::BI__builtin_neon_vld2_v:
1486 case NEON::BI__builtin_neon_vld2q_v:
1487 case NEON::BI__builtin_neon_vld3_v:
1488 case NEON::BI__builtin_neon_vld3q_v:
1489 case NEON::BI__builtin_neon_vld4_v:
1490 case NEON::BI__builtin_neon_vld4q_v:
1491 case NEON::BI__builtin_neon_vld2_dup_v:
1492 case NEON::BI__builtin_neon_vld2q_dup_v:
1493 case NEON::BI__builtin_neon_vld3_dup_v:
1494 case NEON::BI__builtin_neon_vld3q_dup_v:
1495 case NEON::BI__builtin_neon_vld4_dup_v:
1496 case NEON::BI__builtin_neon_vld4q_dup_v: {
1497 llvm::Type *Tys[] = {Ty, Int8PtrTy};
1498 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1499 Value *Align = getAlignmentValue32(PtrOp1);
1500 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
1501 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
1502 }
1503 case NEON::BI__builtin_neon_vld1_dup_v:
1504 case NEON::BI__builtin_neon_vld1q_dup_v: {
1505 Value *V = PoisonValue::get(Ty);
1506 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
1507 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
1508 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
1509 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
1510 return EmitNeonSplat(Ops[0], CI);
1511 }
1512 case NEON::BI__builtin_neon_vld2_lane_v:
1513 case NEON::BI__builtin_neon_vld2q_lane_v:
1514 case NEON::BI__builtin_neon_vld3_lane_v:
1515 case NEON::BI__builtin_neon_vld3q_lane_v:
1516 case NEON::BI__builtin_neon_vld4_lane_v:
1517 case NEON::BI__builtin_neon_vld4q_lane_v: {
1518 llvm::Type *Tys[] = {Ty, Int8PtrTy};
1519 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1520 for (unsigned I = 2; I < Ops.size() - 1; ++I)
1521 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
1522 Ops.push_back(getAlignmentValue32(PtrOp1));
1523 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
1524 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
1525 }
1526 case NEON::BI__builtin_neon_vmovl_v: {
1527 llvm::FixedVectorType *DTy =
1528 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
1529 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
1530 if (Usgn)
1531 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
1532 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
1533 }
1534 case NEON::BI__builtin_neon_vmovn_v: {
1535 llvm::FixedVectorType *QTy =
1536 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1537 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
1538 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
1539 }
1540 case NEON::BI__builtin_neon_vmull_v:
1541 // FIXME: the integer vmull operations could be emitted in terms of pure
1542 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
1543 // hoisting the exts outside loops. Until GlobalISel comes along and can
1544 // see through such movement, this leads to bad CodeGen. So we need an
1545 // intrinsic for now.
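// A sketch of that pure-IR form for a signed 8x8->16 vmull (not what is
// emitted here):
//   %l = sext <8 x i8> %a to <8 x i16>
//   %r = sext <8 x i8> %b to <8 x i16>
//   %p = mul <8 x i16> %l, %r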
1546 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
1547 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
1548 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
1549 case NEON::BI__builtin_neon_vpadal_v:
1550 case NEON::BI__builtin_neon_vpadalq_v: {
1551 // The source operand type has twice as many elements, each half the size.
1552 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
1553 llvm::Type *EltTy =
1554 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
1555 auto *NarrowTy =
1556 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
1557 llvm::Type *Tys[2] = { Ty, NarrowTy };
1558 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
1559 }
1560 case NEON::BI__builtin_neon_vpaddl_v:
1561 case NEON::BI__builtin_neon_vpaddlq_v: {
1562 // The source operand type has twice as many elements, each half the size.
1563 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
1564 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
1565 auto *NarrowTy =
1566 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
1567 llvm::Type *Tys[2] = { Ty, NarrowTy };
1568 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
1569 }
1570 case NEON::BI__builtin_neon_vqdmlal_v:
1571 case NEON::BI__builtin_neon_vqdmlsl_v: {
1572 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
1573 Ops[1] =
1574 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
1575 Ops.resize(2);
1576 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
1577 }
1578 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
1579 case NEON::BI__builtin_neon_vqdmulh_lane_v:
1580 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
1581 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
1582 auto *RTy = cast<llvm::FixedVectorType>(Ty);
1583 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
1584 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
1585 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
1586 RTy->getNumElements() * 2);
1587 llvm::Type *Tys[2] = {
1588 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
1589 /*isQuad*/ false))};
1590 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
1591 }
1592 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
1593 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
1594 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
1595 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
1596 llvm::Type *Tys[2] = {
1597 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
1598 /*isQuad*/ true))};
1599 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
1600 }
1601 case NEON::BI__builtin_neon_vqshl_n_v:
1602 case NEON::BI__builtin_neon_vqshlq_n_v:
1603 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
1604 1, false);
1605 case NEON::BI__builtin_neon_vqshlu_n_v:
1606 case NEON::BI__builtin_neon_vqshluq_n_v:
1607 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
1608 1, false);
1609 case NEON::BI__builtin_neon_vrecpe_v:
1610 case NEON::BI__builtin_neon_vrecpeq_v:
1611 case NEON::BI__builtin_neon_vrsqrte_v:
1612 case NEON::BI__builtin_neon_vrsqrteq_v:
1613 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
1614 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
1615 case NEON::BI__builtin_neon_vrndi_v:
1616 case NEON::BI__builtin_neon_vrndiq_v:
1617 Int = Builder.getIsFPConstrained()
1618 ? Intrinsic::experimental_constrained_nearbyint
1619 : Intrinsic::nearbyint;
1620 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
1621 case NEON::BI__builtin_neon_vrshr_n_v:
1622 case NEON::BI__builtin_neon_vrshrq_n_v:
1623 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
1624 1, true);
1625 case NEON::BI__builtin_neon_vsha512hq_u64:
1626 case NEON::BI__builtin_neon_vsha512h2q_u64:
1627 case NEON::BI__builtin_neon_vsha512su0q_u64:
1628 case NEON::BI__builtin_neon_vsha512su1q_u64: {
1629 Function *F = CGM.getIntrinsic(Int);
1630 return EmitNeonCall(F, Ops, "");
1631 }
1632 case NEON::BI__builtin_neon_vshl_n_v:
1633 case NEON::BI__builtin_neon_vshlq_n_v:
1634 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
1635 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
1636 "vshl_n");
1637 case NEON::BI__builtin_neon_vshll_n_v: {
1638 llvm::FixedVectorType *SrcTy =
1639 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
1640 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1641 if (Usgn)
1642 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
1643 else
1644 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
1645 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
1646 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
1647 }
1648 case NEON::BI__builtin_neon_vshrn_n_v: {
1649 llvm::FixedVectorType *SrcTy =
1650 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1651 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1652 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
1653 if (Usgn)
1654 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
1655 else
1656 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
1657 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
1658 }
1659 case NEON::BI__builtin_neon_vshr_n_v:
1660 case NEON::BI__builtin_neon_vshrq_n_v:
1661 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
1662 case NEON::BI__builtin_neon_vst1_v:
1663 case NEON::BI__builtin_neon_vst1q_v:
1664 case NEON::BI__builtin_neon_vst2_v:
1665 case NEON::BI__builtin_neon_vst2q_v:
1666 case NEON::BI__builtin_neon_vst3_v:
1667 case NEON::BI__builtin_neon_vst3q_v:
1668 case NEON::BI__builtin_neon_vst4_v:
1669 case NEON::BI__builtin_neon_vst4q_v:
1670 case NEON::BI__builtin_neon_vst2_lane_v:
1671 case NEON::BI__builtin_neon_vst2q_lane_v:
1672 case NEON::BI__builtin_neon_vst3_lane_v:
1673 case NEON::BI__builtin_neon_vst3q_lane_v:
1674 case NEON::BI__builtin_neon_vst4_lane_v:
1675 case NEON::BI__builtin_neon_vst4q_lane_v: {
1676 llvm::Type *Tys[] = {Int8PtrTy, Ty};
1677 Ops.push_back(getAlignmentValue32(PtrOp0));
1678 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
1679 }
1680 case NEON::BI__builtin_neon_vsm3partw1q_u32:
1681 case NEON::BI__builtin_neon_vsm3partw2q_u32:
1682 case NEON::BI__builtin_neon_vsm3ss1q_u32:
1683 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
1684 case NEON::BI__builtin_neon_vsm4eq_u32: {
1685 Function *F = CGM.getIntrinsic(Int);
1686 return EmitNeonCall(F, Ops, "");
1687 }
1688 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
1689 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
1690 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
1691 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
1692 Function *F = CGM.getIntrinsic(Int);
1693 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
1694 return EmitNeonCall(F, Ops, "");
1695 }
1696 case NEON::BI__builtin_neon_vst1_x2_v:
1697 case NEON::BI__builtin_neon_vst1q_x2_v:
1698 case NEON::BI__builtin_neon_vst1_x3_v:
1699 case NEON::BI__builtin_neon_vst1q_x3_v:
1700 case NEON::BI__builtin_neon_vst1_x4_v:
1701 case NEON::BI__builtin_neon_vst1q_x4_v: {
1702 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
1703 // in AArch64 it comes last. We may want to standardize on one or the other.
1704 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
1705 Arch == llvm::Triple::aarch64_32) {
1706 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
1707 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
1708 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
1709 }
1710 llvm::Type *Tys[2] = {DefaultPtrTy, VTy};
1711 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
1712 }
1713 case NEON::BI__builtin_neon_vsubhn_v: {
1714 llvm::FixedVectorType *SrcTy =
1715 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1716
1717 // %diff = sub <4 x i32> %lhs, %rhs
1718 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1719 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
1720 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
1721
1722 // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
1723 Constant *ShiftAmt =
1724 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
1725 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
1726
1727 // %res = trunc <4 x i32> %high to <4 x i16>
1728 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
1729 }
1730 case NEON::BI__builtin_neon_vtrn_v:
1731 case NEON::BI__builtin_neon_vtrnq_v: {
1732 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1733 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1734 Value *SV = nullptr;
1735
1736 for (unsigned vi = 0; vi != 2; ++vi) {
1737 SmallVector<int, 16> Indices;
1738 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1739 Indices.push_back(i+vi);
1740 Indices.push_back(i+e+vi);
1741 }
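// For a 4-lane vector this produces the masks <0,4,2,6> (vi==0) and
// <1,5,3,7> (vi==1), i.e. the two transposed results.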
1742 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
1743 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
1744 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
1745 }
1746 return SV;
1747 }
1748 case NEON::BI__builtin_neon_vtst_v:
1749 case NEON::BI__builtin_neon_vtstq_v: {
1750 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1751 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1752 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
1753 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
1754 ConstantAggregateZero::get(Ty));
1755 return Builder.CreateSExt(Ops[0], Ty, "vtst");
1756 }
1757 case NEON::BI__builtin_neon_vuzp_v:
1758 case NEON::BI__builtin_neon_vuzpq_v: {
1759 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1760 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1761 Value *SV = nullptr;
1762
1763 for (unsigned vi = 0; vi != 2; ++vi) {
1764 SmallVector<int, 16> Indices;
1765 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1766 Indices.push_back(2*i+vi);
1767
1768 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
1769 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
1770 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
1771 }
1772 return SV;
1773 }
1774 case NEON::BI__builtin_neon_vxarq_u64: {
1775 Function *F = CGM.getIntrinsic(Int);
1776 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
1777 return EmitNeonCall(F, Ops, "");
1778 }
1779 case NEON::BI__builtin_neon_vzip_v:
1780 case NEON::BI__builtin_neon_vzipq_v: {
1781 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1782 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1783 Value *SV = nullptr;
1784
1785 for (unsigned vi = 0; vi != 2; ++vi) {
1786 SmallVector<int, 16> Indices;
1787 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1788 Indices.push_back((i + vi*e) >> 1);
1789 Indices.push_back(((i + vi*e) >> 1)+e);
1790 }
1791 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
1792 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
1793 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
1794 }
1795 return SV;
1796 }
1797 case NEON::BI__builtin_neon_vdot_s32:
1798 case NEON::BI__builtin_neon_vdot_u32:
1799 case NEON::BI__builtin_neon_vdotq_s32:
1800 case NEON::BI__builtin_neon_vdotq_u32: {
1801 auto *InputTy =
1802 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1803 llvm::Type *Tys[2] = { Ty, InputTy };
1804 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
1805 }
1806 case NEON::BI__builtin_neon_vfmlal_low_f16:
1807 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
1808 auto *InputTy =
1809 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1810 llvm::Type *Tys[2] = { Ty, InputTy };
1811 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
1812 }
1813 case NEON::BI__builtin_neon_vfmlsl_low_f16:
1814 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
1815 auto *InputTy =
1816 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1817 llvm::Type *Tys[2] = { Ty, InputTy };
1818 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
1819 }
1820 case NEON::BI__builtin_neon_vfmlal_high_f16:
1821 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
1822 auto *InputTy =
1823 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1824 llvm::Type *Tys[2] = { Ty, InputTy };
1825 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
1826 }
1827 case NEON::BI__builtin_neon_vfmlsl_high_f16:
1828 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
1829 auto *InputTy =
1830 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1831 llvm::Type *Tys[2] = { Ty, InputTy };
1832 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
1833 }
1834 case NEON::BI__builtin_neon_vmmlaq_s32:
1835 case NEON::BI__builtin_neon_vmmlaq_u32: {
1836 auto *InputTy =
1837 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1838 llvm::Type *Tys[2] = { Ty, InputTy };
1839 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
1840 }
1841 case NEON::BI__builtin_neon_vusmmlaq_s32: {
1842 auto *InputTy =
1843 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1844 llvm::Type *Tys[2] = { Ty, InputTy };
1845 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
1846 }
1847 case NEON::BI__builtin_neon_vusdot_s32:
1848 case NEON::BI__builtin_neon_vusdotq_s32: {
1849 auto *InputTy =
1850 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1851 llvm::Type *Tys[2] = { Ty, InputTy };
1852 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
1853 }
1854 case NEON::BI__builtin_neon_vbfdot_f32:
1855 case NEON::BI__builtin_neon_vbfdotq_f32: {
1856 llvm::Type *InputTy =
1857 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
1858 llvm::Type *Tys[2] = { Ty, InputTy };
1859 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
1860 }
1861 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
1862 llvm::Type *Tys[1] = { Ty };
1863 Function *F = CGM.getIntrinsic(Int, Tys);
1864 return EmitNeonCall(F, Ops, "vcvtfp2bf");
1865 }
1866
1867 }
1868
1869 assert(Int && "Expected valid intrinsic number");
1870
1871 // Determine the type(s) of this overloaded AArch64 intrinsic.
1872 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
1873
1874 Value *Result = EmitNeonCall(F, Ops, NameHint);
1875 llvm::Type *ResultType = ConvertType(E->getType());
1876 // An AArch64 intrinsic may return a one-element vector; cast it back to
1877 // the scalar type expected by the builtin.
1878 return Builder.CreateBitCast(Result, ResultType, NameHint);
1879}
1880
1881Value *
1882CodeGenFunction::EmitAArch64CompareBuiltinExpr(Value *Op, llvm::Type *Ty,
1883 const CmpInst::Predicate Pred,
1884 const Twine &Name) {
1885
1886 if (isa<FixedVectorType>(Ty)) {
1887 // Vector types are cast to i8 vectors. Recover original type.
1888 Op = Builder.CreateBitCast(Op, Ty);
1889 }
1890
1891 Constant *zero = Constant::getNullValue(Op->getType());
1892
1893 if (CmpInst::isFPPredicate(Pred)) {
1894 if (Pred == CmpInst::FCMP_OEQ)
1895 Op = Builder.CreateFCmp(Pred, Op, zero);
1896 else
1897 Op = Builder.CreateFCmpS(Pred, Op, zero);
1898 } else {
1899 Op = Builder.CreateICmp(Pred, Op, zero);
1900 }
1901
1902 llvm::Type *ResTy = Ty;
1903 if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
1904 ResTy = FixedVectorType::get(
1905 IntegerType::get(getLLVMContext(), VTy->getScalarSizeInBits()),
1906 VTy->getNumElements());
1907
1908 return Builder.CreateSExt(Op, ResTy, Name);
1909}
1910
1911static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
1912 Value *ExtOp, Value *IndexOp,
1913 llvm::Type *ResTy, unsigned IntID,
1914 const char *Name) {
1915 SmallVector<Value *, 2> TblOps;
1916 if (ExtOp)
1917 TblOps.push_back(ExtOp);
1918
1919 // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
1920 SmallVector<int, 16> Indices;
1921 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
1922 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
1923 Indices.push_back(2*i);
1924 Indices.push_back(2*i+1);
1925 }
1926
1927 int PairPos = 0, End = Ops.size() - 1;
1928 while (PairPos < End) {
1929 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
1930 Ops[PairPos+1], Indices,
1931 Name));
1932 PairPos += 2;
1933 }
1934
1935 // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
1936 // of the final 128-bit lookup table with zeroes.
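// e.g. a vtbl3-style call supplies three 64-bit tables; the first two pair
// into one 128-bit table and the third is paired with zeroes.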
1937 if (PairPos == End) {
1938 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
1939 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
1940 ZeroTbl, Indices, Name));
1941 }
1942
1943 Function *TblF;
1944 TblOps.push_back(IndexOp);
1945 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
1946
1947 return CGF.EmitNeonCall(TblF, TblOps, Name);
1948}
1949
1950Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
1951 unsigned Value;
1952 switch (BuiltinID) {
1953 default:
1954 return nullptr;
1955 case clang::ARM::BI__builtin_arm_nop:
1956 Value = 0;
1957 break;
1958 case clang::ARM::BI__builtin_arm_yield:
1959 case clang::ARM::BI__yield:
1960 Value = 1;
1961 break;
1962 case clang::ARM::BI__builtin_arm_wfe:
1963 case clang::ARM::BI__wfe:
1964 Value = 2;
1965 break;
1966 case clang::ARM::BI__builtin_arm_wfi:
1967 case clang::ARM::BI__wfi:
1968 Value = 3;
1969 break;
1970 case clang::ARM::BI__builtin_arm_sev:
1971 case clang::ARM::BI__sev:
1972 Value = 4;
1973 break;
1974 case clang::ARM::BI__builtin_arm_sevl:
1975 case clang::ARM::BI__sevl:
1976 Value = 5;
1977 break;
1978 }
1979
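// e.g. __wfe() becomes: call void @llvm.arm.hint(i32 2)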
1980 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
1981 llvm::ConstantInt::get(Int32Ty, Value));
1982}
1983
1983
1984enum SpecialRegisterAccessKind {
1985 NormalRead,
1986 VolatileRead,
1987 Write,
1988};
1989
1990// Generates the IR for the read/write special register builtin.
1991// ValueType is the type of the value that is to be written or read;
1992// RegisterType is the type of the register being written to or read from.
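// A usage sketch (assuming an AArch32 target; the register string is an
// illustrative ACLE coprocessor-register name):
//   unsigned v = __builtin_arm_rsr("cp15:0:c13:c0:3");
// emits @llvm.read_volatile_register.i32 with metadata !{!"cp15:0:c13:c0:3"}.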
1993static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
1994 const CallExpr *E,
1995 llvm::Type *RegisterType,
1996 llvm::Type *ValueType,
1997 SpecialRegisterAccessKind AccessKind,
1998 StringRef SysReg = "") {
1999 // Read/write register intrinsics only support 32-, 64- and 128-bit operations.
2000 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
2001 RegisterType->isIntegerTy(128)) &&
2002 "Unsupported size for register.");
2003
2004 CodeGen::CGBuilderTy &Builder = CGF.Builder;
2005 CodeGen::CodeGenModule &CGM = CGF.CGM;
2006 LLVMContext &Context = CGM.getLLVMContext();
2007
2008 if (SysReg.empty()) {
2009 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
2010 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
2011 }
2012
2013 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
2014 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
2015 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
2016
2017 llvm::Type *Types[] = { RegisterType };
2018
2019 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
2020 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
2021 && "Can't fit 64-bit value in 32-bit register");
2022
2023 if (AccessKind != Write) {
2024 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
2025 llvm::Function *F = CGM.getIntrinsic(
2026 AccessKind == VolatileRead ? Intrinsic::read_volatile_register
2027 : Intrinsic::read_register,
2028 Types);
2029 llvm::Value *Call = Builder.CreateCall(F, Metadata);
2030
2031 if (MixedTypes)
2032 // Read into 64 bit register and then truncate result to 32 bit.
2033 return Builder.CreateTrunc(Call, ValueType);
2034
2035 if (ValueType->isPointerTy())
2036 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
2037 return Builder.CreateIntToPtr(Call, ValueType);
2038
2039 return Call;
2040 }
2041
2042 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
2043 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
2044 if (MixedTypes) {
2045 // Extend 32 bit write value to 64 bit to pass to write.
2046 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
2047 return Builder.CreateCall(F, { Metadata, ArgValue });
2048 }
2049
2050 if (ValueType->isPointerTy()) {
2051 // Have VoidPtrTy ArgValue but want to return an i32/i64.
2052 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
2053 return Builder.CreateCall(F, { Metadata, ArgValue });
2054 }
2055
2056 return Builder.CreateCall(F, { Metadata, ArgValue });
2057}
2058
2059static Value *EmitRangePrefetchBuiltin(CodeGenFunction &CGF, unsigned BuiltinID,
2060 const CallExpr *E) {
2061 CodeGen::CGBuilderTy &Builder = CGF.Builder;
2062 CodeGen::CodeGenModule &CGM = CGF.CGM;
2063 SmallVector<Value *, 4> Ops;
2064
2065 auto getIntArg = [&](unsigned ArgNo) {
2066 Expr::EvalResult Result;
2067 if (!E->getArg(ArgNo)->EvaluateAsInt(Result, CGM.getContext()))
2068 llvm_unreachable("Expected constant argument to range prefetch.");
2069 return Result.Val.getInt().getExtValue();
2070 };
2071
2072 Ops.push_back(CGF.EmitScalarExpr(E->getArg(0))); /*Addr*/
2073 Ops.push_back(CGF.EmitScalarExpr(E->getArg(1))); /*Access Kind*/
2074 Ops.push_back(CGF.EmitScalarExpr(E->getArg(2))); /*Policy*/
2075
2076 if (BuiltinID == clang::AArch64::BI__builtin_arm_range_prefetch_x) {
2077 auto Length = getIntArg(3);
2078 auto Count = getIntArg(4) - 1;
2079 auto Stride = getIntArg(5);
2080 auto Distance = getIntArg(6);
2081
2082 // Map ReuseDistance given in bytes to four bits representing decreasing
2083 // powers of two in the range 512MiB (0b0001) to 32KiB (0b1111). Values
2084 // are rounded up to the nearest power of 2, starting at 32KiB. Any value
2085 // over the maximum is represented by 0 (distance not known).
2086 if (Distance > 0) {
2087 Distance = llvm::Log2_32_Ceil(Distance);
2088 if (Distance < 15)
2089 Distance = 15;
2090 else if (Distance > 29)
2091 Distance = 0;
2092 else
2093 Distance = 30 - Distance;
2094 }
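// Worked example: a reuse distance of 1 MiB gives Log2_32_Ceil = 20,
// encoded as 30 - 20 = 10 (0b1010); 16 KiB is below the 32 KiB floor and
// encodes as 15 (0b1111).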
2095
2096 uint64_t Mask22 = (1ULL << 22) - 1;
2097 uint64_t Mask16 = (1ULL << 16) - 1;
2098 uint64_t Metadata = (Distance << 60) | ((Stride & Mask22) << 38) |
2099 ((Count & Mask16) << 22) | (Length & Mask22);
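// Resulting layout: bits [21:0] = Length, [37:22] = Count,
// [59:38] = Stride, [63:60] = ReuseDistance.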
2100
2101 Ops.push_back(llvm::ConstantInt::get(Builder.getInt64Ty(), Metadata));
2102 } else
2103 Ops.push_back(CGF.EmitScalarExpr(E->getArg(3)));
2104
2105 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_range_prefetch),
2106 Ops);
2107}
2108
2109/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
2110/// argument that specifies the vector type. The additional argument is meant
2111/// for Sema checking (see `CheckNeonBuiltinFunctionCall`) and this function
2112/// should be kept consistent with the logic in Sema.
2113/// TODO: Make this return false for SISD builtins.
2114static bool HasExtraNeonArgument(unsigned BuiltinID) {
2115 // Required by the headers included below, but not in this particular
2116 // function.
2117 [[maybe_unused]] int PtrArgNum = -1;
2118 [[maybe_unused]] bool HasConstPtr = false;
2119
2120 // The mask encodes the type. We don't care about the actual value. Instead,
2121 // we just check whether it has been set.
2122 uint64_t mask = 0;
2123 switch (BuiltinID) {
2124#define GET_NEON_OVERLOAD_CHECK
2125#include "clang/Basic/arm_fp16.inc"
2126#include "clang/Basic/arm_neon.inc"
2127#undef GET_NEON_OVERLOAD_CHECK
2128 // Non-NEON builtins for controlling VFP that take an extra argument
2129 // discriminating the type.
2130 case ARM::BI__builtin_arm_vcvtr_f:
2131 case ARM::BI__builtin_arm_vcvtr_d:
2132 mask = 1;
2133 }
2134
2135 if (mask)
2136 return true;
2137
2138 return false;
2139}
2140
2141Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
2142 const CallExpr *E,
2143 ReturnValueSlot ReturnValue,
2144 llvm::Triple::ArchType Arch) {
2145 if (auto Hint = GetValueForARMHint(BuiltinID))
2146 return Hint;
2147
2148 if (BuiltinID == clang::ARM::BI__emit) {
2149 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
2150 llvm::FunctionType *FTy =
2151 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
2152
2154 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
2155 llvm_unreachable("Sema will ensure that the parameter is constant");
2156
2157 llvm::APSInt Value = Result.Val.getInt();
2158 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
2159
2160 llvm::InlineAsm *Emit =
2161 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
2162 /*hasSideEffects=*/true)
2163 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
2164 /*hasSideEffects=*/true);
2165
2166 return Builder.CreateCall(Emit);
2167 }
2168
2169 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
2170 Value *Option = EmitScalarExpr(E->getArg(0));
2171 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
2172 }
2173
2174 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
2175 Value *Address = EmitScalarExpr(E->getArg(0));
2176 Value *RW = EmitScalarExpr(E->getArg(1));
2177 Value *IsData = EmitScalarExpr(E->getArg(2));
2178
2179 // Locality is not supported on the ARM target.
2180 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
2181
2182 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
2183 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
2184 }
2185
2186 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
2187 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2188 return Builder.CreateCall(
2189 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
2190 }
2191
2192 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
2193 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
2194 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2195 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
2196 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
2197 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
2198 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
2199 return Res;
2200 }
2201
2202
2203 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
2204 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2205 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
2206 }
2207 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
2208 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2209 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
2210 "cls");
2211 }
2212
2213 if (BuiltinID == clang::ARM::BI__clear_cache) {
2214 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
2215 const FunctionDecl *FD = E->getDirectCallee();
2216 Value *Ops[2];
2217 for (unsigned i = 0; i < 2; i++)
2218 Ops[i] = EmitScalarExpr(E->getArg(i));
2219 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
2220 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
2221 StringRef Name = FD->getName();
2222 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
2223 }
2224
2225 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
2226 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
2227 Function *F;
2228
2229 switch (BuiltinID) {
2230 default: llvm_unreachable("unexpected builtin");
2231 case clang::ARM::BI__builtin_arm_mcrr:
2232 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
2233 break;
2234 case clang::ARM::BI__builtin_arm_mcrr2:
2235 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
2236 break;
2237 }
2238
2239 // The MCRR{2} instruction has 5 operands, but
2240 // the intrinsic has only 4 because Rt and Rt2
2241 // are represented as a single unsigned 64-bit
2242 // integer in the intrinsic definition, while
2243 // internally they are two separate 32-bit
2244 // integers.
2245
2246 Value *Coproc = EmitScalarExpr(E->getArg(0));
2247 Value *Opc1 = EmitScalarExpr(E->getArg(1));
2248 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
2249 Value *CRm = EmitScalarExpr(E->getArg(3));
2250
2251 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
2252 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
2253 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
2254 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
2255
2256 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
2257 }
2258
2259 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
2260 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
2261 Function *F;
2262
2263 switch (BuiltinID) {
2264 default: llvm_unreachable("unexpected builtin");
2265 case clang::ARM::BI__builtin_arm_mrrc:
2266 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
2267 break;
2268 case clang::ARM::BI__builtin_arm_mrrc2:
2269 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
2270 break;
2271 }
2272
2273 Value *Coproc = EmitScalarExpr(E->getArg(0));
2274 Value *Opc1 = EmitScalarExpr(E->getArg(1));
2275 Value *CRm = EmitScalarExpr(E->getArg(2));
2276 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
2277
2278 // Returns an unsigned 64-bit integer, represented
2279 // as two 32-bit integers.
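// The i64 result is reassembled below as (field1 << 32) | field0.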
2280
2281 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
2282 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
2283 Rt = Builder.CreateZExt(Rt, Int64Ty);
2284 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
2285
2286 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
2287 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
2288 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
2289
2290 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
2291 }
2292
2293 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
2294 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2295 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
2296 getContext().getTypeSize(E->getType()) == 64) ||
2297 BuiltinID == clang::ARM::BI__ldrexd) {
2298 Function *F;
2299
2300 switch (BuiltinID) {
2301 default: llvm_unreachable("unexpected builtin");
2302 case clang::ARM::BI__builtin_arm_ldaex:
2303 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
2304 break;
2305 case clang::ARM::BI__builtin_arm_ldrexd:
2306 case clang::ARM::BI__builtin_arm_ldrex:
2307 case clang::ARM::BI__ldrexd:
2308 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
2309 break;
2310 }
2311
2312 Value *LdPtr = EmitScalarExpr(E->getArg(0));
2313 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
2314
2315 Value *Val0 = Builder.CreateExtractValue(Val, 1);
2316 Value *Val1 = Builder.CreateExtractValue(Val, 0);
2317 Val0 = Builder.CreateZExt(Val0, Int64Ty);
2318 Val1 = Builder.CreateZExt(Val1, Int64Ty);
2319
2320 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
2321 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
2322 Val = Builder.CreateOr(Val, Val1);
2323 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
2324 }
2325
2326 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2327 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
2328 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
2329
2330 QualType Ty = E->getType();
2331 llvm::Type *RealResTy = ConvertType(Ty);
2332 llvm::Type *IntTy =
2333 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
2334
2335 Function *F = CGM.getIntrinsic(
2336 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
2337 : Intrinsic::arm_ldrex,
2338 DefaultPtrTy);
2339 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
2340 Val->addParamAttr(
2341 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
2342
2343 if (RealResTy->isPointerTy())
2344 return Builder.CreateIntToPtr(Val, RealResTy);
2345 else {
2346 llvm::Type *IntResTy = llvm::IntegerType::get(
2347 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
2348 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
2349 RealResTy);
2350 }
2351 }
2352
2353 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
2354 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
2355 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
2356 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
2357 Function *F = CGM.getIntrinsic(
2358 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
2359 : Intrinsic::arm_strexd);
2360 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
2361
2362 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
2363 Value *Val = EmitScalarExpr(E->getArg(0));
2364 Builder.CreateStore(Val, Tmp);
2365
2366 Address LdPtr = Tmp.withElementType(STy);
2367 Val = Builder.CreateLoad(LdPtr);
2368
2369 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
2370 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
2371 Value *StPtr = EmitScalarExpr(E->getArg(1));
2372 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
2373 }
2374
2375 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
2376 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
2377 Value *StoreVal = EmitScalarExpr(E->getArg(0));
2378 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
2379
2380 QualType Ty = E->getArg(0)->getType();
2381 llvm::Type *StoreTy =
2382 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
2383
2384 if (StoreVal->getType()->isPointerTy())
2385 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
2386 else {
2387 llvm::Type *IntTy = llvm::IntegerType::get(
2388 getLLVMContext(),
2389 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
2390 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
2391 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
2392 }
2393
2394 Function *F = CGM.getIntrinsic(
2395 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
2396 : Intrinsic::arm_strex,
2397 StoreAddr->getType());
2398
2399 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
2400 CI->addParamAttr(
2401 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
2402 return CI;
2403 }
2404
2405 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
2406 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
2407 return Builder.CreateCall(F);
2408 }
2409
2410 // CRC32
2411 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
2412 switch (BuiltinID) {
2413 case clang::ARM::BI__builtin_arm_crc32b:
2414 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
2415 case clang::ARM::BI__builtin_arm_crc32cb:
2416 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
2417 case clang::ARM::BI__builtin_arm_crc32h:
2418 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
2419 case clang::ARM::BI__builtin_arm_crc32ch:
2420 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
2421 case clang::ARM::BI__builtin_arm_crc32w:
2422 case clang::ARM::BI__builtin_arm_crc32d:
2423 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
2424 case clang::ARM::BI__builtin_arm_crc32cw:
2425 case clang::ARM::BI__builtin_arm_crc32cd:
2426 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
2427 }
2428
2429 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
2430 Value *Arg0 = EmitScalarExpr(E->getArg(0));
2431 Value *Arg1 = EmitScalarExpr(E->getArg(1));
2432
2433 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
2434 // intrinsics, hence we need different codegen for these cases.
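// i.e. crc32d(crc, x) == crc32w(crc32w(crc, lo32(x)), hi32(x)).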
2435 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
2436 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
2437 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
2438 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
2439 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
2440 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
2441
2442 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
2443 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
2444 return Builder.CreateCall(F, {Res, Arg1b});
2445 } else {
2446 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
2447
2448 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
2449 return Builder.CreateCall(F, {Arg0, Arg1});
2450 }
2451 }
2452
2453 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
2454 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2455 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
2456 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
2457 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
2458 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
2459
2460 SpecialRegisterAccessKind AccessKind = Write;
2461 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
2462 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2463 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
2464 AccessKind = VolatileRead;
2465
2466 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
2467 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
2468
2469 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2470 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
2471
2472 llvm::Type *ValueType;
2473 llvm::Type *RegisterType;
2474 if (IsPointerBuiltin) {
2475 ValueType = VoidPtrTy;
2476 RegisterType = Int32Ty;
2477 } else if (Is64Bit) {
2478 ValueType = RegisterType = Int64Ty;
2479 } else {
2480 ValueType = RegisterType = Int32Ty;
2481 }
2482
2483 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
2484 AccessKind);
2485 }
2486
2487 if (BuiltinID == ARM::BI__builtin_sponentry) {
2488 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
2489 return Builder.CreateCall(F);
2490 }
2491
2492 // Handle MSVC intrinsics before argument evaluation to prevent double
2493 // evaluation.
2494 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
2495 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
2496
2497 // Deal with MVE builtins
2498 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
2499 return Result;
2500 // Handle CDE builtins
2501 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
2502 return Result;
2503
2504 // Some intrinsics are equivalent; if they are, use the base intrinsic ID.
2505 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
2506 return P.first == BuiltinID;
2507 });
2508 if (It != end(NEONEquivalentIntrinsicMap))
2509 BuiltinID = It->second;
2510
2511 // Find out if any arguments are required to be integer constant
2512 // expressions.
2513 unsigned ICEArguments = 0;
2514 ASTContext::GetBuiltinTypeError Error;
2515 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2516 assert(Error == ASTContext::GE_None && "Should not codegen an error");
2517
2518 auto getAlignmentValue32 = [&](Address addr) -> Value* {
2519 return Builder.getInt32(addr.getAlignment().getQuantity());
2520 };
2521
2522 Address PtrOp0 = Address::invalid();
2523 Address PtrOp1 = Address::invalid();
2524 SmallVector<Value *, 4> Ops;
2525 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
2526 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
2527 for (unsigned i = 0, e = NumArgs; i != e; i++) {
2528 if (i == 0) {
2529 switch (BuiltinID) {
2530 case NEON::BI__builtin_neon_vld1_v:
2531 case NEON::BI__builtin_neon_vld1q_v:
2532 case NEON::BI__builtin_neon_vld1q_lane_v:
2533 case NEON::BI__builtin_neon_vld1_lane_v:
2534 case NEON::BI__builtin_neon_vld1_dup_v:
2535 case NEON::BI__builtin_neon_vld1q_dup_v:
2536 case NEON::BI__builtin_neon_vst1_v:
2537 case NEON::BI__builtin_neon_vst1q_v:
2538 case NEON::BI__builtin_neon_vst1q_lane_v:
2539 case NEON::BI__builtin_neon_vst1_lane_v:
2540 case NEON::BI__builtin_neon_vst2_v:
2541 case NEON::BI__builtin_neon_vst2q_v:
2542 case NEON::BI__builtin_neon_vst2_lane_v:
2543 case NEON::BI__builtin_neon_vst2q_lane_v:
2544 case NEON::BI__builtin_neon_vst3_v:
2545 case NEON::BI__builtin_neon_vst3q_v:
2546 case NEON::BI__builtin_neon_vst3_lane_v:
2547 case NEON::BI__builtin_neon_vst3q_lane_v:
2548 case NEON::BI__builtin_neon_vst4_v:
2549 case NEON::BI__builtin_neon_vst4q_v:
2550 case NEON::BI__builtin_neon_vst4_lane_v:
2551 case NEON::BI__builtin_neon_vst4q_lane_v:
2552 // Get the alignment for the argument in addition to the value;
2553 // we'll use it later.
2554 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
2555 Ops.push_back(PtrOp0.emitRawPointer(*this));
2556 continue;
2557 }
2558 }
2559 if (i == 1) {
2560 switch (BuiltinID) {
2561 case NEON::BI__builtin_neon_vld2_v:
2562 case NEON::BI__builtin_neon_vld2q_v:
2563 case NEON::BI__builtin_neon_vld3_v:
2564 case NEON::BI__builtin_neon_vld3q_v:
2565 case NEON::BI__builtin_neon_vld4_v:
2566 case NEON::BI__builtin_neon_vld4q_v:
2567 case NEON::BI__builtin_neon_vld2_lane_v:
2568 case NEON::BI__builtin_neon_vld2q_lane_v:
2569 case NEON::BI__builtin_neon_vld3_lane_v:
2570 case NEON::BI__builtin_neon_vld3q_lane_v:
2571 case NEON::BI__builtin_neon_vld4_lane_v:
2572 case NEON::BI__builtin_neon_vld4q_lane_v:
2573 case NEON::BI__builtin_neon_vld2_dup_v:
2574 case NEON::BI__builtin_neon_vld2q_dup_v:
2575 case NEON::BI__builtin_neon_vld3_dup_v:
2576 case NEON::BI__builtin_neon_vld3q_dup_v:
2577 case NEON::BI__builtin_neon_vld4_dup_v:
2578 case NEON::BI__builtin_neon_vld4q_dup_v:
2579 // Get the alignment for the argument in addition to the value;
2580 // we'll use it later.
2581 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
2582 Ops.push_back(PtrOp1.emitRawPointer(*this));
2583 continue;
2584 }
2585 }
2586
2587 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
2588 }
2589
2590 switch (BuiltinID) {
2591 default: break;
2592
2593 case NEON::BI__builtin_neon_vget_lane_i8:
2594 case NEON::BI__builtin_neon_vget_lane_i16:
2595 case NEON::BI__builtin_neon_vget_lane_i32:
2596 case NEON::BI__builtin_neon_vget_lane_i64:
2597 case NEON::BI__builtin_neon_vget_lane_bf16:
2598 case NEON::BI__builtin_neon_vget_lane_f32:
2599 case NEON::BI__builtin_neon_vgetq_lane_i8:
2600 case NEON::BI__builtin_neon_vgetq_lane_i16:
2601 case NEON::BI__builtin_neon_vgetq_lane_i32:
2602 case NEON::BI__builtin_neon_vgetq_lane_i64:
2603 case NEON::BI__builtin_neon_vgetq_lane_bf16:
2604 case NEON::BI__builtin_neon_vgetq_lane_f32:
2605 case NEON::BI__builtin_neon_vduph_lane_bf16:
2606 case NEON::BI__builtin_neon_vduph_laneq_bf16:
2607 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
2608
2609 case NEON::BI__builtin_neon_vrndns_f32: {
2610 Value *Arg = EmitScalarExpr(E->getArg(0));
2611 llvm::Type *Tys[] = {Arg->getType()};
2612 Function *F = CGM.getIntrinsic(Intrinsic::roundeven, Tys);
2613 return Builder.CreateCall(F, {Arg}, "vrndn"); }
2614
2615 case NEON::BI__builtin_neon_vset_lane_i8:
2616 case NEON::BI__builtin_neon_vset_lane_i16:
2617 case NEON::BI__builtin_neon_vset_lane_i32:
2618 case NEON::BI__builtin_neon_vset_lane_i64:
2619 case NEON::BI__builtin_neon_vset_lane_bf16:
2620 case NEON::BI__builtin_neon_vset_lane_f32:
2621 case NEON::BI__builtin_neon_vsetq_lane_i8:
2622 case NEON::BI__builtin_neon_vsetq_lane_i16:
2623 case NEON::BI__builtin_neon_vsetq_lane_i32:
2624 case NEON::BI__builtin_neon_vsetq_lane_i64:
2625 case NEON::BI__builtin_neon_vsetq_lane_bf16:
2626 case NEON::BI__builtin_neon_vsetq_lane_f32:
2627 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
2628
2629 case NEON::BI__builtin_neon_vsha1h_u32:
2630 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
2631 "vsha1h");
2632 case NEON::BI__builtin_neon_vsha1cq_u32:
2633 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
2634 "vsha1h");
2635 case NEON::BI__builtin_neon_vsha1pq_u32:
2636 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
2637 "vsha1h");
2638 case NEON::BI__builtin_neon_vsha1mq_u32:
2639 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
2640 "vsha1h");
2641
2642 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
2643 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
2644 "vcvtbfp2bf");
2645 }
2646
2647 // The ARM _MoveToCoprocessor builtins put the input register value as
2648 // the first argument, but the LLVM intrinsic expects it as the third one.
2649 case clang::ARM::BI_MoveToCoprocessor:
2650 case clang::ARM::BI_MoveToCoprocessor2: {
2651 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
2652 ? Intrinsic::arm_mcr
2653 : Intrinsic::arm_mcr2);
2654 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
2655 Ops[3], Ops[4], Ops[5]});
2656 }
2657 }
2658
2659 // Get the last argument, which specifies the vector type.
2660 assert(HasExtraArg);
2661 const Expr *Arg = E->getArg(E->getNumArgs()-1);
2662  std::optional<llvm::APSInt> Result =
2663      Arg->getIntegerConstantExpr(getContext());
2664 if (!Result)
2665 return nullptr;
2666
2667 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
2668 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
2669 // Determine the overloaded type of this builtin.
2670 llvm::Type *Ty;
2671 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
2672 Ty = FloatTy;
2673 else
2674 Ty = DoubleTy;
2675
2676 // Determine whether this is an unsigned conversion or not.
2677 bool usgn = Result->getZExtValue() == 1;
2678 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
2679
2680 // Call the appropriate intrinsic.
2681 Function *F = CGM.getIntrinsic(Int, Ty);
2682 return Builder.CreateCall(F, Ops, "vcvtr");
2683 }
2684
2685 // Determine the type of this overloaded NEON intrinsic.
2686 NeonTypeFlags Type = Result->getZExtValue();
2687 bool usgn = Type.isUnsigned();
2688 bool rightShift = false;
2689
2690 llvm::FixedVectorType *VTy =
2691 GetNeonType(this, Type, getTarget().hasFastHalfType(), false,
2692 getTarget().hasBFloat16Type());
2693 llvm::Type *Ty = VTy;
2694 if (!Ty)
2695 return nullptr;
2696
2697 // Many NEON builtins have identical semantics and uses in ARM and
2698 // AArch64. Emit these in a single function.
2699  auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
2700  const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
2701      IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
2702  if (Builtin)
2703    return EmitCommonNeonBuiltinExpr(
2704        Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
2705        Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
2706
2707 unsigned Int;
2708 switch (BuiltinID) {
2709 default: return nullptr;
2710 case NEON::BI__builtin_neon_vld1q_lane_v:
2711 // Handle 64-bit integer elements as a special case. Use shuffles of
2712 // one-element vectors to avoid poor code for i64 in the backend.
2713 if (VTy->getElementType()->isIntegerTy(64)) {
2714 // Extract the other lane.
2715 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2716 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
2717 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
2718 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
2719 // Load the value as a one-element vector.
2720 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
2721 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2722 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
2723 Value *Align = getAlignmentValue32(PtrOp0);
2724 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
2725 // Combine them.
2726 int Indices[] = {1 - Lane, Lane};
2727 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
2728 }
2729 [[fallthrough]];
2730 case NEON::BI__builtin_neon_vld1_lane_v: {
2731 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2732 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
2733 Value *Ld = Builder.CreateLoad(PtrOp0);
2734 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
2735 }
2736 case NEON::BI__builtin_neon_vqrshrn_n_v:
2737 Int =
2738 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
2739 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
2740 1, true);
2741 case NEON::BI__builtin_neon_vqrshrun_n_v:
2742 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
2743 Ops, "vqrshrun_n", 1, true);
2744 case NEON::BI__builtin_neon_vqshrn_n_v:
2745 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
2746 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
2747 1, true);
2748 case NEON::BI__builtin_neon_vqshrun_n_v:
2749 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
2750 Ops, "vqshrun_n", 1, true);
2751 case NEON::BI__builtin_neon_vrecpe_v:
2752 case NEON::BI__builtin_neon_vrecpeq_v:
2753 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
2754 Ops, "vrecpe");
2755 case NEON::BI__builtin_neon_vrshrn_n_v:
2756 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
2757 Ops, "vrshrn_n", 1, true);
2758 case NEON::BI__builtin_neon_vrsra_n_v:
2759 case NEON::BI__builtin_neon_vrsraq_n_v:
2760 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2761 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2762 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
2763 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
2764 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
2765 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
2766 case NEON::BI__builtin_neon_vsri_n_v:
2767 case NEON::BI__builtin_neon_vsriq_n_v:
2768 rightShift = true;
2769 [[fallthrough]];
2770 case NEON::BI__builtin_neon_vsli_n_v:
2771 case NEON::BI__builtin_neon_vsliq_n_v:
2772 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
2773 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
2774 Ops, "vsli_n");
2775 case NEON::BI__builtin_neon_vsra_n_v:
2776 case NEON::BI__builtin_neon_vsraq_n_v:
2777 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2778 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
2779 return Builder.CreateAdd(Ops[0], Ops[1]);
2780 case NEON::BI__builtin_neon_vst1q_lane_v:
2781 // Handle 64-bit integer elements as a special case. Use a shuffle to get
2782 // a one-element vector and avoid poor code for i64 in the backend.
2783 if (VTy->getElementType()->isIntegerTy(64)) {
2784 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2785 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
2786 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
2787 Ops[2] = getAlignmentValue32(PtrOp0);
2788 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
2789 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
2790 Tys), Ops);
2791 }
2792 [[fallthrough]];
2793 case NEON::BI__builtin_neon_vst1_lane_v: {
2794 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2795 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
2796 return Builder.CreateStore(Ops[1],
2797 PtrOp0.withElementType(Ops[1]->getType()));
2798 }
2799 case NEON::BI__builtin_neon_vtbl1_v:
2800 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
2801 Ops, "vtbl1");
2802 case NEON::BI__builtin_neon_vtbl2_v:
2803 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
2804 Ops, "vtbl2");
2805 case NEON::BI__builtin_neon_vtbl3_v:
2806 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
2807 Ops, "vtbl3");
2808 case NEON::BI__builtin_neon_vtbl4_v:
2809 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
2810 Ops, "vtbl4");
2811 case NEON::BI__builtin_neon_vtbx1_v:
2812 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
2813 Ops, "vtbx1");
2814 case NEON::BI__builtin_neon_vtbx2_v:
2815 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
2816 Ops, "vtbx2");
2817 case NEON::BI__builtin_neon_vtbx3_v:
2818 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
2819 Ops, "vtbx3");
2820 case NEON::BI__builtin_neon_vtbx4_v:
2821 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
2822 Ops, "vtbx4");
2823 }
2824}
2825
2826template<typename Integer>
2827static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
2828  return E->getIntegerConstantExpr(Context)->getExtValue();
2829}
2830
2831static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
2832 llvm::Type *T, bool Unsigned) {
2833 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
2834 // which finds it convenient to specify signed/unsigned as a boolean flag.
2835 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
2836}
2837
2838static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
2839 uint32_t Shift, bool Unsigned) {
2840 // MVE helper function for integer shift right. This must handle signed vs
2841 // unsigned, and also deal specially with the case where the shift count is
2842 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
2843 // undefined behavior, but in MVE it's legal, so we must convert it to code
2844 // that is not undefined in IR.
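  // For instance (illustrative), with 16-bit lanes a shift count of 16 yields
  // a zero vector when unsigned, and an arithmetic shift by 15 when signed.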
2845 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
2846 ->getElementType()
2847 ->getPrimitiveSizeInBits();
2848 if (Shift == LaneBits) {
2849 // An unsigned shift of the full lane size always generates zero, so we can
2850 // simply emit a zero vector. A signed shift of the full lane size does the
2851 // same thing as shifting by one bit fewer.
2852 if (Unsigned)
2853 return llvm::Constant::getNullValue(V->getType());
2854 else
2855 --Shift;
2856 }
2857 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
2858}
2859
2860static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
2861 // MVE-specific helper function for a vector splat, which infers the element
2862 // count of the output vector by knowing that MVE vectors are all 128 bits
2863 // wide.
2864 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
2865 return Builder.CreateVectorSplat(Elements, V);
2866}
2867
2868static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
2869 CodeGenFunction *CGF,
2870 llvm::Value *V,
2871 llvm::Type *DestType) {
2872 // Convert one MVE vector type into another by reinterpreting its in-register
2873 // format.
2874 //
2875  // On little-endian targets this is identical to a bitcast (which
2876  // reinterprets the memory format). On big-endian targets they are not
2877  // necessarily the same, because the register and memory formats map to
2878  // each other differently depending on the lane size.
2879 //
2880 // We generate a bitcast whenever we can (if we're little-endian, or if the
2881 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
2882 // that performs the different kind of reinterpretation.
2883 if (CGF->getTarget().isBigEndian() &&
2884 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
2885 return Builder.CreateCall(
2886 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
2887 {DestType, V->getType()}),
2888 V);
2889 } else {
2890 return Builder.CreateBitCast(V, DestType);
2891 }
2892}
2893
2894static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
2895 // Make a shufflevector that extracts every other element of a vector (evens
2896 // or odds, as desired).
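  // E.g. (illustrative) for an 8-element input, Odd=false selects lanes
  // {0,2,4,6} and Odd=true selects lanes {1,3,5,7}.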
2897 SmallVector<int, 16> Indices;
2898 unsigned InputElements =
2899 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
2900 for (unsigned i = 0; i < InputElements; i += 2)
2901 Indices.push_back(i + Odd);
2902 return Builder.CreateShuffleVector(V, Indices);
2903}
2904
2905static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
2906 llvm::Value *V1) {
2907 // Make a shufflevector that interleaves two vectors element by element.
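  // E.g. (illustrative) zipping two 4-element vectors uses the shuffle mask
  // {0,4,1,5,2,6,3,7}.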
2908 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
2909 SmallVector<int, 16> Indices;
2910 unsigned InputElements =
2911 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
2912 for (unsigned i = 0; i < InputElements; i++) {
2913 Indices.push_back(i);
2914 Indices.push_back(i + InputElements);
2915 }
2916 return Builder.CreateShuffleVector(V0, V1, Indices);
2917}
2918
2919template<unsigned HighBit, unsigned OtherBits>
2920static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
2921 // MVE-specific helper function to make a vector splat of a constant such as
2922 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
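  // E.g. (illustrative) with 16-bit lanes, <HighBit=1, OtherBits=0> produces
  // 0x8000 (INT16_MIN) in every lane and <0, 1> produces 0x7fff (INT16_MAX).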
2923 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
2924 unsigned LaneBits = T->getPrimitiveSizeInBits();
2925 uint32_t Value = HighBit << (LaneBits - 1);
2926 if (OtherBits)
2927 Value |= (1UL << (LaneBits - 1)) - 1;
2928 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
2929 return ARMMVEVectorSplat(Builder, Lane);
2930}
2931
2932static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
2933 llvm::Value *V,
2934 unsigned ReverseWidth) {
2935 // MVE-specific helper function which reverses the elements of a
2936 // vector within every (ReverseWidth)-bit collection of lanes.
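  // E.g. (illustrative) reversing 8-bit lanes within 32-bit groups uses
  // Mask = 3, i.e. the shuffle indices {3,2,1,0, 7,6,5,4, ...}.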
2937 SmallVector<int, 16> Indices;
2938 unsigned LaneSize = V->getType()->getScalarSizeInBits();
2939 unsigned Elements = 128 / LaneSize;
2940 unsigned Mask = ReverseWidth / LaneSize - 1;
2941 for (unsigned i = 0; i < Elements; i++)
2942 Indices.push_back(i ^ Mask);
2943 return Builder.CreateShuffleVector(V, Indices);
2944}
2945
2946static llvm::Value *ARMMVECreateSIToFP(CGBuilderTy &Builder,
2947 CodeGenFunction *CGF, llvm::Value *V,
2948 llvm::Type *Ty) {
2949 return Builder.CreateCall(
2950 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_fp_int, {Ty, V->getType()}),
2951 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 0)});
2952}
2953
2954static llvm::Value *ARMMVECreateUIToFP(CGBuilderTy &Builder,
2955 CodeGenFunction *CGF, llvm::Value *V,
2956 llvm::Type *Ty) {
2957 return Builder.CreateCall(
2958 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_fp_int, {Ty, V->getType()}),
2959 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)});
2960}
2961
2962static llvm::Value *ARMMVECreateFPToSI(CGBuilderTy &Builder,
2963 CodeGenFunction *CGF, llvm::Value *V,
2964 llvm::Type *Ty) {
2965 return Builder.CreateCall(
2966 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_int_fp, {Ty, V->getType()}),
2967 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 0)});
2968}
2969
2970static llvm::Value *ARMMVECreateFPToUI(CGBuilderTy &Builder,
2971 CodeGenFunction *CGF, llvm::Value *V,
2972 llvm::Type *Ty) {
2973 return Builder.CreateCall(
2974 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_int_fp, {Ty, V->getType()}),
2975 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)});
2976}
2977
2978Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
2979                                              const CallExpr *E,
2980                                              ReturnValueSlot ReturnValue,
2981                                              llvm::Triple::ArchType Arch) {
2982 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
2983 Intrinsic::ID IRIntr;
2984 unsigned NumVectors;
2985
2986 // Code autogenerated by Tablegen will handle all the simple builtins.
2987 switch (BuiltinID) {
2988 #include "clang/Basic/arm_mve_builtin_cg.inc"
2989
2990 // If we didn't match an MVE builtin id at all, go back to the
2991 // main EmitARMBuiltinExpr.
2992 default:
2993 return nullptr;
2994 }
2995
2996 // Anything that breaks from that switch is an MVE builtin that
2997 // needs handwritten code to generate.
2998
2999 switch (CustomCodeGenType) {
3000
3001  case CustomCodeGen::VLD24: {
3002    llvm::SmallVector<Value *, 4> Ops;
3003    llvm::SmallVector<llvm::Type *, 4> Tys;
3004
3005 auto MvecCType = E->getType();
3006 auto MvecLType = ConvertType(MvecCType);
3007 assert(MvecLType->isStructTy() &&
3008 "Return type for vld[24]q should be a struct");
3009 assert(MvecLType->getStructNumElements() == 1 &&
3010 "Return-type struct for vld[24]q should have one element");
3011 auto MvecLTypeInner = MvecLType->getStructElementType(0);
3012 assert(MvecLTypeInner->isArrayTy() &&
3013 "Return-type struct for vld[24]q should contain an array");
3014 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3015 "Array member of return-type struct vld[24]q has wrong length");
3016 auto VecLType = MvecLTypeInner->getArrayElementType();
3017
3018 Tys.push_back(VecLType);
3019
3020 auto Addr = E->getArg(0);
3021 Ops.push_back(EmitScalarExpr(Addr));
3022 Tys.push_back(ConvertType(Addr->getType()));
3023
3024 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
3025 Value *LoadResult = Builder.CreateCall(F, Ops);
3026 Value *MvecOut = PoisonValue::get(MvecLType);
3027 for (unsigned i = 0; i < NumVectors; ++i) {
3028 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
3029 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
3030 }
3031
3032 if (ReturnValue.isNull())
3033 return MvecOut;
3034 else
3035 return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
3036 }
3037
3038  case CustomCodeGen::VST24: {
3039    llvm::SmallVector<Value *, 4> Ops;
3040    llvm::SmallVector<llvm::Type *, 4> Tys;
3041
3042 auto Addr = E->getArg(0);
3043 Ops.push_back(EmitScalarExpr(Addr));
3044 Tys.push_back(ConvertType(Addr->getType()));
3045
3046 auto MvecCType = E->getArg(1)->getType();
3047 auto MvecLType = ConvertType(MvecCType);
3048 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
3049 assert(MvecLType->getStructNumElements() == 1 &&
3050 "Data-type struct for vst2q should have one element");
3051 auto MvecLTypeInner = MvecLType->getStructElementType(0);
3052 assert(MvecLTypeInner->isArrayTy() &&
3053 "Data-type struct for vst2q should contain an array");
3054 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3055           "Array member of data-type struct for vst[24]q has wrong length");
3056 auto VecLType = MvecLTypeInner->getArrayElementType();
3057
3058 Tys.push_back(VecLType);
3059
3060 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
3061 EmitAggExpr(E->getArg(1), MvecSlot);
3062 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
3063 for (unsigned i = 0; i < NumVectors; i++)
3064 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
3065
3066 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
3067 Value *ToReturn = nullptr;
3068 for (unsigned i = 0; i < NumVectors; i++) {
3069 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
3070 ToReturn = Builder.CreateCall(F, Ops);
3071 Ops.pop_back();
3072 }
3073 return ToReturn;
3074 }
3075 }
3076 llvm_unreachable("unknown custom codegen type.");
3077}
3078
3079Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
3080                                              const CallExpr *E,
3081                                              ReturnValueSlot ReturnValue,
3082                                              llvm::Triple::ArchType Arch) {
3083 switch (BuiltinID) {
3084 default:
3085 return nullptr;
3086#include "clang/Basic/arm_cde_builtin_cg.inc"
3087 }
3088}
3089
3090static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
3091                                        const CallExpr *E,
3092                                        SmallVectorImpl<Value *> &Ops,
3093                                        llvm::Triple::ArchType Arch) {
3094 unsigned int Int = 0;
3095 const char *s = nullptr;
3096
3097 switch (BuiltinID) {
3098 default:
3099 return nullptr;
3100 case NEON::BI__builtin_neon_vtbl1_v:
3101 case NEON::BI__builtin_neon_vqtbl1_v:
3102 case NEON::BI__builtin_neon_vqtbl1q_v:
3103 case NEON::BI__builtin_neon_vtbl2_v:
3104 case NEON::BI__builtin_neon_vqtbl2_v:
3105 case NEON::BI__builtin_neon_vqtbl2q_v:
3106 case NEON::BI__builtin_neon_vtbl3_v:
3107 case NEON::BI__builtin_neon_vqtbl3_v:
3108 case NEON::BI__builtin_neon_vqtbl3q_v:
3109 case NEON::BI__builtin_neon_vtbl4_v:
3110 case NEON::BI__builtin_neon_vqtbl4_v:
3111 case NEON::BI__builtin_neon_vqtbl4q_v:
3112 break;
3113 case NEON::BI__builtin_neon_vtbx1_v:
3114 case NEON::BI__builtin_neon_vqtbx1_v:
3115 case NEON::BI__builtin_neon_vqtbx1q_v:
3116 case NEON::BI__builtin_neon_vtbx2_v:
3117 case NEON::BI__builtin_neon_vqtbx2_v:
3118 case NEON::BI__builtin_neon_vqtbx2q_v:
3119 case NEON::BI__builtin_neon_vtbx3_v:
3120 case NEON::BI__builtin_neon_vqtbx3_v:
3121 case NEON::BI__builtin_neon_vqtbx3q_v:
3122 case NEON::BI__builtin_neon_vtbx4_v:
3123 case NEON::BI__builtin_neon_vqtbx4_v:
3124 case NEON::BI__builtin_neon_vqtbx4q_v:
3125 break;
3126 }
3127
3128 assert(E->getNumArgs() >= 3);
3129
3130 // Get the last argument, which specifies the vector type.
3131 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3132  std::optional<llvm::APSInt> Result =
3133      Arg->getIntegerConstantExpr(CGF.getContext());
3134 if (!Result)
3135 return nullptr;
3136
3137 // Determine the type of this overloaded NEON intrinsic.
3138 NeonTypeFlags Type = Result->getZExtValue();
3139 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
3140 if (!Ty)
3141 return nullptr;
3142
3143 CodeGen::CGBuilderTy &Builder = CGF.Builder;
3144
3145  // AArch64 scalar builtins are not overloaded; they do not have an extra
3146  // argument that specifies the vector type, so handle each case separately.
3147 switch (BuiltinID) {
3148 case NEON::BI__builtin_neon_vtbl1_v: {
3149 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
3150 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3151 }
3152 case NEON::BI__builtin_neon_vtbl2_v: {
3153 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
3154 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3155 }
3156 case NEON::BI__builtin_neon_vtbl3_v: {
3157 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
3158 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3159 }
3160 case NEON::BI__builtin_neon_vtbl4_v: {
3161 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
3162 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3163 }
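  // AArch64's TBX treats a one-register table as 16 bytes, while vtbx1's
  // table is only 8, so the builtin is emulated below (sketch of the emitted
  // pattern): a tbl1 lookup combined with a compare/select that keeps the
  // original destination lane wherever the index byte is out of range (>= 8).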
3164 case NEON::BI__builtin_neon_vtbx1_v: {
3165 Value *TblRes =
3166 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
3167 Intrinsic::aarch64_neon_tbl1, "vtbl1");
3168
3169 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
3170 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
3171 CmpRes = Builder.CreateSExt(CmpRes, Ty);
3172
3173 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
3174 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
3175 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
3176 }
3177 case NEON::BI__builtin_neon_vtbx2_v: {
3178 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
3179 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
3180 }
3181 case NEON::BI__builtin_neon_vtbx3_v: {
3182 Value *TblRes =
3183 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
3184 Intrinsic::aarch64_neon_tbl2, "vtbl2");
3185
3186 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
3187 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
3188 TwentyFourV);
3189 CmpRes = Builder.CreateSExt(CmpRes, Ty);
3190
3191 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
3192 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
3193 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
3194 }
3195 case NEON::BI__builtin_neon_vtbx4_v: {
3196 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
3197 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
3198 }
3199 case NEON::BI__builtin_neon_vqtbl1_v:
3200 case NEON::BI__builtin_neon_vqtbl1q_v:
3201 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
3202 case NEON::BI__builtin_neon_vqtbl2_v:
3203 case NEON::BI__builtin_neon_vqtbl2q_v: {
3204 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
3205 case NEON::BI__builtin_neon_vqtbl3_v:
3206 case NEON::BI__builtin_neon_vqtbl3q_v:
3207 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
3208 case NEON::BI__builtin_neon_vqtbl4_v:
3209 case NEON::BI__builtin_neon_vqtbl4q_v:
3210 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
3211 case NEON::BI__builtin_neon_vqtbx1_v:
3212 case NEON::BI__builtin_neon_vqtbx1q_v:
3213 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
3214 case NEON::BI__builtin_neon_vqtbx2_v:
3215 case NEON::BI__builtin_neon_vqtbx2q_v:
3216 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
3217 case NEON::BI__builtin_neon_vqtbx3_v:
3218 case NEON::BI__builtin_neon_vqtbx3q_v:
3219 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
3220 case NEON::BI__builtin_neon_vqtbx4_v:
3221 case NEON::BI__builtin_neon_vqtbx4q_v:
3222 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
3223 }
3224 }
3225
3226 if (!Int)
3227 return nullptr;
3228
3229 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
3230 return CGF.EmitNeonCall(F, Ops, s);
3231}
3232
3233Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
3234  auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
3235 Op = Builder.CreateBitCast(Op, Int16Ty);
3236 Value *V = PoisonValue::get(VTy);
3237 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3238 Op = Builder.CreateInsertElement(V, Op, CI);
3239 return Op;
3240}
3241
3242/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
3243/// access builtin. Only required if it can't be inferred from the base pointer
3244/// operand.
3245llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
3246  switch (TypeFlags.getMemEltType()) {
3247 case SVETypeFlags::MemEltTyDefault:
3248 return getEltType(TypeFlags);
3249 case SVETypeFlags::MemEltTyInt8:
3250 return Builder.getInt8Ty();
3251 case SVETypeFlags::MemEltTyInt16:
3252 return Builder.getInt16Ty();
3253 case SVETypeFlags::MemEltTyInt32:
3254 return Builder.getInt32Ty();
3255 case SVETypeFlags::MemEltTyInt64:
3256 return Builder.getInt64Ty();
3257 }
3258 llvm_unreachable("Unknown MemEltType");
3259}
3260
3261llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
3262 switch (TypeFlags.getEltType()) {
3263 default:
3264 llvm_unreachable("Invalid SVETypeFlag!");
3265
3266 case SVETypeFlags::EltTyMFloat8:
3267 case SVETypeFlags::EltTyInt8:
3268 return Builder.getInt8Ty();
3269 case SVETypeFlags::EltTyInt16:
3270 return Builder.getInt16Ty();
3271 case SVETypeFlags::EltTyInt32:
3272 return Builder.getInt32Ty();
3273 case SVETypeFlags::EltTyInt64:
3274 return Builder.getInt64Ty();
3275 case SVETypeFlags::EltTyInt128:
3276 return Builder.getInt128Ty();
3277
3278 case SVETypeFlags::EltTyFloat16:
3279 return Builder.getHalfTy();
3280 case SVETypeFlags::EltTyFloat32:
3281 return Builder.getFloatTy();
3282 case SVETypeFlags::EltTyFloat64:
3283 return Builder.getDoubleTy();
3284
3285 case SVETypeFlags::EltTyBFloat16:
3286 return Builder.getBFloatTy();
3287
3288 case SVETypeFlags::EltTyBool8:
3289 case SVETypeFlags::EltTyBool16:
3290 case SVETypeFlags::EltTyBool32:
3291 case SVETypeFlags::EltTyBool64:
3292 return Builder.getInt1Ty();
3293 }
3294}
3295
3296// Return the llvm predicate vector type corresponding to the specified element
3297// TypeFlags.
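// E.g. (illustrative) EltTyFloat64 maps to <vscale x 2 x i1>: one predicate
// bit per 64-bit lane of a 128-bit SVE granule.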
3298llvm::ScalableVectorType *
3299CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
3300  switch (TypeFlags.getEltType()) {
3301 default: llvm_unreachable("Unhandled SVETypeFlag!");
3302
3303 case SVETypeFlags::EltTyInt8:
3304 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3305 case SVETypeFlags::EltTyInt16:
3306 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3307 case SVETypeFlags::EltTyInt32:
3308 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3309 case SVETypeFlags::EltTyInt64:
3310 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3311
3312 case SVETypeFlags::EltTyBFloat16:
3313 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3314 case SVETypeFlags::EltTyFloat16:
3315 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3316 case SVETypeFlags::EltTyFloat32:
3317 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3318 case SVETypeFlags::EltTyFloat64:
3319 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3320
3321 case SVETypeFlags::EltTyBool8:
3322 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3323 case SVETypeFlags::EltTyBool16:
3324 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3325 case SVETypeFlags::EltTyBool32:
3326 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3327 case SVETypeFlags::EltTyBool64:
3328 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3329 }
3330}
3331
3332// Return the llvm vector type corresponding to the specified element TypeFlags.
3333llvm::ScalableVectorType *
3334CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
3335  switch (TypeFlags.getEltType()) {
3336 default:
3337 llvm_unreachable("Invalid SVETypeFlag!");
3338
3339 case SVETypeFlags::EltTyInt8:
3340 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
3341 case SVETypeFlags::EltTyInt16:
3342 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
3343 case SVETypeFlags::EltTyInt32:
3344 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
3345 case SVETypeFlags::EltTyInt64:
3346 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
3347
3348 case SVETypeFlags::EltTyMFloat8:
3349 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
3350 case SVETypeFlags::EltTyFloat16:
3351 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
3352 case SVETypeFlags::EltTyBFloat16:
3353 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
3354 case SVETypeFlags::EltTyFloat32:
3355 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
3356 case SVETypeFlags::EltTyFloat64:
3357 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
3358
3359 case SVETypeFlags::EltTyBool8:
3360 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3361 case SVETypeFlags::EltTyBool16:
3362 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3363 case SVETypeFlags::EltTyBool32:
3364 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3365 case SVETypeFlags::EltTyBool64:
3366 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3367 }
3368}
3369
3370llvm::Value *
3371CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
3372  Function *Ptrue =
3373 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
3374 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
3375}
3376
3377constexpr unsigned SVEBitsPerBlock = 128;
3378
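// E.g. (illustrative) an i16 element type gives 128 / 16 = 8 elements, i.e.
// <vscale x 8 x i16>.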
3379static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
3380 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
3381 return llvm::ScalableVectorType::get(EltTy, NumElts);
3382}
3383
3384// Reinterpret the input predicate so that it can be used to correctly isolate
3385// the elements of the specified datatype.
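// E.g. (illustrative) an svbool_t (<vscale x 16 x i1>) used with
// <vscale x 2 x i64> data is narrowed via @llvm.aarch64.sve.convert.from.svbool
// to <vscale x 2 x i1>.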
3386Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
3387                                             llvm::ScalableVectorType *VTy) {
3388
3389 if (isa<TargetExtType>(Pred->getType()) &&
3390 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
3391 return Pred;
3392
3393 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
3394 if (Pred->getType() == RTy)
3395 return Pred;
3396
3397 unsigned IntID;
3398 llvm::Type *IntrinsicTy;
3399 switch (VTy->getMinNumElements()) {
3400 default:
3401 llvm_unreachable("unsupported element count!");
3402 case 1:
3403 case 2:
3404 case 4:
3405 case 8:
3406 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
3407 IntrinsicTy = RTy;
3408 break;
3409 case 16:
3410 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
3411 IntrinsicTy = Pred->getType();
3412 break;
3413 }
3414
3415 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
3416 Value *C = Builder.CreateCall(F, Pred);
3417 assert(C->getType() == RTy && "Unexpected return type!");
3418 return C;
3419}
3420
3421Value *CodeGenFunction::EmitSVEPredicateTupleCast(Value *PredTuple,
3422                                                  llvm::StructType *Ty) {
3423 if (PredTuple->getType() == Ty)
3424 return PredTuple;
3425
3426 Value *Ret = llvm::PoisonValue::get(Ty);
3427 for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
3428 Value *Pred = Builder.CreateExtractValue(PredTuple, I);
3429 Pred = EmitSVEPredicateCast(
3430 Pred, cast<llvm::ScalableVectorType>(Ty->getTypeAtIndex(I)));
3431 Ret = Builder.CreateInsertValue(Ret, Pred, I);
3432 }
3433
3434 return Ret;
3435}
3436
3437Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
3438                                          SmallVectorImpl<Value *> &Ops,
3439                                          unsigned IntID) {
3440 auto *ResultTy = getSVEType(TypeFlags);
3441 auto *OverloadedTy =
3442 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
3443
3444 Function *F = nullptr;
3445 if (Ops[1]->getType()->isVectorTy())
3446 // This is the "vector base, scalar offset" case. In order to uniquely
3447 // map this built-in to an LLVM IR intrinsic, we need both the return type
3448 // and the type of the vector base.
3449 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
3450 else
3451 // This is the "scalar base, vector offset case". The type of the offset
3452 // is encoded in the name of the intrinsic. We only need to specify the
3453 // return type in order to uniquely map this built-in to an LLVM IR
3454 // intrinsic.
3455 F = CGM.getIntrinsic(IntID, OverloadedTy);
3456
3457 // At the ACLE level there's only one predicate type, svbool_t, which is
3458 // mapped to <n x 16 x i1>. However, this might be incompatible with the
3459 // actual type being loaded. For example, when loading doubles (i64) the
3460 // predicate should be <n x 2 x i1> instead. At the IR level the type of
3461 // the predicate and the data being loaded must match. Cast to the type
3462 // expected by the intrinsic. The intrinsic itself should be defined in
3463  // a way that enforces relations between parameter types.
3464 Ops[0] = EmitSVEPredicateCast(
3465 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
3466
3467 // Pass 0 when the offset is missing. This can only be applied when using
3468 // the "vector base" addressing mode for which ACLE allows no offset. The
3469 // corresponding LLVM IR always requires an offset.
3470 if (Ops.size() == 2) {
3471 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
3472 Ops.push_back(ConstantInt::get(Int64Ty, 0));
3473 }
3474
3475 // For "vector base, scalar index" scale the index so that it becomes a
3476 // scalar offset.
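  // E.g. (illustrative) for 32-bit elements the index is shifted left by
  // log2(4) = 2 to turn it into a byte offset.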
3477 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
3478 unsigned BytesPerElt =
3479 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
3480 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
3481 }
3482
3483 Value *Call = Builder.CreateCall(F, Ops);
3484
3485 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
3486 // other cases it's folded into a nop.
3487 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
3488 : Builder.CreateSExt(Call, ResultTy);
3489}
3490
3491Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
3492                                            SmallVectorImpl<Value *> &Ops,
3493                                            unsigned IntID) {
3494 auto *SrcDataTy = getSVEType(TypeFlags);
3495 auto *OverloadedTy =
3496 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
3497
3498 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
3499 // it's the first argument. Move it accordingly.
3500 Ops.insert(Ops.begin(), Ops.pop_back_val());
3501
3502 Function *F = nullptr;
3503 if (Ops[2]->getType()->isVectorTy())
3504 // This is the "vector base, scalar offset" case. In order to uniquely
3505 // map this built-in to an LLVM IR intrinsic, we need both the return type
3506 // and the type of the vector base.
3507 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
3508 else
3509 // This is the "scalar base, vector offset case". The type of the offset
3510 // is encoded in the name of the intrinsic. We only need to specify the
3511 // return type in order to uniquely map this built-in to an LLVM IR
3512 // intrinsic.
3513 F = CGM.getIntrinsic(IntID, OverloadedTy);
3514
3515 // Pass 0 when the offset is missing. This can only be applied when using
3516 // the "vector base" addressing mode for which ACLE allows no offset. The
3517 // corresponding LLVM IR always requires an offset.
3518 if (Ops.size() == 3) {
3519 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
3520 Ops.push_back(ConstantInt::get(Int64Ty, 0));
3521 }
3522
3523 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
3524 // folded into a nop.
3525 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
3526
3527 // At the ACLE level there's only one predicate type, svbool_t, which is
3528 // mapped to <n x 16 x i1>. However, this might be incompatible with the
3529 // actual type being stored. For example, when storing doubles (i64) the
3530  // predicate should be <n x 2 x i1> instead. At the IR level the type of
3531 // the predicate and the data being stored must match. Cast to the type
3532 // expected by the intrinsic. The intrinsic itself should be defined in
3533 // a way that enforces relations between parameter types.
3534 Ops[1] = EmitSVEPredicateCast(
3535 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
3536
3537 // For "vector base, scalar index" scale the index so that it becomes a
3538 // scalar offset.
3539 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
3540 unsigned BytesPerElt =
3541 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
3542 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
3543 }
3544
3545 return Builder.CreateCall(F, Ops);
3546}
3547
3548Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
3549                                              SmallVectorImpl<Value *> &Ops,
3550                                              unsigned IntID) {
3551 // The gather prefetches are overloaded on the vector input - this can either
3552 // be the vector of base addresses or vector of offsets.
3553 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
3554 if (!OverloadedTy)
3555 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
3556
3557 // Cast the predicate from svbool_t to the right number of elements.
3558 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
3559
3560 // vector + imm addressing modes
3561 if (Ops[1]->getType()->isVectorTy()) {
3562 if (Ops.size() == 3) {
3563 // Pass 0 for 'vector+imm' when the index is omitted.
3564 Ops.push_back(ConstantInt::get(Int64Ty, 0));
3565
3566 // The sv_prfop is the last operand in the builtin and IR intrinsic.
3567 std::swap(Ops[2], Ops[3]);
3568 } else {
3569 // Index needs to be passed as scaled offset.
3570 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
3571 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
3572 if (BytesPerElt > 1)
3573 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
3574 }
3575 }
3576
3577 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
3578 return Builder.CreateCall(F, Ops);
3579}
3580
3581Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
3582                                          SmallVectorImpl<Value *> &Ops,
3583                                          unsigned IntID) {
3584 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
3585 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
3586 Value *BasePtr = Ops[1];
3587
3588 // Does the load have an offset?
3589 if (Ops.size() > 2)
3590 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
3591
3592 Function *F = CGM.getIntrinsic(IntID, {VTy});
3593 return Builder.CreateCall(F, {Predicate, BasePtr});
3594}
3595
3596Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
3597                                           SmallVectorImpl<Value *> &Ops,
3598                                           unsigned IntID) {
3599 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
3600
3601 unsigned N;
3602 switch (IntID) {
3603 case Intrinsic::aarch64_sve_st2:
3604 case Intrinsic::aarch64_sve_st1_pn_x2:
3605 case Intrinsic::aarch64_sve_stnt1_pn_x2:
3606 case Intrinsic::aarch64_sve_st2q:
3607 N = 2;
3608 break;
3609 case Intrinsic::aarch64_sve_st3:
3610 case Intrinsic::aarch64_sve_st3q:
3611 N = 3;
3612 break;
3613 case Intrinsic::aarch64_sve_st4:
3614 case Intrinsic::aarch64_sve_st1_pn_x4:
3615 case Intrinsic::aarch64_sve_stnt1_pn_x4:
3616 case Intrinsic::aarch64_sve_st4q:
3617 N = 4;
3618 break;
3619 default:
3620 llvm_unreachable("unknown intrinsic!");
3621 }
3622
3623 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
3624 Value *BasePtr = Ops[1];
3625
3626 // Does the store have an offset?
3627 if (Ops.size() > (2 + N))
3628 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
3629
3630 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
3631 // need to break up the tuple vector.
3632  SmallVector<llvm::Value *, 5> Operands;
3633  for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
3634 Operands.push_back(Ops[I]);
3635 Operands.append({Predicate, BasePtr});
3636 Function *F = CGM.getIntrinsic(IntID, { VTy });
3637
3638 return Builder.CreateCall(F, Operands);
3639}
3640
3641// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
3642// svpmullt_pair intrinsics, with the exception that their results are bitcast
3643// to a wider type.
3644Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
3645                                     SmallVectorImpl<Value *> &Ops,
3646                                     unsigned BuiltinID) {
3647 // Splat scalar operand to vector (intrinsics with _n infix)
3648 if (TypeFlags.hasSplatOperand()) {
3649 unsigned OpNo = TypeFlags.getSplatOperand();
3650 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
3651 }
3652
3653 // The pair-wise function has a narrower overloaded type.
3654 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
3655 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
3656
3657 // Now bitcast to the wider result type.
3658 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
3659 return EmitSVEReinterpret(Call, Ty);
3660}
3661
3662Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
3663                                    ArrayRef<Value *> Ops, unsigned BuiltinID) {
3664 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
3665 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
3666 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
3667}
3668
3669Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
3670                                            SmallVectorImpl<Value *> &Ops,
3671                                            unsigned BuiltinID) {
3672 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
3673 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
3674 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
3675
3676 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
3677 Value *BasePtr = Ops[1];
3678
3679 // Implement the index operand if not omitted.
3680 if (Ops.size() > 3)
3681 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
3682
3683 Value *PrfOp = Ops.back();
3684
3685 Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
3686 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
3687}
3688
3689Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
3690                                          llvm::Type *ReturnTy,
3691                                          SmallVectorImpl<Value *> &Ops,
3692                                          unsigned IntrinsicID,
3693 bool IsZExtReturn) {
3694 QualType LangPTy = E->getArg(1)->getType();
3695 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
3696 LangPTy->castAs<PointerType>()->getPointeeType());
3697
3698  // Mfloat8 types are stored as a vector, so extra work is needed to
3699  // extract the scalar element type.
3700 if (MemEltTy->isVectorTy()) {
3701 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
3702 "Only <1 x i8> expected");
3703 MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
3704 }
3705
3706 // The vector type that is returned may be different from the
3707 // eventual type loaded from memory.
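  // E.g. (illustrative) svld1sb_s16 loads <vscale x 8 x i8> from memory and
  // sign-extends it to the <vscale x 8 x i16> return type.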
3708 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
3709 llvm::ScalableVectorType *MemoryTy = nullptr;
3710 llvm::ScalableVectorType *PredTy = nullptr;
3711 bool IsQuadLoad = false;
3712 switch (IntrinsicID) {
3713 case Intrinsic::aarch64_sve_ld1uwq:
3714 case Intrinsic::aarch64_sve_ld1udq:
3715 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
3716 PredTy = llvm::ScalableVectorType::get(
3717 llvm::Type::getInt1Ty(getLLVMContext()), 1);
3718 IsQuadLoad = true;
3719 break;
3720 default:
3721 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
3722 PredTy = MemoryTy;
3723 break;
3724 }
3725
3726 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
3727 Value *BasePtr = Ops[1];
3728
3729 // Does the load have an offset?
3730 if (Ops.size() > 2)
3731 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
3732
3733 Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
3734 auto *Load =
3735 cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
3736 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
3737 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
3738
3739 if (IsQuadLoad)
3740 return Load;
3741
3742 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
3743 : Builder.CreateSExt(Load, VectorTy);
3744}
3745
3746Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
3747                                           SmallVectorImpl<Value *> &Ops,
3748                                           unsigned IntrinsicID) {
3749 QualType LangPTy = E->getArg(1)->getType();
3750 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
3751 LangPTy->castAs<PointerType>()->getPointeeType());
3752
3753  // Mfloat8 types are stored as a vector, so extra work is needed to
3754  // extract the scalar element type.
3755 if (MemEltTy->isVectorTy()) {
3756 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
3757 "Only <1 x i8> expected");
3758 MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
3759 }
3760
3761 // The vector type that is stored may be different from the
3762 // eventual type stored to memory.
3763 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
3764 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
3765
3766 auto PredTy = MemoryTy;
3767 auto AddrMemoryTy = MemoryTy;
3768 bool IsQuadStore = false;
3769
3770 switch (IntrinsicID) {
3771 case Intrinsic::aarch64_sve_st1wq:
3772 case Intrinsic::aarch64_sve_st1dq:
3773 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
3774 PredTy =
3775 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
3776 IsQuadStore = true;
3777 break;
3778 default:
3779 break;
3780 }
3781 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
3782 Value *BasePtr = Ops[1];
3783
3784 // Does the store have an offset?
3785 if (Ops.size() == 4)
3786 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
3787
3788 // Last value is always the data
3789 Value *Val =
3790 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
3791
3792 Function *F =
3793 CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
3794 auto *Store =
3795 cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
3796 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
3797 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
3798 return Store;
3799}
3800
3801Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
3802                                      SmallVectorImpl<Value *> &Ops,
3803                                      unsigned IntID) {
3804  Ops[2] = EmitSVEPredicateCast(
3805      Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
3806
3807 SmallVector<Value *> NewOps;
3808 NewOps.push_back(Ops[2]);
3809
3810 llvm::Value *BasePtr = Ops[3];
3811 llvm::Value *RealSlice = Ops[1];
3812 // If the intrinsic contains the vnum parameter, multiply it with the vector
3813 // size in bytes.
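  // E.g. (illustrative) the base pointer advances by vnum * (cntsd * 8)
  // bytes, i.e. vnum whole streaming vectors, and the slice index is
  // incremented by vnum to match.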
3814 if (Ops.size() == 5) {
3815 Function *StreamingVectorLength =
3816 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd);
3817 llvm::Value *StreamingVectorLengthCall =
3818 Builder.CreateMul(Builder.CreateCall(StreamingVectorLength),
3819 llvm::ConstantInt::get(Int64Ty, 8), "svl",
3820 /* HasNUW */ true, /* HasNSW */ true);
3821 llvm::Value *Mulvl =
3822 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
3823 // The type of the ptr parameter is void *, so use Int8Ty here.
3824 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
3825 RealSlice = Builder.CreateZExt(RealSlice, Int64Ty);
3826 RealSlice = Builder.CreateAdd(RealSlice, Ops[4]);
3827 RealSlice = Builder.CreateTrunc(RealSlice, Int32Ty);
3828 }
3829 NewOps.push_back(BasePtr);
3830 NewOps.push_back(Ops[0]);
3831 NewOps.push_back(RealSlice);
3832 Function *F = CGM.getIntrinsic(IntID);
3833 return Builder.CreateCall(F, NewOps);
3834}
3835
3836Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
3837                                         SmallVectorImpl<Value *> &Ops,
3838                                         unsigned IntID) {
3839 auto *VecTy = getSVEType(TypeFlags);
3840 Function *F = CGM.getIntrinsic(IntID, VecTy);
3841 if (TypeFlags.isReadZA())
3842 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
3843 else if (TypeFlags.isWriteZA())
3844 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
3845 return Builder.CreateCall(F, Ops);
3846}
3847
3848Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
3849                                    SmallVectorImpl<Value *> &Ops,
3850                                    unsigned IntID) {
3851  // The svzero_za() intrinsic zeroes the entire ZA tile and has no parameters.
3852 if (Ops.size() == 0)
3853 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
3854 Function *F = CGM.getIntrinsic(IntID, {});
3855 return Builder.CreateCall(F, Ops);
3856}
3857
3858Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
3859                                      SmallVectorImpl<Value *> &Ops,
3860                                      unsigned IntID) {
3861 if (Ops.size() == 2)
3862 Ops.push_back(Builder.getInt32(0));
3863 else
3864 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
3865 Function *F = CGM.getIntrinsic(IntID, {});
3866 return Builder.CreateCall(F, Ops);
3867}
3868
3869// Build a splat of a scalar across a scalable vector, with the element count
3870// taken from the requested vector type.
3871Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
3872 return Builder.CreateVectorSplat(
3873 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
3874}
3875
3876Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
3877  if (auto *Ty = Scalar->getType(); Ty->isVectorTy()) {
3878#ifndef NDEBUG
3879 auto *VecTy = cast<llvm::VectorType>(Ty);
3880 ElementCount EC = VecTy->getElementCount();
3881 assert(EC.isScalar() && VecTy->getElementType() == Int8Ty &&
3882 "Only <1 x i8> expected");
3883#endif
3884 Scalar = Builder.CreateExtractElement(Scalar, uint64_t(0));
3885 }
3886 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
3887}
3888
3889Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
3890  // FIXME: For big endian this needs an additional REV, or needs a separate
3891 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
3892 // instruction is defined as 'bitwise' equivalent from memory point of
3893 // view (when storing/reloading), whereas the svreinterpret builtin
3894 // implements bitwise equivalent cast from register point of view.
3895 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
3896
3897 if (auto *StructTy = dyn_cast<StructType>(Ty)) {
3898 Value *Tuple = llvm::PoisonValue::get(Ty);
3899
3900 for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
3901 Value *In = Builder.CreateExtractValue(Val, I);
3902 Value *Out = Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
3903 Tuple = Builder.CreateInsertValue(Tuple, Out, I);
3904 }
3905
3906 return Tuple;
3907 }
3908
3909 return Builder.CreateBitCast(Val, Ty);
3910}
3911
3912static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
3913                                      SmallVectorImpl<Value *> &Ops) {
3914 auto *SplatZero = Constant::getNullValue(Ty);
3915 Ops.insert(Ops.begin(), SplatZero);
3916}
3917
3918static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
3919                                       SmallVectorImpl<Value *> &Ops) {
3920 auto *SplatUndef = UndefValue::get(Ty);
3921 Ops.insert(Ops.begin(), SplatUndef);
3922}
3923
3924SmallVector<llvm::Type *, 2>
3925CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
3926                                     llvm::Type *ResultType,
3927 ArrayRef<Value *> Ops) {
3928 if (TypeFlags.isOverloadNone())
3929 return {};
3930
3931 llvm::Type *DefaultType = getSVEType(TypeFlags);
3932
3933 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
3934 return {DefaultType, Ops[1]->getType()};
3935
3936 if (TypeFlags.isOverloadWhileRW())
3937 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
3938
3939 if (TypeFlags.isOverloadFirstandLast())
3940 return {Ops[0]->getType(), Ops.back()->getType()};
3941
3942 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
3943 ResultType->isVectorTy())
3944 return {ResultType, Ops[1]->getType()};
3945
3946 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
3947 return {DefaultType};
3948}
3949
3950Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
3951                                             ArrayRef<Value *> Ops) {
3952 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
3953         "Expects TypeFlags.isTupleSet() or TypeFlags.isTupleGet()");
3954 unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();
3955
3956 if (TypeFlags.isTupleSet())
3957 return Builder.CreateInsertValue(Ops[0], Ops[2], Idx);
3958 return Builder.CreateExtractValue(Ops[0], Idx);
3959}
3960
3961Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
3962                                           llvm::Type *Ty,
3963 ArrayRef<Value *> Ops) {
3964  assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
3965
3966 Value *Tuple = llvm::PoisonValue::get(Ty);
3967 for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
3968 Tuple = Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
3969
3970 return Tuple;
3971}
3972
3973void CodeGenFunction::GetAArch64SVEProcessedOperands(
3974    unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
3975 SVETypeFlags TypeFlags) {
3976 // Find out if any arguments are required to be integer constant expressions.
3977 unsigned ICEArguments = 0;
3978  ASTContext::GetBuiltinTypeError Error;
3979  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3980 assert(Error == ASTContext::GE_None && "Should not codegen an error");
3981
3982 // Tuple set/get only requires one insert/extract vector, which is
3983 // created by EmitSVETupleSetOrGet.
3984 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
3985
3986 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
3987 bool IsICE = ICEArguments & (1 << i);
3988 Value *Arg = EmitScalarExpr(E->getArg(i));
3989
3990 if (IsICE) {
3991 // If this is required to be a constant, constant fold it so that we know
3992 // that the generated intrinsic gets a ConstantInt.
3993    std::optional<llvm::APSInt> Result =
3994        E->getArg(i)->getIntegerConstantExpr(getContext());
3995 assert(Result && "Expected argument to be a constant");
3996
3997    // Immediates for SVE llvm intrinsics are always 32-bit. We can safely
3998 // truncate because the immediate has been range checked and no valid
3999 // immediate requires more than a handful of bits.
4000 *Result = Result->extOrTrunc(32);
4001 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
4002 continue;
4003 }
4004
4005 if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
4006 for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
4007 Ops.push_back(Builder.CreateExtractValue(Arg, I));
4008
4009 continue;
4010 }
4011
4012 Ops.push_back(Arg);
4013 }
4014}
4015
4016Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
4017                                                  const CallExpr *E) {
4018 llvm::Type *Ty = ConvertType(E->getType());
4019 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
4020 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
4021 Value *Val = EmitScalarExpr(E->getArg(0));
4022 return EmitSVEReinterpret(Val, Ty);
4023 }
4024
4025  auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
4026                                              AArch64SVEIntrinsicsProvenSorted);
4027
4028  llvm::SmallVector<Value *, 4> Ops;
4029 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4030 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4031
4032 if (TypeFlags.isLoad())
4033 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
4034 TypeFlags.isZExtReturn());
4035 if (TypeFlags.isStore())
4036 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
4037 if (TypeFlags.isGatherLoad())
4038 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4039 if (TypeFlags.isScatterStore())
4040 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4041 if (TypeFlags.isPrefetch())
4042 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4043 if (TypeFlags.isGatherPrefetch())
4044 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4045 if (TypeFlags.isStructLoad())
4046 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4047 if (TypeFlags.isStructStore())
4048 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4049 if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
4050 return EmitSVETupleSetOrGet(TypeFlags, Ops);
4051 if (TypeFlags.isTupleCreate())
4052 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
4053 if (TypeFlags.isUndef())
4054 return UndefValue::get(Ty);
4055
4056 // Handle built-ins for which there is a corresponding LLVM Intrinsic.
4057 // -------------------------------------------------------------------
4058 if (Builtin->LLVMIntrinsic != 0) {
4059    // Emit a call to set FPMR for intrinsics that require it.
4060 if (TypeFlags.setsFPMR())
4061 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
4062 Ops.pop_back_val());
4063    if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
4064      InsertExplicitZeroOperand(Builder, Ty, Ops);
4065
4066    if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
4067      InsertExplicitUndefOperand(Builder, Ty, Ops);
4068
4069 // Some ACLE builtins leave out the argument to specify the predicate
4070 // pattern, which is expected to be expanded to an SV_ALL pattern.
4071 if (TypeFlags.isAppendSVALL())
4072 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
4073 if (TypeFlags.isInsertOp1SVALL())
4074 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
4075
4076 // Predicates must match the main datatype.
4077 for (Value *&Op : Ops)
4078 if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
4079 if (PredTy->getElementType()->isIntegerTy(1))
4080 Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));
4081
4082 // Splat scalar operand to vector (intrinsics with _n infix)
4083 if (TypeFlags.hasSplatOperand()) {
4084 unsigned OpNo = TypeFlags.getSplatOperand();
4085 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
4086 }
4087
4088 if (TypeFlags.isReverseCompare())
4089 std::swap(Ops[1], Ops[2]);
4090 else if (TypeFlags.isReverseUSDOT())
4091 std::swap(Ops[1], Ops[2]);
4092 else if (TypeFlags.isReverseMergeAnyBinOp() &&
4093 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4094 std::swap(Ops[1], Ops[2]);
4095 else if (TypeFlags.isReverseMergeAnyAccOp() &&
4096 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4097 std::swap(Ops[1], Ops[3]);
4098
4099 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
4100 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
4101 llvm::Type *OpndTy = Ops[1]->getType();
4102 auto *SplatZero = Constant::getNullValue(OpndTy);
4103 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
4104 }
4105
4106 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
4107 getSVEOverloadTypes(TypeFlags, Ty, Ops));
4108 Value *Call = Builder.CreateCall(F, Ops);
4109
4110 if (Call->getType() == Ty)
4111 return Call;
4112
4113 // Predicate results must be converted to svbool_t.
4114 if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ty))
4115 return EmitSVEPredicateCast(Call, PredTy);
4116 if (auto PredTupleTy = dyn_cast<llvm::StructType>(Ty))
4117 return EmitSVEPredicateTupleCast(Call, PredTupleTy);
4118
4119 llvm_unreachable("unsupported element count!");
4120 }
4121
4122 switch (BuiltinID) {
4123 default:
4124 return nullptr;
4125
4126 case SVE::BI__builtin_sve_svreinterpret_b: {
4127 auto SVCountTy =
4128 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4129 Function *CastFromSVCountF =
4130 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4131 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
4132 }
4133 case SVE::BI__builtin_sve_svreinterpret_c: {
4134 auto SVCountTy =
4135 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4136 Function *CastToSVCountF =
4137 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4138 return Builder.CreateCall(CastToSVCountF, Ops[0]);
4139 }
4140
4141 case SVE::BI__builtin_sve_svpsel_lane_b8:
4142 case SVE::BI__builtin_sve_svpsel_lane_b16:
4143 case SVE::BI__builtin_sve_svpsel_lane_b32:
4144 case SVE::BI__builtin_sve_svpsel_lane_b64:
4145 case SVE::BI__builtin_sve_svpsel_lane_c8:
4146 case SVE::BI__builtin_sve_svpsel_lane_c16:
4147 case SVE::BI__builtin_sve_svpsel_lane_c32:
4148 case SVE::BI__builtin_sve_svpsel_lane_c64: {
4149 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
4150 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
4151 "aarch64.svcount")) &&
4152 "Unexpected TargetExtType");
4153 auto SVCountTy =
4154 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4155 Function *CastFromSVCountF =
4156 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4157 Function *CastToSVCountF =
4158 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4159
4160 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
4161 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
4162 llvm::Value *Ops0 =
4163 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
4164 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
4165 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
4166 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
4167 }
4168 case SVE::BI__builtin_sve_svmov_b_z: {
4169 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
4170 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4171 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4172 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
4173 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
4174 }
4175
4176 case SVE::BI__builtin_sve_svnot_b_z: {
4177 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
4178 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4179 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4180 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
4181 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
4182 }
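// Sketch of the calls produced by the two identities above (types elided):
//   svmov_b_z(pg, op)  ->  @llvm.aarch64.sve.and.z(pg, op, op)
//   svnot_b_z(pg, op)  ->  @llvm.aarch64.sve.eor.z(pg, op, pg)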
4183
4184 case SVE::BI__builtin_sve_svmovlb_u16:
4185 case SVE::BI__builtin_sve_svmovlb_u32:
4186 case SVE::BI__builtin_sve_svmovlb_u64:
4187 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
4188
4189 case SVE::BI__builtin_sve_svmovlb_s16:
4190 case SVE::BI__builtin_sve_svmovlb_s32:
4191 case SVE::BI__builtin_sve_svmovlb_s64:
4192 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
4193
4194 case SVE::BI__builtin_sve_svmovlt_u16:
4195 case SVE::BI__builtin_sve_svmovlt_u32:
4196 case SVE::BI__builtin_sve_svmovlt_u64:
4197 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
4198
4199 case SVE::BI__builtin_sve_svmovlt_s16:
4200 case SVE::BI__builtin_sve_svmovlt_s32:
4201 case SVE::BI__builtin_sve_svmovlt_s64:
4202 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
4203
4204 case SVE::BI__builtin_sve_svpmullt_u16:
4205 case SVE::BI__builtin_sve_svpmullt_u64:
4206 case SVE::BI__builtin_sve_svpmullt_n_u16:
4207 case SVE::BI__builtin_sve_svpmullt_n_u64:
4208 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
4209
4210 case SVE::BI__builtin_sve_svpmullb_u16:
4211 case SVE::BI__builtin_sve_svpmullb_u64:
4212 case SVE::BI__builtin_sve_svpmullb_n_u16:
4213 case SVE::BI__builtin_sve_svpmullb_n_u64:
4214 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
4215
4216 case SVE::BI__builtin_sve_svdup_n_b8:
4217 case SVE::BI__builtin_sve_svdup_n_b16:
4218 case SVE::BI__builtin_sve_svdup_n_b32:
4219 case SVE::BI__builtin_sve_svdup_n_b64: {
4220 Value *CmpNE =
4221 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
4222 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
4223 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
4224 return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
4225 }
4226
4227 case SVE::BI__builtin_sve_svdupq_n_b8:
4228 case SVE::BI__builtin_sve_svdupq_n_b16:
4229 case SVE::BI__builtin_sve_svdupq_n_b32:
4230 case SVE::BI__builtin_sve_svdupq_n_b64:
4231 case SVE::BI__builtin_sve_svdupq_n_u8:
4232 case SVE::BI__builtin_sve_svdupq_n_s8:
4233 case SVE::BI__builtin_sve_svdupq_n_u64:
4234 case SVE::BI__builtin_sve_svdupq_n_f64:
4235 case SVE::BI__builtin_sve_svdupq_n_s64:
4236 case SVE::BI__builtin_sve_svdupq_n_u16:
4237 case SVE::BI__builtin_sve_svdupq_n_f16:
4238 case SVE::BI__builtin_sve_svdupq_n_bf16:
4239 case SVE::BI__builtin_sve_svdupq_n_s16:
4240 case SVE::BI__builtin_sve_svdupq_n_u32:
4241 case SVE::BI__builtin_sve_svdupq_n_f32:
4242 case SVE::BI__builtin_sve_svdupq_n_s32: {
4243 // These builtins are implemented by building a 128-bit vector from the
4244 // operands and splatting it across the scalable vector with dupq_lane.
4245 unsigned NumOpnds = Ops.size();
4246
4247 bool IsBoolTy =
4248 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
4249
4250 // For svdupq_n_b* the element type is an integer of width 128/numelts,
4251 // so that the compare can use the width that is natural for the expected
4252 // number of predicate lanes.
4253 llvm::Type *EltTy = Ops[0]->getType();
4254 if (IsBoolTy)
4255 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
4256
4257 SmallVector<llvm::Value *, 16> VecOps;
4258 for (unsigned I = 0; I < NumOpnds; ++I)
4259 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
4260 Value *Vec = BuildVector(VecOps);
4261
4262 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
4263 Value *InsertSubVec = Builder.CreateInsertVector(
4264 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, uint64_t(0));
4265
4266 Function *F =
4267 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
4268 Value *DupQLane =
4269 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
4270
4271 if (!IsBoolTy)
4272 return DupQLane;
4273
4274 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4275 Value *Pred = EmitSVEAllTruePred(TypeFlags);
4276
4277 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
4278 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
4279 : Intrinsic::aarch64_sve_cmpne_wide,
4280 OverloadedTy);
4281 Value *Call = Builder.CreateCall(
4282 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
4283 return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
4284 }
4285
4286 case SVE::BI__builtin_sve_svpfalse_b:
4287 return ConstantInt::getFalse(Ty);
4288
4289 case SVE::BI__builtin_sve_svpfalse_c: {
4290 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
4291 Function *CastToSVCountF =
4292 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
4293 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
4294 }
4295
4296 case SVE::BI__builtin_sve_svlen_bf16:
4297 case SVE::BI__builtin_sve_svlen_f16:
4298 case SVE::BI__builtin_sve_svlen_f32:
4299 case SVE::BI__builtin_sve_svlen_f64:
4300 case SVE::BI__builtin_sve_svlen_s8:
4301 case SVE::BI__builtin_sve_svlen_s16:
4302 case SVE::BI__builtin_sve_svlen_s32:
4303 case SVE::BI__builtin_sve_svlen_s64:
4304 case SVE::BI__builtin_sve_svlen_u8:
4305 case SVE::BI__builtin_sve_svlen_u16:
4306 case SVE::BI__builtin_sve_svlen_u32:
4307 case SVE::BI__builtin_sve_svlen_u64: {
4308 SVETypeFlags TF(Builtin->TypeModifier);
4309 return Builder.CreateElementCount(Ty, getSVEType(TF)->getElementCount());
4310 }
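// E.g. svlen_u32(x) never inspects x; it is emitted as the constant
// expression "number of 32-bit lanes", roughly:
//   %vs = call i64 @llvm.vscale.i64()
//   %n = mul i64 %vs, 4
// (an illustrative sketch of what CreateElementCount expands to).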
4311
4312 case SVE::BI__builtin_sve_svtbl2_u8:
4313 case SVE::BI__builtin_sve_svtbl2_s8:
4314 case SVE::BI__builtin_sve_svtbl2_u16:
4315 case SVE::BI__builtin_sve_svtbl2_s16:
4316 case SVE::BI__builtin_sve_svtbl2_u32:
4317 case SVE::BI__builtin_sve_svtbl2_s32:
4318 case SVE::BI__builtin_sve_svtbl2_u64:
4319 case SVE::BI__builtin_sve_svtbl2_s64:
4320 case SVE::BI__builtin_sve_svtbl2_f16:
4321 case SVE::BI__builtin_sve_svtbl2_bf16:
4322 case SVE::BI__builtin_sve_svtbl2_f32:
4323 case SVE::BI__builtin_sve_svtbl2_f64: {
4324 SVETypeFlags TF(Builtin->TypeModifier);
4325 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, getSVEType(TF));
4326 return Builder.CreateCall(F, Ops);
4327 }
4328
4329 case SVE::BI__builtin_sve_svset_neonq_s8:
4330 case SVE::BI__builtin_sve_svset_neonq_s16:
4331 case SVE::BI__builtin_sve_svset_neonq_s32:
4332 case SVE::BI__builtin_sve_svset_neonq_s64:
4333 case SVE::BI__builtin_sve_svset_neonq_u8:
4334 case SVE::BI__builtin_sve_svset_neonq_u16:
4335 case SVE::BI__builtin_sve_svset_neonq_u32:
4336 case SVE::BI__builtin_sve_svset_neonq_u64:
4337 case SVE::BI__builtin_sve_svset_neonq_f16:
4338 case SVE::BI__builtin_sve_svset_neonq_f32:
4339 case SVE::BI__builtin_sve_svset_neonq_f64:
4340 case SVE::BI__builtin_sve_svset_neonq_bf16: {
4341 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], uint64_t(0));
4342 }
4343
4344 case SVE::BI__builtin_sve_svget_neonq_s8:
4345 case SVE::BI__builtin_sve_svget_neonq_s16:
4346 case SVE::BI__builtin_sve_svget_neonq_s32:
4347 case SVE::BI__builtin_sve_svget_neonq_s64:
4348 case SVE::BI__builtin_sve_svget_neonq_u8:
4349 case SVE::BI__builtin_sve_svget_neonq_u16:
4350 case SVE::BI__builtin_sve_svget_neonq_u32:
4351 case SVE::BI__builtin_sve_svget_neonq_u64:
4352 case SVE::BI__builtin_sve_svget_neonq_f16:
4353 case SVE::BI__builtin_sve_svget_neonq_f32:
4354 case SVE::BI__builtin_sve_svget_neonq_f64:
4355 case SVE::BI__builtin_sve_svget_neonq_bf16: {
4356 return Builder.CreateExtractVector(Ty, Ops[0], uint64_t(0));
4357 }
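// E.g. svget_neonq_f32(v) extracts the low 128 bits of the scalable input as
// a fixed NEON vector, roughly:
//   %q = call <4 x float> @llvm.vector.extract.v4f32.nxv4f32(
//            <vscale x 4 x float> %v, i64 0)
// and svset_neonq above is the matching @llvm.vector.insert at index 0.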
4358
4359 case SVE::BI__builtin_sve_svdup_neonq_s8:
4360 case SVE::BI__builtin_sve_svdup_neonq_s16:
4361 case SVE::BI__builtin_sve_svdup_neonq_s32:
4362 case SVE::BI__builtin_sve_svdup_neonq_s64:
4363 case SVE::BI__builtin_sve_svdup_neonq_u8:
4364 case SVE::BI__builtin_sve_svdup_neonq_u16:
4365 case SVE::BI__builtin_sve_svdup_neonq_u32:
4366 case SVE::BI__builtin_sve_svdup_neonq_u64:
4367 case SVE::BI__builtin_sve_svdup_neonq_f16:
4368 case SVE::BI__builtin_sve_svdup_neonq_f32:
4369 case SVE::BI__builtin_sve_svdup_neonq_f64:
4370 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
4371 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
4372 uint64_t(0));
4373 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
4374 {Insert, Builder.getInt64(0)});
4375 }
4376 }
4377
4378 // Should not happen.
4379 return nullptr;
4380}
4381
4382static void swapCommutativeSMEOperands(unsigned BuiltinID,
4383 SmallVectorImpl<Value *> &Ops) {
4384 unsigned MultiVec;
4385 switch (BuiltinID) {
4386 default:
4387 return;
4388 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
4389 MultiVec = 1;
4390 break;
4391 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
4392 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
4393 MultiVec = 2;
4394 break;
4395 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
4396 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
4397 MultiVec = 4;
4398 break;
4399 }
4400
4401 if (MultiVec > 0)
4402 for (unsigned I = 0; I < MultiVec; ++I)
4403 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
4404}
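// A sketch of the vg1x2 case: Ops arrives as {slice, zn0, zn1, zm0, zm1} and
// the loop above swaps the two operand groups pairwise, giving
// {slice, zm0, zm1, zn0, zn1}, so an svsu* builtin can reuse the
// corresponding us* intrinsic, which takes the operands in the other order.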
4405
4406 Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
4407 const CallExpr *E) {
4408 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
4409 AArch64SMEIntrinsicsProvenSorted);
4410
4411 llvm::SmallVector<Value *, 4> Ops;
4412 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4413 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4414
4415 if (TypeFlags.isLoad() || TypeFlags.isStore())
4416 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4417 if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
4418 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4419 if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
4420 BuiltinID == SME::BI__builtin_sme_svzero_za)
4421 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4422 if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
4423 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
4424 BuiltinID == SME::BI__builtin_sme_svldr_za ||
4425 BuiltinID == SME::BI__builtin_sme_svstr_za)
4426 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4427
4428 // Emit a call to set FPMR for intrinsics that require it.
4429 if (TypeFlags.setsFPMR())
4430 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
4431 Ops.pop_back_val());
4432 // Handle builtins which require their multi-vector operands to be swapped
4433 swapCommutativeSMEOperands(BuiltinID, Ops);
4434
4435 auto isCntsBuiltin = [&]() {
4436 switch (BuiltinID) {
4437 default:
4438 return 0;
4439 case SME::BI__builtin_sme_svcntsb:
4440 return 8;
4441 case SME::BI__builtin_sme_svcntsh:
4442 return 4;
4443 case SME::BI__builtin_sme_svcntsw:
4444 return 2;
4445 }
4446 };
4447
4448 if (auto Mul = isCntsBuiltin()) {
4449 llvm::Value *Cntd =
4450 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd));
4451 return Builder.CreateMul(Cntd, llvm::ConstantInt::get(Int64Ty, Mul),
4452 "mulsvl", /* HasNUW */ true, /* HasNSW */ true);
4453 }
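// Worked example: with a 256-bit streaming vector length, cntsd returns 4
// (the number of 64-bit doublewords), so svcntsb = 4 * 8 = 32 bytes,
// svcntsh = 4 * 4 = 16 halfwords and svcntsw = 4 * 2 = 8 words.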
4454
4455 // Should not happen!
4456 if (Builtin->LLVMIntrinsic == 0)
4457 return nullptr;
4458
4459 // Predicates must match the main datatype.
4460 for (Value *&Op : Ops)
4461 if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
4462 if (PredTy->getElementType()->isIntegerTy(1))
4463 Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));
4464
4465 Function *F =
4466 TypeFlags.isOverloadNone()
4467 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
4468 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
4469
4470 return Builder.CreateCall(F, Ops);
4471}
4472
4473/// Helper for the read/write/add/inc X18 builtins: read the X18 register and
4474/// return it as an i8 pointer.
4475 static llvm::Value *readX18AsPtr(CodeGenFunction &CGF) {
4476 LLVMContext &Context = CGF.CGM.getLLVMContext();
4477 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
4478 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4479 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4480 llvm::Function *F =
4481 CGF.CGM.getIntrinsic(Intrinsic::read_register, {CGF.Int64Ty});
4482 llvm::Value *X18 = CGF.Builder.CreateCall(F, Metadata);
4483 return CGF.Builder.CreateIntToPtr(X18, CGF.Int8PtrTy);
4484}
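// The helper above emits roughly:
//   %x18 = call i64 @llvm.read_register.i64(metadata !{!"x18"})
//   %ptr = inttoptr i64 %x18 to ptr
// (an illustrative sketch of the IR shape).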
4485
4486 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
4487 const CallExpr *E,
4488 llvm::Triple::ArchType Arch) {
4489 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
4490 BuiltinID <= clang::AArch64::LastSVEBuiltin)
4491 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
4492
4493 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
4494 BuiltinID <= clang::AArch64::LastSMEBuiltin)
4495 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
4496
4497 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
4498 return EmitAArch64CpuSupports(E);
4499
4500 unsigned HintID = static_cast<unsigned>(-1);
4501 switch (BuiltinID) {
4502 default: break;
4503 case clang::AArch64::BI__builtin_arm_nop:
4504 HintID = 0;
4505 break;
4506 case clang::AArch64::BI__builtin_arm_yield:
4507 case clang::AArch64::BI__yield:
4508 HintID = 1;
4509 break;
4510 case clang::AArch64::BI__builtin_arm_wfe:
4511 case clang::AArch64::BI__wfe:
4512 HintID = 2;
4513 break;
4514 case clang::AArch64::BI__builtin_arm_wfi:
4515 case clang::AArch64::BI__wfi:
4516 HintID = 3;
4517 break;
4518 case clang::AArch64::BI__builtin_arm_sev:
4519 case clang::AArch64::BI__sev:
4520 HintID = 4;
4521 break;
4522 case clang::AArch64::BI__builtin_arm_sevl:
4523 case clang::AArch64::BI__sevl:
4524 HintID = 5;
4525 break;
4526 }
4527
4528 if (HintID != static_cast<unsigned>(-1)) {
4529 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
4530 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
4531 }
4532
4533 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
4534 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
4535 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4536 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
4537 }
4538
4539 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
4540 // Create call to __arm_sme_state and store the results to the two pointers.
4541 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
4542 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
4543 false),
4544 "__arm_sme_state"));
4545 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
4546 "aarch64_pstate_sm_compatible");
4547 CI->setAttributes(Attrs);
4548 CI->setCallingConv(
4549 llvm::CallingConv::
4550 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
4551 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
4552 EmitPointerWithAlignment(E->getArg(0)));
4553 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
4554 EmitPointerWithAlignment(E->getArg(1)));
4555 }
4556
4557 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
4558 assert((getContext().getTypeSize(E->getType()) == 32) &&
4559 "rbit of unusual size!");
4560 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4561 return Builder.CreateCall(
4562 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4563 }
4564 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
4565 assert((getContext().getTypeSize(E->getType()) == 64) &&
4566 "rbit of unusual size!");
4567 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4568 return Builder.CreateCall(
4569 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4570 }
4571
4572 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
4573 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
4574 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4575 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
4576 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
4577 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
4578 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
4579 return Res;
4580 }
4581
4582 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
4583 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4584 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
4585 "cls");
4586 }
4587 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
4588 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4589 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
4590 "cls");
4591 }
4592
4593 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
4594 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
4595 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4596 llvm::Type *Ty = Arg->getType();
4597 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
4598 Arg, "frint32z");
4599 }
4600
4601 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
4602 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
4603 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4604 llvm::Type *Ty = Arg->getType();
4605 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
4606 Arg, "frint64z");
4607 }
4608
4609 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
4610 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
4611 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4612 llvm::Type *Ty = Arg->getType();
4613 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
4614 Arg, "frint32x");
4615 }
4616
4617 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
4618 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
4619 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4620 llvm::Type *Ty = Arg->getType();
4621 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
4622 Arg, "frint64x");
4623 }
4624
4625 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
4626 assert((getContext().getTypeSize(E->getType()) == 32) &&
4627 "__jcvt of unusual size!");
4628 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4629 return Builder.CreateCall(
4630 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
4631 }
4632
4633 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
4634 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
4635 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
4636 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
4637 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
4638 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
4639
4640 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
4641 // Load from the address via an LLVM intrinsic, receiving a
4642 // tuple of 8 i64 words, and store each one to ValPtr.
4643 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
4644 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
4645 llvm::Value *ToRet;
4646 for (size_t i = 0; i < 8; i++) {
4647 llvm::Value *ValOffsetPtr =
4648 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
4649 Address Addr =
4650 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
4651 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
4652 }
4653 return ToRet;
4654 }
4655
4656 // Load 8 i64 words from ValPtr, and store them to the address
4657 // via an LLVM intrinsic.
4658 llvm::SmallVector<llvm::Value *, 9> Args;
4659 Args.push_back(MemAddr);
4660 for (size_t i = 0; i < 8; i++) {
4661 llvm::Value *ValOffsetPtr =
4662 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
4663 Address Addr = Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
4664 Args.push_back(Builder.CreateLoad(Addr));
4665 }
4666
4667 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
4668 ? Intrinsic::aarch64_st64b
4669 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
4670 ? Intrinsic::aarch64_st64bv
4671 : Intrinsic::aarch64_st64bv0);
4672 Function *F = CGM.getIntrinsic(Intr);
4673 return Builder.CreateCall(F, Args);
4674 }
4675
4676 if (BuiltinID == clang::AArch64::BI__builtin_arm_atomic_store_with_stshh) {
4677 Value *StoreAddr = EmitScalarExpr(E->getArg(0));
4678 Value *StoreValue = EmitScalarExpr(E->getArg(1));
4679
4680 auto *OrderC = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
4681 auto *PolicyC = cast<ConstantInt>(EmitScalarExpr(E->getArg(3)));
4682
4683 // Compute the pointee bit-width from arg0 and create an i32 constant.
4684 QualType ValQT =
4685 E->getArg(0)->getType()->getPointeeType();
4686 unsigned SizeBits = getContext().getTypeSize(ValQT);
4687 auto *SizeC = llvm::ConstantInt::get(Int32Ty, SizeBits);
4688
4689 Value *StoreValue64 = Builder.CreateIntCast(StoreValue, Int64Ty,
4690 ValQT->isSignedIntegerType());
4691
4692 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_stshh_atomic_store,
4693 {StoreAddr->getType()});
4694
4695 // Emit a single intrinsic so the backend can expand it to an STSHH followed
4696 // by an atomic store, guaranteeing the STSHH immediately precedes the STR.
4697 return Builder.CreateCall(
4698 F, {StoreAddr, StoreValue64,
4699 ConstantInt::get(Int32Ty, OrderC->getZExtValue()),
4700 ConstantInt::get(Int32Ty, PolicyC->getZExtValue()), SizeC});
4701 }
4702
4703 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
4704 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
4705
4706 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
4707 ? Intrinsic::aarch64_rndr
4708 : Intrinsic::aarch64_rndrrs);
4709 Function *F = CGM.getIntrinsic(Intr);
4710 llvm::Value *Val = Builder.CreateCall(F);
4711 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
4712 Value *Status = Builder.CreateExtractValue(Val, 1);
4713
4714 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
4715 Builder.CreateStore(RandomValue, MemAddress);
4716 Status = Builder.CreateZExt(Status, Int32Ty);
4717 return Status;
4718 }
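// For example, "int ok = __builtin_arm_rndr(&v);" becomes roughly:
//   %pair = call { i64, i1 } @llvm.aarch64.rndr()
//   %val = extractvalue { i64, i1 } %pair, 0     ; stored through &v
//   %st = extractvalue { i64, i1 } %pair, 1
//   %ok = zext i1 %st to i32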
4719
4720 if (BuiltinID == clang::AArch64::BI__clear_cache) {
4721 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4722 const FunctionDecl *FD = E->getDirectCallee();
4723 Value *Ops[2];
4724 for (unsigned i = 0; i < 2; i++)
4725 Ops[i] = EmitScalarExpr(E->getArg(i));
4726 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4727 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4728 StringRef Name = FD->getName();
4729 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4730 }
4731
4732 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
4733 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
4734 getContext().getTypeSize(E->getType()) == 128) {
4735 Function *F =
4736 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
4737 ? Intrinsic::aarch64_ldaxp
4738 : Intrinsic::aarch64_ldxp);
4739
4740 Value *LdPtr = EmitScalarExpr(E->getArg(0));
4741 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
4742
4743 Value *Val0 = Builder.CreateExtractValue(Val, 1);
4744 Value *Val1 = Builder.CreateExtractValue(Val, 0);
4745 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
4746 Val0 = Builder.CreateZExt(Val0, Int128Ty);
4747 Val1 = Builder.CreateZExt(Val1, Int128Ty);
4748
4749 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
4750 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4751 Val = Builder.CreateOr(Val, Val1);
4752 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4753 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
4754 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
4755 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4756
4757 QualType Ty = E->getType();
4758 llvm::Type *RealResTy = ConvertType(Ty);
4759 llvm::Type *IntTy =
4760 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
4761
4762 Function *F =
4763 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
4764 ? Intrinsic::aarch64_ldaxr
4765 : Intrinsic::aarch64_ldxr,
4766 DefaultPtrTy);
4767 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
4768 Val->addParamAttr(
4769 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
4770
4771 if (RealResTy->isPointerTy())
4772 return Builder.CreateIntToPtr(Val, RealResTy);
4773
4774 llvm::Type *IntResTy = llvm::IntegerType::get(
4775 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
4776 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
4777 RealResTy);
4778 }
4779
4780 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
4781 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
4782 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
4783 Function *F =
4784 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
4785 ? Intrinsic::aarch64_stlxp
4786 : Intrinsic::aarch64_stxp);
4787 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
4788
4789 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4790 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
4791
4792 Tmp = Tmp.withElementType(STy);
4793 llvm::Value *Val = Builder.CreateLoad(Tmp);
4794
4795 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4796 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4797 Value *StPtr = EmitScalarExpr(E->getArg(1));
4798 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
4799 }
4800
4801 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
4802 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
4803 Value *StoreVal = EmitScalarExpr(E->getArg(0));
4804 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4805
4806 QualType Ty = E->getArg(0)->getType();
4807 llvm::Type *StoreTy =
4808 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
4809
4810 if (StoreVal->getType()->isPointerTy())
4811 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
4812 else {
4813 llvm::Type *IntTy = llvm::IntegerType::get(
4814 getLLVMContext(),
4815 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
4816 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
4817 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
4818 }
4819
4820 Function *F =
4821 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
4822 ? Intrinsic::aarch64_stlxr
4823 : Intrinsic::aarch64_stxr,
4824 StoreAddr->getType());
4825 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
4826 CI->addParamAttr(
4827 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
4828 return CI;
4829 }
4830
4831 if (BuiltinID == clang::AArch64::BI__getReg) {
4832 Expr::EvalResult Result;
4833 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
4834 llvm_unreachable("Sema will ensure that the parameter is constant");
4835
4836 llvm::APSInt Value = Result.Val.getInt();
4837 LLVMContext &Context = CGM.getLLVMContext();
4838 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
4839
4840 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
4841 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4842 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4843
4844 llvm::Function *F =
4845 CGM.getIntrinsic(Intrinsic::read_register, {Int64Ty});
4846 return Builder.CreateCall(F, Metadata);
4847 }
4848
4849 if (BuiltinID == clang::AArch64::BI__break) {
4850 Expr::EvalResult Result;
4851 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
4852 llvm_unreachable("Sema will ensure that the parameter is constant");
4853
4854 llvm::Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
4855 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
4856 }
4857
4858 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
4859 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
4860 return Builder.CreateCall(F);
4861 }
4862
4863 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
4864 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
4865 llvm::SyncScope::SingleThread);
4866
4867 // CRC32
4868 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4869 switch (BuiltinID) {
4870 case clang::AArch64::BI__builtin_arm_crc32b:
4871 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
4872 case clang::AArch64::BI__builtin_arm_crc32cb:
4873 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
4874 case clang::AArch64::BI__builtin_arm_crc32h:
4875 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
4876 case clang::AArch64::BI__builtin_arm_crc32ch:
4877 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
4878 case clang::AArch64::BI__builtin_arm_crc32w:
4879 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
4880 case clang::AArch64::BI__builtin_arm_crc32cw:
4881 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
4882 case clang::AArch64::BI__builtin_arm_crc32d:
4883 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
4884 case clang::AArch64::BI__builtin_arm_crc32cd:
4885 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
4886 }
4887
4888 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4889 Value *Arg0 = EmitScalarExpr(E->getArg(0));
4890 Value *Arg1 = EmitScalarExpr(E->getArg(1));
4891 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4892
4893 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
4894 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
4895
4896 return Builder.CreateCall(F, {Arg0, Arg1});
4897 }
4898
4899 // Memory Operations (MOPS)
4900 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
4901 Value *Dst = EmitScalarExpr(E->getArg(0));
4902 Value *Val = EmitScalarExpr(E->getArg(1));
4903 Value *Size = EmitScalarExpr(E->getArg(2));
4904 Val = Builder.CreateTrunc(Val, Int8Ty);
4905 Size = Builder.CreateIntCast(Size, Int64Ty, false);
4906 return Builder.CreateCall(
4907 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
4908 }
4909
4910 if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch ||
4911 BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x)
4912 return EmitRangePrefetchBuiltin(*this, BuiltinID, E);
4913
4914 // Memory Tagging Extensions (MTE) Intrinsics
4915 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
4916 switch (BuiltinID) {
4917 case clang::AArch64::BI__builtin_arm_irg:
4918 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
4919 case clang::AArch64::BI__builtin_arm_addg:
4920 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
4921 case clang::AArch64::BI__builtin_arm_gmi:
4922 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
4923 case clang::AArch64::BI__builtin_arm_ldg:
4924 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
4925 case clang::AArch64::BI__builtin_arm_stg:
4926 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
4927 case clang::AArch64::BI__builtin_arm_subp:
4928 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
4929 }
4930
4931 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
4932 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
4933 Value *Pointer = EmitScalarExpr(E->getArg(0));
4934 Value *Mask = EmitScalarExpr(E->getArg(1));
4935
4936 Mask = Builder.CreateZExt(Mask, Int64Ty);
4937 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
4938 {Pointer, Mask});
4939 }
4940 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
4941 Value *Pointer = EmitScalarExpr(E->getArg(0));
4942 Value *TagOffset = EmitScalarExpr(E->getArg(1));
4943
4944 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
4945 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
4946 {Pointer, TagOffset});
4947 }
4948 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
4949 Value *Pointer = EmitScalarExpr(E->getArg(0));
4950 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
4951
4952 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
4953 return Builder.CreateCall(
4954 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
4955 }
4956 // Although it is possible to supply a different return
4957 // address (the first argument) to this intrinsic, for now we set
4958 // the return address to be the same as the input address.
4959 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
4960 Value *TagAddress = EmitScalarExpr(E->getArg(0));
4961 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
4962 {TagAddress, TagAddress});
4963 }
4964 // Although it is possible to supply a different tag (to set)
4965 // to this intrinsic (as the first argument), for now we supply
4966 // the tag that is in the input address argument (the common use case).
4967 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
4968 Value *TagAddress = EmitScalarExpr(E->getArg(0));
4969 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
4970 {TagAddress, TagAddress});
4971 }
4972 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
4973 Value *PointerA = EmitScalarExpr(E->getArg(0));
4974 Value *PointerB = EmitScalarExpr(E->getArg(1));
4975 return Builder.CreateCall(
4976 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
4977 }
4978 }
4979
4980 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
4981 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
4982 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
4983 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
4984 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
4985 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
4986 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
4987 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
4988
4989 SpecialRegisterAccessKind AccessKind = Write;
4990 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
4991 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
4992 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
4993 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
4994 AccessKind = VolatileRead;
4995
4996 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
4997 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
4998
4999 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5000 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
5001
5002 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5003 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
5004
5005 llvm::Type *ValueType;
5006 llvm::Type *RegisterType = Int64Ty;
5007 if (Is32Bit) {
5008 ValueType = Int32Ty;
5009 } else if (Is128Bit) {
5010 llvm::Type *Int128Ty =
5011 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
5012 ValueType = Int128Ty;
5013 RegisterType = Int128Ty;
5014 } else if (IsPointerBuiltin) {
5015 ValueType = VoidPtrTy;
5016 } else {
5017 ValueType = Int64Ty;
5018 }
5019
5020 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
5021 AccessKind);
5022 }
5023
5024 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5025 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
5026 LLVMContext &Context = CGM.getLLVMContext();
5027
5028 unsigned SysReg =
5029 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
5030
5031 std::string SysRegStr;
5032 llvm::raw_string_ostream(SysRegStr)
5033 << (0b10 | SysReg >> 14) << ":" << ((SysReg >> 11) & 7) << ":"
5034 << ((SysReg >> 7) & 15) << ":" << ((SysReg >> 3) & 15) << ":"
5035 << (SysReg & 7);
5036
5037 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
5038 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5039 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5040
5041 llvm::Type *RegisterType = Int64Ty;
5042 llvm::Type *Types[] = { RegisterType };
5043
5044 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
5045 llvm::Function *F = CGM.getIntrinsic(Intrinsic::read_register, Types);
5046
5047 return Builder.CreateCall(F, Metadata);
5048 }
5049
5050 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
5051 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
5052 llvm::Value *Result = Builder.CreateCall(F, {Metadata, ArgValue});
5053
5054 return Result;
5055 }
5056
5057 if (BuiltinID == clang::AArch64::BI__sys) {
5058 unsigned SysReg =
5059 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
5060 const unsigned Op1 = SysReg >> 11;
5061 const unsigned CRn = (SysReg >> 7) & 0xf;
5062 const unsigned CRm = (SysReg >> 3) & 0xf;
5063 const unsigned Op2 = SysReg & 0x7;
5064
5065 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_sys),
5066 {Builder.getInt32(Op1), Builder.getInt32(CRn),
5067 Builder.getInt32(CRm), Builder.getInt32(Op2),
5068 EmitScalarExpr(E->getArg(1))});
5069
5070 // Return 0 for convenience, even though MSVC returns some other undefined
5071 // value.
5072 return ConstantInt::get(Builder.getInt32Ty(), 0);
5073 }
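// The immediate packs the system-instruction operands as
// (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2, which the shifts above
// unpack; e.g. an encoding of 0x1e0 decodes to Op1=0, CRn=3, CRm=12, Op2=0
// (illustrative arithmetic only).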
5074
5075 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
5076 llvm::Function *F =
5077 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
5078 return Builder.CreateCall(F);
5079 }
5080
5081 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
5082 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
5083 return Builder.CreateCall(F);
5084 }
5085
5086 if (BuiltinID == clang::AArch64::BI__mulh ||
5087 BuiltinID == clang::AArch64::BI__umulh) {
5088 llvm::Type *ResType = ConvertType(E->getType());
5089 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5090
5091 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
5092 Value *LHS =
5093 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
5094 Value *RHS =
5095 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
5096
5097 Value *MulResult, *HigherBits;
5098 if (IsSigned) {
5099 MulResult = Builder.CreateNSWMul(LHS, RHS);
5100 HigherBits = Builder.CreateAShr(MulResult, 64);
5101 } else {
5102 MulResult = Builder.CreateNUWMul(LHS, RHS);
5103 HigherBits = Builder.CreateLShr(MulResult, 64);
5104 }
5105 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
5106
5107 return HigherBits;
5108 }
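// For instance, __umulh(a, b) is emitted roughly as:
//   %l = zext i64 %a to i128
//   %r = zext i64 %b to i128
//   %m = mul nuw i128 %l, %r
//   %h = lshr i128 %m, 64
//   %res = trunc i128 %h to i64
// and __mulh is the same shape with sext/mul nsw/ashr.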
5109
5110 if (BuiltinID == AArch64::BI__writex18byte ||
5111 BuiltinID == AArch64::BI__writex18word ||
5112 BuiltinID == AArch64::BI__writex18dword ||
5113 BuiltinID == AArch64::BI__writex18qword) {
5114 // Process the args first
5115 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5116 Value *DataArg = EmitScalarExpr(E->getArg(1));
5117
5118 // Read x18 as i8*
5119 llvm::Value *X18 = readX18AsPtr(*this);
5120
5121 // Store val at x18 + offset
5122 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5123 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5124 StoreInst *Store =
5125 Builder.CreateAlignedStore(DataArg, Ptr, CharUnits::One());
5126 return Store;
5127 }
5128
5129 if (BuiltinID == AArch64::BI__readx18byte ||
5130 BuiltinID == AArch64::BI__readx18word ||
5131 BuiltinID == AArch64::BI__readx18dword ||
5132 BuiltinID == AArch64::BI__readx18qword) {
5133 // Process the args first
5134 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5135
5136 // Read x18 as i8*
5137 llvm::Value *X18 = readX18AsPtr(*this);
5138
5139 // Load x18 + offset
5140 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5141 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5142 llvm::Type *IntTy = ConvertType(E->getType());
5143 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
5144 return Load;
5145 }
5146
5147 if (BuiltinID == AArch64::BI__addx18byte ||
5148 BuiltinID == AArch64::BI__addx18word ||
5149 BuiltinID == AArch64::BI__addx18dword ||
5150 BuiltinID == AArch64::BI__addx18qword ||
5151 BuiltinID == AArch64::BI__incx18byte ||
5152 BuiltinID == AArch64::BI__incx18word ||
5153 BuiltinID == AArch64::BI__incx18dword ||
5154 BuiltinID == AArch64::BI__incx18qword) {
5155 llvm::Type *IntTy;
5156 bool isIncrement;
5157 switch (BuiltinID) {
5158 case AArch64::BI__incx18byte:
5159 IntTy = Int8Ty;
5160 isIncrement = true;
5161 break;
5162 case AArch64::BI__incx18word:
5163 IntTy = Int16Ty;
5164 isIncrement = true;
5165 break;
5166 case AArch64::BI__incx18dword:
5167 IntTy = Int32Ty;
5168 isIncrement = true;
5169 break;
5170 case AArch64::BI__incx18qword:
5171 IntTy = Int64Ty;
5172 isIncrement = true;
5173 break;
5174 default:
5175 IntTy = ConvertType(E->getArg(1)->getType());
5176 isIncrement = false;
5177 break;
5178 }
5179 // Process the args first
5180 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5181 Value *ValToAdd =
5182 isIncrement ? ConstantInt::get(IntTy, 1) : EmitScalarExpr(E->getArg(1));
5183
5184 // Read x18 as i8*
5185 llvm::Value *X18 = readX18AsPtr(*this);
5186
5187 // Load x18 + offset
5188 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5189 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5190 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
5191
5192 // Add values
5193 Value *AddResult = Builder.CreateAdd(Load, ValToAdd);
5194
5195 // Store val at x18 + offset
5196 StoreInst *Store =
5197 Builder.CreateAlignedStore(AddResult, Ptr, CharUnits::One());
5198 return Store;
5199 }
5200
5201 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
5202 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
5203 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
5204 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
5205 Value *Arg = EmitScalarExpr(E->getArg(0));
5206 llvm::Type *RetTy = ConvertType(E->getType());
5207 return Builder.CreateBitCast(Arg, RetTy);
5208 }
5209
5210 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5211 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5212 BuiltinID == AArch64::BI_CountLeadingZeros ||
5213 BuiltinID == AArch64::BI_CountLeadingZeros64) {
5214 Value *Arg = EmitScalarExpr(E->getArg(0));
5215 llvm::Type *ArgType = Arg->getType();
5216
5217 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5218 BuiltinID == AArch64::BI_CountLeadingOnes64)
5219 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
5220
5221 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
5222 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
5223
5224 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5225 BuiltinID == AArch64::BI_CountLeadingZeros64)
5226 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5227 return Result;
5228 }
5229
5230 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
5231 BuiltinID == AArch64::BI_CountLeadingSigns64) {
5232 Value *Arg = EmitScalarExpr(E->getArg(0));
5233
5234 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
5235 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
5236 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
5237
5238 Value *Result = Builder.CreateCall(F, Arg, "cls");
5239 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
5240 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5241 return Result;
5242 }
5243
5244 if (BuiltinID == AArch64::BI_CountOneBits ||
5245 BuiltinID == AArch64::BI_CountOneBits64) {
5246 Value *ArgValue = EmitScalarExpr(E->getArg(0));
5247 llvm::Type *ArgType = ArgValue->getType();
5248 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
5249
5250 Value *Result = Builder.CreateCall(F, ArgValue);
5251 if (BuiltinID == AArch64::BI_CountOneBits64)
5252 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5253 return Result;
5254 }
5255
5256 if (BuiltinID == AArch64::BI__prefetch) {
5257 Value *Address = EmitScalarExpr(E->getArg(0));
5258 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
5259 Value *Locality = ConstantInt::get(Int32Ty, 3);
5260 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
5261 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
5262 return Builder.CreateCall(F, {Address, RW, Locality, Data});
5263 }
5264
5265 if (BuiltinID == AArch64::BI__hlt) {
5266 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
5267 Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
5268
5269 // Return 0 for convenience, even though MSVC returns some other undefined
5270 // value.
5271 return ConstantInt::get(Builder.getInt32Ty(), 0);
5272 }
5273
5274 if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
5275 return Builder.CreateFPTrunc(
5276 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
5277 Builder.getFloatTy()),
5278 Builder.getBFloatTy());
5279
5280 // Handle MSVC intrinsics before argument evaluation to prevent double
5281 // evaluation.
5282 if (std::optional<MSVCIntrin> MsvcIntId =
5283 translateAarch64ToMsvcIntrin(BuiltinID))
5284 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
5285
5286 // Some intrinsics are equivalent; if so, use the base intrinsic ID.
5287 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
5288 return P.first == BuiltinID;
5289 });
5290 if (It != end(NEONEquivalentIntrinsicMap))
5291 BuiltinID = It->second;
5292
5293 // Check whether this is an SISD builtin.
5294 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
5295 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
5296 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5297 bool IsSISD = (Builtin != nullptr);
5298
5299 // Find out if any arguments are required to be integer constant
5300 // expressions.
5301 unsigned ICEArguments = 0;
5302 ASTContext::GetBuiltinTypeError Error;
5303 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5304 assert(Error == ASTContext::GE_None && "Should not codegen an error");
5305
5306 llvm::SmallVector<Value *, 4> Ops;
5307 Address PtrOp0 = Address::invalid();
5308 // Note the assumption that SISD intrinsics do not contain extra arguments.
5309 // TODO: Fold this into a single function call instead of, effectively, two
5310 // separate checks.
5311 bool HasExtraArg = !IsSISD && HasExtraNeonArgument(BuiltinID);
5312 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
5313 for (unsigned i = 0, e = NumArgs; i != e; i++) {
5314 if (i == 0) {
5315 switch (BuiltinID) {
5316 case NEON::BI__builtin_neon_vld1_v:
5317 case NEON::BI__builtin_neon_vld1q_v:
5318 case NEON::BI__builtin_neon_vld1_dup_v:
5319 case NEON::BI__builtin_neon_vld1q_dup_v:
5320 case NEON::BI__builtin_neon_vld1_lane_v:
5321 case NEON::BI__builtin_neon_vld1q_lane_v:
5322 case NEON::BI__builtin_neon_vst1_v:
5323 case NEON::BI__builtin_neon_vst1q_v:
5324 case NEON::BI__builtin_neon_vst1_lane_v:
5325 case NEON::BI__builtin_neon_vst1q_lane_v:
5326 case NEON::BI__builtin_neon_vldap1_lane_s64:
5327 case NEON::BI__builtin_neon_vldap1q_lane_s64:
5328 case NEON::BI__builtin_neon_vstl1_lane_s64:
5329 case NEON::BI__builtin_neon_vstl1q_lane_s64:
5330 // Get the alignment for the argument in addition to the value;
5331 // we'll use it later.
5332 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
5333 Ops.push_back(PtrOp0.emitRawPointer(*this));
5334 continue;
5335 }
5336 }
5337 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
5338 }
5339
5340 if (Builtin) {
5341 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
5342 assert(Result && "SISD intrinsic should have been handled");
5343 return Result;
5344 }
5345
5346 const Expr *Arg = E->getArg(E->getNumArgs()-1);
5347 NeonTypeFlags Type(0);
5348 if (std::optional<llvm::APSInt> Result =
5349 Arg->getIntegerConstantExpr(getContext()))
5350 // Determine the type of this overloaded NEON intrinsic.
5351 Type = NeonTypeFlags(Result->getZExtValue());
5352
5353 bool usgn = Type.isUnsigned();
5354 bool quad = Type.isQuad();
5355 unsigned Int;
5356
5357 // Not all intrinsics handled by the common case work for AArch64 yet, so only
5358 // defer to common code if it's been added to our special map.
5359 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
5360 AArch64SIMDIntrinsicsProvenSorted);
5361
5362 if (Builtin)
5363 return EmitCommonNeonBuiltinExpr(
5364 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
5365 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
5366 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
5367
5368 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
5369 return V;
5370
5371 // Handle non-overloaded intrinsics first.
5372 switch (BuiltinID) {
5373 default: break;
5374 case NEON::BI__builtin_neon_vabsh_f16:
5375 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
5376 case NEON::BI__builtin_neon_vaddq_p128: {
5377 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
5378 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5379 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5380 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
5381 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5382 return Builder.CreateBitCast(Ops[0], Int128Ty);
5383 }
5384 case NEON::BI__builtin_neon_vldrq_p128: {
5385 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5386 return Builder.CreateAlignedLoad(Int128Ty, Ops[0],
5387 CharUnits::fromQuantity(16));
5388 }
5389 case NEON::BI__builtin_neon_vstrq_p128: {
5390 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5391 }
5392 case NEON::BI__builtin_neon_vcvts_f32_u32:
5393 case NEON::BI__builtin_neon_vcvtd_f64_u64:
5394 usgn = true;
5395 [[fallthrough]];
5396 case NEON::BI__builtin_neon_vcvts_f32_s32:
5397 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5398 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5399 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5400 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5401 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5402 if (usgn)
5403 return Builder.CreateUIToFP(Ops[0], FTy);
5404 return Builder.CreateSIToFP(Ops[0], FTy);
5405 }
5406 case NEON::BI__builtin_neon_vcvth_f16_u16:
5407 case NEON::BI__builtin_neon_vcvth_f16_u32:
5408 case NEON::BI__builtin_neon_vcvth_f16_u64:
5409 usgn = true;
5410 [[fallthrough]];
5411 case NEON::BI__builtin_neon_vcvth_f16_s16:
5412 case NEON::BI__builtin_neon_vcvth_f16_s32:
5413 case NEON::BI__builtin_neon_vcvth_f16_s64: {
5414 llvm::Type *FTy = HalfTy;
5415 llvm::Type *InTy;
5416 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
5417 InTy = Int64Ty;
5418 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
5419 InTy = Int32Ty;
5420 else
5421 InTy = Int16Ty;
5422 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5423 if (usgn)
5424 return Builder.CreateUIToFP(Ops[0], FTy);
5425 return Builder.CreateSIToFP(Ops[0], FTy);
5426 }
5427 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5428 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5429 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5430 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5431 case NEON::BI__builtin_neon_vcvth_u16_f16:
5432 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5433 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5434 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5435 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5436 case NEON::BI__builtin_neon_vcvth_s16_f16: {
5437 llvm::Type *InTy = Int16Ty;
5438 llvm::Type* FTy = HalfTy;
5439 llvm::Type *Tys[2] = {InTy, FTy};
5440 switch (BuiltinID) {
5441 default: llvm_unreachable("missing builtin ID in switch!");
5442 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5443 Int = Intrinsic::aarch64_neon_fcvtau; break;
5444 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5445 Int = Intrinsic::aarch64_neon_fcvtmu; break;
5446 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5447 Int = Intrinsic::aarch64_neon_fcvtnu; break;
5448 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5449 Int = Intrinsic::aarch64_neon_fcvtpu; break;
5450 case NEON::BI__builtin_neon_vcvth_u16_f16:
5451 Int = Intrinsic::aarch64_neon_fcvtzu; break;
5452 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5453 Int = Intrinsic::aarch64_neon_fcvtas; break;
5454 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5455 Int = Intrinsic::aarch64_neon_fcvtms; break;
5456 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5457 Int = Intrinsic::aarch64_neon_fcvtns; break;
5458 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5459 Int = Intrinsic::aarch64_neon_fcvtps; break;
5460 case NEON::BI__builtin_neon_vcvth_s16_f16:
5461 Int = Intrinsic::aarch64_neon_fcvtzs; break;
5462 }
5463 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
5464 }
5465 case NEON::BI__builtin_neon_vcaleh_f16:
5466 case NEON::BI__builtin_neon_vcalth_f16:
5467 case NEON::BI__builtin_neon_vcageh_f16:
5468 case NEON::BI__builtin_neon_vcagth_f16: {
5469 llvm::Type* InTy = Int32Ty;
5470 llvm::Type* FTy = HalfTy;
5471 llvm::Type *Tys[2] = {InTy, FTy};
5472 switch (BuiltinID) {
5473 default: llvm_unreachable("missing builtin ID in switch!");
5474 case NEON::BI__builtin_neon_vcageh_f16:
5475 Int = Intrinsic::aarch64_neon_facge; break;
5476 case NEON::BI__builtin_neon_vcagth_f16:
5477 Int = Intrinsic::aarch64_neon_facgt; break;
5478 case NEON::BI__builtin_neon_vcaleh_f16:
5479 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
5480 case NEON::BI__builtin_neon_vcalth_f16:
5481 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
5482 }
5483 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
5484 return Builder.CreateTrunc(Ops[0], Int16Ty);
5485 }
5486 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5487 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
5488 llvm::Type* InTy = Int32Ty;
5489 llvm::Type* FTy = HalfTy;
5490 llvm::Type *Tys[2] = {InTy, FTy};
5491 switch (BuiltinID) {
5492 default: llvm_unreachable("missing builtin ID in switch!");
5493 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5494 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
5495 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
5496 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
5497 }
5498 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
5499 return Builder.CreateTrunc(Ops[0], Int16Ty);
5500 }
5501 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5502 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
5503 llvm::Type* FTy = HalfTy;
5504 llvm::Type* InTy = Int32Ty;
5505 llvm::Type *Tys[2] = {FTy, InTy};
5506 switch (BuiltinID) {
5507 default: llvm_unreachable("missing builtin ID in switch!");
5508 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5509 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
5510 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
5511 break;
5512 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
5513 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
5514 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
5515 break;
5516 }
5517 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
5518 }
5519 case NEON::BI__builtin_neon_vpaddd_s64: {
5520 // TODO: Isn't this handled by
5521 // EmitCommonNeonSISDBuiltinExpr?
5522 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
5523 // The vector is v2i64, so make sure it's bitcast to that.
5524 Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2i64");
5525 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5526 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5527 Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
5528 Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
5529 // Pairwise addition of a v2i64 into a scalar i64.
5530 return Builder.CreateAdd(Op0, Op1, "vpaddd");
5531 }
5532 case NEON::BI__builtin_neon_vpaddd_f64: {
5533 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
5534 // The vector is v2f64, so make sure it's bitcast to that.
5535 Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2f64");
5536 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5537 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5538 Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
5539 Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
5540 // Pairwise addition of a v2f64 into a scalar f64.
5541 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5542 }
5543 case NEON::BI__builtin_neon_vpadds_f32: {
5544 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
5545 // The vector is v2f32, so make sure it's bitcast to that.
5546 Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2f32");
5547 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5548 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5549 Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
5550 Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
5551 // Pairwise addition of a v2f32 into a scalar f32.
5552 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5553 }
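// All three pairwise cases above share one shape; e.g. for vpadds_f32:
//   %v = bitcast <input> to <2 x float>
//   %e0 = extractelement <2 x float> %v, i64 0
//   %e1 = extractelement <2 x float> %v, i64 1
//   %r = fadd float %e0, %e1
// (a sketch; value names are illustrative).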
5554 case NEON::BI__builtin_neon_vceqzd_s64:
5555 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5556 return EmitAArch64CompareBuiltinExpr(Ops[0], ConvertType(E->getType()),
5557 ICmpInst::ICMP_EQ, "vceqz");
5558 case NEON::BI__builtin_neon_vceqzd_f64:
5559 case NEON::BI__builtin_neon_vceqzs_f32:
5560 case NEON::BI__builtin_neon_vceqzh_f16:
5561 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5562 return EmitAArch64CompareBuiltinExpr(Ops[0], ConvertType(E->getType()),
5563 ICmpInst::FCMP_OEQ, "vceqz");
5564 case NEON::BI__builtin_neon_vcgezd_s64:
5565 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5566 return EmitAArch64CompareBuiltinExpr(Ops[0], ConvertType(E->getType()),
5567 ICmpInst::ICMP_SGE, "vcgez");
5568 case NEON::BI__builtin_neon_vcgezd_f64:
5569 case NEON::BI__builtin_neon_vcgezs_f32:
5570 case NEON::BI__builtin_neon_vcgezh_f16:
5571 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5572 return EmitAArch64CompareBuiltinExpr(Ops[0], ConvertType(E->getType()),
5573 ICmpInst::FCMP_OGE, "vcgez");
5574 case NEON::BI__builtin_neon_vclezd_s64:
5575 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5576 return EmitAArch64CompareBuiltinExpr(Ops[0], ConvertType(E->getType()),
5577 ICmpInst::ICMP_SLE, "vclez");
5578 case NEON::BI__builtin_neon_vclezd_f64:
5579 case NEON::BI__builtin_neon_vclezs_f32:
5580 case NEON::BI__builtin_neon_vclezh_f16:
5581 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5582 return EmitAArch64CompareBuiltinExpr(Ops[0], ConvertType(E->getType()),
5583 ICmpInst::FCMP_OLE, "vclez");
5584 case NEON::BI__builtin_neon_vcgtzd_s64:
5585 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5586 return EmitAArch64CompareBuiltinExpr(Ops[0], ConvertType(E->getType()),
5587 ICmpInst::ICMP_SGT, "vcgtz");
5588 case NEON::BI__builtin_neon_vcgtzd_f64:
5589 case NEON::BI__builtin_neon_vcgtzs_f32:
5590 case NEON::BI__builtin_neon_vcgtzh_f16:
5591 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5592 return EmitAArch64CompareBuiltinExpr(Ops[0], ConvertType(E->getType()),
5593 ICmpInst::FCMP_OGT, "vcgtz");
5594 case NEON::BI__builtin_neon_vcltzd_s64:
5595 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5596 return EmitAArch64CompareBuiltinExpr(Ops[0], ConvertType(E->getType()),
5597 ICmpInst::ICMP_SLT, "vcltz");
5598
5599 case NEON::BI__builtin_neon_vcltzd_f64:
5600 case NEON::BI__builtin_neon_vcltzs_f32:
5601 case NEON::BI__builtin_neon_vcltzh_f16:
5602 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5603 return EmitAArch64CompareBuiltinExpr(Ops[0], ConvertType(E->getType()),
5604 ICmpInst::FCMP_OLT, "vcltz");
5605
5606 case NEON::BI__builtin_neon_vceqzd_u64: {
5607 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5608 return EmitAArch64CompareBuiltinExpr(Ops[0], ConvertType(E->getType()),
5609 ICmpInst::ICMP_EQ, "vceqzd");
5610 }
5611 case NEON::BI__builtin_neon_vceqd_f64:
5612 case NEON::BI__builtin_neon_vcled_f64:
5613 case NEON::BI__builtin_neon_vcltd_f64:
5614 case NEON::BI__builtin_neon_vcged_f64:
5615 case NEON::BI__builtin_neon_vcgtd_f64: {
5616 llvm::CmpInst::Predicate P;
5617 switch (BuiltinID) {
5618 default: llvm_unreachable("missing builtin ID in switch!");
5619 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
5620 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
5621 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
5622 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
5623 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
5624 }
5625 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5626 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5627 if (P == llvm::FCmpInst::FCMP_OEQ)
5628 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5629 else
5630 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
5631 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
5632 }
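// Note the split above: FCMP_OEQ uses the quiet CreateFCmp, while the
// relational predicates use the signaling CreateFCmpS. Under IEEE 754 an
// equality test is quiet, whereas <, <=, >, >= raise Invalid on quiet NaN
// inputs; the f32 and f16 variants below apply the same split.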
5633 case NEON::BI__builtin_neon_vceqs_f32:
5634 case NEON::BI__builtin_neon_vcles_f32:
5635 case NEON::BI__builtin_neon_vclts_f32:
5636 case NEON::BI__builtin_neon_vcges_f32:
5637 case NEON::BI__builtin_neon_vcgts_f32: {
5638 llvm::CmpInst::Predicate P;
5639 switch (BuiltinID) {
5640 default: llvm_unreachable("missing builtin ID in switch!");
5641 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
5642 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
5643 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
5644 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
5645 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
5646 }
5647 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
5648 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
5649 if (P == llvm::FCmpInst::FCMP_OEQ)
5650 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5651 else
5652 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
5653 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
5654 }
5655 case NEON::BI__builtin_neon_vceqh_f16:
5656 case NEON::BI__builtin_neon_vcleh_f16:
5657 case NEON::BI__builtin_neon_vclth_f16:
5658 case NEON::BI__builtin_neon_vcgeh_f16:
5659 case NEON::BI__builtin_neon_vcgth_f16: {
5660 llvm::CmpInst::Predicate P;
5661 switch (BuiltinID) {
5662 default: llvm_unreachable("missing builtin ID in switch!");
5663 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
5664 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
5665 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
5666 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
5667 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
5668 }
5669 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
5670 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
5671 if (P == llvm::FCmpInst::FCMP_OEQ)
5672 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5673 else
5674 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
5675 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
5676 }
5677 case NEON::BI__builtin_neon_vceqd_s64:
5678 case NEON::BI__builtin_neon_vceqd_u64:
5679 case NEON::BI__builtin_neon_vcgtd_s64:
5680 case NEON::BI__builtin_neon_vcgtd_u64:
5681 case NEON::BI__builtin_neon_vcltd_s64:
5682 case NEON::BI__builtin_neon_vcltd_u64:
5683 case NEON::BI__builtin_neon_vcged_u64:
5684 case NEON::BI__builtin_neon_vcged_s64:
5685 case NEON::BI__builtin_neon_vcled_u64:
5686 case NEON::BI__builtin_neon_vcled_s64: {
5687 llvm::CmpInst::Predicate P;
5688 switch (BuiltinID) {
5689 default: llvm_unreachable("missing builtin ID in switch!");
5690 case NEON::BI__builtin_neon_vceqd_s64:
5691 case NEON::BI__builtin_neon_vceqd_u64: P = llvm::ICmpInst::ICMP_EQ; break;
5692 case NEON::BI__builtin_neon_vcgtd_s64: P = llvm::ICmpInst::ICMP_SGT; break;
5693 case NEON::BI__builtin_neon_vcgtd_u64: P = llvm::ICmpInst::ICMP_UGT; break;
5694 case NEON::BI__builtin_neon_vcltd_s64: P = llvm::ICmpInst::ICMP_SLT; break;
5695 case NEON::BI__builtin_neon_vcltd_u64: P = llvm::ICmpInst::ICMP_ULT; break;
5696 case NEON::BI__builtin_neon_vcged_u64: P = llvm::ICmpInst::ICMP_UGE; break;
5697 case NEON::BI__builtin_neon_vcged_s64: P = llvm::ICmpInst::ICMP_SGE; break;
5698 case NEON::BI__builtin_neon_vcled_u64: P = llvm::ICmpInst::ICMP_ULE; break;
5699 case NEON::BI__builtin_neon_vcled_s64: P = llvm::ICmpInst::ICMP_SLE; break;
5700 }
5701 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5702 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5703 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
5704 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
5705 }
5706 case NEON::BI__builtin_neon_vnegd_s64:
5707 return Builder.CreateNeg(Ops[0], "vnegd");
5708 case NEON::BI__builtin_neon_vnegh_f16:
5709 return Builder.CreateFNeg(Ops[0], "vnegh");
5710 case NEON::BI__builtin_neon_vtstd_s64:
5711 case NEON::BI__builtin_neon_vtstd_u64: {
5712 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5713 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5714 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
5715 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5716 llvm::Constant::getNullValue(Int64Ty));
5717 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
5718 }
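// vtst tests for any common set bit: e.g. vtstd_s64(0x0F, 0xF0) yields 0,
// while vtstd_s64(0x0F, 0x01) yields -1 (all bits set).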
5719 case NEON::BI__builtin_neon_vset_lane_i8:
5720 case NEON::BI__builtin_neon_vset_lane_i16:
5721 case NEON::BI__builtin_neon_vset_lane_i32:
5722 case NEON::BI__builtin_neon_vset_lane_i64:
5723 case NEON::BI__builtin_neon_vset_lane_bf16:
5724 case NEON::BI__builtin_neon_vset_lane_f32:
5725 case NEON::BI__builtin_neon_vsetq_lane_i8:
5726 case NEON::BI__builtin_neon_vsetq_lane_i16:
5727 case NEON::BI__builtin_neon_vsetq_lane_i32:
5728 case NEON::BI__builtin_neon_vsetq_lane_i64:
5729 case NEON::BI__builtin_neon_vsetq_lane_bf16:
5730 case NEON::BI__builtin_neon_vsetq_lane_f32:
5731 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5732 case NEON::BI__builtin_neon_vset_lane_f64:
5733 // The vector type needs a cast for the v1f64 variant.
5734 Ops[1] =
5735 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
5736 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5737 case NEON::BI__builtin_neon_vset_lane_mf8:
5738 case NEON::BI__builtin_neon_vsetq_lane_mf8:
5739 // The input vector type needs a cast to the scalar type.
5740 Ops[0] =
5741 Builder.CreateBitCast(Ops[0], llvm::Type::getInt8Ty(getLLVMContext()));
5742 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5743 case NEON::BI__builtin_neon_vsetq_lane_f64:
5744 // The vector type needs a cast for the v2f64 variant.
5745 Ops[1] =
5746 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
5747 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5748
5749 case NEON::BI__builtin_neon_vget_lane_i8:
5750 case NEON::BI__builtin_neon_vdupb_lane_i8:
5751 Ops[0] =
5752 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
5753 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5754 case NEON::BI__builtin_neon_vgetq_lane_i8:
5755 case NEON::BI__builtin_neon_vdupb_laneq_i8:
5756 Ops[0] =
5757 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
5758 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5759 case NEON::BI__builtin_neon_vget_lane_mf8:
5760 case NEON::BI__builtin_neon_vdupb_lane_mf8:
5761 case NEON::BI__builtin_neon_vgetq_lane_mf8:
5762 case NEON::BI__builtin_neon_vdupb_laneq_mf8:
5763 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5764 case NEON::BI__builtin_neon_vget_lane_i16:
5765 case NEON::BI__builtin_neon_vduph_lane_i16:
5766 Ops[0] =
5767 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
5768 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5769 case NEON::BI__builtin_neon_vgetq_lane_i16:
5770 case NEON::BI__builtin_neon_vduph_laneq_i16:
5771 Ops[0] =
5772 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
5773 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5774 case NEON::BI__builtin_neon_vget_lane_i32:
5775 case NEON::BI__builtin_neon_vdups_lane_i32:
5776 Ops[0] =
5777 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
5778 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5779 case NEON::BI__builtin_neon_vdups_lane_f32:
5780 Ops[0] =
5781 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
5782 return Builder.CreateExtractElement(Ops[0], Ops[1], "vdups_lane");
5783 case NEON::BI__builtin_neon_vgetq_lane_i32:
5784 case NEON::BI__builtin_neon_vdups_laneq_i32:
5785 Ops[0] =
5786 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
5787 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5788 case NEON::BI__builtin_neon_vget_lane_i64:
5789 case NEON::BI__builtin_neon_vdupd_lane_i64:
5790 Ops[0] =
5791 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
5792 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5793 case NEON::BI__builtin_neon_vdupd_lane_f64:
5794 Ops[0] =
5795 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
5796 return Builder.CreateExtractElement(Ops[0], Ops[1], "vdupd_lane");
5797 case NEON::BI__builtin_neon_vgetq_lane_i64:
5798 case NEON::BI__builtin_neon_vdupd_laneq_i64:
5799 Ops[0] =
5800 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
5801 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5802 case NEON::BI__builtin_neon_vget_lane_f32:
5803 Ops[0] =
5804 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
5805 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5806 case NEON::BI__builtin_neon_vget_lane_f64:
5807 Ops[0] =
5808 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
5809 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5810 case NEON::BI__builtin_neon_vgetq_lane_f32:
5811 case NEON::BI__builtin_neon_vdups_laneq_f32:
5812 Ops[0] =
5813 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
5814 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5815 case NEON::BI__builtin_neon_vgetq_lane_f64:
5816 case NEON::BI__builtin_neon_vdupd_laneq_f64:
5817 Ops[0] =
5818 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
5819 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5820 case NEON::BI__builtin_neon_vaddh_f16:
5821 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
5822 case NEON::BI__builtin_neon_vsubh_f16:
5823 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
5824 case NEON::BI__builtin_neon_vmulh_f16:
5825 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
5826 case NEON::BI__builtin_neon_vdivh_f16:
5827 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
5828 case NEON::BI__builtin_neon_vfmah_f16:
5829 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
5830 return emitCallMaybeConstrainedFPBuiltin(
5831 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
5832 {Ops[1], Ops[2], Ops[0]});
5833 case NEON::BI__builtin_neon_vfmsh_f16: {
5834 Value *Neg = Builder.CreateFNeg(Ops[1], "vsubh");
5835
5836 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
5837 return emitCallMaybeConstrainedFPBuiltin(
5838 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
5839 {Neg, Ops[2], Ops[0]});
5840 }
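// Expressing vfmsh as fma(-b, c, a) negates one multiplicand rather than
// the final product, preserving the single-rounding fused-multiply
// semantics of the underlying instruction.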
5841 case NEON::BI__builtin_neon_vaddd_s64:
5842 case NEON::BI__builtin_neon_vaddd_u64:
5843 return Builder.CreateAdd(Ops[0], Ops[1], "vaddd");
5844 case NEON::BI__builtin_neon_vsubd_s64:
5845 case NEON::BI__builtin_neon_vsubd_u64:
5846 return Builder.CreateSub(Ops[0], Ops[1], "vsubd");
5847 case NEON::BI__builtin_neon_vqdmlalh_s16:
5848 case NEON::BI__builtin_neon_vqdmlslh_s16: {
5849 SmallVector<Value *, 2> ProductOps;
5850 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5851 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
5852 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
5853 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5854 ProductOps, "vqdmlXl");
5855 Constant *CI = ConstantInt::get(SizeTy, 0);
5856 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5857
5858 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
5859 ? Intrinsic::aarch64_neon_sqadd
5860 : Intrinsic::aarch64_neon_sqsub;
5861 // Drop the 2nd multiplication argument before the accumulation
5862 Ops.pop_back();
5863 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
5864 }
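// There is no scalar i16 sqdmull, so vectorWrapScalar16 places each operand
// in lane 0 of a v4i16 (the remaining lanes are undefined), the
// v4i16 -> v4i32 sqdmull intrinsic performs the widening multiply, and
// lane 0 is extracted for the saturating accumulate.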
5865 case NEON::BI__builtin_neon_vqshlud_n_s64: {
5866 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5867 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
5868 Ops, "vqshlu_n");
5869 }
5870 case NEON::BI__builtin_neon_vqshld_n_u64:
5871 case NEON::BI__builtin_neon_vqshld_n_s64: {
5872 Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
5873 ? Intrinsic::aarch64_neon_uqshl
5874 : Intrinsic::aarch64_neon_sqshl;
5875 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5876 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
5877 }
5878 case NEON::BI__builtin_neon_vrshrd_n_u64:
5879 case NEON::BI__builtin_neon_vrshrd_n_s64: {
5880 Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
5881 ? Intrinsic::aarch64_neon_urshl
5882 : Intrinsic::aarch64_neon_srshl;
5883 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
5884 Ops[1] = ConstantInt::get(Int64Ty, -SV);
5885 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
5886 }
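// There is no dedicated scalar rounding-right-shift intrinsic; a rounding
// shift right by N is emitted as a rounding shift left (urshl/srshl) by -N.
// The vrsrad_n case below uses the same trick before the accumulate.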
5887 case NEON::BI__builtin_neon_vrsrad_n_u64:
5888 case NEON::BI__builtin_neon_vrsrad_n_s64: {
5889 Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
5890 ? Intrinsic::aarch64_neon_urshl
5891 : Intrinsic::aarch64_neon_srshl;
5892 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5893 Ops[2] = Builder.CreateNeg(Ops[2]);
5894 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
5895 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
5896 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
5897 }
5898 case NEON::BI__builtin_neon_vshld_n_s64:
5899 case NEON::BI__builtin_neon_vshld_n_u64: {
5900 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[1]);
5901 return Builder.CreateShl(
5902 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
5903 }
5904 case NEON::BI__builtin_neon_vshrd_n_s64: {
5905 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[1]);
5906 return Builder.CreateAShr(
5907 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5908 Amt->getZExtValue())),
5909 "shrd_n");
5910 }
5911 case NEON::BI__builtin_neon_vshrd_n_u64: {
5912 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[1]);
5913 uint64_t ShiftAmt = Amt->getZExtValue();
5914 // Right-shifting an unsigned value by its size yields 0.
5915 if (ShiftAmt == 64)
5916 return ConstantInt::get(Int64Ty, 0);
5917 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
5918 "shrd_n");
5919 }
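// The special-casing above avoids poison in the IR: a shift amount equal to
// the bit width is poison for IR shifts, so the signed form clamps to 63
// (which produces the same sign-filled result) and the unsigned form folds
// a shift by 64 directly to the constant 0.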
5920 case NEON::BI__builtin_neon_vsrad_n_s64: {
5921 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[2]);
5922 Ops[1] = Builder.CreateAShr(
5923 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5924 Amt->getZExtValue())),
5925 "shrd_n");
5926 return Builder.CreateAdd(Ops[0], Ops[1]);
5927 }
5928 case NEON::BI__builtin_neon_vsrad_n_u64: {
5929 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[2]);
5930 uint64_t ShiftAmt = Amt->getZExtValue();
5931 // Right-shifting an unsigned value by its size yields 0.
5932 // As Op + 0 = Op, return Ops[0] directly.
5933 if (ShiftAmt == 64)
5934 return Ops[0];
5935 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
5936 "shrd_n");
5937 return Builder.CreateAdd(Ops[0], Ops[1]);
5938 }
5939 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
5940 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
5941 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
5942 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
5943 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "lane");
5944 SmallVector<Value *, 2> ProductOps;
5945 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5946 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
5947 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
5948 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5949 ProductOps, "vqdmlXl");
5950 Constant *CI = ConstantInt::get(SizeTy, 0);
5951 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5952 // Drop lane-selection and the corresponding vector argument (these have
5953 // already been used)
5954 Ops.pop_back_n(2);
5955
5956 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
5957 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
5958 ? Intrinsic::aarch64_neon_sqadd
5959 : Intrinsic::aarch64_neon_sqsub;
5960 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
5961 }
5962 case NEON::BI__builtin_neon_vqdmlals_s32:
5963 case NEON::BI__builtin_neon_vqdmlsls_s32: {
5964 SmallVector<Value *, 2> ProductOps;
5965 ProductOps.push_back(Ops[1]);
5966 ProductOps.push_back(Ops[2]);
5967 Ops[1] =
5968 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5969 ProductOps, "vqdmlXl");
5970
5971 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
5972 ? Intrinsic::aarch64_neon_sqadd
5973 : Intrinsic::aarch64_neon_sqsub;
5974 // Drop the 2nd multiplication argument before the accumulation
5975 Ops.pop_back();
5976 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
5977 }
5978 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
5979 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
5980 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
5981 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
5982 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "lane");
5983 SmallVector<Value *, 2> ProductOps;
5984 ProductOps.push_back(Ops[1]);
5985 ProductOps.push_back(Ops[2]);
5986 Ops[1] =
5987 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5988 ProductOps, "vqdmlXl");
5989 // Drop lane-selection and the corresponding vector argument (these have
5990 // already been used)
5991 Ops.pop_back_n(2);
5992
5993 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
5994 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
5995 ? Intrinsic::aarch64_neon_sqadd
5996 : Intrinsic::aarch64_neon_sqsub;
5997 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
5998 }
5999 case NEON::BI__builtin_neon_vget_lane_bf16:
6000 case NEON::BI__builtin_neon_vduph_lane_bf16:
6001 case NEON::BI__builtin_neon_vduph_lane_f16: {
6002 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
6003 }
6004 case NEON::BI__builtin_neon_vgetq_lane_bf16:
6005 case NEON::BI__builtin_neon_vduph_laneq_bf16:
6006 case NEON::BI__builtin_neon_vduph_laneq_f16: {
6007 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
6008 }
6009 case NEON::BI__builtin_neon_vcvt_bf16_f32: {
6010 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6011 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6012 return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6013 }
6014 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
6015 SmallVector<int, 16> ConcatMask(8);
6016 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6017 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6018 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6019 llvm::Value *Trunc =
6020 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6021 return Builder.CreateShuffleVector(
6022 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
6023 }
6024 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
6025 SmallVector<int, 16> ConcatMask(8);
6026 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6027 SmallVector<int, 16> LoMask(4);
6028 std::iota(LoMask.begin(), LoMask.end(), 0);
6029 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6030 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6031 llvm::Type *V8BF16 = FixedVectorType::get(Builder.getBFloatTy(), 8);
6032 llvm::Value *Inactive = Builder.CreateShuffleVector(
6033 Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
6034 llvm::Value *Trunc =
6035 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
6036 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
6037 }
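// vcvtq_high_bf16_f32 is thus two shuffles around a single fptrunc: keep
// the low four bf16 lanes of the accumulator Ops[0] and concatenate the
// four freshly truncated lanes from Ops[1] as the high half.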
6038
6039 case clang::AArch64::BI_InterlockedAdd:
6040 case clang::AArch64::BI_InterlockedAdd_acq:
6041 case clang::AArch64::BI_InterlockedAdd_rel:
6042 case clang::AArch64::BI_InterlockedAdd_nf:
6043 case clang::AArch64::BI_InterlockedAdd64:
6044 case clang::AArch64::BI_InterlockedAdd64_acq:
6045 case clang::AArch64::BI_InterlockedAdd64_rel:
6046 case clang::AArch64::BI_InterlockedAdd64_nf: {
6047 Address DestAddr = CheckAtomicAlignment(*this, E);
6048 Value *Val = Ops[1];
6049 llvm::AtomicOrdering Ordering;
6050 switch (BuiltinID) {
6051 case clang::AArch64::BI_InterlockedAdd:
6052 case clang::AArch64::BI_InterlockedAdd64:
6053 Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
6054 break;
6055 case clang::AArch64::BI_InterlockedAdd_acq:
6056 case clang::AArch64::BI_InterlockedAdd64_acq:
6057 Ordering = llvm::AtomicOrdering::Acquire;
6058 break;
6059 case clang::AArch64::BI_InterlockedAdd_rel:
6060 case clang::AArch64::BI_InterlockedAdd64_rel:
6061 Ordering = llvm::AtomicOrdering::Release;
6062 break;
6063 case clang::AArch64::BI_InterlockedAdd_nf:
6064 case clang::AArch64::BI_InterlockedAdd64_nf:
6065 Ordering = llvm::AtomicOrdering::Monotonic;
6066 break;
6067 default:
6068 llvm_unreachable("missing builtin ID in switch!");
6069 }
6070 AtomicRMWInst *RMWI =
6071 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val, Ordering);
6072 return Builder.CreateAdd(RMWI, Val);
6073 }
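// atomicrmw returns the value the location held *before* the operation,
// while MSVC's _InterlockedAdd family returns the updated value, hence the
// extra add of Val to the RMW result.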
6074 }
6075
6076 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
6077 llvm::Type *Ty = VTy;
6078 if (!Ty)
6079 return nullptr;
6080
6081 bool ExtractLow = false;
6082 bool ExtendLaneArg = false;
6083 switch (BuiltinID) {
6084 default: return nullptr;
6085 case NEON::BI__builtin_neon_vbsl_v:
6086 case NEON::BI__builtin_neon_vbslq_v: {
6087 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6088 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
6089 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
6090 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
6091
6092 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
6093 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
6094 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
6095 return Builder.CreateBitCast(Ops[0], Ty);
6096 }
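// vbsl is a pure bitwise select, (m & a) | (~m & b): each result bit comes
// from Ops[1] where the mask bit in Ops[0] is set and from Ops[2] where it
// is clear, which is why all operands are first reinterpreted as integer
// vectors.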
6097 case NEON::BI__builtin_neon_vfma_lane_v:
6098 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6099 // The ARM builtins (and instructions) have the addend as the first
6100 // operand, but the 'fma' intrinsics have it last. Swap it around here.
6101 Value *Addend = Ops[0];
6102 Value *Multiplicand = Ops[1];
6103 Value *LaneSource = Ops[2];
6104 Ops[0] = Multiplicand;
6105 Ops[1] = LaneSource;
6106 Ops[2] = Addend;
6107
6108 // Now adjust things to handle the lane access.
6109 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
6110 ? llvm::FixedVectorType::get(VTy->getElementType(),
6111 VTy->getNumElements() / 2)
6112 : VTy;
6113 llvm::Constant *cst = cast<Constant>(Ops[3]);
6114 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
6115 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
6116 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
6117
6118 Ops.pop_back();
6119 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
6120 : Intrinsic::fma;
6121 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
6122 }
6123 case NEON::BI__builtin_neon_vfma_laneq_v: {
6124 auto *VTy = cast<llvm::FixedVectorType>(Ty);
6125 // v1f64 fma should be mapped to Neon scalar f64 fma
6126 if (VTy && VTy->getElementType() == DoubleTy) {
6127 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6128 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6129 llvm::FixedVectorType *VTy =
6130 llvm::FixedVectorType::get(DoubleTy, 2);
6131 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
6132 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6133 Value *Result;
6134 Result = emitCallMaybeConstrainedFPBuiltin(
6135 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
6136 DoubleTy, {Ops[1], Ops[2], Ops[0]});
6137 return Builder.CreateBitCast(Result, Ty);
6138 }
6139 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6140 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6141
6142 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
6143 VTy->getNumElements() * 2);
6144 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
6145 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
6146 cast<ConstantInt>(Ops[3]));
6147 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6148
6149 return emitCallMaybeConstrainedFPBuiltin(
6150 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6151 {Ops[2], Ops[1], Ops[0]});
6152 }
6153 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6154 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6155 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6156
6157 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6158 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6159 return emitCallMaybeConstrainedFPBuiltin(
6160 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6161 {Ops[2], Ops[1], Ops[0]});
6162 }
6163 case NEON::BI__builtin_neon_vfmah_lane_f16:
6164 case NEON::BI__builtin_neon_vfmas_lane_f32:
6165 case NEON::BI__builtin_neon_vfmah_laneq_f16:
6166 case NEON::BI__builtin_neon_vfmas_laneq_f32:
6167 case NEON::BI__builtin_neon_vfmad_lane_f64:
6168 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6169 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6170 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6171 return emitCallMaybeConstrainedFPBuiltin(
6172 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6173 {Ops[1], Ops[2], Ops[0]});
6174 }
6175 case NEON::BI__builtin_neon_vmull_v:
6176 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6177 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6178 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6179 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6180 case NEON::BI__builtin_neon_vmax_v:
6181 case NEON::BI__builtin_neon_vmaxq_v:
6182 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6183 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6184 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6185 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6186 case NEON::BI__builtin_neon_vmaxh_f16: {
6187 Int = Intrinsic::aarch64_neon_fmax;
6188 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
6189 }
6190 case NEON::BI__builtin_neon_vmin_v:
6191 case NEON::BI__builtin_neon_vminq_v:
6192 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6193 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6194 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6195 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
6196 case NEON::BI__builtin_neon_vminh_f16: {
6197 Int = Intrinsic::aarch64_neon_fmin;
6198 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
6199 }
6200 case NEON::BI__builtin_neon_vabd_v:
6201 case NEON::BI__builtin_neon_vabdq_v:
6202 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6203 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6204 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6205 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
6206 case NEON::BI__builtin_neon_vpadal_v:
6207 case NEON::BI__builtin_neon_vpadalq_v: {
6208 unsigned ArgElts = VTy->getNumElements();
6209 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6210 unsigned BitWidth = EltTy->getBitWidth();
6211 auto *ArgTy = llvm::FixedVectorType::get(
6212 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
6213 llvm::Type* Tys[2] = { VTy, ArgTy };
6214 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6215 SmallVector<llvm::Value *, 1> TmpOps;
6216 TmpOps.push_back(Ops[1]);
6217 Function *F = CGM.getIntrinsic(Int, Tys);
6218 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
6219 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6220 return Builder.CreateAdd(tmp, addend);
6221 }
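// vpadal (pairwise add and accumulate long) has no dedicated intrinsic
// here; it is composed from the pairwise lengthening add (saddlp/uaddlp)
// followed by an ordinary vector add of the accumulator.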
6222 case NEON::BI__builtin_neon_vpmin_v:
6223 case NEON::BI__builtin_neon_vpminq_v:
6224 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6225 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6226 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6227 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
6228 case NEON::BI__builtin_neon_vpmax_v:
6229 case NEON::BI__builtin_neon_vpmaxq_v:
6230 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6231 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6232 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6233 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
6234 case NEON::BI__builtin_neon_vminnm_v:
6235 case NEON::BI__builtin_neon_vminnmq_v:
6236 Int = Intrinsic::aarch64_neon_fminnm;
6237 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
6238 case NEON::BI__builtin_neon_vminnmh_f16:
6239 Int = Intrinsic::aarch64_neon_fminnm;
6240 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
6241 case NEON::BI__builtin_neon_vmaxnm_v:
6242 case NEON::BI__builtin_neon_vmaxnmq_v:
6243 Int = Intrinsic::aarch64_neon_fmaxnm;
6244 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
6245 case NEON::BI__builtin_neon_vmaxnmh_f16:
6246 Int = Intrinsic::aarch64_neon_fmaxnm;
6247 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
6248 case NEON::BI__builtin_neon_vrecpss_f32: {
6249 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6250 Ops, "vrecps");
6251 }
6252 case NEON::BI__builtin_neon_vrecpsd_f64:
6253 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6254 Ops, "vrecps");
6255 case NEON::BI__builtin_neon_vrecpsh_f16:
6256 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
6257 Ops, "vrecps");
6258 case NEON::BI__builtin_neon_vqshrun_n_v:
6259 Int = Intrinsic::aarch64_neon_sqshrun;
6260 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
6261 case NEON::BI__builtin_neon_vqrshrun_n_v:
6262 Int = Intrinsic::aarch64_neon_sqrshrun;
6263 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
6264 case NEON::BI__builtin_neon_vqshrn_n_v:
6265 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6266 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
6267 case NEON::BI__builtin_neon_vrshrn_n_v:
6268 Int = Intrinsic::aarch64_neon_rshrn;
6269 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
6270 case NEON::BI__builtin_neon_vqrshrn_n_v:
6271 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6272 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
6273 case NEON::BI__builtin_neon_vrndah_f16: {
6274 Int = Builder.getIsFPConstrained()
6275 ? Intrinsic::experimental_constrained_round
6276 : Intrinsic::round;
6277 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
6278 }
6279 case NEON::BI__builtin_neon_vrnda_v:
6280 case NEON::BI__builtin_neon_vrndaq_v: {
6281 Int = Builder.getIsFPConstrained()
6282 ? Intrinsic::experimental_constrained_round
6283 : Intrinsic::round;
6284 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
6285 }
6286 case NEON::BI__builtin_neon_vrndih_f16: {
6287 Int = Builder.getIsFPConstrained()
6288 ? Intrinsic::experimental_constrained_nearbyint
6289 : Intrinsic::nearbyint;
6290 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
6291 }
6292 case NEON::BI__builtin_neon_vrndmh_f16: {
6293 Int = Builder.getIsFPConstrained()
6294 ? Intrinsic::experimental_constrained_floor
6295 : Intrinsic::floor;
6296 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
6297 }
6298 case NEON::BI__builtin_neon_vrndm_v:
6299 case NEON::BI__builtin_neon_vrndmq_v: {
6300 Int = Builder.getIsFPConstrained()
6301 ? Intrinsic::experimental_constrained_floor
6302 : Intrinsic::floor;
6303 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6304 }
6305 case NEON::BI__builtin_neon_vrndnh_f16: {
6306 Int = Builder.getIsFPConstrained()
6307 ? Intrinsic::experimental_constrained_roundeven
6308 : Intrinsic::roundeven;
6309 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
6310 }
6311 case NEON::BI__builtin_neon_vrndn_v:
6312 case NEON::BI__builtin_neon_vrndnq_v: {
6313 Int = Builder.getIsFPConstrained()
6314 ? Intrinsic::experimental_constrained_roundeven
6315 : Intrinsic::roundeven;
6316 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6317 }
6318 case NEON::BI__builtin_neon_vrndns_f32: {
6319 Int = Builder.getIsFPConstrained()
6320 ? Intrinsic::experimental_constrained_roundeven
6321 : Intrinsic::roundeven;
6322 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
6323 }
6324 case NEON::BI__builtin_neon_vrndph_f16: {
6325 Int = Builder.getIsFPConstrained()
6326 ? Intrinsic::experimental_constrained_ceil
6327 : Intrinsic::ceil;
6328 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
6329 }
6330 case NEON::BI__builtin_neon_vrndp_v:
6331 case NEON::BI__builtin_neon_vrndpq_v: {
6332 Int = Builder.getIsFPConstrained()
6333 ? Intrinsic::experimental_constrained_ceil
6334 : Intrinsic::ceil;
6335 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6336 }
6337 case NEON::BI__builtin_neon_vrndxh_f16: {
6338 Int = Builder.getIsFPConstrained()
6339 ? Intrinsic::experimental_constrained_rint
6340 : Intrinsic::rint;
6341 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
6342 }
6343 case NEON::BI__builtin_neon_vrndx_v:
6344 case NEON::BI__builtin_neon_vrndxq_v: {
6345 Int = Builder.getIsFPConstrained()
6346 ? Intrinsic::experimental_constrained_rint
6347 : Intrinsic::rint;
6348 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6349 }
6350 case NEON::BI__builtin_neon_vrndh_f16: {
6351 Int = Builder.getIsFPConstrained()
6352 ? Intrinsic::experimental_constrained_trunc
6353 : Intrinsic::trunc;
6354 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
6355 }
6356 case NEON::BI__builtin_neon_vrnd32x_f32:
6357 case NEON::BI__builtin_neon_vrnd32xq_f32:
6358 case NEON::BI__builtin_neon_vrnd32x_f64:
6359 case NEON::BI__builtin_neon_vrnd32xq_f64: {
6360 Int = Intrinsic::aarch64_neon_frint32x;
6361 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
6362 }
6363 case NEON::BI__builtin_neon_vrnd32z_f32:
6364 case NEON::BI__builtin_neon_vrnd32zq_f32:
6365 case NEON::BI__builtin_neon_vrnd32z_f64:
6366 case NEON::BI__builtin_neon_vrnd32zq_f64: {
6367 Int = Intrinsic::aarch64_neon_frint32z;
6368 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
6369 }
6370 case NEON::BI__builtin_neon_vrnd64x_f32:
6371 case NEON::BI__builtin_neon_vrnd64xq_f32:
6372 case NEON::BI__builtin_neon_vrnd64x_f64:
6373 case NEON::BI__builtin_neon_vrnd64xq_f64: {
6374 Int = Intrinsic::aarch64_neon_frint64x;
6375 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
6376 }
6377 case NEON::BI__builtin_neon_vrnd64z_f32:
6378 case NEON::BI__builtin_neon_vrnd64zq_f32:
6379 case NEON::BI__builtin_neon_vrnd64z_f64:
6380 case NEON::BI__builtin_neon_vrnd64zq_f64: {
6381 Int = Intrinsic::aarch64_neon_frint64z;
6382 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
6383 }
6384 case NEON::BI__builtin_neon_vrnd_v:
6385 case NEON::BI__builtin_neon_vrndq_v: {
6386 Int = Builder.getIsFPConstrained()
6387 ? Intrinsic::experimental_constrained_trunc
6388 : Intrinsic::trunc;
6389 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
6390 }
6391 case NEON::BI__builtin_neon_vcvt_f64_v:
6392 case NEON::BI__builtin_neon_vcvtq_f64_v:
6393 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6394 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6395 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6396 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6397 case NEON::BI__builtin_neon_vcvt_f64_f32: {
6398 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6399 "unexpected vcvt_f64_f32 builtin");
6400 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6401 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6402
6403 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
6404 }
6405 case NEON::BI__builtin_neon_vcvt_f32_f64: {
6406 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6407 "unexpected vcvt_f32_f64 builtin");
6408 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6409 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6410
6411 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
6412 }
6413 case NEON::BI__builtin_neon_vcvt_s32_v:
6414 case NEON::BI__builtin_neon_vcvt_u32_v:
6415 case NEON::BI__builtin_neon_vcvt_s64_v:
6416 case NEON::BI__builtin_neon_vcvt_u64_v:
6417 case NEON::BI__builtin_neon_vcvt_s16_f16:
6418 case NEON::BI__builtin_neon_vcvt_u16_f16:
6419 case NEON::BI__builtin_neon_vcvtq_s32_v:
6420 case NEON::BI__builtin_neon_vcvtq_u32_v:
6421 case NEON::BI__builtin_neon_vcvtq_s64_v:
6422 case NEON::BI__builtin_neon_vcvtq_u64_v:
6423 case NEON::BI__builtin_neon_vcvtq_s16_f16:
6424 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
6425 Int =
6426 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
6427 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
6428 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
6429 }
6430 case NEON::BI__builtin_neon_vcvta_s16_f16:
6431 case NEON::BI__builtin_neon_vcvta_u16_f16:
6432 case NEON::BI__builtin_neon_vcvta_s32_v:
6433 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
6434 case NEON::BI__builtin_neon_vcvtaq_s32_v:
6435 case NEON::BI__builtin_neon_vcvta_u32_v:
6436 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
6437 case NEON::BI__builtin_neon_vcvtaq_u32_v:
6438 case NEON::BI__builtin_neon_vcvta_s64_v:
6439 case NEON::BI__builtin_neon_vcvtaq_s64_v:
6440 case NEON::BI__builtin_neon_vcvta_u64_v:
6441 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6442 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6443 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6444 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
6445 }
6446 case NEON::BI__builtin_neon_vcvtm_s16_f16:
6447 case NEON::BI__builtin_neon_vcvtm_s32_v:
6448 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
6449 case NEON::BI__builtin_neon_vcvtmq_s32_v:
6450 case NEON::BI__builtin_neon_vcvtm_u16_f16:
6451 case NEON::BI__builtin_neon_vcvtm_u32_v:
6452 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
6453 case NEON::BI__builtin_neon_vcvtmq_u32_v:
6454 case NEON::BI__builtin_neon_vcvtm_s64_v:
6455 case NEON::BI__builtin_neon_vcvtmq_s64_v:
6456 case NEON::BI__builtin_neon_vcvtm_u64_v:
6457 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6458 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6459 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6460 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
6461 }
6462 case NEON::BI__builtin_neon_vcvtn_s16_f16:
6463 case NEON::BI__builtin_neon_vcvtn_s32_v:
6464 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
6465 case NEON::BI__builtin_neon_vcvtnq_s32_v:
6466 case NEON::BI__builtin_neon_vcvtn_u16_f16:
6467 case NEON::BI__builtin_neon_vcvtn_u32_v:
6468 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
6469 case NEON::BI__builtin_neon_vcvtnq_u32_v:
6470 case NEON::BI__builtin_neon_vcvtn_s64_v:
6471 case NEON::BI__builtin_neon_vcvtnq_s64_v:
6472 case NEON::BI__builtin_neon_vcvtn_u64_v:
6473 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6474 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6475 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6476 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
6477 }
6478 case NEON::BI__builtin_neon_vcvtp_s16_f16:
6479 case NEON::BI__builtin_neon_vcvtp_s32_v:
6480 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
6481 case NEON::BI__builtin_neon_vcvtpq_s32_v:
6482 case NEON::BI__builtin_neon_vcvtp_u16_f16:
6483 case NEON::BI__builtin_neon_vcvtp_u32_v:
6484 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
6485 case NEON::BI__builtin_neon_vcvtpq_u32_v:
6486 case NEON::BI__builtin_neon_vcvtp_s64_v:
6487 case NEON::BI__builtin_neon_vcvtpq_s64_v:
6488 case NEON::BI__builtin_neon_vcvtp_u64_v:
6489 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6490 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6491 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6492 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
6493 }
6494 case NEON::BI__builtin_neon_vmulx_v:
6495 case NEON::BI__builtin_neon_vmulxq_v: {
6496 Int = Intrinsic::aarch64_neon_fmulx;
6497 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
6498 }
6499 case NEON::BI__builtin_neon_vmulxh_lane_f16:
6500 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
6501 // vmulx_lane should be mapped to Neon scalar mulx after
6502 // extracting the scalar element
6503 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6504 Ops.pop_back();
6505 Int = Intrinsic::aarch64_neon_fmulx;
6506 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
6507 }
6508 case NEON::BI__builtin_neon_vmul_lane_v:
6509 case NEON::BI__builtin_neon_vmul_laneq_v: {
6510 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
6511 bool Quad = false;
6512 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
6513 Quad = true;
6514 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6515 llvm::FixedVectorType *VTy =
6517 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6518 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6519 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
6520 return Builder.CreateBitCast(Result, Ty);
6521 }
6522 case NEON::BI__builtin_neon_vpmaxnm_v:
6523 case NEON::BI__builtin_neon_vpmaxnmq_v: {
6524 Int = Intrinsic::aarch64_neon_fmaxnmp;
6525 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
6526 }
6527 case NEON::BI__builtin_neon_vpminnm_v:
6528 case NEON::BI__builtin_neon_vpminnmq_v: {
6529 Int = Intrinsic::aarch64_neon_fminnmp;
6530 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
6531 }
6532 case NEON::BI__builtin_neon_vsqrth_f16: {
6533 Int = Builder.getIsFPConstrained()
6534 ? Intrinsic::experimental_constrained_sqrt
6535 : Intrinsic::sqrt;
6536 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
6537 }
6538 case NEON::BI__builtin_neon_vsqrt_v:
6539 case NEON::BI__builtin_neon_vsqrtq_v: {
6540 Int = Builder.getIsFPConstrained()
6541 ? Intrinsic::experimental_constrained_sqrt
6542 : Intrinsic::sqrt;
6543 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6544 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
6545 }
6546 case NEON::BI__builtin_neon_vrbit_v:
6547 case NEON::BI__builtin_neon_vrbitq_v: {
6548 Int = Intrinsic::bitreverse;
6549 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
6550 }
6551 case NEON::BI__builtin_neon_vmaxv_f16: {
6552 Int = Intrinsic::aarch64_neon_fmaxv;
6553 Ty = HalfTy;
6554 VTy = llvm::FixedVectorType::get(HalfTy, 4);
6555 llvm::Type *Tys[2] = {Ty, VTy};
6556 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6557 return Builder.CreateTrunc(Ops[0], HalfTy);
6558 }
6559 case NEON::BI__builtin_neon_vmaxvq_f16: {
6560 Int = Intrinsic::aarch64_neon_fmaxv;
6561 Ty = HalfTy;
6562 VTy = llvm::FixedVectorType::get(HalfTy, 8);
6563 llvm::Type *Tys[2] = {Ty, VTy};
6564 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6565 return Builder.CreateTrunc(Ops[0], HalfTy);
6566 }
6567 case NEON::BI__builtin_neon_vminv_f16: {
6568 Int = Intrinsic::aarch64_neon_fminv;
6569 Ty = HalfTy;
6570 VTy = llvm::FixedVectorType::get(HalfTy, 4);
6571 llvm::Type *Tys[2] = {Ty, VTy};
6572 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6573 return Builder.CreateTrunc(Ops[0], HalfTy);
6574 }
6575 case NEON::BI__builtin_neon_vminvq_f16: {
6576 Int = Intrinsic::aarch64_neon_fminv;
6577 Ty = HalfTy;
6578 VTy = llvm::FixedVectorType::get(HalfTy, 8);
6579 llvm::Type *Tys[2] = {Ty, VTy};
6580 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6581 return Builder.CreateTrunc(Ops[0], HalfTy);
6582 }
6583 case NEON::BI__builtin_neon_vmaxnmv_f16: {
6584 Int = Intrinsic::aarch64_neon_fmaxnmv;
6585 Ty = HalfTy;
6586 VTy = llvm::FixedVectorType::get(HalfTy, 4);
6587 llvm::Type *Tys[2] = {Ty, VTy};
6588 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
6589 return Builder.CreateTrunc(Ops[0], HalfTy);
6590 }
6591 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
6592 Int = Intrinsic::aarch64_neon_fmaxnmv;
6593 Ty = HalfTy;
6594 VTy = llvm::FixedVectorType::get(HalfTy, 8);
6595 llvm::Type *Tys[2] = {Ty, VTy};
6596 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
6597 return Builder.CreateTrunc(Ops[0], HalfTy);
6598 }
6599 case NEON::BI__builtin_neon_vminnmv_f16: {
6600 Int = Intrinsic::aarch64_neon_fminnmv;
6601 Ty = HalfTy;
6602 VTy = llvm::FixedVectorType::get(HalfTy, 4);
6603 llvm::Type *Tys[2] = {Ty, VTy};
6604 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
6605 return Builder.CreateTrunc(Ops[0], HalfTy);
6606 }
6607 case NEON::BI__builtin_neon_vminnmvq_f16: {
6608 Int = Intrinsic::aarch64_neon_fminnmv;
6609 Ty = HalfTy;
6610 VTy = llvm::FixedVectorType::get(HalfTy, 8);
6611 llvm::Type *Tys[2] = {Ty, VTy};
6612 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
6613 return Builder.CreateTrunc(Ops[0], HalfTy);
6614 }
6615 case NEON::BI__builtin_neon_vmul_n_f64: {
6616 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6617 Value *RHS = Builder.CreateBitCast(Ops[1], DoubleTy);
6618 return Builder.CreateFMul(Ops[0], RHS);
6619 }
6620 case NEON::BI__builtin_neon_vaddlv_u8: {
6621 Int = Intrinsic::aarch64_neon_uaddlv;
6622 Ty = Int32Ty;
6623 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
6624 llvm::Type *Tys[2] = {Ty, VTy};
6625 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6626 return Builder.CreateTrunc(Ops[0], Int16Ty);
6627 }
6628 case NEON::BI__builtin_neon_vaddlv_u16: {
6629 Int = Intrinsic::aarch64_neon_uaddlv;
6630 Ty = Int32Ty;
6631 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
6632 llvm::Type *Tys[2] = {Ty, VTy};
6633 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6634 }
6635 case NEON::BI__builtin_neon_vaddlvq_u8: {
6636 Int = Intrinsic::aarch64_neon_uaddlv;
6637 Ty = Int32Ty;
6638 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
6639 llvm::Type *Tys[2] = {Ty, VTy};
6640 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6641 return Builder.CreateTrunc(Ops[0], Int16Ty);
6642 }
6643 case NEON::BI__builtin_neon_vaddlvq_u16: {
6644 Int = Intrinsic::aarch64_neon_uaddlv;
6645 Ty = Int32Ty;
6646 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
6647 llvm::Type *Tys[2] = {Ty, VTy};
6648 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6649 }
6650 case NEON::BI__builtin_neon_vaddlv_s8: {
6651 Int = Intrinsic::aarch64_neon_saddlv;
6652 Ty = Int32Ty;
6653 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
6654 llvm::Type *Tys[2] = {Ty, VTy};
6655 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6656 return Builder.CreateTrunc(Ops[0], Int16Ty);
6657 }
6658 case NEON::BI__builtin_neon_vaddlv_s16: {
6659 Int = Intrinsic::aarch64_neon_saddlv;
6660 Ty = Int32Ty;
6661 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
6662 llvm::Type *Tys[2] = {Ty, VTy};
6663 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6664 }
6665 case NEON::BI__builtin_neon_vaddlvq_s8: {
6666 Int = Intrinsic::aarch64_neon_saddlv;
6667 Ty = Int32Ty;
6668 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
6669 llvm::Type *Tys[2] = {Ty, VTy};
6670 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6671 return Builder.CreateTrunc(Ops[0], Int16Ty);
6672 }
6673 case NEON::BI__builtin_neon_vaddlvq_s16: {
6674 Int = Intrinsic::aarch64_neon_saddlv;
6675 Ty = Int32Ty;
6676 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
6677 llvm::Type *Tys[2] = {Ty, VTy};
6678 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6679 }
6680 case NEON::BI__builtin_neon_vsri_n_v:
6681 case NEON::BI__builtin_neon_vsriq_n_v: {
6682 Int = Intrinsic::aarch64_neon_vsri;
6683 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6684 return EmitNeonCall(Intrin, Ops, "vsri_n");
6685 }
6686 case NEON::BI__builtin_neon_vsli_n_v:
6687 case NEON::BI__builtin_neon_vsliq_n_v: {
6688 Int = Intrinsic::aarch64_neon_vsli;
6689 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6690 return EmitNeonCall(Intrin, Ops, "vsli_n");
6691 }
6692 case NEON::BI__builtin_neon_vsra_n_v:
6693 case NEON::BI__builtin_neon_vsraq_n_v:
6694 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6695 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6696 return Builder.CreateAdd(Ops[0], Ops[1]);
6697 case NEON::BI__builtin_neon_vrsra_n_v:
6698 case NEON::BI__builtin_neon_vrsraq_n_v: {
6699 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6700 SmallVector<llvm::Value *, 2> TmpOps;
6701 TmpOps.push_back(Ops[1]);
6702 TmpOps.push_back(Ops[2]);
6703 Function* F = CGM.getIntrinsic(Int, Ty);
6704 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
6705 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
6706 return Builder.CreateAdd(Ops[0], tmp);
6707 }
6708 case NEON::BI__builtin_neon_vld1_v:
6709 case NEON::BI__builtin_neon_vld1q_v: {
6710 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
6711 }
6712 case NEON::BI__builtin_neon_vst1_v:
6713 case NEON::BI__builtin_neon_vst1q_v:
6714 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6715 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
6716 case NEON::BI__builtin_neon_vld1_lane_v:
6717 case NEON::BI__builtin_neon_vld1q_lane_v: {
6718 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6719 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
6720 PtrOp0.getAlignment());
6721 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
6722 }
6723 case NEON::BI__builtin_neon_vldap1_lane_s64:
6724 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
6725 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6726 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
6727 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
6728 LI->setAtomic(llvm::AtomicOrdering::Acquire);
6729 Ops[0] = LI;
6730 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
6731 }
6732 case NEON::BI__builtin_neon_vld1_dup_v:
6733 case NEON::BI__builtin_neon_vld1q_dup_v: {
6734 Value *V = PoisonValue::get(Ty);
6735 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
6736 PtrOp0.getAlignment());
6737 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
6738 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
6739 return EmitNeonSplat(Ops[0], CI);
6740 }
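// vld1_dup: load a single element, insert it into lane 0 of a poison
// vector, then splat lane 0 across all lanes via EmitNeonSplat's
// zero shuffle mask.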
6741 case NEON::BI__builtin_neon_vst1_lane_v:
6742 case NEON::BI__builtin_neon_vst1q_lane_v:
6743 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6744 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6745 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
6746 case NEON::BI__builtin_neon_vstl1_lane_s64:
6747 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
6748 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6749 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6750 llvm::StoreInst *SI =
6751 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
6752 SI->setAtomic(llvm::AtomicOrdering::Release);
6753 return SI;
6754 }
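// vldap1(q)_lane and vstl1(q)_lane differ from vld1/vst1_lane only in the
// atomic acquire/release ordering on the element access; they correspond to
// the LDAP1/STL1 instructions (FEAT_LRCPC3).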
6755 case NEON::BI__builtin_neon_vld2_v:
6756 case NEON::BI__builtin_neon_vld2q_v: {
6757 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6758 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
6759 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6760 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6761 }
6762 case NEON::BI__builtin_neon_vld3_v:
6763 case NEON::BI__builtin_neon_vld3q_v: {
6764 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6765 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
6766 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6767 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6768 }
6769 case NEON::BI__builtin_neon_vld4_v:
6770 case NEON::BI__builtin_neon_vld4q_v: {
6771 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6772 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
6773 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6774 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6775 }
6776 case NEON::BI__builtin_neon_vld2_dup_v:
6777 case NEON::BI__builtin_neon_vld2q_dup_v: {
6778 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6779 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
6780 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6781 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6782 }
6783 case NEON::BI__builtin_neon_vld3_dup_v:
6784 case NEON::BI__builtin_neon_vld3q_dup_v: {
6785 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6786 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
6787 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6788 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6789 }
6790 case NEON::BI__builtin_neon_vld4_dup_v:
6791 case NEON::BI__builtin_neon_vld4q_dup_v: {
6792 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6793 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
6794 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6795 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6796 }
6797 case NEON::BI__builtin_neon_vld2_lane_v:
6798 case NEON::BI__builtin_neon_vld2q_lane_v: {
6799 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6800 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
6801 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
6802 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6803 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6804 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6805 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
6806 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6807 }
6808 case NEON::BI__builtin_neon_vld3_lane_v:
6809 case NEON::BI__builtin_neon_vld3q_lane_v: {
6810 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6811 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
6812 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
6813 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6814 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6815 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6816 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6817 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
6818 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6819 }
6820 case NEON::BI__builtin_neon_vld4_lane_v:
6821 case NEON::BI__builtin_neon_vld4q_lane_v: {
6822 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6823 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
6824 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
6825 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6826 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6827 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6828 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
6829 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
6830 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
6831 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6832 }
6833 case NEON::BI__builtin_neon_vst2_v:
6834 case NEON::BI__builtin_neon_vst2q_v: {
6835 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6836 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
6837 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
6838 Ops, "");
6839 }
6840 case NEON::BI__builtin_neon_vst2_lane_v:
6841 case NEON::BI__builtin_neon_vst2q_lane_v: {
6842 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6843 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
6844 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6845 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
6846 Ops, "");
6847 }
6848 case NEON::BI__builtin_neon_vst3_v:
6849 case NEON::BI__builtin_neon_vst3q_v: {
6850 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6851 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6852 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
6853 Ops, "");
6854 }
6855 case NEON::BI__builtin_neon_vst3_lane_v:
6856 case NEON::BI__builtin_neon_vst3q_lane_v: {
6857 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6858 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6859 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6860 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
6861 Ops, "");
6862 }
6863 case NEON::BI__builtin_neon_vst4_v:
6864 case NEON::BI__builtin_neon_vst4q_v: {
6865 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6866 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6867 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
6868 Ops, "");
6869 }
6870 case NEON::BI__builtin_neon_vst4_lane_v:
6871 case NEON::BI__builtin_neon_vst4q_lane_v: {
6872 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6873 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6874 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
6875 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
6876 Ops, "");
6877 }
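// Note (editorial): the vstN/vstN_lane cases rotate the destination
// pointer from Ops[0] to the back for the same reason: the stN/stNlane
// intrinsics expect (vectors..., [lane index,] pointer).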
6878 case NEON::BI__builtin_neon_vtrn_v:
6879 case NEON::BI__builtin_neon_vtrnq_v: {
6880 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6881 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6882 Value *SV = nullptr;
6883
6884 for (unsigned vi = 0; vi != 2; ++vi) {
6885 SmallVector<int, 16> Indices;
6886 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6887 Indices.push_back(i+vi);
6888 Indices.push_back(i+e+vi);
6889 }
6890 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6891 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
6892 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6893 }
6894 return SV;
6895 }
6896 case NEON::BI__builtin_neon_vuzp_v:
6897 case NEON::BI__builtin_neon_vuzpq_v: {
6898 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6899 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6900 Value *SV = nullptr;
6901
6902 for (unsigned vi = 0; vi != 2; ++vi) {
6903 SmallVector<int, 16> Indices;
6904 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
6905 Indices.push_back(2*i+vi);
6906
6907 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6908 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
6909 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6910 }
6911 return SV;
6912 }
6913 case NEON::BI__builtin_neon_vzip_v:
6914 case NEON::BI__builtin_neon_vzipq_v: {
6915 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6916 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6917 Value *SV = nullptr;
6918
6919 for (unsigned vi = 0; vi != 2; ++vi) {
6920 SmallVector<int, 16> Indices;
6921 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6922 Indices.push_back((i + vi*e) >> 1);
6923 Indices.push_back(((i + vi*e) >> 1)+e);
6924 }
6925 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6926 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
6927 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6928 }
6929 return SV;
6930 }
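// Note (editorial): vtrn/vuzp/vzip have no dedicated AArch64 intrinsics
// here; each is emitted as two shufflevectors stored to consecutive result
// slots. For 4 x i32 inputs a = <a0,a1,a2,a3>, b = <b0,b1,b2,b3> the index
// lists above produce:
//   vtrn -> <a0,b0,a2,b2>, <a1,b1,a3,b3> (transpose element pairs)
//   vuzp -> <a0,a2,b0,b2>, <a1,a3,b1,b3> (de-interleave even/odd)
//   vzip -> <a0,b0,a1,b1>, <a2,b2,a3,b3> (interleave low/high halves)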
6931 case NEON::BI__builtin_neon_vqtbl1q_v: {
6932 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
6933 Ops, "vtbl1");
6934 }
6935 case NEON::BI__builtin_neon_vqtbl2q_v: {
6936 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
6937 Ops, "vtbl2");
6938 }
6939 case NEON::BI__builtin_neon_vqtbl3q_v: {
6940 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
6941 Ops, "vtbl3");
6942 }
6943 case NEON::BI__builtin_neon_vqtbl4q_v: {
6944 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
6945 Ops, "vtbl4");
6946 }
6947 case NEON::BI__builtin_neon_vqtbx1q_v: {
6948 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
6949 Ops, "vtbx1");
6950 }
6951 case NEON::BI__builtin_neon_vqtbx2q_v: {
6952 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
6953 Ops, "vtbx2");
6954 }
6955 case NEON::BI__builtin_neon_vqtbx3q_v: {
6956 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
6957 Ops, "vtbx3");
6958 }
6959 case NEON::BI__builtin_neon_vqtbx4q_v: {
6960 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
6961 Ops, "vtbx4");
6962 }
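// Note (editorial): TBL zeroes a result element whose table index is out
// of range, whereas TBX leaves the corresponding element of the passed-in
// destination vector unchanged; hence the parallel tbl1-4/tbx1-4 sets.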
6963 case NEON::BI__builtin_neon_vsqadd_v:
6964 case NEON::BI__builtin_neon_vsqaddq_v: {
6965 Int = Intrinsic::aarch64_neon_usqadd;
6966 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
6967 }
6968 case NEON::BI__builtin_neon_vuqadd_v:
6969 case NEON::BI__builtin_neon_vuqaddq_v: {
6970 Int = Intrinsic::aarch64_neon_suqadd;
6971 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
6972 }
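// Note (editorial): the apparent name swap above is intentional. vsqadd
// takes an unsigned accumulator and a signed addend (USQADD), while vuqadd
// takes a signed accumulator and an unsigned addend (SUQADD).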
6973
6974 case NEON::BI__builtin_neon_vluti2_laneq_mf8:
6975 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
6976 case NEON::BI__builtin_neon_vluti2_laneq_f16:
6977 case NEON::BI__builtin_neon_vluti2_laneq_p16:
6978 case NEON::BI__builtin_neon_vluti2_laneq_p8:
6979 case NEON::BI__builtin_neon_vluti2_laneq_s16:
6980 case NEON::BI__builtin_neon_vluti2_laneq_s8:
6981 case NEON::BI__builtin_neon_vluti2_laneq_u16:
6982 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
6983 Int = Intrinsic::aarch64_neon_vluti2_laneq;
6984 llvm::Type *Tys[2];
6985 Tys[0] = Ty;
6986 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
6987 /*isQuad*/ false));
6988 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
6989 }
6990 case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
6991 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
6992 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
6993 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
6994 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
6995 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
6996 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
6997 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
6998 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
6999 Int = Intrinsic::aarch64_neon_vluti2_laneq;
7000 llvm::Type *Tys[2];
7001 Tys[0] = Ty;
7002 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7003 /*isQuad*/ true));
7004 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
7005 }
7006 case NEON::BI__builtin_neon_vluti2_lane_mf8:
7007 case NEON::BI__builtin_neon_vluti2_lane_bf16:
7008 case NEON::BI__builtin_neon_vluti2_lane_f16:
7009 case NEON::BI__builtin_neon_vluti2_lane_p16:
7010 case NEON::BI__builtin_neon_vluti2_lane_p8:
7011 case NEON::BI__builtin_neon_vluti2_lane_s16:
7012 case NEON::BI__builtin_neon_vluti2_lane_s8:
7013 case NEON::BI__builtin_neon_vluti2_lane_u16:
7014 case NEON::BI__builtin_neon_vluti2_lane_u8: {
7015 Int = Intrinsic::aarch64_neon_vluti2_lane;
7016 llvm::Type *Tys[2];
7017 Tys[0] = Ty;
7018 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7019 /*isQuad*/ false));
7020 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
7021 }
7022 case NEON::BI__builtin_neon_vluti2q_lane_mf8:
7023 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
7024 case NEON::BI__builtin_neon_vluti2q_lane_f16:
7025 case NEON::BI__builtin_neon_vluti2q_lane_p16:
7026 case NEON::BI__builtin_neon_vluti2q_lane_p8:
7027 case NEON::BI__builtin_neon_vluti2q_lane_s16:
7028 case NEON::BI__builtin_neon_vluti2q_lane_s8:
7029 case NEON::BI__builtin_neon_vluti2q_lane_u16:
7030 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
7031 Int = Intrinsic::aarch64_neon_vluti2_lane;
7032 llvm::Type *Tys[2];
7033 Tys[0] = Ty;
7034 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7035 /*isQuad*/ true));
7036 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
7037 }
7038 case NEON::BI__builtin_neon_vluti4q_lane_mf8:
7039 case NEON::BI__builtin_neon_vluti4q_lane_p8:
7040 case NEON::BI__builtin_neon_vluti4q_lane_s8:
7041 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
7042 Int = Intrinsic::aarch64_neon_vluti4q_lane;
7043 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane");
7044 }
7045 case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
7046 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
7047 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
7048 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
7049 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
7050 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq");
7051 }
7052 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
7053 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
7054 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
7055 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
7056 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
7057 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
7058 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
7059 }
7060 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
7061 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
7062 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
7063 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
7064 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
7065 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
7066 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
7067 }
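// Note (editorial): the vluti2/vluti4 builtins implement the FEAT_LUT
// table lookups with 2-bit and 4-bit indices respectively; the _x2
// variants read a table spanning two vector registers.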
7068 case NEON::BI__builtin_neon_vmmlaq_f16_mf8_fpm:
7069 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla,
7070 {llvm::FixedVectorType::get(HalfTy, 8),
7071 llvm::FixedVectorType::get(Int8Ty, 16)},
7072 Ops, E, "fmmla");
7073 case NEON::BI__builtin_neon_vmmlaq_f32_mf8_fpm:
7074 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla,
7075 {llvm::FixedVectorType::get(FloatTy, 4),
7076 llvm::FixedVectorType::get(Int8Ty, 16)},
7077 Ops, E, "fmmla");
7078 case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
7079 ExtractLow = true;
7080 [[fallthrough]];
7081 case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
7082 case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
7083 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
7084 llvm::FixedVectorType::get(BFloatTy, 8),
7085 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7086 case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
7087 ExtractLow = true;
7088 [[fallthrough]];
7089 case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
7090 case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
7091 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
7092 llvm::FixedVectorType::get(BFloatTy, 8),
7093 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7094 case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
7095 ExtractLow = true;
7096 [[fallthrough]];
7097 case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
7098 case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
7099 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
7100 llvm::FixedVectorType::get(HalfTy, 8),
7101 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7102 case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
7103 ExtractLow = true;
7104 [[fallthrough]];
7105 case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
7106 case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
7107 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
7108 llvm::FixedVectorType::get(HalfTy, 8),
7109 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7110 case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
7111 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7112 llvm::FixedVectorType::get(Int8Ty, 8),
7113 Ops[0]->getType(), false, Ops, E, "vfcvtn");
7114 case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
7115 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7116 llvm::FixedVectorType::get(Int8Ty, 8),
7117 llvm::FixedVectorType::get(HalfTy, 4), false, Ops,
7118 E, "vfcvtn");
7119 case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
7120 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7121 llvm::FixedVectorType::get(Int8Ty, 16),
7122 llvm::FixedVectorType::get(HalfTy, 8), false, Ops,
7123 E, "vfcvtn");
7124 case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
7125 llvm::Type *Ty = llvm::FixedVectorType::get(Int8Ty, 16);
7126 Ops[0] = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
7127 uint64_t(0));
7128 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn2, Ty,
7129 Ops[1]->getType(), false, Ops, E, "vfcvtn2");
7130 }
7131
7132 case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
7133 case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
7134 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2, false, HalfTy,
7135 Ops, E, "fdot2");
7136 case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
7137 case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
7138 ExtendLaneArg = true;
7139 [[fallthrough]];
7140 case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
7141 case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
7142 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2_lane,
7143 ExtendLaneArg, HalfTy, Ops, E, "fdot2_lane");
7144 case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
7145 case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
7146 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4, false,
7147 FloatTy, Ops, E, "fdot4");
7148 case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
7149 case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
7150 ExtendLaneArg = true;
7151 [[fallthrough]];
7152 case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
7153 case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
7154 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4_lane,
7155 ExtendLaneArg, FloatTy, Ops, E, "fdot4_lane");
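// Note (editorial): fdot2 accumulates two-way FP8 dot products into f16
// lanes and fdot4 four-way products into f32 lanes; the _lane/_laneq forms
// route through EmitFP8NeonFDOTCall with the ExtendLaneArg handling above.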
7156
7157 case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
7158 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalb,
7159 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7160 "vmlal");
7161 case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
7162 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalt,
7163 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7164 "vmlal");
7165 case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
7166 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbb,
7167 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7168 "vmlall");
7169 case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
7170 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbt,
7171 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7172 "vmlall");
7173 case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
7174 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltb,
7175 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7176 "vmlall");
7177 case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
7178 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltt,
7179 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7180 "vmlall");
7181 case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
7182 ExtendLaneArg = true;
7183 [[fallthrough]];
7184 case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
7185 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalb_lane,
7186 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7187 case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
7188 ExtendLaneArg = true;
7189 [[fallthrough]];
7190 case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
7191 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalt_lane,
7192 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7193 case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
7194 ExtendLaneArg = true;
7195 [[fallthrough]];
7196 case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
7197 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbb_lane,
7198 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7199 case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
7200 ExtendLaneArg = true;
7201 [[fallthrough]];
7202 case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
7203 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbt_lane,
7204 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7205 case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
7206 ExtendLaneArg = true;
7207 [[fallthrough]];
7208 case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
7209 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltb_lane,
7210 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7211 case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
7212 ExtendLaneArg = true;
7213 [[fallthrough]];
7214 case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
7215 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltt_lane,
7216 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7217 case NEON::BI__builtin_neon_vamin_f16:
7218 case NEON::BI__builtin_neon_vaminq_f16:
7219 case NEON::BI__builtin_neon_vamin_f32:
7220 case NEON::BI__builtin_neon_vaminq_f32:
7221 case NEON::BI__builtin_neon_vaminq_f64: {
7222 Int = Intrinsic::aarch64_neon_famin;
7223 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
7224 }
7225 case NEON::BI__builtin_neon_vamax_f16:
7226 case NEON::BI__builtin_neon_vamaxq_f16:
7227 case NEON::BI__builtin_neon_vamax_f32:
7228 case NEON::BI__builtin_neon_vamaxq_f32:
7229 case NEON::BI__builtin_neon_vamaxq_f64: {
7230 Int = Intrinsic::aarch64_neon_famax;
7231 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
7232 }
7233 case NEON::BI__builtin_neon_vscale_f16:
7234 case NEON::BI__builtin_neon_vscaleq_f16:
7235 case NEON::BI__builtin_neon_vscale_f32:
7236 case NEON::BI__builtin_neon_vscaleq_f32:
7237 case NEON::BI__builtin_neon_vscaleq_f64: {
7238 Int = Intrinsic::aarch64_neon_fp8_fscale;
7239 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fscale");
7240 }
7241 }
7242}
7243
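// ---- Editorial example (not part of ARM.cpp): a minimal sketch of source
// code that reaches the permute lowering above. On AArch64, vtrn_s32
// resolves to NEON::BI__builtin_neon_vtrn_v and becomes the pair of
// shufflevector stores emitted in that case.
#if 0 // illustration only
#include <arm_neon.h>
int32x2x2_t transpose_pair(int32x2_t a, int32x2_t b) {
  return vtrn_s32(a, b);
}
#endif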
7244Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
7245 const CallExpr *E) {
7246 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
7247 BuiltinID == BPF::BI__builtin_btf_type_id ||
7248 BuiltinID == BPF::BI__builtin_preserve_type_info ||
7249 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
7250 "unexpected BPF builtin");
7251
7252 // A sequence number, injected into IR builtin functions, to
7253 // prevent CSE when the only difference between the functions
7254 // may be the debuginfo metadata.
7255 static uint32_t BuiltinSeqNum;
7256
7257 switch (BuiltinID) {
7258 default:
7259 llvm_unreachable("Unexpected BPF builtin");
7260 case BPF::BI__builtin_preserve_field_info: {
7261 const Expr *Arg = E->getArg(0);
7262 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
7263
7264 if (!getDebugInfo()) {
7265 CGM.Error(E->getExprLoc(),
7266 "using __builtin_preserve_field_info() without -g");
7267 return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
7268 : EmitLValue(Arg).emitRawPointer(*this);
7269 }
7270
7271 // Enable underlying preserve_*_access_index() generation.
7272 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
7273 IsInPreservedAIRegion = true;
7274 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
7275 : EmitLValue(Arg).emitRawPointer(*this);
7276 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
7277
7278 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7279 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
7280
7281 // Build the IR for the preserve_field_info intrinsic.
7282 llvm::Function *FnGetFieldInfo = Intrinsic::getOrInsertDeclaration(
7283 &CGM.getModule(), Intrinsic::bpf_preserve_field_info,
7284 {FieldAddr->getType()});
7285 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
7286 }
7287 case BPF::BI__builtin_btf_type_id:
7288 case BPF::BI__builtin_preserve_type_info: {
7289 if (!getDebugInfo()) {
7290 CGM.Error(E->getExprLoc(), "using builtin function without -g");
7291 return nullptr;
7292 }
7293
7294 const Expr *Arg0 = E->getArg(0);
7295 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
7296 Arg0->getType(), Arg0->getExprLoc());
7297
7298 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7299 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
7300 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
7301
7302 llvm::Function *FnDecl;
7303 if (BuiltinID == BPF::BI__builtin_btf_type_id)
7304 FnDecl = Intrinsic::getOrInsertDeclaration(
7305 &CGM.getModule(), Intrinsic::bpf_btf_type_id, {});
7306 else
7307 FnDecl = Intrinsic::getOrInsertDeclaration(
7308 &CGM.getModule(), Intrinsic::bpf_preserve_type_info, {});
7309 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
7310 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
7311 return Fn;
7312 }
7313 case BPF::BI__builtin_preserve_enum_value: {
7314 if (!getDebugInfo()) {
7315 CGM.Error(E->getExprLoc(), "using builtin function without -g");
7316 return nullptr;
7317 }
7318
7319 const Expr *Arg0 = E->getArg(0);
7320 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
7321 Arg0->getType(), Arg0->getExprLoc());
7322
7323 // Find enumerator
7324 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
7325 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
7326 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
7327 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
7328
7329 auto InitVal = Enumerator->getInitVal();
7330 std::string InitValStr;
7331 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
7332 InitValStr = std::to_string(InitVal.getSExtValue());
7333 else
7334 InitValStr = std::to_string(InitVal.getZExtValue());
7335 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
7336 Value *EnumStrVal = Builder.CreateGlobalString(EnumStr);
7337
7338 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7339 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
7340 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
7341
7342 llvm::Function *IntrinsicFn = Intrinsic::getOrInsertDeclaration(
7343 &CGM.getModule(), Intrinsic::bpf_preserve_enum_value, {});
7344 CallInst *Fn =
7345 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
7346 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
7347 return Fn;
7348 }
7349 }
7350}
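// ---- Editorial example (not part of ARM.cpp): a minimal sketch of the
// CO-RE usage that reaches EmitBPFBuiltinExpr above. It must be built with
// clang -target bpf -g; the struct and field names are hypothetical, and
// the constant 0 is the FIELD_BYTE_OFFSET info kind read from E->getArg(1).
#if 0 // illustration only
struct hypo_task { int pid; };
unsigned int pid_byte_offset(struct hypo_task *t) {
  return __builtin_preserve_field_info(t->pid, 0);
}
#endif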
7351
7353Value *CodeGenFunction::BuildVector(ArrayRef<Value *> Ops) {
7354 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
7355 "Not a power-of-two sized vector!");
7356 bool AllConstants = true;
7357 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
7358 AllConstants &= isa<Constant>(Ops[i]);
7359
7360 // If this is a constant vector, create a ConstantVector.
7361 if (AllConstants) {
7362 SmallVector<llvm::Constant *, 16> CstOps;
7363 for (llvm::Value *Op : Ops)
7364 CstOps.push_back(cast<Constant>(Op));
7365 return llvm::ConstantVector::get(CstOps);
7366 }
7367
7368 // Otherwise, insertelement the values to build the vector.
7369 Value *Result = llvm::PoisonValue::get(
7370 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
7371
7372 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7373 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
7374
7375 return Result;
7376}
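// Note (editorial): for Ops = {i32 1, i32 2} the all-constant path above
// folds directly to the IR constant <2 x i32> <i32 1, i32 2>; with a
// runtime element %x it instead builds the chain
//   %v0 = insertelement <2 x i32> poison, i32 1, i64 0
//   %v1 = insertelement <2 x i32> %v0, i32 %x, i64 1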
7377
7378Value *CodeGenFunction::EmitAArch64CpuInit() {
7379 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
7380 llvm::FunctionCallee Func =
7381 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
7382 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
7383 cast<llvm::GlobalValue>(Func.getCallee())
7384 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
7385 return Builder.CreateCall(Func);
7386}
7387
7388Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
7389 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
7390 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
7391 llvm::SmallVector<StringRef, 8> Features;
7392 ArgStr.split(Features, "+");
7393 for (auto &Feature : Features) {
7394 Feature = Feature.trim();
7395 if (!llvm::AArch64::parseFMVExtension(Feature))
7396 return Builder.getFalse();
7397 if (Feature != "default")
7398 Features.push_back(Feature);
7399 }
7400 return EmitAArch64CpuSupports(Features);
7401}
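// ---- Editorial example (not part of ARM.cpp): the source form handled
// above. The literal is split on '+', every component must parse as an FMV
// extension, and the combined mask is tested by the ArrayRef overload that
// follows; the feature names here are illustrative.
#if 0 // illustration only
int can_use_sve2(void) {
  return __builtin_cpu_supports("sve2+bf16");
}
#endif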
7402
7403llvm::Value *
7404CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
7405 llvm::APInt FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
7406 Value *Result = Builder.getTrue();
7407 if (FeaturesMask != 0) {
7408 // Get features from structure in runtime library
7409 // struct {
7410 // unsigned long long features;
7411 // } __aarch64_cpu_features;
7412 llvm::Type *STy = llvm::StructType::get(Int64Ty);
7413 llvm::Constant *AArch64CPUFeatures =
7414 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
7415 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
7416 llvm::Value *CpuFeatures = Builder.CreateGEP(
7417 STy, AArch64CPUFeatures,
7418 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
7419 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
7420 CharUnits::fromQuantity(8));
7421 Value *Mask = Builder.getInt(FeaturesMask.trunc(64));
7422 Value *Bitset = Builder.CreateAnd(Features, Mask);
7423 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
7424 Result = Builder.CreateAnd(Result, Cmp);
7425 }
7426 return Result;
7427}
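// Note (editorial): for a FeaturesMask with, say, bits 0 and 5 set, the
// sequence above emits roughly
//   %feats = load i64, ptr @__aarch64_cpu_features, align 8
//   %band  = and i64 %feats, 33
//   %ok    = icmp eq i64 %band, 33
// so the builtin is true only when every requested feature bit is present.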
Utilities used for generating code for AArch64 that are shared between the classic and ClangIR code generators.
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)
#define NEONMAP0(NameBase)
#define V(N, I)
Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E)
static cir::VectorType getSVEVectorForElementType(CIRGenModule &cgm, mlir::Type eltTy)
static const ARMVectorIntrinsicInfo * findARMVectorIntrinsicInMap(ArrayRef< ARMVectorIntrinsicInfo > intrinsicMap, unsigned builtinID, bool &mapProvenSorted)
static Value * EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, SpecialRegisterAccessKind AccessKind, StringRef SysReg="")
Definition ARM.cpp:1993
static llvm::Value * ARMMVEVectorReinterpret(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *DestType)
Definition ARM.cpp:2868
static llvm::VectorType * GetFloatNeonType(CodeGenFunction *CGF, NeonTypeFlags IntTypeFlags)
Definition ARM.cpp:401
static llvm::Value * MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, uint32_t Shift, bool Unsigned)
Definition ARM.cpp:2838
static llvm::Value * SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, llvm::Type *T, bool Unsigned)
Definition ARM.cpp:2831
static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
Definition ARM.cpp:3912
static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[]
Definition ARM.cpp:1038
static Value * EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< Value * > &Ops, llvm::Triple::ArchType Arch)
Definition ARM.cpp:3090
static void swapCommutativeSMEOperands(unsigned BuiltinID, SmallVectorImpl< Value * > &Ops)
Definition ARM.cpp:4382
static bool AArch64SISDIntrinsicsProvenSorted
Definition ARM.cpp:1050
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[]
Definition ARM.cpp:1020
static llvm::Value * ARMMVECreateFPToSI(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *Ty)
Definition ARM.cpp:2962
static bool HasExtraNeonArgument(unsigned BuiltinID)
Return true if BuiltinID is an overloaded Neon intrinsic with an extra argument that specifies the vector type.
Definition ARM.cpp:2114
static llvm::Value * ARMMVECreateFPToUI(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *Ty)
Definition ARM.cpp:2970
static llvm::Value * ARMMVECreateSIToFP(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *Ty)
Definition ARM.cpp:2946
static bool AArch64SVEIntrinsicsProvenSorted
Definition ARM.cpp:1051
static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
Definition ARM.cpp:3918
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context)
Definition ARM.cpp:2827
static bool AArch64SMEIntrinsicsProvenSorted
Definition ARM.cpp:1052
static llvm::Value * VectorZip(CGBuilderTy &Builder, llvm::Value *V0, llvm::Value *V1)
Definition ARM.cpp:2905
constexpr unsigned SVEBitsPerBlock
Definition ARM.cpp:3377
static const std::pair< unsigned, unsigned > NEONEquivalentIntrinsicMap[]
Definition ARM.cpp:862
static llvm::FixedVectorType * GetNeonType(CodeGenFunction *CGF, NeonTypeFlags TypeFlags, bool HasFastHalfType=true, bool V1Ty=false, bool AllowBFloatArgsAndRet=true)
Definition ARM.cpp:361
Value * readX18AsPtr(CodeGenFunction &CGF)
Helper for the read/write/add/inc X18 builtins: read the X18 register and return it as an i8 pointer.
Definition ARM.cpp:4475
static llvm::Value * ARMMVEVectorElementReverse(CGBuilderTy &Builder, llvm::Value *V, unsigned ReverseWidth)
Definition ARM.cpp:2932
static std::optional< CodeGenFunction::MSVCIntrin > translateAarch64ToMsvcIntrin(unsigned BuiltinID)
Definition ARM.cpp:33
static std::optional< CodeGenFunction::MSVCIntrin > translateArmToMsvcIntrin(unsigned BuiltinID)
Definition ARM.cpp:192
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[]
Definition ARM.cpp:540
static llvm::Value * ARMMVECreateUIToFP(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *Ty)
Definition ARM.cpp:2954
static llvm::Value * VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd)
Definition ARM.cpp:2894
static llvm::Value * ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT)
Definition ARM.cpp:2920
SpecialRegisterAccessKind
Definition ARM.cpp:1984
@ VolatileRead
Definition ARM.cpp:1986
@ NormalRead
Definition ARM.cpp:1985
@ Write
Definition ARM.cpp:1987
static llvm::Value * ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V)
Definition ARM.cpp:2860
static bool NEONSIMDIntrinsicsProvenSorted
Definition ARM.cpp:1047
static Value * EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo, SmallVectorImpl< Value * > &Ops, const CallExpr *E)
Definition ARM.cpp:1118
static Value * emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID, llvm::Type *Ty, ArrayRef< Value * > Args)
Definition ARM.cpp:344
static Value * EmitRangePrefetchBuiltin(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Definition ARM.cpp:2059
static Value * packTBLDVectorList(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Value *ExtOp, Value *IndexOp, llvm::Type *ResTy, unsigned IntID, const char *Name)
Definition ARM.cpp:1911
static bool AArch64SIMDIntrinsicsProvenSorted
Definition ARM.cpp:1049
TokenType getType() const
Returns the token's type, e.g. whether "<" is a template opener or binary operator.
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in the Sanitizers set.
HLSLResourceBindingAttr::RegisterType RegisterType
Definition SemaHLSL.cpp:57
static QualType getPointeeType(const MemRegion *R)
Enumerates target-specific builtins in their own namespaces within namespace clang.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic analysis of a file.
Definition ASTContext.h:226
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
QualType GetBuiltinType(unsigned ID, GetBuiltinTypeError &Error, unsigned *IntegerConstantArgs=nullptr) const
Return the type for the specified builtin.
@ GE_None
No error.
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition Expr.h:2946
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition Expr.h:3150
FunctionDecl * getDirectCallee()
If the callee is a FunctionDecl, return it. Otherwise return null.
Definition Expr.h:3129
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition Expr.h:3137
QualType getCallReturnType(const ASTContext &Ctx) const
getCallReturnType - Get the return type of the call expr.
Definition Expr.cpp:1603
static CharUnits One()
One - Construct a CharUnits quantity of one.
Definition CharUnits.h:58
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition CharUnits.h:63
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this class is possibly signed.
Definition Address.h:128
static Address invalid()
Definition Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessary.
Definition Address.h:253
CharUnits getAlignment() const
Definition Address.h:194
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition Address.h:276
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition Address.h:204
An aggregate value slot.
Definition CGValue.h:551
Address getAddress() const
Definition CGValue.h:691
llvm::DIType * getOrCreateStandaloneType(QualType Ty, SourceLocation Loc)
Emit standalone debug info for a type.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code.
llvm::Value * EmitSVEPredicateCast(llvm::Value *Pred, llvm::ScalableVectorType *VTy)
Definition ARM.cpp:3386
llvm::Value * EmitFP8NeonFMLACall(unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy, SmallVectorImpl< llvm::Value * > &Ops, const CallExpr *E, const char *name)
Definition ARM.cpp:472
llvm::Value * BuildVector(ArrayRef< llvm::Value * > Ops)
Definition ARM.cpp:7353
llvm::Value * EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, const CallExpr *E)
llvm::Value * EmitSVEStructLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3581
llvm::Value * EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID, bool IsZExtReturn)
Definition ARM.cpp:3689
llvm::Value * EmitFP8NeonCall(unsigned IID, ArrayRef< llvm::Type * > Tys, SmallVectorImpl< llvm::Value * > &O, const CallExpr *E, const char *name)
Definition ARM.cpp:447
llvm::Type * ConvertType(QualType T)
llvm::Value * EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3548
llvm::Value * EmitSMEReadWrite(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3836
llvm::Type * SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags)
SVEBuiltinMemEltTy - Returns the memory element type for this memory access builtin.
Definition ARM.cpp:3245
llvm::Value * EmitSVEScatterStore(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3491
llvm::Value * EmitSVEMaskedStore(const CallExpr *, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
Definition ARM.cpp:3746
llvm::Value * EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Definition ARM.cpp:4406
void GetAArch64SVEProcessedOperands(unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, SVETypeFlags TypeFlags)
Definition ARM.cpp:3973
llvm::Value * EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3437
llvm::Function * LookupNeonLLVMIntrinsic(unsigned IntrinsicID, unsigned Modifier, llvm::Type *ArgTy, const CallExpr *E)
Definition ARM.cpp:1076
llvm::Type * getEltType(const SVETypeFlags &TypeFlags)
Definition ARM.cpp:3261
llvm::Value * EmitCommonNeonBuiltinExpr(unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch)
Definition ARM.cpp:1177
llvm::Value * EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx, const llvm::ElementCount &Count)
llvm::Value * EmitSVEDupX(llvm::Value *Scalar)
const TargetInfo & getTarget() const
llvm::Value * EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Definition ARM.cpp:4016
llvm::Value * EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0, llvm::Type *Ty1, bool Extract, SmallVectorImpl< llvm::Value * > &Ops, const CallExpr *E, const char *name)
Definition ARM.cpp:493
llvm::Value * EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition ARM.cpp:2141
llvm::ScalableVectorType * getSVEType(const SVETypeFlags &TypeFlags)
Definition ARM.cpp:3334
llvm::Value * EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Definition ARM.cpp:7244
llvm::Value * EmitSMELdrStr(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3858
llvm::Value * EmitSVETupleCreate(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
Definition ARM.cpp:3961
llvm::Value * EmitSVEPMull(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
Definition ARM.cpp:3644
llvm::Value * EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition ARM.cpp:2978
AggValueSlot CreateAggTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateAggTemp - Create a temporary memory object for the given aggregate type.
llvm::Value * EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt, llvm::Type *Ty, bool usgn, const char *name)
Definition ARM.cpp:509
llvm::Value * getTypeSize(QualType Ty)
Returns the calculated size of the specified type.
SmallVector< llvm::Type *, 2 > getSVEOverloadTypes(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
Definition ARM.cpp:3925
const TargetCodeGenInfo & getTargetHooks() const
llvm::Value * EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty, bool negateForRightShift)
Definition ARM.cpp:487
bool IsInPreservedAIRegion
True if CodeGen currently emits code inside preserved access index region.
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch)
Definition ARM.cpp:4486
llvm::Value * EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E)
llvm::Value * EmitFP8NeonFDOTCall(unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy, SmallVectorImpl< llvm::Value * > &Ops, const CallExpr *E, const char *name)
Definition ARM.cpp:456
llvm::Value * vectorWrapScalar16(llvm::Value *Op)
Definition ARM.cpp:3233
llvm::Value * EmitARMCDEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition ARM.cpp:3079
llvm::Value * EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty, const llvm::CmpInst::Predicate Pred, const llvm::Twine &Name="")
Definition ARM.cpp:1882
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory location.
Definition CGExpr.cpp:302
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitSVEMovl(const SVETypeFlags &TypeFlags, llvm::ArrayRef< llvm::Value * > Ops, unsigned BuiltinID)
Definition ARM.cpp:3662
llvm::Value * EmitSVEPredicateTupleCast(llvm::Value *PredTuple, llvm::StructType *Ty)
Definition ARM.cpp:3421
llvm::Value * EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
Definition ARM.cpp:3669
llvm::Value * EmitSMEZero(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3848
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our best estimate of the alignment of the pointee.
Definition CGExpr.cpp:1591
llvm::Value * EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3596
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cast it to the default address space.
Definition CGExpr.cpp:189
llvm::Value * EmitSMELd1St1(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3801
void EmitAggExpr(const Expr *E, AggValueSlot AS)
EmitAggExpr - Emit the computation of the specified expression of aggregate type.
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type, ignoring the result.
llvm::Value * EmitSVEAllTruePred(const SVETypeFlags &TypeFlags)
Definition ARM.cpp:3371
llvm::Value * EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty)
Definition ARM.cpp:3889
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
Definition CGExpr.cpp:1707
llvm::LLVMContext & getLLVMContext()
llvm::ScalableVectorType * getSVEPredType(const SVETypeFlags &TypeFlags)
Definition ARM.cpp:3299
llvm::Value * EmitNeonCall(llvm::Function *F, SmallVectorImpl< llvm::Value * > &O, const char *name, unsigned shift=0, bool rightshift=false)
Definition ARM.cpp:427
llvm::Value * EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags, ArrayRef< llvm::Value * > Ops)
Definition ARM.cpp:3950
This class organizes the cross-function state that is used while generating LLVM code.
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false, bool AssumeConvergent=false)
Create or return a runtime function declaration with the specified type and name.
ASTContext & getContext() const
llvm::LLVMContext & getLLVMContext()
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys={})
llvm::Value * getRawBitFieldPointer(CodeGenFunction &CGF) const
Definition CGValue.h:441
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
ReturnValueSlot - Contains the address where the return value of a function can be stored, and whether the address is volatile or not.
Definition CGCall.h:379
This represents one expression.
Definition Expr.h:112
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer, using any crazy technique that we want to.
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point.
Definition Expr.cpp:3095
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
Definition Expr.cpp:3086
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx) const
isIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
ExprObjectKind getObjectKind() const
getObjectKind - The object kind that this expression produces.
Definition Expr.h:454
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic expression.
Definition Expr.cpp:277
QualType getType() const
Definition Expr.h:144
Represents a function declaration or definition.
Definition Decl.h:2015
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition Decl.h:301
Flags to identify the types for overloaded Neon builtins.
EltType getEltType() const
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition TypeBase.h:3378
QualType getPointeeType() const
Definition TypeBase.h:3388
A (possibly-)qualified type.
Definition TypeBase.h:937
The collection of all-type qualifiers we support.
Definition TypeBase.h:331
Flags to identify the types for overloaded SVE builtins.
bool isZExtReturn() const
bool isReverseUSDOT() const
bool isOverloadNone() const
MemEltType getMemEltType() const
bool isGatherLoad() const
EltType getEltType() const
bool isOverloadFirstandLast() const
bool isOverloadDefault() const
bool isPrefetch() const
bool isOverloadWhileRW() const
bool isTupleSet() const
bool isReverseMergeAnyAccOp() const
bool isReductionQV() const
bool isTupleGet() const
bool isInsertOp1SVALL() const
bool isAppendSVALL() const
bool isReverseMergeAnyBinOp() const
bool isStructStore() const
bool isTupleCreate() const
bool isGatherPrefetch() const
bool hasSplatOperand() const
MergeType getMergeType() const
bool isByteIndexed() const
bool isStructLoad() const
bool isOverloadWhileOrMultiVecCvt() const
unsigned getSplatOperand() const
bool isScatterStore() const
bool isReverseCompare() const
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
virtual bool hasFastHalfType() const
Determine whether the target has fast native support for operations on half types.
Definition TargetInfo.h:712
bool isBigEndian() const
The base class of the type hierarchy.
Definition TypeBase.h:1866
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition Type.cpp:2231
const T * castAs() const
Member-template castAs<specific type>.
Definition TypeBase.h:9328
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:754
QualType getType() const
Definition Decl.h:723
QualType getType() const
Definition Value.cpp:237
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
Definition CGValue.h:155
const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[]
const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[]
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
Definition Address.h:330
@ OK_BitField
A bitfield object is a bitfield on a C or C++ record.
Definition Specifiers.h:154
@ Result
The result type of a method or function.
Definition TypeBase.h:905
U cast(CodeGen::Address addr)
Definition Address.h:327
@ Enumerator
Enumerator value with fixed underlying type.
Definition Sema.h:840
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::Type * HalfTy
half, bfloat, float, double
EvalResult is a struct with detailed info about an evaluated expression.
Definition Expr.h:648
#define trunc(__x)
Definition tgmath.h:1216
#define round(__x)
Definition tgmath.h:1148
#define rint(__x)
Definition tgmath.h:1131
#define floor(__x)
Definition tgmath.h:722
#define ceil(__x)
Definition tgmath.h:601