clang 23.0.0git
ARM.cpp
Go to the documentation of this file.
1//===---------- ARM.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ABIInfo.h"
14#include "CGBuiltin.h"
15#include "CGDebugInfo.h"
16#include "TargetInfo.h"
19#include "llvm/IR/InlineAsm.h"
20#include "llvm/IR/IntrinsicsAArch64.h"
21#include "llvm/IR/IntrinsicsARM.h"
22#include "llvm/IR/IntrinsicsBPF.h"
23#include "llvm/TargetParser/AArch64TargetParser.h"
24
25#include <numeric>
26
27using namespace clang;
28using namespace CodeGen;
29using namespace llvm;
30using namespace clang::aarch64;
31
// Maps an AArch64 builtin ID onto the CodeGenFunction::MSVCIntrin enumerator
// that implements it.
//
// Builtin IDs that differ only in operand width (8, 16, unsuffixed, 64, and the
// Pointer forms where listed) collapse onto one MSVCIntrin value; the ordering
// suffix (_acq, _rel, _nf) is carried over into the enumerator name.
//
// Returns std::nullopt for every BuiltinID that has no case below.
32static std::optional<CodeGenFunction::MSVCIntrin>
33translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
34 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
35 switch (BuiltinID) {
36 default:
37 return std::nullopt;
// Bit scans: the 32-bit and 64-bit builtins share one enumerator.
38 case clang::AArch64::BI_BitScanForward:
39 case clang::AArch64::BI_BitScanForward64:
40 return MSVCIntrin::_BitScanForward;
41 case clang::AArch64::BI_BitScanReverse:
42 case clang::AArch64::BI_BitScanReverse64:
43 return MSVCIntrin::_BitScanReverse;
// Interlocked operations without an ordering suffix: only the 64-bit builtin
// IDs are listed in this switch.
44 case clang::AArch64::BI_InterlockedAnd64:
45 return MSVCIntrin::_InterlockedAnd;
46 case clang::AArch64::BI_InterlockedExchange64:
47 return MSVCIntrin::_InterlockedExchange;
48 case clang::AArch64::BI_InterlockedExchangeAdd64:
49 return MSVCIntrin::_InterlockedExchangeAdd;
50 case clang::AArch64::BI_InterlockedExchangeSub64:
51 return MSVCIntrin::_InterlockedExchangeSub;
52 case clang::AArch64::BI_InterlockedOr64:
53 return MSVCIntrin::_InterlockedOr;
54 case clang::AArch64::BI_InterlockedXor64:
55 return MSVCIntrin::_InterlockedXor;
56 case clang::AArch64::BI_InterlockedDecrement64:
57 return MSVCIntrin::_InterlockedDecrement;
58 case clang::AArch64::BI_InterlockedIncrement64:
59 return MSVCIntrin::_InterlockedIncrement;
// Interlocked operations with an ordering suffix: all listed widths of one
// operation/suffix pair map to the same enumerator.
60 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
61 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
62 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
63 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
64 return MSVCIntrin::_InterlockedExchangeAdd_acq;
65 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
66 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
67 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
68 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
69 return MSVCIntrin::_InterlockedExchangeAdd_rel;
70 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
71 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
72 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
73 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
74 return MSVCIntrin::_InterlockedExchangeAdd_nf;
75 case clang::AArch64::BI_InterlockedExchange8_acq:
76 case clang::AArch64::BI_InterlockedExchange16_acq:
77 case clang::AArch64::BI_InterlockedExchange_acq:
78 case clang::AArch64::BI_InterlockedExchange64_acq:
79 case clang::AArch64::BI_InterlockedExchangePointer_acq:
80 return MSVCIntrin::_InterlockedExchange_acq;
81 case clang::AArch64::BI_InterlockedExchange8_rel:
82 case clang::AArch64::BI_InterlockedExchange16_rel:
83 case clang::AArch64::BI_InterlockedExchange_rel:
84 case clang::AArch64::BI_InterlockedExchange64_rel:
85 case clang::AArch64::BI_InterlockedExchangePointer_rel:
86 return MSVCIntrin::_InterlockedExchange_rel;
87 case clang::AArch64::BI_InterlockedExchange8_nf:
88 case clang::AArch64::BI_InterlockedExchange16_nf:
89 case clang::AArch64::BI_InterlockedExchange_nf:
90 case clang::AArch64::BI_InterlockedExchange64_nf:
91 case clang::AArch64::BI_InterlockedExchangePointer_nf:
92 return MSVCIntrin::_InterlockedExchange_nf;
93 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
94 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
95 case clang::AArch64::BI_InterlockedCompareExchange_acq:
96 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
97 case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
98 return MSVCIntrin::_InterlockedCompareExchange_acq;
99 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
100 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
101 case clang::AArch64::BI_InterlockedCompareExchange_rel:
102 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
103 case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
104 return MSVCIntrin::_InterlockedCompareExchange_rel;
// Note: unlike the _acq/_rel groups above, the _nf group lists no Pointer form.
105 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
106 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
107 case clang::AArch64::BI_InterlockedCompareExchange_nf:
108 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
109 return MSVCIntrin::_InterlockedCompareExchange_nf;
// 128-bit compare-exchange: each builtin has its own enumerator.
110 case clang::AArch64::BI_InterlockedCompareExchange128:
111 return MSVCIntrin::_InterlockedCompareExchange128;
112 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
113 return MSVCIntrin::_InterlockedCompareExchange128_acq;
114 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
115 return MSVCIntrin::_InterlockedCompareExchange128_nf;
116 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
117 return MSVCIntrin::_InterlockedCompareExchange128_rel;
118 case clang::AArch64::BI_InterlockedOr8_acq:
119 case clang::AArch64::BI_InterlockedOr16_acq:
120 case clang::AArch64::BI_InterlockedOr_acq:
121 case clang::AArch64::BI_InterlockedOr64_acq:
122 return MSVCIntrin::_InterlockedOr_acq;
123 case clang::AArch64::BI_InterlockedOr8_rel:
124 case clang::AArch64::BI_InterlockedOr16_rel:
125 case clang::AArch64::BI_InterlockedOr_rel:
126 case clang::AArch64::BI_InterlockedOr64_rel:
127 return MSVCIntrin::_InterlockedOr_rel;
128 case clang::AArch64::BI_InterlockedOr8_nf:
129 case clang::AArch64::BI_InterlockedOr16_nf:
130 case clang::AArch64::BI_InterlockedOr_nf:
131 case clang::AArch64::BI_InterlockedOr64_nf:
132 return MSVCIntrin::_InterlockedOr_nf;
133 case clang::AArch64::BI_InterlockedXor8_acq:
134 case clang::AArch64::BI_InterlockedXor16_acq:
135 case clang::AArch64::BI_InterlockedXor_acq:
136 case clang::AArch64::BI_InterlockedXor64_acq:
137 return MSVCIntrin::_InterlockedXor_acq;
138 case clang::AArch64::BI_InterlockedXor8_rel:
139 case clang::AArch64::BI_InterlockedXor16_rel:
140 case clang::AArch64::BI_InterlockedXor_rel:
141 case clang::AArch64::BI_InterlockedXor64_rel:
142 return MSVCIntrin::_InterlockedXor_rel;
143 case clang::AArch64::BI_InterlockedXor8_nf:
144 case clang::AArch64::BI_InterlockedXor16_nf:
145 case clang::AArch64::BI_InterlockedXor_nf:
146 case clang::AArch64::BI_InterlockedXor64_nf:
147 return MSVCIntrin::_InterlockedXor_nf;
148 case clang::AArch64::BI_InterlockedAnd8_acq:
149 case clang::AArch64::BI_InterlockedAnd16_acq:
150 case clang::AArch64::BI_InterlockedAnd_acq:
151 case clang::AArch64::BI_InterlockedAnd64_acq:
152 return MSVCIntrin::_InterlockedAnd_acq;
153 case clang::AArch64::BI_InterlockedAnd8_rel:
154 case clang::AArch64::BI_InterlockedAnd16_rel:
155 case clang::AArch64::BI_InterlockedAnd_rel:
156 case clang::AArch64::BI_InterlockedAnd64_rel:
157 return MSVCIntrin::_InterlockedAnd_rel;
158 case clang::AArch64::BI_InterlockedAnd8_nf:
159 case clang::AArch64::BI_InterlockedAnd16_nf:
160 case clang::AArch64::BI_InterlockedAnd_nf:
161 case clang::AArch64::BI_InterlockedAnd64_nf:
162 return MSVCIntrin::_InterlockedAnd_nf;
// Increment/Decrement: only 16-bit, unsuffixed-width and 64-bit IDs are listed.
163 case clang::AArch64::BI_InterlockedIncrement16_acq:
164 case clang::AArch64::BI_InterlockedIncrement_acq:
165 case clang::AArch64::BI_InterlockedIncrement64_acq:
166 return MSVCIntrin::_InterlockedIncrement_acq;
167 case clang::AArch64::BI_InterlockedIncrement16_rel:
168 case clang::AArch64::BI_InterlockedIncrement_rel:
169 case clang::AArch64::BI_InterlockedIncrement64_rel:
170 return MSVCIntrin::_InterlockedIncrement_rel;
171 case clang::AArch64::BI_InterlockedIncrement16_nf:
172 case clang::AArch64::BI_InterlockedIncrement_nf:
173 case clang::AArch64::BI_InterlockedIncrement64_nf:
174 return MSVCIntrin::_InterlockedIncrement_nf;
175 case clang::AArch64::BI_InterlockedDecrement16_acq:
176 case clang::AArch64::BI_InterlockedDecrement_acq:
177 case clang::AArch64::BI_InterlockedDecrement64_acq:
178 return MSVCIntrin::_InterlockedDecrement_acq;
179 case clang::AArch64::BI_InterlockedDecrement16_rel:
180 case clang::AArch64::BI_InterlockedDecrement_rel:
181 case clang::AArch64::BI_InterlockedDecrement64_rel:
182 return MSVCIntrin::_InterlockedDecrement_rel;
183 case clang::AArch64::BI_InterlockedDecrement16_nf:
184 case clang::AArch64::BI_InterlockedDecrement_nf:
185 case clang::AArch64::BI_InterlockedDecrement64_nf:
186 return MSVCIntrin::_InterlockedDecrement_nf;
187 }
// Every switch path, including default, returns; this only guards against a
// future case that forgets to return.
188 llvm_unreachable("must return from switch");
189}
190
// Maps a 32-bit ARM builtin ID onto the CodeGenFunction::MSVCIntrin enumerator
// that implements it.
//
// Builtin IDs that differ only in operand width (8, 16, unsuffixed, 64, and the
// Pointer forms where listed) collapse onto one MSVCIntrin value; the ordering
// suffix (_acq, _rel, _nf) is carried over into the enumerator name. This
// switch lists no 128-bit compare-exchange builtins.
//
// Returns std::nullopt for every BuiltinID that has no case below.
191static std::optional<CodeGenFunction::MSVCIntrin>
192translateArmToMsvcIntrin(unsigned BuiltinID) {
193 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
194 switch (BuiltinID) {
195 default:
196 return std::nullopt;
// Bit scans: the 32-bit and 64-bit builtins share one enumerator.
197 case clang::ARM::BI_BitScanForward:
198 case clang::ARM::BI_BitScanForward64:
199 return MSVCIntrin::_BitScanForward;
200 case clang::ARM::BI_BitScanReverse:
201 case clang::ARM::BI_BitScanReverse64:
202 return MSVCIntrin::_BitScanReverse;
// Interlocked operations without an ordering suffix: only the 64-bit builtin
// IDs are listed in this switch.
203 case clang::ARM::BI_InterlockedAnd64:
204 return MSVCIntrin::_InterlockedAnd;
205 case clang::ARM::BI_InterlockedExchange64:
206 return MSVCIntrin::_InterlockedExchange;
207 case clang::ARM::BI_InterlockedExchangeAdd64:
208 return MSVCIntrin::_InterlockedExchangeAdd;
209 case clang::ARM::BI_InterlockedExchangeSub64:
210 return MSVCIntrin::_InterlockedExchangeSub;
211 case clang::ARM::BI_InterlockedOr64:
212 return MSVCIntrin::_InterlockedOr;
213 case clang::ARM::BI_InterlockedXor64:
214 return MSVCIntrin::_InterlockedXor;
215 case clang::ARM::BI_InterlockedDecrement64:
216 return MSVCIntrin::_InterlockedDecrement;
217 case clang::ARM::BI_InterlockedIncrement64:
218 return MSVCIntrin::_InterlockedIncrement;
// Interlocked operations with an ordering suffix: all listed widths of one
// operation/suffix pair map to the same enumerator.
219 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
220 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
221 case clang::ARM::BI_InterlockedExchangeAdd_acq:
222 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
223 return MSVCIntrin::_InterlockedExchangeAdd_acq;
224 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
225 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
226 case clang::ARM::BI_InterlockedExchangeAdd_rel:
227 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
228 return MSVCIntrin::_InterlockedExchangeAdd_rel;
229 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
230 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
231 case clang::ARM::BI_InterlockedExchangeAdd_nf:
232 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
233 return MSVCIntrin::_InterlockedExchangeAdd_nf;
234 case clang::ARM::BI_InterlockedExchange8_acq:
235 case clang::ARM::BI_InterlockedExchange16_acq:
236 case clang::ARM::BI_InterlockedExchange_acq:
237 case clang::ARM::BI_InterlockedExchange64_acq:
238 case clang::ARM::BI_InterlockedExchangePointer_acq:
239 return MSVCIntrin::_InterlockedExchange_acq;
240 case clang::ARM::BI_InterlockedExchange8_rel:
241 case clang::ARM::BI_InterlockedExchange16_rel:
242 case clang::ARM::BI_InterlockedExchange_rel:
243 case clang::ARM::BI_InterlockedExchange64_rel:
244 case clang::ARM::BI_InterlockedExchangePointer_rel:
245 return MSVCIntrin::_InterlockedExchange_rel;
246 case clang::ARM::BI_InterlockedExchange8_nf:
247 case clang::ARM::BI_InterlockedExchange16_nf:
248 case clang::ARM::BI_InterlockedExchange_nf:
249 case clang::ARM::BI_InterlockedExchange64_nf:
250 case clang::ARM::BI_InterlockedExchangePointer_nf:
251 return MSVCIntrin::_InterlockedExchange_nf;
252 case clang::ARM::BI_InterlockedCompareExchange8_acq:
253 case clang::ARM::BI_InterlockedCompareExchange16_acq:
254 case clang::ARM::BI_InterlockedCompareExchange_acq:
255 case clang::ARM::BI_InterlockedCompareExchange64_acq:
256 case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
257 return MSVCIntrin::_InterlockedCompareExchange_acq;
258 case clang::ARM::BI_InterlockedCompareExchange8_rel:
259 case clang::ARM::BI_InterlockedCompareExchange16_rel:
260 case clang::ARM::BI_InterlockedCompareExchange_rel:
261 case clang::ARM::BI_InterlockedCompareExchange64_rel:
262 case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
263 return MSVCIntrin::_InterlockedCompareExchange_rel;
// Note: unlike the _acq/_rel groups above, the _nf group lists no Pointer form.
264 case clang::ARM::BI_InterlockedCompareExchange8_nf:
265 case clang::ARM::BI_InterlockedCompareExchange16_nf:
266 case clang::ARM::BI_InterlockedCompareExchange_nf:
267 case clang::ARM::BI_InterlockedCompareExchange64_nf:
268 return MSVCIntrin::_InterlockedCompareExchange_nf;
269 case clang::ARM::BI_InterlockedOr8_acq:
270 case clang::ARM::BI_InterlockedOr16_acq:
271 case clang::ARM::BI_InterlockedOr_acq:
272 case clang::ARM::BI_InterlockedOr64_acq:
273 return MSVCIntrin::_InterlockedOr_acq;
274 case clang::ARM::BI_InterlockedOr8_rel:
275 case clang::ARM::BI_InterlockedOr16_rel:
276 case clang::ARM::BI_InterlockedOr_rel:
277 case clang::ARM::BI_InterlockedOr64_rel:
278 return MSVCIntrin::_InterlockedOr_rel;
279 case clang::ARM::BI_InterlockedOr8_nf:
280 case clang::ARM::BI_InterlockedOr16_nf:
281 case clang::ARM::BI_InterlockedOr_nf:
282 case clang::ARM::BI_InterlockedOr64_nf:
283 return MSVCIntrin::_InterlockedOr_nf;
284 case clang::ARM::BI_InterlockedXor8_acq:
285 case clang::ARM::BI_InterlockedXor16_acq:
286 case clang::ARM::BI_InterlockedXor_acq:
287 case clang::ARM::BI_InterlockedXor64_acq:
288 return MSVCIntrin::_InterlockedXor_acq;
289 case clang::ARM::BI_InterlockedXor8_rel:
290 case clang::ARM::BI_InterlockedXor16_rel:
291 case clang::ARM::BI_InterlockedXor_rel:
292 case clang::ARM::BI_InterlockedXor64_rel:
293 return MSVCIntrin::_InterlockedXor_rel;
294 case clang::ARM::BI_InterlockedXor8_nf:
295 case clang::ARM::BI_InterlockedXor16_nf:
296 case clang::ARM::BI_InterlockedXor_nf:
297 case clang::ARM::BI_InterlockedXor64_nf:
298 return MSVCIntrin::_InterlockedXor_nf;
299 case clang::ARM::BI_InterlockedAnd8_acq:
300 case clang::ARM::BI_InterlockedAnd16_acq:
301 case clang::ARM::BI_InterlockedAnd_acq:
302 case clang::ARM::BI_InterlockedAnd64_acq:
303 return MSVCIntrin::_InterlockedAnd_acq;
304 case clang::ARM::BI_InterlockedAnd8_rel:
305 case clang::ARM::BI_InterlockedAnd16_rel:
306 case clang::ARM::BI_InterlockedAnd_rel:
307 case clang::ARM::BI_InterlockedAnd64_rel:
308 return MSVCIntrin::_InterlockedAnd_rel;
309 case clang::ARM::BI_InterlockedAnd8_nf:
310 case clang::ARM::BI_InterlockedAnd16_nf:
311 case clang::ARM::BI_InterlockedAnd_nf:
312 case clang::ARM::BI_InterlockedAnd64_nf:
313 return MSVCIntrin::_InterlockedAnd_nf;
// Increment/Decrement: only 16-bit, unsuffixed-width and 64-bit IDs are listed.
314 case clang::ARM::BI_InterlockedIncrement16_acq:
315 case clang::ARM::BI_InterlockedIncrement_acq:
316 case clang::ARM::BI_InterlockedIncrement64_acq:
317 return MSVCIntrin::_InterlockedIncrement_acq;
318 case clang::ARM::BI_InterlockedIncrement16_rel:
319 case clang::ARM::BI_InterlockedIncrement_rel:
320 case clang::ARM::BI_InterlockedIncrement64_rel:
321 return MSVCIntrin::_InterlockedIncrement_rel;
322 case clang::ARM::BI_InterlockedIncrement16_nf:
323 case clang::ARM::BI_InterlockedIncrement_nf:
324 case clang::ARM::BI_InterlockedIncrement64_nf:
325 return MSVCIntrin::_InterlockedIncrement_nf;
326 case clang::ARM::BI_InterlockedDecrement16_acq:
327 case clang::ARM::BI_InterlockedDecrement_acq:
328 case clang::ARM::BI_InterlockedDecrement64_acq:
329 return MSVCIntrin::_InterlockedDecrement_acq;
330 case clang::ARM::BI_InterlockedDecrement16_rel:
331 case clang::ARM::BI_InterlockedDecrement_rel:
332 case clang::ARM::BI_InterlockedDecrement64_rel:
333 return MSVCIntrin::_InterlockedDecrement_rel;
334 case clang::ARM::BI_InterlockedDecrement16_nf:
335 case clang::ARM::BI_InterlockedDecrement_nf:
336 case clang::ARM::BI_InterlockedDecrement64_nf:
337 return MSVCIntrin::_InterlockedDecrement_nf;
338 }
// Every switch path, including default, returns; this only guards against a
// future case that forgets to return.
339 llvm_unreachable("must return from switch");
340}
341
342// Emit an intrinsic where all operands are of the same type as the result.
343// Depending on mode, this may be a constrained floating-point intrinsic.
//
// IntrinsicID is used in the default mode and ConstrainedIntrinsicID when the
// builder reports constrained-FP mode; either one is overloaded on Ty. Args are
// passed through to the emitted call unchanged, and the call is returned.
//
// NOTE(review): the line with the return type, function name and the parameter
// that the body refers to as CGF (original line 344) is absent from this
// listing - confirm against the upstream file.
345 unsigned IntrinsicID,
346 unsigned ConstrainedIntrinsicID,
347 llvm::Type *Ty,
348 ArrayRef<Value *> Args) {
349 Function *F;
// Select the declaration that matches the builder's current FP mode.
350 if (CGF.Builder.getIsFPConstrained())
351 F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
352 else
353 F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
354
// The same mode check decides which call-creation helper is used.
355 if (CGF.Builder.getIsFPConstrained())
356 return CGF.Builder.CreateConstrainedFPCall(F, Args);
357
358 return CGF.Builder.CreateCall(F, Args);
359}
360
// Builds the llvm::FixedVectorType that corresponds to a set of NeonTypeFlags.
//
// The lane count is the per-element base count shifted left by isQuad(), so it
// doubles for quad types; when V1Ty is true a single-lane vector is returned
// instead (the fixed 16 x i8 return below ignores both).
// HasFastHalfType == false replaces the HalfTy element type with Int16Ty, and
// AllowBFloatArgsAndRet == false replaces BFloatTy with Int16Ty.
//
// NOTE(review): the `case` labels of the switch are absent from this listing
// (the embedded numbering skips them), so which element type selects which
// return cannot be read from here - confirm against the upstream file.
361static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
362 NeonTypeFlags TypeFlags,
363 bool HasFastHalfType = true,
364 bool V1Ty = false,
365 bool AllowBFloatArgsAndRet = true) {
// Used as a shift amount below: 0 keeps the base lane count, 1 doubles it.
366 int IsQuad = TypeFlags.isQuad();
367 switch (TypeFlags.getEltType()) {
371 return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
374 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
376 if (AllowBFloatArgsAndRet)
377 return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
378 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
380 if (HasFastHalfType)
381 return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
382 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
384 return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
387 return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
389 // FIXME: i128 and f128 are not fully supported in Clang and LLVM.
390 // Many i128 and f128 APIs are missing,
391 // so we use v16i8 to represent poly128 and let it get pattern matched.
392 return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
394 return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
396 return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
397 }
398 llvm_unreachable("Unknown vector element type!");
399}
400
// Returns a floating-point fixed vector type chosen from the element type in
// IntTypeFlags: HalfTy with 4 lanes, FloatTy with 2 lanes or DoubleTy with
// 1 lane, each doubled when isQuad() is set. Any other element type reaches
// llvm_unreachable.
//
// NOTE(review): the three `case` labels are absent from this listing, so the
// element type that selects each return cannot be read from here - confirm
// against the upstream file.
401static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
402 NeonTypeFlags IntTypeFlags) {
// Used as a shift amount below: 0 keeps the base lane count, 1 doubles it.
403 int IsQuad = IntTypeFlags.isQuad();
404 switch (IntTypeFlags.getEltType()) {
406 return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
408 return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
410 return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
411 default:
412 llvm_unreachable("Type can't be converted to floating-point!");
413 }
414}
415
// Emits a shufflevector named "lane" that shuffles V with itself using a
// constant mask of Count elements, every one of which is C.
//
// NOTE(review): the first line of this signature (original line 416, declaring
// V and C) is absent from this listing; the sibling call
// `EmitNeonSplat(V, C, EC)` suggests this is the three-argument EmitNeonSplat -
// confirm against the upstream file.
417 const ElementCount &Count) {
418 Value *SV = llvm::ConstantVector::getSplat(Count, C);
419 return Builder.CreateShuffleVector(V, V, SV, "lane");
420}
421
// Convenience form: takes the element count from V's own vector type and
// forwards to the three-argument EmitNeonSplat.
//
// NOTE(review): the signature line (original line 422, declaring V and C) is
// absent from this listing - confirm against the upstream file.
423 ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
424 return EmitNeonSplat(V, C, EC);
425}
426
// Coerces each entry of Ops to the type of the matching formal parameter of F
// and emits the call, named `name`.
//
//  - Ops is updated in place.
//  - shift: when non-zero, the operand at that index is converted with
//    EmitNeonShiftVector (passing `rightshift` through) instead of being
//    bitcast. A value of 0 means no operand gets that treatment.
//  - When F is a constrained FP intrinsic, metadata-typed parameters are left
//    untouched and the call is created with CreateConstrainedFPCall.
//
// NOTE(review): the first line of this signature (original line 427, declaring
// F and Ops) is absent from this listing - confirm against the upstream file.
428 const char *name,
429 unsigned shift, bool rightshift) {
// j indexes Ops in lock-step with the formal-parameter iterator; it still
// advances when a metadata parameter is skipped via `continue`.
430 unsigned j = 0;
431 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
432 ai != ae; ++ai, ++j) {
433 if (F->isConstrainedFPIntrinsic())
434 if (ai->getType()->isMetadataTy())
435 continue;
436 if (shift > 0 && shift == j)
437 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
438 else
439 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
440 }
441
442 if (F->isConstrainedFPIntrinsic())
443 return Builder.CreateConstrainedFPCall(F, Ops, name);
444 return Builder.CreateCall(F, Ops, name);
445}
446
// Removes the last entry of Ops and passes it to a call of the
// aarch64_set_fpmr intrinsic, then emits intrinsic IID (overloaded on Tys) on
// the remaining operands via EmitNeonCall. E is not used in the visible body.
//
// NOTE(review): the leading lines of this signature (original lines 447-449,
// declaring IID, Tys and Ops) are absent from this listing - confirm against
// the upstream file.
450 const CallExpr *E, const char *name) {
// pop_back_val() shrinks Ops, so the value handed to set_fpmr is not also
// passed to the IID call below.
451 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
452 Ops.pop_back_val());
453 return EmitNeonCall(CGM.getIntrinsic(IID, Tys), Ops, name);
454}
455
// Emits FP8 intrinsic IID through EmitFP8NeonCall with two overload types:
// a fixed vector of RetTy elements sized to match Ops[0], and the type of
// Ops[1]. When ExtendLaneArg is set, Ops[2] is first widened to 16 x i8.
//
// NOTE(review): the line with the return type and function name (original
// line 456) is absent from this listing - confirm against the upstream file.
457 unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
458 SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
459
// Lane count of the result: total bits of Ops[0] divided by bits per RetTy
// element.
460 const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
461 RetTy->getPrimitiveSizeInBits();
462 llvm::Type *Tys[] = {llvm::FixedVectorType::get(RetTy, ElemCount),
463 Ops[1]->getType()};
464 if (ExtendLaneArg) {
// Insert Ops[2] at index 0 of a poison 16 x i8 vector; the remaining lanes
// stay poison.
465 auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
466 Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
467 uint64_t(0));
468 }
469 return EmitFP8NeonCall(IID, Tys, Ops, E, name);
470}
471
// Emits FP8 intrinsic IID through EmitFP8NeonCall with a single overload type:
// a fixed vector of RetTy elements sized to match Ops[0]. When ExtendLaneArg
// is set, Ops[2] is first widened to 16 x i8.
//
// NOTE(review): the line with the return type and function name (original
// line 472) is absent from this listing - confirm against the upstream file.
473 unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
474 SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
475
476 if (ExtendLaneArg) {
// Insert Ops[2] at index 0 of a poison 16 x i8 vector; the remaining lanes
// stay poison.
477 auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
478 Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
479 uint64_t(0));
480 }
// Lane count of the result: total bits of Ops[0] divided by bits per RetTy
// element.
481 const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
482 RetTy->getPrimitiveSizeInBits();
483 return EmitFP8NeonCall(IID, {llvm::FixedVectorType::get(RetTy, ElemCount)},
484 Ops, E, name);
485}
486
// Reads V as a ConstantInt and returns a signed constant of type Ty holding
// the same value, negated when `neg` is true.
//
// NOTE(review): the first line of this signature (original line 487, declaring
// V and Ty) is absent from this listing; the call
// `EmitNeonShiftVector(Ops[j], ai->getType(), rightshift)` elsewhere in the
// file matches this parameter shape - confirm against the upstream file.
// The sign-extended 64-bit value is narrowed to int here.
489 int SV = cast<ConstantInt>(V)->getSExtValue();
490 return ConstantInt::getSigned(Ty, neg ? -SV : SV);
491}
492
// Emits FP8 conversion intrinsic IID through EmitFP8NeonCall, overloaded on
// {Ty0, Ty1}. When Extract is true, the second overload type is replaced by
// 8 x i8 and Ops[0] is narrowed to its first 8 elements before the call.
//
// NOTE(review): one parameter line (original line 495, presumably declaring
// Ops) is absent from this listing - confirm against the upstream file.
493Value *CodeGenFunction::EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
494 llvm::Type *Ty1, bool Extract,
496 const CallExpr *E,
497 const char *name) {
498 llvm::Type *Tys[] = {Ty0, Ty1};
499 if (Extract) {
500 // Ops[0] is mfloat8x16_t, but the intrinsic converts only the lower part of
501 // the vector.
502 Tys[1] = llvm::FixedVectorType::get(Int8Ty, 8);
503 Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], uint64_t(0));
504 }
505 return EmitFP8NeonCall(IID, Tys, Ops, E, name);
506}
507
508// Right-shift a vector by a constant.
//
// Vec is bitcast to Ty, and Shift is read as a ConstantInt. `usgn` selects a
// logical shift (lshr) when true and an arithmetic shift (ashr) when false.
// A shift amount equal to the element width is handled specially below.
//
// NOTE(review): the first line of this signature (original line 509, declaring
// Vec and Shift) is absent from this listing - confirm against the upstream
// file.
510 llvm::Type *Ty, bool usgn,
511 const char *name) {
512 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
513
514 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
515 int EltSize = VTy->getScalarSizeInBits();
516
517 Vec = Builder.CreateBitCast(Vec, Ty);
518
519 // lshr/ashr are undefined when the shift amount is equal to the vector
520 // element size.
521 if (ShiftAmt == EltSize) {
522 if (usgn) {
523 // Right-shifting an unsigned value by its size yields 0.
524 return llvm::ConstantAggregateZero::get(VTy);
525 } else {
526 // Right-shifting a signed value by its size is equivalent
527 // to a shift of size-1.
528 --ShiftAmt;
529 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
530 }
531 }
532
// Convert the scalar shift amount to type Ty (not negated) before emitting
// the shift instruction.
533 Shift = EmitNeonShiftVector(Shift, Ty, false);
534 if (usgn)
535 return Builder.CreateLShr(Vec, Shift, name);
536 return Builder.CreateAShr(Vec, Shift, name);
537}
538
539// clang-format off
541 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
542 NEONMAP0(splat_lane_v),
543 NEONMAP0(splat_laneq_v),
544 NEONMAP0(splatq_lane_v),
545 NEONMAP0(splatq_laneq_v),
546 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
547 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
548 NEONMAP1(vabs_v, arm_neon_vabs, 0),
549 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
550 NEONMAP0(vadd_v),
551 NEONMAP0(vaddhn_v),
552 NEONMAP0(vaddq_v),
553 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
554 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
555 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
556 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
557 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
558 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
559 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
560 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
561 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
562 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
563 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
564 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
565 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
566 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
567 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
568 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
569 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
570 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
571 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
572 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
573 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
574 NEONMAP1(vcage_v, arm_neon_vacge, 0),
575 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
576 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
577 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
578 NEONMAP1(vcale_v, arm_neon_vacge, 0),
579 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
580 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
581 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
582 NEONMAP0(vceqz_v),
583 NEONMAP0(vceqzq_v),
584 NEONMAP0(vcgez_v),
585 NEONMAP0(vcgezq_v),
586 NEONMAP0(vcgtz_v),
587 NEONMAP0(vcgtzq_v),
588 NEONMAP0(vclez_v),
589 NEONMAP0(vclezq_v),
590 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
591 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
592 NEONMAP0(vcltz_v),
593 NEONMAP0(vcltzq_v),
594 NEONMAP1(vclz_v, ctlz, Add1ArgType),
595 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
596 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
597 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
598 NEONMAP0(vcvt_f16_s16),
599 NEONMAP0(vcvt_f16_u16),
600 NEONMAP0(vcvt_f32_v),
601 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
602 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
603 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
604 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
605 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
606 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
607 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
608 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
609 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
610 NEONMAP0(vcvt_s16_f16),
611 NEONMAP0(vcvt_s32_v),
612 NEONMAP0(vcvt_s64_v),
613 NEONMAP0(vcvt_u16_f16),
614 NEONMAP0(vcvt_u32_v),
615 NEONMAP0(vcvt_u64_v),
616 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
617 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
618 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
619 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
620 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
621 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
622 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
623 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
624 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
625 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
626 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
627 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
628 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
629 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
630 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
631 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
632 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
633 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
634 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
635 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
636 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
637 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
638 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
639 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
640 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
641 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
642 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
643 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
644 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
645 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
646 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
647 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
648 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
649 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
650 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
651 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
652 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
653 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
654 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
655 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
656 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
657 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
658 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
659 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
660 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
661 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
662 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
663 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
664 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
665 NEONMAP0(vcvtq_f16_s16),
666 NEONMAP0(vcvtq_f16_u16),
667 NEONMAP0(vcvtq_f32_v),
668 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
669 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
670 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
671 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
672 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
673 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
674 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
675 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
676 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
677 NEONMAP0(vcvtq_s16_f16),
678 NEONMAP0(vcvtq_s32_v),
679 NEONMAP0(vcvtq_s64_v),
680 NEONMAP0(vcvtq_u16_f16),
681 NEONMAP0(vcvtq_u32_v),
682 NEONMAP0(vcvtq_u64_v),
683 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
684 NEONMAP1(vdot_u32, arm_neon_udot, 0),
685 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
686 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
687 NEONMAP0(vext_v),
688 NEONMAP0(vextq_v),
689 NEONMAP0(vfma_v),
690 NEONMAP0(vfmaq_v),
691 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
692 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
693 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
694 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
695 NEONMAP0(vld1_dup_v),
696 NEONMAP1(vld1_v, arm_neon_vld1, 0),
697 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
698 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
699 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
700 NEONMAP0(vld1q_dup_v),
701 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
702 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
703 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
704 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
705 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
706 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
707 NEONMAP1(vld2_v, arm_neon_vld2, 0),
708 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
709 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
710 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
711 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
712 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
713 NEONMAP1(vld3_v, arm_neon_vld3, 0),
714 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
715 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
716 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
717 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
718 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
719 NEONMAP1(vld4_v, arm_neon_vld4, 0),
720 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
721 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
722 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
723 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
724 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
725 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
726 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
727 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
728 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
729 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
730 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
731 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
732 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
733 NEONMAP0(vmovl_v),
734 NEONMAP0(vmovn_v),
735 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
736 NEONMAP0(vmull_v),
737 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
738 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
739 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
740 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
741 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
742 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
743 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
744 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
745 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
746 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
747 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
748 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
749 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
750 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
751 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
752 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
753 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
754 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
755 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
756 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
757 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
758 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
759 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
760 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
761 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
762 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
763 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
764 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
765 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
766 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
767 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
768 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
769 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
770 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
771 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
772 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
773 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
774 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
775 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
776 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
777 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
778 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
779 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
780 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
781 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
782 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
783 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
784 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
785 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
786 NEONMAP1(vrnd_v, trunc, Add1ArgType),
787 NEONMAP1(vrnda_v, round, Add1ArgType),
788 NEONMAP1(vrndaq_v, round, Add1ArgType),
789 NEONMAP0(vrndi_v),
790 NEONMAP0(vrndiq_v),
791 NEONMAP1(vrndm_v, floor, Add1ArgType),
792 NEONMAP1(vrndmq_v, floor, Add1ArgType),
793 NEONMAP1(vrndn_v, roundeven, Add1ArgType),
794 NEONMAP1(vrndnq_v, roundeven, Add1ArgType),
795 NEONMAP1(vrndp_v, ceil, Add1ArgType),
796 NEONMAP1(vrndpq_v, ceil, Add1ArgType),
797 NEONMAP1(vrndq_v, trunc, Add1ArgType),
798 NEONMAP1(vrndx_v, rint, Add1ArgType),
799 NEONMAP1(vrndxq_v, rint, Add1ArgType),
800 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
801 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
802 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
803 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
804 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
805 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
806 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
807 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
808 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
809 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
810 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
811 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
812 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
813 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
814 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
815 NEONMAP0(vshl_n_v),
816 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
817 NEONMAP0(vshll_n_v),
818 NEONMAP0(vshlq_n_v),
819 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
820 NEONMAP0(vshr_n_v),
821 NEONMAP0(vshrn_n_v),
822 NEONMAP0(vshrq_n_v),
823 NEONMAP1(vst1_v, arm_neon_vst1, 0),
824 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
825 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
826 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
827 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
828 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
829 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
830 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
831 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
832 NEONMAP1(vst2_v, arm_neon_vst2, 0),
833 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
834 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
835 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
836 NEONMAP1(vst3_v, arm_neon_vst3, 0),
837 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
838 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
839 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
840 NEONMAP1(vst4_v, arm_neon_vst4, 0),
841 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
842 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
843 NEONMAP0(vsubhn_v),
844 NEONMAP0(vtrn_v),
845 NEONMAP0(vtrnq_v),
846 NEONMAP0(vtst_v),
847 NEONMAP0(vtstq_v),
848 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
849 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
850 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
851 NEONMAP0(vuzp_v),
852 NEONMAP0(vuzpq_v),
853 NEONMAP0(vzip_v),
854 NEONMAP0(vzipq_v)
855};
856
857// clang-format on
858
859// Some intrinsics are equivalent for codegen.
// Each entry is a pair of builtin IDs: the first is the builtin as written,
// the second is the builtin whose codegen it shares (the form that is actually
// handled). The code that consumes this table is not part of this section.
860static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
 // Half-precision (_f16) builtins, each paired with the overloaded _v builtin
 // of the same name.
861 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
862 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
863 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
864 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
865 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
866 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
867 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
868 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
869 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
870 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
871 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
872 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
873 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
874 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
875 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
876 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
877 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
878 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
879 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
880 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
881 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
882 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
883 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
884 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
885 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
886 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
887 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
888 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
889 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
890 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
891 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
892 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
893 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
894 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
895 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
896 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
897 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
898 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
899 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
900 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
901 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
902 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
903 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
904 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
905 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
906 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
907 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
908 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
909 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
910 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
911 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
912 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
913 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
914 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
915 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
916 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
917 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
918 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
919 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
920 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
921 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
922 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
923 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
924 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
925 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
926 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
927 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
928 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
929 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
930 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
931 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
932 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
933 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
934 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
935 // arbitrary one to be handled as the canonical variation.
 // Here the _s64 builtin is the one chosen; _u64, _f64 and _p64 map onto it.
936 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
937 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
938 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
939 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
940 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
941 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
942 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
943 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
944 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
945 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
946 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
947 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
948};
949
950#undef NEONMAP0
951#undef NEONMAP1
952#undef NEONMAP2
953
// SVEMAP1 expands to one table initializer: the SVE builtin ID formed from
// NameBase, the LLVM intrinsic ID it maps to, and the entry's TypeModifier.
954#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
955 {SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, TypeModifier}
956
// SVEMAP2 has the same shape but puts 0 in the intrinsic slot, i.e. no LLVM
// intrinsic is named for the builtin (presumably it is lowered by custom code;
// confirm against the users of the table).
957#define SVEMAP2(NameBase, TypeModifier) \
958 {SVE::BI__builtin_sve_##NameBase, 0, TypeModifier}
// The table rows come from the two included files. GET_SVE_LLVM_INTRINSIC_MAP
// is defined only around the includes (presumably to select the map section of
// the generated file).
960#define GET_SVE_LLVM_INTRINSIC_MAP
961#include "clang/Basic/arm_sve_builtin_cg.inc"
962#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
963#undef GET_SVE_LLVM_INTRINSIC_MAP
964};
965
// The helper macros are not needed once the table has been built.
966#undef SVEMAP1
967#undef SVEMAP2
968
// SMEMAP1 expands to one table initializer: the SME builtin ID formed from
// NameBase, the LLVM intrinsic ID it maps to, and the entry's TypeModifier.
969#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
970 {SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, TypeModifier}
971
// SMEMAP2 has the same shape but puts 0 in the intrinsic slot, i.e. no LLVM
// intrinsic is named for the builtin (presumably it is lowered by custom code;
// confirm against the users of the table).
972#define SMEMAP2(NameBase, TypeModifier) \
973 {SME::BI__builtin_sme_##NameBase, 0, TypeModifier}
// The table rows come from the included file. GET_SME_LLVM_INTRINSIC_MAP is
// defined only around the include (presumably to select the map section of the
// generated file).
975#define GET_SME_LLVM_INTRINSIC_MAP
976#include "clang/Basic/arm_sme_builtin_cg.inc"
977#undef GET_SME_LLVM_INTRINSIC_MAP
978};
979
// The helper macros are not needed once the table has been built.
980#undef SMEMAP1
981#undef SMEMAP2
982
984
989
990// Check if Builtin `BuiltinID` is present in `IntrinsicMap`. If yes, returns
991// the corresponding info struct.
//
// Returns a pointer to the matching entry of `IntrinsicMap`, or nullptr when no
// entry has that BuiltinID. The lookup is a binary search, so `IntrinsicMap`
// must already be sorted under the element type's ordering (presumably by
// BuiltinID). `MapProvenSorted` is a per-map flag owned by the caller: in
// builds with assertions enabled the sortedness check runs only while the flag
// is false, and the flag is then set so later calls skip it.
992template <typename IntrinsicInfo>
993static const IntrinsicInfo *
995 unsigned BuiltinID, bool &MapProvenSorted) {
996
997#ifndef NDEBUG
 // Verify the precondition of the binary search once per map.
998 if (!MapProvenSorted) {
999 assert(llvm::is_sorted(IntrinsicMap));
1000 MapProvenSorted = true;
1001 }
1002#endif
1003
 // First entry that does not compare less than BuiltinID (or end()).
1004 const IntrinsicInfo *Builtin = llvm::lower_bound(IntrinsicMap, BuiltinID);
1005
 // lower_bound only gives a position; confirm it is an exact match.
1006 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
1007 return Builtin;
1008
1009 return nullptr;
1010}
1011
1013 unsigned Modifier,
1014 llvm::Type *ArgType,
1015 const CallExpr *E) {
 // Builds the list of overload types (Tys) selected by the flag bits in
 // Modifier and returns CGM.getIntrinsic(IntrinsicID, Tys).
 //   Modifier - bitmask of the flags tested below.
 //   ArgType  - base argument type; wrapped in a vector when VectorizeArgTypes
 //              is set.
 //   E        - the builtin call; only its return type is read here.
 //
 // VectorSize stays 0 when neither Use64BitVectors nor Use128BitVectors is set;
 // in that case any vectorized type gets exactly one element.
1016 int VectorSize = 0;
1017 if (Modifier & Use64BitVectors)
1018 VectorSize = 64;
1019 else if (Modifier & Use128BitVectors)
1020 VectorSize = 128;
1021
1022 // Return type.
1024 if (Modifier & AddRetType) {
1025 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
 // Element count is the vector width divided by the scalar width.
1026 if (Modifier & VectorizeRetType)
1027 Ty = llvm::FixedVectorType::get(
1028 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
1029
1030 Tys.push_back(Ty);
1031 }
1032
1033 // Arguments.
1034 if (Modifier & VectorizeArgTypes) {
1035 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
1036 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
1037 }
1038
 // Either flag appends ArgType once; Add2ArgTypes appends it a second time
 // just below.
1039 if (Modifier & (Add1ArgType | Add2ArgTypes))
1040 Tys.push_back(ArgType);
1041
1042 if (Modifier & Add2ArgTypes)
1043 Tys.push_back(ArgType);
1044
 // InventFloatType appends FloatTy regardless of ArgType.
1045 if (Modifier & InventFloatType)
1046 Tys.push_back(FloatTy);
1047
1048 return CGM.getIntrinsic(IntrinsicID, Tys);
1049}
1050
1051//===----------------------------------------------------------------------===//
1052// Emit-helpers
1053//===----------------------------------------------------------------------===//
1055 CodeGenFunction &CGF, const ARMNeonVectorIntrinsicInfo &SISDInfo,
1056 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
 // Emits the intrinsic call for the builtin described by SISDInfo.
 //   SISDInfo - table entry supplying BuiltinID, LLVMIntrinsic, TypeModifier
 //              and NameHint.
 //   Ops      - already-emitted operands; modified in place (operands may be
 //              swapped, truncated and wrapped into vectors).
 //   E        - the builtin call, used for the argument and result types.
 // Returns the call result converted to the builtin's result type.
1057 assert(SISDInfo.LLVMIntrinsic && "Generic code assumes a valid intrinsic");
1058
1059 switch (SISDInfo.BuiltinID) {
1060 case NEON::BI__builtin_neon_vcled_s64:
1061 case NEON::BI__builtin_neon_vcled_u64:
1062 case NEON::BI__builtin_neon_vcles_f32:
1063 case NEON::BI__builtin_neon_vcled_f64:
1064 case NEON::BI__builtin_neon_vcltd_s64:
1065 case NEON::BI__builtin_neon_vcltd_u64:
1066 case NEON::BI__builtin_neon_vclts_f32:
1067 case NEON::BI__builtin_neon_vcltd_f64:
1068 case NEON::BI__builtin_neon_vcales_f32:
1069 case NEON::BI__builtin_neon_vcaled_f64:
1070 case NEON::BI__builtin_neon_vcalts_f32:
1071 case NEON::BI__builtin_neon_vcaltd_f64:
1072 // Only one direction of comparisons actually exists; cmle is actually a cmge
1073 // with swapped operands. The table gives us the right intrinsic but we
1074 // still need to do the swap.
1075 std::swap(Ops[0], Ops[1]);
1076 break;
1077 }
1078
1079 // Use fptosi.sat/fptoui.sat unless under strict FP.
1080 unsigned LLVMIntrinsic = SISDInfo.LLVMIntrinsic;
1081 if (!CGF.Builder.getIsFPConstrained()) {
1082 if (LLVMIntrinsic == Intrinsic::aarch64_neon_fcvtzs)
1083 LLVMIntrinsic = Intrinsic::fptosi_sat;
1084 else if (LLVMIntrinsic == Intrinsic::aarch64_neon_fcvtzu)
1085 LLVMIntrinsic = Intrinsic::fptoui_sat;
1086 }
 // The overload types are derived from the first argument's type and the
 // entry's TypeModifier.
1087 llvm::Type *ArgTy = CGF.ConvertType(E->getArg(0)->getType());
1088 Function *F = CGF.LookupNeonLLVMIntrinsic(LLVMIntrinsic,
1089 SISDInfo.TypeModifier, ArgTy, E);
1090
 // Walk the intrinsic's formal parameters in step with Ops (index j). An
 // operand whose bit width already matches its parameter is left untouched.
1091 int j = 0;
1092 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
1093 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
1094 ai != ae; ++ai, ++j) {
 // Shadows the outer ArgTy: this one is the intrinsic's parameter type.
1095 llvm::Type *ArgTy = ai->getType();
1096 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
1097 ArgTy->getPrimitiveSizeInBits())
1098 continue;
1099 assert(
1100 ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy() &&
1101 "Expecting vector LLVM intrinsic type and scalar Clang builtin type!");
1102
1103 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
1104 // it before inserting.
1105 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
1106 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
 // Place the scalar in lane 0 of an otherwise-poison vector.
1107 Ops[j] =
1108 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
1109 }
1110
1111 Value *Result = CGF.EmitNeonCall(F, Ops, SISDInfo.NameHint);
1112 llvm::Type *ResultType = CGF.ConvertType(E->getType());
 // If the call result is wider than the builtin's result type, take lane 0;
 // otherwise bitcast it to the builtin's result type.
1113 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
1114 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
1115 return CGF.Builder.CreateExtractElement(Result, C0);
1116
1117 return CGF.Builder.CreateBitCast(Result, ResultType, SISDInfo.NameHint);
1118}
1119
1121 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
1122 const char *NameHint, unsigned Modifier, const CallExpr *E,
1123 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
1124 llvm::Triple::ArchType Arch) {
1125
1126 // Extract the trailing immediate argument that encodes the type discriminator
1127 // for this overloaded intrinsic.
1128 // TODO: Move to the parent code that takes care of argument processing.
1129 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
1130 std::optional<llvm::APSInt> NeonTypeConst =
1132 if (!NeonTypeConst)
1133 return nullptr;
1134
1135 // Determine the type of this overloaded NEON intrinsic.
1136 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
1137 const bool Usgn = Type.isUnsigned();
1138 const bool Quad = Type.isQuad();
1139 const bool Floating = Type.isFloatingPoint();
1140 const bool HasFastHalfType = getTarget().hasFastHalfType();
1141 const bool AllowBFloatArgsAndRet =
1142 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
1143
1144 llvm::FixedVectorType *VTy =
1145 GetNeonType(this, Type, HasFastHalfType, false, AllowBFloatArgsAndRet);
1146 llvm::Type *Ty = VTy;
1147 if (!Ty)
1148 return nullptr;
1149
1150 auto getAlignmentValue32 = [&](Address addr) -> Value* {
1151 return Builder.getInt32(addr.getAlignment().getQuantity());
1152 };
1153
1154 unsigned Int = LLVMIntrinsic;
1155 if ((Modifier & UnsignedAlts) && !Usgn)
1156 Int = AltLLVMIntrinsic;
1157
1158 switch (BuiltinID) {
1159 default: break;
1160 case NEON::BI__builtin_neon_splat_lane_v:
1161 case NEON::BI__builtin_neon_splat_laneq_v:
1162 case NEON::BI__builtin_neon_splatq_lane_v:
1163 case NEON::BI__builtin_neon_splatq_laneq_v: {
1164 auto NumElements = VTy->getElementCount();
1165 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
1166 NumElements = NumElements * 2;
1167 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
1168 NumElements = NumElements.divideCoefficientBy(2);
1169
1170 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
1171 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
1172 }
1173 case NEON::BI__builtin_neon_vpadd_v:
1174 case NEON::BI__builtin_neon_vpaddq_v:
1175 // We don't allow fp/int overloading of intrinsics.
1176 if (VTy->getElementType()->isFloatingPointTy() &&
1177 Int == Intrinsic::aarch64_neon_addp)
1178 Int = Intrinsic::aarch64_neon_faddp;
1179 break;
1180 case NEON::BI__builtin_neon_vabs_v:
1181 case NEON::BI__builtin_neon_vabsq_v:
1182 if (VTy->getElementType()->isFloatingPointTy())
1183 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
1184 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
1185 case NEON::BI__builtin_neon_vadd_v:
1186 case NEON::BI__builtin_neon_vaddq_v: {
1187 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
1188 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
1189 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
1190 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
1191 return Builder.CreateBitCast(Ops[0], Ty);
1192 }
1193 case NEON::BI__builtin_neon_vaddhn_v: {
1194 llvm::FixedVectorType *SrcTy =
1195 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1196
1197 // %sum = add <4 x i32> %lhs, %rhs
1198 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1199 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
1200 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
1201
1202 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
1203 Constant *ShiftAmt =
1204 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
1205 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
1206
1207 // %res = trunc <4 x i32> %high to <4 x i16>
1208 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
1209 }
1210 case NEON::BI__builtin_neon_vcale_v:
1211 case NEON::BI__builtin_neon_vcaleq_v:
1212 case NEON::BI__builtin_neon_vcalt_v:
1213 case NEON::BI__builtin_neon_vcaltq_v:
1214 std::swap(Ops[0], Ops[1]);
1215 [[fallthrough]];
1216 case NEON::BI__builtin_neon_vcage_v:
1217 case NEON::BI__builtin_neon_vcageq_v:
1218 case NEON::BI__builtin_neon_vcagt_v:
1219 case NEON::BI__builtin_neon_vcagtq_v: {
1220 llvm::Type *Ty;
1221 switch (VTy->getScalarSizeInBits()) {
1222 default: llvm_unreachable("unexpected type");
1223 case 32:
1224 Ty = FloatTy;
1225 break;
1226 case 64:
1227 Ty = DoubleTy;
1228 break;
1229 case 16:
1230 Ty = HalfTy;
1231 break;
1232 }
1233 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
1234 llvm::Type *Tys[] = { VTy, VecFlt };
1235 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1236 return EmitNeonCall(F, Ops, NameHint);
1237 }
1238 case NEON::BI__builtin_neon_vceqz_v:
1239 case NEON::BI__builtin_neon_vceqzq_v:
1241 Ops[0], Ty, Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ, "vceqz");
1242 case NEON::BI__builtin_neon_vcgez_v:
1243 case NEON::BI__builtin_neon_vcgezq_v:
1245 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
1246 "vcgez");
1247 case NEON::BI__builtin_neon_vclez_v:
1248 case NEON::BI__builtin_neon_vclezq_v:
1250 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
1251 "vclez");
1252 case NEON::BI__builtin_neon_vcgtz_v:
1253 case NEON::BI__builtin_neon_vcgtzq_v:
1255 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
1256 "vcgtz");
1257 case NEON::BI__builtin_neon_vcltz_v:
1258 case NEON::BI__builtin_neon_vcltzq_v:
1260 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
1261 "vcltz");
1262 case NEON::BI__builtin_neon_vclz_v:
1263 case NEON::BI__builtin_neon_vclzq_v:
1264 // We generate target-independent intrinsic, which needs a second argument
1265 // for whether or not clz of zero is undefined; on ARM it isn't.
1266 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
1267 break;
1268 case NEON::BI__builtin_neon_vcvt_f32_v:
1269 case NEON::BI__builtin_neon_vcvtq_f32_v:
1270 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1271 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
1272 HasFastHalfType);
1273 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1274 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1275 case NEON::BI__builtin_neon_vcvt_f16_s16:
1276 case NEON::BI__builtin_neon_vcvt_f16_u16:
1277 case NEON::BI__builtin_neon_vcvtq_f16_s16:
1278 case NEON::BI__builtin_neon_vcvtq_f16_u16:
1279 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1280 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
1281 HasFastHalfType);
1282 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1283 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1284 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
1285 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
1286 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
1287 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
1288 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
1289 Function *F = CGM.getIntrinsic(Int, Tys);
1290 return EmitNeonCall(F, Ops, "vcvt_n");
1291 }
1292 case NEON::BI__builtin_neon_vcvt_n_f32_v:
1293 case NEON::BI__builtin_neon_vcvt_n_f64_v:
1294 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
1295 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
1296 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
1297 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
1298 Function *F = CGM.getIntrinsic(Int, Tys);
1299 return EmitNeonCall(F, Ops, "vcvt_n");
1300 }
1301 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
1302 case NEON::BI__builtin_neon_vcvt_n_s32_v:
1303 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
1304 case NEON::BI__builtin_neon_vcvt_n_u32_v:
1305 case NEON::BI__builtin_neon_vcvt_n_s64_v:
1306 case NEON::BI__builtin_neon_vcvt_n_u64_v:
1307 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
1308 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
1309 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
1310 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
1311 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
1312 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
1313 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
1314 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1315 return EmitNeonCall(F, Ops, "vcvt_n");
1316 }
1317 case NEON::BI__builtin_neon_vcvt_s32_v:
1318 case NEON::BI__builtin_neon_vcvt_u32_v:
1319 case NEON::BI__builtin_neon_vcvt_s64_v:
1320 case NEON::BI__builtin_neon_vcvt_u64_v:
1321 case NEON::BI__builtin_neon_vcvt_s16_f16:
1322 case NEON::BI__builtin_neon_vcvt_u16_f16:
1323 case NEON::BI__builtin_neon_vcvtq_s32_v:
1324 case NEON::BI__builtin_neon_vcvtq_u32_v:
1325 case NEON::BI__builtin_neon_vcvtq_s64_v:
1326 case NEON::BI__builtin_neon_vcvtq_u64_v:
1327 case NEON::BI__builtin_neon_vcvtq_s16_f16:
1328 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
1329 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
1330 if (Int) {
1331 // AArch64: use fptosi.sat/fptoui.sat unless under strict FP.
1332 if (!Builder.getIsFPConstrained())
1333 Int = Usgn ? Intrinsic::fptoui_sat : Intrinsic::fptosi_sat;
1334 llvm::Type *Tys[2] = {Ty, Ops[0]->getType()};
1335 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
1336 }
1337 // FIXME: ARM uses plain fptoui/fptosi which have UB on out-of-range
1338 // values. These should also use saturating intrinsics.
1339 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
1340 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
1341 }
1342 case NEON::BI__builtin_neon_vcvta_s16_f16:
1343 case NEON::BI__builtin_neon_vcvta_s32_v:
1344 case NEON::BI__builtin_neon_vcvta_s64_v:
1345 case NEON::BI__builtin_neon_vcvta_u16_f16:
1346 case NEON::BI__builtin_neon_vcvta_u32_v:
1347 case NEON::BI__builtin_neon_vcvta_u64_v:
1348 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
1349 case NEON::BI__builtin_neon_vcvtaq_s32_v:
1350 case NEON::BI__builtin_neon_vcvtaq_s64_v:
1351 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
1352 case NEON::BI__builtin_neon_vcvtaq_u32_v:
1353 case NEON::BI__builtin_neon_vcvtaq_u64_v:
1354 case NEON::BI__builtin_neon_vcvtn_s16_f16:
1355 case NEON::BI__builtin_neon_vcvtn_s32_v:
1356 case NEON::BI__builtin_neon_vcvtn_s64_v:
1357 case NEON::BI__builtin_neon_vcvtn_u16_f16:
1358 case NEON::BI__builtin_neon_vcvtn_u32_v:
1359 case NEON::BI__builtin_neon_vcvtn_u64_v:
1360 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
1361 case NEON::BI__builtin_neon_vcvtnq_s32_v:
1362 case NEON::BI__builtin_neon_vcvtnq_s64_v:
1363 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
1364 case NEON::BI__builtin_neon_vcvtnq_u32_v:
1365 case NEON::BI__builtin_neon_vcvtnq_u64_v:
1366 case NEON::BI__builtin_neon_vcvtp_s16_f16:
1367 case NEON::BI__builtin_neon_vcvtp_s32_v:
1368 case NEON::BI__builtin_neon_vcvtp_s64_v:
1369 case NEON::BI__builtin_neon_vcvtp_u16_f16:
1370 case NEON::BI__builtin_neon_vcvtp_u32_v:
1371 case NEON::BI__builtin_neon_vcvtp_u64_v:
1372 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
1373 case NEON::BI__builtin_neon_vcvtpq_s32_v:
1374 case NEON::BI__builtin_neon_vcvtpq_s64_v:
1375 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
1376 case NEON::BI__builtin_neon_vcvtpq_u32_v:
1377 case NEON::BI__builtin_neon_vcvtpq_u64_v:
1378 case NEON::BI__builtin_neon_vcvtm_s16_f16:
1379 case NEON::BI__builtin_neon_vcvtm_s32_v:
1380 case NEON::BI__builtin_neon_vcvtm_s64_v:
1381 case NEON::BI__builtin_neon_vcvtm_u16_f16:
1382 case NEON::BI__builtin_neon_vcvtm_u32_v:
1383 case NEON::BI__builtin_neon_vcvtm_u64_v:
1384 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
1385 case NEON::BI__builtin_neon_vcvtmq_s32_v:
1386 case NEON::BI__builtin_neon_vcvtmq_s64_v:
1387 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
1388 case NEON::BI__builtin_neon_vcvtmq_u32_v:
1389 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
1390 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
1391 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
1392 }
1393 case NEON::BI__builtin_neon_vcvtx_f32_v: {
1394 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
1395 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
1396
1397 }
1398 case NEON::BI__builtin_neon_vext_v:
1399 case NEON::BI__builtin_neon_vextq_v: {
1400 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
1401 SmallVector<int, 16> Indices;
1402 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1403 Indices.push_back(i+CV);
1404
1405 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1406 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1407 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
1408 }
1409 case NEON::BI__builtin_neon_vfma_v:
1410 case NEON::BI__builtin_neon_vfmaq_v: {
1411 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1412 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1413 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1414
1415 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
1417 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
1418 {Ops[1], Ops[2], Ops[0]});
1419 }
1420 case NEON::BI__builtin_neon_vld1_v:
1421 case NEON::BI__builtin_neon_vld1q_v: {
1422 llvm::Type *Tys[] = {Ty, Int8PtrTy};
1423 Ops.push_back(getAlignmentValue32(PtrOp0));
1424 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
1425 }
1426 case NEON::BI__builtin_neon_vld1_x2_v:
1427 case NEON::BI__builtin_neon_vld1q_x2_v:
1428 case NEON::BI__builtin_neon_vld1_x3_v:
1429 case NEON::BI__builtin_neon_vld1q_x3_v:
1430 case NEON::BI__builtin_neon_vld1_x4_v:
1431 case NEON::BI__builtin_neon_vld1q_x4_v: {
1432 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
1433 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1434 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
1435 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
1436 }
1437 case NEON::BI__builtin_neon_vld2_v:
1438 case NEON::BI__builtin_neon_vld2q_v:
1439 case NEON::BI__builtin_neon_vld3_v:
1440 case NEON::BI__builtin_neon_vld3q_v:
1441 case NEON::BI__builtin_neon_vld4_v:
1442 case NEON::BI__builtin_neon_vld4q_v:
1443 case NEON::BI__builtin_neon_vld2_dup_v:
1444 case NEON::BI__builtin_neon_vld2q_dup_v:
1445 case NEON::BI__builtin_neon_vld3_dup_v:
1446 case NEON::BI__builtin_neon_vld3q_dup_v:
1447 case NEON::BI__builtin_neon_vld4_dup_v:
1448 case NEON::BI__builtin_neon_vld4q_dup_v: {
1449 llvm::Type *Tys[] = {Ty, Int8PtrTy};
1450 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1451 Value *Align = getAlignmentValue32(PtrOp1);
1452 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
1453 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
1454 }
1455 case NEON::BI__builtin_neon_vld1_dup_v:
1456 case NEON::BI__builtin_neon_vld1q_dup_v: {
1457 Value *V = PoisonValue::get(Ty);
1458 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
1459 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
1460 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
1461 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
1462 return EmitNeonSplat(Ops[0], CI);
1463 }
1464 case NEON::BI__builtin_neon_vld2_lane_v:
1465 case NEON::BI__builtin_neon_vld2q_lane_v:
1466 case NEON::BI__builtin_neon_vld3_lane_v:
1467 case NEON::BI__builtin_neon_vld3q_lane_v:
1468 case NEON::BI__builtin_neon_vld4_lane_v:
1469 case NEON::BI__builtin_neon_vld4q_lane_v: {
1470 llvm::Type *Tys[] = {Ty, Int8PtrTy};
1471 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1472 for (unsigned I = 2; I < Ops.size() - 1; ++I)
1473 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
1474 Ops.push_back(getAlignmentValue32(PtrOp1));
1475 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
1476 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
1477 }
1478 case NEON::BI__builtin_neon_vmovl_v: {
1479 llvm::FixedVectorType *DTy =
1480 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
1481 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
1482 if (Usgn)
1483 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
1484 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
1485 }
1486 case NEON::BI__builtin_neon_vmovn_v: {
1487 llvm::FixedVectorType *QTy =
1488 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1489 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
1490 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
1491 }
1492 case NEON::BI__builtin_neon_vmull_v:
1493 // FIXME: the integer vmull operations could be emitted in terms of pure
1494 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
1495 // hoisting the exts outside loops. Until global ISel comes along that can
1496 // see through such movement this leads to bad CodeGen. So we need an
1497 // intrinsic for now.
1498 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
1499 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
1500 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
1501 case NEON::BI__builtin_neon_vpadal_v:
1502 case NEON::BI__builtin_neon_vpadalq_v: {
1503 // The source operand type has twice as many elements of half the size.
1504 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
1505 llvm::Type *EltTy =
1506 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
1507 auto *NarrowTy =
1508 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
1509 llvm::Type *Tys[2] = { Ty, NarrowTy };
1510 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
1511 }
1512 case NEON::BI__builtin_neon_vpaddl_v:
1513 case NEON::BI__builtin_neon_vpaddlq_v: {
1514 // The source operand type has twice as many elements of half the size.
1515 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
1516 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
1517 auto *NarrowTy =
1518 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
1519 llvm::Type *Tys[2] = { Ty, NarrowTy };
1520 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
1521 }
1522 case NEON::BI__builtin_neon_vqdmlal_v:
1523 case NEON::BI__builtin_neon_vqdmlsl_v: {
1524 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
1525 Ops[1] =
1526 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
1527 Ops.resize(2);
1528 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
1529 }
1530 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
1531 case NEON::BI__builtin_neon_vqdmulh_lane_v:
1532 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
1533 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
1534 auto *RTy = cast<llvm::FixedVectorType>(Ty);
1535 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
1536 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
1537 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
1538 RTy->getNumElements() * 2);
1539 llvm::Type *Tys[2] = {
1540 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
1541 /*isQuad*/ false))};
1542 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
1543 }
1544 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
1545 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
1546 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
1547 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
1548 llvm::Type *Tys[2] = {
1549 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
1550 /*isQuad*/ true))};
1551 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
1552 }
1553 case NEON::BI__builtin_neon_vqshl_n_v:
1554 case NEON::BI__builtin_neon_vqshlq_n_v:
1555 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
1556 1, false);
1557 case NEON::BI__builtin_neon_vqshlu_n_v:
1558 case NEON::BI__builtin_neon_vqshluq_n_v:
1559 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
1560 1, false);
1561 case NEON::BI__builtin_neon_vrecpe_v:
1562 case NEON::BI__builtin_neon_vrecpeq_v:
1563 case NEON::BI__builtin_neon_vrsqrte_v:
1564 case NEON::BI__builtin_neon_vrsqrteq_v:
1565 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
1566 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
1567 case NEON::BI__builtin_neon_vrndi_v:
1568 case NEON::BI__builtin_neon_vrndiq_v:
1569 Int = Builder.getIsFPConstrained()
1570 ? Intrinsic::experimental_constrained_nearbyint
1571 : Intrinsic::nearbyint;
1572 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
1573 case NEON::BI__builtin_neon_vrshr_n_v:
1574 case NEON::BI__builtin_neon_vrshrq_n_v:
1575 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
1576 1, true);
1577 case NEON::BI__builtin_neon_vsha512hq_u64:
1578 case NEON::BI__builtin_neon_vsha512h2q_u64:
1579 case NEON::BI__builtin_neon_vsha512su0q_u64:
1580 case NEON::BI__builtin_neon_vsha512su1q_u64: {
1581 Function *F = CGM.getIntrinsic(Int);
1582 return EmitNeonCall(F, Ops, "");
1583 }
1584 case NEON::BI__builtin_neon_vshl_n_v:
1585 case NEON::BI__builtin_neon_vshlq_n_v:
1586 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
1587 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
1588 "vshl_n");
1589 case NEON::BI__builtin_neon_vshll_n_v: {
1590 llvm::FixedVectorType *SrcTy =
1591 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
1592 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1593 if (Usgn)
1594 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
1595 else
1596 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
1597 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
1598 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
1599 }
1600 case NEON::BI__builtin_neon_vshrn_n_v: {
1601 llvm::FixedVectorType *SrcTy =
1602 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1603 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1604 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
1605 if (Usgn)
1606 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
1607 else
1608 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
1609 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
1610 }
1611 case NEON::BI__builtin_neon_vshr_n_v:
1612 case NEON::BI__builtin_neon_vshrq_n_v:
1613 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
1614 case NEON::BI__builtin_neon_vst1_v:
1615 case NEON::BI__builtin_neon_vst1q_v:
1616 case NEON::BI__builtin_neon_vst2_v:
1617 case NEON::BI__builtin_neon_vst2q_v:
1618 case NEON::BI__builtin_neon_vst3_v:
1619 case NEON::BI__builtin_neon_vst3q_v:
1620 case NEON::BI__builtin_neon_vst4_v:
1621 case NEON::BI__builtin_neon_vst4q_v:
1622 case NEON::BI__builtin_neon_vst2_lane_v:
1623 case NEON::BI__builtin_neon_vst2q_lane_v:
1624 case NEON::BI__builtin_neon_vst3_lane_v:
1625 case NEON::BI__builtin_neon_vst3q_lane_v:
1626 case NEON::BI__builtin_neon_vst4_lane_v:
1627 case NEON::BI__builtin_neon_vst4q_lane_v: {
1628 llvm::Type *Tys[] = {Int8PtrTy, Ty};
1629 Ops.push_back(getAlignmentValue32(PtrOp0));
1630 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
1631 }
1632 case NEON::BI__builtin_neon_vsm3partw1q_u32:
1633 case NEON::BI__builtin_neon_vsm3partw2q_u32:
1634 case NEON::BI__builtin_neon_vsm3ss1q_u32:
1635 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
1636 case NEON::BI__builtin_neon_vsm4eq_u32: {
1637 Function *F = CGM.getIntrinsic(Int);
1638 return EmitNeonCall(F, Ops, "");
1639 }
1640 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
1641 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
1642 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
1643 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
1644 Function *F = CGM.getIntrinsic(Int);
1645 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
1646 return EmitNeonCall(F, Ops, "");
1647 }
1648 case NEON::BI__builtin_neon_vst1_x2_v:
1649 case NEON::BI__builtin_neon_vst1q_x2_v:
1650 case NEON::BI__builtin_neon_vst1_x3_v:
1651 case NEON::BI__builtin_neon_vst1q_x3_v:
1652 case NEON::BI__builtin_neon_vst1_x4_v:
1653 case NEON::BI__builtin_neon_vst1q_x4_v: {
1654 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
1655 // in AArch64 it comes last. We may want to stick to one or another.
1656 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
1657 Arch == llvm::Triple::aarch64_32) {
1658 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
1659 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
1660 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
1661 }
1662 llvm::Type *Tys[2] = {DefaultPtrTy, VTy};
1663 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
1664 }
1665 case NEON::BI__builtin_neon_vsubhn_v: {
1666 llvm::FixedVectorType *SrcTy =
1667 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1668
1669 // %sum = add <4 x i32> %lhs, %rhs
1670 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1671 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
1672 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
1673
1674 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
1675 Constant *ShiftAmt =
1676 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
1677 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
1678
1679 // %res = trunc <4 x i32> %high to <4 x i16>
1680 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
1681 }
1682 case NEON::BI__builtin_neon_vtrn_v:
1683 case NEON::BI__builtin_neon_vtrnq_v: {
1684 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1685 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1686 Value *SV = nullptr;
1687
1688 for (unsigned vi = 0; vi != 2; ++vi) {
1689 SmallVector<int, 16> Indices;
1690 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1691 Indices.push_back(i+vi);
1692 Indices.push_back(i+e+vi);
1693 }
1694 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
1695 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
1696 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
1697 }
1698 return SV;
1699 }
1700 case NEON::BI__builtin_neon_vtst_v:
1701 case NEON::BI__builtin_neon_vtstq_v: {
1702 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1703 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1704 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
1705 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
1706 ConstantAggregateZero::get(Ty));
1707 return Builder.CreateSExt(Ops[0], Ty, "vtst");
1708 }
1709 case NEON::BI__builtin_neon_vuzp_v:
1710 case NEON::BI__builtin_neon_vuzpq_v: {
1711 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1712 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1713 Value *SV = nullptr;
1714
1715 for (unsigned vi = 0; vi != 2; ++vi) {
1716 SmallVector<int, 16> Indices;
1717 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1718 Indices.push_back(2*i+vi);
1719
1720 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
1721 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
1722 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
1723 }
1724 return SV;
1725 }
1726 case NEON::BI__builtin_neon_vxarq_u64: {
1727 Function *F = CGM.getIntrinsic(Int);
1728 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
1729 return EmitNeonCall(F, Ops, "");
1730 }
1731 case NEON::BI__builtin_neon_vzip_v:
1732 case NEON::BI__builtin_neon_vzipq_v: {
1733 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1734 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1735 Value *SV = nullptr;
1736
1737 for (unsigned vi = 0; vi != 2; ++vi) {
1738 SmallVector<int, 16> Indices;
1739 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1740 Indices.push_back((i + vi*e) >> 1);
1741 Indices.push_back(((i + vi*e) >> 1)+e);
1742 }
1743 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
1744 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
1745 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
1746 }
1747 return SV;
1748 }
1749 case NEON::BI__builtin_neon_vdot_s32:
1750 case NEON::BI__builtin_neon_vdot_u32:
1751 case NEON::BI__builtin_neon_vdotq_s32:
1752 case NEON::BI__builtin_neon_vdotq_u32: {
1753 auto *InputTy =
1754 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1755 llvm::Type *Tys[2] = { Ty, InputTy };
1756 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
1757 }
1758 case NEON::BI__builtin_neon_vfmlal_low_f16:
1759 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
1760 auto *InputTy =
1761 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1762 llvm::Type *Tys[2] = { Ty, InputTy };
1763 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
1764 }
1765 case NEON::BI__builtin_neon_vfmlsl_low_f16:
1766 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
1767 auto *InputTy =
1768 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1769 llvm::Type *Tys[2] = { Ty, InputTy };
1770 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
1771 }
1772 case NEON::BI__builtin_neon_vfmlal_high_f16:
1773 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
1774 auto *InputTy =
1775 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1776 llvm::Type *Tys[2] = { Ty, InputTy };
1777 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
1778 }
1779 case NEON::BI__builtin_neon_vfmlsl_high_f16:
1780 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
1781 auto *InputTy =
1782 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1783 llvm::Type *Tys[2] = { Ty, InputTy };
1784 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
1785 }
1786 case NEON::BI__builtin_neon_vmmlaq_s32:
1787 case NEON::BI__builtin_neon_vmmlaq_u32: {
1788 auto *InputTy =
1789 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1790 llvm::Type *Tys[2] = { Ty, InputTy };
1791 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
1792 }
1793 case NEON::BI__builtin_neon_vmmlaq_f16_f16:
1794 case NEON::BI__builtin_neon_vmmlaq_f32_f16: {
1795 auto *InputTy =
1796 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1797 llvm::Type *Tys[2] = {Ty, InputTy};
1798 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fmmla");
1799 }
1800 case NEON::BI__builtin_neon_vusmmlaq_s32: {
1801 auto *InputTy =
1802 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1803 llvm::Type *Tys[2] = { Ty, InputTy };
1804 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
1805 }
1806 case NEON::BI__builtin_neon_vusdot_s32:
1807 case NEON::BI__builtin_neon_vusdotq_s32: {
1808 auto *InputTy =
1809 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1810 llvm::Type *Tys[2] = { Ty, InputTy };
1811 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
1812 }
1813 case NEON::BI__builtin_neon_vbfdot_f32:
1814 case NEON::BI__builtin_neon_vbfdotq_f32: {
1815 llvm::Type *InputTy =
1816 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
1817 llvm::Type *Tys[2] = { Ty, InputTy };
1818 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
1819 }
1820 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
1821 llvm::Type *Tys[1] = { Ty };
1822 Function *F = CGM.getIntrinsic(Int, Tys);
1823 return EmitNeonCall(F, Ops, "vcvtfp2bf");
1824 }
1825
1826 }
1827
1828 assert(Int && "Expected valid intrinsic number");
1829
1830 // Determine the type(s) of this overloaded AArch64 intrinsic.
1831 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
1832
1833 Value *Result = EmitNeonCall(F, Ops, NameHint);
1834 llvm::Type *ResultType = ConvertType(E->getType());
1835 // AArch64 intrinsic one-element vector type cast to
1836 // scalar type expected by the builtin
1837 return Builder.CreateBitCast(Result, ResultType, NameHint);
1838}
1839
// Emits a comparison of Op against zero using predicate Pred and sign-extends
// the comparison result to an integer type derived from Ty.
//
// Pred selects between a floating-point and an integer comparison; Name is
// used as the name of the final sign-extend instruction.
//
// NOTE(review): the source line that carries the function name and the
// leading parameters (Op and Ty, judging by their use below) is absent from
// this listing -- the embedded numbering jumps from 1840 to 1842. Restore it
// from upstream before building.
1840Value *
1842 const CmpInst::Predicate Pred,
1843 const Twine &Name) {
1844
1845 if (isa<FixedVectorType>(Ty)) {
1846 // Vector types are cast to i8 vectors. Recover original type.
1847 Op = Builder.CreateBitCast(Op, Ty);
1848 }
1849
// The right-hand side of the comparison is an all-zero constant of the
// operand's (possibly just recovered) type.
1850 Constant *zero = Constant::getNullValue(Op->getType());
1851
// Floating-point predicates: FCMP_OEQ goes through CreateFCmp, every other FP
// predicate goes through CreateFCmpS (the signaling form of the compare).
// Integer predicates use a plain icmp.
1852 if (CmpInst::isFPPredicate(Pred)) {
1853 if (Pred == CmpInst::FCMP_OEQ)
1854 Op = Builder.CreateFCmp(Pred, Op, zero);
1855 else
1856 Op = Builder.CreateFCmpS(Pred, Op, zero);
1857 } else {
1858 Op = Builder.CreateICmp(Pred, Op, zero);
1859 }
1860
// For a fixed vector Ty the result is an integer vector with the same lane
// count and the same lane width as Ty; otherwise the result type is Ty itself.
1861 llvm::Type *ResTy = Ty;
1862 if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
1863 ResTy = FixedVectorType::get(
1864 IntegerType::get(getLLVMContext(), VTy->getScalarSizeInBits()),
1865 VTy->getNumElements());
1866
// Sign-extending the boolean comparison result turns "true" into all-ones.
1867 return Builder.CreateSExt(Op, ResTy, Name);
1868}
1869
// Packs the lookup-table vectors in Ops pairwise into wider vectors (via
// shufflevector), pads an odd trailing table with zeros, and then calls the
// intrinsic IntID (overloaded on ResTy) with the operand list
//   [ExtOp, if non-null] + packed tables + IndexOp.
// Name is used for the shuffles and for the final call.
//
// NOTE(review): two source lines are absent from this listing -- the one that
// opens the signature (embedded numbering jumps from 1869 to 1871; it
// presumably declares CGF and Ops, both used below) and the one between 1873
// and 1875, which presumably declares TblOps. Restore both from upstream
// before building.
1871 Value *ExtOp, Value *IndexOp,
1872 llvm::Type *ResTy, unsigned IntID,
1873 const char *Name) {
// The optional extra operand, when present, goes first in the call.
1875 if (ExtOp)
1876 TblOps.push_back(ExtOp);
1877
1878 // Build a vector containing sequential numbers like (0, 1, 2, ..., 15)
// i.e. 0 .. 2*e-1 where e is the element count of one table; used as a
// shuffle mask this concatenates the two shuffle inputs.
1879 SmallVector<int, 16> Indices;
1880 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
1881 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
1882 Indices.push_back(2*i);
1883 Indices.push_back(2*i+1);
1884 }
1885
// Combine tables two at a time. End is the index of the last table, so the
// loop stops with PairPos == End exactly when one unpaired table remains.
1886 int PairPos = 0, End = Ops.size() - 1;
1887 while (PairPos < End) {
1888 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
1889 Ops[PairPos+1], Indices,
1890 Name));
1891 PairPos += 2;
1892 }
1893
1894 // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
1895 // of the 128-bit lookup table with zero.
1896 if (PairPos == End) {
1897 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
1898 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
1899 ZeroTbl, Indices, Name));
1900 }
1901
// The index operand is always the last argument of the intrinsic call.
1902 Function *TblF;
1903 TblOps.push_back(IndexOp);
1904 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
1905
1906 return CGF.EmitNeonCall(TblF, TblOps, Name);
1907}
1908
1909Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
1910 unsigned Value;
1911 switch (BuiltinID) {
1912 default:
1913 return nullptr;
1914 case clang::ARM::BI__builtin_arm_nop:
1915 Value = 0;
1916 break;
1917 case clang::ARM::BI__builtin_arm_yield:
1918 case clang::ARM::BI__yield:
1919 Value = 1;
1920 break;
1921 case clang::ARM::BI__builtin_arm_wfe:
1922 case clang::ARM::BI__wfe:
1923 Value = 2;
1924 break;
1925 case clang::ARM::BI__builtin_arm_wfi:
1926 case clang::ARM::BI__wfi:
1927 Value = 3;
1928 break;
1929 case clang::ARM::BI__builtin_arm_sev:
1930 case clang::ARM::BI__sev:
1931 Value = 4;
1932 break;
1933 case clang::ARM::BI__builtin_arm_sevl:
1934 case clang::ARM::BI__sevl:
1935 Value = 5;
1936 break;
1937 }
1938
1939 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
1940 llvm::ConstantInt::get(Int32Ty, Value));
1941}
1942
1948
1949// Generates the IR for the read/write special register builtin,
1950// ValueType is the type of the value that is to be written or read,
1951// RegisterType is the type of the register being written to or read from.
//
// AccessKind selects between a read (NormalRead / VolatileRead) and a Write.
// SysReg, when non-empty, names the register directly; otherwise the name is
// taken from the string literal passed as argument 0 of the call E.
// Returns the emitted intrinsic call, or for reads the call result converted
// to ValueType (truncated or cast to pointer) when the types differ.
//
// NOTE(review): the source line that opens the signature (function name and
// the first parameter, presumably CGF) is absent from this listing -- the
// embedded numbering jumps from 1951 to 1953. The definition of
// SpecialRegisterAccessKind (numbers 1943-1947) is not visible either.
// Restore both from upstream before building.
1953 const CallExpr *E,
1954 llvm::Type *RegisterType,
1955 llvm::Type *ValueType,
1956 SpecialRegisterAccessKind AccessKind,
1957 StringRef SysReg = "") {
1958 // The read/write register intrinsics only support 32, 64 and 128 bit operations.
1959 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
1960 RegisterType->isIntegerTy(128)) &&
1961 "Unsupported size for register.");
1962
1963 CodeGen::CGBuilderTy &Builder = CGF.Builder;
1964 CodeGen::CodeGenModule &CGM = CGF.CGM;
1965 LLVMContext &Context = CGM.getLLVMContext();
1966
// No explicit register name supplied: take it from the first call argument,
// which must be a string literal (the cast asserts otherwise).
1967 if (SysReg.empty()) {
1968 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
1969 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
1970 }
1971
// The register name is handed to the intrinsic as a metadata string wrapped
// in an MDNode and then in a MetadataAsValue.
1972 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
1973 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
1974 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
1975
// The intrinsics are overloaded on the register type only.
1976 llvm::Type *Types[] = { RegisterType };
1977
// MixedTypes: a 64-bit register accessed through a 32-bit value. The opposite
// combination (64-bit value, 32-bit register) is rejected by the assert.
1978 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
1979 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
1980 && "Can't fit 64-bit value in 32-bit register");
1981
// ---- Read path ----
1982 if (AccessKind != Write) {
1983 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
1984 llvm::Function *F = CGM.getIntrinsic(
1985 AccessKind == VolatileRead ? Intrinsic::read_volatile_register
1986 : Intrinsic::read_register,
1987 Types);
1988 llvm::Value *Call = Builder.CreateCall(F, Metadata);
1989
1990 if (MixedTypes)
1991 // Read into 64 bit register and then truncate result to 32 bit.
1992 return Builder.CreateTrunc(Call, ValueType);
1993
1994 if (ValueType->isPointerTy())
1995 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
1996 return Builder.CreateIntToPtr(Call, ValueType);
1997
1998 return Call;
1999 }
2000
// ---- Write path ----
// The value to write is argument 1 of the call expression.
2001 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
2002 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
2003 if (MixedTypes) {
2004 // Extend 32 bit write value to 64 bit to pass to write.
2005 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
2006 return Builder.CreateCall(F, { Metadata, ArgValue });
2007 }
2008
2009 if (ValueType->isPointerTy()) {
2010 // Have VoidPtrTy ArgValue but need to pass an i32/i64 to the intrinsic.
2011 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
2012 return Builder.CreateCall(F, { Metadata, ArgValue });
2013 }
2014
2015 return Builder.CreateCall(F, { Metadata, ArgValue });
2016}
2017
// Emits a call to the llvm.aarch64.range.prefetch intrinsic for the range
// prefetch builtins. The first three call arguments (address, access kind,
// policy) are forwarded as-is. For BI__builtin_arm_range_prefetch_x the
// fourth operand is a 64-bit metadata word assembled from the constant
// arguments 3..6; for any other builtin ID argument 3 is forwarded unchanged.
//
// NOTE(review): two source lines are absent from this listing -- number 2022,
// which presumably declares the Ops vector used below, and number 2025, which
// presumably declares Result inside the lambda. Restore both from upstream
// before building.
2018static Value *EmitRangePrefetchBuiltin(CodeGenFunction &CGF, unsigned BuiltinID,
2019 const CallExpr *E) {
2020 CodeGen::CGBuilderTy &Builder = CGF.Builder;
2021 CodeGen::CodeGenModule &CGM = CGF.CGM;
2023
// Evaluates call argument ArgNo as an integer constant; a non-constant
// argument is treated as unreachable here.
2024 auto getIntArg = [&](unsigned ArgNo) {
2026 if (!E->getArg(ArgNo)->EvaluateAsInt(Result, CGM.getContext()))
2027 llvm_unreachable("Expected constant argument to range prefetch.");
2028 return Result.Val.getInt().getExtValue();
2029 };
2030
2031 Ops.push_back(CGF.EmitScalarExpr(E->getArg(0))); /*Addr*/
2032 Ops.push_back(CGF.EmitScalarExpr(E->getArg(1))); /*Access Kind*/
2033 Ops.push_back(CGF.EmitScalarExpr(E->getArg(2))); /*Policy*/
2034
2035 if (BuiltinID == clang::AArch64::BI__builtin_arm_range_prefetch_x) {
// Count is stored biased by one (argument value minus 1).
2036 auto Length = getIntArg(3);
2037 auto Count = getIntArg(4) - 1;
2038 auto Stride = getIntArg(5);
2039 auto Distance = getIntArg(6);
2040
2041 // Map ReuseDistance given in bytes to four bits representing decreasing
2042 // powers of two in the range 512MiB (0b0001) to 32KiB (0b1111). Values
2043 // are rounded up to the nearest power of 2, starting at 32KiB. Any value
2044 // over the maximum is represented by 0 (distance not known).
// NOTE(review): Log2_32_Ceil operates on a 32-bit value; if Distance can
// exceed 32 bits it would be truncated before the logarithm is taken --
// confirm that Sema bounds this argument.
2045 if (Distance > 0) {
2046 Distance = llvm::Log2_32_Ceil(Distance);
2047 if (Distance < 15)
2048 Distance = 15;
2049 else if (Distance > 29)
2050 Distance = 0;
2051 else
2052 Distance = 30 - Distance;
2053 }
2054
// Metadata layout, as built below:
//   bits 60 and up : encoded reuse distance
//   bits 38..59    : Stride (low 22 bits)
//   bits 22..37    : Count  (low 16 bits)
//   bits  0..21    : Length (low 22 bits)
2055 uint64_t Mask22 = (1ULL << 22) - 1;
2056 uint64_t Mask16 = (1ULL << 16) - 1;
2057 uint64_t Metadata = (Distance << 60) | ((Stride & Mask22) << 38) |
2058 ((Count & Mask16) << 22) | (Length & Mask22);
2059
2060 Ops.push_back(llvm::ConstantInt::get(Builder.getInt64Ty(), Metadata));
2061 } else
2062 Ops.push_back(CGF.EmitScalarExpr(E->getArg(3)));
2063
2064 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_range_prefetch),
2065 Ops);
2066}
2067
2068/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
2069/// argument that specifies the vector type. The additional argument is meant
2070/// for Sema checking (see `CheckNeonBuiltinFunctionCall`) and this function
2071/// should be kept consistent with the logic in Sema.
2072/// TODO: Make this return false for SISD builtins.
2073static bool HasExtraNeonArgument(unsigned BuiltinID) {
2074 // Required by the headers included below, but not in this particular
2075 // function.
2076 [[maybe_unused]] int PtrArgNum = -1;
2077 [[maybe_unused]] bool HasConstPtr = false;
2078
2079 // The mask encodes the type. We don't care about the actual value. Instead,
2080 // we just check whether its been set.
2081 uint64_t mask = 0;
2082 switch (BuiltinID) {
2083#define GET_NEON_OVERLOAD_CHECK
2084#include "clang/Basic/arm_fp16.inc"
2085#include "clang/Basic/arm_neon.inc"
2086#undef GET_NEON_OVERLOAD_CHECK
2087 // Non-neon builtins for controling VFP that take extra argument for
2088 // discriminating the type.
2089 case ARM::BI__builtin_arm_vcvtr_f:
2090 case ARM::BI__builtin_arm_vcvtr_d:
2091 mask = 1;
2092 }
2093
2094 if (mask)
2095 return true;
2096
2097 return false;
2098}
2099
2101 const CallExpr *E,
2103 llvm::Triple::ArchType Arch) {
2104 if (auto Hint = GetValueForARMHint(BuiltinID))
2105 return Hint;
2106
2107 if (BuiltinID == clang::ARM::BI__emit) {
2108 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
2109 llvm::FunctionType *FTy =
2110 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
2111
2113 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
2114 llvm_unreachable("Sema will ensure that the parameter is constant");
2115
2116 llvm::APSInt Value = Result.Val.getInt();
2117 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
2118
2119 llvm::InlineAsm *Emit =
2120 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
2121 /*hasSideEffects=*/true)
2122 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
2123 /*hasSideEffects=*/true);
2124
2125 return Builder.CreateCall(Emit);
2126 }
2127
2128 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
2129 Value *Option = EmitScalarExpr(E->getArg(0));
2130 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
2131 }
2132
2133 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
2135 Value *RW = EmitScalarExpr(E->getArg(1));
2136 Value *IsData = EmitScalarExpr(E->getArg(2));
2137
2138 // Locality is not supported on ARM target
2139 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
2140
2141 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
2142 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
2143 }
2144
2145 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
2146 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2147 return Builder.CreateCall(
2148 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
2149 }
2150
2151 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
2152 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
2153 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2154 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
2155 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
2156 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
2157 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
2158 return Res;
2159 }
2160
2161
2162 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
2163 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2164 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
2165 }
2166 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
2167 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2168 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
2169 "cls");
2170 }
2171
2172 if (BuiltinID == clang::ARM::BI__clear_cache) {
2173 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
2174 const FunctionDecl *FD = E->getDirectCallee();
2175 Value *Ops[2];
2176 for (unsigned i = 0; i < 2; i++)
2177 Ops[i] = EmitScalarExpr(E->getArg(i));
2178 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
2179 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
2180 StringRef Name = FD->getName();
2181 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
2182 }
2183
2184 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
2185 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
2186 Function *F;
2187
2188 switch (BuiltinID) {
2189 default: llvm_unreachable("unexpected builtin");
2190 case clang::ARM::BI__builtin_arm_mcrr:
2191 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
2192 break;
2193 case clang::ARM::BI__builtin_arm_mcrr2:
2194 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
2195 break;
2196 }
2197
2198 // MCRR{2} instruction has 5 operands but
2199 // the intrinsic has 4 because Rt and Rt2
2200 // are represented as a single unsigned 64
2201 // bit integer in the intrinsic definition
2202 // but internally it's represented as 2 32
2203 // bit integers.
2204
2205 Value *Coproc = EmitScalarExpr(E->getArg(0));
2206 Value *Opc1 = EmitScalarExpr(E->getArg(1));
2207 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
2208 Value *CRm = EmitScalarExpr(E->getArg(3));
2209
2210 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
2211 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
2212 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
2213 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
2214
2215 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
2216 }
2217
2218 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
2219 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
2220 Function *F;
2221
2222 switch (BuiltinID) {
2223 default: llvm_unreachable("unexpected builtin");
2224 case clang::ARM::BI__builtin_arm_mrrc:
2225 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
2226 break;
2227 case clang::ARM::BI__builtin_arm_mrrc2:
2228 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
2229 break;
2230 }
2231
2232 Value *Coproc = EmitScalarExpr(E->getArg(0));
2233 Value *Opc1 = EmitScalarExpr(E->getArg(1));
2234 Value *CRm = EmitScalarExpr(E->getArg(2));
2235 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
2236
2237 // Returns an unsigned 64 bit integer, represented
2238 // as two 32 bit integers.
2239
2240 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
2241 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
2242 Rt = Builder.CreateZExt(Rt, Int64Ty);
2243 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
2244
2245 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
2246 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
2247 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
2248
2249 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
2250 }
2251
2252 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
2253 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2254 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
2255 getContext().getTypeSize(E->getType()) == 64) ||
2256 BuiltinID == clang::ARM::BI__ldrexd) {
2257 Function *F;
2258
2259 switch (BuiltinID) {
2260 default: llvm_unreachable("unexpected builtin");
2261 case clang::ARM::BI__builtin_arm_ldaex:
2262 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
2263 break;
2264 case clang::ARM::BI__builtin_arm_ldrexd:
2265 case clang::ARM::BI__builtin_arm_ldrex:
2266 case clang::ARM::BI__ldrexd:
2267 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
2268 break;
2269 }
2270
2271 Value *LdPtr = EmitScalarExpr(E->getArg(0));
2272 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
2273
2274 Value *Val0 = Builder.CreateExtractValue(Val, 1);
2275 Value *Val1 = Builder.CreateExtractValue(Val, 0);
2276 Val0 = Builder.CreateZExt(Val0, Int64Ty);
2277 Val1 = Builder.CreateZExt(Val1, Int64Ty);
2278
2279 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
2280 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
2281 Val = Builder.CreateOr(Val, Val1);
2282 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
2283 }
2284
2285 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2286 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
2287 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
2288
2289 QualType Ty = E->getType();
2290 llvm::Type *RealResTy = ConvertType(Ty);
2291 llvm::Type *IntTy =
2292 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
2293
2294 Function *F = CGM.getIntrinsic(
2295 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
2296 : Intrinsic::arm_ldrex,
2297 DefaultPtrTy);
2298 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
2299 Val->addParamAttr(
2300 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
2301
2302 if (RealResTy->isPointerTy())
2303 return Builder.CreateIntToPtr(Val, RealResTy);
2304 else {
2305 llvm::Type *IntResTy = llvm::IntegerType::get(
2306 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
2307 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
2308 RealResTy);
2309 }
2310 }
2311
2312 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
2313 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
2314 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
2315 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
2316 Function *F = CGM.getIntrinsic(
2317 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
2318 : Intrinsic::arm_strexd);
2319 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
2320
2322 Value *Val = EmitScalarExpr(E->getArg(0));
2323 Builder.CreateStore(Val, Tmp);
2324
2325 Address LdPtr = Tmp.withElementType(STy);
2326 Val = Builder.CreateLoad(LdPtr);
2327
2328 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
2329 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
2330 Value *StPtr = EmitScalarExpr(E->getArg(1));
2331 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
2332 }
2333
2334 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
2335 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
2336 Value *StoreVal = EmitScalarExpr(E->getArg(0));
2337 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
2338
2339 QualType Ty = E->getArg(0)->getType();
2340 llvm::Type *StoreTy =
2341 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
2342
2343 if (StoreVal->getType()->isPointerTy())
2344 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
2345 else {
2346 llvm::Type *IntTy = llvm::IntegerType::get(
2348 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
2349 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
2350 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
2351 }
2352
2353 Function *F = CGM.getIntrinsic(
2354 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
2355 : Intrinsic::arm_strex,
2356 StoreAddr->getType());
2357
2358 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
2359 CI->addParamAttr(
2360 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
2361 return CI;
2362 }
2363
2364 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
2365 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
2366 return Builder.CreateCall(F);
2367 }
2368
2369 // CRC32
2370 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
2371 switch (BuiltinID) {
2372 case clang::ARM::BI__builtin_arm_crc32b:
2373 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
2374 case clang::ARM::BI__builtin_arm_crc32cb:
2375 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
2376 case clang::ARM::BI__builtin_arm_crc32h:
2377 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
2378 case clang::ARM::BI__builtin_arm_crc32ch:
2379 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
2380 case clang::ARM::BI__builtin_arm_crc32w:
2381 case clang::ARM::BI__builtin_arm_crc32d:
2382 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
2383 case clang::ARM::BI__builtin_arm_crc32cw:
2384 case clang::ARM::BI__builtin_arm_crc32cd:
2385 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
2386 }
2387
2388 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
2389 Value *Arg0 = EmitScalarExpr(E->getArg(0));
2390 Value *Arg1 = EmitScalarExpr(E->getArg(1));
2391
2392 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
2393 // intrinsics, hence we need different codegen for these cases.
2394 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
2395 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
2396 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
2397 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
2398 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
2399 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
2400
2401 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
2402 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
2403 return Builder.CreateCall(F, {Res, Arg1b});
2404 } else {
2405 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
2406
2407 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
2408 return Builder.CreateCall(F, {Arg0, Arg1});
2409 }
2410 }
2411
2412 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
2413 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2414 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
2415 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
2416 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
2417 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
2418
2419 SpecialRegisterAccessKind AccessKind = Write;
2420 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
2421 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2422 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
2423 AccessKind = VolatileRead;
2424
2425 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
2426 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
2427
2428 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2429 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
2430
2431 llvm::Type *ValueType;
2432 llvm::Type *RegisterType;
2433 if (IsPointerBuiltin) {
2434 ValueType = VoidPtrTy;
2436 } else if (Is64Bit) {
2437 ValueType = RegisterType = Int64Ty;
2438 } else {
2439 ValueType = RegisterType = Int32Ty;
2440 }
2441
2442 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
2443 AccessKind);
2444 }
2445
2446 if (BuiltinID == ARM::BI__builtin_sponentry) {
2447 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
2448 return Builder.CreateCall(F);
2449 }
2450
2451 // Handle MSVC intrinsics before argument evaluation to prevent double
2452 // evaluation.
2453 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
2454 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
2455
2456 // Deal with MVE builtins
2457 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
2458 return Result;
2459 // Handle CDE builtins
2460 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
2461 return Result;
2462
2463 // Some intrinsics are equivalent - if they are use the base intrinsic ID.
2464 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
2465 return P.first == BuiltinID;
2466 });
2467 if (It != end(NEONEquivalentIntrinsicMap))
2468 BuiltinID = It->second;
2469
2470 // Find out if any arguments are required to be integer constant
2471 // expressions.
2472 unsigned ICEArguments = 0;
2474 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2475 assert(Error == ASTContext::GE_None && "Should not codegen an error");
2476
2477 auto getAlignmentValue32 = [&](Address addr) -> Value* {
2478 return Builder.getInt32(addr.getAlignment().getQuantity());
2479 };
2480
2481 Address PtrOp0 = Address::invalid();
2482 Address PtrOp1 = Address::invalid();
2484 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
2485 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
2486 for (unsigned i = 0, e = NumArgs; i != e; i++) {
2487 if (i == 0) {
2488 switch (BuiltinID) {
2489 case NEON::BI__builtin_neon_vld1_v:
2490 case NEON::BI__builtin_neon_vld1q_v:
2491 case NEON::BI__builtin_neon_vld1q_lane_v:
2492 case NEON::BI__builtin_neon_vld1_lane_v:
2493 case NEON::BI__builtin_neon_vld1_dup_v:
2494 case NEON::BI__builtin_neon_vld1q_dup_v:
2495 case NEON::BI__builtin_neon_vst1_v:
2496 case NEON::BI__builtin_neon_vst1q_v:
2497 case NEON::BI__builtin_neon_vst1q_lane_v:
2498 case NEON::BI__builtin_neon_vst1_lane_v:
2499 case NEON::BI__builtin_neon_vst2_v:
2500 case NEON::BI__builtin_neon_vst2q_v:
2501 case NEON::BI__builtin_neon_vst2_lane_v:
2502 case NEON::BI__builtin_neon_vst2q_lane_v:
2503 case NEON::BI__builtin_neon_vst3_v:
2504 case NEON::BI__builtin_neon_vst3q_v:
2505 case NEON::BI__builtin_neon_vst3_lane_v:
2506 case NEON::BI__builtin_neon_vst3q_lane_v:
2507 case NEON::BI__builtin_neon_vst4_v:
2508 case NEON::BI__builtin_neon_vst4q_v:
2509 case NEON::BI__builtin_neon_vst4_lane_v:
2510 case NEON::BI__builtin_neon_vst4q_lane_v:
2511 // Get the alignment for the argument in addition to the value;
2512 // we'll use it later.
2513 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
2514 Ops.push_back(PtrOp0.emitRawPointer(*this));
2515 continue;
2516 }
2517 }
2518 if (i == 1) {
2519 switch (BuiltinID) {
2520 case NEON::BI__builtin_neon_vld2_v:
2521 case NEON::BI__builtin_neon_vld2q_v:
2522 case NEON::BI__builtin_neon_vld3_v:
2523 case NEON::BI__builtin_neon_vld3q_v:
2524 case NEON::BI__builtin_neon_vld4_v:
2525 case NEON::BI__builtin_neon_vld4q_v:
2526 case NEON::BI__builtin_neon_vld2_lane_v:
2527 case NEON::BI__builtin_neon_vld2q_lane_v:
2528 case NEON::BI__builtin_neon_vld3_lane_v:
2529 case NEON::BI__builtin_neon_vld3q_lane_v:
2530 case NEON::BI__builtin_neon_vld4_lane_v:
2531 case NEON::BI__builtin_neon_vld4q_lane_v:
2532 case NEON::BI__builtin_neon_vld2_dup_v:
2533 case NEON::BI__builtin_neon_vld2q_dup_v:
2534 case NEON::BI__builtin_neon_vld3_dup_v:
2535 case NEON::BI__builtin_neon_vld3q_dup_v:
2536 case NEON::BI__builtin_neon_vld4_dup_v:
2537 case NEON::BI__builtin_neon_vld4q_dup_v:
2538 // Get the alignment for the argument in addition to the value;
2539 // we'll use it later.
2540 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
2541 Ops.push_back(PtrOp1.emitRawPointer(*this));
2542 continue;
2543 }
2544 }
2545
2546 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
2547 }
2548
2549 switch (BuiltinID) {
2550 default: break;
2551
2552 case NEON::BI__builtin_neon_vget_lane_i8:
2553 case NEON::BI__builtin_neon_vget_lane_i16:
2554 case NEON::BI__builtin_neon_vget_lane_i32:
2555 case NEON::BI__builtin_neon_vget_lane_i64:
2556 case NEON::BI__builtin_neon_vget_lane_bf16:
2557 case NEON::BI__builtin_neon_vget_lane_f32:
2558 case NEON::BI__builtin_neon_vgetq_lane_i8:
2559 case NEON::BI__builtin_neon_vgetq_lane_i16:
2560 case NEON::BI__builtin_neon_vgetq_lane_i32:
2561 case NEON::BI__builtin_neon_vgetq_lane_i64:
2562 case NEON::BI__builtin_neon_vgetq_lane_bf16:
2563 case NEON::BI__builtin_neon_vgetq_lane_f32:
2564 case NEON::BI__builtin_neon_vduph_lane_bf16:
2565 case NEON::BI__builtin_neon_vduph_laneq_bf16:
2566 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
2567
2568 case NEON::BI__builtin_neon_vrndns_f32: {
2569 Value *Arg = EmitScalarExpr(E->getArg(0));
2570 llvm::Type *Tys[] = {Arg->getType()};
2571 Function *F = CGM.getIntrinsic(Intrinsic::roundeven, Tys);
2572 return Builder.CreateCall(F, {Arg}, "vrndn"); }
2573
2574 case NEON::BI__builtin_neon_vset_lane_i8:
2575 case NEON::BI__builtin_neon_vset_lane_i16:
2576 case NEON::BI__builtin_neon_vset_lane_i32:
2577 case NEON::BI__builtin_neon_vset_lane_i64:
2578 case NEON::BI__builtin_neon_vset_lane_bf16:
2579 case NEON::BI__builtin_neon_vset_lane_f32:
2580 case NEON::BI__builtin_neon_vsetq_lane_i8:
2581 case NEON::BI__builtin_neon_vsetq_lane_i16:
2582 case NEON::BI__builtin_neon_vsetq_lane_i32:
2583 case NEON::BI__builtin_neon_vsetq_lane_i64:
2584 case NEON::BI__builtin_neon_vsetq_lane_bf16:
2585 case NEON::BI__builtin_neon_vsetq_lane_f32:
2586 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
2587
2588 case NEON::BI__builtin_neon_vsha1h_u32:
2589 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
2590 "vsha1h");
2591 case NEON::BI__builtin_neon_vsha1cq_u32:
2592 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
2593 "vsha1h");
2594 case NEON::BI__builtin_neon_vsha1pq_u32:
2595 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
2596 "vsha1h");
2597 case NEON::BI__builtin_neon_vsha1mq_u32:
2598 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
2599 "vsha1h");
2600
2601 case NEON::BI__builtin_neon_vcvth_bf16_f32:
2602 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
2603 "vcvtbfp2bf");
2604 case NEON::BI__builtin_neon_vcvt_f16_f32:
2605 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtfp2hf), Ops,
2606 "vcvtfp2hf");
2607 case NEON::BI__builtin_neon_vcvt_f32_f16:
2608 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvthf2fp), Ops,
2609 "vcvthf2fp");
2610
2611 // The ARM _MoveToCoprocessor builtins put the input register value as
2612 // the first argument, but the LLVM intrinsic expects it as the third one.
2613 case clang::ARM::BI_MoveToCoprocessor:
2614 case clang::ARM::BI_MoveToCoprocessor2: {
2615 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
2616 ? Intrinsic::arm_mcr
2617 : Intrinsic::arm_mcr2);
2618 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
2619 Ops[3], Ops[4], Ops[5]});
2620 }
2621 }
2622
2623 // Get the last argument, which specifies the vector type.
2624 assert(HasExtraArg);
2625 const Expr *Arg = E->getArg(E->getNumArgs()-1);
2626 std::optional<llvm::APSInt> Result =
2628 if (!Result)
2629 return nullptr;
2630
2631 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
2632 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
2633 // Determine the overloaded type of this builtin.
2634 llvm::Type *Ty;
2635 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
2636 Ty = FloatTy;
2637 else
2638 Ty = DoubleTy;
2639
2640 // Determine whether this is an unsigned conversion or not.
2641 bool usgn = Result->getZExtValue() == 1;
2642 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
2643
2644 // Call the appropriate intrinsic.
2645 Function *F = CGM.getIntrinsic(Int, Ty);
2646 return Builder.CreateCall(F, Ops, "vcvtr");
2647 }
2648
2649 // Determine the type of this overloaded NEON intrinsic.
2650 NeonTypeFlags Type = Result->getZExtValue();
2651 bool usgn = Type.isUnsigned();
2652 bool rightShift = false;
2653
2654 llvm::FixedVectorType *VTy =
2655 GetNeonType(this, Type, getTarget().hasFastHalfType(), false,
2656 getTarget().hasBFloat16Type());
2657 llvm::Type *Ty = VTy;
2658 if (!Ty)
2659 return nullptr;
2660
2661 // Many NEON builtins have identical semantics and uses in ARM and
2662 // AArch64. Emit these in a single function.
2663 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
2665 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
2666 if (Builtin)
2668 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
2669 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
2670
2671 unsigned Int;
2672 switch (BuiltinID) {
2673 default: return nullptr;
2674 case NEON::BI__builtin_neon_vld1q_lane_v:
2675 // Handle 64-bit integer elements as a special case. Use shuffles of
2676 // one-element vectors to avoid poor code for i64 in the backend.
2677 if (VTy->getElementType()->isIntegerTy(64)) {
2678 // Extract the other lane.
2679 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2680 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
2681 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
2682 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
2683 // Load the value as a one-element vector.
2684 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
2685 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2686 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
2687 Value *Align = getAlignmentValue32(PtrOp0);
2688 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
2689 // Combine them.
2690 int Indices[] = {1 - Lane, Lane};
2691 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
2692 }
2693 [[fallthrough]];
2694 case NEON::BI__builtin_neon_vld1_lane_v: {
2695 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2696 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
2697 Value *Ld = Builder.CreateLoad(PtrOp0);
2698 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
2699 }
2700 case NEON::BI__builtin_neon_vqrshrn_n_v:
2701 Int =
2702 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
2703 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
2704 1, true);
2705 case NEON::BI__builtin_neon_vqrshrun_n_v:
2706 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
2707 Ops, "vqrshrun_n", 1, true);
2708 case NEON::BI__builtin_neon_vqshrn_n_v:
2709 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
2710 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
2711 1, true);
2712 case NEON::BI__builtin_neon_vqshrun_n_v:
2713 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
2714 Ops, "vqshrun_n", 1, true);
2715 case NEON::BI__builtin_neon_vrecpe_v:
2716 case NEON::BI__builtin_neon_vrecpeq_v:
2717 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
2718 Ops, "vrecpe");
2719 case NEON::BI__builtin_neon_vrshrn_n_v:
2720 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
2721 Ops, "vrshrn_n", 1, true);
2722 case NEON::BI__builtin_neon_vrsra_n_v:
2723 case NEON::BI__builtin_neon_vrsraq_n_v:
2724 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2725 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2726 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
2727 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
2728 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
2729 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
2730 case NEON::BI__builtin_neon_vsri_n_v:
2731 case NEON::BI__builtin_neon_vsriq_n_v:
2732 rightShift = true;
2733 [[fallthrough]];
2734 case NEON::BI__builtin_neon_vsli_n_v:
2735 case NEON::BI__builtin_neon_vsliq_n_v:
2736 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
2737 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
2738 Ops, "vsli_n");
2739 case NEON::BI__builtin_neon_vsra_n_v:
2740 case NEON::BI__builtin_neon_vsraq_n_v:
2741 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2742 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
2743 return Builder.CreateAdd(Ops[0], Ops[1]);
2744 case NEON::BI__builtin_neon_vst1q_lane_v:
2745 // Handle 64-bit integer elements as a special case. Use a shuffle to get
2746 // a one-element vector and avoid poor code for i64 in the backend.
2747 if (VTy->getElementType()->isIntegerTy(64)) {
2748 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2749 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
2750 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
2751 Ops[2] = getAlignmentValue32(PtrOp0);
2752 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
2753 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
2754 Tys), Ops);
2755 }
2756 [[fallthrough]];
2757 case NEON::BI__builtin_neon_vst1_lane_v: {
2758 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2759 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
2760 return Builder.CreateStore(Ops[1],
2761 PtrOp0.withElementType(Ops[1]->getType()));
2762 }
2763 case NEON::BI__builtin_neon_vtbl1_v:
2764 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
2765 Ops, "vtbl1");
2766 case NEON::BI__builtin_neon_vtbl2_v:
2767 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
2768 Ops, "vtbl2");
2769 case NEON::BI__builtin_neon_vtbl3_v:
2770 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
2771 Ops, "vtbl3");
2772 case NEON::BI__builtin_neon_vtbl4_v:
2773 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
2774 Ops, "vtbl4");
2775 case NEON::BI__builtin_neon_vtbx1_v:
2776 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
2777 Ops, "vtbx1");
2778 case NEON::BI__builtin_neon_vtbx2_v:
2779 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
2780 Ops, "vtbx2");
2781 case NEON::BI__builtin_neon_vtbx3_v:
2782 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
2783 Ops, "vtbx3");
2784 case NEON::BI__builtin_neon_vtbx4_v:
2785 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
2786 Ops, "vtbx4");
2787 }
2788}
2789
2790template<typename Integer>
2792 return E->getIntegerConstantExpr(Context)->getExtValue();
2793}
2794
2795static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
2796 llvm::Type *T, bool Unsigned) {
2797 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
2798 // which finds it convenient to specify signed/unsigned as a boolean flag.
2799 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
2800}
2801
2802static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
2803 uint32_t Shift, bool Unsigned) {
2804 // MVE helper function for integer shift right. This must handle signed vs
2805 // unsigned, and also deal specially with the case where the shift count is
2806 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
2807 // undefined behavior, but in MVE it's legal, so we must convert it to code
2808 // that is not undefined in IR.
2809 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
2810 ->getElementType()
2811 ->getPrimitiveSizeInBits();
2812 if (Shift == LaneBits) {
2813 // An unsigned shift of the full lane size always generates zero, so we can
2814 // simply emit a zero vector. A signed shift of the full lane size does the
2815 // same thing as shifting by one bit fewer.
2816 if (Unsigned)
2817 return llvm::Constant::getNullValue(V->getType());
2818 else
2819 --Shift;
2820 }
2821 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
2822}
2823
2824static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
2825 // MVE-specific helper function for a vector splat, which infers the element
2826 // count of the output vector by knowing that MVE vectors are all 128 bits
2827 // wide.
2828 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
2829 return Builder.CreateVectorSplat(Elements, V);
2830}
2831
2832static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
2833 CodeGenFunction *CGF,
2834 llvm::Value *V,
2835 llvm::Type *DestType) {
2836 // Convert one MVE vector type into another by reinterpreting its in-register
2837 // format.
2838 //
2839 // Little-endian, this is identical to a bitcast (which reinterprets the
2840 // memory format). But big-endian, they're not necessarily the same, because
2841 // the register and memory formats map to each other differently depending on
2842 // the lane size.
2843 //
2844 // We generate a bitcast whenever we can (if we're little-endian, or if the
2845 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
2846 // that performs the different kind of reinterpretation.
2847 if (CGF->getTarget().isBigEndian() &&
2848 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
2849 return Builder.CreateCall(
2850 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
2851 {DestType, V->getType()}),
2852 V);
2853 } else {
2854 return Builder.CreateBitCast(V, DestType);
2855 }
2856}
2857
2858static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
2859 // Make a shufflevector that extracts every other element of a vector (evens
2860 // or odds, as desired).
2861 SmallVector<int, 16> Indices;
2862 unsigned InputElements =
2863 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
2864 for (unsigned i = 0; i < InputElements; i += 2)
2865 Indices.push_back(i + Odd);
2866 return Builder.CreateShuffleVector(V, Indices);
2867}
2868
2869static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
2870 llvm::Value *V1) {
2871 // Make a shufflevector that interleaves two vectors element by element.
2872 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
2873 SmallVector<int, 16> Indices;
2874 unsigned InputElements =
2875 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
2876 for (unsigned i = 0; i < InputElements; i++) {
2877 Indices.push_back(i);
2878 Indices.push_back(i + InputElements);
2879 }
2880 return Builder.CreateShuffleVector(V0, V1, Indices);
2881}
2882
2883template<unsigned HighBit, unsigned OtherBits>
2884static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
2885 // MVE-specific helper function to make a vector splat of a constant such as
2886 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
2887 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
2888 unsigned LaneBits = T->getPrimitiveSizeInBits();
2889 uint32_t Value = HighBit << (LaneBits - 1);
2890 if (OtherBits)
2891 Value |= (1UL << (LaneBits - 1)) - 1;
2892 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
2893 return ARMMVEVectorSplat(Builder, Lane);
2894}
2895
2896static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
2897 llvm::Value *V,
2898 unsigned ReverseWidth) {
2899 // MVE-specific helper function which reverses the elements of a
2900 // vector within every (ReverseWidth)-bit collection of lanes.
2901 SmallVector<int, 16> Indices;
2902 unsigned LaneSize = V->getType()->getScalarSizeInBits();
2903 unsigned Elements = 128 / LaneSize;
2904 unsigned Mask = ReverseWidth / LaneSize - 1;
2905 for (unsigned i = 0; i < Elements; i++)
2906 Indices.push_back(i ^ Mask);
2907 return Builder.CreateShuffleVector(V, Indices);
2908}
2909
2910static llvm::Value *ARMMVECreateSIToFP(CGBuilderTy &Builder,
2911 CodeGenFunction *CGF, llvm::Value *V,
2912 llvm::Type *Ty) {
2913 return Builder.CreateCall(
2914 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_fp_int, {Ty, V->getType()}),
2915 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 0)});
2916}
2917
2918static llvm::Value *ARMMVECreateUIToFP(CGBuilderTy &Builder,
2919 CodeGenFunction *CGF, llvm::Value *V,
2920 llvm::Type *Ty) {
2921 return Builder.CreateCall(
2922 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_fp_int, {Ty, V->getType()}),
2923 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)});
2924}
2925
2926static llvm::Value *ARMMVECreateFPToSI(CGBuilderTy &Builder,
2927 CodeGenFunction *CGF, llvm::Value *V,
2928 llvm::Type *Ty) {
2929 return Builder.CreateCall(
2930 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_int_fp, {Ty, V->getType()}),
2931 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 0)});
2932}
2933
2934static llvm::Value *ARMMVECreateFPToUI(CGBuilderTy &Builder,
2935 CodeGenFunction *CGF, llvm::Value *V,
2936 llvm::Type *Ty) {
2937 return Builder.CreateCall(
2938 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_int_fp, {Ty, V->getType()}),
2939 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)});
2940}
2941
2943 const CallExpr *E,
2945 llvm::Triple::ArchType Arch) {
2946 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
2947 Intrinsic::ID IRIntr;
2948 unsigned NumVectors;
2949
2950 // Code autogenerated by Tablegen will handle all the simple builtins.
2951 switch (BuiltinID) {
2952 #include "clang/Basic/arm_mve_builtin_cg.inc"
2953
2954 // If we didn't match an MVE builtin id at all, go back to the
2955 // main EmitARMBuiltinExpr.
2956 default:
2957 return nullptr;
2958 }
2959
2960 // Anything that breaks from that switch is an MVE builtin that
2961 // needs handwritten code to generate.
2962
2963 switch (CustomCodeGenType) {
2964
2965 case CustomCodeGen::VLD24: {
2968
2969 auto MvecCType = E->getType();
2970 auto MvecLType = ConvertType(MvecCType);
2971 assert(MvecLType->isStructTy() &&
2972 "Return type for vld[24]q should be a struct");
2973 assert(MvecLType->getStructNumElements() == 1 &&
2974 "Return-type struct for vld[24]q should have one element");
2975 auto MvecLTypeInner = MvecLType->getStructElementType(0);
2976 assert(MvecLTypeInner->isArrayTy() &&
2977 "Return-type struct for vld[24]q should contain an array");
2978 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
2979 "Array member of return-type struct vld[24]q has wrong length");
2980 auto VecLType = MvecLTypeInner->getArrayElementType();
2981
2982 Tys.push_back(VecLType);
2983
2984 auto Addr = E->getArg(0);
2985 Ops.push_back(EmitScalarExpr(Addr));
2986 Tys.push_back(ConvertType(Addr->getType()));
2987
2988 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
2989 Value *LoadResult = Builder.CreateCall(F, Ops);
2990 Value *MvecOut = PoisonValue::get(MvecLType);
2991 for (unsigned i = 0; i < NumVectors; ++i) {
2992 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
2993 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
2994 }
2995
2996 if (ReturnValue.isNull())
2997 return MvecOut;
2998 else
2999 return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
3000 }
3001
3002 case CustomCodeGen::VST24: {
3005
3006 auto Addr = E->getArg(0);
3007 Ops.push_back(EmitScalarExpr(Addr));
3008 Tys.push_back(ConvertType(Addr->getType()));
3009
3010 auto MvecCType = E->getArg(1)->getType();
3011 auto MvecLType = ConvertType(MvecCType);
3012 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
3013 assert(MvecLType->getStructNumElements() == 1 &&
3014 "Data-type struct for vst2q should have one element");
3015 auto MvecLTypeInner = MvecLType->getStructElementType(0);
3016 assert(MvecLTypeInner->isArrayTy() &&
3017 "Data-type struct for vst2q should contain an array");
3018 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3019 "Array member of return-type struct vld[24]q has wrong length");
3020 auto VecLType = MvecLTypeInner->getArrayElementType();
3021
3022 Tys.push_back(VecLType);
3023
3024 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
3025 EmitAggExpr(E->getArg(1), MvecSlot);
3026 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
3027 for (unsigned i = 0; i < NumVectors; i++)
3028 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
3029
3030 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
3031 Value *ToReturn = nullptr;
3032 for (unsigned i = 0; i < NumVectors; i++) {
3033 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
3034 ToReturn = Builder.CreateCall(F, Ops);
3035 Ops.pop_back();
3036 }
3037 return ToReturn;
3038 }
3039 }
3040 llvm_unreachable("unknown custom codegen type.");
3041}
3042
3044 const CallExpr *E,
3046 llvm::Triple::ArchType Arch) {
3047 switch (BuiltinID) {
3048 default:
3049 return nullptr;
3050#include "clang/Basic/arm_cde_builtin_cg.inc"
3051 }
3052}
3053
/// Emit IR for the NEON table-lookup builtins (vtbl/vqtbl and vtbx/vqtbx).
///
/// Returns nullptr when \p BuiltinID is not one of the builtins listed in the
/// first switch, when the last call argument does not yield a value for
/// `Result`, or when GetNeonType cannot produce a vector type.
///
/// NOTE(review): `Ops` is used throughout the body but its parameter
/// declaration is not visible in this listing.
3054static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
3055 const CallExpr *E,
3057 llvm::Triple::ArchType Arch) {
  // Int/s hold the intrinsic ID and value name for the cases that fall through
  // to the common EmitNeonCall at the end; Int stays 0 when nothing selected.
3058 unsigned int Int = 0;
3059 const char *s = nullptr;
3060
  // Filter: only the table-lookup builtins continue past this switch.
3061 switch (BuiltinID) {
3062 default:
3063 return nullptr;
3064 case NEON::BI__builtin_neon_vtbl1_v:
3065 case NEON::BI__builtin_neon_vqtbl1_v:
3066 case NEON::BI__builtin_neon_vqtbl1q_v:
3067 case NEON::BI__builtin_neon_vtbl2_v:
3068 case NEON::BI__builtin_neon_vqtbl2_v:
3069 case NEON::BI__builtin_neon_vqtbl2q_v:
3070 case NEON::BI__builtin_neon_vtbl3_v:
3071 case NEON::BI__builtin_neon_vqtbl3_v:
3072 case NEON::BI__builtin_neon_vqtbl3q_v:
3073 case NEON::BI__builtin_neon_vtbl4_v:
3074 case NEON::BI__builtin_neon_vqtbl4_v:
3075 case NEON::BI__builtin_neon_vqtbl4q_v:
3076 break;
3077 case NEON::BI__builtin_neon_vtbx1_v:
3078 case NEON::BI__builtin_neon_vqtbx1_v:
3079 case NEON::BI__builtin_neon_vqtbx1q_v:
3080 case NEON::BI__builtin_neon_vtbx2_v:
3081 case NEON::BI__builtin_neon_vqtbx2_v:
3082 case NEON::BI__builtin_neon_vqtbx2q_v:
3083 case NEON::BI__builtin_neon_vtbx3_v:
3084 case NEON::BI__builtin_neon_vqtbx3_v:
3085 case NEON::BI__builtin_neon_vqtbx3q_v:
3086 case NEON::BI__builtin_neon_vtbx4_v:
3087 case NEON::BI__builtin_neon_vqtbx4_v:
3088 case NEON::BI__builtin_neon_vqtbx4q_v:
3089 break;
3090 }
3091
3092 assert(E->getNumArgs() >= 3);
3093
3094 // Get the last argument, which specifies the vector type.
3095 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  // NOTE(review): the initializer expression of `Result` is not visible in
  // this listing.
3096 std::optional<llvm::APSInt> Result =
3098 if (!Result)
3099 return nullptr;
3100
3101 // Determine the type of this overloaded NEON intrinsic.
3102 NeonTypeFlags Type = Result->getZExtValue();
3103 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
3104 if (!Ty)
3105 return nullptr;
3106
3107 CodeGen::CGBuilderTy &Builder = CGF.Builder;
3108
3109 // AArch64 scalar builtins are not overloaded, they do not have an extra
3110 // argument that specifies the vector type, need to handle each case.
  // The vtbl<N>/vtbx<N> cases return directly; the vqtbl/vqtbx cases only pick
  // Int and s and fall out of the switch to the shared call at the end.
3111 switch (BuiltinID) {
3112 case NEON::BI__builtin_neon_vtbl1_v: {
3113 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
3114 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3115 }
3116 case NEON::BI__builtin_neon_vtbl2_v: {
3117 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
3118 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3119 }
3120 case NEON::BI__builtin_neon_vtbl3_v: {
3121 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
3122 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3123 }
3124 case NEON::BI__builtin_neon_vtbl4_v: {
3125 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
3126 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3127 }
3128 case NEON::BI__builtin_neon_vtbx1_v: {
  // vtbx1 is built from a tbl1 lookup plus a per-lane select: lanes whose
  // index (Ops[2]) is >= 8 keep the corresponding lane of Ops[0], all other
  // lanes take the table result.
3129 Value *TblRes =
3130 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
3131 Intrinsic::aarch64_neon_tbl1, "vtbl1");
3132
3133 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
3134 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
  // Sign-extend the i1 compare result so it becomes an all-ones/all-zeros
  // lane mask of type Ty.
3135 CmpRes = Builder.CreateSExt(CmpRes, Ty);
3136
3137 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
3138 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
3139 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
3140 }
3141 case NEON::BI__builtin_neon_vtbx2_v: {
3142 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
3143 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
3144 }
3145 case NEON::BI__builtin_neon_vtbx3_v: {
  // Same select scheme as vtbx1, using a tbl2 lookup and a threshold of 24 on
  // the index operand (Ops[4]).
3146 Value *TblRes =
3147 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
3148 Intrinsic::aarch64_neon_tbl2, "vtbl2");
3149
3150 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
3151 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
3152 TwentyFourV);
3153 CmpRes = Builder.CreateSExt(CmpRes, Ty);
3154
3155 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
3156 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
3157 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
3158 }
3159 case NEON::BI__builtin_neon_vtbx4_v: {
3160 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
3161 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
3162 }
3163 case NEON::BI__builtin_neon_vqtbl1_v:
3164 case NEON::BI__builtin_neon_vqtbl1q_v:
3165 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
3166 case NEON::BI__builtin_neon_vqtbl2_v:
  // NOTE(review): the '{' on the next label is only closed by the first of
  // the two '}' that end this switch, so every following case label sits
  // inside that compound statement. It is legal, but the brace looks
  // unintentional.
3167 case NEON::BI__builtin_neon_vqtbl2q_v: {
3168 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
3169 case NEON::BI__builtin_neon_vqtbl3_v:
3170 case NEON::BI__builtin_neon_vqtbl3q_v:
3171 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
3172 case NEON::BI__builtin_neon_vqtbl4_v:
3173 case NEON::BI__builtin_neon_vqtbl4q_v:
3174 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
3175 case NEON::BI__builtin_neon_vqtbx1_v:
3176 case NEON::BI__builtin_neon_vqtbx1q_v:
3177 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
3178 case NEON::BI__builtin_neon_vqtbx2_v:
3179 case NEON::BI__builtin_neon_vqtbx2q_v:
3180 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
3181 case NEON::BI__builtin_neon_vqtbx3_v:
3182 case NEON::BI__builtin_neon_vqtbx3q_v:
3183 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
3184 case NEON::BI__builtin_neon_vqtbx4_v:
3185 case NEON::BI__builtin_neon_vqtbx4q_v:
3186 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
3187 }
3188 }
3189
  // No intrinsic was selected above.
3190 if (!Int)
3191 return nullptr;
3192
  // Common tail for the vqtbl/vqtbx builtins: call the selected intrinsic,
  // overloaded on Ty, with the operands unchanged.
3193 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
3194 return CGF.EmitNeonCall(F, Ops, s);
3195}
3196
  // NOTE(review): the declarator line of this definition is not visible in
  // this listing.
  // Bitcast Op to i16 and insert it into lane 0 of a poison <4 x i16>; the
  // other three lanes are left as poison.
3198 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
3199 Op = Builder.CreateBitCast(Op, Int16Ty);
3200 Value *V = PoisonValue::get(VTy);
3201 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3202 Op = Builder.CreateInsertElement(V, Op, CI);
3203 return Op;
3204}
3205
3206/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
3207/// access builtin. Only required if it can't be inferred from the base pointer
3208/// operand.
  // NOTE(review): the declarator line of this definition is not visible in
  // this listing.
3210 switch (TypeFlags.getMemEltType()) {
  // No explicit memory element type: use the element type from the flags.
3211 case SVETypeFlags::MemEltTyDefault:
3212 return getEltType(TypeFlags);
3213 case SVETypeFlags::MemEltTyInt8:
3214 return Builder.getInt8Ty();
3215 case SVETypeFlags::MemEltTyInt16:
3216 return Builder.getInt16Ty();
3217 case SVETypeFlags::MemEltTyInt32:
3218 return Builder.getInt32Ty();
3219 case SVETypeFlags::MemEltTyInt64:
3220 return Builder.getInt64Ty();
3221 }
3222 llvm_unreachable("Unknown MemEltType");
3223}
3224
3225llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
3226 switch (TypeFlags.getEltType()) {
3227 default:
3228 llvm_unreachable("Invalid SVETypeFlag!");
3229
3230 case SVETypeFlags::EltTyMFloat8:
3231 case SVETypeFlags::EltTyInt8:
3232 return Builder.getInt8Ty();
3233 case SVETypeFlags::EltTyInt16:
3234 return Builder.getInt16Ty();
3235 case SVETypeFlags::EltTyInt32:
3236 return Builder.getInt32Ty();
3237 case SVETypeFlags::EltTyInt64:
3238 return Builder.getInt64Ty();
3239 case SVETypeFlags::EltTyInt128:
3240 return Builder.getInt128Ty();
3241
3242 case SVETypeFlags::EltTyFloat16:
3243 return Builder.getHalfTy();
3244 case SVETypeFlags::EltTyFloat32:
3245 return Builder.getFloatTy();
3246 case SVETypeFlags::EltTyFloat64:
3247 return Builder.getDoubleTy();
3248
3249 case SVETypeFlags::EltTyBFloat16:
3250 return Builder.getBFloatTy();
3251
3252 case SVETypeFlags::EltTyBool8:
3253 case SVETypeFlags::EltTyBool16:
3254 case SVETypeFlags::EltTyBool32:
3255 case SVETypeFlags::EltTyBool64:
3256 return Builder.getInt1Ty();
3257 }
3258}
3259
3260// Return the llvm predicate vector type corresponding to the specified element
3261// TypeFlags.
// The result is always a scalable vector of i1 with one lane per data element:
// 16, 8, 4 or 2 lanes for 8-, 16-, 32- and 64-bit element kinds respectively.
// NOTE(review): the line carrying the function name and parameter list is not
// visible in this listing.
3262llvm::ScalableVectorType *
3264 switch (TypeFlags.getEltType()) {
3265 default: llvm_unreachable("Unhandled SVETypeFlag!");
3266
3267 case SVETypeFlags::EltTyInt8:
3268 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3269 case SVETypeFlags::EltTyInt16:
3270 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3271 case SVETypeFlags::EltTyInt32:
3272 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3273 case SVETypeFlags::EltTyInt64:
3274 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3275
3276 case SVETypeFlags::EltTyBFloat16:
3277 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3278 case SVETypeFlags::EltTyFloat16:
3279 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3280 case SVETypeFlags::EltTyFloat32:
3281 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3282 case SVETypeFlags::EltTyFloat64:
3283 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3284
3285 case SVETypeFlags::EltTyBool8:
3286 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3287 case SVETypeFlags::EltTyBool16:
3288 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3289 case SVETypeFlags::EltTyBool32:
3290 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3291 case SVETypeFlags::EltTyBool64:
3292 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3293 }
3294}
3295
3296// Return the llvm vector type corresponding to the specified element TypeFlags.
// Data element kinds map to a scalable vector of that element type with 16, 8,
// 4 or 2 lanes; the Bool kinds map to scalable i1 vectors with the same lane
// counts. Any other element kind is treated as unreachable.
// NOTE(review): the line carrying the function name and parameter list is not
// visible in this listing.
3297llvm::ScalableVectorType *
3299 switch (TypeFlags.getEltType()) {
3300 default:
3301 llvm_unreachable("Invalid SVETypeFlag!");
3302
3303 case SVETypeFlags::EltTyInt8:
3304 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
3305 case SVETypeFlags::EltTyInt16:
3306 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
3307 case SVETypeFlags::EltTyInt32:
3308 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
3309 case SVETypeFlags::EltTyInt64:
3310 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
3311
  // mfloat8 data uses i8 lanes.
3312 case SVETypeFlags::EltTyMFloat8:
3313 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
3314 case SVETypeFlags::EltTyFloat16:
3315 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
3316 case SVETypeFlags::EltTyBFloat16:
3317 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
3318 case SVETypeFlags::EltTyFloat32:
3319 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
3320 case SVETypeFlags::EltTyFloat64:
3321 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
3322
3323 case SVETypeFlags::EltTyBool8:
3324 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3325 case SVETypeFlags::EltTyBool16:
3326 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3327 case SVETypeFlags::EltTyBool32:
3328 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3329 case SVETypeFlags::EltTyBool64:
3330 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3331 }
3332}
3333
3334constexpr unsigned SVEBitsPerBlock = 128;
3335
3336static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
3337 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
3338 return llvm::ScalableVectorType::get(EltTy, NumElts);
3339}
3340
3341// Reinterpret the input predicate so that it can be used to correctly isolate
3342// the elements of the specified datatype.
// NOTE(review): the first declarator line of this definition is not visible in
// this listing.
3344 llvm::ScalableVectorType *VTy) {
3345
  // Values of the "aarch64.svcount" target extension type are returned as-is.
3346 if (isa<TargetExtType>(Pred->getType()) &&
3347 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
3348 return Pred;
3349
  // RTy is the i1 vector with the same element count as VTy; nothing to do if
  // the predicate already has that type.
3350 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
3351 if (Pred->getType() == RTy)
3352 return Pred;
3353
  // Choose the conversion intrinsic from the target lane count: fewer than 16
  // lanes uses convert_from_svbool (overloaded on the result type), 16 lanes
  // uses convert_to_svbool (overloaded on the source type).
3354 unsigned IntID;
3355 llvm::Type *IntrinsicTy;
3356 switch (VTy->getMinNumElements()) {
3357 default:
3358 llvm_unreachable("unsupported element count!");
3359 case 1:
3360 case 2:
3361 case 4:
3362 case 8:
3363 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
3364 IntrinsicTy = RTy;
3365 break;
3366 case 16:
3367 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
3368 IntrinsicTy = Pred->getType();
3369 break;
3370 }
3371
3372 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
3373 Value *C = Builder.CreateCall(F, Pred);
3374 assert(C->getType() == RTy && "Unexpected return type!");
3375 return C;
3376}
3377
3379 llvm::StructType *Ty) {
  // NOTE(review): the first declarator line of this definition is not visible
  // in this listing.
  // Convert a struct of predicates to struct type Ty, member by member.
  // Already the requested type: return it unchanged.
3380 if (PredTuple->getType() == Ty)
3381 return PredTuple;
3382
  // Rebuild the struct from poison, casting each member to the matching member
  // type of Ty with EmitSVEPredicateCast.
3383 Value *Ret = llvm::PoisonValue::get(Ty);
3384 for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
3385 Value *Pred = Builder.CreateExtractValue(PredTuple, I);
3386 Pred = EmitSVEPredicateCast(
3387 Pred, cast<llvm::ScalableVectorType>(Ty->getTypeAtIndex(I)));
3388 Ret = Builder.CreateInsertValue(Ret, Pred, I);
3389 }
3390
3391 return Ret;
3392}
3393
3396 unsigned IntID) {
  // NOTE(review): the first declarator line(s) of this definition are not
  // visible in this listing.
  // Emit a call to the gather-load intrinsic IntID. As used below, Ops[0] is
  // the predicate, Ops[1] the base and Ops[2] the optional offset/index. The
  // call result is zero- or sign-extended to getSVEType(TypeFlags).
3397 auto *ResultTy = getSVEType(TypeFlags);
  // The intrinsic is overloaded on a vector of the memory element type with the
  // same element count as the result, plus the type of the base operand.
3398 auto *OverloadedTy =
3399 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
3400 Function *F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
3401
3402 // At the ACLE level there's only one predicate type, svbool_t, which is
3403 // mapped to <n x 16 x i1>. However, this might be incompatible with the
3404 // actual type being loaded. For example, when loading doubles (i64) the
3405 // predicate should be <n x 2 x i1> instead. At the IR level the type of
3406 // the predicate and the data being loaded must match. Cast to the type
3407 // expected by the intrinsic. The intrinsic itself should be defined in
3408 // a way that enforces relations between parameter types.
3409 Ops[0] = EmitSVEPredicateCast(
3410 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
3411
3412 // Pass 0 when the offset is missing. This can only be applied when using
3413 // the "vector base" addressing mode for which ACLE allows no offset. The
3414 // corresponding LLVM IR always requires an offset.
3415 if (Ops.size() == 2) {
3416 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
3417 Ops.push_back(ConstantInt::get(Int64Ty, 0));
3418 }
3419
3420 // For "vector base, scalar index" scale the index so that it becomes a
3421 // scalar offset.
3422 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
  // Multiply the index by the element size in bytes via a left shift.
3423 unsigned BytesPerElt =
3424 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
3425 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
3426 }
3427
3428 Value *Call = Builder.CreateCall(F, Ops);
3429
3430 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
3431 // other cases it's folded into a nop.
3432 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
3433 : Builder.CreateSExt(Call, ResultTy);
3434}
3435
3438 unsigned IntID) {
  // NOTE(review): the first declarator line(s) of this definition are not
  // visible in this listing.
  // Emit a call to the scatter-store intrinsic IntID. After the data operand
  // is rotated to the front, Ops is {data, predicate, base, [offset/index]}.
3439 auto *SrcDataTy = getSVEType(TypeFlags);
  // Vector of the memory element type with the same element count as the
  // source data; the data is truncated to this type before the store.
3440 auto *OverloadedTy =
3441 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
3442
3443 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
3444 // it's the first argument. Move it accordingly.
3445 Ops.insert(Ops.begin(), Ops.pop_back_val());
3446
3447 Function *F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
3448
3449 // Pass 0 when the offset is missing. This can only be applied when using
3450 // the "vector base" addressing mode for which ACLE allows no offset. The
3451 // corresponding LLVM IR always requires an offset.
3452 if (Ops.size() == 3) {
3453 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
3454 Ops.push_back(ConstantInt::get(Int64Ty, 0));
3455 }
3456
3457 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
3458 // folded into a nop.
3459 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
3460
3461 // At the ACLE level there's only one predicate type, svbool_t, which is
3462 // mapped to <n x 16 x i1>. However, this might be incompatible with the
3463 // actual type being stored. For example, when storing doubles (i64) the
3464 // predicate should be <n x 2 x i1> instead. At the IR level the type of
3465 // the predicate and the data being stored must match. Cast to the type
3466 // expected by the intrinsic. The intrinsic itself should be defined in
3467 // a way that enforces relations between parameter types.
3468 Ops[1] = EmitSVEPredicateCast(
3469 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
3470
3471 // For "vector base, scalar index" scale the index so that it becomes a
3472 // scalar offset.
3473 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
  // Multiply the index by the element size in bytes via a left shift.
3474 unsigned BytesPerElt =
3475 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
3476 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
3477 }
3478
3479 return Builder.CreateCall(F, Ops);
3480}
3481
3484 unsigned IntID) {
  // NOTE(review): the first declarator line(s) of this definition are not
  // visible in this listing.
3485 // The gather prefetches are overloaded on the vector input - this can either
3486 // be the vector of base addresses or vector of offsets.
  // Use Ops[1] when it is a scalable vector, otherwise Ops[2] must be one.
3487 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
3488 if (!OverloadedTy)
3489 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
3490
3491 // Cast the predicate from svbool_t to the right number of elements.
3492 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
3493
3494 // vector + imm addressing modes
3495 if (Ops[1]->getType()->isVectorTy()) {
3496 if (Ops.size() == 3) {
3497 // Pass 0 for 'vector+imm' when the index is omitted.
3498 Ops.push_back(ConstantInt::get(Int64Ty, 0));
3499
3500 // The sv_prfop is the last operand in the builtin and IR intrinsic.
3501 std::swap(Ops[2], Ops[3]);
3502 } else {
3503 // Index needs to be passed as scaled offset.
3504 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
3505 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
  // Single-byte elements need no scaling.
3506 if (BytesPerElt > 1)
3507 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
3508 }
3509
3510 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
3511 return Builder.CreateCall(F, Ops);
3512 }
3513
  // Ops[1] is not a vector here: overload on its type and on the vector type
  // taken from Ops[2].
3514 Function *F = CGM.getIntrinsic(IntID, {Ops[1]->getType(), OverloadedTy});
3515 return Builder.CreateCall(F, Ops);
3516}
3517
3520 unsigned IntID) {
  // NOTE(review): the first declarator line(s) of this definition are not
  // visible in this listing.
  // Emit a call to intrinsic IntID with {predicate, base pointer}. Ops[0] is
  // the predicate, Ops[1] the base pointer and Ops[2] an optional offset.
3521 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
3522 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
3523 Value *BasePtr = Ops[1];
3524
3525 // Does the load have an offset?
  // The GEP uses VTy as its element type, so the offset counts whole vectors.
3526 if (Ops.size() > 2)
3527 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
3528
3529 Function *F = CGM.getIntrinsic(IntID, {VTy, BasePtr->getType()});
3530 return Builder.CreateCall(F, {Predicate, BasePtr});
3531}
3532
3535 unsigned IntID) {
  // NOTE(review): the first declarator line(s) of this definition are not
  // visible in this listing.
  // Emit a call to the multi-vector store intrinsic IntID. Ops[0] is the
  // predicate, Ops[1] the base pointer, and the last N entries of Ops are the
  // vectors to store; an optional offset sits at Ops[2].
3536 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
3537
  // N is the number of data vectors the selected intrinsic stores.
3538 unsigned N;
3539 switch (IntID) {
3540 case Intrinsic::aarch64_sve_st2:
3541 case Intrinsic::aarch64_sve_st1_pn_x2:
3542 case Intrinsic::aarch64_sve_stnt1_pn_x2:
3543 case Intrinsic::aarch64_sve_st2q:
3544 N = 2;
3545 break;
3546 case Intrinsic::aarch64_sve_st3:
3547 case Intrinsic::aarch64_sve_st3q:
3548 N = 3;
3549 break;
3550 case Intrinsic::aarch64_sve_st4:
3551 case Intrinsic::aarch64_sve_st1_pn_x4:
3552 case Intrinsic::aarch64_sve_stnt1_pn_x4:
3553 case Intrinsic::aarch64_sve_st4q:
3554 N = 4;
3555 break;
3556 default:
3557 llvm_unreachable("unknown intrinsic!");
3558 }
3559
3560 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
3561 Value *BasePtr = Ops[1];
3562
3563 // Does the store have an offset?
  // More than predicate + base + N data operands means an offset is present.
3564 if (Ops.size() > (2 + N))
3565 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
3566
3567 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
3568 // need to break up the tuple vector.
  // NOTE(review): the declaration of `Operands` is not visible in this
  // listing.
  // Intrinsic operand order: the N data vectors, then predicate, then pointer.
3570 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
3571 Operands.push_back(Ops[I]);
3572 Operands.append({Predicate, BasePtr});
3573 Function *F = CGM.getIntrinsic(IntID, {VTy, BasePtr->getType()});
3574
3575 return Builder.CreateCall(F, Operands);
3576}
3577
3578// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
3579// svpmullt_pair intrinsics, with the exception that their results are bitcast
3580// to a wider type.
// NOTE(review): the first declarator line(s) of this definition are not
// visible in this listing.
3583 unsigned BuiltinID) {
3584 // Splat scalar operand to vector (intrinsics with _n infix)
3585 if (TypeFlags.hasSplatOperand()) {
3586 unsigned OpNo = TypeFlags.getSplatOperand();
3587 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
3588 }
3589
3590 // The pair-wise function has a narrower overloaded type.
  // BuiltinID is passed to getIntrinsic here, i.e. it is used as the
  // intrinsic ID; the overload type is the type of the first operand.
3591 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
3592 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
3593
3594 // Now bitcast to the wider result type.
3595 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
3596 return EmitSVEReinterpret(Call, Ty);
3597}
3598
3600 ArrayRef<Value *> Ops, unsigned BuiltinID) {
  // NOTE(review): the first declarator line of this definition is not visible
  // in this listing.
  // Call the intrinsic identified by BuiltinID, overloaded on
  // getSVEType(TypeFlags), with Ops[0] and a constant i32 0 as second operand.
3601 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
3602 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
3603 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
3604}
3605
3608 unsigned BuiltinID) {
  // NOTE(review): the first declarator line(s) of this definition are not
  // visible in this listing.
  // Emit a prefetch call with operands {predicate, base pointer, prfop}.
  // Ops[0] is the predicate, Ops[1] the base pointer, Ops.back() the prefetch
  // operation; an optional index sits at Ops[2].
3609 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
3610 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
3611 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
3612
3613 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
3614 Value *BasePtr = Ops[1];
3615
3616 // Implement the index operand if not omitted.
  // The GEP uses MemoryTy as its element type, so the index counts vectors.
3617 if (Ops.size() > 3)
3618 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
3619
3620 Value *PrfOp = Ops.back();
3621
  // The intrinsic is overloaded on the predicate and pointer types.
3622 llvm::Type *Tys[2] = {Predicate->getType(), BasePtr->getType()};
3623 Function *F = CGM.getIntrinsic(BuiltinID, Tys);
3624 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
3625}
3626
3628 llvm::Type *ReturnTy,
3630 unsigned IntrinsicID,
3631 bool IsZExtReturn) {
  // NOTE(review): some declarator lines of this definition (including the one
  // declaring `Ops`) are not visible in this listing.
  // Emit a predicated load through intrinsic IntrinsicID. Ops[0] is the
  // predicate, Ops[1] the base pointer and Ops[2] an optional offset. The
  // memory element type is taken from the pointee type of call argument 1.
3632 QualType LangPTy = E->getArg(1)->getType();
3633 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
3634 LangPTy->castAs<PointerType>()->getPointeeType());
3635
3636 // Mfloat8 types are stored as a vector, so extra work
3637 // to extract the scalar element type is necessary.
3638 if (MemEltTy->isVectorTy()) {
3639 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
3640 "Only <1 x i8> expected");
3641 MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
3642 }
3643
3644 // The vector type that is returned may be different from the
3645 // eventual type loaded from memory.
3646 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
3647 llvm::ScalableVectorType *MemoryTy = nullptr;
3648 llvm::ScalableVectorType *PredTy = nullptr;
3649 bool IsQuadLoad = false;
3650 switch (IntrinsicID) {
  // ld1uwq / ld1udq: memory and predicate types have a single element per
  // block, and the intrinsic is overloaded on the full return type instead.
3651 case Intrinsic::aarch64_sve_ld1uwq:
3652 case Intrinsic::aarch64_sve_ld1udq:
3653 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
3654 PredTy = llvm::ScalableVectorType::get(
3655 llvm::Type::getInt1Ty(getLLVMContext()), 1);
3656 IsQuadLoad = true;
3657 break;
3658 default:
  // Memory vector has the return type's element count and the memory element
  // type; the predicate is cast to the matching element count.
3659 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
3660 PredTy = MemoryTy;
3661 break;
3662 }
3663
3664 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
3665 Value *BasePtr = Ops[1];
3666
3667 // Does the load have an offset?
3668 if (Ops.size() > 2)
3669 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
3670
3671 llvm::Type *Tys[2] = {IsQuadLoad ? VectorTy : MemoryTy, BasePtr->getType()};
3672 Function *F = CGM.getIntrinsic(IntrinsicID, Tys);
3673 auto *Load = Builder.CreateCall(F, {Predicate, BasePtr});
  // Attach TBAA metadata derived from the source-level pointee type.
3674 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
3675 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
3676
  // The quad loads return the call result directly, without extension.
3677 if (IsQuadLoad)
3678 return Load;
3679
  // Extend the loaded vector to the return type as requested by IsZExtReturn.
3680 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
3681 : Builder.CreateSExt(Load, VectorTy);
3682}
3683
3686 unsigned IntrinsicID) {
  // NOTE(review): the first declarator line(s) of this definition are not
  // visible in this listing.
  // Emit a predicated store through intrinsic IntrinsicID. Ops[0] is the
  // predicate, Ops[1] the base pointer, Ops.back() the data; when Ops has four
  // entries, Ops[2] is an offset. The memory element type is taken from the
  // pointee type of call argument 1.
3687 QualType LangPTy = E->getArg(1)->getType();
3688 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
3689 LangPTy->castAs<PointerType>()->getPointeeType());
3690
3691 // Mfloat8 types are stored as a vector, so extra work
3692 // to extract the scalar element type is necessary.
3693 if (MemEltTy->isVectorTy()) {
3694 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
3695 "Only <1 x i8> expected");
3696 MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
3697 }
3698
3699 // The type of the vector operand may be different from the
3700 // eventual type stored to memory.
3701 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
3702 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
3703
3704 auto PredTy = MemoryTy;
3705 auto AddrMemoryTy = MemoryTy;
3706 bool IsQuadStore = false;
3707
3708 switch (IntrinsicID) {
  // st1wq / st1dq: address arithmetic and predicate use single-element
  // scalable vectors, and the data is stored without truncation.
3709 case Intrinsic::aarch64_sve_st1wq:
3710 case Intrinsic::aarch64_sve_st1dq:
3711 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
3712 PredTy =
3713 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
3714 IsQuadStore = true;
3715 break;
3716 default:
3717 break;
3718 }
3719 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
3720 Value *BasePtr = Ops[1];
3721
3722 // Does the store have an offset?
3723 if (Ops.size() == 4)
3724 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
3725
3726 // Last value is always the data
3727 Value *Val =
3728 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
3729
3730 llvm::Type *Tys[2] = {IsQuadStore ? VectorTy : MemoryTy, BasePtr->getType()};
3731 Function *F = CGM.getIntrinsic(IntrinsicID, Tys);
3732 auto *Store = Builder.CreateCall(F, {Val, Predicate, BasePtr});
  // Attach TBAA metadata derived from the source-level pointee type.
3733 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
3734 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
3735 return Store;
3736}
3737
3740 unsigned IntID) {
  // NOTE(review): the first declarator line(s) of this definition, and the
  // continuation line of the EmitSVEPredicateCast call just below, are not
  // visible in this listing.
  // As used below: Ops[0] is forwarded unchanged as the third call operand,
  // Ops[1] is the slice, Ops[2] the predicate, Ops[3] the base pointer and
  // Ops[4] the optional vnum.
3741 Ops[2] = EmitSVEPredicateCast(
3743
3744 SmallVector<Value *> NewOps;
3745 NewOps.push_back(Ops[2]);
3746
3747 llvm::Value *BasePtr = Ops[3];
3748 llvm::Value *RealSlice = Ops[1];
3749 // If the intrinsic contains the vnum parameter, multiply it with the vector
3750 // size in bytes.
3751 if (Ops.size() == 5) {
  // svl = cntsd * 8, computed with both no-wrap flags set.
3752 Function *StreamingVectorLength =
3753 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd);
3754 llvm::Value *StreamingVectorLengthCall =
3755 Builder.CreateMul(Builder.CreateCall(StreamingVectorLength),
3756 llvm::ConstantInt::get(Int64Ty, 8), "svl",
3757 /* HasNUW */ true, /* HasNSW */ true);
3758 llvm::Value *Mulvl =
3759 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
3760 // The type of the ptr parameter is void *, so use Int8Ty here.
3761 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
  // The slice is advanced by vnum as well: widen to i64, add, truncate back
  // to i32.
3762 RealSlice = Builder.CreateZExt(RealSlice, Int64Ty);
3763 RealSlice = Builder.CreateAdd(RealSlice, Ops[4]);
3764 RealSlice = Builder.CreateTrunc(RealSlice, Int32Ty);
3765 }
  // Final operand order: predicate, pointer, Ops[0], slice.
3766 NewOps.push_back(BasePtr);
3767 NewOps.push_back(Ops[0]);
3768 NewOps.push_back(RealSlice);
3769 Function *F = CGM.getIntrinsic(IntID, BasePtr->getType());
3770 return Builder.CreateCall(F, NewOps);
3771}
3772
3775 unsigned IntID) {
  // NOTE(review): the first declarator line(s) of this definition are not
  // visible in this listing.
  // Call intrinsic IntID, overloaded on getSVEType(TypeFlags), after casting
  // the predicate operand to that vector's element count. The predicate is
  // Ops[1] for ZA reads and Ops[2] for ZA writes.
3776 auto *VecTy = getSVEType(TypeFlags);
3777 Function *F = CGM.getIntrinsic(IntID, VecTy);
3778 if (TypeFlags.isReadZA())
3779 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
3780 else if (TypeFlags.isWriteZA())
3781 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
3782 return Builder.CreateCall(F, Ops);
3783}
3784
3787 unsigned IntID) {
  // NOTE(review): the first declarator line(s) of this definition are not
  // visible in this listing.
3788 // svzero_za() intrinsic zeros the entire za tile and has no parameters.
  // In that case the constant i32 255 is supplied as the single operand.
3789 if (Ops.size() == 0)
3790 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
3791 Function *F = CGM.getIntrinsic(IntID, {});
3792 return Builder.CreateCall(F, Ops);
3793}
3794
3797 unsigned IntID) {
  // NOTE(review): the first declarator line(s) of this definition are not
  // visible in this listing.
  // Ensure there is a third i32 operand: append constant 0 when only two
  // operands were supplied, otherwise cast Ops[2] to i32 as a signed integer.
3798 if (Ops.size() == 2)
3799 Ops.push_back(Builder.getInt32(0));
3800 else
3801 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
  // The intrinsic is overloaded on the type of Ops[1].
3802 Function *F = CGM.getIntrinsic(IntID, Ops[1]->getType());
3803 return Builder.CreateCall(F, Ops);
3804}
3805
3806// Limit the usage of scalable llvm IR generated by the ACLE by using the
3807// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
3808Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
3809 return Builder.CreateVectorSplat(
3810 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
3811}
3812
  // NOTE(review): the declarator line of this definition is not visible in
  // this listing.
  // Splat Scalar across the scalable vector type that
  // getSVEVectorForElementType derives from its type. A vector-typed input is
  // first reduced to its element 0; the debug-only assert requires it to be a
  // single-element vector of i8.
3814 if (auto *Ty = Scalar->getType(); Ty->isVectorTy()) {
3815#ifndef NDEBUG
3816 auto *VecTy = cast<llvm::VectorType>(Ty);
3817 ElementCount EC = VecTy->getElementCount();
3818 assert(EC.isScalar() && VecTy->getElementType() == Int8Ty &&
3819 "Only <1 x i8> expected");
3820#endif
3821 Scalar = Builder.CreateExtractElement(Scalar, uint64_t(0));
3822 }
3823 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
3824}
3825
  // NOTE(review): the declarator line of this definition is not visible in
  // this listing.
  // Bitcast Val to Ty. When Ty is a struct, each member is bitcast
  // individually and the struct is rebuilt.
3827 // FIXME: For big endian this needs an additional REV, or needs a separate
3828 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
3829 // instruction is defined as 'bitwise' equivalent from memory point of
3830 // view (when storing/reloading), whereas the svreinterpret builtin
3831 // implements bitwise equivalent cast from register point of view.
3832 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
3833
3834 if (auto *StructTy = dyn_cast<StructType>(Ty)) {
  // Start from poison and fill in every member with its bitcast counterpart.
3835 Value *Tuple = llvm::PoisonValue::get(Ty);
3836
3837 for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
3838 Value *In = Builder.CreateExtractValue(Val, I);
3839 Value *Out = Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
3840 Tuple = Builder.CreateInsertValue(Tuple, Out, I);
3841 }
3842
3843 return Tuple;
3844 }
3845
3846 return Builder.CreateBitCast(Val, Ty);
3847}
3848
// Prepend the null (all-zero) constant of type Ty to the front of Ops.
// NOTE(review): the parameter line declaring `Ops` is not visible in this
// listing; `Builder` is not used in the visible body.
3849static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
3851 auto *SplatZero = Constant::getNullValue(Ty);
3852 Ops.insert(Ops.begin(), SplatZero);
3853}
3854
// Prepend an undef value of type Ty to the front of Ops.
// NOTE(review): the parameter line declaring `Ops` is not visible in this
// listing; `Builder` is not used in the visible body.
3855static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
3857 auto *SplatUndef = UndefValue::get(Ty);
3858 Ops.insert(Ops.begin(), SplatUndef);
3859}
3860
// Build the list of LLVM types selected by the overload-kind flags in
// TypeFlags (presumably the types used to resolve an overloaded intrinsic;
// confirm against the callers). The checks are ordered: the first matching
// flag decides the result.
// NOTE(review): the line carrying the function name and first parameter is not
// visible in this listing.
3861SmallVector<llvm::Type *, 2>
3863 llvm::Type *ResultType,
3864 ArrayRef<Value *> Ops) {
  // No overload types at all.
3865 if (TypeFlags.isOverloadNone())
3866 return {};
3867
3868 llvm::Type *DefaultType = getSVEType(TypeFlags);
3869
3870 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
3871 return {DefaultType, Ops[1]->getType()};
3872
3873 if (TypeFlags.isOverloadWhileRW())
3874 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
3875
3876 if (TypeFlags.isOverloadDefaultAndOp0())
3877 return {DefaultType, Ops[0]->getType()};
3878
3879 if (TypeFlags.isOverloadFirstandLast())
3880 return {Ops[0]->getType(), Ops.back()->getType()};
3881
3882 if (TypeFlags.isReductionQV())
3883 return {ResultType, Ops[1]->getType()};
3884
  // Everything else must be the default overload: just the SVE vector type.
3885 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
3886 return {DefaultType};
3887}
3888
3890 ArrayRef<Value *> Ops) {
  // NOTE(review): the first declarator line of this definition is not visible
  // in this listing.
  // Ops[0] is the tuple and Ops[1] a constant index. A "set" inserts Ops[2] at
  // that index and returns the updated tuple; a "get" extracts the member.
3891 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
3892 "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
  // cast<> requires the index operand to be a ConstantInt.
3893 unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();
3894
3895 if (TypeFlags.isTupleSet())
3896 return Builder.CreateInsertValue(Ops[0], Ops[2], Idx);
3897 return Builder.CreateExtractValue(Ops[0], Idx);
3898}
3899
3901 llvm::Type *Ty,
3902 ArrayRef<Value *> Ops) {
  // NOTE(review): the first declarator line of this definition is not visible
  // in this listing.
  // Build an aggregate of type Ty whose member Idx is Ops[Idx].
3903 assert(TypeFlags.isTupleCreate() && "Expects TypleFlag isTupleCreate");
3904
  // Start from poison and insert every operand at its own index.
3905 Value *Tuple = llvm::PoisonValue::get(Ty);
3906 for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
3907 Tuple = Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
3908
3909 return Tuple;
3910}
3911
3913 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
3914 SVETypeFlags TypeFlags) {
  // NOTE(review): the first declarator line of this definition, the
  // declaration of `Error`, and the initializer of the inner `Result` are not
  // visible in this listing.
  // Append one or more entries to Ops for each argument of call E: constant
  // arguments become 32-bit ConstantInts, struct-typed arguments are split
  // into their members (except for tuple get/set), everything else is
  // appended as emitted.
3915 // Find out if any arguments are required to be integer constant expressions.
3916 unsigned ICEArguments = 0;
3918 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3919 assert(Error == ASTContext::GE_None && "Should not codegen an error");
3920
3921 // Tuple set/get only requires one insert/extract vector, which is
3922 // created by EmitSVETupleSetOrGet.
3923 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
3924
3925 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
  // Bit i of ICEArguments marks argument i as a required integer constant.
3926 bool IsICE = ICEArguments & (1 << i);
3927 Value *Arg = EmitScalarExpr(E->getArg(i));
3928
3929 if (IsICE) {
3930 // If this is required to be a constant, constant fold it so that we know
3931 // that the generated intrinsic gets a ConstantInt.
3932 std::optional<llvm::APSInt> Result =
3934 assert(Result && "Expected argument to be a constant");
3935
3936 // Immediates for SVE llvm intrinsics are always 32bit. We can safely
3937 // truncate because the immediate has been range checked and no valid
3938 // immediate requires more than a handful of bits.
3939 *Result = Result->extOrTrunc(32);
3940 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
3941 continue;
3942 }
3943
  // Flatten struct-typed arguments into their individual members.
3944 if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
3945 for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
3946 Ops.push_back(Builder.CreateExtractValue(Arg, I));
3947
3948 continue;
3949 }
3950
3951 Ops.push_back(Arg);
3952 }
3953}
3954
3956 const CallExpr *E) {
// Lowers one SVE builtin call to LLVM IR. The steps, in order:
//  1. reinterpret builtins (a contiguous BuiltinID range) are emitted
//     directly from their single argument;
//  2. the operands are collected with GetAArch64SVEProcessedOperands;
//  3. builtins whose type flags select a dedicated emitter (loads, stores,
//     gathers, scatters, prefetches, tuple operations, undef) return early;
//  4. builtins with a non-zero LLVMIntrinsic have their operands adjusted
//     and are emitted as a single intrinsic call;
//  5. everything else is expanded by hand in the switch at the end.
// Returns nullptr when the builtin is not handled here.
// Ty is the LLVM type of the call's result.
3957 llvm::Type *Ty = ConvertType(E->getType());
3958 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
3959 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
3960 Value *Val = EmitScalarExpr(E->getArg(0));
3961 return EmitSVEReinterpret(Val, Ty);
3962 }
3963
3964 auto *Builtin =
3967
3969 SVETypeFlags TypeFlags(Builtin->TypeModifier);
3970 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
3971
// Flag-driven dispatch: each of these categories has its own emitter.
3972 if (TypeFlags.isLoad())
3973 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
3974 TypeFlags.isZExtReturn());
3975 if (TypeFlags.isStore())
3976 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
3977 if (TypeFlags.isGatherLoad())
3978 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
3979 if (TypeFlags.isScatterStore())
3980 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
3981 if (TypeFlags.isPrefetch())
3982 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
3983 if (TypeFlags.isGatherPrefetch())
3984 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
3985 if (TypeFlags.isStructLoad())
3986 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
3987 if (TypeFlags.isStructStore())
3988 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
3989 if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
3990 return EmitSVETupleSetOrGet(TypeFlags, Ops);
3991 if (TypeFlags.isTupleCreate())
3992 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
3993 if (TypeFlags.isUndef())
3994 return UndefValue::get(Ty);
3995
3996 // Handle built-ins for which there is a corresponding LLVM Intrinsic.
3997 // -------------------------------------------------------------------
3998 if (Builtin->LLVMIntrinsic != 0) {
3999 // Emit set FPMR for intrinsics that require it
// The FPMR value is the last operand; pop_back_val() removes it from Ops so
// it is not passed to the main intrinsic call.
4000 if (TypeFlags.setsFPMR())
4001 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
4002 Ops.pop_back_val());
4003 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
4005
4006 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
4008
4009 // Some ACLE builtins leave out the argument to specify the predicate
4010 // pattern, which is expected to be expanded to an SV_ALL pattern.
4011 if (TypeFlags.isAppendSVALL())
4012 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
4013 if (TypeFlags.isInsertOp1SVALL())
4014 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
4015
4016 // Predicates must match the main datatype.
// Every operand that is a vector of i1 is cast to getSVEType(TypeFlags).
4017 for (Value *&Op : Ops)
4018 if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
4019 if (PredTy->getElementType()->isIntegerTy(1))
4020 Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));
4021
4022 // Splat scalar operand to vector (intrinsics with _n infix)
4023 if (TypeFlags.hasSplatOperand()) {
4024 unsigned OpNo = TypeFlags.getSplatOperand();
4025 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
4026 }
4027
// Operand reordering requested by the type flags. At most one of these
// swaps is applied because the conditions form an else-if chain.
4028 if (TypeFlags.isReverseCompare())
4029 std::swap(Ops[1], Ops[2]);
4030 else if (TypeFlags.isReverseUSDOT())
4031 std::swap(Ops[1], Ops[2]);
4032 else if (TypeFlags.isReverseMergeAnyBinOp() &&
4033 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4034 std::swap(Ops[1], Ops[2]);
4035 else if (TypeFlags.isReverseMergeAnyAccOp() &&
4036 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4037 std::swap(Ops[1], Ops[3]);
4038
4039 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
// Ops[0] is used as the select condition; Ops[1] is replaced by a select
// between its own value and zero.
4040 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
4041 llvm::Type *OpndTy = Ops[1]->getType();
4042 auto *SplatZero = Constant::getNullValue(OpndTy);
4043 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
4044 }
4045
4046 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
4047 getSVEOverloadTypes(TypeFlags, Ty, Ops));
4048 Value *Call = Builder.CreateCall(F, Ops);
4049
4050 if (Call->getType() == Ty)
4051 return Call;
4052
// The intrinsic's result type differs from the builtin's result type; only
// scalable-vector and struct result types are converted below.
4053 // Predicate results must be converted to svbool_t.
4054 if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ty))
4055 return EmitSVEPredicateCast(Call, PredTy);
4056 if (auto PredTupleTy = dyn_cast<llvm::StructType>(Ty))
4057 return EmitSVEPredicateTupleCast(Call, PredTupleTy);
4058
4059 llvm_unreachable("unsupported element count!");
4060 }
4061
// Builtins without a directly corresponding LLVM intrinsic are expanded by
// hand below. Unknown IDs return nullptr.
4062 switch (BuiltinID) {
4063 default:
4064 return nullptr;
4065
// svreinterpret_b / svreinterpret_c convert between the "aarch64.svcount"
// target type and svbool via the convert_to/from_svbool intrinsics.
4066 case SVE::BI__builtin_sve_svreinterpret_b: {
4067 auto SVCountTy =
4068 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4069 Function *CastFromSVCountF =
4070 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4071 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
4072 }
4073 case SVE::BI__builtin_sve_svreinterpret_c: {
4074 auto SVCountTy =
4075 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4076 Function *CastToSVCountF =
4077 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4078 return Builder.CreateCall(CastToSVCountF, Ops[0]);
4079 }
4080
4081 case SVE::BI__builtin_sve_svpsel_lane_b8:
4082 case SVE::BI__builtin_sve_svpsel_lane_b16:
4083 case SVE::BI__builtin_sve_svpsel_lane_b32:
4084 case SVE::BI__builtin_sve_svpsel_lane_b64:
4085 case SVE::BI__builtin_sve_svpsel_lane_c8:
4086 case SVE::BI__builtin_sve_svpsel_lane_c16:
4087 case SVE::BI__builtin_sve_svpsel_lane_c32:
4088 case SVE::BI__builtin_sve_svpsel_lane_c64: {
// When the first operand is an svcount value it is converted to svbool for
// the psel call and the result is converted back to svcount afterwards.
4089 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
4090 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
4091 "aarch64.svcount")) &&
4092 "Unexpected TargetExtType");
4093 auto SVCountTy =
4094 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4095 Function *CastFromSVCountF =
4096 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4097 Function *CastToSVCountF =
4098 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4099
4100 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
4101 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
4102 llvm::Value *Ops0 =
4103 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
4104 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
4105 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
4106 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
4107 }
4108 case SVE::BI__builtin_sve_svmov_b_z: {
4109 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
// This local TypeFlags shadows the function-scope variable of the same
// name; both are built from Builtin->TypeModifier.
4110 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4111 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4112 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
4113 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
4114 }
4115
4116 case SVE::BI__builtin_sve_svnot_b_z: {
4117 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
4118 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4119 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4120 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
4121 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
4122 }
4123
// svmovlb/svmovlt are emitted through EmitSVEMovl with the unsigned or
// signed shift-left-long (bottom/top) intrinsic.
4124 case SVE::BI__builtin_sve_svmovlb_u16:
4125 case SVE::BI__builtin_sve_svmovlb_u32:
4126 case SVE::BI__builtin_sve_svmovlb_u64:
4127 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
4128
4129 case SVE::BI__builtin_sve_svmovlb_s16:
4130 case SVE::BI__builtin_sve_svmovlb_s32:
4131 case SVE::BI__builtin_sve_svmovlb_s64:
4132 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
4133
4134 case SVE::BI__builtin_sve_svmovlt_u16:
4135 case SVE::BI__builtin_sve_svmovlt_u32:
4136 case SVE::BI__builtin_sve_svmovlt_u64:
4137 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
4138
4139 case SVE::BI__builtin_sve_svmovlt_s16:
4140 case SVE::BI__builtin_sve_svmovlt_s32:
4141 case SVE::BI__builtin_sve_svmovlt_s64:
4142 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
4143
4144 case SVE::BI__builtin_sve_svpmullt_u16:
4145 case SVE::BI__builtin_sve_svpmullt_u64:
4146 case SVE::BI__builtin_sve_svpmullt_n_u16:
4147 case SVE::BI__builtin_sve_svpmullt_n_u64:
4148 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
4149
4150 case SVE::BI__builtin_sve_svpmullb_u16:
4151 case SVE::BI__builtin_sve_svpmullb_u64:
4152 case SVE::BI__builtin_sve_svpmullb_n_u16:
4153 case SVE::BI__builtin_sve_svpmullb_n_u64:
4154 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
4155
4156 case SVE::BI__builtin_sve_svdup_n_b8:
4157 case SVE::BI__builtin_sve_svdup_n_b16:
4158 case SVE::BI__builtin_sve_svdup_n_b32:
4159 case SVE::BI__builtin_sve_svdup_n_b64: {
// The scalar operand is compared against zero and the resulting i1 is
// splatted to the overloaded predicate type.
4160 Value *CmpNE =
4161 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
4162 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
4163 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
4165 }
4166
4167 case SVE::BI__builtin_sve_svdupq_n_b8:
4168 case SVE::BI__builtin_sve_svdupq_n_b16:
4169 case SVE::BI__builtin_sve_svdupq_n_b32:
4170 case SVE::BI__builtin_sve_svdupq_n_b64:
4171 case SVE::BI__builtin_sve_svdupq_n_u8:
4172 case SVE::BI__builtin_sve_svdupq_n_s8:
4173 case SVE::BI__builtin_sve_svdupq_n_u64:
4174 case SVE::BI__builtin_sve_svdupq_n_f64:
4175 case SVE::BI__builtin_sve_svdupq_n_s64:
4176 case SVE::BI__builtin_sve_svdupq_n_u16:
4177 case SVE::BI__builtin_sve_svdupq_n_f16:
4178 case SVE::BI__builtin_sve_svdupq_n_bf16:
4179 case SVE::BI__builtin_sve_svdupq_n_s16:
4180 case SVE::BI__builtin_sve_svdupq_n_u32:
4181 case SVE::BI__builtin_sve_svdupq_n_f32:
4182 case SVE::BI__builtin_sve_svdupq_n_s32: {
4183 // These builtins are implemented by storing each element to an array and using
4184 // ld1rq to materialize a vector.
// NOTE(review): the code below builds a fixed vector, inserts it into a
// poison scalable vector and calls dupq_lane; the "array + ld1rq" wording
// above may be out of date - confirm.
4185 unsigned NumOpnds = Ops.size();
4186
4187 bool IsBoolTy =
4188 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
4189
4190 // For svdupq_n_b* the element type is an integer of width 128/numelts,
4191 // so that the compare can use the width that is natural for the expected
4192 // number of predicate lanes.
4193 llvm::Type *EltTy = Ops[0]->getType();
4194 if (IsBoolTy)
4195 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
4196
// Every operand is zero-extended to EltTy before the vector is built.
4198 for (unsigned I = 0; I < NumOpnds; ++I)
4199 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
4200 Value *Vec = BuildVector(VecOps);
4201
// Place the fixed vector at index 0 of a poison scalable vector, then
// call dupq_lane with lane 0.
4202 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
4203 Value *InsertSubVec = Builder.CreateInsertVector(
4204 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, uint64_t(0));
4205
4206 Function *F =
4207 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
4208 Value *DupQLane =
4209 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
4210
4211 if (!IsBoolTy)
4212 return DupQLane;
4213
4214 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4215 Constant *Pred = ConstantInt::getTrue(getSVEPredType(TypeFlags));
4216
4217 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
// cmpne is used when there are exactly two operands, cmpne_wide otherwise;
// both compare against a splat of i64 zero under an all-true predicate.
4218 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
4219 : Intrinsic::aarch64_sve_cmpne_wide,
4220 OverloadedTy);
4221 Value *Call = Builder.CreateCall(
4222 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
4224 }
4225
4226 case SVE::BI__builtin_sve_svpfalse_b:
4227 return ConstantInt::getFalse(Ty);
4228
4229 case SVE::BI__builtin_sve_svpfalse_c: {
// An all-false <vscale x 16 x i1> constant is converted to the result type
// Ty with convert_from_svbool.
4230 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
4231 Function *CastToSVCountF =
4232 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
4233 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
4234 }
4235
4236 case SVE::BI__builtin_sve_svlen_bf16:
4237 case SVE::BI__builtin_sve_svlen_f16:
4238 case SVE::BI__builtin_sve_svlen_f32:
4239 case SVE::BI__builtin_sve_svlen_f64:
4240 case SVE::BI__builtin_sve_svlen_s8:
4241 case SVE::BI__builtin_sve_svlen_s16:
4242 case SVE::BI__builtin_sve_svlen_s32:
4243 case SVE::BI__builtin_sve_svlen_s64:
4244 case SVE::BI__builtin_sve_svlen_u8:
4245 case SVE::BI__builtin_sve_svlen_u16:
4246 case SVE::BI__builtin_sve_svlen_u32:
4247 case SVE::BI__builtin_sve_svlen_u64: {
// svlen yields the element count of the builtin's SVE vector type,
// materialized as a value of type Ty.
4248 SVETypeFlags TF(Builtin->TypeModifier);
4249 return Builder.CreateElementCount(Ty, getSVEType(TF)->getElementCount());
4250 }
4251
4252 case SVE::BI__builtin_sve_svtbl2_u8:
4253 case SVE::BI__builtin_sve_svtbl2_s8:
4254 case SVE::BI__builtin_sve_svtbl2_u16:
4255 case SVE::BI__builtin_sve_svtbl2_s16:
4256 case SVE::BI__builtin_sve_svtbl2_u32:
4257 case SVE::BI__builtin_sve_svtbl2_s32:
4258 case SVE::BI__builtin_sve_svtbl2_u64:
4259 case SVE::BI__builtin_sve_svtbl2_s64:
4260 case SVE::BI__builtin_sve_svtbl2_f16:
4261 case SVE::BI__builtin_sve_svtbl2_bf16:
4262 case SVE::BI__builtin_sve_svtbl2_f32:
4263 case SVE::BI__builtin_sve_svtbl2_f64: {
4264 SVETypeFlags TF(Builtin->TypeModifier);
4265 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, getSVEType(TF));
4266 return Builder.CreateCall(F, Ops);
4267 }
4268
// svset_neonq: insert Ops[1] into Ops[0] at element index 0.
4269 case SVE::BI__builtin_sve_svset_neonq_s8:
4270 case SVE::BI__builtin_sve_svset_neonq_s16:
4271 case SVE::BI__builtin_sve_svset_neonq_s32:
4272 case SVE::BI__builtin_sve_svset_neonq_s64:
4273 case SVE::BI__builtin_sve_svset_neonq_u8:
4274 case SVE::BI__builtin_sve_svset_neonq_u16:
4275 case SVE::BI__builtin_sve_svset_neonq_u32:
4276 case SVE::BI__builtin_sve_svset_neonq_u64:
4277 case SVE::BI__builtin_sve_svset_neonq_f16:
4278 case SVE::BI__builtin_sve_svset_neonq_f32:
4279 case SVE::BI__builtin_sve_svset_neonq_f64:
4280 case SVE::BI__builtin_sve_svset_neonq_bf16: {
4281 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], uint64_t(0));
4282 }
4283
// svget_neonq: extract a vector of type Ty from Ops[0] at element index 0.
4284 case SVE::BI__builtin_sve_svget_neonq_s8:
4285 case SVE::BI__builtin_sve_svget_neonq_s16:
4286 case SVE::BI__builtin_sve_svget_neonq_s32:
4287 case SVE::BI__builtin_sve_svget_neonq_s64:
4288 case SVE::BI__builtin_sve_svget_neonq_u8:
4289 case SVE::BI__builtin_sve_svget_neonq_u16:
4290 case SVE::BI__builtin_sve_svget_neonq_u32:
4291 case SVE::BI__builtin_sve_svget_neonq_u64:
4292 case SVE::BI__builtin_sve_svget_neonq_f16:
4293 case SVE::BI__builtin_sve_svget_neonq_f32:
4294 case SVE::BI__builtin_sve_svget_neonq_f64:
4295 case SVE::BI__builtin_sve_svget_neonq_bf16: {
4296 return Builder.CreateExtractVector(Ty, Ops[0], uint64_t(0));
4297 }
4298
// svdup_neonq: insert Ops[0] at index 0 of a poison vector of type Ty, then
// call dupq_lane with lane 0.
4299 case SVE::BI__builtin_sve_svdup_neonq_s8:
4300 case SVE::BI__builtin_sve_svdup_neonq_s16:
4301 case SVE::BI__builtin_sve_svdup_neonq_s32:
4302 case SVE::BI__builtin_sve_svdup_neonq_s64:
4303 case SVE::BI__builtin_sve_svdup_neonq_u8:
4304 case SVE::BI__builtin_sve_svdup_neonq_u16:
4305 case SVE::BI__builtin_sve_svdup_neonq_u32:
4306 case SVE::BI__builtin_sve_svdup_neonq_u64:
4307 case SVE::BI__builtin_sve_svdup_neonq_f16:
4308 case SVE::BI__builtin_sve_svdup_neonq_f32:
4309 case SVE::BI__builtin_sve_svdup_neonq_f64:
4310 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
4311 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
4312 uint64_t(0));
4313 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
4314 {Insert, Builder.getInt64(0)});
4315 }
4316 }
4317
4318 /// Should not happen
4319 return nullptr;
4320}
4321
/// For the SME svsumla/svsudot builtins listed below, exchanges the two
/// groups of MultiVec consecutive operands that start at Ops[1] and at
/// Ops[1 + MultiVec]. Ops for any other builtin are left untouched.
4322static void swapCommutativeSMEOperands(unsigned BuiltinID,
4324 unsigned MultiVec;
// MultiVec is the number of operands in each group, selected by the
// _vg*x1 / x2 / x4 suffix of the builtin.
4325 switch (BuiltinID) {
4326 default:
4327 return;
4328 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
4329 MultiVec = 1;
4330 break;
4331 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
4332 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
4333 MultiVec = 2;
4334 break;
4335 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
4336 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
4337 MultiVec = 4;
4338 break;
4339 }
4340
// MultiVec is always 1, 2 or 4 here because the default case returned
// above, so the check below is always true.
4341 if (MultiVec > 0)
4342 for (unsigned I = 0; I < MultiVec; ++I)
4343 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
4344}
4345
4347 const CallExpr *E) {
// Lowers one SME builtin call to LLVM IR. Operands are collected with
// GetAArch64SVEProcessedOperands. Loads/stores, ZA reads/writes, zeroing
// and ldr/str builtins go to dedicated emitters. The svcnts* builtins are
// computed from cntsd. Everything else becomes a single call to
// Builtin->LLVMIntrinsic, or nullptr when that is 0.
4348 auto *Builtin =
4351
4353 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4354 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4355
4356 if (TypeFlags.isLoad() || TypeFlags.isStore())
4357 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4358 if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
4359 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4360 if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
4361 BuiltinID == SME::BI__builtin_sme_svzero_za)
4362 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4363 if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
4364 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
4365 BuiltinID == SME::BI__builtin_sme_svldr_za ||
4366 BuiltinID == SME::BI__builtin_sme_svstr_za)
4367 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4368
4369 // Emit set FPMR for intrinsics that require it
// The FPMR value is the last operand; pop_back_val() removes it from Ops
// before the operand swap and the main call.
4370 if (TypeFlags.setsFPMR())
4371 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
4372 Ops.pop_back_val());
4373 // Handle builtins which require their multi-vector operands to be swapped
4374 swapCommutativeSMEOperands(BuiltinID, Ops);
4375
// Returns the factor to multiply cntsd by for svcntsb/h/w (8/4/2), or 0
// when BuiltinID is not one of those builtins.
4376 auto isCntsBuiltin = [&]() {
4377 switch (BuiltinID) {
4378 default:
4379 return 0;
4380 case SME::BI__builtin_sme_svcntsb:
4381 return 8;
4382 case SME::BI__builtin_sme_svcntsh:
4383 return 4;
4384 case SME::BI__builtin_sme_svcntsw:
4385 return 2;
4386 }
4387 };
4388
// svcnts{b,h,w} = cntsd * factor, emitted as a multiply with the nuw and
// nsw flags set.
4389 if (auto Mul = isCntsBuiltin()) {
4390 llvm::Value *Cntd =
4391 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd));
4392 return Builder.CreateMul(Cntd, llvm::ConstantInt::get(Int64Ty, Mul),
4393 "mulsvl", /* HasNUW */ true, /* HasNSW */ true);
4394 }
4395
4396 // Should not happen!
4397 if (Builtin->LLVMIntrinsic == 0)
4398 return nullptr;
4399
4400 // Predicates must match the main datatype.
4401 for (Value *&Op : Ops)
4402 if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
4403 if (PredTy->getElementType()->isIntegerTy(1))
4404 Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));
4405
// The ZT0 load/store intrinsics are overloaded on the type of operand 1.
4406 if (BuiltinID == SME::BI__builtin_sme_svldr_zt ||
4407 BuiltinID == SME::BI__builtin_sme_svstr_zt) {
4408 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic, Ops[1]->getType());
4409 return Builder.CreateCall(F, Ops);
4410 }
4411
// All remaining builtins either take no overload type or are overloaded on
// the builtin's SVE vector type.
4412 Function *F =
4413 TypeFlags.isOverloadNone()
4414 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
4415 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
4416
4417 return Builder.CreateCall(F, Ops);
4418}
4419
4420/// Helper for the read/write/add/inc X18 builtins: read the X18 register and
4421/// return it as an i8 pointer.
4423 LLVMContext &Context = CGF.CGM.getLLVMContext();
// The register name "x18" is wrapped as MDString -> MDNode ->
// MetadataAsValue and passed as the argument to llvm.read_register.
4424 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
4425 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4426 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
// read_register is instantiated for i64.
4427 llvm::Function *F =
4428 CGF.CGM.getIntrinsic(Intrinsic::read_register, {CGF.Int64Ty});
4429 llvm::Value *X18 = CGF.Builder.CreateCall(F, Metadata);
// The i64 result is converted to a pointer of type Int8PtrTy.
4430 return CGF.Builder.CreateIntToPtr(X18, CGF.Int8PtrTy);
4431}
4432
4434 const CallExpr *E,
4435 llvm::Triple::ArchType Arch) {
4436 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
4437 BuiltinID <= clang::AArch64::LastSVEBuiltin)
4438 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
4439
4440 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
4441 BuiltinID <= clang::AArch64::LastSMEBuiltin)
4442 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
4443
4444 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
4445 return EmitAArch64CpuSupports(E);
4446
4447 unsigned HintID = static_cast<unsigned>(-1);
4448 switch (BuiltinID) {
4449 default: break;
4450 case clang::AArch64::BI__builtin_arm_nop:
4451 HintID = 0;
4452 break;
4453 case clang::AArch64::BI__builtin_arm_yield:
4454 case clang::AArch64::BI__yield:
4455 HintID = 1;
4456 break;
4457 case clang::AArch64::BI__builtin_arm_wfe:
4458 case clang::AArch64::BI__wfe:
4459 HintID = 2;
4460 break;
4461 case clang::AArch64::BI__builtin_arm_wfi:
4462 case clang::AArch64::BI__wfi:
4463 HintID = 3;
4464 break;
4465 case clang::AArch64::BI__builtin_arm_sev:
4466 case clang::AArch64::BI__sev:
4467 HintID = 4;
4468 break;
4469 case clang::AArch64::BI__builtin_arm_sevl:
4470 case clang::AArch64::BI__sevl:
4471 HintID = 5;
4472 break;
4473 }
4474
4475 if (HintID != static_cast<unsigned>(-1)) {
4476 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
4477 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
4478 }
4479
4480 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
4481 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
4482 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4483 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
4484 }
4485
4486 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
4487 // Create call to __arm_sme_state and store the results to the two pointers.
4488 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
4489 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
4490 false),
4491 "__arm_sme_state"));
4492 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
4493 "aarch64_pstate_sm_compatible");
4494 CI->setAttributes(Attrs);
4495 CI->setCallingConv(
4496 llvm::CallingConv::
4497 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
4498 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
4500 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
4502 }
4503
4504 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
4505 assert((getContext().getTypeSize(E->getType()) == 32) &&
4506 "rbit of unusual size!");
4507 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4508 return Builder.CreateCall(
4509 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4510 }
4511 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
4512 assert((getContext().getTypeSize(E->getType()) == 64) &&
4513 "rbit of unusual size!");
4514 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4515 return Builder.CreateCall(
4516 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4517 }
4518
4519 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
4520 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
4521 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4522 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
4523 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
4524 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
4525 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
4526 return Res;
4527 }
4528
4529 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
4530 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4531 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
4532 "cls");
4533 }
4534 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
4535 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4536 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
4537 "cls");
4538 }
4539
4540 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
4541 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
4542 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4543 llvm::Type *Ty = Arg->getType();
4544 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
4545 Arg, "frint32z");
4546 }
4547
4548 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
4549 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
4550 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4551 llvm::Type *Ty = Arg->getType();
4552 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
4553 Arg, "frint64z");
4554 }
4555
4556 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
4557 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
4558 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4559 llvm::Type *Ty = Arg->getType();
4560 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
4561 Arg, "frint32x");
4562 }
4563
4564 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
4565 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
4566 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4567 llvm::Type *Ty = Arg->getType();
4568 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
4569 Arg, "frint64x");
4570 }
4571
4572 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
4573 assert((getContext().getTypeSize(E->getType()) == 32) &&
4574 "__jcvt of unusual size!");
4575 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4576 return Builder.CreateCall(
4577 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
4578 }
4579
4580 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
4581 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
4582 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
4583 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
4584 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
4585 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
4586
4587 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
4588 // Load from the address via an LLVM intrinsic, receiving a
4589 // tuple of 8 i64 words, and store each one to ValPtr.
4590 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
4591 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
4592 llvm::Value *ToRet;
4593 for (size_t i = 0; i < 8; i++) {
4594 llvm::Value *ValOffsetPtr =
4595 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
4596 Address Addr =
4597 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
4598 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
4599 }
4600 return ToRet;
4601 }
4602
4603 // Load 8 i64 words from ValPtr, and store them to the address
4604 // via an LLVM intrinsic.
4606 Args.push_back(MemAddr);
4607 for (size_t i = 0; i < 8; i++) {
4608 llvm::Value *ValOffsetPtr =
4609 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
4610 Address Addr = Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
4611 Args.push_back(Builder.CreateLoad(Addr));
4612 }
4613
4614 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
4615 ? Intrinsic::aarch64_st64b
4616 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
4617 ? Intrinsic::aarch64_st64bv
4618 : Intrinsic::aarch64_st64bv0);
4619 Function *F = CGM.getIntrinsic(Intr);
4620 return Builder.CreateCall(F, Args);
4621 }
4622
4623 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
4624 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
4625
4626 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
4627 ? Intrinsic::aarch64_rndr
4628 : Intrinsic::aarch64_rndrrs);
4629 Function *F = CGM.getIntrinsic(Intr);
4630 llvm::Value *Val = Builder.CreateCall(F);
4631 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
4632 Value *Status = Builder.CreateExtractValue(Val, 1);
4633
4634 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
4635 Builder.CreateStore(RandomValue, MemAddress);
4636 Status = Builder.CreateZExt(Status, Int32Ty);
4637 return Status;
4638 }
4639
4640 if (BuiltinID == clang::AArch64::BI__clear_cache) {
4641 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4642 const FunctionDecl *FD = E->getDirectCallee();
4643 Value *Ops[2];
4644 for (unsigned i = 0; i < 2; i++)
4645 Ops[i] = EmitScalarExpr(E->getArg(i));
4646 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4647 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4648 StringRef Name = FD->getName();
4649 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4650 }
4651
4652 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
4653 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
4654 getContext().getTypeSize(E->getType()) == 128) {
4655 Function *F =
4656 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
4657 ? Intrinsic::aarch64_ldaxp
4658 : Intrinsic::aarch64_ldxp);
4659
4660 Value *LdPtr = EmitScalarExpr(E->getArg(0));
4661 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
4662
4663 Value *Val0 = Builder.CreateExtractValue(Val, 1);
4664 Value *Val1 = Builder.CreateExtractValue(Val, 0);
4665 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
4666 Val0 = Builder.CreateZExt(Val0, Int128Ty);
4667 Val1 = Builder.CreateZExt(Val1, Int128Ty);
4668
4669 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
4670 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4671 Val = Builder.CreateOr(Val, Val1);
4672 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4673 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
4674 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
4675 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4676
4677 QualType Ty = E->getType();
4678 llvm::Type *RealResTy = ConvertType(Ty);
4679 llvm::Type *IntTy =
4680 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
4681
4682 Function *F =
4683 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
4684 ? Intrinsic::aarch64_ldaxr
4685 : Intrinsic::aarch64_ldxr,
4686 DefaultPtrTy);
4687 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
4688 Val->addParamAttr(
4689 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
4690
4691 if (RealResTy->isPointerTy())
4692 return Builder.CreateIntToPtr(Val, RealResTy);
4693
4694 llvm::Type *IntResTy = llvm::IntegerType::get(
4695 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
4696 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
4697 RealResTy);
4698 }
4699
4700 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
4701 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
4702 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
4703 Function *F =
4704 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
4705 ? Intrinsic::aarch64_stlxp
4706 : Intrinsic::aarch64_stxp);
4707 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
4708
4710 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
4711
4712 Tmp = Tmp.withElementType(STy);
4713 llvm::Value *Val = Builder.CreateLoad(Tmp);
4714
4715 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4716 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4717 Value *StPtr = EmitScalarExpr(E->getArg(1));
4718 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
4719 }
4720
4721 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
4722 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
4723 Value *StoreVal = EmitScalarExpr(E->getArg(0));
4724 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4725
4726 QualType Ty = E->getArg(0)->getType();
4727 llvm::Type *StoreTy =
4728 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
4729
4730 if (StoreVal->getType()->isPointerTy())
4731 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
4732 else {
4733 llvm::Type *IntTy = llvm::IntegerType::get(
4735 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
4736 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
4737 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
4738 }
4739
4740 Function *F =
4741 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
4742 ? Intrinsic::aarch64_stlxr
4743 : Intrinsic::aarch64_stxr,
4744 StoreAddr->getType());
4745 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
4746 CI->addParamAttr(
4747 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
4748 return CI;
4749 }
4750
4751 if (BuiltinID == clang::AArch64::BI__getReg ||
4752 BuiltinID == clang::AArch64::BI__setReg) {
4754 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
4755 llvm_unreachable("Sema will ensure that the parameter is constant");
4756
4757 llvm::APSInt Value = Result.Val.getInt();
4758 LLVMContext &Context = CGM.getLLVMContext();
4759 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
4760
4761 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
4762 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4763 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4764
4765 CallInst *CI;
4766 if (BuiltinID == clang::AArch64::BI__getReg) {
4767 llvm::Function *F =
4768 CGM.getIntrinsic(Intrinsic::read_volatile_register, {Int64Ty});
4769 CI = Builder.CreateCall(F, Metadata);
4770 } else {
4771 llvm::Function *F =
4772 CGM.getIntrinsic(Intrinsic::write_volatile_register, {Int64Ty});
4773 CI = Builder.CreateCall(F, {Metadata, EmitScalarExpr(E->getArg(1))});
4774 }
4775 return CI;
4776 }
4777
4778 if (BuiltinID == clang::AArch64::BI__getRegFp ||
4779 BuiltinID == clang::AArch64::BI__setRegFp) {
4781 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
4782 llvm_unreachable("Sema will ensure that the parameter is constant");
4783
4784 llvm::APSInt Value = Result.Val.getInt();
4785 LLVMContext &Context = CGM.getLLVMContext();
4786 std::string Reg = "d" + toString(Value, 10);
4787
4788 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
4789 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4790 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4791
4792 llvm::Value *Ret;
4793 if (BuiltinID == clang::AArch64::BI__getRegFp) {
4794 llvm::Function *F =
4795 CGM.getIntrinsic(Intrinsic::read_volatile_register, {Int64Ty});
4796 llvm::Value *Bits = Builder.CreateCall(F, Metadata);
4797 Ret = Builder.CreateBitCast(Bits, llvm::Type::getDoubleTy(Context));
4798 } else {
4799 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
4800 llvm::Value *Bits = Builder.CreateBitCast(Val, Int64Ty);
4801 llvm::Function *F =
4802 CGM.getIntrinsic(Intrinsic::write_volatile_register, {Int64Ty});
4803 Ret = Builder.CreateCall(F, {Metadata, Bits});
4804 }
4805 return Ret;
4806 }
4807
4808 if (BuiltinID == clang::AArch64::BI__break) {
4810 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
4811 llvm_unreachable("Sema will ensure that the parameter is constant");
4812
4813 llvm::Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
4814 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
4815 }
4816
4817 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
4818 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
4819 return Builder.CreateCall(F);
4820 }
4821
4822 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
4823 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
4824 llvm::SyncScope::SingleThread);
4825
4826 // CRC32
4827 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4828 switch (BuiltinID) {
4829 case clang::AArch64::BI__builtin_arm_crc32b:
4830 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
4831 case clang::AArch64::BI__builtin_arm_crc32cb:
4832 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
4833 case clang::AArch64::BI__builtin_arm_crc32h:
4834 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
4835 case clang::AArch64::BI__builtin_arm_crc32ch:
4836 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
4837 case clang::AArch64::BI__builtin_arm_crc32w:
4838 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
4839 case clang::AArch64::BI__builtin_arm_crc32cw:
4840 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
4841 case clang::AArch64::BI__builtin_arm_crc32d:
4842 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
4843 case clang::AArch64::BI__builtin_arm_crc32cd:
4844 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
4845 }
4846
4847 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4848 Value *Arg0 = EmitScalarExpr(E->getArg(0));
4849 Value *Arg1 = EmitScalarExpr(E->getArg(1));
4850 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4851
4852 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
4853 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
4854
4855 return Builder.CreateCall(F, {Arg0, Arg1});
4856 }
4857
4858 // Memory Operations (MOPS)
4859 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
4860 Value *Dst = EmitScalarExpr(E->getArg(0));
4861 Value *Val = EmitScalarExpr(E->getArg(1));
4862 Value *Size = EmitScalarExpr(E->getArg(2));
4863 Val = Builder.CreateTrunc(Val, Int8Ty);
4864 Size = Builder.CreateIntCast(Size, Int64Ty, false);
4865 return Builder.CreateCall(
4866 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
4867 }
4868
4869 if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch ||
4870 BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x)
4871 return EmitRangePrefetchBuiltin(*this, BuiltinID, E);
4872
4873 // Memory Tagging Extensions (MTE) Intrinsics
4874 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
4875 switch (BuiltinID) {
4876 case clang::AArch64::BI__builtin_arm_irg:
4877 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
4878 case clang::AArch64::BI__builtin_arm_addg:
4879 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
4880 case clang::AArch64::BI__builtin_arm_gmi:
4881 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
4882 case clang::AArch64::BI__builtin_arm_ldg:
4883 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
4884 case clang::AArch64::BI__builtin_arm_stg:
4885 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
4886 case clang::AArch64::BI__builtin_arm_subp:
4887 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
4888 }
4889
4890 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
4891 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
4893 Value *Mask = EmitScalarExpr(E->getArg(1));
4894 assert(Mask->getType()->getScalarSizeInBits() == 64 &&
4895 "SemaARM::BuiltinARMMemoryTaggingCall() enforces this");
4896 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
4897 {Pointer, Mask});
4898 }
4899 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
4901 Value *TagOffset = EmitScalarExpr(E->getArg(1));
4902
4903 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
4904 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
4905 {Pointer, TagOffset});
4906 }
4907 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
4909 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
4910 assert(ExcludedMask->getType()->getScalarSizeInBits() == 64 &&
4911 "SemaARM::BuiltinARMMemoryTaggingCall() enforces this");
4912 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
4913 {Pointer, ExcludedMask});
4914 }
4915 // Although it is possible to supply a different return
4916 // address (first arg) to this intrinsic, for now we set the
4917 // return address to be the same as the input address.
4918 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
4919 Value *TagAddress = EmitScalarExpr(E->getArg(0));
4920 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
4921 {TagAddress, TagAddress});
4922 }
4923 // Although it is possible to supply a different tag (to set)
4924 // to this intrinsic (as first arg), for now we supply
4925 // the tag that is in input address arg (common use case).
4926 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
4927 Value *TagAddress = EmitScalarExpr(E->getArg(0));
4928 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
4929 {TagAddress, TagAddress});
4930 }
4931 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
4932 Value *PointerA = EmitScalarExpr(E->getArg(0));
4933 Value *PointerB = EmitScalarExpr(E->getArg(1));
4934 return Builder.CreateCall(
4935 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
4936 }
4937 }
4938
4939 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
4940 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
4941 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
4942 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
4943 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
4944 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
4945 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
4946 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
4947
4948 SpecialRegisterAccessKind AccessKind = Write;
4949 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
4950 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
4951 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
4952 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
4953 AccessKind = VolatileRead;
4954
4955 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
4956 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
4957
4958 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
4959 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
4960
4961 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
4962 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
4963
4964 llvm::Type *ValueType;
4965 llvm::Type *RegisterType = Int64Ty;
4966 if (Is32Bit) {
4967 ValueType = Int32Ty;
4968 } else if (Is128Bit) {
4969 llvm::Type *Int128Ty =
4970 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
4971 ValueType = Int128Ty;
4972 RegisterType = Int128Ty;
4973 } else if (IsPointerBuiltin) {
4974 ValueType = VoidPtrTy;
4975 } else {
4976 ValueType = Int64Ty;
4977 };
4978
4979 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
4980 AccessKind);
4981 }
4982
4983 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
4984 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
4985 LLVMContext &Context = CGM.getLLVMContext();
4986
4987 unsigned SysReg =
4988 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
4989
4990 std::string SysRegStr;
4991 llvm::raw_string_ostream(SysRegStr)
4992 << (0b10 | SysReg >> 14) << ":" << ((SysReg >> 11) & 7) << ":"
4993 << ((SysReg >> 7) & 15) << ":" << ((SysReg >> 3) & 15) << ":"
4994 << (SysReg & 7);
4995
4996 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
4997 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4998 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4999
5000 llvm::Type *RegisterType = Int64Ty;
5001 llvm::Type *Types[] = { RegisterType };
5002
5003 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
5004 llvm::Function *F = CGM.getIntrinsic(Intrinsic::read_register, Types);
5005
5006 return Builder.CreateCall(F, Metadata);
5007 }
5008
5009 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
5010 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
5011 llvm::Value *Result = Builder.CreateCall(F, {Metadata, ArgValue});
5012
5013 return Result;
5014 }
5015
5016 if (BuiltinID == clang::AArch64::BI__sys) {
5017 unsigned SysReg =
5018 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
5019 const unsigned Op1 = SysReg >> 11;
5020 const unsigned CRn = (SysReg >> 7) & 0xf;
5021 const unsigned CRm = (SysReg >> 3) & 0xf;
5022 const unsigned Op2 = SysReg & 0x7;
5023
5024 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_sys),
5025 {Builder.getInt32(Op1), Builder.getInt32(CRn),
5026 Builder.getInt32(CRm), Builder.getInt32(Op2),
5027 EmitScalarExpr(E->getArg(1))});
5028
5029 // Return 0 for convenience, even though MSVC returns some other undefined
5030 // value.
5031 return ConstantInt::get(Builder.getInt32Ty(), 0);
5032 }
5033
5034 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
5035 llvm::Function *F =
5036 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
5037 return Builder.CreateCall(F);
5038 }
5039
5040 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
5041 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
5042 return Builder.CreateCall(F);
5043 }
5044
5045 if (BuiltinID == clang::AArch64::BI__mulh ||
5046 BuiltinID == clang::AArch64::BI__umulh) {
5047 llvm::Type *ResType = ConvertType(E->getType());
5048 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5049
5050 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
5051 Value *LHS =
5052 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
5053 Value *RHS =
5054 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
5055
5056 Value *MulResult, *HigherBits;
5057 if (IsSigned) {
5058 MulResult = Builder.CreateNSWMul(LHS, RHS);
5059 HigherBits = Builder.CreateAShr(MulResult, 64);
5060 } else {
5061 MulResult = Builder.CreateNUWMul(LHS, RHS);
5062 HigherBits = Builder.CreateLShr(MulResult, 64);
5063 }
5064 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
5065
5066 return HigherBits;
5067 }
5068
5069 if (BuiltinID == AArch64::BI__writex18byte ||
5070 BuiltinID == AArch64::BI__writex18word ||
5071 BuiltinID == AArch64::BI__writex18dword ||
5072 BuiltinID == AArch64::BI__writex18qword) {
5073 // Process the args first
5074 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5075 Value *DataArg = EmitScalarExpr(E->getArg(1));
5076
5077 // Read x18 as i8*
5078 llvm::Value *X18 = readX18AsPtr(*this);
5079
5080 // Store val at x18 + offset
5081 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5082 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5083 StoreInst *Store =
5084 Builder.CreateAlignedStore(DataArg, Ptr, CharUnits::One());
5085 return Store;
5086 }
5087
5088 if (BuiltinID == AArch64::BI__readx18byte ||
5089 BuiltinID == AArch64::BI__readx18word ||
5090 BuiltinID == AArch64::BI__readx18dword ||
5091 BuiltinID == AArch64::BI__readx18qword) {
5092 // Process the args first
5093 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5094
5095 // Read x18 as i8*
5096 llvm::Value *X18 = readX18AsPtr(*this);
5097
5098 // Load x18 + offset
5099 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5100 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5101 llvm::Type *IntTy = ConvertType(E->getType());
5102 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
5103 return Load;
5104 }
5105
5106 if (BuiltinID == AArch64::BI__addx18byte ||
5107 BuiltinID == AArch64::BI__addx18word ||
5108 BuiltinID == AArch64::BI__addx18dword ||
5109 BuiltinID == AArch64::BI__addx18qword ||
5110 BuiltinID == AArch64::BI__incx18byte ||
5111 BuiltinID == AArch64::BI__incx18word ||
5112 BuiltinID == AArch64::BI__incx18dword ||
5113 BuiltinID == AArch64::BI__incx18qword) {
5114 llvm::Type *IntTy;
5115 bool isIncrement;
5116 switch (BuiltinID) {
5117 case AArch64::BI__incx18byte:
5118 IntTy = Int8Ty;
5119 isIncrement = true;
5120 break;
5121 case AArch64::BI__incx18word:
5122 IntTy = Int16Ty;
5123 isIncrement = true;
5124 break;
5125 case AArch64::BI__incx18dword:
5126 IntTy = Int32Ty;
5127 isIncrement = true;
5128 break;
5129 case AArch64::BI__incx18qword:
5130 IntTy = Int64Ty;
5131 isIncrement = true;
5132 break;
5133 default:
5134 IntTy = ConvertType(E->getArg(1)->getType());
5135 isIncrement = false;
5136 break;
5137 }
5138 // Process the args first
5139 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5140 Value *ValToAdd =
5141 isIncrement ? ConstantInt::get(IntTy, 1) : EmitScalarExpr(E->getArg(1));
5142
5143 // Read x18 as i8*
5144 llvm::Value *X18 = readX18AsPtr(*this);
5145
5146 // Load x18 + offset
5147 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5148 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5149 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
5150
5151 // Add values
5152 Value *AddResult = Builder.CreateAdd(Load, ValToAdd);
5153
5154 // Store val at x18 + offset
5155 StoreInst *Store =
5156 Builder.CreateAlignedStore(AddResult, Ptr, CharUnits::One());
5157 return Store;
5158 }
5159
5160 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
5161 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
5162 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
5163 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
5164 Value *Arg = EmitScalarExpr(E->getArg(0));
5165 llvm::Type *RetTy = ConvertType(E->getType());
5166 return Builder.CreateBitCast(Arg, RetTy);
5167 }
5168
5169 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5170 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5171 BuiltinID == AArch64::BI_CountLeadingZeros ||
5172 BuiltinID == AArch64::BI_CountLeadingZeros64) {
5173 Value *Arg = EmitScalarExpr(E->getArg(0));
5174 llvm::Type *ArgType = Arg->getType();
5175
5176 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5177 BuiltinID == AArch64::BI_CountLeadingOnes64)
5178 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
5179
5180 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
5181 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
5182
5183 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5184 BuiltinID == AArch64::BI_CountLeadingZeros64)
5185 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5186 return Result;
5187 }
5188
5189 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
5190 BuiltinID == AArch64::BI_CountLeadingSigns64) {
5191 Value *Arg = EmitScalarExpr(E->getArg(0));
5192
5193 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
5194 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
5195 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
5196
5197 Value *Result = Builder.CreateCall(F, Arg, "cls");
5198 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
5199 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5200 return Result;
5201 }
5202
5203 if (BuiltinID == AArch64::BI_CountOneBits ||
5204 BuiltinID == AArch64::BI_CountOneBits64) {
5205 Value *ArgValue = EmitScalarExpr(E->getArg(0));
5206 llvm::Type *ArgType = ArgValue->getType();
5207 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
5208
5209 Value *Result = Builder.CreateCall(F, ArgValue);
5210 if (BuiltinID == AArch64::BI_CountOneBits64)
5211 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5212 return Result;
5213 }
5214
5215 if (BuiltinID == AArch64::BI_CountTrailingZeros ||
5216 BuiltinID == AArch64::BI_CountTrailingZeros64) {
5217 Value *ArgValue = EmitScalarExpr(E->getArg(0));
5218 llvm::Type *ArgType = ArgValue->getType();
5219 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
5220
5221 // MSVC leaves the result for a zero input undefined; pass false for the
5221 // zero-is-poison flag so codegen is predictable.
5222 Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getInt1(false)});
5223 if (BuiltinID == AArch64::BI_CountTrailingZeros64)
5224 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5225 return Result;
5226 }
5227
5228 if (BuiltinID == AArch64::BI__prefetch) {
5230 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
5231 Value *Locality = ConstantInt::get(Int32Ty, 3);
5232 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
5233 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
5234 return Builder.CreateCall(F, {Address, RW, Locality, Data});
5235 }
5236
5237 if (BuiltinID == AArch64::BI__prefetch2) {
5239 llvm::APSInt PrfOp = E->getArg(1)->EvaluateKnownConstInt(CGM.getContext());
5240 // Decode 5-bit PRFM encoding: bits[4:3]=type, bits[2:1]=target,
5241 // bit[0]=policy
5242 // type: PLD=0(load), PLI=1(instr), PST=2(store)
5243 // target: L1=0, L2=1, L3=2
5244 // policy: KEEP=0, STRM=1
5245 uint64_t Op = PrfOp.getZExtValue();
5246 uint64_t Type = (Op >> 3) & 0x3;
5247 uint64_t Target = (Op >> 1) & 0x3;
5248 uint64_t Policy = Op & 0x1;
5249 Value *RW = Builder.getInt32(Type == 2 ? 1 : 0);
5250 Value *Local = Builder.getInt32(Target);
5251 Value *IsStream = Builder.getInt32(Policy);
5252 Value *IsData = Builder.getInt32(Type == 1 ? 0 : 1);
5253 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_prefetch);
5254 return Builder.CreateCall(F, {Address, RW, Local, IsStream, IsData});
5255 }
5256
5257 if (BuiltinID == AArch64::BI__hlt) {
5258 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
5259 Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
5260
5261 // Return 0 for convenience, even though MSVC returns some other undefined
5262 // value.
5263 return ConstantInt::get(Builder.getInt32Ty(), 0);
5264 }
5265
5266 if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
5267 return Builder.CreateFPTrunc(
5268 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
5269 Builder.getFloatTy()),
5270 Builder.getBFloatTy());
5271
5272 // Handle MSVC intrinsics before argument evaluation to prevent double
5273 // evaluation.
5274 if (std::optional<MSVCIntrin> MsvcIntId =
5276 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
5277
5278 // Some intrinsics are equivalent; if so, use the base intrinsic ID.
5279 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
5280 return P.first == BuiltinID;
5281 });
5282 if (It != end(NEONEquivalentIntrinsicMap))
5283 BuiltinID = It->second;
5284
5285 // Check whether this is an SISD builtin.
5286 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
5288 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5289 bool IsSISD = (Builtin != nullptr);
5290
5291 // Find out if any arguments are required to be integer constant
5292 // expressions.
5293 unsigned ICEArguments = 0;
5295 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5296 assert(Error == ASTContext::GE_None && "Should not codegen an error");
5297
5299 Address PtrOp0 = Address::invalid();
5300 // Note the assumption that SISD intrinsics do not contain extra arguments.
5301 // TODO: Fold this into a single function call instead of, effectively, two
5302 // separate checks.
5303 bool HasExtraArg = !IsSISD && HasExtraNeonArgument(BuiltinID);
5304 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
5305 for (unsigned i = 0, e = NumArgs; i != e; i++) {
5306 if (i == 0) {
5307 switch (BuiltinID) {
5308 case NEON::BI__builtin_neon_vld1_v:
5309 case NEON::BI__builtin_neon_vld1q_v:
5310 case NEON::BI__builtin_neon_vld1_dup_v:
5311 case NEON::BI__builtin_neon_vld1q_dup_v:
5312 case NEON::BI__builtin_neon_vld1_lane_v:
5313 case NEON::BI__builtin_neon_vld1q_lane_v:
5314 case NEON::BI__builtin_neon_vst1_v:
5315 case NEON::BI__builtin_neon_vst1q_v:
5316 case NEON::BI__builtin_neon_vst1_lane_v:
5317 case NEON::BI__builtin_neon_vst1q_lane_v:
5318 case NEON::BI__builtin_neon_vldap1_lane_s64:
5319 case NEON::BI__builtin_neon_vldap1q_lane_s64:
5320 case NEON::BI__builtin_neon_vstl1_lane_s64:
5321 case NEON::BI__builtin_neon_vstl1q_lane_s64:
5322 // Get the alignment for the argument in addition to the value;
5323 // we'll use it later.
5324 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
5325 Ops.push_back(PtrOp0.emitRawPointer(*this));
5326 continue;
5327 }
5328 }
5329 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
5330 }
5331
5332 if (Builtin) {
5334 assert(Result && "SISD intrinsic should have been handled");
5335 return Result;
5336 }
5337
5338 const Expr *Arg = E->getArg(E->getNumArgs()-1);
5340 if (std::optional<llvm::APSInt> Result =
5342 // Determine the type of this overloaded NEON intrinsic.
5343 Type = NeonTypeFlags(Result->getZExtValue());
5344
5345 bool usgn = Type.isUnsigned();
5346 bool quad = Type.isQuad();
5347 unsigned Int;
5348
5349 // Not all intrinsics handled by the common case work for AArch64 yet, so only
5350 // defer to common code if it's been added to our special map.
5351 Builtin =
5354
5355 if (Builtin)
5357 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
5358 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
5359 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
5360
5361 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
5362 return V;
5363
5364 // Handle non-overloaded intrinsics first.
5365 switch (BuiltinID) {
5366 default: break;
5367 case NEON::BI__builtin_neon_vabsh_f16:
5368 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
5369 case NEON::BI__builtin_neon_vaddq_p128: {
5370 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
5371 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5372 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5373 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
5374 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5375 return Builder.CreateBitCast(Ops[0], Int128Ty);
5376 }
5377 case NEON::BI__builtin_neon_vldrq_p128: {
5378 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5379 return Builder.CreateAlignedLoad(Int128Ty, Ops[0],
5381 }
5382 case NEON::BI__builtin_neon_vstrq_p128: {
5383 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5384 }
5385 case NEON::BI__builtin_neon_vcvts_f32_u32:
5386 case NEON::BI__builtin_neon_vcvtd_f64_u64:
5387 usgn = true;
5388 [[fallthrough]];
5389 case NEON::BI__builtin_neon_vcvts_f32_s32:
5390 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5391 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5392 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5393 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5394 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5395 if (usgn)
5396 return Builder.CreateUIToFP(Ops[0], FTy);
5397 return Builder.CreateSIToFP(Ops[0], FTy);
5398 }
5399 case NEON::BI__builtin_neon_vcvth_f16_u16:
5400 case NEON::BI__builtin_neon_vcvth_f16_u32:
5401 case NEON::BI__builtin_neon_vcvth_f16_u64:
5402 usgn = true;
5403 [[fallthrough]];
5404 case NEON::BI__builtin_neon_vcvth_f16_s16:
5405 case NEON::BI__builtin_neon_vcvth_f16_s32:
5406 case NEON::BI__builtin_neon_vcvth_f16_s64: {
5407 llvm::Type *FTy = HalfTy;
5408 llvm::Type *InTy;
5409 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
5410 InTy = Int64Ty;
5411 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
5412 InTy = Int32Ty;
5413 else
5414 InTy = Int16Ty;
5415 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5416 if (usgn)
5417 return Builder.CreateUIToFP(Ops[0], FTy);
5418 return Builder.CreateSIToFP(Ops[0], FTy);
5419 }
5420 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5421 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5422 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5423 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5424 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5425 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5426 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5427 case NEON::BI__builtin_neon_vcvtph_s16_f16: {
5428 llvm::Type *InTy = Int16Ty;
5429 llvm::Type* FTy = HalfTy;
5430 llvm::Type *Tys[2] = {InTy, FTy};
5431 switch (BuiltinID) {
5432 default: llvm_unreachable("missing builtin ID in switch!");
5433 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5434 Int = Intrinsic::aarch64_neon_fcvtau; break;
5435 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5436 Int = Intrinsic::aarch64_neon_fcvtmu; break;
5437 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5438 Int = Intrinsic::aarch64_neon_fcvtnu; break;
5439 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5440 Int = Intrinsic::aarch64_neon_fcvtpu; break;
5441 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5442 Int = Intrinsic::aarch64_neon_fcvtas; break;
5443 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5444 Int = Intrinsic::aarch64_neon_fcvtms; break;
5445 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5446 Int = Intrinsic::aarch64_neon_fcvtns; break;
5447 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5448 Int = Intrinsic::aarch64_neon_fcvtps; break;
5449 }
5450 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
5451 }
5452 case NEON::BI__builtin_neon_vcaleh_f16:
5453 case NEON::BI__builtin_neon_vcalth_f16:
5454 case NEON::BI__builtin_neon_vcageh_f16:
5455 case NEON::BI__builtin_neon_vcagth_f16: {
5456 llvm::Type* InTy = Int32Ty;
5457 llvm::Type* FTy = HalfTy;
5458 llvm::Type *Tys[2] = {InTy, FTy};
5459 switch (BuiltinID) {
5460 default: llvm_unreachable("missing builtin ID in switch!");
5461 case NEON::BI__builtin_neon_vcageh_f16:
5462 Int = Intrinsic::aarch64_neon_facge; break;
5463 case NEON::BI__builtin_neon_vcagth_f16:
5464 Int = Intrinsic::aarch64_neon_facgt; break;
5465 case NEON::BI__builtin_neon_vcaleh_f16:
5466 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
5467 case NEON::BI__builtin_neon_vcalth_f16:
5468 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
5469 }
5470 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
5471 return Builder.CreateTrunc(Ops[0], Int16Ty);
5472 }
5473 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5474 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
5475 llvm::Type* InTy = Int32Ty;
5476 llvm::Type* FTy = HalfTy;
5477 llvm::Type *Tys[2] = {InTy, FTy};
5478 switch (BuiltinID) {
5479 default: llvm_unreachable("missing builtin ID in switch!");
5480 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5481 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
5482 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
5483 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
5484 }
5485 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
5486 return Builder.CreateTrunc(Ops[0], Int16Ty);
5487 }
5488 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5489 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
5490 llvm::Type* FTy = HalfTy;
5491 llvm::Type* InTy = Int32Ty;
5492 llvm::Type *Tys[2] = {FTy, InTy};
5493 switch (BuiltinID) {
5494 default: llvm_unreachable("missing builtin ID in switch!");
5495 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5496 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
5497 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
5498 break;
5499 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
5500 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
5501 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
5502 break;
5503 }
5504 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
5505 }
5506 case NEON::BI__builtin_neon_vpaddd_s64: {
5507 // TODO: Isn't this handled by
5508 // EmitCommonNeonSISDBuiltinExpr?
5509 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
5510 // The vector is v2i64, so make sure it's bitcast to that.
5511 Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2i64");
5512 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5513 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5514 Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
5515 Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
5516 // Pairwise addition of a v2i64 into a scalar i64.
5517 return Builder.CreateAdd(Op0, Op1, "vpaddd");
5518 }
5519 case NEON::BI__builtin_neon_vpaddd_f64: {
5520 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
5521 // The vector is v2f64, so make sure it's bitcast to that.
5522 Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2f64");
5523 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5524 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5525 Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
5526 Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
5527 // Pairwise addition of a v2f64 into a scalar f64.
5528 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5529 }
5530 case NEON::BI__builtin_neon_vpadds_f32: {
5531 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
5532 // The vector is v2f32, so make sure it's bitcast to that.
5533 Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2f32");
5534 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5535 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5536 Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
5537 Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
5538 // Pairwise addition of a v2f32 into a scalar f32.
5539 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5540 }
5541 case NEON::BI__builtin_neon_vceqzd_s64:
5544 ICmpInst::ICMP_EQ, "vceqz");
5545 case NEON::BI__builtin_neon_vceqzd_f64:
5546 case NEON::BI__builtin_neon_vceqzs_f32:
5547 case NEON::BI__builtin_neon_vceqzh_f16:
5550 ICmpInst::FCMP_OEQ, "vceqz");
5551 case NEON::BI__builtin_neon_vcgezd_s64:
5554 ICmpInst::ICMP_SGE, "vcgez");
5555 case NEON::BI__builtin_neon_vcgezd_f64:
5556 case NEON::BI__builtin_neon_vcgezs_f32:
5557 case NEON::BI__builtin_neon_vcgezh_f16:
5560 ICmpInst::FCMP_OGE, "vcgez");
5561 case NEON::BI__builtin_neon_vclezd_s64:
5564 ICmpInst::ICMP_SLE, "vclez");
5565 case NEON::BI__builtin_neon_vclezd_f64:
5566 case NEON::BI__builtin_neon_vclezs_f32:
5567 case NEON::BI__builtin_neon_vclezh_f16:
5570 ICmpInst::FCMP_OLE, "vclez");
5571 case NEON::BI__builtin_neon_vcgtzd_s64:
5574 ICmpInst::ICMP_SGT, "vcgtz");
5575 case NEON::BI__builtin_neon_vcgtzd_f64:
5576 case NEON::BI__builtin_neon_vcgtzs_f32:
5577 case NEON::BI__builtin_neon_vcgtzh_f16:
5580 ICmpInst::FCMP_OGT, "vcgtz");
5581 case NEON::BI__builtin_neon_vcltzd_s64:
5584 ICmpInst::ICMP_SLT, "vcltz");
5585
5586 case NEON::BI__builtin_neon_vcltzd_f64:
5587 case NEON::BI__builtin_neon_vcltzs_f32:
5588 case NEON::BI__builtin_neon_vcltzh_f16:
5591 ICmpInst::FCMP_OLT, "vcltz");
5592
5593 case NEON::BI__builtin_neon_vceqzd_u64: {
5596 ICmpInst::ICMP_EQ, "vceqzd");
5597 }
5598 case NEON::BI__builtin_neon_vceqd_f64:
5599 case NEON::BI__builtin_neon_vcled_f64:
5600 case NEON::BI__builtin_neon_vcltd_f64:
5601 case NEON::BI__builtin_neon_vcged_f64:
5602 case NEON::BI__builtin_neon_vcgtd_f64: {
5603 llvm::CmpInst::Predicate P;
5604 switch (BuiltinID) {
5605 default: llvm_unreachable("missing builtin ID in switch!");
5606 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
5607 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
5608 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
5609 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
5610 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
5611 }
5612 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5613 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5614 if (P == llvm::FCmpInst::FCMP_OEQ)
5615 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5616 else
5617 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
5618 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
5619 }
5620 case NEON::BI__builtin_neon_vceqs_f32:
5621 case NEON::BI__builtin_neon_vcles_f32:
5622 case NEON::BI__builtin_neon_vclts_f32:
5623 case NEON::BI__builtin_neon_vcges_f32:
5624 case NEON::BI__builtin_neon_vcgts_f32: {
5625 llvm::CmpInst::Predicate P;
5626 switch (BuiltinID) {
5627 default: llvm_unreachable("missing builtin ID in switch!");
5628 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
5629 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
5630 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
5631 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
5632 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
5633 }
5634 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
5635 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
5636 if (P == llvm::FCmpInst::FCMP_OEQ)
5637 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5638 else
5639 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
5640 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
5641 }
5642 case NEON::BI__builtin_neon_vceqh_f16:
5643 case NEON::BI__builtin_neon_vcleh_f16:
5644 case NEON::BI__builtin_neon_vclth_f16:
5645 case NEON::BI__builtin_neon_vcgeh_f16:
5646 case NEON::BI__builtin_neon_vcgth_f16: {
5647 llvm::CmpInst::Predicate P;
5648 switch (BuiltinID) {
5649 default: llvm_unreachable("missing builtin ID in switch!");
5650 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
5651 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
5652 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
5653 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
5654 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
5655 }
5656 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
5657 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
5658 if (P == llvm::FCmpInst::FCMP_OEQ)
5659 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5660 else
5661 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
5662 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
5663 }
5664 case NEON::BI__builtin_neon_vceqd_s64:
5665 case NEON::BI__builtin_neon_vceqd_u64:
5666 case NEON::BI__builtin_neon_vcgtd_s64:
5667 case NEON::BI__builtin_neon_vcgtd_u64:
5668 case NEON::BI__builtin_neon_vcltd_s64:
5669 case NEON::BI__builtin_neon_vcltd_u64:
5670 case NEON::BI__builtin_neon_vcged_u64:
5671 case NEON::BI__builtin_neon_vcged_s64:
5672 case NEON::BI__builtin_neon_vcled_u64:
5673 case NEON::BI__builtin_neon_vcled_s64: {
5674 llvm::CmpInst::Predicate P;
5675 switch (BuiltinID) {
5676 default: llvm_unreachable("missing builtin ID in switch!");
5677 case NEON::BI__builtin_neon_vceqd_s64:
5678 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
5679 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
5680 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
5681 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
5682 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
5683 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
5684 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
5685 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
5686 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
5687 }
5688 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5689 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5690 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
5691 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
5692 }
5693 case NEON::BI__builtin_neon_vnegd_s64:
5694 return Builder.CreateNeg(Ops[0], "vnegd");
5695 case NEON::BI__builtin_neon_vnegh_f16:
5696 return Builder.CreateFNeg(Ops[0], "vnegh");
5697 case NEON::BI__builtin_neon_vtstd_s64:
5698 case NEON::BI__builtin_neon_vtstd_u64: {
5699 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5700 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5701 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
5702 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5703 llvm::Constant::getNullValue(Int64Ty));
5704 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
5705 }
5706 case NEON::BI__builtin_neon_vset_lane_i8:
5707 case NEON::BI__builtin_neon_vset_lane_i16:
5708 case NEON::BI__builtin_neon_vset_lane_i32:
5709 case NEON::BI__builtin_neon_vset_lane_i64:
5710 case NEON::BI__builtin_neon_vset_lane_bf16:
5711 case NEON::BI__builtin_neon_vset_lane_f32:
5712 case NEON::BI__builtin_neon_vsetq_lane_i8:
5713 case NEON::BI__builtin_neon_vsetq_lane_i16:
5714 case NEON::BI__builtin_neon_vsetq_lane_i32:
5715 case NEON::BI__builtin_neon_vsetq_lane_i64:
5716 case NEON::BI__builtin_neon_vsetq_lane_bf16:
5717 case NEON::BI__builtin_neon_vsetq_lane_f32:
5718 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5719 case NEON::BI__builtin_neon_vset_lane_f64:
5720 // The vector type needs a cast for the v1f64 variant.
5721 Ops[1] =
5722 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
5723 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5724 case NEON::BI__builtin_neon_vset_lane_mf8:
5725 case NEON::BI__builtin_neon_vsetq_lane_mf8:
5726 // The input vector type needs a cast to scalar type.
5727 Ops[0] =
5728 Builder.CreateBitCast(Ops[0], llvm::Type::getInt8Ty(getLLVMContext()));
5729 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5730 case NEON::BI__builtin_neon_vsetq_lane_f64:
5731 // The vector type needs a cast for the v2f64 variant.
5732 Ops[1] =
5733 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
5734 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5735
5736 case NEON::BI__builtin_neon_vget_lane_i8:
5737 case NEON::BI__builtin_neon_vdupb_lane_i8:
5738 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5739 case NEON::BI__builtin_neon_vgetq_lane_i8:
5740 case NEON::BI__builtin_neon_vdupb_laneq_i8:
5741 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5742 case NEON::BI__builtin_neon_vget_lane_mf8:
5743 case NEON::BI__builtin_neon_vdupb_lane_mf8:
5744 case NEON::BI__builtin_neon_vgetq_lane_mf8:
5745 case NEON::BI__builtin_neon_vdupb_laneq_mf8:
5746 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5747 case NEON::BI__builtin_neon_vget_lane_i16:
5748 case NEON::BI__builtin_neon_vduph_lane_i16:
5749 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5750 case NEON::BI__builtin_neon_vgetq_lane_i16:
5751 case NEON::BI__builtin_neon_vduph_laneq_i16:
5752 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5753 case NEON::BI__builtin_neon_vget_lane_i32:
5754 case NEON::BI__builtin_neon_vdups_lane_i32:
5755 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5756 case NEON::BI__builtin_neon_vdups_lane_f32:
5757 return Builder.CreateExtractElement(Ops[0], Ops[1], "vdups_lane");
5758 case NEON::BI__builtin_neon_vgetq_lane_i32:
5759 case NEON::BI__builtin_neon_vdups_laneq_i32:
5760 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5761 case NEON::BI__builtin_neon_vget_lane_i64:
5762 case NEON::BI__builtin_neon_vdupd_lane_i64:
5763 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5764 case NEON::BI__builtin_neon_vdupd_lane_f64:
5765 return Builder.CreateExtractElement(Ops[0], Ops[1], "vdupd_lane");
5766 case NEON::BI__builtin_neon_vgetq_lane_i64:
5767 case NEON::BI__builtin_neon_vdupd_laneq_i64:
5768 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5769 case NEON::BI__builtin_neon_vget_lane_f32:
5770 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5771 case NEON::BI__builtin_neon_vget_lane_f64:
5772 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5773 case NEON::BI__builtin_neon_vgetq_lane_f32:
5774 case NEON::BI__builtin_neon_vdups_laneq_f32:
5775 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5776 case NEON::BI__builtin_neon_vgetq_lane_f64:
5777 case NEON::BI__builtin_neon_vdupd_laneq_f64:
5778 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5779 case NEON::BI__builtin_neon_vaddh_f16:
5780 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
5781 case NEON::BI__builtin_neon_vsubh_f16:
5782 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
5783 case NEON::BI__builtin_neon_vmulh_f16:
5784 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
5785 case NEON::BI__builtin_neon_vdivh_f16:
5786 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
5787 case NEON::BI__builtin_neon_vfmah_f16:
5788 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
5790 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
5791 {Ops[1], Ops[2], Ops[0]});
5792 case NEON::BI__builtin_neon_vfmsh_f16: {
5793 Value *Neg = Builder.CreateFNeg(Ops[1], "vsubh");
5794
5795 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
5797 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
5798 {Neg, Ops[2], Ops[0]});
5799 }
5800 case NEON::BI__builtin_neon_vaddd_s64:
5801 case NEON::BI__builtin_neon_vaddd_u64:
5802 return Builder.CreateAdd(Ops[0], Ops[1], "vaddd");
5803 case NEON::BI__builtin_neon_vsubd_s64:
5804 case NEON::BI__builtin_neon_vsubd_u64:
5805 return Builder.CreateSub(Ops[0], Ops[1], "vsubd");
5806 case NEON::BI__builtin_neon_vqdmlalh_s16:
5807 case NEON::BI__builtin_neon_vqdmlslh_s16: {
5808 SmallVector<Value *, 2> ProductOps;
5809 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5810 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
5811 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
5812 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5813 ProductOps, "vqdmlXl");
5814 Constant *CI = ConstantInt::get(SizeTy, 0);
5815 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5816
5817 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
5818 ? Intrinsic::aarch64_neon_sqadd
5819 : Intrinsic::aarch64_neon_sqsub;
5820 // Drop the 2nd multiplication argument before the accumulation
5821 Ops.pop_back();
5822 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
5823 }
5824 case NEON::BI__builtin_neon_vqshlud_n_s64: {
5825 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5826 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
5827 Ops, "vqshlu_n");
5828 }
5829 case NEON::BI__builtin_neon_vqshld_n_u64:
5830 case NEON::BI__builtin_neon_vqshld_n_s64: {
5831 Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
5832 ? Intrinsic::aarch64_neon_uqshl
5833 : Intrinsic::aarch64_neon_sqshl;
5834 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5835 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
5836 }
5837 case NEON::BI__builtin_neon_vrshrd_n_u64:
5838 case NEON::BI__builtin_neon_vrshrd_n_s64: {
5839 Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
5840 ? Intrinsic::aarch64_neon_urshl
5841 : Intrinsic::aarch64_neon_srshl;
5842 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
5843 Ops[1] = ConstantInt::get(Int64Ty, -SV);
5844 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
5845 }
5846 case NEON::BI__builtin_neon_vrsrad_n_u64:
5847 case NEON::BI__builtin_neon_vrsrad_n_s64: {
5848 Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
5849 ? Intrinsic::aarch64_neon_urshl
5850 : Intrinsic::aarch64_neon_srshl;
5851 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5852 Ops[2] = Builder.CreateNeg(Ops[2]);
5853 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
5854 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
5855 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
5856 }
5857 case NEON::BI__builtin_neon_vshld_n_s64:
5858 case NEON::BI__builtin_neon_vshld_n_u64: {
5859 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[1]);
5860 return Builder.CreateShl(
5861 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
5862 }
5863 case NEON::BI__builtin_neon_vshrd_n_s64: {
5864 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[1]);
5865 return Builder.CreateAShr(
5866 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5867 Amt->getZExtValue())),
5868 "shrd_n");
5869 }
5870 case NEON::BI__builtin_neon_vshrd_n_u64: {
5871 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[1]);
5872 uint64_t ShiftAmt = Amt->getZExtValue();
5873 // Right-shifting an unsigned value by its size yields 0.
5874 if (ShiftAmt == 64)
5875 return ConstantInt::get(Int64Ty, 0);
5876 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
5877 "shrd_n");
5878 }
5879 case NEON::BI__builtin_neon_vsrad_n_s64: {
5880 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[2]);
5881 Ops[1] = Builder.CreateAShr(
5882 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5883 Amt->getZExtValue())),
5884 "shrd_n");
5885 return Builder.CreateAdd(Ops[0], Ops[1]);
5886 }
5887 case NEON::BI__builtin_neon_vsrad_n_u64: {
5888 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[2]);
5889 uint64_t ShiftAmt = Amt->getZExtValue();
5890 // Right-shifting an unsigned value by its size yields 0.
5891 // As Op + 0 = Op, return Ops[0] directly.
5892 if (ShiftAmt == 64)
5893 return Ops[0];
5894 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
5895 "shrd_n");
5896 return Builder.CreateAdd(Ops[0], Ops[1]);
5897 }
5898 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
5899 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
5900 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
5901 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
5902 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "lane");
5903 SmallVector<Value *, 2> ProductOps;
5904 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5905 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
5906 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
5907 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5908 ProductOps, "vqdmlXl");
5909 Constant *CI = ConstantInt::get(SizeTy, 0);
5910 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5911 // Drop lane-selection and the corresponding vector argument (these have
5912 // already been used)
5913 Ops.pop_back_n(2);
5914
5915 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
5916 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
5917 ? Intrinsic::aarch64_neon_sqadd
5918 : Intrinsic::aarch64_neon_sqsub;
5919 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
5920 }
5921 case NEON::BI__builtin_neon_vqdmlals_s32:
5922 case NEON::BI__builtin_neon_vqdmlsls_s32: {
5923 SmallVector<Value *, 2> ProductOps;
5924 ProductOps.push_back(Ops[1]);
5925 ProductOps.push_back(Ops[2]);
5926 Ops[1] =
5927 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5928 ProductOps, "vqdmlXl");
5929
5930 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
5931 ? Intrinsic::aarch64_neon_sqadd
5932 : Intrinsic::aarch64_neon_sqsub;
5933 // Drop the 2nd multiplication argument before the accumulation
5934 Ops.pop_back();
5935 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
5936 }
5937 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
5938 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
5939 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
5940 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
5941 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "lane");
5942 SmallVector<Value *, 2> ProductOps;
5943 ProductOps.push_back(Ops[1]);
5944 ProductOps.push_back(Ops[2]);
5945 Ops[1] =
5946 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5947 ProductOps, "vqdmlXl");
5948 // Drop lane-selection and the corresponding vector argument (these have
5949 // already been used)
5950 Ops.pop_back_n(2);
5951
5952 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
5953 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
5954 ? Intrinsic::aarch64_neon_sqadd
5955 : Intrinsic::aarch64_neon_sqsub;
5956 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
5957 }
5958 case NEON::BI__builtin_neon_vget_lane_bf16:
5959 case NEON::BI__builtin_neon_vduph_lane_bf16:
5960 case NEON::BI__builtin_neon_vduph_lane_f16: {
5961 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5962 }
5963 case NEON::BI__builtin_neon_vgetq_lane_bf16:
5964 case NEON::BI__builtin_neon_vduph_laneq_bf16:
5965 case NEON::BI__builtin_neon_vduph_laneq_f16: {
5966 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5967 }
5968 case NEON::BI__builtin_neon_vcvt_bf16_f32: {
5969 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
5970 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
5971 return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
5972 }
5973 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
5974 SmallVector<int, 16> ConcatMask(8);
5975 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
5976 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
5977 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
5978 llvm::Value *Trunc =
5979 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
5980 return Builder.CreateShuffleVector(
5981 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
5982 }
5983 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
5984 SmallVector<int, 16> ConcatMask(8);
5985 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
5986 SmallVector<int, 16> LoMask(4);
5987 std::iota(LoMask.begin(), LoMask.end(), 0);
5988 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
5989 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
5990 llvm::Type *V8BF16 = FixedVectorType::get(Builder.getBFloatTy(), 8);
5991 llvm::Value *Inactive = Builder.CreateShuffleVector(
5992 Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
5993 llvm::Value *Trunc =
5994 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
5995 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
5996 }
5997 case NEON::BI__builtin_neon_vcvt_f16_f32: {
5998 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
5999 llvm::Type *V4F16 = FixedVectorType::get(Builder.getHalfTy(), 4);
6000 return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4F16);
6001 }
6002 case NEON::BI__builtin_neon_vcvt_f32_f16: {
6003 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6004 llvm::Type *V4F16 = FixedVectorType::get(Builder.getHalfTy(), 4);
6005 return Builder.CreateFPExt(Builder.CreateBitCast(Ops[0], V4F16), V4F32);
6006 }
6007
6008 case clang::AArch64::BI_InterlockedAdd:
6009 case clang::AArch64::BI_InterlockedAdd_acq:
6010 case clang::AArch64::BI_InterlockedAdd_rel:
6011 case clang::AArch64::BI_InterlockedAdd_nf:
6012 case clang::AArch64::BI_InterlockedAdd64:
6013 case clang::AArch64::BI_InterlockedAdd64_acq:
6014 case clang::AArch64::BI_InterlockedAdd64_rel:
6015 case clang::AArch64::BI_InterlockedAdd64_nf: {
6016 Address DestAddr = CheckAtomicAlignment(*this, E);
6017 Value *Val = Ops[1];
6018 llvm::AtomicOrdering Ordering;
6019 switch (BuiltinID) {
6020 case clang::AArch64::BI_InterlockedAdd:
6021 case clang::AArch64::BI_InterlockedAdd64:
6022 Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
6023 break;
6024 case clang::AArch64::BI_InterlockedAdd_acq:
6025 case clang::AArch64::BI_InterlockedAdd64_acq:
6026 Ordering = llvm::AtomicOrdering::Acquire;
6027 break;
6028 case clang::AArch64::BI_InterlockedAdd_rel:
6029 case clang::AArch64::BI_InterlockedAdd64_rel:
6030 Ordering = llvm::AtomicOrdering::Release;
6031 break;
6032 case clang::AArch64::BI_InterlockedAdd_nf:
6033 case clang::AArch64::BI_InterlockedAdd64_nf:
6034 Ordering = llvm::AtomicOrdering::Monotonic;
6035 break;
6036 default:
6037 llvm_unreachable("missing builtin ID in switch!");
6038 }
6039 AtomicRMWInst *RMWI =
6040 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val, Ordering);
6041 return Builder.CreateAdd(RMWI, Val);
6042 }
6043 }
6044
6045 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
6046 llvm::Type *Ty = VTy;
6047 if (!Ty)
6048 return nullptr;
6049
6050 bool ExtractLow = false;
6051 bool ExtendLaneArg = false;
6052 switch (BuiltinID) {
6053 default: return nullptr;
6054 case NEON::BI__builtin_neon_vbsl_v:
6055 case NEON::BI__builtin_neon_vbslq_v: {
6056 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6057 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
6058 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
6059 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
6060
6061 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
6062 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
6063 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
6064 return Builder.CreateBitCast(Ops[0], Ty);
6065 }
6066 case NEON::BI__builtin_neon_vfma_lane_v:
6067 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6068 // The ARM builtins (and instructions) have the addend as the first
6069 // operand, but the 'fma' intrinsics have it last. Swap it around here.
6070 Value *Addend = Ops[0];
6071 Value *Multiplicand = Ops[1];
6072 Value *LaneSource = Ops[2];
6073 Ops[0] = Multiplicand;
6074 Ops[1] = LaneSource;
6075 Ops[2] = Addend;
6076
6077 // Now adjust things to handle the lane access.
6078 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
6079 ? llvm::FixedVectorType::get(VTy->getElementType(),
6080 VTy->getNumElements() / 2)
6081 : VTy;
6082 llvm::Constant *cst = cast<Constant>(Ops[3]);
6083 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
6084 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
6085 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
6086
6087 Ops.pop_back();
6088 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
6089 : Intrinsic::fma;
6090 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
6091 }
6092 case NEON::BI__builtin_neon_vfma_laneq_v: {
6093 auto *VTy = cast<llvm::FixedVectorType>(Ty);
6094 // v1f64 fma should be mapped to Neon scalar f64 fma
6095 if (VTy && VTy->getElementType() == DoubleTy) {
6096 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6097 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6098 llvm::FixedVectorType *VTy =
6100 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
6101 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6102 Value *Result;
6104 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
6105 DoubleTy, {Ops[1], Ops[2], Ops[0]});
6106 return Builder.CreateBitCast(Result, Ty);
6107 }
6108 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6109 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6110
6111 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
6112 VTy->getNumElements() * 2);
6113 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
6114 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
6115 cast<ConstantInt>(Ops[3]));
6116 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6117
6119 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6120 {Ops[2], Ops[1], Ops[0]});
6121 }
6122 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6123 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6124 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6125
6126 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6127 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6129 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6130 {Ops[2], Ops[1], Ops[0]});
6131 }
6132 case NEON::BI__builtin_neon_vfmah_lane_f16:
6133 case NEON::BI__builtin_neon_vfmas_lane_f32:
6134 case NEON::BI__builtin_neon_vfmah_laneq_f16:
6135 case NEON::BI__builtin_neon_vfmas_laneq_f32:
6136 case NEON::BI__builtin_neon_vfmad_lane_f64:
6137 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6138 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6139 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6141 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6142 {Ops[1], Ops[2], Ops[0]});
6143 }
6144 case NEON::BI__builtin_neon_vmull_v:
6145 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6146 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6147 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6148 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6149 case NEON::BI__builtin_neon_vmax_v:
6150 case NEON::BI__builtin_neon_vmaxq_v:
6151 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6152 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6153 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6154 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6155 case NEON::BI__builtin_neon_vmaxh_f16: {
6156 Int = Intrinsic::aarch64_neon_fmax;
6157 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
6158 }
6159 case NEON::BI__builtin_neon_vmin_v:
6160 case NEON::BI__builtin_neon_vminq_v:
6161 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6162 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6163 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6164 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
6165 case NEON::BI__builtin_neon_vminh_f16: {
6166 Int = Intrinsic::aarch64_neon_fmin;
6167 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
6168 }
6169 case NEON::BI__builtin_neon_vabd_v:
6170 case NEON::BI__builtin_neon_vabdq_v:
6171 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6172 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6173 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6174 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
6175 case NEON::BI__builtin_neon_vpadal_v:
6176 case NEON::BI__builtin_neon_vpadalq_v: {
6177 unsigned ArgElts = VTy->getNumElements();
6178 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6179 unsigned BitWidth = EltTy->getBitWidth();
6180 auto *ArgTy = llvm::FixedVectorType::get(
6181 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
6182 llvm::Type* Tys[2] = { VTy, ArgTy };
6183 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6185 TmpOps.push_back(Ops[1]);
6186 Function *F = CGM.getIntrinsic(Int, Tys);
6187 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
6188 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6189 return Builder.CreateAdd(tmp, addend);
6190 }
6191 case NEON::BI__builtin_neon_vpmin_v:
6192 case NEON::BI__builtin_neon_vpminq_v:
6193 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6194 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6195 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6196 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
6197 case NEON::BI__builtin_neon_vpmax_v:
6198 case NEON::BI__builtin_neon_vpmaxq_v:
6199 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6200 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6201 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6202 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
6203 case NEON::BI__builtin_neon_vminnm_v:
6204 case NEON::BI__builtin_neon_vminnmq_v:
6205 Int = Intrinsic::aarch64_neon_fminnm;
6206 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
6207 case NEON::BI__builtin_neon_vminnmh_f16:
6208 Int = Intrinsic::aarch64_neon_fminnm;
6209 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
6210 case NEON::BI__builtin_neon_vmaxnm_v:
6211 case NEON::BI__builtin_neon_vmaxnmq_v:
6212 Int = Intrinsic::aarch64_neon_fmaxnm;
6213 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
6214 case NEON::BI__builtin_neon_vmaxnmh_f16:
6215 Int = Intrinsic::aarch64_neon_fmaxnm;
6216 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
6217 case NEON::BI__builtin_neon_vrecpss_f32: {
6218 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6219 Ops, "vrecps");
6220 }
6221 case NEON::BI__builtin_neon_vrecpsd_f64:
6222 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6223 Ops, "vrecps");
6224 case NEON::BI__builtin_neon_vrecpsh_f16:
6225 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
6226 Ops, "vrecps");
6227 case NEON::BI__builtin_neon_vqshrun_n_v:
6228 Int = Intrinsic::aarch64_neon_sqshrun;
6229 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
6230 case NEON::BI__builtin_neon_vqrshrun_n_v:
6231 Int = Intrinsic::aarch64_neon_sqrshrun;
6232 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
6233 case NEON::BI__builtin_neon_vqshrn_n_v:
6234 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6235 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
6236 case NEON::BI__builtin_neon_vrshrn_n_v:
6237 Int = Intrinsic::aarch64_neon_rshrn;
6238 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
6239 case NEON::BI__builtin_neon_vqrshrn_n_v:
6240 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6241 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
6242 case NEON::BI__builtin_neon_vrndah_f16: {
6243 Int = Builder.getIsFPConstrained()
6244 ? Intrinsic::experimental_constrained_round
6245 : Intrinsic::round;
6246 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
6247 }
6248 case NEON::BI__builtin_neon_vrnda_v:
6249 case NEON::BI__builtin_neon_vrndaq_v: {
6250 Int = Builder.getIsFPConstrained()
6251 ? Intrinsic::experimental_constrained_round
6252 : Intrinsic::round;
6253 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
6254 }
6255 case NEON::BI__builtin_neon_vrndih_f16: {
6256 Int = Builder.getIsFPConstrained()
6257 ? Intrinsic::experimental_constrained_nearbyint
6258 : Intrinsic::nearbyint;
6259 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
6260 }
6261 case NEON::BI__builtin_neon_vrndmh_f16: {
6262 Int = Builder.getIsFPConstrained()
6263 ? Intrinsic::experimental_constrained_floor
6264 : Intrinsic::floor;
6265 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
6266 }
6267 case NEON::BI__builtin_neon_vrndm_v:
6268 case NEON::BI__builtin_neon_vrndmq_v: {
6269 Int = Builder.getIsFPConstrained()
6270 ? Intrinsic::experimental_constrained_floor
6271 : Intrinsic::floor;
6272 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6273 }
6274 case NEON::BI__builtin_neon_vrndnh_f16: {
6275 Int = Builder.getIsFPConstrained()
6276 ? Intrinsic::experimental_constrained_roundeven
6277 : Intrinsic::roundeven;
6278 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
6279 }
6280 case NEON::BI__builtin_neon_vrndn_v:
6281 case NEON::BI__builtin_neon_vrndnq_v: {
6282 Int = Builder.getIsFPConstrained()
6283 ? Intrinsic::experimental_constrained_roundeven
6284 : Intrinsic::roundeven;
6285 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6286 }
6287 case NEON::BI__builtin_neon_vrndns_f32: {
6288 Int = Builder.getIsFPConstrained()
6289 ? Intrinsic::experimental_constrained_roundeven
6290 : Intrinsic::roundeven;
6291 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
6292 }
6293 case NEON::BI__builtin_neon_vrndph_f16: {
6294 Int = Builder.getIsFPConstrained()
6295 ? Intrinsic::experimental_constrained_ceil
6296 : Intrinsic::ceil;
6297 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
6298 }
6299 case NEON::BI__builtin_neon_vrndp_v:
6300 case NEON::BI__builtin_neon_vrndpq_v: {
6301 Int = Builder.getIsFPConstrained()
6302 ? Intrinsic::experimental_constrained_ceil
6303 : Intrinsic::ceil;
6304 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6305 }
6306 case NEON::BI__builtin_neon_vrndxh_f16: {
6307 Int = Builder.getIsFPConstrained()
6308 ? Intrinsic::experimental_constrained_rint
6309 : Intrinsic::rint;
6310 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
6311 }
6312 case NEON::BI__builtin_neon_vrndx_v:
6313 case NEON::BI__builtin_neon_vrndxq_v: {
6314 Int = Builder.getIsFPConstrained()
6315 ? Intrinsic::experimental_constrained_rint
6316 : Intrinsic::rint;
6317 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6318 }
6319 case NEON::BI__builtin_neon_vrndh_f16: {
6320 Int = Builder.getIsFPConstrained()
6321 ? Intrinsic::experimental_constrained_trunc
6322 : Intrinsic::trunc;
6323 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
6324 }
6325 case NEON::BI__builtin_neon_vrnd32x_f32:
6326 case NEON::BI__builtin_neon_vrnd32xq_f32:
6327 case NEON::BI__builtin_neon_vrnd32x_f64:
6328 case NEON::BI__builtin_neon_vrnd32xq_f64: {
6329 Int = Intrinsic::aarch64_neon_frint32x;
6330 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
6331 }
6332 case NEON::BI__builtin_neon_vrnd32z_f32:
6333 case NEON::BI__builtin_neon_vrnd32zq_f32:
6334 case NEON::BI__builtin_neon_vrnd32z_f64:
6335 case NEON::BI__builtin_neon_vrnd32zq_f64: {
6336 Int = Intrinsic::aarch64_neon_frint32z;
6337 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
6338 }
6339 case NEON::BI__builtin_neon_vrnd64x_f32:
6340 case NEON::BI__builtin_neon_vrnd64xq_f32:
6341 case NEON::BI__builtin_neon_vrnd64x_f64:
6342 case NEON::BI__builtin_neon_vrnd64xq_f64: {
6343 Int = Intrinsic::aarch64_neon_frint64x;
6344 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
6345 }
6346 case NEON::BI__builtin_neon_vrnd64z_f32:
6347 case NEON::BI__builtin_neon_vrnd64zq_f32:
6348 case NEON::BI__builtin_neon_vrnd64z_f64:
6349 case NEON::BI__builtin_neon_vrnd64zq_f64: {
6350 Int = Intrinsic::aarch64_neon_frint64z;
6351 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
6352 }
6353 case NEON::BI__builtin_neon_vrnd_v:
6354 case NEON::BI__builtin_neon_vrndq_v: {
6355 Int = Builder.getIsFPConstrained()
6356 ? Intrinsic::experimental_constrained_trunc
6357 : Intrinsic::trunc;
6358 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
6359 }
6360 case NEON::BI__builtin_neon_vcvt_f64_v:
6361 case NEON::BI__builtin_neon_vcvtq_f64_v:
6362 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6363 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6364 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6365 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6366 case NEON::BI__builtin_neon_vcvt_f64_f32: {
6367 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6368 "unexpected vcvt_f64_f32 builtin");
6369 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6370 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6371
6372 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
6373 }
6374 case NEON::BI__builtin_neon_vcvt_f32_f64: {
6375 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6376 "unexpected vcvt_f32_f64 builtin");
6377 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6378 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6379
6380 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
6381 }
6382 case NEON::BI__builtin_neon_vcvta_s16_f16:
6383 case NEON::BI__builtin_neon_vcvta_u16_f16:
6384 case NEON::BI__builtin_neon_vcvta_s32_v:
6385 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
6386 case NEON::BI__builtin_neon_vcvtaq_s32_v:
6387 case NEON::BI__builtin_neon_vcvta_u32_v:
6388 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
6389 case NEON::BI__builtin_neon_vcvtaq_u32_v:
6390 case NEON::BI__builtin_neon_vcvta_s64_v:
6391 case NEON::BI__builtin_neon_vcvtaq_s64_v:
6392 case NEON::BI__builtin_neon_vcvta_u64_v:
6393 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6394 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6395 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6396 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
6397 }
6398 case NEON::BI__builtin_neon_vcvtm_s16_f16:
6399 case NEON::BI__builtin_neon_vcvtm_s32_v:
6400 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
6401 case NEON::BI__builtin_neon_vcvtmq_s32_v:
6402 case NEON::BI__builtin_neon_vcvtm_u16_f16:
6403 case NEON::BI__builtin_neon_vcvtm_u32_v:
6404 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
6405 case NEON::BI__builtin_neon_vcvtmq_u32_v:
6406 case NEON::BI__builtin_neon_vcvtm_s64_v:
6407 case NEON::BI__builtin_neon_vcvtmq_s64_v:
6408 case NEON::BI__builtin_neon_vcvtm_u64_v:
6409 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6410 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6411 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6412 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
6413 }
6414 case NEON::BI__builtin_neon_vcvtn_s16_f16:
6415 case NEON::BI__builtin_neon_vcvtn_s32_v:
6416 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
6417 case NEON::BI__builtin_neon_vcvtnq_s32_v:
6418 case NEON::BI__builtin_neon_vcvtn_u16_f16:
6419 case NEON::BI__builtin_neon_vcvtn_u32_v:
6420 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
6421 case NEON::BI__builtin_neon_vcvtnq_u32_v:
6422 case NEON::BI__builtin_neon_vcvtn_s64_v:
6423 case NEON::BI__builtin_neon_vcvtnq_s64_v:
6424 case NEON::BI__builtin_neon_vcvtn_u64_v:
6425 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6426 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6427 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6428 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
6429 }
6430 case NEON::BI__builtin_neon_vcvtp_s16_f16:
6431 case NEON::BI__builtin_neon_vcvtp_s32_v:
6432 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
6433 case NEON::BI__builtin_neon_vcvtpq_s32_v:
6434 case NEON::BI__builtin_neon_vcvtp_u16_f16:
6435 case NEON::BI__builtin_neon_vcvtp_u32_v:
6436 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
6437 case NEON::BI__builtin_neon_vcvtpq_u32_v:
6438 case NEON::BI__builtin_neon_vcvtp_s64_v:
6439 case NEON::BI__builtin_neon_vcvtpq_s64_v:
6440 case NEON::BI__builtin_neon_vcvtp_u64_v:
6441 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6442 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6443 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6444 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
6445 }
6446 case NEON::BI__builtin_neon_vmulx_v:
6447 case NEON::BI__builtin_neon_vmulxq_v: {
6448 Int = Intrinsic::aarch64_neon_fmulx;
6449 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
6450 }
6451 case NEON::BI__builtin_neon_vmulxh_lane_f16:
6452 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
6453 // vmulx_lane should be mapped to Neon scalar mulx after
6454 // extracting the scalar element
6455 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6456 Ops.pop_back();
6457 Int = Intrinsic::aarch64_neon_fmulx;
6458 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
6459 }
6460 case NEON::BI__builtin_neon_vmul_lane_v:
6461 case NEON::BI__builtin_neon_vmul_laneq_v: {
6462 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
6463 bool Quad = false;
6464 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
6465 Quad = true;
6466 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6467 llvm::FixedVectorType *VTy =
6469 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6470 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6471 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
6472 return Builder.CreateBitCast(Result, Ty);
6473 }
6474 case NEON::BI__builtin_neon_vpmaxnm_v:
6475 case NEON::BI__builtin_neon_vpmaxnmq_v: {
6476 Int = Intrinsic::aarch64_neon_fmaxnmp;
6477 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
6478 }
6479 case NEON::BI__builtin_neon_vpminnm_v:
6480 case NEON::BI__builtin_neon_vpminnmq_v: {
6481 Int = Intrinsic::aarch64_neon_fminnmp;
6482 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
6483 }
6484 case NEON::BI__builtin_neon_vsqrth_f16: {
6485 Int = Builder.getIsFPConstrained()
6486 ? Intrinsic::experimental_constrained_sqrt
6487 : Intrinsic::sqrt;
6488 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
6489 }
6490 case NEON::BI__builtin_neon_vsqrt_v:
6491 case NEON::BI__builtin_neon_vsqrtq_v: {
6492 Int = Builder.getIsFPConstrained()
6493 ? Intrinsic::experimental_constrained_sqrt
6494 : Intrinsic::sqrt;
6495 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6496 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
6497 }
6498 case NEON::BI__builtin_neon_vrbit_v:
6499 case NEON::BI__builtin_neon_vrbitq_v: {
6500 Int = Intrinsic::bitreverse;
6501 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
6502 }
6503 case NEON::BI__builtin_neon_vmaxv_f16: {
6504 Int = Intrinsic::aarch64_neon_fmaxv;
6505 Ty = HalfTy;
6506 VTy = llvm::FixedVectorType::get(HalfTy, 4);
6507 llvm::Type *Tys[2] = {Ty, VTy};
6508 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6509 }
6510 case NEON::BI__builtin_neon_vmaxvq_f16: {
6511 Int = Intrinsic::aarch64_neon_fmaxv;
6512 Ty = HalfTy;
6513 VTy = llvm::FixedVectorType::get(HalfTy, 8);
6514 llvm::Type *Tys[2] = {Ty, VTy};
6515 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6516 }
6517 case NEON::BI__builtin_neon_vminv_f16: {
6518 Int = Intrinsic::aarch64_neon_fminv;
6519 Ty = HalfTy;
6520 VTy = llvm::FixedVectorType::get(HalfTy, 4);
6521 llvm::Type *Tys[2] = {Ty, VTy};
6522 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6523 }
6524 case NEON::BI__builtin_neon_vminvq_f16: {
6525 Int = Intrinsic::aarch64_neon_fminv;
6526 Ty = HalfTy;
6527 VTy = llvm::FixedVectorType::get(HalfTy, 8);
6528 llvm::Type *Tys[2] = {Ty, VTy};
6529 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6530 }
6531 case NEON::BI__builtin_neon_vmaxnmv_f16: {
6532 Int = Intrinsic::aarch64_neon_fmaxnmv;
6533 Ty = HalfTy;
6534 VTy = llvm::FixedVectorType::get(HalfTy, 4);
6535 llvm::Type *Tys[2] = {Ty, VTy};
6536 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
6537 }
6538 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
6539 Int = Intrinsic::aarch64_neon_fmaxnmv;
6540 Ty = HalfTy;
6541 VTy = llvm::FixedVectorType::get(HalfTy, 8);
6542 llvm::Type *Tys[2] = {Ty, VTy};
6543 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
6544 }
6545 case NEON::BI__builtin_neon_vminnmv_f16: {
6546 Int = Intrinsic::aarch64_neon_fminnmv;
6547 Ty = HalfTy;
6548 VTy = llvm::FixedVectorType::get(HalfTy, 4);
6549 llvm::Type *Tys[2] = {Ty, VTy};
6550 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
6551 }
6552 case NEON::BI__builtin_neon_vminnmvq_f16: {
6553 Int = Intrinsic::aarch64_neon_fminnmv;
6554 Ty = HalfTy;
6555 VTy = llvm::FixedVectorType::get(HalfTy, 8);
6556 llvm::Type *Tys[2] = {Ty, VTy};
6557 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
6558 }
6559 case NEON::BI__builtin_neon_vmul_n_f64: {
6560 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6561 Value *RHS = Builder.CreateBitCast(Ops[1], DoubleTy);
6562 return Builder.CreateFMul(Ops[0], RHS);
6563 }
6564 case NEON::BI__builtin_neon_vaddlv_u8:
6565 case NEON::BI__builtin_neon_vaddlvq_u8:
6566 case NEON::BI__builtin_neon_vaddlv_u16:
6567 case NEON::BI__builtin_neon_vaddlvq_u16: {
6568 Int = Intrinsic::aarch64_neon_uaddlv;
6569 Ty = Int32Ty;
6570 VTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
6571 llvm::Type *Tys[2] = {Ty, VTy};
6572 Value *Result = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6573 if (VTy->getElementType()->getPrimitiveSizeInBits() == 8)
6574 return Builder.CreateTrunc(Result, Int16Ty);
6575 return Result;
6576 }
6577 case NEON::BI__builtin_neon_vaddlv_s8:
6578 case NEON::BI__builtin_neon_vaddlvq_s8:
6579 case NEON::BI__builtin_neon_vaddlv_s16:
6580 case NEON::BI__builtin_neon_vaddlvq_s16: {
6581 Int = Intrinsic::aarch64_neon_saddlv;
6582 Ty = Int32Ty;
6583 VTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
6584 llvm::Type *Tys[2] = {Ty, VTy};
6585 Value *Result = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6586 if (VTy->getElementType()->getPrimitiveSizeInBits() == 8)
6587 return Builder.CreateTrunc(Result, Int16Ty);
6588 return Result;
6589 }
6590 case NEON::BI__builtin_neon_vsri_n_v:
6591 case NEON::BI__builtin_neon_vsriq_n_v: {
6592 Int = Intrinsic::aarch64_neon_vsri;
6593 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6594 return EmitNeonCall(Intrin, Ops, "vsri_n");
6595 }
6596 case NEON::BI__builtin_neon_vsli_n_v:
6597 case NEON::BI__builtin_neon_vsliq_n_v: {
6598 Int = Intrinsic::aarch64_neon_vsli;
6599 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6600 return EmitNeonCall(Intrin, Ops, "vsli_n");
6601 }
6602 case NEON::BI__builtin_neon_vsra_n_v:
6603 case NEON::BI__builtin_neon_vsraq_n_v:
6604 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6605 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6606 return Builder.CreateAdd(Ops[0], Ops[1]);
6607 case NEON::BI__builtin_neon_vrsra_n_v:
6608 case NEON::BI__builtin_neon_vrsraq_n_v: {
6609 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6611 TmpOps.push_back(Ops[1]);
6612 TmpOps.push_back(Ops[2]);
6613 Function* F = CGM.getIntrinsic(Int, Ty);
6614 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
6615 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
6616 return Builder.CreateAdd(Ops[0], tmp);
6617 }
6618 case NEON::BI__builtin_neon_vld1_v:
6619 case NEON::BI__builtin_neon_vld1q_v: {
6620 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
6621 }
6622 case NEON::BI__builtin_neon_vst1_v:
6623 case NEON::BI__builtin_neon_vst1q_v:
6624 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6625 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
6626 case NEON::BI__builtin_neon_vld1_lane_v:
6627 case NEON::BI__builtin_neon_vld1q_lane_v: {
6628 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6629 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
6630 PtrOp0.getAlignment());
6631 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
6632 }
6633 case NEON::BI__builtin_neon_vldap1_lane_s64:
6634 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
6635 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6636 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
6637 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
6638 LI->setAtomic(llvm::AtomicOrdering::Acquire);
6639 Ops[0] = LI;
6640 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
6641 }
6642 case NEON::BI__builtin_neon_vld1_dup_v:
6643 case NEON::BI__builtin_neon_vld1q_dup_v: {
6644 Value *V = PoisonValue::get(Ty);
6645 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
6646 PtrOp0.getAlignment());
6647 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
6648 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
6649 return EmitNeonSplat(Ops[0], CI);
6650 }
6651 case NEON::BI__builtin_neon_vst1_lane_v:
6652 case NEON::BI__builtin_neon_vst1q_lane_v:
6653 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6654 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6655 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
6656 case NEON::BI__builtin_neon_vstl1_lane_s64:
6657 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
6658 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6659 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6660 llvm::StoreInst *SI =
6661 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
6662 SI->setAtomic(llvm::AtomicOrdering::Release);
6663 return SI;
6664 }
6665 case NEON::BI__builtin_neon_vld2_v:
6666 case NEON::BI__builtin_neon_vld2q_v: {
6667 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6668 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
6669 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6670 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6671 }
6672 case NEON::BI__builtin_neon_vld3_v:
6673 case NEON::BI__builtin_neon_vld3q_v: {
6674 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6675 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
6676 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6677 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6678 }
6679 case NEON::BI__builtin_neon_vld4_v:
6680 case NEON::BI__builtin_neon_vld4q_v: {
6681 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6682 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
6683 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6684 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6685 }
6686 case NEON::BI__builtin_neon_vld2_dup_v:
6687 case NEON::BI__builtin_neon_vld2q_dup_v: {
6688 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6689 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
6690 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6691 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6692 }
6693 case NEON::BI__builtin_neon_vld3_dup_v:
6694 case NEON::BI__builtin_neon_vld3q_dup_v: {
6695 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6696 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
6697 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6698 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6699 }
6700 case NEON::BI__builtin_neon_vld4_dup_v:
6701 case NEON::BI__builtin_neon_vld4q_dup_v: {
6702 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6703 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
6704 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6705 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6706 }
6707 case NEON::BI__builtin_neon_vld2_lane_v:
6708 case NEON::BI__builtin_neon_vld2q_lane_v: {
6709 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6710 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
6711 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
6712 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6713 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6714 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6715 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
6716 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6717 }
6718 case NEON::BI__builtin_neon_vld3_lane_v:
6719 case NEON::BI__builtin_neon_vld3q_lane_v: {
6720 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6721 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
6722 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
6723 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6724 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6725 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6726 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6727 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
6728 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6729 }
6730 case NEON::BI__builtin_neon_vld4_lane_v:
6731 case NEON::BI__builtin_neon_vld4q_lane_v: {
6732 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6733 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
6734 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
6735 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6736 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6737 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6738 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
6739 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
6740 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
6741 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6742 }
6743 case NEON::BI__builtin_neon_vst2_v:
6744 case NEON::BI__builtin_neon_vst2q_v: {
6745 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6746 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
6747 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
6748 Ops, "");
6749 }
6750 case NEON::BI__builtin_neon_vst2_lane_v:
6751 case NEON::BI__builtin_neon_vst2q_lane_v: {
6752 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6753 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
6754 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6755 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
6756 Ops, "");
6757 }
6758 case NEON::BI__builtin_neon_vst3_v:
6759 case NEON::BI__builtin_neon_vst3q_v: {
6760 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6761 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6762 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
6763 Ops, "");
6764 }
6765 case NEON::BI__builtin_neon_vst3_lane_v:
6766 case NEON::BI__builtin_neon_vst3q_lane_v: {
6767 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6768 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6769 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6770 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
6771 Ops, "");
6772 }
6773 case NEON::BI__builtin_neon_vst4_v:
6774 case NEON::BI__builtin_neon_vst4q_v: {
6775 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6776 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6777 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
6778 Ops, "");
6779 }
6780 case NEON::BI__builtin_neon_vst4_lane_v:
6781 case NEON::BI__builtin_neon_vst4q_lane_v: {
6782 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6783 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6784 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
6785 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
6786 Ops, "");
6787 }
6788 case NEON::BI__builtin_neon_vtrn_v:
6789 case NEON::BI__builtin_neon_vtrnq_v: {
6790 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6791 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6792 Value *SV = nullptr;
6793
6794 for (unsigned vi = 0; vi != 2; ++vi) {
6795 SmallVector<int, 16> Indices;
6796 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6797 Indices.push_back(i+vi);
6798 Indices.push_back(i+e+vi);
6799 }
6800 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6801 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
6802 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6803 }
6804 return SV;
6805 }
6806 case NEON::BI__builtin_neon_vuzp_v:
6807 case NEON::BI__builtin_neon_vuzpq_v: {
6808 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6809 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6810 Value *SV = nullptr;
6811
6812 for (unsigned vi = 0; vi != 2; ++vi) {
6813 SmallVector<int, 16> Indices;
6814 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
6815 Indices.push_back(2*i+vi);
6816
6817 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6818 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
6819 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6820 }
6821 return SV;
6822 }
6823 case NEON::BI__builtin_neon_vzip_v:
6824 case NEON::BI__builtin_neon_vzipq_v: {
6825 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6826 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6827 Value *SV = nullptr;
6828
6829 for (unsigned vi = 0; vi != 2; ++vi) {
6830 SmallVector<int, 16> Indices;
6831 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6832 Indices.push_back((i + vi*e) >> 1);
6833 Indices.push_back(((i + vi*e) >> 1)+e);
6834 }
6835 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6836 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
6837 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6838 }
6839 return SV;
6840 }
6841 case NEON::BI__builtin_neon_vqtbl1q_v: {
6842 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
6843 Ops, "vtbl1");
6844 }
6845 case NEON::BI__builtin_neon_vqtbl2q_v: {
6846 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
6847 Ops, "vtbl2");
6848 }
6849 case NEON::BI__builtin_neon_vqtbl3q_v: {
6850 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
6851 Ops, "vtbl3");
6852 }
6853 case NEON::BI__builtin_neon_vqtbl4q_v: {
6854 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
6855 Ops, "vtbl4");
6856 }
6857 case NEON::BI__builtin_neon_vqtbx1q_v: {
6858 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
6859 Ops, "vtbx1");
6860 }
6861 case NEON::BI__builtin_neon_vqtbx2q_v: {
6862 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
6863 Ops, "vtbx2");
6864 }
6865 case NEON::BI__builtin_neon_vqtbx3q_v: {
6866 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
6867 Ops, "vtbx3");
6868 }
6869 case NEON::BI__builtin_neon_vqtbx4q_v: {
6870 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
6871 Ops, "vtbx4");
6872 }
6873 case NEON::BI__builtin_neon_vsqadd_v:
6874 case NEON::BI__builtin_neon_vsqaddq_v: {
6875 Int = Intrinsic::aarch64_neon_usqadd;
6876 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
6877 }
6878 case NEON::BI__builtin_neon_vuqadd_v:
6879 case NEON::BI__builtin_neon_vuqaddq_v: {
6880 Int = Intrinsic::aarch64_neon_suqadd;
6881 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
6882 }
6883
6884 case NEON::BI__builtin_neon_vluti2_laneq_mf8:
6885 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
6886 case NEON::BI__builtin_neon_vluti2_laneq_f16:
6887 case NEON::BI__builtin_neon_vluti2_laneq_p16:
6888 case NEON::BI__builtin_neon_vluti2_laneq_p8:
6889 case NEON::BI__builtin_neon_vluti2_laneq_s16:
6890 case NEON::BI__builtin_neon_vluti2_laneq_s8:
6891 case NEON::BI__builtin_neon_vluti2_laneq_u16:
6892 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
6893 Int = Intrinsic::aarch64_neon_vluti2_laneq;
6894 llvm::Type *Tys[2];
6895 Tys[0] = Ty;
6896 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
6897 /*isQuad*/ false));
6898 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
6899 }
6900 case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
6901 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
6902 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
6903 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
6904 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
6905 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
6906 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
6907 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
6908 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
6909 Int = Intrinsic::aarch64_neon_vluti2_laneq;
6910 llvm::Type *Tys[2];
6911 Tys[0] = Ty;
6912 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
6913 /*isQuad*/ true));
6914 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
6915 }
6916 case NEON::BI__builtin_neon_vluti2_lane_mf8:
6917 case NEON::BI__builtin_neon_vluti2_lane_bf16:
6918 case NEON::BI__builtin_neon_vluti2_lane_f16:
6919 case NEON::BI__builtin_neon_vluti2_lane_p16:
6920 case NEON::BI__builtin_neon_vluti2_lane_p8:
6921 case NEON::BI__builtin_neon_vluti2_lane_s16:
6922 case NEON::BI__builtin_neon_vluti2_lane_s8:
6923 case NEON::BI__builtin_neon_vluti2_lane_u16:
6924 case NEON::BI__builtin_neon_vluti2_lane_u8: {
6925 Int = Intrinsic::aarch64_neon_vluti2_lane;
6926 llvm::Type *Tys[2];
6927 Tys[0] = Ty;
6928 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
6929 /*isQuad*/ false));
6930 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
6931 }
6932 case NEON::BI__builtin_neon_vluti2q_lane_mf8:
6933 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
6934 case NEON::BI__builtin_neon_vluti2q_lane_f16:
6935 case NEON::BI__builtin_neon_vluti2q_lane_p16:
6936 case NEON::BI__builtin_neon_vluti2q_lane_p8:
6937 case NEON::BI__builtin_neon_vluti2q_lane_s16:
6938 case NEON::BI__builtin_neon_vluti2q_lane_s8:
6939 case NEON::BI__builtin_neon_vluti2q_lane_u16:
6940 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
6941 Int = Intrinsic::aarch64_neon_vluti2_lane;
6942 llvm::Type *Tys[2];
6943 Tys[0] = Ty;
6944 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
6945 /*isQuad*/ true));
6946 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
6947 }
6948 case NEON::BI__builtin_neon_vluti4q_lane_mf8:
6949 case NEON::BI__builtin_neon_vluti4q_lane_p8:
6950 case NEON::BI__builtin_neon_vluti4q_lane_s8:
6951 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
6952 Int = Intrinsic::aarch64_neon_vluti4q_lane;
6953 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane");
6954 }
6955 case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
6956 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
6957 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
6958 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
6959 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
6960 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq");
6961 }
6962 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
6963 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
6964 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
6965 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
6966 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
6967 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
6968 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
6969 }
6970 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
6971 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
6972 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
6973 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
6974 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
6975 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
6976 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
6977 }
6978 case NEON::BI__builtin_neon_vmmlaq_f16_mf8_fpm:
6979 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla,
6980 {llvm::FixedVectorType::get(HalfTy, 8),
6981 llvm::FixedVectorType::get(Int8Ty, 16)},
6982 Ops, E, "fmmla");
6983 case NEON::BI__builtin_neon_vmmlaq_f32_mf8_fpm:
6984 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla,
6985 {llvm::FixedVectorType::get(FloatTy, 4),
6986 llvm::FixedVectorType::get(Int8Ty, 16)},
6987 Ops, E, "fmmla");
6988 case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
6989 ExtractLow = true;
6990 [[fallthrough]];
6991 case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
6992 case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
6993 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
6994 llvm::FixedVectorType::get(BFloatTy, 8),
6995 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
6996 case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
6997 ExtractLow = true;
6998 [[fallthrough]];
6999 case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
7000 case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
7001 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
7002 llvm::FixedVectorType::get(BFloatTy, 8),
7003 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7004 case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
7005 ExtractLow = true;
7006 [[fallthrough]];
7007 case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
7008 case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
7009 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
7010 llvm::FixedVectorType::get(HalfTy, 8),
7011 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7012 case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
7013 ExtractLow = true;
7014 [[fallthrough]];
7015 case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
7016 case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
7017 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
7018 llvm::FixedVectorType::get(HalfTy, 8),
7019 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7020 case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
7021 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7022 llvm::FixedVectorType::get(Int8Ty, 8),
7023 Ops[0]->getType(), false, Ops, E, "vfcvtn");
7024 case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
7025 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7026 llvm::FixedVectorType::get(Int8Ty, 8),
7027 llvm::FixedVectorType::get(HalfTy, 4), false, Ops,
7028 E, "vfcvtn");
7029 case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
7030 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7031 llvm::FixedVectorType::get(Int8Ty, 16),
7032 llvm::FixedVectorType::get(HalfTy, 8), false, Ops,
7033 E, "vfcvtn");
7034 case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
7035 llvm::Type *Ty = llvm::FixedVectorType::get(Int8Ty, 16);
7036 Ops[0] = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
7037 uint64_t(0));
7038 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn2, Ty,
7039 Ops[1]->getType(), false, Ops, E, "vfcvtn2");
7040 }
7041
7042 case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
7043 case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
7044 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2, false, HalfTy,
7045 Ops, E, "fdot2");
7046 case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
7047 case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
7048 ExtendLaneArg = true;
7049 [[fallthrough]];
7050 case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
7051 case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
7052 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2_lane,
7053 ExtendLaneArg, HalfTy, Ops, E, "fdot2_lane");
7054 case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
7055 case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
7056 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4, false,
7057 FloatTy, Ops, E, "fdot4");
7058 case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
7059 case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
7060 ExtendLaneArg = true;
7061 [[fallthrough]];
7062 case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
7063 case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
7064 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4_lane,
7065 ExtendLaneArg, FloatTy, Ops, E, "fdot4_lane");
7066
7067 case NEON::BI__builtin_neon_vdot_f32_f16:
7068 case NEON::BI__builtin_neon_vdotq_f32_f16: {
7069 llvm::Type *InputTy =
7070 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
7071 llvm::Type *Tys[2] = {Ty, InputTy};
7072 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_fdot, Tys),
7073 Ops, "vdot");
7074 }
7075
7076 case NEON::BI__builtin_neon_vdot_lane_f32_f16:
7077 case NEON::BI__builtin_neon_vdot_laneq_f32_f16:
7078 case NEON::BI__builtin_neon_vdotq_lane_f32_f16:
7079 case NEON::BI__builtin_neon_vdotq_laneq_f32_f16: {
7080 llvm::FixedVectorType *InputTy =
7081 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
7082 llvm::FixedVectorType *LaneTy = llvm::FixedVectorType::get(
7083 HalfTy, Ops[2]->getType()->getPrimitiveSizeInBits() / 16);
7084 // Treat the lane argument as a splat and use non-lane version of the
7085 // intrinsic.
7086 Ops[2] = Builder.CreateBitCast(Ops[2], LaneTy);
7087 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]),
7088 InputTy->getElementCount());
7089 llvm::Type *Tys[2] = {Ty, InputTy};
7090 Ops.pop_back();
7091 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_fdot, Tys),
7092 Ops, "vdot");
7093 }
7094
7095 case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
7096 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalb,
7097 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7098 "vmlal");
7099 case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
7100 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalt,
7101 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7102 "vmlal");
7103 case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
7104 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbb,
7105 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7106 "vmlall");
7107 case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
7108 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbt,
7109 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7110 "vmlall");
7111 case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
7112 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltb,
7113 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7114 "vmlall");
7115 case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
7116 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltt,
7117 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7118 "vmlall");
7119 case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
7120 ExtendLaneArg = true;
7121 [[fallthrough]];
7122 case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
7123 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalb_lane,
7124 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7125 case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
7126 ExtendLaneArg = true;
7127 [[fallthrough]];
7128 case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
7129 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalt_lane,
7130 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7131 case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
7132 ExtendLaneArg = true;
7133 [[fallthrough]];
7134 case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
7135 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbb_lane,
7136 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7137 case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
7138 ExtendLaneArg = true;
7139 [[fallthrough]];
7140 case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
7141 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbt_lane,
7142 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7143 case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
7144 ExtendLaneArg = true;
7145 [[fallthrough]];
7146 case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
7147 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltb_lane,
7148 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7149 case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
7150 ExtendLaneArg = true;
7151 [[fallthrough]];
7152 case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
7153 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltt_lane,
7154 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7155 case NEON::BI__builtin_neon_vamin_f16:
7156 case NEON::BI__builtin_neon_vaminq_f16:
7157 case NEON::BI__builtin_neon_vamin_f32:
7158 case NEON::BI__builtin_neon_vaminq_f32:
7159 case NEON::BI__builtin_neon_vaminq_f64: {
7160 Int = Intrinsic::aarch64_neon_famin;
7161 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
7162 }
7163 case NEON::BI__builtin_neon_vamax_f16:
7164 case NEON::BI__builtin_neon_vamaxq_f16:
7165 case NEON::BI__builtin_neon_vamax_f32:
7166 case NEON::BI__builtin_neon_vamaxq_f32:
7167 case NEON::BI__builtin_neon_vamaxq_f64: {
7168 Int = Intrinsic::aarch64_neon_famax;
7169 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
7170 }
7171 case NEON::BI__builtin_neon_vscale_f16:
7172 case NEON::BI__builtin_neon_vscaleq_f16:
7173 case NEON::BI__builtin_neon_vscale_f32:
7174 case NEON::BI__builtin_neon_vscaleq_f32:
7175 case NEON::BI__builtin_neon_vscaleq_f64: {
7176 Int = Intrinsic::aarch64_neon_fp8_fscale;
7177 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fscale");
7178 }
7179 }
7180}
7181
7183 const CallExpr *E) {
7184 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
7185 BuiltinID == BPF::BI__builtin_btf_type_id ||
7186 BuiltinID == BPF::BI__builtin_preserve_type_info ||
7187 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
7188 "unexpected BPF builtin");
7189
7190 // A sequence number, injected into IR builtin functions, to
7191 // prevent CSE given the only difference of the function
7192 // may just be the debuginfo metadata.
7193 static uint32_t BuiltinSeqNum;
7194
7195 switch (BuiltinID) {
7196 default:
7197 llvm_unreachable("Unexpected BPF builtin");
7198 case BPF::BI__builtin_preserve_field_info: {
7199 const Expr *Arg = E->getArg(0);
7200 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
7201
7202 if (!getDebugInfo()) {
7203 CGM.Error(E->getExprLoc(),
7204 "using __builtin_preserve_field_info() without -g");
7205 return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
7206 : EmitLValue(Arg).emitRawPointer(*this);
7207 }
7208
7209 // Enable underlying preserve_*_access_index() generation.
7210 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
7211 IsInPreservedAIRegion = true;
7212 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
7213 : EmitLValue(Arg).emitRawPointer(*this);
7214 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
7215
7216 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7217 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
7218
7219 // Built the IR for the preserve_field_info intrinsic.
7220 llvm::Function *FnGetFieldInfo = Intrinsic::getOrInsertDeclaration(
7221 &CGM.getModule(), Intrinsic::bpf_preserve_field_info,
7222 {FieldAddr->getType()});
7223 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
7224 }
7225 case BPF::BI__builtin_btf_type_id:
7226 case BPF::BI__builtin_preserve_type_info: {
7227 if (!getDebugInfo()) {
7228 CGM.Error(E->getExprLoc(), "using builtin function without -g");
7229 return nullptr;
7230 }
7231
7232 const Expr *Arg0 = E->getArg(0);
7233 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
7234 Arg0->getType(), Arg0->getExprLoc());
7235
7236 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7237 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
7238 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
7239
7240 llvm::Function *FnDecl;
7241 if (BuiltinID == BPF::BI__builtin_btf_type_id)
7242 FnDecl = Intrinsic::getOrInsertDeclaration(
7243 &CGM.getModule(), Intrinsic::bpf_btf_type_id, {});
7244 else
7245 FnDecl = Intrinsic::getOrInsertDeclaration(
7246 &CGM.getModule(), Intrinsic::bpf_preserve_type_info, {});
7247 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
7248 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
7249 return Fn;
7250 }
7251 case BPF::BI__builtin_preserve_enum_value: {
7252 if (!getDebugInfo()) {
7253 CGM.Error(E->getExprLoc(), "using builtin function without -g");
7254 return nullptr;
7255 }
7256
7257 const Expr *Arg0 = E->getArg(0);
7258 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
7259 Arg0->getType(), Arg0->getExprLoc());
7260
7261 // Find enumerator
7262 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
7263 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
7264 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
7265 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
7266
7267 auto InitVal = Enumerator->getInitVal();
7268 std::string InitValStr;
7269 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
7270 InitValStr = std::to_string(InitVal.getSExtValue());
7271 else
7272 InitValStr = std::to_string(InitVal.getZExtValue());
7273 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
7274 Value *EnumStrVal = Builder.CreateGlobalString(EnumStr);
7275
7276 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7277 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
7278 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
7279
7280 llvm::Function *IntrinsicFn = Intrinsic::getOrInsertDeclaration(
7281 &CGM.getModule(), Intrinsic::bpf_preserve_enum_value, {});
7282 CallInst *Fn =
7283 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
7284 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
7285 return Fn;
7286 }
7287 }
7288}
7289
7292 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
7293 "Not a power-of-two sized vector!");
7294 bool AllConstants = true;
7295 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
7296 AllConstants &= isa<Constant>(Ops[i]);
7297
7298 // If this is a constant vector, create a ConstantVector.
7299 if (AllConstants) {
7301 for (llvm::Value *Op : Ops)
7302 CstOps.push_back(cast<Constant>(Op));
7303 return llvm::ConstantVector::get(CstOps);
7304 }
7305
7306 // Otherwise, insertelement the values to build the vector.
7307 Value *Result = llvm::PoisonValue::get(
7308 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
7309
7310 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7311 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
7312
7313 return Result;
7314}
7315
7316Value *CodeGenFunction::EmitAArch64CpuInit() {
7317 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
7318 llvm::FunctionCallee Func =
7319 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
7320 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
7321 cast<llvm::GlobalValue>(Func.getCallee())
7322 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
7323 return Builder.CreateCall(Func);
7324}
7325
7326Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
7327 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
7328 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
7330 ArgStr.split(OrigFeatures, "+");
7332 for (StringRef Feature : OrigFeatures) {
7333 Feature = Feature.trim();
7334 if (!llvm::AArch64::parseFMVExtension(Feature))
7335 return Builder.getFalse();
7336 if (Feature != "default")
7337 Features.push_back(Feature);
7338 }
7339 return EmitAArch64CpuSupports(Features);
7340}
7341
7342llvm::Value *
7343CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
7344 llvm::APInt FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
7345 Value *Result = Builder.getTrue();
7346 if (FeaturesMask != 0) {
7347 // Get features from structure in runtime library
7348 // struct {
7349 // unsigned long long features;
7350 // } __aarch64_cpu_features;
7351 llvm::Type *STy = llvm::StructType::get(Int64Ty);
7352 llvm::Constant *AArch64CPUFeatures =
7353 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
7354 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
7355 llvm::Value *CpuFeatures = Builder.CreateGEP(
7356 STy, AArch64CPUFeatures,
7357 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
7358 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
7360 Value *Mask = Builder.getInt(FeaturesMask.trunc(64));
7361 Value *Bitset = Builder.CreateAnd(Features, Mask);
7362 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
7363 Result = Builder.CreateAnd(Result, Cmp);
7364 }
7365 return Result;
7366}
Utilities used for generating code for AArch64 that are shared between the classic and ClangIR code-g...
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)
#define NEONMAP0(NameBase)
#define V(N, I)
Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E)
static cir::VectorType getSVEVectorForElementType(CIRGenModule &cgm, mlir::Type eltTy)
static const IntrinsicInfo * findARMVectorIntrinsicInMap(ArrayRef< IntrinsicInfo > intrinsicMap, unsigned builtinID, bool &mapProvenSorted)
static Value * EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, SpecialRegisterAccessKind AccessKind, StringRef SysReg="")
Definition ARM.cpp:1952
static llvm::Value * ARMMVEVectorReinterpret(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *DestType)
Definition ARM.cpp:2832
static llvm::VectorType * GetFloatNeonType(CodeGenFunction *CGF, NeonTypeFlags IntTypeFlags)
Definition ARM.cpp:401
static llvm::Value * MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, uint32_t Shift, bool Unsigned)
Definition ARM.cpp:2802
static llvm::Value * SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, llvm::Type *T, bool Unsigned)
Definition ARM.cpp:2795
static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
Definition ARM.cpp:3849
static Value * EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< Value * > &Ops, llvm::Triple::ArchType Arch)
Definition ARM.cpp:3054
static void swapCommutativeSMEOperands(unsigned BuiltinID, SmallVectorImpl< Value * > &Ops)
Definition ARM.cpp:4322
static bool AArch64SISDIntrinsicsProvenSorted
Definition ARM.cpp:986
static llvm::Value * ARMMVECreateFPToSI(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *Ty)
Definition ARM.cpp:2926
static bool HasExtraNeonArgument(unsigned BuiltinID)
Return true if BuiltinID is an overloaded Neon intrinsic with an extra argument that specifies the ve...
Definition ARM.cpp:2073
static Value * EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, const ARMNeonVectorIntrinsicInfo &SISDInfo, SmallVectorImpl< Value * > &Ops, const CallExpr *E)
Definition ARM.cpp:1054
static llvm::Value * ARMMVECreateFPToUI(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *Ty)
Definition ARM.cpp:2934
static llvm::Value * ARMMVECreateSIToFP(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *Ty)
Definition ARM.cpp:2910
static bool AArch64SVEIntrinsicsProvenSorted
Definition ARM.cpp:987
static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
Definition ARM.cpp:3855
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context)
Definition ARM.cpp:2791
static bool AArch64SMEIntrinsicsProvenSorted
Definition ARM.cpp:988
static llvm::Value * VectorZip(CGBuilderTy &Builder, llvm::Value *V0, llvm::Value *V1)
Definition ARM.cpp:2869
constexpr unsigned SVEBitsPerBlock
Definition ARM.cpp:3334
static const std::pair< unsigned, unsigned > NEONEquivalentIntrinsicMap[]
Definition ARM.cpp:860
static llvm::FixedVectorType * GetNeonType(CodeGenFunction *CGF, NeonTypeFlags TypeFlags, bool HasFastHalfType=true, bool V1Ty=false, bool AllowBFloatArgsAndRet=true)
Definition ARM.cpp:361
Value * readX18AsPtr(CodeGenFunction &CGF)
Helper for the read/write/add/inc X18 builtins: read the X18 register and return it as an i8 pointer.
Definition ARM.cpp:4422
static llvm::Value * ARMMVEVectorElementReverse(CGBuilderTy &Builder, llvm::Value *V, unsigned ReverseWidth)
Definition ARM.cpp:2896
static std::optional< CodeGenFunction::MSVCIntrin > translateAarch64ToMsvcIntrin(unsigned BuiltinID)
Definition ARM.cpp:33
static std::optional< CodeGenFunction::MSVCIntrin > translateArmToMsvcIntrin(unsigned BuiltinID)
Definition ARM.cpp:192
static llvm::Value * ARMMVECreateUIToFP(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *Ty)
Definition ARM.cpp:2918
static llvm::Value * VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd)
Definition ARM.cpp:2858
static llvm::Value * ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT)
Definition ARM.cpp:2884
SpecialRegisterAccessKind
Definition ARM.cpp:1943
@ VolatileRead
Definition ARM.cpp:1945
@ NormalRead
Definition ARM.cpp:1944
@ Write
Definition ARM.cpp:1946
static const AArch64SVEAndSMEVectorIntrinsicInfo AArch64SMEIntrinsicMap[]
Definition ARM.cpp:974
static llvm::Value * ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V)
Definition ARM.cpp:2824
static bool NEONSIMDIntrinsicsProvenSorted
Definition ARM.cpp:983
static Value * emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID, llvm::Type *Ty, ArrayRef< Value * > Args)
Definition ARM.cpp:344
static Value * EmitRangePrefetchBuiltin(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Definition ARM.cpp:2018
static Value * packTBLDVectorList(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Value *ExtOp, Value *IndexOp, llvm::Type *ResTy, unsigned IntID, const char *Name)
Definition ARM.cpp:1870
static bool AArch64SIMDIntrinsicsProvenSorted
Definition ARM.cpp:985
static const ARMNeonVectorIntrinsicInfo ARMSIMDIntrinsicMap[]
Definition ARM.cpp:540
static const AArch64SVEAndSMEVectorIntrinsicInfo AArch64SVEIntrinsicMap[]
Definition ARM.cpp:959
TokenType getType() const
Returns the token's type, e.g.
Result
Implement __builtin_bit_cast and related operations.
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
HLSLResourceBindingAttr::RegisterType RegisterType
Definition SemaHLSL.cpp:57
Enumerates target-specific builtins in their own namespaces within namespace clang.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:223
QualType GetBuiltinType(unsigned ID, GetBuiltinTypeError &Error, unsigned *IntegerConstantArgs=nullptr) const
Return the type for the specified builtin.
@ GE_None
No error.
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition Expr.h:2949
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition Expr.h:3153
FunctionDecl * getDirectCallee()
If the callee is a FunctionDecl, return it. Otherwise return null.
Definition Expr.h:3132
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition Expr.h:3140
QualType getCallReturnType(const ASTContext &Ctx) const
getCallReturnType - Get the return type of the call expr.
Definition Expr.cpp:1609
static CharUnits One()
One - Construct a CharUnits quantity of one.
Definition CharUnits.h:58
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition CharUnits.h:63
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition Address.h:128
static Address invalid()
Definition Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition Address.h:253
CharUnits getAlignment() const
Definition Address.h:194
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition Address.h:276
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition Address.h:204
An aggregate value slot.
Definition CGValue.h:551
Address getAddress() const
Definition CGValue.h:691
llvm::DIType * getOrCreateStandaloneType(QualType Ty, SourceLocation Loc)
Emit standalone debug info for a type.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Value * EmitSVEPredicateCast(llvm::Value *Pred, llvm::ScalableVectorType *VTy)
Definition ARM.cpp:3343
llvm::Value * EmitFP8NeonFMLACall(unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy, SmallVectorImpl< llvm::Value * > &Ops, const CallExpr *E, const char *name)
Definition ARM.cpp:472
llvm::Value * BuildVector(ArrayRef< llvm::Value * > Ops)
Definition ARM.cpp:7291
llvm::Value * EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, const CallExpr *E)
llvm::Value * EmitSVEStructLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3518
llvm::Value * EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID, bool IsZExtReturn)
Definition ARM.cpp:3627
llvm::Value * EmitFP8NeonCall(unsigned IID, ArrayRef< llvm::Type * > Tys, SmallVectorImpl< llvm::Value * > &O, const CallExpr *E, const char *name)
Definition ARM.cpp:447
llvm::Type * ConvertType(QualType T)
llvm::Value * EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3482
llvm::Value * EmitSMEReadWrite(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3773
llvm::Type * SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags)
SVEBuiltinMemEltTy - Returns the memory element type for this memory access builtin.
Definition ARM.cpp:3209
llvm::Value * EmitSVEScatterStore(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3436
llvm::Value * EmitSVEMaskedStore(const CallExpr *, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
Definition ARM.cpp:3684
llvm::Value * EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Definition ARM.cpp:4346
void GetAArch64SVEProcessedOperands(unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, SVETypeFlags TypeFlags)
Definition ARM.cpp:3912
llvm::Value * EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3394
llvm::Function * LookupNeonLLVMIntrinsic(unsigned IntrinsicID, unsigned Modifier, llvm::Type *ArgTy, const CallExpr *E)
Definition ARM.cpp:1012
llvm::Type * getEltType(const SVETypeFlags &TypeFlags)
Definition ARM.cpp:3225
llvm::Value * EmitCommonNeonBuiltinExpr(unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch)
Definition ARM.cpp:1120
llvm::Value * EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx, const llvm::ElementCount &Count)
llvm::Value * EmitSVEDupX(llvm::Value *Scalar)
const TargetInfo & getTarget() const
llvm::Value * EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Definition ARM.cpp:3955
llvm::Value * EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0, llvm::Type *Ty1, bool Extract, SmallVectorImpl< llvm::Value * > &Ops, const CallExpr *E, const char *name)
Definition ARM.cpp:493
llvm::Value * EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition ARM.cpp:2100
llvm::ScalableVectorType * getSVEType(const SVETypeFlags &TypeFlags)
Definition ARM.cpp:3298
llvm::Value * EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Definition ARM.cpp:7182
llvm::Value * EmitSMELdrStr(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3795
llvm::Value * EmitSVETupleCreate(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
Definition ARM.cpp:3900
llvm::Value * EmitSVEPMull(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
Definition ARM.cpp:3581
llvm::Value * EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition ARM.cpp:2942
AggValueSlot CreateAggTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateAggTemp - Create a temporary memory object for the given aggregate type.
llvm::Value * EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt, llvm::Type *Ty, bool usgn, const char *name)
Definition ARM.cpp:509
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
SmallVector< llvm::Type *, 2 > getSVEOverloadTypes(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
Definition ARM.cpp:3862
const TargetCodeGenInfo & getTargetHooks() const
RawAddress CreateMemTempWithoutCast(QualType T, const Twine &Name="tmp")
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment without...
Definition CGExpr.cpp:232
llvm::Value * EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty, bool negateForRightShift)
Definition ARM.cpp:487
bool IsInPreservedAIRegion
True if CodeGen currently emits code inside preserved access index region.
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch)
Definition ARM.cpp:4433
llvm::Value * EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E)
llvm::Value * EmitFP8NeonFDOTCall(unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy, SmallVectorImpl< llvm::Value * > &Ops, const CallExpr *E, const char *name)
Definition ARM.cpp:456
llvm::Value * vectorWrapScalar16(llvm::Value *Op)
Definition ARM.cpp:3197
llvm::Value * EmitARMCDEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition ARM.cpp:3043
llvm::Value * EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty, const llvm::CmpInst::Predicate Pred, const llvm::Twine &Name="")
Definition ARM.cpp:1841
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
Definition CGExpr.cpp:310
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitSVEMovl(const SVETypeFlags &TypeFlags, llvm::ArrayRef< llvm::Value * > Ops, unsigned BuiltinID)
Definition ARM.cpp:3599
llvm::Value * EmitSVEPredicateTupleCast(llvm::Value *PredTuple, llvm::StructType *Ty)
Definition ARM.cpp:3378
llvm::Value * EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
Definition ARM.cpp:3606
llvm::Value * EmitSMEZero(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3785
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
Definition CGExpr.cpp:1599
llvm::Value * EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3533
llvm::Value * EmitSMELd1St1(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3738
void EmitAggExpr(const Expr *E, AggValueSlot AS)
EmitAggExpr - Emit the computation of the specified expression of aggregate type.
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type, returning the result.
llvm::Value * EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty)
Definition ARM.cpp:3826
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
Definition CGExpr.cpp:1715
llvm::LLVMContext & getLLVMContext()
llvm::ScalableVectorType * getSVEPredType(const SVETypeFlags &TypeFlags)
Definition ARM.cpp:3263
llvm::Value * EmitNeonCall(llvm::Function *F, SmallVectorImpl< llvm::Value * > &O, const char *name, unsigned shift=0, bool rightshift=false)
Definition ARM.cpp:427
llvm::Value * EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags, ArrayRef< llvm::Value * > Ops)
Definition ARM.cpp:3889
This class organizes the cross-function state that is used while generating LLVM code.
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false, bool AssumeConvergent=false)
Create or return a runtime function declaration with the specified type and name.
ASTContext & getContext() const
llvm::LLVMContext & getLLVMContext()
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys={})
llvm::Value * getRawBitFieldPointer(CodeGenFunction &CGF) const
Definition CGValue.h:441
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
ReturnValueSlot - Contains the address where the return value of a function can be stored, and whether the address is volatile or not.
Definition CGCall.h:383
This represents one expression.
Definition Expr.h:112
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer, using any crazy technique that we want to.
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point.
Definition Expr.cpp:3104
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
Definition Expr.cpp:3095
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx) const
getIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
ExprObjectKind getObjectKind() const
getObjectKind - The object kind that this expression produces.
Definition Expr.h:454
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic expression.
Definition Expr.cpp:283
QualType getType() const
Definition Expr.h:144
Represents a function declaration or definition.
Definition Decl.h:2027
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition Decl.h:301
Flags to identify the types for overloaded Neon builtins.
EltType getEltType() const
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition TypeBase.h:3392
QualType getPointeeType() const
Definition TypeBase.h:3402
A (possibly-)qualified type.
Definition TypeBase.h:937
The collection of all-type qualifiers we support.
Definition TypeBase.h:331
Flags to identify the types for overloaded SVE builtins.
bool isZExtReturn() const
bool isReverseUSDOT() const
bool isOverloadNone() const
MemEltType getMemEltType() const
bool isGatherLoad() const
EltType getEltType() const
bool isOverloadFirstandLast() const
bool isOverloadDefault() const
bool isPrefetch() const
bool isOverloadWhileRW() const
bool isTupleSet() const
bool isReverseMergeAnyAccOp() const
bool isReductionQV() const
bool isTupleGet() const
bool isInsertOp1SVALL() const
bool isAppendSVALL() const
bool isReverseMergeAnyBinOp() const
bool isStructStore() const
bool isOverloadDefaultAndOp0() const
bool isTupleCreate() const
bool isGatherPrefetch() const
bool hasSplatOperand() const
MergeType getMergeType() const
bool isByteIndexed() const
bool isStructLoad() const
bool isOverloadWhileOrMultiVecCvt() const
unsigned getSplatOperand() const
bool isScatterStore() const
bool isReverseCompare() const
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
virtual bool hasFastHalfType() const
Determine whether the target has fast native support for operations on half types.
Definition TargetInfo.h:712
bool isBigEndian() const
The base class of the type hierarchy.
Definition TypeBase.h:1875
const T * castAs() const
Member-template castAs<specific type>.
Definition TypeBase.h:9344
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:789
QualType getType() const
Definition Decl.h:723
QualType getType() const
Definition Value.cpp:238
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
Definition CGValue.h:155
const ARMNeonVectorIntrinsicInfo AArch64SISDIntrinsicMap[]
const ARMNeonVectorIntrinsicInfo AArch64SIMDIntrinsicMap[]
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
Definition Address.h:330
@ OK_BitField
A bitfield object is a bitfield on a C or C++ record.
Definition Specifiers.h:155
@ Result
The result type of a method or function.
Definition TypeBase.h:905
U cast(CodeGen::Address addr)
Definition Address.h:327
@ Enumerator
Enumerator value with fixed underlying type.
Definition Sema.h:840
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 __packed_splat2 __packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 uint32_t
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::Type * HalfTy
half, bfloat, float, double
EvalResult is a struct with detailed info about an evaluated expression.
Definition Expr.h:652
Describes an AArch64 SVE or SME intrinsic.
Describes an ARM or AArch64 NEON intrinsic, or an AArch64 SISD intrinsic.
#define trunc(__x)
Definition tgmath.h:1216
#define round(__x)
Definition tgmath.h:1148
#define rint(__x)
Definition tgmath.h:1131
#define floor(__x)
Definition tgmath.h:722
#define ceil(__x)
Definition tgmath.h:601