clang 23.0.0git
ARM.cpp
Go to the documentation of this file.
1//===---------- ARM.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ABIInfo.h"
14#include "CGBuiltin.h"
15#include "CGDebugInfo.h"
16#include "TargetInfo.h"
19#include "llvm/IR/InlineAsm.h"
20#include "llvm/IR/IntrinsicsAArch64.h"
21#include "llvm/IR/IntrinsicsARM.h"
22#include "llvm/IR/IntrinsicsBPF.h"
23#include "llvm/TargetParser/AArch64TargetParser.h"
24
25#include <numeric>
26
27using namespace clang;
28using namespace CodeGen;
29using namespace llvm;
30using namespace clang::aarch64;
31
// Translates an AArch64 builtin ID into a CodeGenFunction::MSVCIntrin
// enumerator.
//
// BuiltinID is compared against clang::AArch64::BI* values. The result is the
// matching enumerator, or std::nullopt when BuiltinID is not listed below.
// Builtins that differ only in operand width (8, 16, unsuffixed, 64, Pointer)
// share one enumerator, while each _acq / _rel / _nf suffix keeps its own.
32static std::optional<CodeGenFunction::MSVCIntrin>
33translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
34 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
35 switch (BuiltinID) {
 // Anything not listed below has no mapping.
36 default:
37 return std::nullopt;
 // Bit scans: the plain and the 64-suffixed builtin share an enumerator.
38 case clang::AArch64::BI_BitScanForward:
39 case clang::AArch64::BI_BitScanForward64:
40 return MSVCIntrin::_BitScanForward;
41 case clang::AArch64::BI_BitScanReverse:
42 case clang::AArch64::BI_BitScanReverse64:
43 return MSVCIntrin::_BitScanReverse;
 // Interlocked operations without an ordering suffix: only the 64-suffixed
 // builtins appear in this switch.
 // NOTE(review): the narrower unsuffixed forms are presumably translated
 // somewhere else - confirm.
44 case clang::AArch64::BI_InterlockedAnd64:
45 return MSVCIntrin::_InterlockedAnd;
46 case clang::AArch64::BI_InterlockedExchange64:
47 return MSVCIntrin::_InterlockedExchange;
48 case clang::AArch64::BI_InterlockedExchangeAdd64:
49 return MSVCIntrin::_InterlockedExchangeAdd;
50 case clang::AArch64::BI_InterlockedExchangeSub64:
51 return MSVCIntrin::_InterlockedExchangeSub;
52 case clang::AArch64::BI_InterlockedOr64:
53 return MSVCIntrin::_InterlockedOr;
54 case clang::AArch64::BI_InterlockedXor64:
55 return MSVCIntrin::_InterlockedXor;
56 case clang::AArch64::BI_InterlockedDecrement64:
57 return MSVCIntrin::_InterlockedDecrement;
58 case clang::AArch64::BI_InterlockedIncrement64:
59 return MSVCIntrin::_InterlockedIncrement;
 // InterlockedExchangeAdd, one group per ordering suffix.
60 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
61 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
62 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
63 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
64 return MSVCIntrin::_InterlockedExchangeAdd_acq;
65 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
66 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
67 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
68 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
69 return MSVCIntrin::_InterlockedExchangeAdd_rel;
70 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
71 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
72 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
73 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
74 return MSVCIntrin::_InterlockedExchangeAdd_nf;
 // InterlockedExchange; the Pointer builtins join the same groups.
75 case clang::AArch64::BI_InterlockedExchange8_acq:
76 case clang::AArch64::BI_InterlockedExchange16_acq:
77 case clang::AArch64::BI_InterlockedExchange_acq:
78 case clang::AArch64::BI_InterlockedExchange64_acq:
79 case clang::AArch64::BI_InterlockedExchangePointer_acq:
80 return MSVCIntrin::_InterlockedExchange_acq;
81 case clang::AArch64::BI_InterlockedExchange8_rel:
82 case clang::AArch64::BI_InterlockedExchange16_rel:
83 case clang::AArch64::BI_InterlockedExchange_rel:
84 case clang::AArch64::BI_InterlockedExchange64_rel:
85 case clang::AArch64::BI_InterlockedExchangePointer_rel:
86 return MSVCIntrin::_InterlockedExchange_rel;
87 case clang::AArch64::BI_InterlockedExchange8_nf:
88 case clang::AArch64::BI_InterlockedExchange16_nf:
89 case clang::AArch64::BI_InterlockedExchange_nf:
90 case clang::AArch64::BI_InterlockedExchange64_nf:
91 case clang::AArch64::BI_InterlockedExchangePointer_nf:
92 return MSVCIntrin::_InterlockedExchange_nf;
 // InterlockedCompareExchange. The _acq and _rel groups include a Pointer
 // builtin; the _nf group below lists none.
93 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
94 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
95 case clang::AArch64::BI_InterlockedCompareExchange_acq:
96 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
97 case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
98 return MSVCIntrin::_InterlockedCompareExchange_acq;
99 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
100 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
101 case clang::AArch64::BI_InterlockedCompareExchange_rel:
102 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
103 case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
104 return MSVCIntrin::_InterlockedCompareExchange_rel;
105 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
106 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
107 case clang::AArch64::BI_InterlockedCompareExchange_nf:
108 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
109 return MSVCIntrin::_InterlockedCompareExchange_nf;
 // The 128-suffixed compare-exchange builtins each have a dedicated
 // enumerator instead of joining the groups above.
110 case clang::AArch64::BI_InterlockedCompareExchange128:
111 return MSVCIntrin::_InterlockedCompareExchange128;
112 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
113 return MSVCIntrin::_InterlockedCompareExchange128_acq;
114 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
115 return MSVCIntrin::_InterlockedCompareExchange128_nf;
116 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
117 return MSVCIntrin::_InterlockedCompareExchange128_rel;
 // InterlockedOr.
118 case clang::AArch64::BI_InterlockedOr8_acq:
119 case clang::AArch64::BI_InterlockedOr16_acq:
120 case clang::AArch64::BI_InterlockedOr_acq:
121 case clang::AArch64::BI_InterlockedOr64_acq:
122 return MSVCIntrin::_InterlockedOr_acq;
123 case clang::AArch64::BI_InterlockedOr8_rel:
124 case clang::AArch64::BI_InterlockedOr16_rel:
125 case clang::AArch64::BI_InterlockedOr_rel:
126 case clang::AArch64::BI_InterlockedOr64_rel:
127 return MSVCIntrin::_InterlockedOr_rel;
128 case clang::AArch64::BI_InterlockedOr8_nf:
129 case clang::AArch64::BI_InterlockedOr16_nf:
130 case clang::AArch64::BI_InterlockedOr_nf:
131 case clang::AArch64::BI_InterlockedOr64_nf:
132 return MSVCIntrin::_InterlockedOr_nf;
 // InterlockedXor.
133 case clang::AArch64::BI_InterlockedXor8_acq:
134 case clang::AArch64::BI_InterlockedXor16_acq:
135 case clang::AArch64::BI_InterlockedXor_acq:
136 case clang::AArch64::BI_InterlockedXor64_acq:
137 return MSVCIntrin::_InterlockedXor_acq;
138 case clang::AArch64::BI_InterlockedXor8_rel:
139 case clang::AArch64::BI_InterlockedXor16_rel:
140 case clang::AArch64::BI_InterlockedXor_rel:
141 case clang::AArch64::BI_InterlockedXor64_rel:
142 return MSVCIntrin::_InterlockedXor_rel;
143 case clang::AArch64::BI_InterlockedXor8_nf:
144 case clang::AArch64::BI_InterlockedXor16_nf:
145 case clang::AArch64::BI_InterlockedXor_nf:
146 case clang::AArch64::BI_InterlockedXor64_nf:
147 return MSVCIntrin::_InterlockedXor_nf;
 // InterlockedAnd.
148 case clang::AArch64::BI_InterlockedAnd8_acq:
149 case clang::AArch64::BI_InterlockedAnd16_acq:
150 case clang::AArch64::BI_InterlockedAnd_acq:
151 case clang::AArch64::BI_InterlockedAnd64_acq:
152 return MSVCIntrin::_InterlockedAnd_acq;
153 case clang::AArch64::BI_InterlockedAnd8_rel:
154 case clang::AArch64::BI_InterlockedAnd16_rel:
155 case clang::AArch64::BI_InterlockedAnd_rel:
156 case clang::AArch64::BI_InterlockedAnd64_rel:
157 return MSVCIntrin::_InterlockedAnd_rel;
158 case clang::AArch64::BI_InterlockedAnd8_nf:
159 case clang::AArch64::BI_InterlockedAnd16_nf:
160 case clang::AArch64::BI_InterlockedAnd_nf:
161 case clang::AArch64::BI_InterlockedAnd64_nf:
162 return MSVCIntrin::_InterlockedAnd_nf;
 // InterlockedIncrement / InterlockedDecrement: 16, unsuffixed and 64 forms
 // are listed; no 8-suffixed builtin appears here.
163 case clang::AArch64::BI_InterlockedIncrement16_acq:
164 case clang::AArch64::BI_InterlockedIncrement_acq:
165 case clang::AArch64::BI_InterlockedIncrement64_acq:
166 return MSVCIntrin::_InterlockedIncrement_acq;
167 case clang::AArch64::BI_InterlockedIncrement16_rel:
168 case clang::AArch64::BI_InterlockedIncrement_rel:
169 case clang::AArch64::BI_InterlockedIncrement64_rel:
170 return MSVCIntrin::_InterlockedIncrement_rel;
171 case clang::AArch64::BI_InterlockedIncrement16_nf:
172 case clang::AArch64::BI_InterlockedIncrement_nf:
173 case clang::AArch64::BI_InterlockedIncrement64_nf:
174 return MSVCIntrin::_InterlockedIncrement_nf;
175 case clang::AArch64::BI_InterlockedDecrement16_acq:
176 case clang::AArch64::BI_InterlockedDecrement_acq:
177 case clang::AArch64::BI_InterlockedDecrement64_acq:
178 return MSVCIntrin::_InterlockedDecrement_acq;
179 case clang::AArch64::BI_InterlockedDecrement16_rel:
180 case clang::AArch64::BI_InterlockedDecrement_rel:
181 case clang::AArch64::BI_InterlockedDecrement64_rel:
182 return MSVCIntrin::_InterlockedDecrement_rel;
183 case clang::AArch64::BI_InterlockedDecrement16_nf:
184 case clang::AArch64::BI_InterlockedDecrement_nf:
185 case clang::AArch64::BI_InterlockedDecrement64_nf:
186 return MSVCIntrin::_InterlockedDecrement_nf;
187 }
 // Every path through the switch returns, so control never arrives here.
188 llvm_unreachable("must return from switch");
189}
190
// Translates an ARM builtin ID into a CodeGenFunction::MSVCIntrin enumerator.
//
// BuiltinID is compared against clang::ARM::BI* values. The result is the
// matching enumerator, or std::nullopt when BuiltinID is not listed below.
// Builtins that differ only in operand width (8, 16, unsuffixed, 64, Pointer)
// share one enumerator, while each _acq / _rel / _nf suffix keeps its own.
191static std::optional<CodeGenFunction::MSVCIntrin>
192translateArmToMsvcIntrin(unsigned BuiltinID) {
193 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
194 switch (BuiltinID) {
 // Anything not listed below has no mapping.
195 default:
196 return std::nullopt;
 // Bit scans: the plain and the 64-suffixed builtin share an enumerator.
197 case clang::ARM::BI_BitScanForward:
198 case clang::ARM::BI_BitScanForward64:
199 return MSVCIntrin::_BitScanForward;
200 case clang::ARM::BI_BitScanReverse:
201 case clang::ARM::BI_BitScanReverse64:
202 return MSVCIntrin::_BitScanReverse;
 // Interlocked operations without an ordering suffix: only the 64-suffixed
 // builtins appear in this switch.
 // NOTE(review): the narrower unsuffixed forms are presumably translated
 // somewhere else - confirm.
203 case clang::ARM::BI_InterlockedAnd64:
204 return MSVCIntrin::_InterlockedAnd;
205 case clang::ARM::BI_InterlockedExchange64:
206 return MSVCIntrin::_InterlockedExchange;
207 case clang::ARM::BI_InterlockedExchangeAdd64:
208 return MSVCIntrin::_InterlockedExchangeAdd;
209 case clang::ARM::BI_InterlockedExchangeSub64:
210 return MSVCIntrin::_InterlockedExchangeSub;
211 case clang::ARM::BI_InterlockedOr64:
212 return MSVCIntrin::_InterlockedOr;
213 case clang::ARM::BI_InterlockedXor64:
214 return MSVCIntrin::_InterlockedXor;
215 case clang::ARM::BI_InterlockedDecrement64:
216 return MSVCIntrin::_InterlockedDecrement;
217 case clang::ARM::BI_InterlockedIncrement64:
218 return MSVCIntrin::_InterlockedIncrement;
 // InterlockedExchangeAdd, one group per ordering suffix.
219 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
220 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
221 case clang::ARM::BI_InterlockedExchangeAdd_acq:
222 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
223 return MSVCIntrin::_InterlockedExchangeAdd_acq;
224 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
225 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
226 case clang::ARM::BI_InterlockedExchangeAdd_rel:
227 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
228 return MSVCIntrin::_InterlockedExchangeAdd_rel;
229 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
230 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
231 case clang::ARM::BI_InterlockedExchangeAdd_nf:
232 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
233 return MSVCIntrin::_InterlockedExchangeAdd_nf;
 // InterlockedExchange; the Pointer builtins join the same groups.
234 case clang::ARM::BI_InterlockedExchange8_acq:
235 case clang::ARM::BI_InterlockedExchange16_acq:
236 case clang::ARM::BI_InterlockedExchange_acq:
237 case clang::ARM::BI_InterlockedExchange64_acq:
238 case clang::ARM::BI_InterlockedExchangePointer_acq:
239 return MSVCIntrin::_InterlockedExchange_acq;
240 case clang::ARM::BI_InterlockedExchange8_rel:
241 case clang::ARM::BI_InterlockedExchange16_rel:
242 case clang::ARM::BI_InterlockedExchange_rel:
243 case clang::ARM::BI_InterlockedExchange64_rel:
244 case clang::ARM::BI_InterlockedExchangePointer_rel:
245 return MSVCIntrin::_InterlockedExchange_rel;
246 case clang::ARM::BI_InterlockedExchange8_nf:
247 case clang::ARM::BI_InterlockedExchange16_nf:
248 case clang::ARM::BI_InterlockedExchange_nf:
249 case clang::ARM::BI_InterlockedExchange64_nf:
250 case clang::ARM::BI_InterlockedExchangePointer_nf:
251 return MSVCIntrin::_InterlockedExchange_nf;
 // InterlockedCompareExchange. The _acq and _rel groups include a Pointer
 // builtin; the _nf group below lists none.
252 case clang::ARM::BI_InterlockedCompareExchange8_acq:
253 case clang::ARM::BI_InterlockedCompareExchange16_acq:
254 case clang::ARM::BI_InterlockedCompareExchange_acq:
255 case clang::ARM::BI_InterlockedCompareExchange64_acq:
256 case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
257 return MSVCIntrin::_InterlockedCompareExchange_acq;
258 case clang::ARM::BI_InterlockedCompareExchange8_rel:
259 case clang::ARM::BI_InterlockedCompareExchange16_rel:
260 case clang::ARM::BI_InterlockedCompareExchange_rel:
261 case clang::ARM::BI_InterlockedCompareExchange64_rel:
262 case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
263 return MSVCIntrin::_InterlockedCompareExchange_rel;
264 case clang::ARM::BI_InterlockedCompareExchange8_nf:
265 case clang::ARM::BI_InterlockedCompareExchange16_nf:
266 case clang::ARM::BI_InterlockedCompareExchange_nf:
267 case clang::ARM::BI_InterlockedCompareExchange64_nf:
268 return MSVCIntrin::_InterlockedCompareExchange_nf;
 // InterlockedOr.
269 case clang::ARM::BI_InterlockedOr8_acq:
270 case clang::ARM::BI_InterlockedOr16_acq:
271 case clang::ARM::BI_InterlockedOr_acq:
272 case clang::ARM::BI_InterlockedOr64_acq:
273 return MSVCIntrin::_InterlockedOr_acq;
274 case clang::ARM::BI_InterlockedOr8_rel:
275 case clang::ARM::BI_InterlockedOr16_rel:
276 case clang::ARM::BI_InterlockedOr_rel:
277 case clang::ARM::BI_InterlockedOr64_rel:
278 return MSVCIntrin::_InterlockedOr_rel;
279 case clang::ARM::BI_InterlockedOr8_nf:
280 case clang::ARM::BI_InterlockedOr16_nf:
281 case clang::ARM::BI_InterlockedOr_nf:
282 case clang::ARM::BI_InterlockedOr64_nf:
283 return MSVCIntrin::_InterlockedOr_nf;
 // InterlockedXor.
284 case clang::ARM::BI_InterlockedXor8_acq:
285 case clang::ARM::BI_InterlockedXor16_acq:
286 case clang::ARM::BI_InterlockedXor_acq:
287 case clang::ARM::BI_InterlockedXor64_acq:
288 return MSVCIntrin::_InterlockedXor_acq;
289 case clang::ARM::BI_InterlockedXor8_rel:
290 case clang::ARM::BI_InterlockedXor16_rel:
291 case clang::ARM::BI_InterlockedXor_rel:
292 case clang::ARM::BI_InterlockedXor64_rel:
293 return MSVCIntrin::_InterlockedXor_rel;
294 case clang::ARM::BI_InterlockedXor8_nf:
295 case clang::ARM::BI_InterlockedXor16_nf:
296 case clang::ARM::BI_InterlockedXor_nf:
297 case clang::ARM::BI_InterlockedXor64_nf:
298 return MSVCIntrin::_InterlockedXor_nf;
 // InterlockedAnd.
299 case clang::ARM::BI_InterlockedAnd8_acq:
300 case clang::ARM::BI_InterlockedAnd16_acq:
301 case clang::ARM::BI_InterlockedAnd_acq:
302 case clang::ARM::BI_InterlockedAnd64_acq:
303 return MSVCIntrin::_InterlockedAnd_acq;
304 case clang::ARM::BI_InterlockedAnd8_rel:
305 case clang::ARM::BI_InterlockedAnd16_rel:
306 case clang::ARM::BI_InterlockedAnd_rel:
307 case clang::ARM::BI_InterlockedAnd64_rel:
308 return MSVCIntrin::_InterlockedAnd_rel;
309 case clang::ARM::BI_InterlockedAnd8_nf:
310 case clang::ARM::BI_InterlockedAnd16_nf:
311 case clang::ARM::BI_InterlockedAnd_nf:
312 case clang::ARM::BI_InterlockedAnd64_nf:
313 return MSVCIntrin::_InterlockedAnd_nf;
 // InterlockedIncrement / InterlockedDecrement: 16, unsuffixed and 64 forms
 // are listed; no 8-suffixed builtin appears here.
314 case clang::ARM::BI_InterlockedIncrement16_acq:
315 case clang::ARM::BI_InterlockedIncrement_acq:
316 case clang::ARM::BI_InterlockedIncrement64_acq:
317 return MSVCIntrin::_InterlockedIncrement_acq;
318 case clang::ARM::BI_InterlockedIncrement16_rel:
319 case clang::ARM::BI_InterlockedIncrement_rel:
320 case clang::ARM::BI_InterlockedIncrement64_rel:
321 return MSVCIntrin::_InterlockedIncrement_rel;
322 case clang::ARM::BI_InterlockedIncrement16_nf:
323 case clang::ARM::BI_InterlockedIncrement_nf:
324 case clang::ARM::BI_InterlockedIncrement64_nf:
325 return MSVCIntrin::_InterlockedIncrement_nf;
326 case clang::ARM::BI_InterlockedDecrement16_acq:
327 case clang::ARM::BI_InterlockedDecrement_acq:
328 case clang::ARM::BI_InterlockedDecrement64_acq:
329 return MSVCIntrin::_InterlockedDecrement_acq;
330 case clang::ARM::BI_InterlockedDecrement16_rel:
331 case clang::ARM::BI_InterlockedDecrement_rel:
332 case clang::ARM::BI_InterlockedDecrement64_rel:
333 return MSVCIntrin::_InterlockedDecrement_rel;
334 case clang::ARM::BI_InterlockedDecrement16_nf:
335 case clang::ARM::BI_InterlockedDecrement_nf:
336 case clang::ARM::BI_InterlockedDecrement64_nf:
337 return MSVCIntrin::_InterlockedDecrement_nf;
338 }
 // Every path through the switch returns, so control never arrives here.
339 llvm_unreachable("must return from switch");
340}
341
342// Emit an intrinsic where all operands are of the same type as the result.
343// Depending on mode, this may be a constrained floating-point intrinsic.
// NOTE(review): this listing skips its own line 344, which should carry the
// return type, the function name and the first parameter (used below as
// `CGF`). The definition is incomplete as shown - restore that line from the
// real file before relying on this text.
345 unsigned IntrinsicID,
346 unsigned ConstrainedIntrinsicID,
347 llvm::Type *Ty,
348 ArrayRef<Value *> Args) {
349 Function *F;
 // Pick the declaration according to the builder's FP-constrained mode; both
 // variants are overloaded on the same type Ty.
350 if (CGF.Builder.getIsFPConstrained())
351 F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
352 else
353 F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
354
 // The same mode check selects the builder call used to emit it.
355 if (CGF.Builder.getIsFPConstrained())
356 return CGF.Builder.CreateConstrainedFPCall(F, Args);
357
358 return CGF.Builder.CreateCall(F, Args);
359}
360
// Builds the llvm::FixedVectorType that corresponds to TypeFlags.
//
// The element type is chosen by TypeFlags.getEltType(). Except for the one
// fixed 16 x i8 result, the element count is 1 when V1Ty is set, otherwise a
// per-element base count shifted left by TypeFlags.isQuad().
// HasFastHalfType: when false, the branch that would produce a HalfTy vector
// yields an Int16Ty vector of the same length instead.
// AllowBFloatArgsAndRet: when false, the branch that would produce a BFloatTy
// vector yields an Int16Ty vector of the same length instead.
361static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
362 NeonTypeFlags TypeFlags,
363 bool HasFastHalfType = true,
364 bool V1Ty = false,
365 bool AllowBFloatArgsAndRet = true) {
366 int IsQuad = TypeFlags.isQuad();
367 switch (TypeFlags.getEltType()) {
 // NOTE(review): the `case` labels of this switch are absent from this listing
 // (its own line numbers jump wherever one should be), so which element kind
 // selects which return cannot be read off here - consult the real file.
371 return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
374 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
376 if (AllowBFloatArgsAndRet)
377 return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
378 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
380 if (HasFastHalfType)
381 return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
382 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
384 return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
387 return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
389 // FIXME: i128 and f128 are not fully supported in Clang and LLVM.
390 // Many i128 and f128 APIs are missing,
391 // so we use v16i8 to represent poly128 and let it get pattern matched.
392 return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
394 return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
396 return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
397 }
398 llvm_unreachable("Unknown vector element type!");
399}
400
// Returns a floating-point vector type (HalfTy, FloatTy or DoubleTy elements)
// selected by IntTypeFlags.getEltType(). The element count is a per-type base
// (4, 2 or 1) shifted left by IntTypeFlags.isQuad(). Any element kind without
// a matching case reaches llvm_unreachable.
401static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
402 NeonTypeFlags IntTypeFlags) {
403 int IsQuad = IntTypeFlags.isQuad();
404 switch (IntTypeFlags.getEltType()) {
 // NOTE(review): the three `case` labels are absent from this listing (its own
 // lines 405, 407 and 409); presumably each integer width maps to the float
 // type of the same width - confirm against the real file.
406 return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
408 return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
410 return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
411 default:
412 llvm_unreachable("Type can't be converted to floating-point!");
413 }
414}
415
// NOTE(review): the header line of this definition (this listing's line 416)
// is missing. Judging by a later three-argument call EmitNeonSplat(V, C, EC),
// this is presumably CodeGenFunction::EmitNeonSplat(V, C, Count) - confirm.
// Body: builds a constant vector that repeats C Count times and uses it as the
// mask of a shufflevector with V as both inputs; the result is named "lane".
417 const ElementCount &Count) {
418 Value *SV = llvm::ConstantVector::getSplat(Count, C);
419 return Builder.CreateShuffleVector(V, V, SV, "lane");
420}
421
// NOTE(review): the header line of this definition (this listing's line 422)
// is missing; presumably a two-argument EmitNeonSplat(V, C) overload - confirm.
// Body: reads the element count from V's vector type and forwards to the
// three-argument EmitNeonSplat with that count.
423 ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
424 return EmitNeonSplat(V, C, EC);
425}
426
// NOTE(review): the header line of this definition (this listing's line 427)
// is missing. The body uses `F` (the callee) and `Ops` (its operands, modified
// in place); presumably this is CodeGenFunction::EmitNeonCall - confirm.
// Coerces each operand to the type of the matching formal parameter of F, then
// emits the call. `shift`, when non-zero, is the index of the operand that is
// converted with EmitNeonShiftVector (passing `rightshift`) instead of being
// bitcast.
428 const char *name,
429 unsigned shift, bool rightshift) {
430 unsigned j = 0;
431 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
432 ai != ae; ++ai, ++j) {
 // Metadata-typed parameters of a constrained FP intrinsic are left alone;
 // `continue` still advances both ai and j.
433 if (F->isConstrainedFPIntrinsic())
434 if (ai->getType()->isMetadataTy())
435 continue;
 // shift == 0 means "no shift operand", so operand 0 is never treated as one.
436 if (shift > 0 && shift == j)
437 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
438 else
439 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
440 }
441
442 if (F->isConstrainedFPIntrinsic())
443 return Builder.CreateConstrainedFPCall(F, Ops, name);
444 return Builder.CreateCall(F, Ops, name);
445}
446
// NOTE(review): the leading header lines of this definition (this listing's
// lines 447-449) are missing. The body uses `IID`, `Tys` and `Ops`; calls
// elsewhere pass (IID, Tys, Ops, E, name) to EmitFP8NeonCall, so this is
// presumably that function - confirm.
// Body: removes the last element of Ops and passes it to a call of the
// aarch64_set_fpmr intrinsic, then emits intrinsic IID (overloaded on Tys)
// over the remaining operands via EmitNeonCall. E is not used in the body.
450 const CallExpr *E, const char *name) {
451 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
452 Ops.pop_back_val());
453 return EmitNeonCall(CGM.getIntrinsic(IID, Tys), Ops, name);
454}
455
// NOTE(review): the first header line of this definition (this listing's line
// 456, holding the return type and function name) is missing - restore it from
// the real file.
// Emits intrinsic IID through EmitFP8NeonCall with two overload types: a fixed
// vector of RetTy elements and the type of Ops[1].
457 unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
458 SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
459
 // Element count = bit width of Ops[0]'s type divided by that of RetTy.
460 const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
461 RetTy->getPrimitiveSizeInBits();
462 llvm::Type *Tys[] = {llvm::FixedVectorType::get(RetTy, ElemCount),
463 Ops[1]->getType()};
 // When requested, widen Ops[2] by inserting it at index 0 of a poison
 // 16 x i8 vector.
464 if (ExtendLaneArg) {
465 auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
466 Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
467 uint64_t(0));
468 }
469 return EmitFP8NeonCall(IID, Tys, Ops, E, name);
470}
471
// NOTE(review): the first header line of this definition (this listing's line
// 472, holding the return type and function name) is missing - restore it from
// the real file.
// Emits intrinsic IID through EmitFP8NeonCall with a single overload type: a
// fixed vector of RetTy elements.
473 unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
474 SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
475
 // When requested, widen Ops[2] by inserting it at index 0 of a poison
 // 16 x i8 vector.
476 if (ExtendLaneArg) {
477 auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
478 Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
479 uint64_t(0));
480 }
 // Element count = bit width of Ops[0]'s type divided by that of RetTy.
481 const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
482 RetTy->getPrimitiveSizeInBits();
483 return EmitFP8NeonCall(IID, {llvm::FixedVectorType::get(RetTy, ElemCount)},
484 Ops, E, name);
485}
486
// NOTE(review): the header line of this definition (this listing's line 487)
// is missing. The body uses `V` and `Ty`; other code in this listing calls
// EmitNeonShiftVector(value, type, bool), so this is presumably that
// function - confirm.
// Body: V is cast to ConstantInt (so it has to be one), its sign-extended
// value is read, and a signed constant of type Ty holding that value - negated
// when `neg` is set - is returned.
488 bool neg) {
489 int SV = cast<ConstantInt>(V)->getSExtValue();
490 return ConstantInt::getSigned(Ty, neg ? -SV : SV);
491}
492
// Emits intrinsic IID through EmitFP8NeonCall with overload types {Ty0, Ty1}.
// When Extract is set, the second overload type is replaced by 8 x i8 and
// Ops[0] is replaced by the subvector extracted from it at index 0.
// NOTE(review): one parameter line (this listing's line 495) is missing from
// the signature; it presumably declares `Ops`, which the body modifies -
// confirm against the real file.
493Value *CodeGenFunction::EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
494 llvm::Type *Ty1, bool Extract,
496 const CallExpr *E,
497 const char *name) {
498 llvm::Type *Tys[] = {Ty0, Ty1};
499 if (Extract) {
500 // Op[0] is mfloat8x16_t, but the intrinsic converts only the lower part of
501 // the vector.
502 Tys[1] = llvm::FixedVectorType::get(Int8Ty, 8);
503 Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], uint64_t(0));
504 }
505 return EmitFP8NeonCall(IID, Tys, Ops, E, name);
506}
507
508// Right-shift a vector by a constant.
// NOTE(review): the header line of this definition (this listing's line 509)
// is missing; it should carry the return type, the function name and the
// parameters used below as `Vec` and `Shift` - restore it from the real file.
// Shift is cast to ConstantInt, so it has to be one. `usgn` selects a logical
// (lshr) rather than an arithmetic (ashr) shift.
510 llvm::Type *Ty, bool usgn,
511 const char *name) {
512 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
513
514 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
515 int EltSize = VTy->getScalarSizeInBits();
516
517 Vec = Builder.CreateBitCast(Vec, Ty);
518
519 // lshr/ashr are undefined when the shift amount is equal to the vector
520 // element size.
521 if (ShiftAmt == EltSize) {
522 if (usgn) {
523 // Right-shifting an unsigned value by its size yields 0.
524 return llvm::ConstantAggregateZero::get(VTy);
525 } else {
526 // Right-shifting a signed value by its size is equivalent
527 // to a shift of size-1.
528 --ShiftAmt;
529 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
530 }
531 }
532
 // Convert the scalar amount with EmitNeonShiftVector (no negation), then
 // shift.
533 Shift = EmitNeonShiftVector(Shift, Ty, false);
534 if (usgn)
535 return Builder.CreateLShr(Vec, Shift, name);
536 return Builder.CreateAShr(Vec, Shift, name);
537}
538
539// clang-format off
541 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
542 NEONMAP0(splat_lane_v),
543 NEONMAP0(splat_laneq_v),
544 NEONMAP0(splatq_lane_v),
545 NEONMAP0(splatq_laneq_v),
546 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
547 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
548 NEONMAP1(vabs_v, arm_neon_vabs, 0),
549 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
550 NEONMAP0(vadd_v),
551 NEONMAP0(vaddhn_v),
552 NEONMAP0(vaddq_v),
553 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
554 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
555 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
556 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
557 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
558 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
559 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
560 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
561 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
562 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
563 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
564 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
565 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
566 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
567 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
568 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
569 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
570 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
571 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
572 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
573 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
574 NEONMAP1(vcage_v, arm_neon_vacge, 0),
575 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
576 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
577 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
578 NEONMAP1(vcale_v, arm_neon_vacge, 0),
579 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
580 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
581 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
582 NEONMAP0(vceqz_v),
583 NEONMAP0(vceqzq_v),
584 NEONMAP0(vcgez_v),
585 NEONMAP0(vcgezq_v),
586 NEONMAP0(vcgtz_v),
587 NEONMAP0(vcgtzq_v),
588 NEONMAP0(vclez_v),
589 NEONMAP0(vclezq_v),
590 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
591 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
592 NEONMAP0(vcltz_v),
593 NEONMAP0(vcltzq_v),
594 NEONMAP1(vclz_v, ctlz, Add1ArgType),
595 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
596 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
597 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
598 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
599 NEONMAP0(vcvt_f16_s16),
600 NEONMAP0(vcvt_f16_u16),
601 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
602 NEONMAP0(vcvt_f32_v),
603 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
604 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
605 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
606 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
607 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
608 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
609 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
610 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
611 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
612 NEONMAP0(vcvt_s16_f16),
613 NEONMAP0(vcvt_s32_v),
614 NEONMAP0(vcvt_s64_v),
615 NEONMAP0(vcvt_u16_f16),
616 NEONMAP0(vcvt_u32_v),
617 NEONMAP0(vcvt_u64_v),
618 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
619 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
620 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
621 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
622 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
623 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
624 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
625 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
626 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
627 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
628 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
629 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
630 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
631 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
632 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
633 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
634 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
635 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
636 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
637 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
638 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
639 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
640 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
641 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
642 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
643 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
644 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
645 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
646 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
647 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
648 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
649 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
650 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
651 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
652 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
653 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
654 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
655 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
656 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
657 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
658 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
659 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
660 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
661 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
662 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
663 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
664 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
665 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
666 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
667 NEONMAP0(vcvtq_f16_s16),
668 NEONMAP0(vcvtq_f16_u16),
669 NEONMAP0(vcvtq_f32_v),
670 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
671 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
672 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
673 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
674 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
675 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
676 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
677 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
678 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
679 NEONMAP0(vcvtq_s16_f16),
680 NEONMAP0(vcvtq_s32_v),
681 NEONMAP0(vcvtq_s64_v),
682 NEONMAP0(vcvtq_u16_f16),
683 NEONMAP0(vcvtq_u32_v),
684 NEONMAP0(vcvtq_u64_v),
685 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
686 NEONMAP1(vdot_u32, arm_neon_udot, 0),
687 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
688 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
689 NEONMAP0(vext_v),
690 NEONMAP0(vextq_v),
691 NEONMAP0(vfma_v),
692 NEONMAP0(vfmaq_v),
693 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
694 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
695 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
696 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
697 NEONMAP0(vld1_dup_v),
698 NEONMAP1(vld1_v, arm_neon_vld1, 0),
699 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
700 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
701 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
702 NEONMAP0(vld1q_dup_v),
703 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
704 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
705 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
706 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
707 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
708 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
709 NEONMAP1(vld2_v, arm_neon_vld2, 0),
710 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
711 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
712 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
713 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
714 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
715 NEONMAP1(vld3_v, arm_neon_vld3, 0),
716 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
717 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
718 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
719 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
720 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
721 NEONMAP1(vld4_v, arm_neon_vld4, 0),
722 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
723 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
724 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
725 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
726 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
727 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
728 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
729 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
730 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
731 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
732 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
733 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
734 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
735 NEONMAP0(vmovl_v),
736 NEONMAP0(vmovn_v),
737 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
738 NEONMAP0(vmull_v),
739 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
740 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
741 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
742 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
743 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
744 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
745 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
746 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
747 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
748 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
749 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
750 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
751 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
752 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
753 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
754 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
755 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
756 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
757 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
758 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
759 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
760 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
761 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
762 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
763 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
764 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
765 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
766 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
767 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
768 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
769 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
770 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
771 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
772 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
773 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
774 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
775 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
776 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
777 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
778 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
779 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
780 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
781 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
782 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
783 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
784 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
785 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
786 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
787 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
788 NEONMAP1(vrnd_v, trunc, Add1ArgType),
789 NEONMAP1(vrnda_v, round, Add1ArgType),
790 NEONMAP1(vrndaq_v, round, Add1ArgType),
791 NEONMAP0(vrndi_v),
792 NEONMAP0(vrndiq_v),
793 NEONMAP1(vrndm_v, floor, Add1ArgType),
794 NEONMAP1(vrndmq_v, floor, Add1ArgType),
795 NEONMAP1(vrndn_v, roundeven, Add1ArgType),
796 NEONMAP1(vrndnq_v, roundeven, Add1ArgType),
797 NEONMAP1(vrndp_v, ceil, Add1ArgType),
798 NEONMAP1(vrndpq_v, ceil, Add1ArgType),
799 NEONMAP1(vrndq_v, trunc, Add1ArgType),
800 NEONMAP1(vrndx_v, rint, Add1ArgType),
801 NEONMAP1(vrndxq_v, rint, Add1ArgType),
802 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
803 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
804 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
805 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
806 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
807 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
808 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
809 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
810 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
811 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
812 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
813 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
814 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
815 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
816 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
817 NEONMAP0(vshl_n_v),
818 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
819 NEONMAP0(vshll_n_v),
820 NEONMAP0(vshlq_n_v),
821 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
822 NEONMAP0(vshr_n_v),
823 NEONMAP0(vshrn_n_v),
824 NEONMAP0(vshrq_n_v),
825 NEONMAP1(vst1_v, arm_neon_vst1, 0),
826 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
827 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
828 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
829 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
830 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
831 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
832 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
833 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
834 NEONMAP1(vst2_v, arm_neon_vst2, 0),
835 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
836 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
837 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
838 NEONMAP1(vst3_v, arm_neon_vst3, 0),
839 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
840 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
841 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
842 NEONMAP1(vst4_v, arm_neon_vst4, 0),
843 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
844 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
845 NEONMAP0(vsubhn_v),
846 NEONMAP0(vtrn_v),
847 NEONMAP0(vtrnq_v),
848 NEONMAP0(vtst_v),
849 NEONMAP0(vtstq_v),
850 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
851 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
852 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
853 NEONMAP0(vuzp_v),
854 NEONMAP0(vuzpq_v),
855 NEONMAP0(vzip_v),
856 NEONMAP0(vzipq_v)
857};
858
859// clang-format on
860
861// Some intrinsics are equivalent for codegen.
// Each entry is a pair of builtin IDs: the first is a type-specific builtin
// (the f16/bf16 variants, and the u64/f64/p64 vldap1/vstl1 lane variants at the
// end), the second is the builtin ID it is treated as equivalent to (the generic
// "_v" form, or the s64 variant for vldap1/vstl1).
862static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
863 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
864 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
865 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
866 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
867 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
868 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
869 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
870 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
871 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
872 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
873 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
874 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
875 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
876 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
877 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
878 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
879 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
880 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
881 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
882 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
883 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
884 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
885 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
886 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
887 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
888 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
889 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
890 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
891 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
892 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
893 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
894 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
895 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
896 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
897 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
898 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
899 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
900 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
901 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
902 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
903 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
904 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
905 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
906 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
907 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
908 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
909 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
910 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
911 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
912 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
913 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
914 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
915 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
916 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
917 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
918 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
919 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
920 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
921 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
922 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
923 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
924 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
925 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
926 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
927 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
928 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
929 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
930 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
931 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
932 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
933 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
934 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
935 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
936 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
937 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
938 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
939 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
940 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
941 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
942 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
943 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
944 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
945 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
946 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
947 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
948 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
949 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
950 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
951 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
952 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
953 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
954 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
955 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
956 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
957 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
958 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
959 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
960 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
961 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
962 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
963 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
964 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
965 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
966 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
967 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
968 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
969 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
970 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
971 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
972 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
973 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
974 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
975 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
976 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
977 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
978 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
979 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
980 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
981 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
982 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
983 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
984 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
985 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
986 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
987 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
988 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
989 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
990 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
991 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
992 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
993 // arbitrary one to be handled as the canonical variation.
994 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
995 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
996 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
997 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
998 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
999 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1000 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1001 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1002 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1003 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1004 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1005 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1006};
1007
1008#undef NEONMAP0
1009#undef NEONMAP1
1010#undef NEONMAP2
1011
// SVEMAP1 expands to one brace-enclosed table entry for an SVE builtin that
// has an LLVM intrinsic: the stringified NameBase, the
// SVE::BI__builtin_sve_<NameBase> ID, Intrinsic::<LLVMIntrinsic>, a literal 0
// and TypeModifier.
// NOTE(review): the literal 0 presumably fills an alternate-intrinsic slot of
// the entry struct -- confirm against the struct definition.
1012#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
1013 { \
1014 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
1015 TypeModifier \
1016 }
1017
// SVEMAP2 produces the same entry shape for a builtin with no LLVM intrinsic:
// both numeric slots after the builtin ID are 0.
1018#define SVEMAP2(NameBase, TypeModifier) \
1019 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
// The table entries come from the two files included below while
// GET_SVE_LLVM_INTRINSIC_MAP is defined.
1021#define GET_SVE_LLVM_INTRINSIC_MAP
1022#include "clang/Basic/arm_sve_builtin_cg.inc"
1023#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
1024#undef GET_SVE_LLVM_INTRINSIC_MAP
1025};
1026
// The entry-building macros are dropped once the table has been emitted.
1027#undef SVEMAP1
1028#undef SVEMAP2
1029
// SMEMAP1 expands to one brace-enclosed table entry for an SME builtin that
// has an LLVM intrinsic: the stringified NameBase, the
// SME::BI__builtin_sme_<NameBase> ID, Intrinsic::<LLVMIntrinsic>, a literal 0
// and TypeModifier.
// NOTE(review): the literal 0 presumably fills an alternate-intrinsic slot of
// the entry struct -- confirm against the struct definition.
1030#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
1031 { \
1032 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
1033 TypeModifier \
1034 }
1035
// SMEMAP2 produces the same entry shape for a builtin with no LLVM intrinsic:
// both numeric slots after the builtin ID are 0.
1036#define SMEMAP2(NameBase, TypeModifier) \
1037 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
// The table entries come from the file included below while
// GET_SME_LLVM_INTRINSIC_MAP is defined.
1039#define GET_SME_LLVM_INTRINSIC_MAP
1040#include "clang/Basic/arm_sme_builtin_cg.inc"
1041#undef GET_SME_LLVM_INTRINSIC_MAP
1042};
1043
// The entry-building macros are dropped once the table has been emitted.
1044#undef SMEMAP1
1045#undef SMEMAP2
1046
1048
1053
1054// Check if Builtin `BuiltinID` is present in `IntrinsicMap`. If yes, returns
1055// a pointer to the corresponding info struct; otherwise returns nullptr.
// `MapProvenSorted` is a flag owned by the caller and passed by reference: in
// builds where NDEBUG is not defined, the map's ordering is asserted while the
// flag is false and the flag is then set, so the check is not repeated on later
// calls that pass the same flag.
1056static const ARMVectorIntrinsicInfo *
1058 unsigned BuiltinID, bool &MapProvenSorted) {
1059
1060#ifndef NDEBUG
  // The lookup below is a lower_bound search, which is only meaningful on a
  // sorted map; verify that once.
1061 if (!MapProvenSorted) {
1062 assert(llvm::is_sorted(IntrinsicMap));
1063 MapProvenSorted = true;
1064 }
1065#endif
1066
  // Find the first entry that does not order before BuiltinID.
1068 llvm::lower_bound(IntrinsicMap, BuiltinID);
1069
  // lower_bound may land on end() or on an entry with a different ID; only an
  // exact BuiltinID match counts as a hit.
1070 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
1071 return Builtin;
1072
1073 return nullptr;
1074}
1075
1077 unsigned Modifier,
1078 llvm::Type *ArgType,
1079 const CallExpr *E) {
  // Vector width in bits selected by Modifier; stays 0 when neither
  // Use64BitVectors nor Use128BitVectors is set.
1080 int VectorSize = 0;
1081 if (Modifier & Use64BitVectors)
1082 VectorSize = 64;
1083 else if (Modifier & Use128BitVectors)
1084 VectorSize = 128;
1085
1086 // Return type.
  // With AddRetType, the call's converted return type is appended to Tys. With
  // VectorizeRetType it is first wrapped in a fixed vector holding
  // VectorSize / element-bits elements, or a single element when VectorSize
  // is 0.
1088 if (Modifier & AddRetType) {
1089 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
1090 if (Modifier & VectorizeRetType)
1091 Ty = llvm::FixedVectorType::get(
1092 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
1093
1094 Tys.push_back(Ty);
1095 }
1096
1097 // Arguments.
  // With VectorizeArgTypes, ArgType is wrapped in a fixed vector using the same
  // element-count rule as the return type above.
1098 if (Modifier & VectorizeArgTypes) {
1099 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
1100 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
1101 }
1102
  // Either Add1ArgType or Add2ArgTypes appends ArgType once ...
1103 if (Modifier & (Add1ArgType | Add2ArgTypes))
1104 Tys.push_back(ArgType);
1105
  // ... and Add2ArgTypes appends it a second time.
1106 if (Modifier & Add2ArgTypes)
1107 Tys.push_back(ArgType);
1108
  // InventFloatType appends FloatTy as an additional type.
1109 if (Modifier & InventFloatType)
1110 Tys.push_back(FloatTy);
1111
  // Resolve the intrinsic declaration for IntrinsicID with the collected types.
1112 return CGM.getIntrinsic(IntrinsicID, Tys);
1113}
1114
1115//===----------------------------------------------------------------------===//
1116// Emit-helpers
1117//===----------------------------------------------------------------------===//
1119 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
1120 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
1121 assert(SISDInfo.LLVMIntrinsic && "Generic code assumes a valid intrinsic");
1122
1123 switch (SISDInfo.BuiltinID) {
1124 case NEON::BI__builtin_neon_vcled_s64:
1125 case NEON::BI__builtin_neon_vcled_u64:
1126 case NEON::BI__builtin_neon_vcles_f32:
1127 case NEON::BI__builtin_neon_vcled_f64:
1128 case NEON::BI__builtin_neon_vcltd_s64:
1129 case NEON::BI__builtin_neon_vcltd_u64:
1130 case NEON::BI__builtin_neon_vclts_f32:
1131 case NEON::BI__builtin_neon_vcltd_f64:
1132 case NEON::BI__builtin_neon_vcales_f32:
1133 case NEON::BI__builtin_neon_vcaled_f64:
1134 case NEON::BI__builtin_neon_vcalts_f32:
1135 case NEON::BI__builtin_neon_vcaltd_f64:
1136 // Only one direction of comparisons actually exists, cmle is actually a cmge
1137 // with swapped operands. The table gives us the right intrinsic but we
1138 // still need to do the swap.
1139 std::swap(Ops[0], Ops[1]);
1140 break;
1141 }
1142
1143 // Use fptosi.sat/fptoui.sat unless under strict FP.
1144 unsigned LLVMIntrinsic = SISDInfo.LLVMIntrinsic;
1145 if (!CGF.Builder.getIsFPConstrained()) {
1146 if (LLVMIntrinsic == Intrinsic::aarch64_neon_fcvtzs)
1147 LLVMIntrinsic = Intrinsic::fptosi_sat;
1148 else if (LLVMIntrinsic == Intrinsic::aarch64_neon_fcvtzu)
1149 LLVMIntrinsic = Intrinsic::fptoui_sat;
1150 }
  // Resolve the intrinsic declaration from the table's type modifier and the
  // converted type of the builtin's first argument.
1151 llvm::Type *ArgTy = CGF.ConvertType(E->getArg(0)->getType());
1152 Function *F = CGF.LookupNeonLLVMIntrinsic(LLVMIntrinsic,
1153 SISDInfo.TypeModifier, ArgTy, E);
1154
  // Walk the intrinsic's formal parameters in step with Ops (index j). An
  // operand whose bit width already equals the parameter's is left untouched.
1155 int j = 0;
1156 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
1157 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
1158 ai != ae; ++ai, ++j) {
1159 llvm::Type *ArgTy = ai->getType();
1160 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
1161 ArgTy->getPrimitiveSizeInBits())
1162 continue;
1163 assert(
1164 ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy() &&
1165 "Expecting vector LLVM intrinsic type and scalar Clang builtin type!");
1166
1167 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
1168 // it before inserting.
  // The scalar is then placed in element 0 of a poison vector of the
  // parameter's type.
1169 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
1170 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
1171 Ops[j] =
1172 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
1173 }
1174
  // If the builtin's result type is narrower than what the intrinsic returned,
  // the result is element 0 of the returned value; otherwise the returned value
  // is bitcast to the builtin's result type.
1175 Value *Result = CGF.EmitNeonCall(F, Ops, SISDInfo.NameHint);
1176 llvm::Type *ResultType = CGF.ConvertType(E->getType());
1177 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
1178 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
1179 return CGF.Builder.CreateExtractElement(Result, C0);
1180
1181 return CGF.Builder.CreateBitCast(Result, ResultType, SISDInfo.NameHint);
1182}
1183
1185 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
1186 const char *NameHint, unsigned Modifier, const CallExpr *E,
1187 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
1188 llvm::Triple::ArchType Arch) {
1189
1190 // Extract the trailing immediate argument that encodes the type discriminator
1191 // for this overloaded intrinsic.
1192 // TODO: Move to the parent code that takes care of argument processing.
1193 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
1194 std::optional<llvm::APSInt> NeonTypeConst =
1196 if (!NeonTypeConst)
1197 return nullptr;
1198
1199 // Determine the type of this overloaded NEON intrinsic.
1200 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
1201 const bool Usgn = Type.isUnsigned();
1202 const bool Quad = Type.isQuad();
1203 const bool Floating = Type.isFloatingPoint();
1204 const bool HasFastHalfType = getTarget().hasFastHalfType();
1205 const bool AllowBFloatArgsAndRet =
1206 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
1207
1208 llvm::FixedVectorType *VTy =
1209 GetNeonType(this, Type, HasFastHalfType, false, AllowBFloatArgsAndRet);
1210 llvm::Type *Ty = VTy;
1211 if (!Ty)
1212 return nullptr;
1213
1214 auto getAlignmentValue32 = [&](Address addr) -> Value* {
1215 return Builder.getInt32(addr.getAlignment().getQuantity());
1216 };
1217
1218 unsigned Int = LLVMIntrinsic;
1219 if ((Modifier & UnsignedAlts) && !Usgn)
1220 Int = AltLLVMIntrinsic;
1221
1222 switch (BuiltinID) {
1223 default: break;
1224 case NEON::BI__builtin_neon_splat_lane_v:
1225 case NEON::BI__builtin_neon_splat_laneq_v:
1226 case NEON::BI__builtin_neon_splatq_lane_v:
1227 case NEON::BI__builtin_neon_splatq_laneq_v: {
1228 auto NumElements = VTy->getElementCount();
1229 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
1230 NumElements = NumElements * 2;
1231 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
1232 NumElements = NumElements.divideCoefficientBy(2);
1233
1234 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
1235 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
1236 }
1237 case NEON::BI__builtin_neon_vpadd_v:
1238 case NEON::BI__builtin_neon_vpaddq_v:
1239 // We don't allow fp/int overloading of intrinsics.
1240 if (VTy->getElementType()->isFloatingPointTy() &&
1241 Int == Intrinsic::aarch64_neon_addp)
1242 Int = Intrinsic::aarch64_neon_faddp;
1243 break;
1244 case NEON::BI__builtin_neon_vabs_v:
1245 case NEON::BI__builtin_neon_vabsq_v:
1246 if (VTy->getElementType()->isFloatingPointTy())
1247 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
1248 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
1249 case NEON::BI__builtin_neon_vadd_v:
1250 case NEON::BI__builtin_neon_vaddq_v: {
1251 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
1252 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
1253 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
1254 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
1255 return Builder.CreateBitCast(Ops[0], Ty);
1256 }
1257 case NEON::BI__builtin_neon_vaddhn_v: {
1258 llvm::FixedVectorType *SrcTy =
1259 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1260
1261 // %sum = add <4 x i32> %lhs, %rhs
1262 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1263 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
1264 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
1265
1266 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
1267 Constant *ShiftAmt =
1268 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
1269 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
1270
1271 // %res = trunc <4 x i32> %high to <4 x i16>
1272 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
1273 }
1274 case NEON::BI__builtin_neon_vcale_v:
1275 case NEON::BI__builtin_neon_vcaleq_v:
1276 case NEON::BI__builtin_neon_vcalt_v:
1277 case NEON::BI__builtin_neon_vcaltq_v:
1278 std::swap(Ops[0], Ops[1]);
1279 [[fallthrough]];
1280 case NEON::BI__builtin_neon_vcage_v:
1281 case NEON::BI__builtin_neon_vcageq_v:
1282 case NEON::BI__builtin_neon_vcagt_v:
1283 case NEON::BI__builtin_neon_vcagtq_v: {
1284 llvm::Type *Ty;
1285 switch (VTy->getScalarSizeInBits()) {
1286 default: llvm_unreachable("unexpected type");
1287 case 32:
1288 Ty = FloatTy;
1289 break;
1290 case 64:
1291 Ty = DoubleTy;
1292 break;
1293 case 16:
1294 Ty = HalfTy;
1295 break;
1296 }
1297 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
1298 llvm::Type *Tys[] = { VTy, VecFlt };
1299 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1300 return EmitNeonCall(F, Ops, NameHint);
1301 }
1302 case NEON::BI__builtin_neon_vceqz_v:
1303 case NEON::BI__builtin_neon_vceqzq_v:
1305 Ops[0], Ty, Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ, "vceqz");
1306 case NEON::BI__builtin_neon_vcgez_v:
1307 case NEON::BI__builtin_neon_vcgezq_v:
1309 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
1310 "vcgez");
1311 case NEON::BI__builtin_neon_vclez_v:
1312 case NEON::BI__builtin_neon_vclezq_v:
1314 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
1315 "vclez");
1316 case NEON::BI__builtin_neon_vcgtz_v:
1317 case NEON::BI__builtin_neon_vcgtzq_v:
1319 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
1320 "vcgtz");
1321 case NEON::BI__builtin_neon_vcltz_v:
1322 case NEON::BI__builtin_neon_vcltzq_v:
1324 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
1325 "vcltz");
1326 case NEON::BI__builtin_neon_vclz_v:
1327 case NEON::BI__builtin_neon_vclzq_v:
1328 // We generate target-independent intrinsic, which needs a second argument
1329 // for whether or not clz of zero is undefined; on ARM it isn't.
1330 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
1331 break;
1332 case NEON::BI__builtin_neon_vcvt_f32_v:
1333 case NEON::BI__builtin_neon_vcvtq_f32_v:
1334 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1335 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
1336 HasFastHalfType);
1337 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1338 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1339 case NEON::BI__builtin_neon_vcvt_f16_s16:
1340 case NEON::BI__builtin_neon_vcvt_f16_u16:
1341 case NEON::BI__builtin_neon_vcvtq_f16_s16:
1342 case NEON::BI__builtin_neon_vcvtq_f16_u16:
1343 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1344 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
1345 HasFastHalfType);
1346 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1347 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1348 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
1349 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
1350 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
1351 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
1352 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
1353 Function *F = CGM.getIntrinsic(Int, Tys);
1354 return EmitNeonCall(F, Ops, "vcvt_n");
1355 }
1356 case NEON::BI__builtin_neon_vcvt_n_f32_v:
1357 case NEON::BI__builtin_neon_vcvt_n_f64_v:
1358 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
1359 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
1360 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
1361 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
1362 Function *F = CGM.getIntrinsic(Int, Tys);
1363 return EmitNeonCall(F, Ops, "vcvt_n");
1364 }
1365 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
1366 case NEON::BI__builtin_neon_vcvt_n_s32_v:
1367 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
1368 case NEON::BI__builtin_neon_vcvt_n_u32_v:
1369 case NEON::BI__builtin_neon_vcvt_n_s64_v:
1370 case NEON::BI__builtin_neon_vcvt_n_u64_v:
1371 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
1372 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
1373 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
1374 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
1375 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
1376 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
1377 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
1378 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1379 return EmitNeonCall(F, Ops, "vcvt_n");
1380 }
1381 case NEON::BI__builtin_neon_vcvt_s32_v:
1382 case NEON::BI__builtin_neon_vcvt_u32_v:
1383 case NEON::BI__builtin_neon_vcvt_s64_v:
1384 case NEON::BI__builtin_neon_vcvt_u64_v:
1385 case NEON::BI__builtin_neon_vcvt_s16_f16:
1386 case NEON::BI__builtin_neon_vcvt_u16_f16:
1387 case NEON::BI__builtin_neon_vcvtq_s32_v:
1388 case NEON::BI__builtin_neon_vcvtq_u32_v:
1389 case NEON::BI__builtin_neon_vcvtq_s64_v:
1390 case NEON::BI__builtin_neon_vcvtq_u64_v:
1391 case NEON::BI__builtin_neon_vcvtq_s16_f16:
1392 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
1393 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
1394 if (Int) {
1395 // AArch64: use fptosi.sat/fptoui.sat unless under strict FP.
1396 if (!Builder.getIsFPConstrained())
1397 Int = Usgn ? Intrinsic::fptoui_sat : Intrinsic::fptosi_sat;
1398 llvm::Type *Tys[2] = {Ty, Ops[0]->getType()};
1399 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
1400 }
1401 // FIXME: ARM uses plain fptoui/fptosi which have UB on out-of-range
1402 // values. These should also use saturating intrinsics.
1403 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
1404 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
1405 }
1406 case NEON::BI__builtin_neon_vcvta_s16_f16:
1407 case NEON::BI__builtin_neon_vcvta_s32_v:
1408 case NEON::BI__builtin_neon_vcvta_s64_v:
1409 case NEON::BI__builtin_neon_vcvta_u16_f16:
1410 case NEON::BI__builtin_neon_vcvta_u32_v:
1411 case NEON::BI__builtin_neon_vcvta_u64_v:
1412 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
1413 case NEON::BI__builtin_neon_vcvtaq_s32_v:
1414 case NEON::BI__builtin_neon_vcvtaq_s64_v:
1415 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
1416 case NEON::BI__builtin_neon_vcvtaq_u32_v:
1417 case NEON::BI__builtin_neon_vcvtaq_u64_v:
1418 case NEON::BI__builtin_neon_vcvtn_s16_f16:
1419 case NEON::BI__builtin_neon_vcvtn_s32_v:
1420 case NEON::BI__builtin_neon_vcvtn_s64_v:
1421 case NEON::BI__builtin_neon_vcvtn_u16_f16:
1422 case NEON::BI__builtin_neon_vcvtn_u32_v:
1423 case NEON::BI__builtin_neon_vcvtn_u64_v:
1424 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
1425 case NEON::BI__builtin_neon_vcvtnq_s32_v:
1426 case NEON::BI__builtin_neon_vcvtnq_s64_v:
1427 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
1428 case NEON::BI__builtin_neon_vcvtnq_u32_v:
1429 case NEON::BI__builtin_neon_vcvtnq_u64_v:
1430 case NEON::BI__builtin_neon_vcvtp_s16_f16:
1431 case NEON::BI__builtin_neon_vcvtp_s32_v:
1432 case NEON::BI__builtin_neon_vcvtp_s64_v:
1433 case NEON::BI__builtin_neon_vcvtp_u16_f16:
1434 case NEON::BI__builtin_neon_vcvtp_u32_v:
1435 case NEON::BI__builtin_neon_vcvtp_u64_v:
1436 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
1437 case NEON::BI__builtin_neon_vcvtpq_s32_v:
1438 case NEON::BI__builtin_neon_vcvtpq_s64_v:
1439 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
1440 case NEON::BI__builtin_neon_vcvtpq_u32_v:
1441 case NEON::BI__builtin_neon_vcvtpq_u64_v:
1442 case NEON::BI__builtin_neon_vcvtm_s16_f16:
1443 case NEON::BI__builtin_neon_vcvtm_s32_v:
1444 case NEON::BI__builtin_neon_vcvtm_s64_v:
1445 case NEON::BI__builtin_neon_vcvtm_u16_f16:
1446 case NEON::BI__builtin_neon_vcvtm_u32_v:
1447 case NEON::BI__builtin_neon_vcvtm_u64_v:
1448 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
1449 case NEON::BI__builtin_neon_vcvtmq_s32_v:
1450 case NEON::BI__builtin_neon_vcvtmq_s64_v:
1451 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
1452 case NEON::BI__builtin_neon_vcvtmq_u32_v:
1453 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
1454 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
1455 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
1456 }
1457 case NEON::BI__builtin_neon_vcvtx_f32_v: {
1458 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
1459 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
1460
1461 }
1462 case NEON::BI__builtin_neon_vext_v:
1463 case NEON::BI__builtin_neon_vextq_v: {
1464 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
1465 SmallVector<int, 16> Indices;
1466 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1467 Indices.push_back(i+CV);
1468
1469 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1470 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1471 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
1472 }
1473 case NEON::BI__builtin_neon_vfma_v:
1474 case NEON::BI__builtin_neon_vfmaq_v: {
1475 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1476 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1477 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1478
1479 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
1481 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
1482 {Ops[1], Ops[2], Ops[0]});
1483 }
1484 case NEON::BI__builtin_neon_vld1_v:
1485 case NEON::BI__builtin_neon_vld1q_v: {
1486 llvm::Type *Tys[] = {Ty, Int8PtrTy};
1487 Ops.push_back(getAlignmentValue32(PtrOp0));
1488 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
1489 }
1490 case NEON::BI__builtin_neon_vld1_x2_v:
1491 case NEON::BI__builtin_neon_vld1q_x2_v:
1492 case NEON::BI__builtin_neon_vld1_x3_v:
1493 case NEON::BI__builtin_neon_vld1q_x3_v:
1494 case NEON::BI__builtin_neon_vld1_x4_v:
1495 case NEON::BI__builtin_neon_vld1q_x4_v: {
1496 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
1497 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1498 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
1499 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
1500 }
1501 case NEON::BI__builtin_neon_vld2_v:
1502 case NEON::BI__builtin_neon_vld2q_v:
1503 case NEON::BI__builtin_neon_vld3_v:
1504 case NEON::BI__builtin_neon_vld3q_v:
1505 case NEON::BI__builtin_neon_vld4_v:
1506 case NEON::BI__builtin_neon_vld4q_v:
1507 case NEON::BI__builtin_neon_vld2_dup_v:
1508 case NEON::BI__builtin_neon_vld2q_dup_v:
1509 case NEON::BI__builtin_neon_vld3_dup_v:
1510 case NEON::BI__builtin_neon_vld3q_dup_v:
1511 case NEON::BI__builtin_neon_vld4_dup_v:
1512 case NEON::BI__builtin_neon_vld4q_dup_v: {
1513 llvm::Type *Tys[] = {Ty, Int8PtrTy};
1514 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1515 Value *Align = getAlignmentValue32(PtrOp1);
1516 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
1517 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
1518 }
1519 case NEON::BI__builtin_neon_vld1_dup_v:
1520 case NEON::BI__builtin_neon_vld1q_dup_v: {
1521 Value *V = PoisonValue::get(Ty);
1522 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
1523 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
1524 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
1525 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
1526 return EmitNeonSplat(Ops[0], CI);
1527 }
1528 case NEON::BI__builtin_neon_vld2_lane_v:
1529 case NEON::BI__builtin_neon_vld2q_lane_v:
1530 case NEON::BI__builtin_neon_vld3_lane_v:
1531 case NEON::BI__builtin_neon_vld3q_lane_v:
1532 case NEON::BI__builtin_neon_vld4_lane_v:
1533 case NEON::BI__builtin_neon_vld4q_lane_v: {
1534 llvm::Type *Tys[] = {Ty, Int8PtrTy};
1535 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1536 for (unsigned I = 2; I < Ops.size() - 1; ++I)
1537 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
1538 Ops.push_back(getAlignmentValue32(PtrOp1));
1539 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
1540 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
1541 }
1542 case NEON::BI__builtin_neon_vmovl_v: {
1543 llvm::FixedVectorType *DTy =
1544 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
1545 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
1546 if (Usgn)
1547 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
1548 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
1549 }
1550 case NEON::BI__builtin_neon_vmovn_v: {
1551 llvm::FixedVectorType *QTy =
1552 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1553 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
1554 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
1555 }
1556 case NEON::BI__builtin_neon_vmull_v:
1557 // FIXME: the integer vmull operations could be emitted in terms of pure
1558 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
1559 // hoisting the exts outside loops. Until global ISel comes along that can
1560 // see through such movement this leads to bad CodeGen. So we need an
1561 // intrinsic for now.
1562 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
1563 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
1564 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
1565 case NEON::BI__builtin_neon_vpadal_v:
1566 case NEON::BI__builtin_neon_vpadalq_v: {
1567 // The source operand type has twice as many elements of half the size.
1568 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
1569 llvm::Type *EltTy =
1570 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
1571 auto *NarrowTy =
1572 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
1573 llvm::Type *Tys[2] = { Ty, NarrowTy };
1574 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
1575 }
1576 case NEON::BI__builtin_neon_vpaddl_v:
1577 case NEON::BI__builtin_neon_vpaddlq_v: {
1578 // The source operand type has twice as many elements of half the size.
1579 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
1580 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
1581 auto *NarrowTy =
1582 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
1583 llvm::Type *Tys[2] = { Ty, NarrowTy };
1584 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
1585 }
1586 case NEON::BI__builtin_neon_vqdmlal_v:
1587 case NEON::BI__builtin_neon_vqdmlsl_v: {
1588 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
1589 Ops[1] =
1590 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
1591 Ops.resize(2);
1592 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
1593 }
1594 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
1595 case NEON::BI__builtin_neon_vqdmulh_lane_v:
1596 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
1597 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
1598 auto *RTy = cast<llvm::FixedVectorType>(Ty);
1599 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
1600 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
1601 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
1602 RTy->getNumElements() * 2);
1603 llvm::Type *Tys[2] = {
1604 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
1605 /*isQuad*/ false))};
1606 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
1607 }
1608 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
1609 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
1610 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
1611 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
1612 llvm::Type *Tys[2] = {
1613 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
1614 /*isQuad*/ true))};
1615 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
1616 }
1617 case NEON::BI__builtin_neon_vqshl_n_v:
1618 case NEON::BI__builtin_neon_vqshlq_n_v:
1619 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
1620 1, false);
1621 case NEON::BI__builtin_neon_vqshlu_n_v:
1622 case NEON::BI__builtin_neon_vqshluq_n_v:
1623 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
1624 1, false);
1625 case NEON::BI__builtin_neon_vrecpe_v:
1626 case NEON::BI__builtin_neon_vrecpeq_v:
1627 case NEON::BI__builtin_neon_vrsqrte_v:
1628 case NEON::BI__builtin_neon_vrsqrteq_v:
1629 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
1630 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
1631 case NEON::BI__builtin_neon_vrndi_v:
1632 case NEON::BI__builtin_neon_vrndiq_v:
1633 Int = Builder.getIsFPConstrained()
1634 ? Intrinsic::experimental_constrained_nearbyint
1635 : Intrinsic::nearbyint;
1636 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
1637 case NEON::BI__builtin_neon_vrshr_n_v:
1638 case NEON::BI__builtin_neon_vrshrq_n_v:
1639 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
1640 1, true);
1641 case NEON::BI__builtin_neon_vsha512hq_u64:
1642 case NEON::BI__builtin_neon_vsha512h2q_u64:
1643 case NEON::BI__builtin_neon_vsha512su0q_u64:
1644 case NEON::BI__builtin_neon_vsha512su1q_u64: {
1645 Function *F = CGM.getIntrinsic(Int);
1646 return EmitNeonCall(F, Ops, "");
1647 }
1648 case NEON::BI__builtin_neon_vshl_n_v:
1649 case NEON::BI__builtin_neon_vshlq_n_v:
1650 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
1651 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
1652 "vshl_n");
1653 case NEON::BI__builtin_neon_vshll_n_v: {
1654 llvm::FixedVectorType *SrcTy =
1655 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
1656 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1657 if (Usgn)
1658 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
1659 else
1660 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
1661 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
1662 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
1663 }
1664 case NEON::BI__builtin_neon_vshrn_n_v: {
1665 llvm::FixedVectorType *SrcTy =
1666 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1667 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1668 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
1669 if (Usgn)
1670 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
1671 else
1672 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
1673 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
1674 }
1675 case NEON::BI__builtin_neon_vshr_n_v:
1676 case NEON::BI__builtin_neon_vshrq_n_v:
1677 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
1678 case NEON::BI__builtin_neon_vst1_v:
1679 case NEON::BI__builtin_neon_vst1q_v:
1680 case NEON::BI__builtin_neon_vst2_v:
1681 case NEON::BI__builtin_neon_vst2q_v:
1682 case NEON::BI__builtin_neon_vst3_v:
1683 case NEON::BI__builtin_neon_vst3q_v:
1684 case NEON::BI__builtin_neon_vst4_v:
1685 case NEON::BI__builtin_neon_vst4q_v:
1686 case NEON::BI__builtin_neon_vst2_lane_v:
1687 case NEON::BI__builtin_neon_vst2q_lane_v:
1688 case NEON::BI__builtin_neon_vst3_lane_v:
1689 case NEON::BI__builtin_neon_vst3q_lane_v:
1690 case NEON::BI__builtin_neon_vst4_lane_v:
1691 case NEON::BI__builtin_neon_vst4q_lane_v: {
1692 llvm::Type *Tys[] = {Int8PtrTy, Ty};
1693 Ops.push_back(getAlignmentValue32(PtrOp0));
1694 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
1695 }
1696 case NEON::BI__builtin_neon_vsm3partw1q_u32:
1697 case NEON::BI__builtin_neon_vsm3partw2q_u32:
1698 case NEON::BI__builtin_neon_vsm3ss1q_u32:
1699 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
1700 case NEON::BI__builtin_neon_vsm4eq_u32: {
1701 Function *F = CGM.getIntrinsic(Int);
1702 return EmitNeonCall(F, Ops, "");
1703 }
1704 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
1705 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
1706 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
1707 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
1708 Function *F = CGM.getIntrinsic(Int);
1709 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
1710 return EmitNeonCall(F, Ops, "");
1711 }
1712 case NEON::BI__builtin_neon_vst1_x2_v:
1713 case NEON::BI__builtin_neon_vst1q_x2_v:
1714 case NEON::BI__builtin_neon_vst1_x3_v:
1715 case NEON::BI__builtin_neon_vst1q_x3_v:
1716 case NEON::BI__builtin_neon_vst1_x4_v:
1717 case NEON::BI__builtin_neon_vst1q_x4_v: {
1718 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
1719 // in AArch64 it comes last. We may want to stick to one or another.
1720 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
1721 Arch == llvm::Triple::aarch64_32) {
1722 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
1723 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
1724 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
1725 }
1726 llvm::Type *Tys[2] = {DefaultPtrTy, VTy};
1727 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
1728 }
1729 case NEON::BI__builtin_neon_vsubhn_v: {
1730 llvm::FixedVectorType *SrcTy =
1731 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1732
1733 // %sum = sub <4 x i32> %lhs, %rhs  (the difference, despite the name)
1734 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1735 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
1736 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
1737
1738 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
1739 Constant *ShiftAmt =
1740 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
1741 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
1742
1743 // %res = trunc <4 x i32> %high to <4 x i16>
1744 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
1745 }
1746 case NEON::BI__builtin_neon_vtrn_v:
1747 case NEON::BI__builtin_neon_vtrnq_v: {
1748 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1749 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1750 Value *SV = nullptr;
1751
1752 for (unsigned vi = 0; vi != 2; ++vi) {
1753 SmallVector<int, 16> Indices;
1754 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1755 Indices.push_back(i+vi);
1756 Indices.push_back(i+e+vi);
1757 }
1758 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
1759 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
1760 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
1761 }
1762 return SV;
1763 }
1764 case NEON::BI__builtin_neon_vtst_v:
1765 case NEON::BI__builtin_neon_vtstq_v: {
1766 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1767 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1768 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
1769 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
1770 ConstantAggregateZero::get(Ty));
1771 return Builder.CreateSExt(Ops[0], Ty, "vtst");
1772 }
1773 case NEON::BI__builtin_neon_vuzp_v:
1774 case NEON::BI__builtin_neon_vuzpq_v: {
1775 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1776 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1777 Value *SV = nullptr;
1778
1779 for (unsigned vi = 0; vi != 2; ++vi) {
1780 SmallVector<int, 16> Indices;
1781 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1782 Indices.push_back(2*i+vi);
1783
1784 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
1785 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
1786 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
1787 }
1788 return SV;
1789 }
1790 case NEON::BI__builtin_neon_vxarq_u64: {
1791 Function *F = CGM.getIntrinsic(Int);
1792 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
1793 return EmitNeonCall(F, Ops, "");
1794 }
1795 case NEON::BI__builtin_neon_vzip_v:
1796 case NEON::BI__builtin_neon_vzipq_v: {
1797 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1798 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1799 Value *SV = nullptr;
1800
1801 for (unsigned vi = 0; vi != 2; ++vi) {
1802 SmallVector<int, 16> Indices;
1803 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1804 Indices.push_back((i + vi*e) >> 1);
1805 Indices.push_back(((i + vi*e) >> 1)+e);
1806 }
1807 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
1808 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
1809 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
1810 }
1811 return SV;
1812 }
1813 case NEON::BI__builtin_neon_vdot_s32:
1814 case NEON::BI__builtin_neon_vdot_u32:
1815 case NEON::BI__builtin_neon_vdotq_s32:
1816 case NEON::BI__builtin_neon_vdotq_u32: {
1817 auto *InputTy =
1818 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1819 llvm::Type *Tys[2] = { Ty, InputTy };
1820 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
1821 }
1822 case NEON::BI__builtin_neon_vfmlal_low_f16:
1823 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
1824 auto *InputTy =
1825 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1826 llvm::Type *Tys[2] = { Ty, InputTy };
1827 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
1828 }
1829 case NEON::BI__builtin_neon_vfmlsl_low_f16:
1830 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
1831 auto *InputTy =
1832 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1833 llvm::Type *Tys[2] = { Ty, InputTy };
1834 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
1835 }
1836 case NEON::BI__builtin_neon_vfmlal_high_f16:
1837 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
1838 auto *InputTy =
1839 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1840 llvm::Type *Tys[2] = { Ty, InputTy };
1841 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
1842 }
1843 case NEON::BI__builtin_neon_vfmlsl_high_f16:
1844 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
1845 auto *InputTy =
1846 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1847 llvm::Type *Tys[2] = { Ty, InputTy };
1848 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
1849 }
1850 case NEON::BI__builtin_neon_vmmlaq_s32:
1851 case NEON::BI__builtin_neon_vmmlaq_u32: {
1852 auto *InputTy =
1853 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1854 llvm::Type *Tys[2] = { Ty, InputTy };
1855 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
1856 }
1857 case NEON::BI__builtin_neon_vmmlaq_f16_f16:
1858 case NEON::BI__builtin_neon_vmmlaq_f32_f16: {
1859 auto *InputTy =
1860 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1861 llvm::Type *Tys[2] = {Ty, InputTy};
1862 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fmmla");
1863 }
1864 case NEON::BI__builtin_neon_vusmmlaq_s32: {
1865 auto *InputTy =
1866 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1867 llvm::Type *Tys[2] = { Ty, InputTy };
1868 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
1869 }
1870 case NEON::BI__builtin_neon_vusdot_s32:
1871 case NEON::BI__builtin_neon_vusdotq_s32: {
1872 auto *InputTy =
1873 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1874 llvm::Type *Tys[2] = { Ty, InputTy };
1875 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
1876 }
1877 case NEON::BI__builtin_neon_vbfdot_f32:
1878 case NEON::BI__builtin_neon_vbfdotq_f32: {
1879 llvm::Type *InputTy =
1880 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
1881 llvm::Type *Tys[2] = { Ty, InputTy };
1882 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
1883 }
1884 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
1885 llvm::Type *Tys[1] = { Ty };
1886 Function *F = CGM.getIntrinsic(Int, Tys);
1887 return EmitNeonCall(F, Ops, "vcvtfp2bf");
1888 }
1889
1890 }
1891
1892 assert(Int && "Expected valid intrinsic number");
1893
1894 // Determine the type(s) of this overloaded AArch64 intrinsic.
1895 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
1896
1897 Value *Result = EmitNeonCall(F, Ops, NameHint);
1898 llvm::Type *ResultType = ConvertType(E->getType());
1899 // AArch64 intrinsic one-element vector type cast to
1900 // scalar type expected by the builtin
1901 return Builder.CreateBitCast(Result, ResultType, NameHint);
1902}
1903
// Compare Op against the zero value of its own type using predicate Pred and
// sign-extend the comparison result to an integer type.
//
// When Ty is a fixed vector type, Op is first bitcast to Ty and the result type
// is a vector of integers with the same lane width and lane count as Ty;
// otherwise the result type is Ty itself. Name names the final sext.
//
// NOTE(review): the line that carries the function name and the declarations
// of Op and Ty is absent from this listing (the embedded numbering jumps from
// 1904 to 1906). Presumably this is
// CodeGenFunction::EmitAArch64CompareBuiltinExpr -- confirm against the
// original file and restore the line.
1904Value *
1906 const CmpInst::Predicate Pred,
1907 const Twine &Name) {
1908
1909 if (isa<FixedVectorType>(Ty)) {
1910 // Vector types are cast to i8 vectors. Recover original type.
1911 Op = Builder.CreateBitCast(Op, Ty);
1912 }
1913
 // The right-hand side of the comparison is always the null value of Op's
 // (possibly just recovered) type.
1914 Constant *zero = Constant::getNullValue(Op->getType());
1915
 // Floating-point predicates: only FCMP_OEQ goes through CreateFCmp, every
 // other one goes through CreateFCmpS. Integer predicates use CreateICmp.
1916 if (CmpInst::isFPPredicate(Pred)) {
1917 if (Pred == CmpInst::FCMP_OEQ)
1918 Op = Builder.CreateFCmp(Pred, Op, zero);
1919 else
1920 Op = Builder.CreateFCmpS(Pred, Op, zero);
1921 } else {
1922 Op = Builder.CreateICmp(Pred, Op, zero);
1923 }
1924
 // For fixed vectors the result keeps the lane count and lane width of Ty but
 // always has integer lanes.
1925 llvm::Type *ResTy = Ty;
1926 if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
1927 ResTy = FixedVectorType::get(
1928 IntegerType::get(getLLVMContext(), VTy->getScalarSizeInBits()),
1929 VTy->getNumElements());
1930
1931 return Builder.CreateSExt(Op, ResTy, Name);
1932}
1933
// Pack a list of table vectors pairwise into wider vectors and emit the table
// lookup intrinsic IntID (overloaded on ResTy) on them.
//
// The operand list handed to the intrinsic is built in this order: ExtOp (only
// when non-null), one shufflevector per pair of entries of Ops, one extra
// shufflevector pairing a leftover odd entry with a zero vector, and finally
// IndexOp. Name is used for the shuffles and for the call.
//
// NOTE(review): the line with the function name and the declarations of CGF and
// Ops is absent from this listing, and so is the declaration of TblOps (the
// embedded numbering skips 1934 and 1938). Presumably this is
// packTBLDVectorList -- confirm against the original file and restore both
// lines.
1935 Value *ExtOp, Value *IndexOp,
1936 llvm::Type *ResTy, unsigned IntID,
1937 const char *Name) {
1939 if (ExtOp)
1940 TblOps.push_back(ExtOp);
1941
1942 // Build the shuffle mask 0, 1, ..., 2*N-1, where N is the element count of
 // Ops[0]; applied to two N-element vectors it concatenates them.
1943 SmallVector<int, 16> Indices;
1944 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
1945 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
1946 Indices.push_back(2*i);
1947 Indices.push_back(2*i+1);
1948 }
1949
 // Concatenate the entries of Ops two at a time. End is the index of the last
 // entry, so the loop stops before an unpaired trailing entry.
1950 int PairPos = 0, End = Ops.size() - 1;
1951 while (PairPos < End) {
1952 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
1953 Ops[PairPos+1], Indices,
1954 Name));
1955 PairPos += 2;
1956 }
1957
1958 // If there is an odd number of 64-bit lookup tables, fill the high 64 bits
1959 // of the last 128-bit lookup table with zero.
1960 if (PairPos == End) {
1961 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
1962 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
1963 ZeroTbl, Indices, Name));
1964 }
1965
 // The index operand always comes last.
1966 Function *TblF;
1967 TblOps.push_back(IndexOp);
1968 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
1969
1970 return CGF.EmitNeonCall(TblF, TblOps, Name);
1971}
1972
1973Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
1974 unsigned Value;
1975 switch (BuiltinID) {
1976 default:
1977 return nullptr;
1978 case clang::ARM::BI__builtin_arm_nop:
1979 Value = 0;
1980 break;
1981 case clang::ARM::BI__builtin_arm_yield:
1982 case clang::ARM::BI__yield:
1983 Value = 1;
1984 break;
1985 case clang::ARM::BI__builtin_arm_wfe:
1986 case clang::ARM::BI__wfe:
1987 Value = 2;
1988 break;
1989 case clang::ARM::BI__builtin_arm_wfi:
1990 case clang::ARM::BI__wfi:
1991 Value = 3;
1992 break;
1993 case clang::ARM::BI__builtin_arm_sev:
1994 case clang::ARM::BI__sev:
1995 Value = 4;
1996 break;
1997 case clang::ARM::BI__builtin_arm_sevl:
1998 case clang::ARM::BI__sevl:
1999 Value = 5;
2000 break;
2001 }
2002
2003 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
2004 llvm::ConstantInt::get(Int32Ty, Value));
2005}
2006
2012
2013// Generates the IR for the read/write special register builtin.
2014// ValueType is the type of the value that is to be written or read,
2015// RegisterType is the type of the register being written to or read from.
//
// The register name is SysReg when that is non-empty; otherwise it is the
// string literal passed as argument 0 of E. The name is handed to the
// intrinsic as a metadata operand.
//
// For any AccessKind other than Write, a read_register (or, for VolatileRead,
// read_volatile_register) call is emitted and its result is returned, truncated
// to ValueType for a 32-bit value in a 64-bit register, or converted with
// inttoptr when ValueType is a pointer. For Write, argument 1 of E is emitted,
// widened with zext or converted with ptrtoint as needed, and passed to
// write_register; the call is returned.
//
// NOTE(review): the line with the function name and the declaration of CGF is
// absent from this listing (the embedded numbering jumps from 2015 to 2017).
// Presumably this is EmitSpecialRegisterBuiltin -- confirm against the original
// file and restore the line.
2017 const CallExpr *E,
2018 llvm::Type *RegisterType,
2019 llvm::Type *ValueType,
2020 SpecialRegisterAccessKind AccessKind,
2021 StringRef SysReg = "") {
2022 // Register reads and writes are only supported for 32, 64 and 128 bit
 // registers.
2023 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
2024 RegisterType->isIntegerTy(128)) &&
2025 "Unsupported size for register.");
2026
2027 CodeGen::CGBuilderTy &Builder = CGF.Builder;
2028 CodeGen::CodeGenModule &CGM = CGF.CGM;
2029 LLVMContext &Context = CGM.getLLVMContext();
2030
 // No explicit register name was given: take it from the string literal that
 // is the first call argument.
2031 if (SysReg.empty()) {
2032 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
2033 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
2034 }
2035
 // Wrap the register name as !{!"name"} so it can be passed as a call operand.
2036 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
2037 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
2038 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
2039
 // The register intrinsics are overloaded on the register type.
2040 llvm::Type *Types[] = { RegisterType };
2041
 // A 32-bit value in a 64-bit register needs a trunc on read and a zext on
 // write; the opposite combination is rejected.
2042 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
2043 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
2044 && "Can't fit 64-bit value in 32-bit register");
2045
2046 if (AccessKind != Write) {
2047 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
2048 llvm::Function *F = CGM.getIntrinsic(
2049 AccessKind == VolatileRead ? Intrinsic::read_volatile_register
2050 : Intrinsic::read_register,
2051 Types);
2052 llvm::Value *Call = Builder.CreateCall(F, Metadata);
2053
2054 if (MixedTypes)
2055 // Read into 64 bit register and then truncate result to 32 bit.
2056 return Builder.CreateTrunc(Call, ValueType);
2057
2058 if (ValueType->isPointerTy())
2059 // Have an integer result (Call) but the builtin returns a pointer.
2060 return Builder.CreateIntToPtr(Call, ValueType);
2061
2062 return Call;
2063 }
2064
 // Write path: the value to store is the second call argument.
2065 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
2066 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
2067 if (MixedTypes) {
2068 // Extend 32 bit write value to 64 bit to pass to write.
2069 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
2070 return Builder.CreateCall(F, { Metadata, ArgValue });
2071 }
2072
2073 if (ValueType->isPointerTy()) {
2074 // Have a pointer ArgValue but the write takes the integer RegisterType.
2075 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
2076 return Builder.CreateCall(F, { Metadata, ArgValue });
2077 }
2078
2079 return Builder.CreateCall(F, { Metadata, ArgValue });
2080}
2081
2082static Value *EmitRangePrefetchBuiltin(CodeGenFunction &CGF, unsigned BuiltinID,
2083 const CallExpr *E) {
2084 CodeGen::CGBuilderTy &Builder = CGF.Builder;
2085 CodeGen::CodeGenModule &CGM = CGF.CGM;
2087
2088 auto getIntArg = [&](unsigned ArgNo) {
2090 if (!E->getArg(ArgNo)->EvaluateAsInt(Result, CGM.getContext()))
2091 llvm_unreachable("Expected constant argument to range prefetch.");
2092 return Result.Val.getInt().getExtValue();
2093 };
2094
2095 Ops.push_back(CGF.EmitScalarExpr(E->getArg(0))); /*Addr*/
2096 Ops.push_back(CGF.EmitScalarExpr(E->getArg(1))); /*Access Kind*/
2097 Ops.push_back(CGF.EmitScalarExpr(E->getArg(2))); /*Policy*/
2098
2099 if (BuiltinID == clang::AArch64::BI__builtin_arm_range_prefetch_x) {
2100 auto Length = getIntArg(3);
2101 auto Count = getIntArg(4) - 1;
2102 auto Stride = getIntArg(5);
2103 auto Distance = getIntArg(6);
2104
2105 // Map ReuseDistance given in bytes to four bits representing decreasing
2106 // powers of two in the range 512MiB (0b0001) to 32KiB (0b1111). Values
2107 // are rounded up to the nearest power of 2, starting at 32KiB. Any value
2108 // over the maximum is represented by 0 (distance not known).
2109 if (Distance > 0) {
2110 Distance = llvm::Log2_32_Ceil(Distance);
2111 if (Distance < 15)
2112 Distance = 15;
2113 else if (Distance > 29)
2114 Distance = 0;
2115 else
2116 Distance = 30 - Distance;
2117 }
2118
2119 uint64_t Mask22 = (1ULL << 22) - 1;
2120 uint64_t Mask16 = (1ULL << 16) - 1;
2121 uint64_t Metadata = (Distance << 60) | ((Stride & Mask22) << 38) |
2122 ((Count & Mask16) << 22) | (Length & Mask22);
2123
2124 Ops.push_back(llvm::ConstantInt::get(Builder.getInt64Ty(), Metadata));
2125 } else
2126 Ops.push_back(CGF.EmitScalarExpr(E->getArg(3)));
2127
2128 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_range_prefetch),
2129 Ops);
2130}
2131
2132/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
2133/// argument that specifies the vector type. The additional argument is meant
2134/// for Sema checking (see `CheckNeonBuiltinFunctionCall`) and this function
2135/// should be kept consistent with the logic in Sema.
2136/// TODO: Make this return false for SISD builtins.
2137static bool HasExtraNeonArgument(unsigned BuiltinID) {
2138 // Required by the headers included below, but not in this particular
2139 // function.
2140 [[maybe_unused]] int PtrArgNum = -1;
2141 [[maybe_unused]] bool HasConstPtr = false;
2142
2143 // The mask encodes the type. We don't care about the actual value. Instead,
2144 // we just check whether its been set.
2145 uint64_t mask = 0;
2146 switch (BuiltinID) {
2147#define GET_NEON_OVERLOAD_CHECK
2148#include "clang/Basic/arm_fp16.inc"
2149#include "clang/Basic/arm_neon.inc"
2150#undef GET_NEON_OVERLOAD_CHECK
2151 // Non-neon builtins for controling VFP that take extra argument for
2152 // discriminating the type.
2153 case ARM::BI__builtin_arm_vcvtr_f:
2154 case ARM::BI__builtin_arm_vcvtr_d:
2155 mask = 1;
2156 }
2157
2158 if (mask)
2159 return true;
2160
2161 return false;
2162}
2163
2165 const CallExpr *E,
2167 llvm::Triple::ArchType Arch) {
2168 if (auto Hint = GetValueForARMHint(BuiltinID))
2169 return Hint;
2170
2171 if (BuiltinID == clang::ARM::BI__emit) {
2172 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
2173 llvm::FunctionType *FTy =
2174 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
2175
2177 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
2178 llvm_unreachable("Sema will ensure that the parameter is constant");
2179
2180 llvm::APSInt Value = Result.Val.getInt();
2181 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
2182
2183 llvm::InlineAsm *Emit =
2184 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
2185 /*hasSideEffects=*/true)
2186 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
2187 /*hasSideEffects=*/true);
2188
2189 return Builder.CreateCall(Emit);
2190 }
2191
2192 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
2193 Value *Option = EmitScalarExpr(E->getArg(0));
2194 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
2195 }
2196
2197 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
2199 Value *RW = EmitScalarExpr(E->getArg(1));
2200 Value *IsData = EmitScalarExpr(E->getArg(2));
2201
2202 // Locality is not supported on ARM target
2203 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
2204
2205 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
2206 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
2207 }
2208
2209 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
2210 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2211 return Builder.CreateCall(
2212 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
2213 }
2214
2215 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
2216 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
2217 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2218 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
2219 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
2220 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
2221 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
2222 return Res;
2223 }
2224
2225
2226 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
2227 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2228 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
2229 }
2230 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
2231 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2232 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
2233 "cls");
2234 }
2235
2236 if (BuiltinID == clang::ARM::BI__clear_cache) {
2237 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
2238 const FunctionDecl *FD = E->getDirectCallee();
2239 Value *Ops[2];
2240 for (unsigned i = 0; i < 2; i++)
2241 Ops[i] = EmitScalarExpr(E->getArg(i));
2242 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
2243 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
2244 StringRef Name = FD->getName();
2245 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
2246 }
2247
2248 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
2249 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
2250 Function *F;
2251
2252 switch (BuiltinID) {
2253 default: llvm_unreachable("unexpected builtin");
2254 case clang::ARM::BI__builtin_arm_mcrr:
2255 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
2256 break;
2257 case clang::ARM::BI__builtin_arm_mcrr2:
2258 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
2259 break;
2260 }
2261
2262 // MCRR{2} instruction has 5 operands but
2263 // the intrinsic has 4 because Rt and Rt2
2264 // are represented as a single unsigned 64
2265 // bit integer in the intrinsic definition
2266 // but internally it's represented as 2 32
2267 // bit integers.
2268
2269 Value *Coproc = EmitScalarExpr(E->getArg(0));
2270 Value *Opc1 = EmitScalarExpr(E->getArg(1));
2271 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
2272 Value *CRm = EmitScalarExpr(E->getArg(3));
2273
2274 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
2275 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
2276 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
2277 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
2278
2279 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
2280 }
2281
2282 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
2283 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
2284 Function *F;
2285
2286 switch (BuiltinID) {
2287 default: llvm_unreachable("unexpected builtin");
2288 case clang::ARM::BI__builtin_arm_mrrc:
2289 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
2290 break;
2291 case clang::ARM::BI__builtin_arm_mrrc2:
2292 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
2293 break;
2294 }
2295
2296 Value *Coproc = EmitScalarExpr(E->getArg(0));
2297 Value *Opc1 = EmitScalarExpr(E->getArg(1));
2298 Value *CRm = EmitScalarExpr(E->getArg(2));
2299 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
2300
2301 // Returns an unsigned 64 bit integer, represented
2302 // as two 32 bit integers.
2303
2304 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
2305 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
2306 Rt = Builder.CreateZExt(Rt, Int64Ty);
2307 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
2308
2309 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
2310 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
2311 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
2312
2313 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
2314 }
2315
2316 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
2317 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2318 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
2319 getContext().getTypeSize(E->getType()) == 64) ||
2320 BuiltinID == clang::ARM::BI__ldrexd) {
2321 Function *F;
2322
2323 switch (BuiltinID) {
2324 default: llvm_unreachable("unexpected builtin");
2325 case clang::ARM::BI__builtin_arm_ldaex:
2326 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
2327 break;
2328 case clang::ARM::BI__builtin_arm_ldrexd:
2329 case clang::ARM::BI__builtin_arm_ldrex:
2330 case clang::ARM::BI__ldrexd:
2331 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
2332 break;
2333 }
2334
2335 Value *LdPtr = EmitScalarExpr(E->getArg(0));
2336 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
2337
2338 Value *Val0 = Builder.CreateExtractValue(Val, 1);
2339 Value *Val1 = Builder.CreateExtractValue(Val, 0);
2340 Val0 = Builder.CreateZExt(Val0, Int64Ty);
2341 Val1 = Builder.CreateZExt(Val1, Int64Ty);
2342
2343 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
2344 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
2345 Val = Builder.CreateOr(Val, Val1);
2346 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
2347 }
2348
2349 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2350 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
2351 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
2352
2353 QualType Ty = E->getType();
2354 llvm::Type *RealResTy = ConvertType(Ty);
2355 llvm::Type *IntTy =
2356 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
2357
2358 Function *F = CGM.getIntrinsic(
2359 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
2360 : Intrinsic::arm_ldrex,
2361 DefaultPtrTy);
2362 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
2363 Val->addParamAttr(
2364 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
2365
2366 if (RealResTy->isPointerTy())
2367 return Builder.CreateIntToPtr(Val, RealResTy);
2368 else {
2369 llvm::Type *IntResTy = llvm::IntegerType::get(
2370 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
2371 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
2372 RealResTy);
2373 }
2374 }
2375
2376 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
2377 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
2378 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
2379 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
2380 Function *F = CGM.getIntrinsic(
2381 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
2382 : Intrinsic::arm_strexd);
2383 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
2384
2386 Value *Val = EmitScalarExpr(E->getArg(0));
2387 Builder.CreateStore(Val, Tmp);
2388
2389 Address LdPtr = Tmp.withElementType(STy);
2390 Val = Builder.CreateLoad(LdPtr);
2391
2392 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
2393 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
2394 Value *StPtr = EmitScalarExpr(E->getArg(1));
2395 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
2396 }
2397
2398 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
2399 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
2400 Value *StoreVal = EmitScalarExpr(E->getArg(0));
2401 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
2402
2403 QualType Ty = E->getArg(0)->getType();
2404 llvm::Type *StoreTy =
2405 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
2406
2407 if (StoreVal->getType()->isPointerTy())
2408 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
2409 else {
2410 llvm::Type *IntTy = llvm::IntegerType::get(
2412 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
2413 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
2414 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
2415 }
2416
2417 Function *F = CGM.getIntrinsic(
2418 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
2419 : Intrinsic::arm_strex,
2420 StoreAddr->getType());
2421
2422 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
2423 CI->addParamAttr(
2424 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
2425 return CI;
2426 }
2427
2428 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
2429 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
2430 return Builder.CreateCall(F);
2431 }
2432
2433 // CRC32
2434 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
2435 switch (BuiltinID) {
2436 case clang::ARM::BI__builtin_arm_crc32b:
2437 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
2438 case clang::ARM::BI__builtin_arm_crc32cb:
2439 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
2440 case clang::ARM::BI__builtin_arm_crc32h:
2441 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
2442 case clang::ARM::BI__builtin_arm_crc32ch:
2443 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
2444 case clang::ARM::BI__builtin_arm_crc32w:
2445 case clang::ARM::BI__builtin_arm_crc32d:
2446 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
2447 case clang::ARM::BI__builtin_arm_crc32cw:
2448 case clang::ARM::BI__builtin_arm_crc32cd:
2449 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
2450 }
2451
2452 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
2453 Value *Arg0 = EmitScalarExpr(E->getArg(0));
2454 Value *Arg1 = EmitScalarExpr(E->getArg(1));
2455
2456 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
2457 // intrinsics, hence we need different codegen for these cases.
2458 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
2459 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
2460 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
2461 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
2462 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
2463 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
2464
2465 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
2466 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
2467 return Builder.CreateCall(F, {Res, Arg1b});
2468 } else {
2469 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
2470
2471 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
2472 return Builder.CreateCall(F, {Arg0, Arg1});
2473 }
2474 }
2475
2476 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
2477 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2478 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
2479 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
2480 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
2481 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
2482
2483 SpecialRegisterAccessKind AccessKind = Write;
2484 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
2485 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2486 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
2487 AccessKind = VolatileRead;
2488
2489 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
2490 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
2491
2492 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2493 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
2494
2495 llvm::Type *ValueType;
2496 llvm::Type *RegisterType;
2497 if (IsPointerBuiltin) {
2498 ValueType = VoidPtrTy;
2500 } else if (Is64Bit) {
2501 ValueType = RegisterType = Int64Ty;
2502 } else {
2503 ValueType = RegisterType = Int32Ty;
2504 }
2505
2506 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
2507 AccessKind);
2508 }
2509
2510 if (BuiltinID == ARM::BI__builtin_sponentry) {
2511 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
2512 return Builder.CreateCall(F);
2513 }
2514
2515 // Handle MSVC intrinsics before argument evaluation to prevent double
2516 // evaluation.
2517 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
2518 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
2519
2520 // Deal with MVE builtins
2521 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
2522 return Result;
2523 // Handle CDE builtins
2524 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
2525 return Result;
2526
2527 // Some intrinsics are equivalent - if they are use the base intrinsic ID.
2528 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
2529 return P.first == BuiltinID;
2530 });
2531 if (It != end(NEONEquivalentIntrinsicMap))
2532 BuiltinID = It->second;
2533
2534 // Find out if any arguments are required to be integer constant
2535 // expressions.
2536 unsigned ICEArguments = 0;
2538 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2539 assert(Error == ASTContext::GE_None && "Should not codegen an error");
2540
2541 auto getAlignmentValue32 = [&](Address addr) -> Value* {
2542 return Builder.getInt32(addr.getAlignment().getQuantity());
2543 };
2544
2545 Address PtrOp0 = Address::invalid();
2546 Address PtrOp1 = Address::invalid();
2548 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
2549 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
2550 for (unsigned i = 0, e = NumArgs; i != e; i++) {
2551 if (i == 0) {
2552 switch (BuiltinID) {
2553 case NEON::BI__builtin_neon_vld1_v:
2554 case NEON::BI__builtin_neon_vld1q_v:
2555 case NEON::BI__builtin_neon_vld1q_lane_v:
2556 case NEON::BI__builtin_neon_vld1_lane_v:
2557 case NEON::BI__builtin_neon_vld1_dup_v:
2558 case NEON::BI__builtin_neon_vld1q_dup_v:
2559 case NEON::BI__builtin_neon_vst1_v:
2560 case NEON::BI__builtin_neon_vst1q_v:
2561 case NEON::BI__builtin_neon_vst1q_lane_v:
2562 case NEON::BI__builtin_neon_vst1_lane_v:
2563 case NEON::BI__builtin_neon_vst2_v:
2564 case NEON::BI__builtin_neon_vst2q_v:
2565 case NEON::BI__builtin_neon_vst2_lane_v:
2566 case NEON::BI__builtin_neon_vst2q_lane_v:
2567 case NEON::BI__builtin_neon_vst3_v:
2568 case NEON::BI__builtin_neon_vst3q_v:
2569 case NEON::BI__builtin_neon_vst3_lane_v:
2570 case NEON::BI__builtin_neon_vst3q_lane_v:
2571 case NEON::BI__builtin_neon_vst4_v:
2572 case NEON::BI__builtin_neon_vst4q_v:
2573 case NEON::BI__builtin_neon_vst4_lane_v:
2574 case NEON::BI__builtin_neon_vst4q_lane_v:
2575 // Get the alignment for the argument in addition to the value;
2576 // we'll use it later.
2577 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
2578 Ops.push_back(PtrOp0.emitRawPointer(*this));
2579 continue;
2580 }
2581 }
2582 if (i == 1) {
2583 switch (BuiltinID) {
2584 case NEON::BI__builtin_neon_vld2_v:
2585 case NEON::BI__builtin_neon_vld2q_v:
2586 case NEON::BI__builtin_neon_vld3_v:
2587 case NEON::BI__builtin_neon_vld3q_v:
2588 case NEON::BI__builtin_neon_vld4_v:
2589 case NEON::BI__builtin_neon_vld4q_v:
2590 case NEON::BI__builtin_neon_vld2_lane_v:
2591 case NEON::BI__builtin_neon_vld2q_lane_v:
2592 case NEON::BI__builtin_neon_vld3_lane_v:
2593 case NEON::BI__builtin_neon_vld3q_lane_v:
2594 case NEON::BI__builtin_neon_vld4_lane_v:
2595 case NEON::BI__builtin_neon_vld4q_lane_v:
2596 case NEON::BI__builtin_neon_vld2_dup_v:
2597 case NEON::BI__builtin_neon_vld2q_dup_v:
2598 case NEON::BI__builtin_neon_vld3_dup_v:
2599 case NEON::BI__builtin_neon_vld3q_dup_v:
2600 case NEON::BI__builtin_neon_vld4_dup_v:
2601 case NEON::BI__builtin_neon_vld4q_dup_v:
2602 // Get the alignment for the argument in addition to the value;
2603 // we'll use it later.
2604 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
2605 Ops.push_back(PtrOp1.emitRawPointer(*this));
2606 continue;
2607 }
2608 }
2609
2610 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
2611 }
2612
2613 switch (BuiltinID) {
2614 default: break;
2615
2616 case NEON::BI__builtin_neon_vget_lane_i8:
2617 case NEON::BI__builtin_neon_vget_lane_i16:
2618 case NEON::BI__builtin_neon_vget_lane_i32:
2619 case NEON::BI__builtin_neon_vget_lane_i64:
2620 case NEON::BI__builtin_neon_vget_lane_bf16:
2621 case NEON::BI__builtin_neon_vget_lane_f32:
2622 case NEON::BI__builtin_neon_vgetq_lane_i8:
2623 case NEON::BI__builtin_neon_vgetq_lane_i16:
2624 case NEON::BI__builtin_neon_vgetq_lane_i32:
2625 case NEON::BI__builtin_neon_vgetq_lane_i64:
2626 case NEON::BI__builtin_neon_vgetq_lane_bf16:
2627 case NEON::BI__builtin_neon_vgetq_lane_f32:
2628 case NEON::BI__builtin_neon_vduph_lane_bf16:
2629 case NEON::BI__builtin_neon_vduph_laneq_bf16:
2630 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
2631
2632 case NEON::BI__builtin_neon_vrndns_f32: {
2633 Value *Arg = EmitScalarExpr(E->getArg(0));
2634 llvm::Type *Tys[] = {Arg->getType()};
2635 Function *F = CGM.getIntrinsic(Intrinsic::roundeven, Tys);
2636 return Builder.CreateCall(F, {Arg}, "vrndn"); }
2637
2638 case NEON::BI__builtin_neon_vset_lane_i8:
2639 case NEON::BI__builtin_neon_vset_lane_i16:
2640 case NEON::BI__builtin_neon_vset_lane_i32:
2641 case NEON::BI__builtin_neon_vset_lane_i64:
2642 case NEON::BI__builtin_neon_vset_lane_bf16:
2643 case NEON::BI__builtin_neon_vset_lane_f32:
2644 case NEON::BI__builtin_neon_vsetq_lane_i8:
2645 case NEON::BI__builtin_neon_vsetq_lane_i16:
2646 case NEON::BI__builtin_neon_vsetq_lane_i32:
2647 case NEON::BI__builtin_neon_vsetq_lane_i64:
2648 case NEON::BI__builtin_neon_vsetq_lane_bf16:
2649 case NEON::BI__builtin_neon_vsetq_lane_f32:
2650 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
2651
2652 case NEON::BI__builtin_neon_vsha1h_u32:
2653 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
2654 "vsha1h");
2655 case NEON::BI__builtin_neon_vsha1cq_u32:
2656 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
2657 "vsha1h");
2658 case NEON::BI__builtin_neon_vsha1pq_u32:
2659 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
2660 "vsha1h");
2661 case NEON::BI__builtin_neon_vsha1mq_u32:
2662 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
2663 "vsha1h");
2664
2665 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
2666 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
2667 "vcvtbfp2bf");
2668 }
2669
2670 // The ARM _MoveToCoprocessor builtins put the input register value as
2671 // the first argument, but the LLVM intrinsic expects it as the third one.
2672 case clang::ARM::BI_MoveToCoprocessor:
2673 case clang::ARM::BI_MoveToCoprocessor2: {
2674 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
2675 ? Intrinsic::arm_mcr
2676 : Intrinsic::arm_mcr2);
2677 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
2678 Ops[3], Ops[4], Ops[5]});
2679 }
2680 }
2681
2682 // Get the last argument, which specifies the vector type.
2683 assert(HasExtraArg);
2684 const Expr *Arg = E->getArg(E->getNumArgs()-1);
2685 std::optional<llvm::APSInt> Result =
2687 if (!Result)
2688 return nullptr;
2689
2690 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
2691 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
2692 // Determine the overloaded type of this builtin.
2693 llvm::Type *Ty;
2694 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
2695 Ty = FloatTy;
2696 else
2697 Ty = DoubleTy;
2698
2699 // Determine whether this is an unsigned conversion or not.
2700 bool usgn = Result->getZExtValue() == 1;
2701 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
2702
2703 // Call the appropriate intrinsic.
2704 Function *F = CGM.getIntrinsic(Int, Ty);
2705 return Builder.CreateCall(F, Ops, "vcvtr");
2706 }
2707
2708 // Determine the type of this overloaded NEON intrinsic.
2709 NeonTypeFlags Type = Result->getZExtValue();
2710 bool usgn = Type.isUnsigned();
2711 bool rightShift = false;
2712
2713 llvm::FixedVectorType *VTy =
2714 GetNeonType(this, Type, getTarget().hasFastHalfType(), false,
2715 getTarget().hasBFloat16Type());
2716 llvm::Type *Ty = VTy;
2717 if (!Ty)
2718 return nullptr;
2719
2720 // Many NEON builtins have identical semantics and uses in ARM and
2721 // AArch64. Emit these in a single function.
2722 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
2724 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
2725 if (Builtin)
2727 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
2728 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
2729
2730 unsigned Int;
2731 switch (BuiltinID) {
2732 default: return nullptr;
2733 case NEON::BI__builtin_neon_vld1q_lane_v:
2734 // Handle 64-bit integer elements as a special case. Use shuffles of
2735 // one-element vectors to avoid poor code for i64 in the backend.
2736 if (VTy->getElementType()->isIntegerTy(64)) {
2737 // Extract the other lane.
2738 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2739 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
2740 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
2741 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
2742 // Load the value as a one-element vector.
2743 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
2744 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2745 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
2746 Value *Align = getAlignmentValue32(PtrOp0);
2747 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
2748 // Combine them.
2749 int Indices[] = {1 - Lane, Lane};
2750 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
2751 }
2752 [[fallthrough]];
2753 case NEON::BI__builtin_neon_vld1_lane_v: {
2754 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2755 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
2756 Value *Ld = Builder.CreateLoad(PtrOp0);
2757 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
2758 }
2759 case NEON::BI__builtin_neon_vqrshrn_n_v:
2760 Int =
2761 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
2762 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
2763 1, true);
2764 case NEON::BI__builtin_neon_vqrshrun_n_v:
2765 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
2766 Ops, "vqrshrun_n", 1, true);
2767 case NEON::BI__builtin_neon_vqshrn_n_v:
2768 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
2769 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
2770 1, true);
2771 case NEON::BI__builtin_neon_vqshrun_n_v:
2772 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
2773 Ops, "vqshrun_n", 1, true);
2774 case NEON::BI__builtin_neon_vrecpe_v:
2775 case NEON::BI__builtin_neon_vrecpeq_v:
2776 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
2777 Ops, "vrecpe");
2778 case NEON::BI__builtin_neon_vrshrn_n_v:
2779 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
2780 Ops, "vrshrn_n", 1, true);
2781 case NEON::BI__builtin_neon_vrsra_n_v:
2782 case NEON::BI__builtin_neon_vrsraq_n_v:
2783 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2784 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2785 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
2786 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
2787 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
2788 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
2789 case NEON::BI__builtin_neon_vsri_n_v:
2790 case NEON::BI__builtin_neon_vsriq_n_v:
2791 rightShift = true;
2792 [[fallthrough]];
2793 case NEON::BI__builtin_neon_vsli_n_v:
2794 case NEON::BI__builtin_neon_vsliq_n_v:
2795 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
2796 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
2797 Ops, "vsli_n");
2798 case NEON::BI__builtin_neon_vsra_n_v:
2799 case NEON::BI__builtin_neon_vsraq_n_v:
2800 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2801 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
2802 return Builder.CreateAdd(Ops[0], Ops[1]);
2803 case NEON::BI__builtin_neon_vst1q_lane_v:
2804 // Handle 64-bit integer elements as a special case. Use a shuffle to get
2805 // a one-element vector and avoid poor code for i64 in the backend.
2806 if (VTy->getElementType()->isIntegerTy(64)) {
2807 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2808 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
2809 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
2810 Ops[2] = getAlignmentValue32(PtrOp0);
2811 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
2812 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
2813 Tys), Ops);
2814 }
2815 [[fallthrough]];
2816 case NEON::BI__builtin_neon_vst1_lane_v: {
2817 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2818 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
2819 return Builder.CreateStore(Ops[1],
2820 PtrOp0.withElementType(Ops[1]->getType()));
2821 }
2822 case NEON::BI__builtin_neon_vtbl1_v:
2823 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
2824 Ops, "vtbl1");
2825 case NEON::BI__builtin_neon_vtbl2_v:
2826 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
2827 Ops, "vtbl2");
2828 case NEON::BI__builtin_neon_vtbl3_v:
2829 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
2830 Ops, "vtbl3");
2831 case NEON::BI__builtin_neon_vtbl4_v:
2832 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
2833 Ops, "vtbl4");
2834 case NEON::BI__builtin_neon_vtbx1_v:
2835 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
2836 Ops, "vtbx1");
2837 case NEON::BI__builtin_neon_vtbx2_v:
2838 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
2839 Ops, "vtbx2");
2840 case NEON::BI__builtin_neon_vtbx3_v:
2841 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
2842 Ops, "vtbx3");
2843 case NEON::BI__builtin_neon_vtbx4_v:
2844 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
2845 Ops, "vtbx4");
2846 }
2847}
2848
2849template<typename Integer>
2851 return E->getIntegerConstantExpr(Context)->getExtValue();
2852}
2853
2854static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
2855 llvm::Type *T, bool Unsigned) {
2856 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
2857 // which finds it convenient to specify signed/unsigned as a boolean flag.
2858 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
2859}
2860
2861static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
2862 uint32_t Shift, bool Unsigned) {
2863 // MVE helper function for integer shift right. This must handle signed vs
2864 // unsigned, and also deal specially with the case where the shift count is
2865 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
2866 // undefined behavior, but in MVE it's legal, so we must convert it to code
2867 // that is not undefined in IR.
2868 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
2869 ->getElementType()
2870 ->getPrimitiveSizeInBits();
2871 if (Shift == LaneBits) {
2872 // An unsigned shift of the full lane size always generates zero, so we can
2873 // simply emit a zero vector. A signed shift of the full lane size does the
2874 // same thing as shifting by one bit fewer.
2875 if (Unsigned)
2876 return llvm::Constant::getNullValue(V->getType());
2877 else
2878 --Shift;
2879 }
2880 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
2881}
2882
2883static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
2884 // MVE-specific helper function for a vector splat, which infers the element
2885 // count of the output vector by knowing that MVE vectors are all 128 bits
2886 // wide.
2887 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
2888 return Builder.CreateVectorSplat(Elements, V);
2889}
2890
2891static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
2892 CodeGenFunction *CGF,
2893 llvm::Value *V,
2894 llvm::Type *DestType) {
2895 // Convert one MVE vector type into another by reinterpreting its in-register
2896 // format.
2897 //
2898 // Little-endian, this is identical to a bitcast (which reinterprets the
2899 // memory format). But big-endian, they're not necessarily the same, because
2900 // the register and memory formats map to each other differently depending on
2901 // the lane size.
2902 //
2903 // We generate a bitcast whenever we can (if we're little-endian, or if the
2904 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
2905 // that performs the different kind of reinterpretation.
2906 if (CGF->getTarget().isBigEndian() &&
2907 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
2908 return Builder.CreateCall(
2909 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
2910 {DestType, V->getType()}),
2911 V);
2912 } else {
2913 return Builder.CreateBitCast(V, DestType);
2914 }
2915}
2916
2917static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
2918 // Make a shufflevector that extracts every other element of a vector (evens
2919 // or odds, as desired).
2920 SmallVector<int, 16> Indices;
2921 unsigned InputElements =
2922 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
2923 for (unsigned i = 0; i < InputElements; i += 2)
2924 Indices.push_back(i + Odd);
2925 return Builder.CreateShuffleVector(V, Indices);
2926}
2927
2928static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
2929 llvm::Value *V1) {
2930 // Make a shufflevector that interleaves two vectors element by element.
2931 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
2932 SmallVector<int, 16> Indices;
2933 unsigned InputElements =
2934 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
2935 for (unsigned i = 0; i < InputElements; i++) {
2936 Indices.push_back(i);
2937 Indices.push_back(i + InputElements);
2938 }
2939 return Builder.CreateShuffleVector(V0, V1, Indices);
2940}
2941
2942template<unsigned HighBit, unsigned OtherBits>
2943static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
2944 // MVE-specific helper function to make a vector splat of a constant such as
2945 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
2946 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
2947 unsigned LaneBits = T->getPrimitiveSizeInBits();
2948 uint32_t Value = HighBit << (LaneBits - 1);
2949 if (OtherBits)
2950 Value |= (1UL << (LaneBits - 1)) - 1;
2951 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
2952 return ARMMVEVectorSplat(Builder, Lane);
2953}
2954
2955static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
2956 llvm::Value *V,
2957 unsigned ReverseWidth) {
2958 // MVE-specific helper function which reverses the elements of a
2959 // vector within every (ReverseWidth)-bit collection of lanes.
2960 SmallVector<int, 16> Indices;
2961 unsigned LaneSize = V->getType()->getScalarSizeInBits();
2962 unsigned Elements = 128 / LaneSize;
2963 unsigned Mask = ReverseWidth / LaneSize - 1;
2964 for (unsigned i = 0; i < Elements; i++)
2965 Indices.push_back(i ^ Mask);
2966 return Builder.CreateShuffleVector(V, Indices);
2967}
2968
2969static llvm::Value *ARMMVECreateSIToFP(CGBuilderTy &Builder,
2970 CodeGenFunction *CGF, llvm::Value *V,
2971 llvm::Type *Ty) {
2972 return Builder.CreateCall(
2973 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_fp_int, {Ty, V->getType()}),
2974 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 0)});
2975}
2976
2977static llvm::Value *ARMMVECreateUIToFP(CGBuilderTy &Builder,
2978 CodeGenFunction *CGF, llvm::Value *V,
2979 llvm::Type *Ty) {
2980 return Builder.CreateCall(
2981 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_fp_int, {Ty, V->getType()}),
2982 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)});
2983}
2984
2985static llvm::Value *ARMMVECreateFPToSI(CGBuilderTy &Builder,
2986 CodeGenFunction *CGF, llvm::Value *V,
2987 llvm::Type *Ty) {
2988 return Builder.CreateCall(
2989 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_int_fp, {Ty, V->getType()}),
2990 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 0)});
2991}
2992
2993static llvm::Value *ARMMVECreateFPToUI(CGBuilderTy &Builder,
2994 CodeGenFunction *CGF, llvm::Value *V,
2995 llvm::Type *Ty) {
2996 return Builder.CreateCall(
2997 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_int_fp, {Ty, V->getType()}),
2998 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)});
2999}
3000
3002 const CallExpr *E,
3004 llvm::Triple::ArchType Arch) {
3005 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
3006 Intrinsic::ID IRIntr;
3007 unsigned NumVectors;
3008
3009 // Code autogenerated by Tablegen will handle all the simple builtins.
3010 switch (BuiltinID) {
3011 #include "clang/Basic/arm_mve_builtin_cg.inc"
3012
3013 // If we didn't match an MVE builtin id at all, go back to the
3014 // main EmitARMBuiltinExpr.
3015 default:
3016 return nullptr;
3017 }
3018
3019 // Anything that breaks from that switch is an MVE builtin that
3020 // needs handwritten code to generate.
3021
3022 switch (CustomCodeGenType) {
3023
3024 case CustomCodeGen::VLD24: {
3027
3028 auto MvecCType = E->getType();
3029 auto MvecLType = ConvertType(MvecCType);
3030 assert(MvecLType->isStructTy() &&
3031 "Return type for vld[24]q should be a struct");
3032 assert(MvecLType->getStructNumElements() == 1 &&
3033 "Return-type struct for vld[24]q should have one element");
3034 auto MvecLTypeInner = MvecLType->getStructElementType(0);
3035 assert(MvecLTypeInner->isArrayTy() &&
3036 "Return-type struct for vld[24]q should contain an array");
3037 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3038 "Array member of return-type struct vld[24]q has wrong length");
3039 auto VecLType = MvecLTypeInner->getArrayElementType();
3040
3041 Tys.push_back(VecLType);
3042
3043 auto Addr = E->getArg(0);
3044 Ops.push_back(EmitScalarExpr(Addr));
3045 Tys.push_back(ConvertType(Addr->getType()));
3046
3047 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
3048 Value *LoadResult = Builder.CreateCall(F, Ops);
3049 Value *MvecOut = PoisonValue::get(MvecLType);
3050 for (unsigned i = 0; i < NumVectors; ++i) {
3051 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
3052 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
3053 }
3054
3055 if (ReturnValue.isNull())
3056 return MvecOut;
3057 else
3058 return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
3059 }
3060
3061 case CustomCodeGen::VST24: {
3064
3065 auto Addr = E->getArg(0);
3066 Ops.push_back(EmitScalarExpr(Addr));
3067 Tys.push_back(ConvertType(Addr->getType()));
3068
3069 auto MvecCType = E->getArg(1)->getType();
3070 auto MvecLType = ConvertType(MvecCType);
3071 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
3072 assert(MvecLType->getStructNumElements() == 1 &&
3073 "Data-type struct for vst2q should have one element");
3074 auto MvecLTypeInner = MvecLType->getStructElementType(0);
3075 assert(MvecLTypeInner->isArrayTy() &&
3076 "Data-type struct for vst2q should contain an array");
3077 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3078 "Array member of return-type struct vld[24]q has wrong length");
3079 auto VecLType = MvecLTypeInner->getArrayElementType();
3080
3081 Tys.push_back(VecLType);
3082
3083 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
3084 EmitAggExpr(E->getArg(1), MvecSlot);
3085 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
3086 for (unsigned i = 0; i < NumVectors; i++)
3087 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
3088
3089 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
3090 Value *ToReturn = nullptr;
3091 for (unsigned i = 0; i < NumVectors; i++) {
3092 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
3093 ToReturn = Builder.CreateCall(F, Ops);
3094 Ops.pop_back();
3095 }
3096 return ToReturn;
3097 }
3098 }
3099 llvm_unreachable("unknown custom codegen type.");
3100}
3101
3103 const CallExpr *E,
3105 llvm::Triple::ArchType Arch) {
// Select the code to emit by builtin ID. The included
// arm_cde_builtin_cg.inc sits inside the switch and presumably supplies the
// case labels; an ID that matches none of them takes the default label and
// yields nullptr.
3106 switch (BuiltinID) {
3107 default:
3108 return nullptr;
3109#include "clang/Basic/arm_cde_builtin_cg.inc"
3110 }
3111}
3112
3113static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
3114 const CallExpr *E,
3116 llvm::Triple::ArchType Arch) {
3117 unsigned int Int = 0;
3118 const char *s = nullptr;
3119
3120 switch (BuiltinID) {
3121 default:
3122 return nullptr;
3123 case NEON::BI__builtin_neon_vtbl1_v:
3124 case NEON::BI__builtin_neon_vqtbl1_v:
3125 case NEON::BI__builtin_neon_vqtbl1q_v:
3126 case NEON::BI__builtin_neon_vtbl2_v:
3127 case NEON::BI__builtin_neon_vqtbl2_v:
3128 case NEON::BI__builtin_neon_vqtbl2q_v:
3129 case NEON::BI__builtin_neon_vtbl3_v:
3130 case NEON::BI__builtin_neon_vqtbl3_v:
3131 case NEON::BI__builtin_neon_vqtbl3q_v:
3132 case NEON::BI__builtin_neon_vtbl4_v:
3133 case NEON::BI__builtin_neon_vqtbl4_v:
3134 case NEON::BI__builtin_neon_vqtbl4q_v:
3135 break;
3136 case NEON::BI__builtin_neon_vtbx1_v:
3137 case NEON::BI__builtin_neon_vqtbx1_v:
3138 case NEON::BI__builtin_neon_vqtbx1q_v:
3139 case NEON::BI__builtin_neon_vtbx2_v:
3140 case NEON::BI__builtin_neon_vqtbx2_v:
3141 case NEON::BI__builtin_neon_vqtbx2q_v:
3142 case NEON::BI__builtin_neon_vtbx3_v:
3143 case NEON::BI__builtin_neon_vqtbx3_v:
3144 case NEON::BI__builtin_neon_vqtbx3q_v:
3145 case NEON::BI__builtin_neon_vtbx4_v:
3146 case NEON::BI__builtin_neon_vqtbx4_v:
3147 case NEON::BI__builtin_neon_vqtbx4q_v:
3148 break;
3149 }
3150
3151 assert(E->getNumArgs() >= 3);
3152
3153 // Get the last argument, which specifies the vector type.
3154 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3155 std::optional<llvm::APSInt> Result =
3157 if (!Result)
3158 return nullptr;
3159
3160 // Determine the type of this overloaded NEON intrinsic.
3161 NeonTypeFlags Type = Result->getZExtValue();
3162 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
3163 if (!Ty)
3164 return nullptr;
3165
3166 CodeGen::CGBuilderTy &Builder = CGF.Builder;
3167
3168 // AArch64 scalar builtins are not overloaded, they do not have an extra
3169 // argument that specifies the vector type, need to handle each case.
3170 switch (BuiltinID) {
3171 case NEON::BI__builtin_neon_vtbl1_v: {
3172 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
3173 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3174 }
3175 case NEON::BI__builtin_neon_vtbl2_v: {
3176 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
3177 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3178 }
3179 case NEON::BI__builtin_neon_vtbl3_v: {
3180 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
3181 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3182 }
3183 case NEON::BI__builtin_neon_vtbl4_v: {
3184 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
3185 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3186 }
3187 case NEON::BI__builtin_neon_vtbx1_v: {
3188 Value *TblRes =
3189 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
3190 Intrinsic::aarch64_neon_tbl1, "vtbl1");
3191
3192 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
3193 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
3194 CmpRes = Builder.CreateSExt(CmpRes, Ty);
3195
3196 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
3197 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
3198 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
3199 }
3200 case NEON::BI__builtin_neon_vtbx2_v: {
3201 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
3202 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
3203 }
3204 case NEON::BI__builtin_neon_vtbx3_v: {
3205 Value *TblRes =
3206 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
3207 Intrinsic::aarch64_neon_tbl2, "vtbl2");
3208
3209 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
3210 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
3211 TwentyFourV);
3212 CmpRes = Builder.CreateSExt(CmpRes, Ty);
3213
3214 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
3215 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
3216 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
3217 }
3218 case NEON::BI__builtin_neon_vtbx4_v: {
3219 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
3220 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
3221 }
3222 case NEON::BI__builtin_neon_vqtbl1_v:
3223 case NEON::BI__builtin_neon_vqtbl1q_v:
3224 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
3225 case NEON::BI__builtin_neon_vqtbl2_v:
3226 case NEON::BI__builtin_neon_vqtbl2q_v: {
3227 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
3228 case NEON::BI__builtin_neon_vqtbl3_v:
3229 case NEON::BI__builtin_neon_vqtbl3q_v:
3230 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
3231 case NEON::BI__builtin_neon_vqtbl4_v:
3232 case NEON::BI__builtin_neon_vqtbl4q_v:
3233 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
3234 case NEON::BI__builtin_neon_vqtbx1_v:
3235 case NEON::BI__builtin_neon_vqtbx1q_v:
3236 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
3237 case NEON::BI__builtin_neon_vqtbx2_v:
3238 case NEON::BI__builtin_neon_vqtbx2q_v:
3239 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
3240 case NEON::BI__builtin_neon_vqtbx3_v:
3241 case NEON::BI__builtin_neon_vqtbx3q_v:
3242 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
3243 case NEON::BI__builtin_neon_vqtbx4_v:
3244 case NEON::BI__builtin_neon_vqtbx4q_v:
3245 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
3246 }
3247 }
3248
3249 if (!Int)
3250 return nullptr;
3251
3252 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
3253 return CGF.EmitNeonCall(F, Ops, s);
3254}
3255
// Wrap a scalar in a vector: Op is bitcast to Int16Ty and inserted at index
// 0 of a poison <4 x Int16Ty> vector. The other three lanes remain poison.
3257 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
3258 Op = Builder.CreateBitCast(Op, Int16Ty);
3259 Value *V = PoisonValue::get(VTy);
// Insertion index 0, expressed as a SizeTy constant.
3260 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3261 Op = Builder.CreateInsertElement(V, Op, CI);
3262 return Op;
3263}
3264
3265/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
3266/// access builtin. Only required if it can't be inferred from the base pointer
3267/// operand.
// The type is chosen from TypeFlags.getMemEltType(): an explicit
// MemEltTyInt8/16/32/64 selects the integer type of that width, while
// MemEltTyDefault defers to getEltType(TypeFlags).
3269 switch (TypeFlags.getMemEltType()) {
3270 case SVETypeFlags::MemEltTyDefault:
3271 return getEltType(TypeFlags);
3272 case SVETypeFlags::MemEltTyInt8:
3273 return Builder.getInt8Ty();
3274 case SVETypeFlags::MemEltTyInt16:
3275 return Builder.getInt16Ty();
3276 case SVETypeFlags::MemEltTyInt32:
3277 return Builder.getInt32Ty();
3278 case SVETypeFlags::MemEltTyInt64:
3279 return Builder.getInt64Ty();
3280 }
// Reached only if getMemEltType() produced a value not listed above.
3281 llvm_unreachable("Unknown MemEltType");
3282}
3283
3284llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
3285 switch (TypeFlags.getEltType()) {
3286 default:
3287 llvm_unreachable("Invalid SVETypeFlag!");
3288
3289 case SVETypeFlags::EltTyMFloat8:
3290 case SVETypeFlags::EltTyInt8:
3291 return Builder.getInt8Ty();
3292 case SVETypeFlags::EltTyInt16:
3293 return Builder.getInt16Ty();
3294 case SVETypeFlags::EltTyInt32:
3295 return Builder.getInt32Ty();
3296 case SVETypeFlags::EltTyInt64:
3297 return Builder.getInt64Ty();
3298 case SVETypeFlags::EltTyInt128:
3299 return Builder.getInt128Ty();
3300
3301 case SVETypeFlags::EltTyFloat16:
3302 return Builder.getHalfTy();
3303 case SVETypeFlags::EltTyFloat32:
3304 return Builder.getFloatTy();
3305 case SVETypeFlags::EltTyFloat64:
3306 return Builder.getDoubleTy();
3307
3308 case SVETypeFlags::EltTyBFloat16:
3309 return Builder.getBFloatTy();
3310
3311 case SVETypeFlags::EltTyBool8:
3312 case SVETypeFlags::EltTyBool16:
3313 case SVETypeFlags::EltTyBool32:
3314 case SVETypeFlags::EltTyBool64:
3315 return Builder.getInt1Ty();
3316 }
3317}
3318
3319// Return the llvm predicate vector type corresponding to the specified element
3320// TypeFlags.
// Every result is a scalable vector of i1. The minimum lane count follows the
// element width named by the kind: 16 for 8-bit, 8 for 16-bit, 4 for 32-bit
// and 2 for 64-bit kinds. Kinds with no case below (for example EltTyMFloat8
// and EltTyInt128) reach the unreachable default.
3321llvm::ScalableVectorType *
3323 switch (TypeFlags.getEltType()) {
3324 default: llvm_unreachable("Unhandled SVETypeFlag!");
3325
// Integer element kinds.
3326 case SVETypeFlags::EltTyInt8:
3327 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3328 case SVETypeFlags::EltTyInt16:
3329 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3330 case SVETypeFlags::EltTyInt32:
3331 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3332 case SVETypeFlags::EltTyInt64:
3333 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3334
// Floating-point element kinds.
3335 case SVETypeFlags::EltTyBFloat16:
3336 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3337 case SVETypeFlags::EltTyFloat16:
3338 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3339 case SVETypeFlags::EltTyFloat32:
3340 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3341 case SVETypeFlags::EltTyFloat64:
3342 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3343
// Boolean element kinds.
3344 case SVETypeFlags::EltTyBool8:
3345 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3346 case SVETypeFlags::EltTyBool16:
3347 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3348 case SVETypeFlags::EltTyBool32:
3349 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3350 case SVETypeFlags::EltTyBool64:
3351 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3352 }
3353}
3354
3355// Return the llvm vector type corresponding to the specified element TypeFlags.
// Each case pairs the element's scalar type with a minimum lane count of 16,
// 8, 4 or 2 for 8-, 16-, 32- and 64-bit elements respectively. Boolean kinds
// produce vectors of i1 with the same lane counts. A kind with no case below
// (for example EltTyInt128) reaches the unreachable default.
3356llvm::ScalableVectorType *
3358 switch (TypeFlags.getEltType()) {
3359 default:
3360 llvm_unreachable("Invalid SVETypeFlag!");
3361
// Integer element kinds.
3362 case SVETypeFlags::EltTyInt8:
3363 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
3364 case SVETypeFlags::EltTyInt16:
3365 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
3366 case SVETypeFlags::EltTyInt32:
3367 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
3368 case SVETypeFlags::EltTyInt64:
3369 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
3370
// Floating-point element kinds; mfloat8 is represented with i8 lanes.
3371 case SVETypeFlags::EltTyMFloat8:
3372 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
3373 case SVETypeFlags::EltTyFloat16:
3374 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
3375 case SVETypeFlags::EltTyBFloat16:
3376 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
3377 case SVETypeFlags::EltTyFloat32:
3378 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
3379 case SVETypeFlags::EltTyFloat64:
3380 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
3381
// Boolean element kinds.
3382 case SVETypeFlags::EltTyBool8:
3383 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3384 case SVETypeFlags::EltTyBool16:
3385 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3386 case SVETypeFlags::EltTyBool32:
3387 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3388 case SVETypeFlags::EltTyBool64:
3389 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3390 }
3391}
3392
3393constexpr unsigned SVEBitsPerBlock = 128;
3394
3395static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
3396 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
3397 return llvm::ScalableVectorType::get(EltTy, NumElts);
3398}
3399
3400// Reinterpret the input predicate so that it can be used to correctly isolate
3401// the elements of the specified datatype.
// Returns Pred unchanged when it is an "aarch64.svcount" target type or
// already has the i1 vector type matching VTy's element count. Otherwise
// returns a call to a svbool conversion intrinsic.
3403 llvm::ScalableVectorType *VTy) {
3404
// svcount predicates are passed through untouched.
3405 if (isa<TargetExtType>(Pred->getType()) &&
3406 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
3407 return Pred;
3408
// RTy is the i1 vector with the same element count as VTy.
3409 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
3410 if (Pred->getType() == RTy)
3411 return Pred;
3412
3413 unsigned IntID;
3414 llvm::Type *IntrinsicTy;
// Targets with 1, 2, 4 or 8 minimum elements use convert_from_svbool
// overloaded on the result type; a 16-element target uses convert_to_svbool
// overloaded on the source predicate's type.
3415 switch (VTy->getMinNumElements()) {
3416 default:
3417 llvm_unreachable("unsupported element count!");
3418 case 1:
3419 case 2:
3420 case 4:
3421 case 8:
3422 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
3423 IntrinsicTy = RTy;
3424 break;
3425 case 16:
3426 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
3427 IntrinsicTy = Pred->getType();
3428 break;
3429 }
3430
3431 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
3432 Value *C = Builder.CreateCall(F, Pred);
3433 assert(C->getType() == RTy && "Unexpected return type!");
3434 return C;
3435}
3436
3438 llvm::StructType *Ty) {
// Convert a struct of predicates to struct type Ty. If PredTuple already has
// type Ty it is returned as-is; otherwise each member is extracted, passed
// through EmitSVEPredicateCast with the corresponding member type of Ty, and
// inserted into a new struct value.
3439 if (PredTuple->getType() == Ty)
3440 return PredTuple;
3441
// Start from poison; every member is overwritten by the loop below.
3442 Value *Ret = llvm::PoisonValue::get(Ty);
3443 for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
3444 Value *Pred = Builder.CreateExtractValue(PredTuple, I);
3445 Pred = EmitSVEPredicateCast(
3446 Pred, cast<llvm::ScalableVectorType>(Ty->getTypeAtIndex(I)));
3447 Ret = Builder.CreateInsertValue(Ret, Pred, I);
3448 }
3449
3450 return Ret;
3451}
3452
3455 unsigned IntID) {
// Emit a call to intrinsic IntID and extend its result to the builtin's
// result type. As used below, Ops[0] is the predicate, Ops[1] the base and
// Ops[2] (when present) the offset or index.
3456 auto *ResultTy = getSVEType(TypeFlags);
// Same element count as the result, but with the memory element type.
3457 auto *OverloadedTy =
3458 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
3459 Function *F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
3460
3461 // At the ACLE level there's only one predicate type, svbool_t, which is
3462 // mapped to <n x 16 x i1>. However, this might be incompatible with the
3463 // actual type being loaded. For example, when loading doubles (i64) the
3464 // predicate should be <n x 2 x i1> instead. At the IR level the type of
3465 // the predicate and the data being loaded must match. Cast to the type
3466 // expected by the intrinsic. The intrinsic itself should be defined in
3467 // a way that enforces relations between parameter types.
3468 Ops[0] = EmitSVEPredicateCast(
3469 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
3470
3471 // Pass 0 when the offset is missing. This can only be applied when using
3472 // the "vector base" addressing mode for which ACLE allows no offset. The
3473 // corresponding LLVM IR always requires an offset.
3474 if (Ops.size() == 2) {
3475 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
3476 Ops.push_back(ConstantInt::get(Int64Ty, 0));
3477 }
3478
3479 // For "vector base, scalar index" scale the index so that it becomes a
3480 // scalar offset.
3481 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
3482 unsigned BytesPerElt =
3483 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
// Multiply by the element size in bytes using a left shift.
3484 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
3485 }
3486
3487 Value *Call = Builder.CreateCall(F, Ops);
3488
3489 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
3490 // other cases it's folded into a nop.
3491 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
3492 : Builder.CreateSExt(Call, ResultTy);
3493}
3494
3497 unsigned IntID) {
// Emit a call to intrinsic IntID. After the reordering below, Ops[0] is the
// data, Ops[1] the predicate, Ops[2] the base and Ops[3] the offset or index.
3498 auto *SrcDataTy = getSVEType(TypeFlags);
// Same element count as the source data, but with the memory element type.
3499 auto *OverloadedTy =
3500 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
3501
3502 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
3503 // it's the first argument. Move it accordingly.
3504 Ops.insert(Ops.begin(), Ops.pop_back_val());
3505
3506 Function *F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
3507
3508 // Pass 0 when the offset is missing. This can only be applied when using
3509 // the "vector base" addressing mode for which ACLE allows no offset. The
3510 // corresponding LLVM IR always requires an offset.
3511 if (Ops.size() == 3) {
3512 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
3513 Ops.push_back(ConstantInt::get(Int64Ty, 0));
3514 }
3515
3516 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
3517 // folded into a nop.
3518 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
3519
3520 // At the ACLE level there's only one predicate type, svbool_t, which is
3521 // mapped to <n x 16 x i1>. However, this might be incompatible with the
3522 // actual type being stored. For example, when storing doubles (i64) the
3523 // predicate should be <n x 2 x i1> instead. At the IR level the type of
3524 // the predicate and the data being stored must match. Cast to the type
3525 // expected by the intrinsic. The intrinsic itself should be defined in
3526 // a way that enforces relations between parameter types.
3527 Ops[1] = EmitSVEPredicateCast(
3528 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
3529
3530 // For "vector base, scalar index" scale the index so that it becomes a
3531 // scalar offset.
3532 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
3533 unsigned BytesPerElt =
3534 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
// Multiply by the element size in bytes using a left shift.
3535 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
3536 }
3537
3538 return Builder.CreateCall(F, Ops);
3539}
3540
3543 unsigned IntID) {
// Emit a call to prefetch intrinsic IntID. Ops[0] is the predicate; the
// overload type is taken from whichever of Ops[1] / Ops[2] is a scalable
// vector.
3544 // The gather prefetches are overloaded on the vector input - this can either
3545 // be the vector of base addresses or vector of offsets.
3546 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
3547 if (!OverloadedTy)
3548 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
3549
3550 // Cast the predicate from svbool_t to the right number of elements.
3551 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
3552
3553 // vector + imm addressing modes
3554 if (Ops[1]->getType()->isVectorTy()) {
3555 if (Ops.size() == 3) {
3556 // Pass 0 for 'vector+imm' when the index is omitted.
3557 Ops.push_back(ConstantInt::get(Int64Ty, 0));
3558
3559 // The sv_prfop is the last operand in the builtin and IR intrinsic.
3560 std::swap(Ops[2], Ops[3]);
3561 } else {
3562 // Index needs to be passed as scaled offset.
3563 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
3564 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
// No shift is emitted for single-byte elements.
3565 if (BytesPerElt > 1)
3566 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
3567 }
3568
3569 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
3570 return Builder.CreateCall(F, Ops);
3571 }
3572
// Ops[1] is not a vector here: overload on its type and on the vector type.
3573 Function *F = CGM.getIntrinsic(IntID, {Ops[1]->getType(), OverloadedTy});
3574 return Builder.CreateCall(F, Ops);
3575}
3576
3579 unsigned IntID) {
// Emit a call to load intrinsic IntID with a predicate cast to the builtin's
// vector type and a base pointer. Ops[0] is the predicate, Ops[1] the base
// pointer and Ops[2] (optional) an offset.
3580 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
3581 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
3582 Value *BasePtr = Ops[1];
3583
3584 // Does the load have an offset?
// The GEP uses VTy as its source element type, so Ops[2] counts whole vectors.
3585 if (Ops.size() > 2)
3586 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
3587
3588 Function *F = CGM.getIntrinsic(IntID, {VTy, BasePtr->getType()});
3589 return Builder.CreateCall(F, {Predicate, BasePtr});
3590}
3591
3594 unsigned IntID) {
3595 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
3596
3597 unsigned N;
3598 switch (IntID) {
3599 case Intrinsic::aarch64_sve_st2:
3600 case Intrinsic::aarch64_sve_st1_pn_x2:
3601 case Intrinsic::aarch64_sve_stnt1_pn_x2:
3602 case Intrinsic::aarch64_sve_st2q:
3603 N = 2;
3604 break;
3605 case Intrinsic::aarch64_sve_st3:
3606 case Intrinsic::aarch64_sve_st3q:
3607 N = 3;
3608 break;
3609 case Intrinsic::aarch64_sve_st4:
3610 case Intrinsic::aarch64_sve_st1_pn_x4:
3611 case Intrinsic::aarch64_sve_stnt1_pn_x4:
3612 case Intrinsic::aarch64_sve_st4q:
3613 N = 4;
3614 break;
3615 default:
3616 llvm_unreachable("unknown intrinsic!");
3617 }
3618
3619 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
3620 Value *BasePtr = Ops[1];
3621
3622 // Does the store have an offset?
3623 if (Ops.size() > (2 + N))
3624 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
3625
3626 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
3627 // need to break up the tuple vector.
3629 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
3630 Operands.push_back(Ops[I]);
3631 Operands.append({Predicate, BasePtr});
3632 Function *F = CGM.getIntrinsic(IntID, {VTy, BasePtr->getType()});
3633
3634 return Builder.CreateCall(F, Operands);
3635}
3636
3637// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
3638// svpmullt_pair intrinsics, with the exception that their results are bitcast
3639// to a wider type.
// Only Ops[0] and Ops[1] are passed to the intrinsic; the call result is then
// reinterpreted as the vector type given by TypeFlags.
3642 unsigned BuiltinID) {
3643 // Splat scalar operand to vector (intrinsics with _n infix)
3644 if (TypeFlags.hasSplatOperand()) {
3645 unsigned OpNo = TypeFlags.getSplatOperand();
3646 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
3647 }
3648
3649 // The pair-wise function has a narrower overloaded type.
3650 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
3651 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
3652
3653 // Now bitcast to the wider result type.
3654 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
3655 return EmitSVEReinterpret(Call, Ty);
3656}
3657
3659 ArrayRef<Value *> Ops, unsigned BuiltinID) {
// Emit a call to the intrinsic named by BuiltinID, overloaded on the vector
// type from TypeFlags, with Ops[0] and a constant i32 0 as its operands.
// NOTE(review): the 0 is presumably a shift amount - confirm against the
// intrinsic definition.
3660 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
3661 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
3662 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
3663}
3664
3667 unsigned BuiltinID) {
// Emit a call to the prefetch intrinsic with (predicate, base pointer,
// prefetch operation). Ops[0] is the predicate, Ops[1] the base pointer, the
// last operand the prefetch operation, and Ops[2] an index when four operands
// are supplied.
3668 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
3669 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
3670 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
3671
3672 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
3673 Value *BasePtr = Ops[1];
3674
3675 // Implement the index operand if not omitted.
// The GEP uses MemoryTy as its source element type, so the index counts
// whole vectors of the memory element type.
3676 if (Ops.size() > 3)
3677 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
3678
3679 Value *PrfOp = Ops.back();
3680
3681 llvm::Type *Tys[2] = {Predicate->getType(), BasePtr->getType()};
3682 Function *F = CGM.getIntrinsic(BuiltinID, Tys);
3683 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
3684}
3685
3687 llvm::Type *ReturnTy,
3689 unsigned IntrinsicID,
3690 bool IsZExtReturn) {
// Emit a predicated load via IntrinsicID, tag it with TBAA information for
// the pointee type, and extend the loaded value to ReturnTy (zero- or
// sign-extension chosen by IsZExtReturn). Ops[0] is the predicate, Ops[1] the
// base pointer and Ops[2] (optional) an offset.
// The memory element type is taken from the pointee type of the call's
// second argument.
3691 QualType LangPTy = E->getArg(1)->getType();
3692 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
3693 LangPTy->castAs<PointerType>()->getPointeeType());
3694
3695 // Mfloat8 types are stored as a vector, so extra work
3696 // to extract the scalar element type is necessary.
3697 if (MemEltTy->isVectorTy()) {
3698 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
3699 "Only <1 x i8> expected");
3700 MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
3701 }
3702
3703 // The vector type that is returned may be different from the
3704 // eventual type loaded from memory.
3705 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
3706 llvm::ScalableVectorType *MemoryTy = nullptr;
3707 llvm::ScalableVectorType *PredTy = nullptr;
3708 bool IsQuadLoad = false;
// ld1uwq / ld1udq use a single-element scalable memory type and predicate and
// are overloaded on the return vector type; every other intrinsic uses a
// memory type with ReturnTy's element count.
3709 switch (IntrinsicID) {
3710 case Intrinsic::aarch64_sve_ld1uwq:
3711 case Intrinsic::aarch64_sve_ld1udq:
3712 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
3713 PredTy = llvm::ScalableVectorType::get(
3714 llvm::Type::getInt1Ty(getLLVMContext()), 1);
3715 IsQuadLoad = true;
3716 break;
3717 default:
3718 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
3719 PredTy = MemoryTy;
3720 break;
3721 }
3722
3723 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
3724 Value *BasePtr = Ops[1];
3725
3726 // Does the load have an offset?
3727 if (Ops.size() > 2)
3728 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
3729
3730 llvm::Type *Tys[2] = {IsQuadLoad ? VectorTy : MemoryTy, BasePtr->getType()};
3731 Function *F = CGM.getIntrinsic(IntrinsicID, Tys);
3732 auto *Load = Builder.CreateCall(F, {Predicate, BasePtr});
3733 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
3734 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
3735
// Quad loads already produce the return type; no extension is emitted.
3736 if (IsQuadLoad)
3737 return Load;
3738
3739 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
3740 : Builder.CreateSExt(Load, VectorTy);
3741}
3742
3745 unsigned IntrinsicID) {
// Emit a predicated store via IntrinsicID and tag it with TBAA information
// for the pointee type. Ops[0] is the predicate, Ops[1] the base pointer, the
// last operand the data, and Ops[2] an offset when four operands are given.
// The memory element type is taken from the pointee type of the call's
// second argument.
3746 QualType LangPTy = E->getArg(1)->getType();
3747 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
3748 LangPTy->castAs<PointerType>()->getPointeeType());
3749
3750 // Mfloat8 types are stored as a vector, so extra work
3751 // to extract the scalar element type is necessary.
3752 if (MemEltTy->isVectorTy()) {
3753 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
3754 "Only <1 x i8> expected");
3755 MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
3756 }
3757
3758 // The vector type that is stored may be different from the
3759 // eventual type stored to memory.
3760 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
3761 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
3762
3763 auto PredTy = MemoryTy;
3764 auto AddrMemoryTy = MemoryTy;
3765 bool IsQuadStore = false;
3766
// st1wq / st1dq use a single-element scalable type for addressing and for the
// predicate, and store the data without truncation.
3767 switch (IntrinsicID) {
3768 case Intrinsic::aarch64_sve_st1wq:
3769 case Intrinsic::aarch64_sve_st1dq:
3770 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
3771 PredTy =
3772 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
3773 IsQuadStore = true;
3774 break;
3775 default:
3776 break;
3777 }
3778 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
3779 Value *BasePtr = Ops[1];
3780
3781 // Does the store have an offset?
3782 if (Ops.size() == 4)
3783 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
3784
3785 // Last value is always the data
3786 Value *Val =
3787 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
3788
3789 llvm::Type *Tys[2] = {IsQuadStore ? VectorTy : MemoryTy, BasePtr->getType()};
3790 Function *F = CGM.getIntrinsic(IntrinsicID, Tys);
3791 auto *Store = Builder.CreateCall(F, {Val, Predicate, BasePtr});
3792 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
3793 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
3794 return Store;
3795}
3796
3799 unsigned IntID) {
3800 Ops[2] = EmitSVEPredicateCast(
3802
3803 SmallVector<Value *> NewOps;
3804 NewOps.push_back(Ops[2]);
3805
3806 llvm::Value *BasePtr = Ops[3];
3807 llvm::Value *RealSlice = Ops[1];
3808 // If the intrinsic contains the vnum parameter, multiply it with the vector
3809 // size in bytes.
3810 if (Ops.size() == 5) {
3811 Function *StreamingVectorLength =
3812 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd);
3813 llvm::Value *StreamingVectorLengthCall =
3814 Builder.CreateMul(Builder.CreateCall(StreamingVectorLength),
3815 llvm::ConstantInt::get(Int64Ty, 8), "svl",
3816 /* HasNUW */ true, /* HasNSW */ true);
3817 llvm::Value *Mulvl =
3818 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
3819 // The type of the ptr parameter is void *, so use Int8Ty here.
3820 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
3821 RealSlice = Builder.CreateZExt(RealSlice, Int64Ty);
3822 RealSlice = Builder.CreateAdd(RealSlice, Ops[4]);
3823 RealSlice = Builder.CreateTrunc(RealSlice, Int32Ty);
3824 }
3825 NewOps.push_back(BasePtr);
3826 NewOps.push_back(Ops[0]);
3827 NewOps.push_back(RealSlice);
3828 Function *F = CGM.getIntrinsic(IntID, BasePtr->getType());
3829 return Builder.CreateCall(F, NewOps);
3830}
3831
3834 unsigned IntID) {
// Emit a call to intrinsic IntID, overloaded on the vector type from
// TypeFlags. The predicate operand is first cast to match that vector type:
// it is Ops[1] for ZA reads and Ops[2] for ZA writes. If neither flag is set
// the operands are passed through unchanged.
3835 auto *VecTy = getSVEType(TypeFlags);
3836 Function *F = CGM.getIntrinsic(IntID, VecTy);
3837 if (TypeFlags.isReadZA())
3838 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
3839 else if (TypeFlags.isWriteZA())
3840 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
3841 return Builder.CreateCall(F, Ops);
3842}
3843
3846 unsigned IntID) {
// Emit a call to the non-overloaded intrinsic IntID. When the builtin was
// called with no operands, a constant i32 255 is supplied as the operand.
3847 // svzero_za() intrinsic zeros the entire za tile and has no parameters.
3848 if (Ops.size() == 0)
3849 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
3850 Function *F = CGM.getIntrinsic(IntID, {});
3851 return Builder.CreateCall(F, Ops);
3852}
3853
3856 unsigned IntID) {
// Emit a call to intrinsic IntID, overloaded on the type of Ops[1]. The call
// always receives a third operand of type i32: constant 0 when only two
// operands were supplied, otherwise Ops[2] converted to Int32Ty with a signed
// integer cast.
3857 if (Ops.size() == 2)
3858 Ops.push_back(Builder.getInt32(0));
3859 else
3860 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
3861 Function *F = CGM.getIntrinsic(IntID, Ops[1]->getType());
3862 return Builder.CreateCall(F, Ops);
3863}
3864
3865// Broadcast Scalar into every lane of a vector that has the element count of
3866// Ty. The splat is emitted with IRBuilder::CreateVectorSplat (not with a call
// to an SVE dup.x intrinsic). Ty is cast<> to llvm::VectorType, so callers
// must pass a vector type.
3867Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
3868 return Builder.CreateVectorSplat(
3869 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
3870}
3871
// NOTE(review): the signature line of this definition is missing from this
// listing; `Scalar` is a parameter declared there. From the body this looks
// like the one-operand form of EmitSVEDupX - confirm against the header.
//
// Splats Scalar across the SVE vector type chosen for its own element type.
// A vector-typed input is first reduced to its element 0; in asserts builds
// that input is checked to be exactly <1 x i8>.
3873 if (auto *Ty = Scalar->getType(); Ty->isVectorTy()) {
3874#ifndef NDEBUG
3875 auto *VecTy = cast<llvm::VectorType>(Ty);
3876 ElementCount EC = VecTy->getElementCount();
3877 assert(EC.isScalar() && VecTy->getElementType() == Int8Ty &&
3878 "Only <1 x i8> expected");
3879#endif
3880 Scalar = Builder.CreateExtractElement(Scalar, uint64_t(0));
3881 }
 // Delegate to the two-operand form with the vector type derived from the
 // (possibly unwrapped) scalar's type.
3882 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
3883}
3884
// NOTE(review): the signature line of this definition is missing from this
// listing; `Val` and `Ty` are parameters declared there.
//
// Reinterprets Val as type Ty using LLVM bitcasts. When Ty is a struct type
// each member is extracted, bitcast to the corresponding member type of Ty and
// inserted into a fresh tuple; otherwise a single bitcast is emitted.
3886 // FIXME: For big endian this needs an additional REV, or needs a separate
3887 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
3888 // instruction is defined as 'bitwise' equivalent from memory point of
3889 // view (when storing/reloading), whereas the svreinterpret builtin
3890 // implements bitwise equivalent cast from register point of view.
3891 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
3892
3893 if (auto *StructTy = dyn_cast<StructType>(Ty)) {
 // Start from poison and fill in every member, so no member of the result
 // is left as poison once the loop finishes.
3894 Value *Tuple = llvm::PoisonValue::get(Ty);
3895
3896 for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
3897 Value *In = Builder.CreateExtractValue(Val, I);
3898 Value *Out = Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
3899 Tuple = Builder.CreateInsertValue(Tuple, Out, I);
3900 }
3901
3902 return Tuple;
3903 }
3904
3905 return Builder.CreateBitCast(Val, Ty);
3906}
3907
// Prepends a null (all-zero) constant of type Ty to the front of Ops.
// NOTE(review): the second line of the signature (which declares `Ops`) is
// missing from this listing. `Builder` is not referenced in the visible body.
3908static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
3910 auto *SplatZero = Constant::getNullValue(Ty);
3911 Ops.insert(Ops.begin(), SplatZero);
3912}
3913
// Prepends an undef value of type Ty to the front of Ops.
// NOTE(review): the second line of the signature (which declares `Ops`) is
// missing from this listing. `Builder` is not referenced in the visible body.
3914static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
3916 auto *SplatUndef = UndefValue::get(Ty);
3917 Ops.insert(Ops.begin(), SplatUndef);
3918}
3919
// Returns the list of types used to select the overload of an SVE intrinsic.
// NOTE(review): the signature line that names the function and declares
// `TypeFlags` is missing from this listing.
//
// The overload kinds are tested in a fixed order and the first match wins:
//   - none:                   no overload types
//   - while / multi-vec cvt:  {default SVE type, type of Ops[1]}
//   - while RW:               {SVE predicate type, type of Ops[0]}
//   - default and op0:        {default SVE type, type of Ops[0]}
//   - first and last:         {type of Ops[0], type of the last operand}
//   - reduction QV:           {ResultType, type of Ops[1]}
//   - default:                {default SVE type}
// Ops is indexed without bounds checks, so it must hold the operands the
// matching kind reads.
3920SmallVector<llvm::Type *, 2>
3922 llvm::Type *ResultType,
3923 ArrayRef<Value *> Ops) {
3924 if (TypeFlags.isOverloadNone())
3925 return {};
3926
3927 llvm::Type *DefaultType = getSVEType(TypeFlags);
3928
3929 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
3930 return {DefaultType, Ops[1]->getType()};
3931
3932 if (TypeFlags.isOverloadWhileRW())
3933 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
3934
3935 if (TypeFlags.isOverloadDefaultAndOp0())
3936 return {DefaultType, Ops[0]->getType()};
3937
3938 if (TypeFlags.isOverloadFirstandLast())
3939 return {Ops[0]->getType(), Ops.back()->getType()};
3940
3941 if (TypeFlags.isReductionQV())
3942 return {ResultType, Ops[1]->getType()};
3943
 // Anything that reaches this point is expected to be the default overload.
3944 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
3945 return {DefaultType};
3946}
3947
3949 ArrayRef<Value *> Ops) {
 // NOTE(review): the opening line of this signature (declaring `TypeFlags`)
 // is missing from this listing.
 //
 // Lowers a tuple set/get builtin to a single insertvalue/extractvalue.
 // Ops[0] is the tuple and Ops[1] is the member index, which must be a
 // ConstantInt (cast<> is used). For "set", Ops[2] is the value to insert and
 // the updated tuple is returned; for "get", the selected member is returned.
 // NOTE(review): "TypleFlags" in the assert message below is a typo for
 // "TypeFlags"; the string is left untouched here.
3950 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
3951 "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
3952 unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();
3953
3954 if (TypeFlags.isTupleSet())
3955 return Builder.CreateInsertValue(Ops[0], Ops[2], Idx);
3956 return Builder.CreateExtractValue(Ops[0], Idx);
3957}
3958
3960 llvm::Type *Ty,
3961 ArrayRef<Value *> Ops) {
 // NOTE(review): the opening line of this signature (declaring `TypeFlags`)
 // is missing from this listing.
 //
 // Builds a tuple value of type Ty whose member Idx is Ops[Idx], starting
 // from poison and inserting the operands one by one.
3962 assert(TypeFlags.isTupleCreate() && "Expects TypleFlag isTupleCreate");
3963
3964 Value *Tuple = llvm::PoisonValue::get(Ty);
3965 for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
3966 Tuple = Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
3967
3968 return Tuple;
3969}
3970
3972 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
3973 SVETypeFlags TypeFlags) {
 // NOTE(review): this listing omits the first signature line, the
 // declaration of `Error`, and the right-hand side of the `Result`
 // initialisation below.
 //
 // Evaluates the arguments of call E and appends the resulting IR values to
 // Ops:
 //   - arguments that must be integer constant expressions become 32-bit
 //     ConstantInts;
 //   - struct-typed (tuple) arguments are flattened into one operand per
 //     member, except for tuple set/get builtins;
 //   - every other argument is appended as emitted.
3974 // Find out if any arguments are required to be integer constant expressions.
3975 unsigned ICEArguments = 0;
3977 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3978 assert(Error == ASTContext::GE_None && "Should not codegen an error");
3979
3980 // Tuple set/get only requires one insert/extract vector, which is
3981 // created by EmitSVETupleSetOrGet.
3982 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
3983
3984 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
 // Bit i of ICEArguments marks argument i as a required constant. Note that
 // the argument is emitted as a scalar expression in either case.
3985 bool IsICE = ICEArguments & (1 << i);
3986 Value *Arg = EmitScalarExpr(E->getArg(i));
3987
3988 if (IsICE) {
3989 // If this is required to be a constant, constant fold it so that we know
3990 // that the generated intrinsic gets a ConstantInt.
3991 std::optional<llvm::APSInt> Result =
3993 assert(Result && "Expected argument to be a constant");
3994
3995 // Immediates for SVE llvm intrinsics are always 32bit. We can safely
3996 // truncate because the immediate has been range checked and no valid
3997 // immediate requires more than a handful of bits.
3998 *Result = Result->extOrTrunc(32);
3999 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
4000 continue;
4001 }
4002
 // Flatten tuple arguments into their individual members.
4003 if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
4004 for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
4005 Ops.push_back(Builder.CreateExtractValue(Arg, I));
4006
4007 continue;
4008 }
4009
4010 Ops.push_back(Arg);
4011 }
4012}
4013
4015 const CallExpr *E) {
 // Emits IR for call E to the SVE builtin identified by BuiltinID.
 // NOTE(review): this listing omits several lines of the function, among
 // them the first signature line and the declarations of `Ops` and
 // `Builtin`; the comments here describe only what the visible lines do.
 //
 // Visible flow:
 //  1. Builtins in the svreinterpret ID range go to EmitSVEReinterpret.
 //  2. Operands are collected by GetAArch64SVEProcessedOperands.
 //  3. Load/store, gather/scatter, prefetch, struct load/store, tuple and
 //     undef kinds are dispatched on TypeFlags to dedicated emitters.
 //  4. Builtins with a non-zero Builtin->LLVMIntrinsic are lowered by the
 //     generic path below.
 //  5. The remaining builtins are handled by the switch at the end; IDs not
 //     listed there yield nullptr.
4016 llvm::Type *Ty = ConvertType(E->getType());
4017 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
4018 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
4019 Value *Val = EmitScalarExpr(E->getArg(0));
4020 return EmitSVEReinterpret(Val, Ty);
4021 }
4022
4025
4027 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4028 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4029
4030 if (TypeFlags.isLoad())
4031 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
4032 TypeFlags.isZExtReturn());
4033 if (TypeFlags.isStore())
4034 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
4035 if (TypeFlags.isGatherLoad())
4036 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4037 if (TypeFlags.isScatterStore())
4038 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4039 if (TypeFlags.isPrefetch())
4040 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4041 if (TypeFlags.isGatherPrefetch())
4042 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4043 if (TypeFlags.isStructLoad())
4044 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4045 if (TypeFlags.isStructStore())
4046 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4047 if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
4048 return EmitSVETupleSetOrGet(TypeFlags, Ops);
4049 if (TypeFlags.isTupleCreate())
4050 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
4051 if (TypeFlags.isUndef())
4052 return UndefValue::get(Ty);
4053
4054 // Handle built-ins for which there is a corresponding LLVM Intrinsic.
4055 // -------------------------------------------------------------------
4056 if (Builtin->LLVMIntrinsic != 0) {
4057 // Emit set FPMR for intrinsics that require it
 // The FPMR value is the last operand; it is removed from Ops here.
4058 if (TypeFlags.setsFPMR())
4059 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
4060 Ops.pop_back_val());
 // NOTE(review): the statements controlled by the next two `if`s are missing
 // from this listing.
4061 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
4063
4064 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
4066
4067 // Some ACLE builtins leave out the argument to specify the predicate
4068 // pattern, which is expected to be expanded to an SV_ALL pattern.
4069 if (TypeFlags.isAppendSVALL())
4070 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
4071 if (TypeFlags.isInsertOp1SVALL())
4072 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
4073
4074 // Predicates must match the main datatype.
4075 for (Value *&Op : Ops)
4076 if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
4077 if (PredTy->getElementType()->isIntegerTy(1))
4078 Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));
4079
4080 // Splat scalar operand to vector (intrinsics with _n infix)
4081 if (TypeFlags.hasSplatOperand()) {
4082 unsigned OpNo = TypeFlags.getSplatOperand();
4083 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
4084 }
4085
 // Operand reordering: at most one of these swaps is applied.
4086 if (TypeFlags.isReverseCompare())
4087 std::swap(Ops[1], Ops[2]);
4088 else if (TypeFlags.isReverseUSDOT())
4089 std::swap(Ops[1], Ops[2]);
4090 else if (TypeFlags.isReverseMergeAnyBinOp() &&
4091 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4092 std::swap(Ops[1], Ops[2]);
4093 else if (TypeFlags.isReverseMergeAnyAccOp() &&
4094 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4095 std::swap(Ops[1], Ops[3]);
4096
4097 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
4098 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
4099 llvm::Type *OpndTy = Ops[1]->getType();
4100 auto *SplatZero = Constant::getNullValue(OpndTy);
4101 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
4102 }
4103
4104 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
4105 getSVEOverloadTypes(TypeFlags, Ty, Ops));
4106 Value *Call = Builder.CreateCall(F, Ops);
4107
4108 if (Call->getType() == Ty)
4109 return Call;
4110
4111 // Predicate results must be converted to svbool_t.
4112 if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ty))
4113 return EmitSVEPredicateCast(Call, PredTy);
4114 if (auto PredTupleTy = dyn_cast<llvm::StructType>(Ty))
4115 return EmitSVEPredicateTupleCast(Call, PredTupleTy);
4116
4117 llvm_unreachable("unsupported element count!");
4118 }
4119
 // Builtins without a directly corresponding LLVM intrinsic.
4120 switch (BuiltinID) {
4121 default:
4122 return nullptr;
4123
4124 case SVE::BI__builtin_sve_svreinterpret_b: {
4125 auto SVCountTy =
4126 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4127 Function *CastFromSVCountF =
4128 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4129 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
4130 }
4131 case SVE::BI__builtin_sve_svreinterpret_c: {
4132 auto SVCountTy =
4133 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4134 Function *CastToSVCountF =
4135 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4136 return Builder.CreateCall(CastToSVCountF, Ops[0]);
4137 }
4138
4139 case SVE::BI__builtin_sve_svpsel_lane_b8:
4140 case SVE::BI__builtin_sve_svpsel_lane_b16:
4141 case SVE::BI__builtin_sve_svpsel_lane_b32:
4142 case SVE::BI__builtin_sve_svpsel_lane_b64:
4143 case SVE::BI__builtin_sve_svpsel_lane_c8:
4144 case SVE::BI__builtin_sve_svpsel_lane_c16:
4145 case SVE::BI__builtin_sve_svpsel_lane_c32:
4146 case SVE::BI__builtin_sve_svpsel_lane_c64: {
 // psel is emitted on svbool values: an svcount first operand is converted
 // to svbool before the call and the result is converted back afterwards.
4147 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
4148 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
4149 "aarch64.svcount")) &&
4150 "Unexpected TargetExtType");
4151 auto SVCountTy =
4152 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4153 Function *CastFromSVCountF =
4154 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4155 Function *CastToSVCountF =
4156 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4157
4158 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
4159 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
4160 llvm::Value *Ops0 =
4161 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
4162 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
4163 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
4164 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
4165 }
4166 case SVE::BI__builtin_sve_svmov_b_z: {
4167 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
4168 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4169 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4170 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
4171 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
4172 }
4173
4174 case SVE::BI__builtin_sve_svnot_b_z: {
4175 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
4176 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4177 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4178 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
4179 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
4180 }
4181
4182 case SVE::BI__builtin_sve_svmovlb_u16:
4183 case SVE::BI__builtin_sve_svmovlb_u32:
4184 case SVE::BI__builtin_sve_svmovlb_u64:
4185 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
4186
4187 case SVE::BI__builtin_sve_svmovlb_s16:
4188 case SVE::BI__builtin_sve_svmovlb_s32:
4189 case SVE::BI__builtin_sve_svmovlb_s64:
4190 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
4191
4192 case SVE::BI__builtin_sve_svmovlt_u16:
4193 case SVE::BI__builtin_sve_svmovlt_u32:
4194 case SVE::BI__builtin_sve_svmovlt_u64:
4195 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
4196
4197 case SVE::BI__builtin_sve_svmovlt_s16:
4198 case SVE::BI__builtin_sve_svmovlt_s32:
4199 case SVE::BI__builtin_sve_svmovlt_s64:
4200 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
4201
4202 case SVE::BI__builtin_sve_svpmullt_u16:
4203 case SVE::BI__builtin_sve_svpmullt_u64:
4204 case SVE::BI__builtin_sve_svpmullt_n_u16:
4205 case SVE::BI__builtin_sve_svpmullt_n_u64:
4206 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
4207
4208 case SVE::BI__builtin_sve_svpmullb_u16:
4209 case SVE::BI__builtin_sve_svpmullb_u64:
4210 case SVE::BI__builtin_sve_svpmullb_n_u16:
4211 case SVE::BI__builtin_sve_svpmullb_n_u64:
4212 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
4213
4214 case SVE::BI__builtin_sve_svdup_n_b8:
4215 case SVE::BI__builtin_sve_svdup_n_b16:
4216 case SVE::BI__builtin_sve_svdup_n_b32:
4217 case SVE::BI__builtin_sve_svdup_n_b64: {
 // Compare the scalar operand against zero and splat the resulting i1.
4218 Value *CmpNE =
4219 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
4220 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
4221 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
 // NOTE(review): the statement that follows (listing line 4222) is missing
 // from this listing.
4223 }
4224
4225 case SVE::BI__builtin_sve_svdupq_n_b8:
4226 case SVE::BI__builtin_sve_svdupq_n_b16:
4227 case SVE::BI__builtin_sve_svdupq_n_b32:
4228 case SVE::BI__builtin_sve_svdupq_n_b64:
4229 case SVE::BI__builtin_sve_svdupq_n_u8:
4230 case SVE::BI__builtin_sve_svdupq_n_s8:
4231 case SVE::BI__builtin_sve_svdupq_n_u64:
4232 case SVE::BI__builtin_sve_svdupq_n_f64:
4233 case SVE::BI__builtin_sve_svdupq_n_s64:
4234 case SVE::BI__builtin_sve_svdupq_n_u16:
4235 case SVE::BI__builtin_sve_svdupq_n_f16:
4236 case SVE::BI__builtin_sve_svdupq_n_bf16:
4237 case SVE::BI__builtin_sve_svdupq_n_s16:
4238 case SVE::BI__builtin_sve_svdupq_n_u32:
4239 case SVE::BI__builtin_sve_svdupq_n_f32:
4240 case SVE::BI__builtin_sve_svdupq_n_s32: {
4241 // These builtins pack their operands with BuildVector, insert the result at
4242 // index 0 of a poison scalable vector and replicate it via dupq_lane.
4243 unsigned NumOpnds = Ops.size();
4244
4245 bool IsBoolTy =
4246 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
4247
4248 // For svdupq_n_b* the element type is an integer of SVEBitsPerBlock/numelts
4249 // bits, so that the compare can use the width that is natural for the
4250 // expected number of predicate lanes.
4251 llvm::Type *EltTy = Ops[0]->getType();
4252 if (IsBoolTy)
4253 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
4254
 // NOTE(review): the declaration of `VecOps` (listing line 4255) is missing
 // from this listing.
4256 for (unsigned I = 0; I < NumOpnds; ++I)
4257 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
4258 Value *Vec = BuildVector(VecOps);
4259
4260 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
4261 Value *InsertSubVec = Builder.CreateInsertVector(
4262 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, uint64_t(0));
4263
4264 Function *F =
4265 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
4266 Value *DupQLane =
4267 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
4268
4269 if (!IsBoolTy)
4270 return DupQLane;
4271
4272 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4273 Constant *Pred = ConstantInt::getTrue(getSVEPredType(TypeFlags));
4274
4275 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
 // The plain cmpne is used for two operands, the wide form otherwise.
4276 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
4277 : Intrinsic::aarch64_sve_cmpne_wide,
4278 OverloadedTy);
4279 Value *Call = Builder.CreateCall(
4280 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
 // NOTE(review): the statement that follows (listing line 4281) is missing
 // from this listing.
4282 }
4283
4284 case SVE::BI__builtin_sve_svpfalse_b:
4285 return ConstantInt::getFalse(Ty);
4286
4287 case SVE::BI__builtin_sve_svpfalse_c: {
 // An all-false svbool (<vscale x 16 x i1>) converted to the result type.
4288 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
4289 Function *CastToSVCountF =
4290 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
4291 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
4292 }
4293
4294 case SVE::BI__builtin_sve_svlen_bf16:
4295 case SVE::BI__builtin_sve_svlen_f16:
4296 case SVE::BI__builtin_sve_svlen_f32:
4297 case SVE::BI__builtin_sve_svlen_f64:
4298 case SVE::BI__builtin_sve_svlen_s8:
4299 case SVE::BI__builtin_sve_svlen_s16:
4300 case SVE::BI__builtin_sve_svlen_s32:
4301 case SVE::BI__builtin_sve_svlen_s64:
4302 case SVE::BI__builtin_sve_svlen_u8:
4303 case SVE::BI__builtin_sve_svlen_u16:
4304 case SVE::BI__builtin_sve_svlen_u32:
4305 case SVE::BI__builtin_sve_svlen_u64: {
 // The result is the element count of the builtin's SVE vector type.
4306 SVETypeFlags TF(Builtin->TypeModifier);
4307 return Builder.CreateElementCount(Ty, getSVEType(TF)->getElementCount());
4308 }
4309
4310 case SVE::BI__builtin_sve_svtbl2_u8:
4311 case SVE::BI__builtin_sve_svtbl2_s8:
4312 case SVE::BI__builtin_sve_svtbl2_u16:
4313 case SVE::BI__builtin_sve_svtbl2_s16:
4314 case SVE::BI__builtin_sve_svtbl2_u32:
4315 case SVE::BI__builtin_sve_svtbl2_s32:
4316 case SVE::BI__builtin_sve_svtbl2_u64:
4317 case SVE::BI__builtin_sve_svtbl2_s64:
4318 case SVE::BI__builtin_sve_svtbl2_f16:
4319 case SVE::BI__builtin_sve_svtbl2_bf16:
4320 case SVE::BI__builtin_sve_svtbl2_f32:
4321 case SVE::BI__builtin_sve_svtbl2_f64: {
4322 SVETypeFlags TF(Builtin->TypeModifier);
4323 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, getSVEType(TF));
4324 return Builder.CreateCall(F, Ops);
4325 }
4326
4327 case SVE::BI__builtin_sve_svset_neonq_s8:
4328 case SVE::BI__builtin_sve_svset_neonq_s16:
4329 case SVE::BI__builtin_sve_svset_neonq_s32:
4330 case SVE::BI__builtin_sve_svset_neonq_s64:
4331 case SVE::BI__builtin_sve_svset_neonq_u8:
4332 case SVE::BI__builtin_sve_svset_neonq_u16:
4333 case SVE::BI__builtin_sve_svset_neonq_u32:
4334 case SVE::BI__builtin_sve_svset_neonq_u64:
4335 case SVE::BI__builtin_sve_svset_neonq_f16:
4336 case SVE::BI__builtin_sve_svset_neonq_f32:
4337 case SVE::BI__builtin_sve_svset_neonq_f64:
4338 case SVE::BI__builtin_sve_svset_neonq_bf16: {
 // Insert Ops[1] as a subvector of Ops[0] at index 0.
4339 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], uint64_t(0));
4340 }
4341
4342 case SVE::BI__builtin_sve_svget_neonq_s8:
4343 case SVE::BI__builtin_sve_svget_neonq_s16:
4344 case SVE::BI__builtin_sve_svget_neonq_s32:
4345 case SVE::BI__builtin_sve_svget_neonq_s64:
4346 case SVE::BI__builtin_sve_svget_neonq_u8:
4347 case SVE::BI__builtin_sve_svget_neonq_u16:
4348 case SVE::BI__builtin_sve_svget_neonq_u32:
4349 case SVE::BI__builtin_sve_svget_neonq_u64:
4350 case SVE::BI__builtin_sve_svget_neonq_f16:
4351 case SVE::BI__builtin_sve_svget_neonq_f32:
4352 case SVE::BI__builtin_sve_svget_neonq_f64:
4353 case SVE::BI__builtin_sve_svget_neonq_bf16: {
 // Extract a subvector of type Ty from Ops[0] starting at index 0.
4354 return Builder.CreateExtractVector(Ty, Ops[0], uint64_t(0));
4355 }
4356
4357 case SVE::BI__builtin_sve_svdup_neonq_s8:
4358 case SVE::BI__builtin_sve_svdup_neonq_s16:
4359 case SVE::BI__builtin_sve_svdup_neonq_s32:
4360 case SVE::BI__builtin_sve_svdup_neonq_s64:
4361 case SVE::BI__builtin_sve_svdup_neonq_u8:
4362 case SVE::BI__builtin_sve_svdup_neonq_u16:
4363 case SVE::BI__builtin_sve_svdup_neonq_u32:
4364 case SVE::BI__builtin_sve_svdup_neonq_u64:
4365 case SVE::BI__builtin_sve_svdup_neonq_f16:
4366 case SVE::BI__builtin_sve_svdup_neonq_f32:
4367 case SVE::BI__builtin_sve_svdup_neonq_f64:
4368 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
 // Insert Ops[0] at index 0 of a poison vector, then replicate it with
 // dupq_lane (lane 0).
4369 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
4370 uint64_t(0));
4371 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
4372 {Insert, Builder.getInt64(0)});
4373 }
4374 }
4375
4376 /// Should not happen
4377 return nullptr;
4378}
4379
// For the svsumla/svsudot builtins listed in the switch, swaps the two groups
// of MultiVec operands that follow Ops[0]: Ops[1..MultiVec] are exchanged
// element-wise with Ops[1+MultiVec..2*MultiVec]. All other builtins leave Ops
// untouched.
// NOTE(review): the second line of the signature (which declares `Ops`) is
// missing from this listing.
4380static void swapCommutativeSMEOperands(unsigned BuiltinID,
4382 unsigned MultiVec;
4383 switch (BuiltinID) {
4384 default:
4385 return;
4386 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
4387 MultiVec = 1;
4388 break;
4389 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
4390 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
4391 MultiVec = 2;
4392 break;
4393 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
4394 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
4395 MultiVec = 4;
4396 break;
4397 }
4398
 // MultiVec is always 1, 2 or 4 when control reaches this point (the default
 // case returns early), so the guard below is always true.
4399 if (MultiVec > 0)
4400 for (unsigned I = 0; I < MultiVec; ++I)
4401 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
4402}
4403
4405 const CallExpr *E) {
 // Emits IR for call E to the SME builtin identified by BuiltinID.
 // NOTE(review): this listing omits several lines of the function, among
 // them the first signature line and the declarations of `Ops` and
 // `Builtin`; the comments here describe only what the visible lines do.
4408
4410 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4411 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4412
 // Builtins with dedicated emitters: ld1/st1, ZA read/write, zero, ldr/str.
4413 if (TypeFlags.isLoad() || TypeFlags.isStore())
4414 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4415 if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
4416 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4417 if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
4418 BuiltinID == SME::BI__builtin_sme_svzero_za)
4419 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4420 if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
4421 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
4422 BuiltinID == SME::BI__builtin_sme_svldr_za ||
4423 BuiltinID == SME::BI__builtin_sme_svstr_za)
4424 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4425
4426 // Emit set FPMR for intrinsics that require it
 // The FPMR value is the last operand; it is removed from Ops here.
4427 if (TypeFlags.setsFPMR())
4428 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
4429 Ops.pop_back_val());
4430 // Handle builtins which require their multi-vector operands to be swapped
4431 swapCommutativeSMEOperands(BuiltinID, Ops);
4432
 // Returns the multiplier applied to the cntsd result for the svcntsb/h/w
 // builtins (8, 4 and 2 respectively), or 0 for any other builtin.
4433 auto isCntsBuiltin = [&]() {
4434 switch (BuiltinID) {
4435 default:
4436 return 0;
4437 case SME::BI__builtin_sme_svcntsb:
4438 return 8;
4439 case SME::BI__builtin_sme_svcntsh:
4440 return 4;
4441 case SME::BI__builtin_sme_svcntsw:
4442 return 2;
4443 }
4444 };
4445
 // svcntsb/h/w are emitted as aarch64_sme_cntsd scaled by that multiplier;
 // the multiply is flagged as wrapping neither unsigned nor signed.
4446 if (auto Mul = isCntsBuiltin()) {
4447 llvm::Value *Cntd =
4448 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd));
4449 return Builder.CreateMul(Cntd, llvm::ConstantInt::get(Int64Ty, Mul),
4450 "mulsvl", /* HasNUW */ true, /* HasNSW */ true);
4451 }
4452
4453 // Should not happen!
4454 if (Builtin->LLVMIntrinsic == 0)
4455 return nullptr;
4456
4457 // Predicates must match the main datatype.
4458 for (Value *&Op : Ops)
4459 if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
4460 if (PredTy->getElementType()->isIntegerTy(1))
4461 Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));
4462
 // The ZT load/store intrinsics are overloaded on the type of Ops[1].
4463 if (BuiltinID == SME::BI__builtin_sme_svldr_zt ||
4464 BuiltinID == SME::BI__builtin_sme_svstr_zt) {
4465 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic, Ops[1]->getType());
4466 return Builder.CreateCall(F, Ops);
4467 }
4468
 // Generic path: non-overloaded intrinsics are looked up as-is, all others
 // are overloaded on the builtin's SVE vector type.
4469 Function *F =
4470 TypeFlags.isOverloadNone()
4471 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
4472 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
4473
4474 return Builder.CreateCall(F, Ops);
4475}
4476
4477/// Helper for the read/write/add/inc X18 builtins: read the X18 register and
4478/// return it as an i8 pointer.
// NOTE(review): the signature line of this definition is missing from this
// listing; `CGF` is a parameter declared there.
 // The register is named through metadata: an MDNode holding the string
 // "x18" is wrapped as a value and passed to the read_register intrinsic.
4480 LLVMContext &Context = CGF.CGM.getLLVMContext();
4481 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
4482 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4483 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
 // read_register is instantiated for a 64-bit integer result.
4484 llvm::Function *F =
4485 CGF.CGM.getIntrinsic(Intrinsic::read_register, {CGF.Int64Ty});
4486 llvm::Value *X18 = CGF.Builder.CreateCall(F, Metadata);
 // Convert the integer register value to a pointer of type Int8PtrTy.
4487 return CGF.Builder.CreateIntToPtr(X18, CGF.Int8PtrTy);
4488}
4489
4491 const CallExpr *E,
4492 llvm::Triple::ArchType Arch) {
4493 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
4494 BuiltinID <= clang::AArch64::LastSVEBuiltin)
4495 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
4496
4497 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
4498 BuiltinID <= clang::AArch64::LastSMEBuiltin)
4499 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
4500
4501 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
4502 return EmitAArch64CpuSupports(E);
4503
4504 unsigned HintID = static_cast<unsigned>(-1);
4505 switch (BuiltinID) {
4506 default: break;
4507 case clang::AArch64::BI__builtin_arm_nop:
4508 HintID = 0;
4509 break;
4510 case clang::AArch64::BI__builtin_arm_yield:
4511 case clang::AArch64::BI__yield:
4512 HintID = 1;
4513 break;
4514 case clang::AArch64::BI__builtin_arm_wfe:
4515 case clang::AArch64::BI__wfe:
4516 HintID = 2;
4517 break;
4518 case clang::AArch64::BI__builtin_arm_wfi:
4519 case clang::AArch64::BI__wfi:
4520 HintID = 3;
4521 break;
4522 case clang::AArch64::BI__builtin_arm_sev:
4523 case clang::AArch64::BI__sev:
4524 HintID = 4;
4525 break;
4526 case clang::AArch64::BI__builtin_arm_sevl:
4527 case clang::AArch64::BI__sevl:
4528 HintID = 5;
4529 break;
4530 }
4531
4532 if (HintID != static_cast<unsigned>(-1)) {
4533 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
4534 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
4535 }
4536
4537 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
4538 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
4539 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4540 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
4541 }
4542
4543 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
4544 // Create call to __arm_sme_state and store the results to the two pointers.
4545 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
4546 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
4547 false),
4548 "__arm_sme_state"));
4549 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
4550 "aarch64_pstate_sm_compatible");
4551 CI->setAttributes(Attrs);
4552 CI->setCallingConv(
4553 llvm::CallingConv::
4554 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
4555 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
4557 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
4559 }
4560
4561 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
4562 assert((getContext().getTypeSize(E->getType()) == 32) &&
4563 "rbit of unusual size!");
4564 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4565 return Builder.CreateCall(
4566 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4567 }
4568 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
4569 assert((getContext().getTypeSize(E->getType()) == 64) &&
4570 "rbit of unusual size!");
4571 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4572 return Builder.CreateCall(
4573 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4574 }
4575
4576 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
4577 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
4578 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4579 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
4580 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
4581 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
4582 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
4583 return Res;
4584 }
4585
4586 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
4587 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4588 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
4589 "cls");
4590 }
4591 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
4592 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4593 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
4594 "cls");
4595 }
4596
4597 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
4598 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
4599 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4600 llvm::Type *Ty = Arg->getType();
4601 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
4602 Arg, "frint32z");
4603 }
4604
4605 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
4606 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
4607 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4608 llvm::Type *Ty = Arg->getType();
4609 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
4610 Arg, "frint64z");
4611 }
4612
4613 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
4614 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
4615 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4616 llvm::Type *Ty = Arg->getType();
4617 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
4618 Arg, "frint32x");
4619 }
4620
4621 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
4622 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
4623 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4624 llvm::Type *Ty = Arg->getType();
4625 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
4626 Arg, "frint64x");
4627 }
4628
4629 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
4630 assert((getContext().getTypeSize(E->getType()) == 32) &&
4631 "__jcvt of unusual size!");
4632 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4633 return Builder.CreateCall(
4634 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
4635 }
4636
4637 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
4638 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
4639 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
4640 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
4641 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
4642 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
4643
4644 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
4645 // Load from the address via an LLVM intrinsic, receiving a
4646 // tuple of 8 i64 words, and store each one to ValPtr.
4647 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
4648 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
4649 llvm::Value *ToRet;
4650 for (size_t i = 0; i < 8; i++) {
4651 llvm::Value *ValOffsetPtr =
4652 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
4653 Address Addr =
4654 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
4655 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
4656 }
4657 return ToRet;
4658 }
4659
4660 // Load 8 i64 words from ValPtr, and store them to the address
4661 // via an LLVM intrinsic.
4663 Args.push_back(MemAddr);
4664 for (size_t i = 0; i < 8; i++) {
4665 llvm::Value *ValOffsetPtr =
4666 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
4667 Address Addr = Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
4668 Args.push_back(Builder.CreateLoad(Addr));
4669 }
4670
4671 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
4672 ? Intrinsic::aarch64_st64b
4673 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
4674 ? Intrinsic::aarch64_st64bv
4675 : Intrinsic::aarch64_st64bv0);
4676 Function *F = CGM.getIntrinsic(Intr);
4677 return Builder.CreateCall(F, Args);
4678 }
4679
4680 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
4681 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
4682
4683 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
4684 ? Intrinsic::aarch64_rndr
4685 : Intrinsic::aarch64_rndrrs);
4686 Function *F = CGM.getIntrinsic(Intr);
4687 llvm::Value *Val = Builder.CreateCall(F);
4688 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
4689 Value *Status = Builder.CreateExtractValue(Val, 1);
4690
4691 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
4692 Builder.CreateStore(RandomValue, MemAddress);
4693 Status = Builder.CreateZExt(Status, Int32Ty);
4694 return Status;
4695 }
4696
4697 if (BuiltinID == clang::AArch64::BI__clear_cache) {
4698 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4699 const FunctionDecl *FD = E->getDirectCallee();
4700 Value *Ops[2];
4701 for (unsigned i = 0; i < 2; i++)
4702 Ops[i] = EmitScalarExpr(E->getArg(i));
4703 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4704 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4705 StringRef Name = FD->getName();
4706 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4707 }
4708
4709 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
4710 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
4711 getContext().getTypeSize(E->getType()) == 128) {
4712 Function *F =
4713 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
4714 ? Intrinsic::aarch64_ldaxp
4715 : Intrinsic::aarch64_ldxp);
4716
4717 Value *LdPtr = EmitScalarExpr(E->getArg(0));
4718 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
4719
4720 Value *Val0 = Builder.CreateExtractValue(Val, 1);
4721 Value *Val1 = Builder.CreateExtractValue(Val, 0);
4722 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
4723 Val0 = Builder.CreateZExt(Val0, Int128Ty);
4724 Val1 = Builder.CreateZExt(Val1, Int128Ty);
4725
4726 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
4727 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4728 Val = Builder.CreateOr(Val, Val1);
4729 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4730 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
4731 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
4732 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4733
4734 QualType Ty = E->getType();
4735 llvm::Type *RealResTy = ConvertType(Ty);
4736 llvm::Type *IntTy =
4737 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
4738
4739 Function *F =
4740 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
4741 ? Intrinsic::aarch64_ldaxr
4742 : Intrinsic::aarch64_ldxr,
4743 DefaultPtrTy);
4744 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
4745 Val->addParamAttr(
4746 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
4747
4748 if (RealResTy->isPointerTy())
4749 return Builder.CreateIntToPtr(Val, RealResTy);
4750
4751 llvm::Type *IntResTy = llvm::IntegerType::get(
4752 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
4753 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
4754 RealResTy);
4755 }
4756
4757 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
4758 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
4759 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
4760 Function *F =
4761 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
4762 ? Intrinsic::aarch64_stlxp
4763 : Intrinsic::aarch64_stxp);
4764 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
4765
4767 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
4768
4769 Tmp = Tmp.withElementType(STy);
4770 llvm::Value *Val = Builder.CreateLoad(Tmp);
4771
4772 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4773 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4774 Value *StPtr = EmitScalarExpr(E->getArg(1));
4775 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
4776 }
4777
4778 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
4779 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
4780 Value *StoreVal = EmitScalarExpr(E->getArg(0));
4781 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4782
4783 QualType Ty = E->getArg(0)->getType();
4784 llvm::Type *StoreTy =
4785 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
4786
4787 if (StoreVal->getType()->isPointerTy())
4788 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
4789 else {
4790 llvm::Type *IntTy = llvm::IntegerType::get(
4792 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
4793 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
4794 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
4795 }
4796
4797 Function *F =
4798 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
4799 ? Intrinsic::aarch64_stlxr
4800 : Intrinsic::aarch64_stxr,
4801 StoreAddr->getType());
4802 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
4803 CI->addParamAttr(
4804 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
4805 return CI;
4806 }
4807
4808 if (BuiltinID == clang::AArch64::BI__getReg ||
4809 BuiltinID == clang::AArch64::BI__setReg) {
4811 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
4812 llvm_unreachable("Sema will ensure that the parameter is constant");
4813
4814 llvm::APSInt Value = Result.Val.getInt();
4815 LLVMContext &Context = CGM.getLLVMContext();
4816 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
4817
4818 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
4819 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4820 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4821
4822 CallInst *CI;
4823 if (BuiltinID == clang::AArch64::BI__getReg) {
4824 llvm::Function *F =
4825 CGM.getIntrinsic(Intrinsic::read_volatile_register, {Int64Ty});
4826 CI = Builder.CreateCall(F, Metadata);
4827 } else {
4828 llvm::Function *F =
4829 CGM.getIntrinsic(Intrinsic::write_volatile_register, {Int64Ty});
4830 CI = Builder.CreateCall(F, {Metadata, EmitScalarExpr(E->getArg(1))});
4831 }
4832 return CI;
4833 }
4834
4835 if (BuiltinID == clang::AArch64::BI__getRegFp ||
4836 BuiltinID == clang::AArch64::BI__setRegFp) {
4838 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
4839 llvm_unreachable("Sema will ensure that the parameter is constant");
4840
4841 llvm::APSInt Value = Result.Val.getInt();
4842 LLVMContext &Context = CGM.getLLVMContext();
4843 std::string Reg = "d" + toString(Value, 10);
4844
4845 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
4846 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4847 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4848
4849 llvm::Value *Ret;
4850 if (BuiltinID == clang::AArch64::BI__getRegFp) {
4851 llvm::Function *F =
4852 CGM.getIntrinsic(Intrinsic::read_volatile_register, {Int64Ty});
4853 llvm::Value *Bits = Builder.CreateCall(F, Metadata);
4854 Ret = Builder.CreateBitCast(Bits, llvm::Type::getDoubleTy(Context));
4855 } else {
4856 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
4857 llvm::Value *Bits = Builder.CreateBitCast(Val, Int64Ty);
4858 llvm::Function *F =
4859 CGM.getIntrinsic(Intrinsic::write_volatile_register, {Int64Ty});
4860 Ret = Builder.CreateCall(F, {Metadata, Bits});
4861 }
4862 return Ret;
4863 }
4864
4865 if (BuiltinID == clang::AArch64::BI__break) {
4867 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
4868 llvm_unreachable("Sema will ensure that the parameter is constant");
4869
4870 llvm::Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
4871 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
4872 }
4873
4874 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
4875 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
4876 return Builder.CreateCall(F);
4877 }
4878
4879 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
4880 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
4881 llvm::SyncScope::SingleThread);
4882
4883 // CRC32
4884 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4885 switch (BuiltinID) {
4886 case clang::AArch64::BI__builtin_arm_crc32b:
4887 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
4888 case clang::AArch64::BI__builtin_arm_crc32cb:
4889 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
4890 case clang::AArch64::BI__builtin_arm_crc32h:
4891 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
4892 case clang::AArch64::BI__builtin_arm_crc32ch:
4893 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
4894 case clang::AArch64::BI__builtin_arm_crc32w:
4895 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
4896 case clang::AArch64::BI__builtin_arm_crc32cw:
4897 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
4898 case clang::AArch64::BI__builtin_arm_crc32d:
4899 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
4900 case clang::AArch64::BI__builtin_arm_crc32cd:
4901 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
4902 }
4903
4904 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4905 Value *Arg0 = EmitScalarExpr(E->getArg(0));
4906 Value *Arg1 = EmitScalarExpr(E->getArg(1));
4907 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4908
4909 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
4910 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
4911
4912 return Builder.CreateCall(F, {Arg0, Arg1});
4913 }
4914
4915 // Memory Operations (MOPS)
4916 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
4917 Value *Dst = EmitScalarExpr(E->getArg(0));
4918 Value *Val = EmitScalarExpr(E->getArg(1));
4919 Value *Size = EmitScalarExpr(E->getArg(2));
4920 Val = Builder.CreateTrunc(Val, Int8Ty);
4921 Size = Builder.CreateIntCast(Size, Int64Ty, false);
4922 return Builder.CreateCall(
4923 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
4924 }
4925
4926 if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch ||
4927 BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x)
4928 return EmitRangePrefetchBuiltin(*this, BuiltinID, E);
4929
4930 // Memory Tagging Extensions (MTE) Intrinsics
4931 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
4932 switch (BuiltinID) {
4933 case clang::AArch64::BI__builtin_arm_irg:
4934 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
4935 case clang::AArch64::BI__builtin_arm_addg:
4936 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
4937 case clang::AArch64::BI__builtin_arm_gmi:
4938 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
4939 case clang::AArch64::BI__builtin_arm_ldg:
4940 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
4941 case clang::AArch64::BI__builtin_arm_stg:
4942 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
4943 case clang::AArch64::BI__builtin_arm_subp:
4944 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
4945 }
4946
4947 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
4948 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
4950 Value *Mask = EmitScalarExpr(E->getArg(1));
4951 assert(Mask->getType()->getScalarSizeInBits() == 64 &&
4952 "SemaARM::BuiltinARMMemoryTaggingCall() enforces this");
4953 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
4954 {Pointer, Mask});
4955 }
4956 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
4958 Value *TagOffset = EmitScalarExpr(E->getArg(1));
4959
4960 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
4961 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
4962 {Pointer, TagOffset});
4963 }
4964 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
4966 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
4967 assert(ExcludedMask->getType()->getScalarSizeInBits() == 64 &&
4968 "SemaARM::BuiltinARMMemoryTaggingCall() enforces this");
4969 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
4970 {Pointer, ExcludedMask});
4971 }
4972 // Although it is possible to supply a different return
4973 // address (first arg) to this intrinsic, for now we set
4974 // return address same as input address.
4975 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
4976 Value *TagAddress = EmitScalarExpr(E->getArg(0));
4977 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
4978 {TagAddress, TagAddress});
4979 }
4980 // Although it is possible to supply a different tag (to set)
4981 // to this intrinsic (as first arg), for now we supply
4982 // the tag that is in input address arg (common use case).
4983 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
4984 Value *TagAddress = EmitScalarExpr(E->getArg(0));
4985 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
4986 {TagAddress, TagAddress});
4987 }
4988 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
4989 Value *PointerA = EmitScalarExpr(E->getArg(0));
4990 Value *PointerB = EmitScalarExpr(E->getArg(1));
4991 return Builder.CreateCall(
4992 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
4993 }
4994 }
4995
4996 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
4997 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
4998 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
4999 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
5000 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
5001 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
5002 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
5003 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
5004
5005 SpecialRegisterAccessKind AccessKind = Write;
5006 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5007 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
5008 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5009 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
5010 AccessKind = VolatileRead;
5011
5012 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
5013 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
5014
5015 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5016 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
5017
5018 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5019 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
5020
5021 llvm::Type *ValueType;
5022 llvm::Type *RegisterType = Int64Ty;
5023 if (Is32Bit) {
5024 ValueType = Int32Ty;
5025 } else if (Is128Bit) {
5026 llvm::Type *Int128Ty =
5027 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
5028 ValueType = Int128Ty;
5029 RegisterType = Int128Ty;
5030 } else if (IsPointerBuiltin) {
5031 ValueType = VoidPtrTy;
5032 } else {
5033 ValueType = Int64Ty;
5034 };
5035
5036 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
5037 AccessKind);
5038 }
5039
5040 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5041 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
5042 LLVMContext &Context = CGM.getLLVMContext();
5043
5044 unsigned SysReg =
5045 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
5046
5047 std::string SysRegStr;
5048 llvm::raw_string_ostream(SysRegStr)
5049 << (0b10 | SysReg >> 14) << ":" << ((SysReg >> 11) & 7) << ":"
5050 << ((SysReg >> 7) & 15) << ":" << ((SysReg >> 3) & 15) << ":"
5051 << (SysReg & 7);
5052
5053 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
5054 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5055 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5056
5057 llvm::Type *RegisterType = Int64Ty;
5058 llvm::Type *Types[] = { RegisterType };
5059
5060 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
5061 llvm::Function *F = CGM.getIntrinsic(Intrinsic::read_register, Types);
5062
5063 return Builder.CreateCall(F, Metadata);
5064 }
5065
5066 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
5067 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
5068 llvm::Value *Result = Builder.CreateCall(F, {Metadata, ArgValue});
5069
5070 return Result;
5071 }
5072
5073 if (BuiltinID == clang::AArch64::BI__sys) {
5074 unsigned SysReg =
5075 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
5076 const unsigned Op1 = SysReg >> 11;
5077 const unsigned CRn = (SysReg >> 7) & 0xf;
5078 const unsigned CRm = (SysReg >> 3) & 0xf;
5079 const unsigned Op2 = SysReg & 0x7;
5080
5081 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_sys),
5082 {Builder.getInt32(Op1), Builder.getInt32(CRn),
5083 Builder.getInt32(CRm), Builder.getInt32(Op2),
5084 EmitScalarExpr(E->getArg(1))});
5085
5086 // Return 0 for convenience, even though MSVC returns some other undefined
5087 // value.
5088 return ConstantInt::get(Builder.getInt32Ty(), 0);
5089 }
5090
5091 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
5092 llvm::Function *F =
5093 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
5094 return Builder.CreateCall(F);
5095 }
5096
5097 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
5098 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
5099 return Builder.CreateCall(F);
5100 }
5101
5102 if (BuiltinID == clang::AArch64::BI__mulh ||
5103 BuiltinID == clang::AArch64::BI__umulh) {
5104 llvm::Type *ResType = ConvertType(E->getType());
5105 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5106
5107 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
5108 Value *LHS =
5109 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
5110 Value *RHS =
5111 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
5112
5113 Value *MulResult, *HigherBits;
5114 if (IsSigned) {
5115 MulResult = Builder.CreateNSWMul(LHS, RHS);
5116 HigherBits = Builder.CreateAShr(MulResult, 64);
5117 } else {
5118 MulResult = Builder.CreateNUWMul(LHS, RHS);
5119 HigherBits = Builder.CreateLShr(MulResult, 64);
5120 }
5121 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
5122
5123 return HigherBits;
5124 }
5125
5126 if (BuiltinID == AArch64::BI__writex18byte ||
5127 BuiltinID == AArch64::BI__writex18word ||
5128 BuiltinID == AArch64::BI__writex18dword ||
5129 BuiltinID == AArch64::BI__writex18qword) {
5130 // Process the args first
5131 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5132 Value *DataArg = EmitScalarExpr(E->getArg(1));
5133
5134 // Read x18 as i8*
5135 llvm::Value *X18 = readX18AsPtr(*this);
5136
5137 // Store val at x18 + offset
5138 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5139 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5140 StoreInst *Store =
5141 Builder.CreateAlignedStore(DataArg, Ptr, CharUnits::One());
5142 return Store;
5143 }
5144
5145 if (BuiltinID == AArch64::BI__readx18byte ||
5146 BuiltinID == AArch64::BI__readx18word ||
5147 BuiltinID == AArch64::BI__readx18dword ||
5148 BuiltinID == AArch64::BI__readx18qword) {
5149 // Process the args first
5150 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5151
5152 // Read x18 as i8*
5153 llvm::Value *X18 = readX18AsPtr(*this);
5154
5155 // Load x18 + offset
5156 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5157 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5158 llvm::Type *IntTy = ConvertType(E->getType());
5159 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
5160 return Load;
5161 }
5162
5163 if (BuiltinID == AArch64::BI__addx18byte ||
5164 BuiltinID == AArch64::BI__addx18word ||
5165 BuiltinID == AArch64::BI__addx18dword ||
5166 BuiltinID == AArch64::BI__addx18qword ||
5167 BuiltinID == AArch64::BI__incx18byte ||
5168 BuiltinID == AArch64::BI__incx18word ||
5169 BuiltinID == AArch64::BI__incx18dword ||
5170 BuiltinID == AArch64::BI__incx18qword) {
5171 llvm::Type *IntTy;
5172 bool isIncrement;
5173 switch (BuiltinID) {
5174 case AArch64::BI__incx18byte:
5175 IntTy = Int8Ty;
5176 isIncrement = true;
5177 break;
5178 case AArch64::BI__incx18word:
5179 IntTy = Int16Ty;
5180 isIncrement = true;
5181 break;
5182 case AArch64::BI__incx18dword:
5183 IntTy = Int32Ty;
5184 isIncrement = true;
5185 break;
5186 case AArch64::BI__incx18qword:
5187 IntTy = Int64Ty;
5188 isIncrement = true;
5189 break;
5190 default:
5191 IntTy = ConvertType(E->getArg(1)->getType());
5192 isIncrement = false;
5193 break;
5194 }
5195 // Process the args first
5196 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5197 Value *ValToAdd =
5198 isIncrement ? ConstantInt::get(IntTy, 1) : EmitScalarExpr(E->getArg(1));
5199
5200 // Read x18 as i8*
5201 llvm::Value *X18 = readX18AsPtr(*this);
5202
5203 // Load x18 + offset
5204 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5205 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5206 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
5207
5208 // Add values
5209 Value *AddResult = Builder.CreateAdd(Load, ValToAdd);
5210
5211 // Store val at x18 + offset
5212 StoreInst *Store =
5213 Builder.CreateAlignedStore(AddResult, Ptr, CharUnits::One());
5214 return Store;
5215 }
5216
5217 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
5218 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
5219 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
5220 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
5221 Value *Arg = EmitScalarExpr(E->getArg(0));
5222 llvm::Type *RetTy = ConvertType(E->getType());
5223 return Builder.CreateBitCast(Arg, RetTy);
5224 }
5225
5226 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5227 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5228 BuiltinID == AArch64::BI_CountLeadingZeros ||
5229 BuiltinID == AArch64::BI_CountLeadingZeros64) {
5230 Value *Arg = EmitScalarExpr(E->getArg(0));
5231 llvm::Type *ArgType = Arg->getType();
5232
5233 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5234 BuiltinID == AArch64::BI_CountLeadingOnes64)
5235 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
5236
5237 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
5238 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
5239
5240 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5241 BuiltinID == AArch64::BI_CountLeadingZeros64)
5242 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5243 return Result;
5244 }
5245
5246 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
5247 BuiltinID == AArch64::BI_CountLeadingSigns64) {
5248 Value *Arg = EmitScalarExpr(E->getArg(0));
5249
5250 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
5251 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
5252 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
5253
5254 Value *Result = Builder.CreateCall(F, Arg, "cls");
5255 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
5256 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5257 return Result;
5258 }
5259
5260 if (BuiltinID == AArch64::BI_CountOneBits ||
5261 BuiltinID == AArch64::BI_CountOneBits64) {
5262 Value *ArgValue = EmitScalarExpr(E->getArg(0));
5263 llvm::Type *ArgType = ArgValue->getType();
5264 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
5265
5266 Value *Result = Builder.CreateCall(F, ArgValue);
5267 if (BuiltinID == AArch64::BI_CountOneBits64)
5268 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5269 return Result;
5270 }
5271
5272 if (BuiltinID == AArch64::BI_CountTrailingZeros ||
5273 BuiltinID == AArch64::BI_CountTrailingZeros64) {
5274 Value *ArgValue = EmitScalarExpr(E->getArg(0));
5275 llvm::Type *ArgType = ArgValue->getType();
5276 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
5277
5278 // MSVC leaves 0 undefined; use false for predictable codegen
5279 Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getInt1(false)});
5280 if (BuiltinID == AArch64::BI_CountTrailingZeros64)
5281 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5282 return Result;
5283 }
5284
5285 if (BuiltinID == AArch64::BI__prefetch) {
5287 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
5288 Value *Locality = ConstantInt::get(Int32Ty, 3);
5289 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
5290 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
5291 return Builder.CreateCall(F, {Address, RW, Locality, Data});
5292 }
5293
5294 if (BuiltinID == AArch64::BI__prefetch2) {
5296 llvm::APSInt PrfOp = E->getArg(1)->EvaluateKnownConstInt(CGM.getContext());
5297 // Decode 5-bit PRFM encoding: bits[4:3]=type, bits[2:1]=target,
5298 // bit[0]=policy
5299 // type: PLD=0(load), PLI=1(instr), PST=2(store)
5300 // target: L1=0, L2=1, L3=2
5301 // policy: KEEP=0, STRM=1
5302 uint64_t Op = PrfOp.getZExtValue();
5303 uint64_t Type = (Op >> 3) & 0x3;
5304 uint64_t Target = (Op >> 1) & 0x3;
5305 uint64_t Policy = Op & 0x1;
5306 Value *RW = Builder.getInt32(Type == 2 ? 1 : 0);
5307 Value *Local = Builder.getInt32(Target);
5308 Value *IsStream = Builder.getInt32(Policy);
5309 Value *IsData = Builder.getInt32(Type == 1 ? 0 : 1);
5310 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_prefetch);
5311 return Builder.CreateCall(F, {Address, RW, Local, IsStream, IsData});
5312 }
5313
5314 if (BuiltinID == AArch64::BI__hlt) {
5315 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
5316 Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
5317
5318 // Return 0 for convenience, even though MSVC returns some other undefined
5319 // value.
5320 return ConstantInt::get(Builder.getInt32Ty(), 0);
5321 }
5322
5323 if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
5324 return Builder.CreateFPTrunc(
5325 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
5326 Builder.getFloatTy()),
5327 Builder.getBFloatTy());
5328
5329 // Handle MSVC intrinsics before argument evaluation to prevent double
5330 // evaluation.
5331 if (std::optional<MSVCIntrin> MsvcIntId =
5333 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
5334
5335 // Some intrinsics are equivalent - if they are, use the base intrinsic ID.
5336 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
5337 return P.first == BuiltinID;
5338 });
5339 if (It != end(NEONEquivalentIntrinsicMap))
5340 BuiltinID = It->second;
5341
5342 // Check whether this is an SISD builtin.
5343 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
5345 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5346 bool IsSISD = (Builtin != nullptr);
5347
5348 // Find out if any arguments are required to be integer constant
5349 // expressions.
5350 unsigned ICEArguments = 0;
5352 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5353 assert(Error == ASTContext::GE_None && "Should not codegen an error");
5354
5356 Address PtrOp0 = Address::invalid();
5357 // Note the assumption that SISD intrinsics do not contain extra arguments.
5358 // TODO: Fold this into a single function call instead of, effectively, two
5359 // separate checks.
5360 bool HasExtraArg = !IsSISD && HasExtraNeonArgument(BuiltinID);
5361 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
5362 for (unsigned i = 0, e = NumArgs; i != e; i++) {
5363 if (i == 0) {
5364 switch (BuiltinID) {
5365 case NEON::BI__builtin_neon_vld1_v:
5366 case NEON::BI__builtin_neon_vld1q_v:
5367 case NEON::BI__builtin_neon_vld1_dup_v:
5368 case NEON::BI__builtin_neon_vld1q_dup_v:
5369 case NEON::BI__builtin_neon_vld1_lane_v:
5370 case NEON::BI__builtin_neon_vld1q_lane_v:
5371 case NEON::BI__builtin_neon_vst1_v:
5372 case NEON::BI__builtin_neon_vst1q_v:
5373 case NEON::BI__builtin_neon_vst1_lane_v:
5374 case NEON::BI__builtin_neon_vst1q_lane_v:
5375 case NEON::BI__builtin_neon_vldap1_lane_s64:
5376 case NEON::BI__builtin_neon_vldap1q_lane_s64:
5377 case NEON::BI__builtin_neon_vstl1_lane_s64:
5378 case NEON::BI__builtin_neon_vstl1q_lane_s64:
5379 // Get the alignment for the argument in addition to the value;
5380 // we'll use it later.
5381 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
5382 Ops.push_back(PtrOp0.emitRawPointer(*this));
5383 continue;
5384 }
5385 }
5386 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
5387 }
5388
5389 if (Builtin) {
5391 assert(Result && "SISD intrinsic should have been handled");
5392 return Result;
5393 }
5394
5395 const Expr *Arg = E->getArg(E->getNumArgs()-1);
5397 if (std::optional<llvm::APSInt> Result =
5399 // Determine the type of this overloaded NEON intrinsic.
5400 Type = NeonTypeFlags(Result->getZExtValue());
5401
5402 bool usgn = Type.isUnsigned();
5403 bool quad = Type.isQuad();
5404 unsigned Int;
5405
5406 // Not all intrinsics handled by the common case work for AArch64 yet, so only
5407 // defer to common code if it's been added to our special map.
5410
5411 if (Builtin)
5413 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
5414 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
5415 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
5416
5417 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
5418 return V;
5419
5420 // Handle non-overloaded intrinsics first.
5421 switch (BuiltinID) {
5422 default: break;
5423 case NEON::BI__builtin_neon_vabsh_f16:
5424 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
5425 case NEON::BI__builtin_neon_vaddq_p128: {
5426 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
5427 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5428 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5429 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
5430 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5431 return Builder.CreateBitCast(Ops[0], Int128Ty);
5432 }
5433 case NEON::BI__builtin_neon_vldrq_p128: {
5434 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5435 return Builder.CreateAlignedLoad(Int128Ty, Ops[0],
5437 }
5438 case NEON::BI__builtin_neon_vstrq_p128: {
5439 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5440 }
5441 case NEON::BI__builtin_neon_vcvts_f32_u32:
5442 case NEON::BI__builtin_neon_vcvtd_f64_u64:
5443 usgn = true;
5444 [[fallthrough]];
5445 case NEON::BI__builtin_neon_vcvts_f32_s32:
5446 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5447 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5448 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5449 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5450 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5451 if (usgn)
5452 return Builder.CreateUIToFP(Ops[0], FTy);
5453 return Builder.CreateSIToFP(Ops[0], FTy);
5454 }
5455 case NEON::BI__builtin_neon_vcvth_f16_u16:
5456 case NEON::BI__builtin_neon_vcvth_f16_u32:
5457 case NEON::BI__builtin_neon_vcvth_f16_u64:
5458 usgn = true;
5459 [[fallthrough]];
5460 case NEON::BI__builtin_neon_vcvth_f16_s16:
5461 case NEON::BI__builtin_neon_vcvth_f16_s32:
5462 case NEON::BI__builtin_neon_vcvth_f16_s64: {
5463 llvm::Type *FTy = HalfTy;
5464 llvm::Type *InTy;
5465 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
5466 InTy = Int64Ty;
5467 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
5468 InTy = Int32Ty;
5469 else
5470 InTy = Int16Ty;
5471 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5472 if (usgn)
5473 return Builder.CreateUIToFP(Ops[0], FTy);
5474 return Builder.CreateSIToFP(Ops[0], FTy);
5475 }
5476 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5477 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5478 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5479 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5480 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5481 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5482 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5483 case NEON::BI__builtin_neon_vcvtph_s16_f16: {
5484 llvm::Type *InTy = Int16Ty;
5485 llvm::Type* FTy = HalfTy;
5486 llvm::Type *Tys[2] = {InTy, FTy};
5487 switch (BuiltinID) {
5488 default: llvm_unreachable("missing builtin ID in switch!");
5489 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5490 Int = Intrinsic::aarch64_neon_fcvtau; break;
5491 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5492 Int = Intrinsic::aarch64_neon_fcvtmu; break;
5493 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5494 Int = Intrinsic::aarch64_neon_fcvtnu; break;
5495 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5496 Int = Intrinsic::aarch64_neon_fcvtpu; break;
5497 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5498 Int = Intrinsic::aarch64_neon_fcvtas; break;
5499 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5500 Int = Intrinsic::aarch64_neon_fcvtms; break;
5501 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5502 Int = Intrinsic::aarch64_neon_fcvtns; break;
5503 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5504 Int = Intrinsic::aarch64_neon_fcvtps; break;
5505 }
5506 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
5507 }
5508 case NEON::BI__builtin_neon_vcaleh_f16:
5509 case NEON::BI__builtin_neon_vcalth_f16:
5510 case NEON::BI__builtin_neon_vcageh_f16:
5511 case NEON::BI__builtin_neon_vcagth_f16: {
5512 llvm::Type* InTy = Int32Ty;
5513 llvm::Type* FTy = HalfTy;
5514 llvm::Type *Tys[2] = {InTy, FTy};
5515 switch (BuiltinID) {
5516 default: llvm_unreachable("missing builtin ID in switch!");
5517 case NEON::BI__builtin_neon_vcageh_f16:
5518 Int = Intrinsic::aarch64_neon_facge; break;
5519 case NEON::BI__builtin_neon_vcagth_f16:
5520 Int = Intrinsic::aarch64_neon_facgt; break;
5521 case NEON::BI__builtin_neon_vcaleh_f16:
5522 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
5523 case NEON::BI__builtin_neon_vcalth_f16:
5524 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
5525 }
5526 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
5527 return Builder.CreateTrunc(Ops[0], Int16Ty);
5528 }
5529 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5530 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
5531 llvm::Type* InTy = Int32Ty;
5532 llvm::Type* FTy = HalfTy;
5533 llvm::Type *Tys[2] = {InTy, FTy};
5534 switch (BuiltinID) {
5535 default: llvm_unreachable("missing builtin ID in switch!");
5536 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5537 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
5538 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
5539 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
5540 }
5541 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
5542 return Builder.CreateTrunc(Ops[0], Int16Ty);
5543 }
5544 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5545 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
5546 llvm::Type* FTy = HalfTy;
5547 llvm::Type* InTy = Int32Ty;
5548 llvm::Type *Tys[2] = {FTy, InTy};
5549 switch (BuiltinID) {
5550 default: llvm_unreachable("missing builtin ID in switch!");
5551 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5552 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
5553 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
5554 break;
5555 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
5556 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
5557 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
5558 break;
5559 }
5560 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
5561 }
5562 case NEON::BI__builtin_neon_vpaddd_s64: {
5563 // TODO: Isn't this handled by
5564 // EmitCommonNeonSISDBuiltinExpr?
5565 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
5566 // The vector is v2i64, so make sure it's bitcast to that.
5567 Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2i64");
5568 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5569 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5570 Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
5571 Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
5572 // Pairwise addition of a v2i64 into a scalar i64.
5573 return Builder.CreateAdd(Op0, Op1, "vpaddd");
5574 }
5575 case NEON::BI__builtin_neon_vpaddd_f64: {
5576 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
5577 // The vector is v2f64, so make sure it's bitcast to that.
5578 Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2f64");
5579 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5580 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5581 Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
5582 Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
5583 // Pairwise addition of a v2f64 into a scalar f64.
5584 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5585 }
5586 case NEON::BI__builtin_neon_vpadds_f32: {
5587 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
5588 // The vector is v2f32, so make sure it's bitcast to that.
5589 Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2f32");
5590 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5591 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5592 Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
5593 Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
5594 // Pairwise addition of a v2f32 into a scalar f32.
5595 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5596 }
5597 case NEON::BI__builtin_neon_vceqzd_s64:
5600 ICmpInst::ICMP_EQ, "vceqz");
5601 case NEON::BI__builtin_neon_vceqzd_f64:
5602 case NEON::BI__builtin_neon_vceqzs_f32:
5603 case NEON::BI__builtin_neon_vceqzh_f16:
5606 ICmpInst::FCMP_OEQ, "vceqz");
5607 case NEON::BI__builtin_neon_vcgezd_s64:
5610 ICmpInst::ICMP_SGE, "vcgez");
5611 case NEON::BI__builtin_neon_vcgezd_f64:
5612 case NEON::BI__builtin_neon_vcgezs_f32:
5613 case NEON::BI__builtin_neon_vcgezh_f16:
5616 ICmpInst::FCMP_OGE, "vcgez");
5617 case NEON::BI__builtin_neon_vclezd_s64:
5620 ICmpInst::ICMP_SLE, "vclez");
5621 case NEON::BI__builtin_neon_vclezd_f64:
5622 case NEON::BI__builtin_neon_vclezs_f32:
5623 case NEON::BI__builtin_neon_vclezh_f16:
5626 ICmpInst::FCMP_OLE, "vclez");
5627 case NEON::BI__builtin_neon_vcgtzd_s64:
5630 ICmpInst::ICMP_SGT, "vcgtz");
5631 case NEON::BI__builtin_neon_vcgtzd_f64:
5632 case NEON::BI__builtin_neon_vcgtzs_f32:
5633 case NEON::BI__builtin_neon_vcgtzh_f16:
5636 ICmpInst::FCMP_OGT, "vcgtz");
5637 case NEON::BI__builtin_neon_vcltzd_s64:
5640 ICmpInst::ICMP_SLT, "vcltz");
5641
5642 case NEON::BI__builtin_neon_vcltzd_f64:
5643 case NEON::BI__builtin_neon_vcltzs_f32:
5644 case NEON::BI__builtin_neon_vcltzh_f16:
5647 ICmpInst::FCMP_OLT, "vcltz");
5648
5649 case NEON::BI__builtin_neon_vceqzd_u64: {
5652 ICmpInst::ICMP_EQ, "vceqzd");
5653 }
5654 case NEON::BI__builtin_neon_vceqd_f64:
5655 case NEON::BI__builtin_neon_vcled_f64:
5656 case NEON::BI__builtin_neon_vcltd_f64:
5657 case NEON::BI__builtin_neon_vcged_f64:
5658 case NEON::BI__builtin_neon_vcgtd_f64: {
5659 llvm::CmpInst::Predicate P;
5660 switch (BuiltinID) {
5661 default: llvm_unreachable("missing builtin ID in switch!");
5662 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
5663 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
5664 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
5665 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
5666 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
5667 }
5668 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5669 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5670 if (P == llvm::FCmpInst::FCMP_OEQ)
5671 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5672 else
5673 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
5674 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
5675 }
5676 case NEON::BI__builtin_neon_vceqs_f32:
5677 case NEON::BI__builtin_neon_vcles_f32:
5678 case NEON::BI__builtin_neon_vclts_f32:
5679 case NEON::BI__builtin_neon_vcges_f32:
5680 case NEON::BI__builtin_neon_vcgts_f32: {
5681 llvm::CmpInst::Predicate P;
5682 switch (BuiltinID) {
5683 default: llvm_unreachable("missing builtin ID in switch!");
5684 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
5685 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
5686 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
5687 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
5688 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
5689 }
5690 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
5691 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
5692 if (P == llvm::FCmpInst::FCMP_OEQ)
5693 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5694 else
5695 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
5696 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
5697 }
5698 case NEON::BI__builtin_neon_vceqh_f16:
5699 case NEON::BI__builtin_neon_vcleh_f16:
5700 case NEON::BI__builtin_neon_vclth_f16:
5701 case NEON::BI__builtin_neon_vcgeh_f16:
5702 case NEON::BI__builtin_neon_vcgth_f16: {
5703 llvm::CmpInst::Predicate P;
5704 switch (BuiltinID) {
5705 default: llvm_unreachable("missing builtin ID in switch!");
5706 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
5707 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
5708 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
5709 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
5710 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
5711 }
5712 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
5713 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
5714 if (P == llvm::FCmpInst::FCMP_OEQ)
5715 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5716 else
5717 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
5718 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
5719 }
5720 case NEON::BI__builtin_neon_vceqd_s64:
5721 case NEON::BI__builtin_neon_vceqd_u64:
5722 case NEON::BI__builtin_neon_vcgtd_s64:
5723 case NEON::BI__builtin_neon_vcgtd_u64:
5724 case NEON::BI__builtin_neon_vcltd_s64:
5725 case NEON::BI__builtin_neon_vcltd_u64:
5726 case NEON::BI__builtin_neon_vcged_u64:
5727 case NEON::BI__builtin_neon_vcged_s64:
5728 case NEON::BI__builtin_neon_vcled_u64:
5729 case NEON::BI__builtin_neon_vcled_s64: {
5730 llvm::CmpInst::Predicate P;
5731 switch (BuiltinID) {
5732 default: llvm_unreachable("missing builtin ID in switch!");
5733 case NEON::BI__builtin_neon_vceqd_s64:
5734 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
5735 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
5736 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
5737 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
5738 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
5739 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
5740 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
5741 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
5742 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
5743 }
5744 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5745 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5746 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
5747 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
5748 }
5749 case NEON::BI__builtin_neon_vnegd_s64:
5750 return Builder.CreateNeg(Ops[0], "vnegd");
5751 case NEON::BI__builtin_neon_vnegh_f16:
5752 return Builder.CreateFNeg(Ops[0], "vnegh");
5753 case NEON::BI__builtin_neon_vtstd_s64:
5754 case NEON::BI__builtin_neon_vtstd_u64: {
5755 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5756 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5757 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
5758 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5759 llvm::Constant::getNullValue(Int64Ty));
5760 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
5761 }
5762 case NEON::BI__builtin_neon_vset_lane_i8:
5763 case NEON::BI__builtin_neon_vset_lane_i16:
5764 case NEON::BI__builtin_neon_vset_lane_i32:
5765 case NEON::BI__builtin_neon_vset_lane_i64:
5766 case NEON::BI__builtin_neon_vset_lane_bf16:
5767 case NEON::BI__builtin_neon_vset_lane_f32:
5768 case NEON::BI__builtin_neon_vsetq_lane_i8:
5769 case NEON::BI__builtin_neon_vsetq_lane_i16:
5770 case NEON::BI__builtin_neon_vsetq_lane_i32:
5771 case NEON::BI__builtin_neon_vsetq_lane_i64:
5772 case NEON::BI__builtin_neon_vsetq_lane_bf16:
5773 case NEON::BI__builtin_neon_vsetq_lane_f32:
5774 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5775 case NEON::BI__builtin_neon_vset_lane_f64:
5776 // The vector type needs a cast for the v1f64 variant.
5777 Ops[1] =
5778 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
5779 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5780 case NEON::BI__builtin_neon_vset_lane_mf8:
5781 case NEON::BI__builtin_neon_vsetq_lane_mf8:
5782 // The input vector type needs a cast to scalar type.
5783 Ops[0] =
5784 Builder.CreateBitCast(Ops[0], llvm::Type::getInt8Ty(getLLVMContext()));
5785 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5786 case NEON::BI__builtin_neon_vsetq_lane_f64:
5787 // The vector type needs a cast for the v2f64 variant.
5788 Ops[1] =
5789 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
5790 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5791
5792 case NEON::BI__builtin_neon_vget_lane_i8:
5793 case NEON::BI__builtin_neon_vdupb_lane_i8:
5794 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5795 case NEON::BI__builtin_neon_vgetq_lane_i8:
5796 case NEON::BI__builtin_neon_vdupb_laneq_i8:
5797 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5798 case NEON::BI__builtin_neon_vget_lane_mf8:
5799 case NEON::BI__builtin_neon_vdupb_lane_mf8:
5800 case NEON::BI__builtin_neon_vgetq_lane_mf8:
5801 case NEON::BI__builtin_neon_vdupb_laneq_mf8:
5802 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5803 case NEON::BI__builtin_neon_vget_lane_i16:
5804 case NEON::BI__builtin_neon_vduph_lane_i16:
5805 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5806 case NEON::BI__builtin_neon_vgetq_lane_i16:
5807 case NEON::BI__builtin_neon_vduph_laneq_i16:
5808 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5809 case NEON::BI__builtin_neon_vget_lane_i32:
5810 case NEON::BI__builtin_neon_vdups_lane_i32:
5811 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5812 case NEON::BI__builtin_neon_vdups_lane_f32:
5813 return Builder.CreateExtractElement(Ops[0], Ops[1], "vdups_lane");
5814 case NEON::BI__builtin_neon_vgetq_lane_i32:
5815 case NEON::BI__builtin_neon_vdups_laneq_i32:
5816 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5817 case NEON::BI__builtin_neon_vget_lane_i64:
5818 case NEON::BI__builtin_neon_vdupd_lane_i64:
5819 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5820 case NEON::BI__builtin_neon_vdupd_lane_f64:
5821 return Builder.CreateExtractElement(Ops[0], Ops[1], "vdupd_lane");
5822 case NEON::BI__builtin_neon_vgetq_lane_i64:
5823 case NEON::BI__builtin_neon_vdupd_laneq_i64:
5824 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5825 case NEON::BI__builtin_neon_vget_lane_f32:
5826 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5827 case NEON::BI__builtin_neon_vget_lane_f64:
5828 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5829 case NEON::BI__builtin_neon_vgetq_lane_f32:
5830 case NEON::BI__builtin_neon_vdups_laneq_f32:
5831 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5832 case NEON::BI__builtin_neon_vgetq_lane_f64:
5833 case NEON::BI__builtin_neon_vdupd_laneq_f64:
5834 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5835 case NEON::BI__builtin_neon_vaddh_f16:
5836 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
5837 case NEON::BI__builtin_neon_vsubh_f16:
5838 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
5839 case NEON::BI__builtin_neon_vmulh_f16:
5840 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
5841 case NEON::BI__builtin_neon_vdivh_f16:
5842 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
5843 case NEON::BI__builtin_neon_vfmah_f16:
5844 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
5846 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
5847 {Ops[1], Ops[2], Ops[0]});
5848 case NEON::BI__builtin_neon_vfmsh_f16: {
5849 Value *Neg = Builder.CreateFNeg(Ops[1], "vsubh");
5850
5851 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
5853 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
5854 {Neg, Ops[2], Ops[0]});
5855 }
5856 case NEON::BI__builtin_neon_vaddd_s64:
5857 case NEON::BI__builtin_neon_vaddd_u64:
5858 return Builder.CreateAdd(Ops[0], Ops[1], "vaddd");
5859 case NEON::BI__builtin_neon_vsubd_s64:
5860 case NEON::BI__builtin_neon_vsubd_u64:
5861 return Builder.CreateSub(Ops[0], Ops[1], "vsubd");
5862 case NEON::BI__builtin_neon_vqdmlalh_s16:
5863 case NEON::BI__builtin_neon_vqdmlslh_s16: {
5864 SmallVector<Value *, 2> ProductOps;
5865 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5866 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
5867 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
5868 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5869 ProductOps, "vqdmlXl");
5870 Constant *CI = ConstantInt::get(SizeTy, 0);
5871 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5872
5873 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
5874 ? Intrinsic::aarch64_neon_sqadd
5875 : Intrinsic::aarch64_neon_sqsub;
5876 // Drop the 2nd multiplication argument before the accumulation
5877 Ops.pop_back();
5878 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
5879 }
5880 case NEON::BI__builtin_neon_vqshlud_n_s64: {
5881 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5882 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
5883 Ops, "vqshlu_n");
5884 }
5885 case NEON::BI__builtin_neon_vqshld_n_u64:
5886 case NEON::BI__builtin_neon_vqshld_n_s64: {
5887 Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
5888 ? Intrinsic::aarch64_neon_uqshl
5889 : Intrinsic::aarch64_neon_sqshl;
5890 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5891 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
5892 }
5893 case NEON::BI__builtin_neon_vrshrd_n_u64:
5894 case NEON::BI__builtin_neon_vrshrd_n_s64: {
5895 Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
5896 ? Intrinsic::aarch64_neon_urshl
5897 : Intrinsic::aarch64_neon_srshl;
5898 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
5899 Ops[1] = ConstantInt::get(Int64Ty, -SV);
5900 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
5901 }
5902 case NEON::BI__builtin_neon_vrsrad_n_u64:
5903 case NEON::BI__builtin_neon_vrsrad_n_s64: {
5904 Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
5905 ? Intrinsic::aarch64_neon_urshl
5906 : Intrinsic::aarch64_neon_srshl;
5907 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5908 Ops[2] = Builder.CreateNeg(Ops[2]);
5909 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
5910 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
5911 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
5912 }
5913 case NEON::BI__builtin_neon_vshld_n_s64:
5914 case NEON::BI__builtin_neon_vshld_n_u64: {
5915 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[1]);
5916 return Builder.CreateShl(
5917 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
5918 }
5919 case NEON::BI__builtin_neon_vshrd_n_s64: {
5920 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[1]);
5921 return Builder.CreateAShr(
5922 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5923 Amt->getZExtValue())),
5924 "shrd_n");
5925 }
5926 case NEON::BI__builtin_neon_vshrd_n_u64: {
5927 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[1]);
5928 uint64_t ShiftAmt = Amt->getZExtValue();
5929 // Right-shifting an unsigned value by its size yields 0.
5930 if (ShiftAmt == 64)
5931 return ConstantInt::get(Int64Ty, 0);
5932 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
5933 "shrd_n");
5934 }
5935 case NEON::BI__builtin_neon_vsrad_n_s64: {
5936 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[2]);
5937 Ops[1] = Builder.CreateAShr(
5938 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5939 Amt->getZExtValue())),
5940 "shrd_n");
5941 return Builder.CreateAdd(Ops[0], Ops[1]);
5942 }
5943 case NEON::BI__builtin_neon_vsrad_n_u64: {
5944 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[2]);
5945 uint64_t ShiftAmt = Amt->getZExtValue();
5946 // Right-shifting an unsigned value by its size yields 0.
5947 // As Op + 0 = Op, return Ops[0] directly.
5948 if (ShiftAmt == 64)
5949 return Ops[0];
5950 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
5951 "shrd_n");
5952 return Builder.CreateAdd(Ops[0], Ops[1]);
5953 }
5954 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
5955 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
5956 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
5957 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
5958 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "lane");
5959 SmallVector<Value *, 2> ProductOps;
5960 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5961 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
5962 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
5963 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5964 ProductOps, "vqdmlXl");
5965 Constant *CI = ConstantInt::get(SizeTy, 0);
5966 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5967 // Drop lane-selection and the corresponding vector argument (these have
5968 // already been used)
5969 Ops.pop_back_n(2);
5970
5971 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
5972 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
5973 ? Intrinsic::aarch64_neon_sqadd
5974 : Intrinsic::aarch64_neon_sqsub;
5975 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
5976 }
5977 case NEON::BI__builtin_neon_vqdmlals_s32:
5978 case NEON::BI__builtin_neon_vqdmlsls_s32: {
5979 SmallVector<Value *, 2> ProductOps;
5980 ProductOps.push_back(Ops[1]);
5981 ProductOps.push_back(Ops[2]);
5982 Ops[1] =
5983 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5984 ProductOps, "vqdmlXl");
5985
5986 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
5987 ? Intrinsic::aarch64_neon_sqadd
5988 : Intrinsic::aarch64_neon_sqsub;
5989 // Drop the 2nd multiplication argument before the accumulation
5990 Ops.pop_back();
5991 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
5992 }
5993 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
5994 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
5995 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
5996 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
5997 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "lane");
5998 SmallVector<Value *, 2> ProductOps;
5999 ProductOps.push_back(Ops[1]);
6000 ProductOps.push_back(Ops[2]);
6001 Ops[1] =
6002 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6003 ProductOps, "vqdmlXl");
6004 // Drop lane-selection and the corresponding vector argument (these have
6005 // already been used)
6006 Ops.pop_back_n(2);
6007
6008 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
6009 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
6010 ? Intrinsic::aarch64_neon_sqadd
6011 : Intrinsic::aarch64_neon_sqsub;
6012 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
6013 }
6014 case NEON::BI__builtin_neon_vget_lane_bf16:
6015 case NEON::BI__builtin_neon_vduph_lane_bf16:
6016 case NEON::BI__builtin_neon_vduph_lane_f16: {
6017 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
6018 }
6019 case NEON::BI__builtin_neon_vgetq_lane_bf16:
6020 case NEON::BI__builtin_neon_vduph_laneq_bf16:
6021 case NEON::BI__builtin_neon_vduph_laneq_f16: {
6022 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
6023 }
6024 case NEON::BI__builtin_neon_vcvt_bf16_f32: {
6025 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6026 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6027 return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6028 }
6029 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
6030 SmallVector<int, 16> ConcatMask(8);
6031 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6032 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6033 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6034 llvm::Value *Trunc =
6035 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6036 return Builder.CreateShuffleVector(
6037 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
6038 }
6039 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
6040 SmallVector<int, 16> ConcatMask(8);
6041 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6042 SmallVector<int, 16> LoMask(4);
6043 std::iota(LoMask.begin(), LoMask.end(), 0);
6044 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6045 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6046 llvm::Type *V8BF16 = FixedVectorType::get(Builder.getBFloatTy(), 8);
6047 llvm::Value *Inactive = Builder.CreateShuffleVector(
6048 Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
6049 llvm::Value *Trunc =
6050 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
6051 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
6052 }
6053
6054 case clang::AArch64::BI_InterlockedAdd:
6055 case clang::AArch64::BI_InterlockedAdd_acq:
6056 case clang::AArch64::BI_InterlockedAdd_rel:
6057 case clang::AArch64::BI_InterlockedAdd_nf:
6058 case clang::AArch64::BI_InterlockedAdd64:
6059 case clang::AArch64::BI_InterlockedAdd64_acq:
6060 case clang::AArch64::BI_InterlockedAdd64_rel:
6061 case clang::AArch64::BI_InterlockedAdd64_nf: {
6062 Address DestAddr = CheckAtomicAlignment(*this, E);
6063 Value *Val = Ops[1];
6064 llvm::AtomicOrdering Ordering;
6065 switch (BuiltinID) {
6066 case clang::AArch64::BI_InterlockedAdd:
6067 case clang::AArch64::BI_InterlockedAdd64:
6068 Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
6069 break;
6070 case clang::AArch64::BI_InterlockedAdd_acq:
6071 case clang::AArch64::BI_InterlockedAdd64_acq:
6072 Ordering = llvm::AtomicOrdering::Acquire;
6073 break;
6074 case clang::AArch64::BI_InterlockedAdd_rel:
6075 case clang::AArch64::BI_InterlockedAdd64_rel:
6076 Ordering = llvm::AtomicOrdering::Release;
6077 break;
6078 case clang::AArch64::BI_InterlockedAdd_nf:
6079 case clang::AArch64::BI_InterlockedAdd64_nf:
6080 Ordering = llvm::AtomicOrdering::Monotonic;
6081 break;
6082 default:
6083 llvm_unreachable("missing builtin ID in switch!");
6084 }
6085 AtomicRMWInst *RMWI =
6086 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val, Ordering);
6087 return Builder.CreateAdd(RMWI, Val);
6088 }
6089 }
6090
6091 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
6092 llvm::Type *Ty = VTy;
6093 if (!Ty)
6094 return nullptr;
6095
6096 bool ExtractLow = false;
6097 bool ExtendLaneArg = false;
6098 switch (BuiltinID) {
6099 default: return nullptr;
6100 case NEON::BI__builtin_neon_vbsl_v:
6101 case NEON::BI__builtin_neon_vbslq_v: {
6102 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6103 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
6104 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
6105 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
6106
6107 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
6108 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
6109 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
6110 return Builder.CreateBitCast(Ops[0], Ty);
6111 }
6112 case NEON::BI__builtin_neon_vfma_lane_v:
6113 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6114 // The ARM builtins (and instructions) have the addend as the first
6115 // operand, but the 'fma' intrinsics have it last. Swap it around here.
6116 Value *Addend = Ops[0];
6117 Value *Multiplicand = Ops[1];
6118 Value *LaneSource = Ops[2];
6119 Ops[0] = Multiplicand;
6120 Ops[1] = LaneSource;
6121 Ops[2] = Addend;
6122
6123 // Now adjust things to handle the lane access.
6124 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
6125 ? llvm::FixedVectorType::get(VTy->getElementType(),
6126 VTy->getNumElements() / 2)
6127 : VTy;
6128 llvm::Constant *cst = cast<Constant>(Ops[3]);
6129 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
6130 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
6131 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
6132
6133 Ops.pop_back();
6134 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
6135 : Intrinsic::fma;
6136 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
6137 }
6138 case NEON::BI__builtin_neon_vfma_laneq_v: {
6139 auto *VTy = cast<llvm::FixedVectorType>(Ty);
6140 // v1f64 fma should be mapped to Neon scalar f64 fma
6141 if (VTy && VTy->getElementType() == DoubleTy) {
6142 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6143 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6144 llvm::FixedVectorType *VTy =
6146 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
6147 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6148 Value *Result;
6150 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
6151 DoubleTy, {Ops[1], Ops[2], Ops[0]});
6152 return Builder.CreateBitCast(Result, Ty);
6153 }
6154 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6155 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6156
6157 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
6158 VTy->getNumElements() * 2);
6159 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
6160 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
6161 cast<ConstantInt>(Ops[3]));
6162 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6163
6165 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6166 {Ops[2], Ops[1], Ops[0]});
6167 }
6168 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6169 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6170 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6171
6172 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6173 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6175 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6176 {Ops[2], Ops[1], Ops[0]});
6177 }
6178 case NEON::BI__builtin_neon_vfmah_lane_f16:
6179 case NEON::BI__builtin_neon_vfmas_lane_f32:
6180 case NEON::BI__builtin_neon_vfmah_laneq_f16:
6181 case NEON::BI__builtin_neon_vfmas_laneq_f32:
6182 case NEON::BI__builtin_neon_vfmad_lane_f64:
6183 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6184 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6185 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6187 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6188 {Ops[1], Ops[2], Ops[0]});
6189 }
6190 case NEON::BI__builtin_neon_vmull_v:
6191 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6192 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6193 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6194 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6195 case NEON::BI__builtin_neon_vmax_v:
6196 case NEON::BI__builtin_neon_vmaxq_v:
6197 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6198 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6199 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6200 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6201 case NEON::BI__builtin_neon_vmaxh_f16: {
6202 Int = Intrinsic::aarch64_neon_fmax;
6203 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
6204 }
6205 case NEON::BI__builtin_neon_vmin_v:
6206 case NEON::BI__builtin_neon_vminq_v:
6207 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6208 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6209 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6210 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
6211 case NEON::BI__builtin_neon_vminh_f16: {
6212 Int = Intrinsic::aarch64_neon_fmin;
6213 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
6214 }
6215 case NEON::BI__builtin_neon_vabd_v:
6216 case NEON::BI__builtin_neon_vabdq_v:
6217 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6218 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6219 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6220 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
6221 case NEON::BI__builtin_neon_vpadal_v:
6222 case NEON::BI__builtin_neon_vpadalq_v: {
6223 unsigned ArgElts = VTy->getNumElements();
6224 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6225 unsigned BitWidth = EltTy->getBitWidth();
6226 auto *ArgTy = llvm::FixedVectorType::get(
6227 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
6228 llvm::Type* Tys[2] = { VTy, ArgTy };
6229 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6231 TmpOps.push_back(Ops[1]);
6232 Function *F = CGM.getIntrinsic(Int, Tys);
6233 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
6234 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6235 return Builder.CreateAdd(tmp, addend);
6236 }
6237 case NEON::BI__builtin_neon_vpmin_v:
6238 case NEON::BI__builtin_neon_vpminq_v:
6239 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6240 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6241 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6242 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
6243 case NEON::BI__builtin_neon_vpmax_v:
6244 case NEON::BI__builtin_neon_vpmaxq_v:
6245 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6246 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6247 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6248 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
6249 case NEON::BI__builtin_neon_vminnm_v:
6250 case NEON::BI__builtin_neon_vminnmq_v:
6251 Int = Intrinsic::aarch64_neon_fminnm;
6252 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
6253 case NEON::BI__builtin_neon_vminnmh_f16:
6254 Int = Intrinsic::aarch64_neon_fminnm;
6255 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
6256 case NEON::BI__builtin_neon_vmaxnm_v:
6257 case NEON::BI__builtin_neon_vmaxnmq_v:
6258 Int = Intrinsic::aarch64_neon_fmaxnm;
6259 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
6260 case NEON::BI__builtin_neon_vmaxnmh_f16:
6261 Int = Intrinsic::aarch64_neon_fmaxnm;
6262 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
6263 case NEON::BI__builtin_neon_vrecpss_f32: {
6264 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6265 Ops, "vrecps");
6266 }
6267 case NEON::BI__builtin_neon_vrecpsd_f64:
6268 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6269 Ops, "vrecps");
6270 case NEON::BI__builtin_neon_vrecpsh_f16:
6271 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
6272 Ops, "vrecps");
6273 case NEON::BI__builtin_neon_vqshrun_n_v:
6274 Int = Intrinsic::aarch64_neon_sqshrun;
6275 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
6276 case NEON::BI__builtin_neon_vqrshrun_n_v:
6277 Int = Intrinsic::aarch64_neon_sqrshrun;
6278 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
6279 case NEON::BI__builtin_neon_vqshrn_n_v:
6280 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6281 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
6282 case NEON::BI__builtin_neon_vrshrn_n_v:
6283 Int = Intrinsic::aarch64_neon_rshrn;
6284 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
6285 case NEON::BI__builtin_neon_vqrshrn_n_v:
6286 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6287 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
6288 case NEON::BI__builtin_neon_vrndah_f16: {
6289 Int = Builder.getIsFPConstrained()
6290 ? Intrinsic::experimental_constrained_round
6291 : Intrinsic::round;
6292 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
6293 }
6294 case NEON::BI__builtin_neon_vrnda_v:
6295 case NEON::BI__builtin_neon_vrndaq_v: {
6296 Int = Builder.getIsFPConstrained()
6297 ? Intrinsic::experimental_constrained_round
6298 : Intrinsic::round;
6299 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
6300 }
6301 case NEON::BI__builtin_neon_vrndih_f16: {
6302 Int = Builder.getIsFPConstrained()
6303 ? Intrinsic::experimental_constrained_nearbyint
6304 : Intrinsic::nearbyint;
6305 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
6306 }
6307 case NEON::BI__builtin_neon_vrndmh_f16: {
6308 Int = Builder.getIsFPConstrained()
6309 ? Intrinsic::experimental_constrained_floor
6310 : Intrinsic::floor;
6311 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
6312 }
6313 case NEON::BI__builtin_neon_vrndm_v:
6314 case NEON::BI__builtin_neon_vrndmq_v: {
6315 Int = Builder.getIsFPConstrained()
6316 ? Intrinsic::experimental_constrained_floor
6317 : Intrinsic::floor;
6318 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6319 }
6320 case NEON::BI__builtin_neon_vrndnh_f16: {
6321 Int = Builder.getIsFPConstrained()
6322 ? Intrinsic::experimental_constrained_roundeven
6323 : Intrinsic::roundeven;
6324 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
6325 }
6326 case NEON::BI__builtin_neon_vrndn_v:
6327 case NEON::BI__builtin_neon_vrndnq_v: {
6328 Int = Builder.getIsFPConstrained()
6329 ? Intrinsic::experimental_constrained_roundeven
6330 : Intrinsic::roundeven;
6331 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6332 }
6333 case NEON::BI__builtin_neon_vrndns_f32: {
6334 Int = Builder.getIsFPConstrained()
6335 ? Intrinsic::experimental_constrained_roundeven
6336 : Intrinsic::roundeven;
6337 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
6338 }
6339 case NEON::BI__builtin_neon_vrndph_f16: {
6340 Int = Builder.getIsFPConstrained()
6341 ? Intrinsic::experimental_constrained_ceil
6342 : Intrinsic::ceil;
6343 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
6344 }
6345 case NEON::BI__builtin_neon_vrndp_v:
6346 case NEON::BI__builtin_neon_vrndpq_v: {
6347 Int = Builder.getIsFPConstrained()
6348 ? Intrinsic::experimental_constrained_ceil
6349 : Intrinsic::ceil;
6350 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6351 }
6352 case NEON::BI__builtin_neon_vrndxh_f16: {
6353 Int = Builder.getIsFPConstrained()
6354 ? Intrinsic::experimental_constrained_rint
6355 : Intrinsic::rint;
6356 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
6357 }
6358 case NEON::BI__builtin_neon_vrndx_v:
6359 case NEON::BI__builtin_neon_vrndxq_v: {
6360 Int = Builder.getIsFPConstrained()
6361 ? Intrinsic::experimental_constrained_rint
6362 : Intrinsic::rint;
6363 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6364 }
6365 case NEON::BI__builtin_neon_vrndh_f16: {
6366 Int = Builder.getIsFPConstrained()
6367 ? Intrinsic::experimental_constrained_trunc
6368 : Intrinsic::trunc;
6369 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
6370 }
6371 case NEON::BI__builtin_neon_vrnd32x_f32:
6372 case NEON::BI__builtin_neon_vrnd32xq_f32:
6373 case NEON::BI__builtin_neon_vrnd32x_f64:
6374 case NEON::BI__builtin_neon_vrnd32xq_f64: {
6375 Int = Intrinsic::aarch64_neon_frint32x;
6376 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
6377 }
6378 case NEON::BI__builtin_neon_vrnd32z_f32:
6379 case NEON::BI__builtin_neon_vrnd32zq_f32:
6380 case NEON::BI__builtin_neon_vrnd32z_f64:
6381 case NEON::BI__builtin_neon_vrnd32zq_f64: {
6382 Int = Intrinsic::aarch64_neon_frint32z;
6383 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
6384 }
6385 case NEON::BI__builtin_neon_vrnd64x_f32:
6386 case NEON::BI__builtin_neon_vrnd64xq_f32:
6387 case NEON::BI__builtin_neon_vrnd64x_f64:
6388 case NEON::BI__builtin_neon_vrnd64xq_f64: {
6389 Int = Intrinsic::aarch64_neon_frint64x;
6390 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
6391 }
6392 case NEON::BI__builtin_neon_vrnd64z_f32:
6393 case NEON::BI__builtin_neon_vrnd64zq_f32:
6394 case NEON::BI__builtin_neon_vrnd64z_f64:
6395 case NEON::BI__builtin_neon_vrnd64zq_f64: {
6396 Int = Intrinsic::aarch64_neon_frint64z;
6397 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
6398 }
6399 case NEON::BI__builtin_neon_vrnd_v:
6400 case NEON::BI__builtin_neon_vrndq_v: {
6401 Int = Builder.getIsFPConstrained()
6402 ? Intrinsic::experimental_constrained_trunc
6403 : Intrinsic::trunc;
6404 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
6405 }
6406 case NEON::BI__builtin_neon_vcvt_f64_v:
6407 case NEON::BI__builtin_neon_vcvtq_f64_v:
6408 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6409 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6410 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6411 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6412 case NEON::BI__builtin_neon_vcvt_f64_f32: {
6413 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6414 "unexpected vcvt_f64_f32 builtin");
6415 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6416 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6417
6418 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
6419 }
6420 case NEON::BI__builtin_neon_vcvt_f32_f64: {
6421 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6422 "unexpected vcvt_f32_f64 builtin");
6423 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6424 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6425
6426 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
6427 }
6428 case NEON::BI__builtin_neon_vcvta_s16_f16:
6429 case NEON::BI__builtin_neon_vcvta_u16_f16:
6430 case NEON::BI__builtin_neon_vcvta_s32_v:
6431 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
6432 case NEON::BI__builtin_neon_vcvtaq_s32_v:
6433 case NEON::BI__builtin_neon_vcvta_u32_v:
6434 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
6435 case NEON::BI__builtin_neon_vcvtaq_u32_v:
6436 case NEON::BI__builtin_neon_vcvta_s64_v:
6437 case NEON::BI__builtin_neon_vcvtaq_s64_v:
6438 case NEON::BI__builtin_neon_vcvta_u64_v:
6439 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6440 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6441 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6442 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
6443 }
6444 case NEON::BI__builtin_neon_vcvtm_s16_f16:
6445 case NEON::BI__builtin_neon_vcvtm_s32_v:
6446 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
6447 case NEON::BI__builtin_neon_vcvtmq_s32_v:
6448 case NEON::BI__builtin_neon_vcvtm_u16_f16:
6449 case NEON::BI__builtin_neon_vcvtm_u32_v:
6450 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
6451 case NEON::BI__builtin_neon_vcvtmq_u32_v:
6452 case NEON::BI__builtin_neon_vcvtm_s64_v:
6453 case NEON::BI__builtin_neon_vcvtmq_s64_v:
6454 case NEON::BI__builtin_neon_vcvtm_u64_v:
6455 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6456 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6457 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6458 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
6459 }
6460 case NEON::BI__builtin_neon_vcvtn_s16_f16:
6461 case NEON::BI__builtin_neon_vcvtn_s32_v:
6462 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
6463 case NEON::BI__builtin_neon_vcvtnq_s32_v:
6464 case NEON::BI__builtin_neon_vcvtn_u16_f16:
6465 case NEON::BI__builtin_neon_vcvtn_u32_v:
6466 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
6467 case NEON::BI__builtin_neon_vcvtnq_u32_v:
6468 case NEON::BI__builtin_neon_vcvtn_s64_v:
6469 case NEON::BI__builtin_neon_vcvtnq_s64_v:
6470 case NEON::BI__builtin_neon_vcvtn_u64_v:
6471 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6472 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6473 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6474 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
6475 }
6476 case NEON::BI__builtin_neon_vcvtp_s16_f16:
6477 case NEON::BI__builtin_neon_vcvtp_s32_v:
6478 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
6479 case NEON::BI__builtin_neon_vcvtpq_s32_v:
6480 case NEON::BI__builtin_neon_vcvtp_u16_f16:
6481 case NEON::BI__builtin_neon_vcvtp_u32_v:
6482 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
6483 case NEON::BI__builtin_neon_vcvtpq_u32_v:
6484 case NEON::BI__builtin_neon_vcvtp_s64_v:
6485 case NEON::BI__builtin_neon_vcvtpq_s64_v:
6486 case NEON::BI__builtin_neon_vcvtp_u64_v:
6487 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6488 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6489 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6490 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
6491 }
6492 case NEON::BI__builtin_neon_vmulx_v:
6493 case NEON::BI__builtin_neon_vmulxq_v: {
6494 Int = Intrinsic::aarch64_neon_fmulx;
6495 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
6496 }
6497 case NEON::BI__builtin_neon_vmulxh_lane_f16:
6498 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
6499 // vmulx_lane should be mapped to Neon scalar mulx after
6500 // extracting the scalar element
6501 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6502 Ops.pop_back();
6503 Int = Intrinsic::aarch64_neon_fmulx;
6504 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
6505 }
6506 case NEON::BI__builtin_neon_vmul_lane_v:
6507 case NEON::BI__builtin_neon_vmul_laneq_v: {
6508 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
6509 bool Quad = false;
6510 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
6511 Quad = true;
6512 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6513 llvm::FixedVectorType *VTy =
6515 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6516 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6517 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
6518 return Builder.CreateBitCast(Result, Ty);
6519 }
6520 case NEON::BI__builtin_neon_vpmaxnm_v:
6521 case NEON::BI__builtin_neon_vpmaxnmq_v: {
6522 Int = Intrinsic::aarch64_neon_fmaxnmp;
6523 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
6524 }
6525 case NEON::BI__builtin_neon_vpminnm_v:
6526 case NEON::BI__builtin_neon_vpminnmq_v: {
6527 Int = Intrinsic::aarch64_neon_fminnmp;
6528 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
6529 }
6530 case NEON::BI__builtin_neon_vsqrth_f16: {
6531 Int = Builder.getIsFPConstrained()
6532 ? Intrinsic::experimental_constrained_sqrt
6533 : Intrinsic::sqrt;
6534 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
6535 }
6536 case NEON::BI__builtin_neon_vsqrt_v:
6537 case NEON::BI__builtin_neon_vsqrtq_v: {
6538 Int = Builder.getIsFPConstrained()
6539 ? Intrinsic::experimental_constrained_sqrt
6540 : Intrinsic::sqrt;
6541 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6542 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
6543 }
6544 case NEON::BI__builtin_neon_vrbit_v:
6545 case NEON::BI__builtin_neon_vrbitq_v: {
6546 Int = Intrinsic::bitreverse;
6547 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
6548 }
6549 case NEON::BI__builtin_neon_vmaxv_f16: {
6550 Int = Intrinsic::aarch64_neon_fmaxv;
6551 Ty = HalfTy;
6552 VTy = llvm::FixedVectorType::get(HalfTy, 4);
6553 llvm::Type *Tys[2] = {Ty, VTy};
6554 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6555 }
6556 case NEON::BI__builtin_neon_vmaxvq_f16: {
6557 Int = Intrinsic::aarch64_neon_fmaxv;
6558 Ty = HalfTy;
6559 VTy = llvm::FixedVectorType::get(HalfTy, 8);
6560 llvm::Type *Tys[2] = {Ty, VTy};
6561 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6562 }
6563 case NEON::BI__builtin_neon_vminv_f16: {
6564 Int = Intrinsic::aarch64_neon_fminv;
6565 Ty = HalfTy;
6566 VTy = llvm::FixedVectorType::get(HalfTy, 4);
6567 llvm::Type *Tys[2] = {Ty, VTy};
6568 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6569 }
6570 case NEON::BI__builtin_neon_vminvq_f16: {
6571 Int = Intrinsic::aarch64_neon_fminv;
6572 Ty = HalfTy;
6573 VTy = llvm::FixedVectorType::get(HalfTy, 8);
6574 llvm::Type *Tys[2] = {Ty, VTy};
6575 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6576 }
6577 case NEON::BI__builtin_neon_vmaxnmv_f16: {
6578 Int = Intrinsic::aarch64_neon_fmaxnmv;
6579 Ty = HalfTy;
6580 VTy = llvm::FixedVectorType::get(HalfTy, 4);
6581 llvm::Type *Tys[2] = {Ty, VTy};
6582 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
6583 }
6584 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
6585 Int = Intrinsic::aarch64_neon_fmaxnmv;
6586 Ty = HalfTy;
6587 VTy = llvm::FixedVectorType::get(HalfTy, 8);
6588 llvm::Type *Tys[2] = {Ty, VTy};
6589 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
6590 }
6591 case NEON::BI__builtin_neon_vminnmv_f16: {
6592 Int = Intrinsic::aarch64_neon_fminnmv;
6593 Ty = HalfTy;
6594 VTy = llvm::FixedVectorType::get(HalfTy, 4);
6595 llvm::Type *Tys[2] = {Ty, VTy};
6596 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
6597 return Builder.CreateTrunc(Ops[0], HalfTy);
6598 }
6599 case NEON::BI__builtin_neon_vminnmvq_f16: {
6600 Int = Intrinsic::aarch64_neon_fminnmv;
6601 Ty = HalfTy;
6602 VTy = llvm::FixedVectorType::get(HalfTy, 8);
6603 llvm::Type *Tys[2] = {Ty, VTy};
6604 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
6605 }
6606 case NEON::BI__builtin_neon_vmul_n_f64: {
6607 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6608 Value *RHS = Builder.CreateBitCast(Ops[1], DoubleTy);
6609 return Builder.CreateFMul(Ops[0], RHS);
6610 }
6611 case NEON::BI__builtin_neon_vaddlv_u8:
6612 case NEON::BI__builtin_neon_vaddlvq_u8:
6613 case NEON::BI__builtin_neon_vaddlv_u16:
6614 case NEON::BI__builtin_neon_vaddlvq_u16: {
6615 Int = Intrinsic::aarch64_neon_uaddlv;
6616 Ty = Int32Ty;
6617 VTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
6618 llvm::Type *Tys[2] = {Ty, VTy};
6619 Value *Result = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6620 if (VTy->getElementType()->getPrimitiveSizeInBits() == 8)
6621 return Builder.CreateTrunc(Result, Int16Ty);
6622 return Result;
6623 }
6624 case NEON::BI__builtin_neon_vaddlv_s8:
6625 case NEON::BI__builtin_neon_vaddlvq_s8:
6626 case NEON::BI__builtin_neon_vaddlv_s16:
6627 case NEON::BI__builtin_neon_vaddlvq_s16: {
6628 Int = Intrinsic::aarch64_neon_saddlv;
6629 Ty = Int32Ty;
6630 VTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
6631 llvm::Type *Tys[2] = {Ty, VTy};
6632 Value *Result = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6633 if (VTy->getElementType()->getPrimitiveSizeInBits() == 8)
6634 return Builder.CreateTrunc(Result, Int16Ty);
6635 return Result;
6636 }
6637 case NEON::BI__builtin_neon_vsri_n_v:
6638 case NEON::BI__builtin_neon_vsriq_n_v: {
6639 Int = Intrinsic::aarch64_neon_vsri;
6640 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6641 return EmitNeonCall(Intrin, Ops, "vsri_n");
6642 }
6643 case NEON::BI__builtin_neon_vsli_n_v:
6644 case NEON::BI__builtin_neon_vsliq_n_v: {
6645 Int = Intrinsic::aarch64_neon_vsli;
6646 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6647 return EmitNeonCall(Intrin, Ops, "vsli_n");
6648 }
6649 case NEON::BI__builtin_neon_vsra_n_v:
6650 case NEON::BI__builtin_neon_vsraq_n_v:
6651 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6652 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6653 return Builder.CreateAdd(Ops[0], Ops[1]);
6654 case NEON::BI__builtin_neon_vrsra_n_v:
6655 case NEON::BI__builtin_neon_vrsraq_n_v: {
6656 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6658 TmpOps.push_back(Ops[1]);
6659 TmpOps.push_back(Ops[2]);
6660 Function* F = CGM.getIntrinsic(Int, Ty);
6661 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
6662 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
6663 return Builder.CreateAdd(Ops[0], tmp);
6664 }
6665 case NEON::BI__builtin_neon_vld1_v:
6666 case NEON::BI__builtin_neon_vld1q_v: {
6667 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
6668 }
6669 case NEON::BI__builtin_neon_vst1_v:
6670 case NEON::BI__builtin_neon_vst1q_v:
6671 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6672 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
6673 case NEON::BI__builtin_neon_vld1_lane_v:
6674 case NEON::BI__builtin_neon_vld1q_lane_v: {
6675 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6676 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
6677 PtrOp0.getAlignment());
6678 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
6679 }
6680 case NEON::BI__builtin_neon_vldap1_lane_s64:
6681 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
6682 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6683 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
6684 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
6685 LI->setAtomic(llvm::AtomicOrdering::Acquire);
6686 Ops[0] = LI;
6687 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
6688 }
6689 case NEON::BI__builtin_neon_vld1_dup_v:
6690 case NEON::BI__builtin_neon_vld1q_dup_v: {
6691 Value *V = PoisonValue::get(Ty);
6692 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
6693 PtrOp0.getAlignment());
6694 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
6695 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
6696 return EmitNeonSplat(Ops[0], CI);
6697 }
6698 case NEON::BI__builtin_neon_vst1_lane_v:
6699 case NEON::BI__builtin_neon_vst1q_lane_v:
6700 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6701 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6702 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
6703 case NEON::BI__builtin_neon_vstl1_lane_s64:
6704 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
6705 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6706 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6707 llvm::StoreInst *SI =
6708 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
6709 SI->setAtomic(llvm::AtomicOrdering::Release);
6710 return SI;
6711 }
6712 case NEON::BI__builtin_neon_vld2_v:
6713 case NEON::BI__builtin_neon_vld2q_v: {
6714 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6715 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
6716 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6717 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6718 }
6719 case NEON::BI__builtin_neon_vld3_v:
6720 case NEON::BI__builtin_neon_vld3q_v: {
6721 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6722 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
6723 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6724 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6725 }
6726 case NEON::BI__builtin_neon_vld4_v:
6727 case NEON::BI__builtin_neon_vld4q_v: {
6728 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6729 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
6730 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6731 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6732 }
6733 case NEON::BI__builtin_neon_vld2_dup_v:
6734 case NEON::BI__builtin_neon_vld2q_dup_v: {
6735 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6736 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
6737 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6738 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6739 }
6740 case NEON::BI__builtin_neon_vld3_dup_v:
6741 case NEON::BI__builtin_neon_vld3q_dup_v: {
6742 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6743 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
6744 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6745 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6746 }
6747 case NEON::BI__builtin_neon_vld4_dup_v:
6748 case NEON::BI__builtin_neon_vld4q_dup_v: {
6749 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6750 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
6751 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6752 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6753 }
6754 case NEON::BI__builtin_neon_vld2_lane_v:
6755 case NEON::BI__builtin_neon_vld2q_lane_v: {
6756 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6757 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
6758 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
6759 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6760 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6761 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6762 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
6763 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6764 }
6765 case NEON::BI__builtin_neon_vld3_lane_v:
6766 case NEON::BI__builtin_neon_vld3q_lane_v: {
6767 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6768 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
6769 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
6770 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6771 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6772 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6773 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6774 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
6775 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6776 }
6777 case NEON::BI__builtin_neon_vld4_lane_v:
6778 case NEON::BI__builtin_neon_vld4q_lane_v: {
6779 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6780 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
6781 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
6782 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6783 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6784 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6785 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
6786 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
6787 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
6788 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6789 }
6790 case NEON::BI__builtin_neon_vst2_v:
6791 case NEON::BI__builtin_neon_vst2q_v: {
6792 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6793 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
6794 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
6795 Ops, "");
6796 }
6797 case NEON::BI__builtin_neon_vst2_lane_v:
6798 case NEON::BI__builtin_neon_vst2q_lane_v: {
6799 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6800 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
6801 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6802 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
6803 Ops, "");
6804 }
6805 case NEON::BI__builtin_neon_vst3_v:
6806 case NEON::BI__builtin_neon_vst3q_v: {
6807 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6808 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6809 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
6810 Ops, "");
6811 }
6812 case NEON::BI__builtin_neon_vst3_lane_v:
6813 case NEON::BI__builtin_neon_vst3q_lane_v: {
6814 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6815 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6816 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6817 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
6818 Ops, "");
6819 }
6820 case NEON::BI__builtin_neon_vst4_v:
6821 case NEON::BI__builtin_neon_vst4q_v: {
6822 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6823 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6824 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
6825 Ops, "");
6826 }
6827 case NEON::BI__builtin_neon_vst4_lane_v:
6828 case NEON::BI__builtin_neon_vst4q_lane_v: {
6829 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6830 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6831 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
6832 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
6833 Ops, "");
6834 }
6835 case NEON::BI__builtin_neon_vtrn_v:
6836 case NEON::BI__builtin_neon_vtrnq_v: {
6837 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6838 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6839 Value *SV = nullptr;
6840
6841 for (unsigned vi = 0; vi != 2; ++vi) {
6842 SmallVector<int, 16> Indices;
6843 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6844 Indices.push_back(i+vi);
6845 Indices.push_back(i+e+vi);
6846 }
6847 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6848 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
6849 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6850 }
6851 return SV;
6852 }
6853 case NEON::BI__builtin_neon_vuzp_v:
6854 case NEON::BI__builtin_neon_vuzpq_v: {
6855 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6856 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6857 Value *SV = nullptr;
6858
6859 for (unsigned vi = 0; vi != 2; ++vi) {
6860 SmallVector<int, 16> Indices;
6861 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
6862 Indices.push_back(2*i+vi);
6863
6864 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6865 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
6866 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6867 }
6868 return SV;
6869 }
6870 case NEON::BI__builtin_neon_vzip_v:
6871 case NEON::BI__builtin_neon_vzipq_v: {
6872 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6873 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6874 Value *SV = nullptr;
6875
6876 for (unsigned vi = 0; vi != 2; ++vi) {
6877 SmallVector<int, 16> Indices;
6878 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6879 Indices.push_back((i + vi*e) >> 1);
6880 Indices.push_back(((i + vi*e) >> 1)+e);
6881 }
6882 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6883 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
6884 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6885 }
6886 return SV;
6887 }
6888 case NEON::BI__builtin_neon_vqtbl1q_v: {
6889 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
6890 Ops, "vtbl1");
6891 }
6892 case NEON::BI__builtin_neon_vqtbl2q_v: {
6893 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
6894 Ops, "vtbl2");
6895 }
6896 case NEON::BI__builtin_neon_vqtbl3q_v: {
6897 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
6898 Ops, "vtbl3");
6899 }
6900 case NEON::BI__builtin_neon_vqtbl4q_v: {
6901 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
6902 Ops, "vtbl4");
6903 }
6904 case NEON::BI__builtin_neon_vqtbx1q_v: {
6905 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
6906 Ops, "vtbx1");
6907 }
6908 case NEON::BI__builtin_neon_vqtbx2q_v: {
6909 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
6910 Ops, "vtbx2");
6911 }
6912 case NEON::BI__builtin_neon_vqtbx3q_v: {
6913 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
6914 Ops, "vtbx3");
6915 }
6916 case NEON::BI__builtin_neon_vqtbx4q_v: {
6917 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
6918 Ops, "vtbx4");
6919 }
6920 case NEON::BI__builtin_neon_vsqadd_v:
6921 case NEON::BI__builtin_neon_vsqaddq_v: {
6922 Int = Intrinsic::aarch64_neon_usqadd;
6923 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
6924 }
6925 case NEON::BI__builtin_neon_vuqadd_v:
6926 case NEON::BI__builtin_neon_vuqaddq_v: {
6927 Int = Intrinsic::aarch64_neon_suqadd;
6928 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
6929 }
6930
6931 case NEON::BI__builtin_neon_vluti2_laneq_mf8:
6932 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
6933 case NEON::BI__builtin_neon_vluti2_laneq_f16:
6934 case NEON::BI__builtin_neon_vluti2_laneq_p16:
6935 case NEON::BI__builtin_neon_vluti2_laneq_p8:
6936 case NEON::BI__builtin_neon_vluti2_laneq_s16:
6937 case NEON::BI__builtin_neon_vluti2_laneq_s8:
6938 case NEON::BI__builtin_neon_vluti2_laneq_u16:
6939 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
6940 Int = Intrinsic::aarch64_neon_vluti2_laneq;
6941 llvm::Type *Tys[2];
6942 Tys[0] = Ty;
6943 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
6944 /*isQuad*/ false));
6945 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
6946 }
6947 case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
6948 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
6949 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
6950 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
6951 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
6952 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
6953 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
6954 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
6955 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
6956 Int = Intrinsic::aarch64_neon_vluti2_laneq;
6957 llvm::Type *Tys[2];
6958 Tys[0] = Ty;
6959 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
6960 /*isQuad*/ true));
6961 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
6962 }
6963 case NEON::BI__builtin_neon_vluti2_lane_mf8:
6964 case NEON::BI__builtin_neon_vluti2_lane_bf16:
6965 case NEON::BI__builtin_neon_vluti2_lane_f16:
6966 case NEON::BI__builtin_neon_vluti2_lane_p16:
6967 case NEON::BI__builtin_neon_vluti2_lane_p8:
6968 case NEON::BI__builtin_neon_vluti2_lane_s16:
6969 case NEON::BI__builtin_neon_vluti2_lane_s8:
6970 case NEON::BI__builtin_neon_vluti2_lane_u16:
6971 case NEON::BI__builtin_neon_vluti2_lane_u8: {
6972 Int = Intrinsic::aarch64_neon_vluti2_lane;
6973 llvm::Type *Tys[2];
6974 Tys[0] = Ty;
6975 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
6976 /*isQuad*/ false));
6977 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
6978 }
6979 case NEON::BI__builtin_neon_vluti2q_lane_mf8:
6980 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
6981 case NEON::BI__builtin_neon_vluti2q_lane_f16:
6982 case NEON::BI__builtin_neon_vluti2q_lane_p16:
6983 case NEON::BI__builtin_neon_vluti2q_lane_p8:
6984 case NEON::BI__builtin_neon_vluti2q_lane_s16:
6985 case NEON::BI__builtin_neon_vluti2q_lane_s8:
6986 case NEON::BI__builtin_neon_vluti2q_lane_u16:
6987 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
6988 Int = Intrinsic::aarch64_neon_vluti2_lane;
6989 llvm::Type *Tys[2];
6990 Tys[0] = Ty;
6991 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
6992 /*isQuad*/ true));
6993 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
6994 }
6995 case NEON::BI__builtin_neon_vluti4q_lane_mf8:
6996 case NEON::BI__builtin_neon_vluti4q_lane_p8:
6997 case NEON::BI__builtin_neon_vluti4q_lane_s8:
6998 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
6999 Int = Intrinsic::aarch64_neon_vluti4q_lane;
7000 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane");
7001 }
7002 case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
7003 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
7004 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
7005 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
7006 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
7007 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq");
7008 }
7009 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
7010 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
7011 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
7012 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
7013 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
7014 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
7015 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
7016 }
7017 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
7018 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
7019 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
7020 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
7021 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
7022 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
7023 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
7024 }
7025 case NEON::BI__builtin_neon_vmmlaq_f16_mf8_fpm:
7026 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla,
7027 {llvm::FixedVectorType::get(HalfTy, 8),
7028 llvm::FixedVectorType::get(Int8Ty, 16)},
7029 Ops, E, "fmmla");
7030 case NEON::BI__builtin_neon_vmmlaq_f32_mf8_fpm:
7031 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla,
7032 {llvm::FixedVectorType::get(FloatTy, 4),
7033 llvm::FixedVectorType::get(Int8Ty, 16)},
7034 Ops, E, "fmmla");
7035 case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
7036 ExtractLow = true;
7037 [[fallthrough]];
7038 case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
7039 case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
7040 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
7041 llvm::FixedVectorType::get(BFloatTy, 8),
7042 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7043 case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
7044 ExtractLow = true;
7045 [[fallthrough]];
7046 case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
7047 case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
7048 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
7049 llvm::FixedVectorType::get(BFloatTy, 8),
7050 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7051 case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
7052 ExtractLow = true;
7053 [[fallthrough]];
7054 case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
7055 case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
7056 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
7057 llvm::FixedVectorType::get(HalfTy, 8),
7058 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7059 case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
7060 ExtractLow = true;
7061 [[fallthrough]];
7062 case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
7063 case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
7064 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
7065 llvm::FixedVectorType::get(HalfTy, 8),
7066 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7067 case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
7068 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7069 llvm::FixedVectorType::get(Int8Ty, 8),
7070 Ops[0]->getType(), false, Ops, E, "vfcvtn");
7071 case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
7072 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7073 llvm::FixedVectorType::get(Int8Ty, 8),
7074 llvm::FixedVectorType::get(HalfTy, 4), false, Ops,
7075 E, "vfcvtn");
7076 case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
7077 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7078 llvm::FixedVectorType::get(Int8Ty, 16),
7079 llvm::FixedVectorType::get(HalfTy, 8), false, Ops,
7080 E, "vfcvtn");
7081 case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
7082 llvm::Type *Ty = llvm::FixedVectorType::get(Int8Ty, 16);
7083 Ops[0] = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
7084 uint64_t(0));
7085 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn2, Ty,
7086 Ops[1]->getType(), false, Ops, E, "vfcvtn2");
7087 }
7088
7089 case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
7090 case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
7091 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2, false, HalfTy,
7092 Ops, E, "fdot2");
7093 case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
7094 case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
7095 ExtendLaneArg = true;
7096 [[fallthrough]];
7097 case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
7098 case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
7099 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2_lane,
7100 ExtendLaneArg, HalfTy, Ops, E, "fdot2_lane");
7101 case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
7102 case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
7103 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4, false,
7104 FloatTy, Ops, E, "fdot4");
7105 case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
7106 case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
7107 ExtendLaneArg = true;
7108 [[fallthrough]];
7109 case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
7110 case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
7111 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4_lane,
7112 ExtendLaneArg, FloatTy, Ops, E, "fdot4_lane");
7113
7114 case NEON::BI__builtin_neon_vdot_f32_f16:
7115 case NEON::BI__builtin_neon_vdotq_f32_f16: {
7116 llvm::Type *InputTy =
7117 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
7118 llvm::Type *Tys[2] = {Ty, InputTy};
7119 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_fdot, Tys),
7120 Ops, "vdot");
7121 }
7122
7123 case NEON::BI__builtin_neon_vdot_lane_f32_f16:
7124 case NEON::BI__builtin_neon_vdot_laneq_f32_f16:
7125 case NEON::BI__builtin_neon_vdotq_lane_f32_f16:
7126 case NEON::BI__builtin_neon_vdotq_laneq_f32_f16: {
7127 llvm::FixedVectorType *InputTy =
7128 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
7129 llvm::FixedVectorType *LaneTy = llvm::FixedVectorType::get(
7130 HalfTy, Ops[2]->getType()->getPrimitiveSizeInBits() / 16);
7131 // Treat the lane argument as a splat and use non-lane version of the
7132 // intrinsic.
7133 Ops[2] = Builder.CreateBitCast(Ops[2], LaneTy);
7134 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]),
7135 InputTy->getElementCount());
7136 llvm::Type *Tys[2] = {Ty, InputTy};
7137 Ops.pop_back();
7138 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_fdot, Tys),
7139 Ops, "vdot");
7140 }
7141
7142 case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
7143 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalb,
7144 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7145 "vmlal");
7146 case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
7147 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalt,
7148 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7149 "vmlal");
7150 case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
7151 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbb,
7152 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7153 "vmlall");
7154 case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
7155 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbt,
7156 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7157 "vmlall");
7158 case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
7159 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltb,
7160 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7161 "vmlall");
7162 case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
7163 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltt,
7164 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7165 "vmlall");
7166 case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
7167 ExtendLaneArg = true;
7168 [[fallthrough]];
7169 case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
7170 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalb_lane,
7171 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7172 case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
7173 ExtendLaneArg = true;
7174 [[fallthrough]];
7175 case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
7176 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalt_lane,
7177 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7178 case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
7179 ExtendLaneArg = true;
7180 [[fallthrough]];
7181 case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
7182 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbb_lane,
7183 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7184 case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
7185 ExtendLaneArg = true;
7186 [[fallthrough]];
7187 case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
7188 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbt_lane,
7189 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7190 case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
7191 ExtendLaneArg = true;
7192 [[fallthrough]];
7193 case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
7194 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltb_lane,
7195 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7196 case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
7197 ExtendLaneArg = true;
7198 [[fallthrough]];
7199 case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
7200 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltt_lane,
7201 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7202 case NEON::BI__builtin_neon_vamin_f16:
7203 case NEON::BI__builtin_neon_vaminq_f16:
7204 case NEON::BI__builtin_neon_vamin_f32:
7205 case NEON::BI__builtin_neon_vaminq_f32:
7206 case NEON::BI__builtin_neon_vaminq_f64: {
7207 Int = Intrinsic::aarch64_neon_famin;
7208 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
7209 }
7210 case NEON::BI__builtin_neon_vamax_f16:
7211 case NEON::BI__builtin_neon_vamaxq_f16:
7212 case NEON::BI__builtin_neon_vamax_f32:
7213 case NEON::BI__builtin_neon_vamaxq_f32:
7214 case NEON::BI__builtin_neon_vamaxq_f64: {
7215 Int = Intrinsic::aarch64_neon_famax;
7216 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
7217 }
7218 case NEON::BI__builtin_neon_vscale_f16:
7219 case NEON::BI__builtin_neon_vscaleq_f16:
7220 case NEON::BI__builtin_neon_vscale_f32:
7221 case NEON::BI__builtin_neon_vscaleq_f32:
7222 case NEON::BI__builtin_neon_vscaleq_f64: {
7223 Int = Intrinsic::aarch64_neon_fp8_fscale;
7224 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fscale");
7225 }
7226 }
7227}
7228
7230 const CallExpr *E) {
7231 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
7232 BuiltinID == BPF::BI__builtin_btf_type_id ||
7233 BuiltinID == BPF::BI__builtin_preserve_type_info ||
7234 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
7235 "unexpected BPF builtin");
7236
7237 // A sequence number, injected into IR builtin functions, to
7238 // prevent CSE given the only difference of the function
7239 // may just be the debuginfo metadata.
7240 static uint32_t BuiltinSeqNum;
7241
7242 switch (BuiltinID) {
7243 default:
7244 llvm_unreachable("Unexpected BPF builtin");
7245 case BPF::BI__builtin_preserve_field_info: {
7246 const Expr *Arg = E->getArg(0);
7247 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
7248
7249 if (!getDebugInfo()) {
7250 CGM.Error(E->getExprLoc(),
7251 "using __builtin_preserve_field_info() without -g");
7252 return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
7253 : EmitLValue(Arg).emitRawPointer(*this);
7254 }
7255
7256 // Enable underlying preserve_*_access_index() generation.
7257 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
7258 IsInPreservedAIRegion = true;
7259 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
7260 : EmitLValue(Arg).emitRawPointer(*this);
7261 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
7262
7263 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7264 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
7265
7266 // Built the IR for the preserve_field_info intrinsic.
7267 llvm::Function *FnGetFieldInfo = Intrinsic::getOrInsertDeclaration(
7268 &CGM.getModule(), Intrinsic::bpf_preserve_field_info,
7269 {FieldAddr->getType()});
7270 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
7271 }
7272 case BPF::BI__builtin_btf_type_id:
7273 case BPF::BI__builtin_preserve_type_info: {
7274 if (!getDebugInfo()) {
7275 CGM.Error(E->getExprLoc(), "using builtin function without -g");
7276 return nullptr;
7277 }
7278
7279 const Expr *Arg0 = E->getArg(0);
7280 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
7281 Arg0->getType(), Arg0->getExprLoc());
7282
7283 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7284 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
7285 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
7286
7287 llvm::Function *FnDecl;
7288 if (BuiltinID == BPF::BI__builtin_btf_type_id)
7289 FnDecl = Intrinsic::getOrInsertDeclaration(
7290 &CGM.getModule(), Intrinsic::bpf_btf_type_id, {});
7291 else
7292 FnDecl = Intrinsic::getOrInsertDeclaration(
7293 &CGM.getModule(), Intrinsic::bpf_preserve_type_info, {});
7294 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
7295 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
7296 return Fn;
7297 }
7298 case BPF::BI__builtin_preserve_enum_value: {
7299 if (!getDebugInfo()) {
7300 CGM.Error(E->getExprLoc(), "using builtin function without -g");
7301 return nullptr;
7302 }
7303
7304 const Expr *Arg0 = E->getArg(0);
7305 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
7306 Arg0->getType(), Arg0->getExprLoc());
7307
7308 // Find enumerator
7309 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
7310 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
7311 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
7312 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
7313
7314 auto InitVal = Enumerator->getInitVal();
7315 std::string InitValStr;
7316 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
7317 InitValStr = std::to_string(InitVal.getSExtValue());
7318 else
7319 InitValStr = std::to_string(InitVal.getZExtValue());
7320 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
7321 Value *EnumStrVal = Builder.CreateGlobalString(EnumStr);
7322
7323 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7324 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
7325 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
7326
7327 llvm::Function *IntrinsicFn = Intrinsic::getOrInsertDeclaration(
7328 &CGM.getModule(), Intrinsic::bpf_preserve_enum_value, {});
7329 CallInst *Fn =
7330 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
7331 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
7332 return Fn;
7333 }
7334 }
7335}
7336
7339 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
7340 "Not a power-of-two sized vector!");
7341 bool AllConstants = true;
7342 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
7343 AllConstants &= isa<Constant>(Ops[i]);
7344
7345 // If this is a constant vector, create a ConstantVector.
7346 if (AllConstants) {
7348 for (llvm::Value *Op : Ops)
7349 CstOps.push_back(cast<Constant>(Op));
7350 return llvm::ConstantVector::get(CstOps);
7351 }
7352
7353 // Otherwise, insertelement the values to build the vector.
7354 Value *Result = llvm::PoisonValue::get(
7355 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
7356
7357 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7358 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
7359
7360 return Result;
7361}
7362
7363Value *CodeGenFunction::EmitAArch64CpuInit() {
7364 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
7365 llvm::FunctionCallee Func =
7366 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
7367 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
7368 cast<llvm::GlobalValue>(Func.getCallee())
7369 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
7370 return Builder.CreateCall(Func);
7371}
7372
7373Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
7374 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
7375 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
7377 ArgStr.split(OrigFeatures, "+");
7379 for (StringRef Feature : OrigFeatures) {
7380 Feature = Feature.trim();
7381 if (!llvm::AArch64::parseFMVExtension(Feature))
7382 return Builder.getFalse();
7383 if (Feature != "default")
7384 Features.push_back(Feature);
7385 }
7386 return EmitAArch64CpuSupports(Features);
7387}
7388
7389llvm::Value *
7390CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
7391 llvm::APInt FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
7392 Value *Result = Builder.getTrue();
7393 if (FeaturesMask != 0) {
7394 // Get features from structure in runtime library
7395 // struct {
7396 // unsigned long long features;
7397 // } __aarch64_cpu_features;
7398 llvm::Type *STy = llvm::StructType::get(Int64Ty);
7399 llvm::Constant *AArch64CPUFeatures =
7400 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
7401 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
7402 llvm::Value *CpuFeatures = Builder.CreateGEP(
7403 STy, AArch64CPUFeatures,
7404 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
7405 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
7407 Value *Mask = Builder.getInt(FeaturesMask.trunc(64));
7408 Value *Bitset = Builder.CreateAnd(Features, Mask);
7409 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
7410 Result = Builder.CreateAnd(Result, Cmp);
7411 }
7412 return Result;
7413}
Utilities used for generating code for AArch64 that are shared between the classic and ClangIR code-g...
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)
#define NEONMAP0(NameBase)
#define V(N, I)
Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E)
static cir::VectorType getSVEVectorForElementType(CIRGenModule &cgm, mlir::Type eltTy)
static const ARMVectorIntrinsicInfo * findARMVectorIntrinsicInMap(ArrayRef< ARMVectorIntrinsicInfo > intrinsicMap, unsigned builtinID, bool &mapProvenSorted)
static Value * EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, SpecialRegisterAccessKind AccessKind, StringRef SysReg="")
Definition ARM.cpp:2016
static llvm::Value * ARMMVEVectorReinterpret(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *DestType)
Definition ARM.cpp:2891
static llvm::VectorType * GetFloatNeonType(CodeGenFunction *CGF, NeonTypeFlags IntTypeFlags)
Definition ARM.cpp:401
static llvm::Value * MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, uint32_t Shift, bool Unsigned)
Definition ARM.cpp:2861
static llvm::Value * SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, llvm::Type *T, bool Unsigned)
Definition ARM.cpp:2854
static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
Definition ARM.cpp:3908
static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[]
Definition ARM.cpp:1038
static Value * EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< Value * > &Ops, llvm::Triple::ArchType Arch)
Definition ARM.cpp:3113
static void swapCommutativeSMEOperands(unsigned BuiltinID, SmallVectorImpl< Value * > &Ops)
Definition ARM.cpp:4380
static bool AArch64SISDIntrinsicsProvenSorted
Definition ARM.cpp:1050
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[]
Definition ARM.cpp:1020
static llvm::Value * ARMMVECreateFPToSI(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *Ty)
Definition ARM.cpp:2985
static bool HasExtraNeonArgument(unsigned BuiltinID)
Return true if BuiltinID is an overloaded Neon intrinsic with an extra argument that specifies the ve...
Definition ARM.cpp:2137
static llvm::Value * ARMMVECreateFPToUI(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *Ty)
Definition ARM.cpp:2993
static llvm::Value * ARMMVECreateSIToFP(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *Ty)
Definition ARM.cpp:2969
static bool AArch64SVEIntrinsicsProvenSorted
Definition ARM.cpp:1051
static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
Definition ARM.cpp:3914
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context)
Definition ARM.cpp:2850
static bool AArch64SMEIntrinsicsProvenSorted
Definition ARM.cpp:1052
static llvm::Value * VectorZip(CGBuilderTy &Builder, llvm::Value *V0, llvm::Value *V1)
Definition ARM.cpp:2928
constexpr unsigned SVEBitsPerBlock
Definition ARM.cpp:3393
static const std::pair< unsigned, unsigned > NEONEquivalentIntrinsicMap[]
Definition ARM.cpp:862
static llvm::FixedVectorType * GetNeonType(CodeGenFunction *CGF, NeonTypeFlags TypeFlags, bool HasFastHalfType=true, bool V1Ty=false, bool AllowBFloatArgsAndRet=true)
Definition ARM.cpp:361
Value * readX18AsPtr(CodeGenFunction &CGF)
Helper for the read/write/add/inc X18 builtins: read the X18 register and return it as an i8 pointer.
Definition ARM.cpp:4479
static llvm::Value * ARMMVEVectorElementReverse(CGBuilderTy &Builder, llvm::Value *V, unsigned ReverseWidth)
Definition ARM.cpp:2955
static std::optional< CodeGenFunction::MSVCIntrin > translateAarch64ToMsvcIntrin(unsigned BuiltinID)
Definition ARM.cpp:33
static std::optional< CodeGenFunction::MSVCIntrin > translateArmToMsvcIntrin(unsigned BuiltinID)
Definition ARM.cpp:192
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[]
Definition ARM.cpp:540
static llvm::Value * ARMMVECreateUIToFP(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *Ty)
Definition ARM.cpp:2977
static llvm::Value * VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd)
Definition ARM.cpp:2917
static llvm::Value * ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT)
Definition ARM.cpp:2943
SpecialRegisterAccessKind
Definition ARM.cpp:2007
@ VolatileRead
Definition ARM.cpp:2009
@ NormalRead
Definition ARM.cpp:2008
@ Write
Definition ARM.cpp:2010
static llvm::Value * ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V)
Definition ARM.cpp:2883
static bool NEONSIMDIntrinsicsProvenSorted
Definition ARM.cpp:1047
static Value * EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo, SmallVectorImpl< Value * > &Ops, const CallExpr *E)
Definition ARM.cpp:1118
static Value * emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID, llvm::Type *Ty, ArrayRef< Value * > Args)
Definition ARM.cpp:344
static Value * EmitRangePrefetchBuiltin(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Definition ARM.cpp:2082
static Value * packTBLDVectorList(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Value *ExtOp, Value *IndexOp, llvm::Type *ResTy, unsigned IntID, const char *Name)
Definition ARM.cpp:1934
static bool AArch64SIMDIntrinsicsProvenSorted
Definition ARM.cpp:1049
TokenType getType() const
Returns the token's type, e.g.
Result
Implement __builtin_bit_cast and related operations.
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
HLSLResourceBindingAttr::RegisterType RegisterType
Definition SemaHLSL.cpp:57
Enumerates target-specific builtins in their own namespaces within namespace clang.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:223
QualType GetBuiltinType(unsigned ID, GetBuiltinTypeError &Error, unsigned *IntegerConstantArgs=nullptr) const
Return the type for the specified builtin.
@ GE_None
No error.
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition Expr.h:2946
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition Expr.h:3150
FunctionDecl * getDirectCallee()
If the callee is a FunctionDecl, return it. Otherwise return null.
Definition Expr.h:3129
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition Expr.h:3137
QualType getCallReturnType(const ASTContext &Ctx) const
getCallReturnType - Get the return type of the call expr.
Definition Expr.cpp:1609
static CharUnits One()
One - Construct a CharUnits quantity of one.
Definition CharUnits.h:58
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition CharUnits.h:63
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition Address.h:128
static Address invalid()
Definition Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition Address.h:253
CharUnits getAlignment() const
Definition Address.h:194
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition Address.h:276
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition Address.h:204
An aggregate value slot.
Definition CGValue.h:551
Address getAddress() const
Definition CGValue.h:691
llvm::DIType * getOrCreateStandaloneType(QualType Ty, SourceLocation Loc)
Emit standalone debug info for a type.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Value * EmitSVEPredicateCast(llvm::Value *Pred, llvm::ScalableVectorType *VTy)
Definition ARM.cpp:3402
llvm::Value * EmitFP8NeonFMLACall(unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy, SmallVectorImpl< llvm::Value * > &Ops, const CallExpr *E, const char *name)
Definition ARM.cpp:472
llvm::Value * BuildVector(ArrayRef< llvm::Value * > Ops)
Definition ARM.cpp:7338
llvm::Value * EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, const CallExpr *E)
llvm::Value * EmitSVEStructLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3577
llvm::Value * EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID, bool IsZExtReturn)
Definition ARM.cpp:3686
llvm::Value * EmitFP8NeonCall(unsigned IID, ArrayRef< llvm::Type * > Tys, SmallVectorImpl< llvm::Value * > &O, const CallExpr *E, const char *name)
Definition ARM.cpp:447
llvm::Type * ConvertType(QualType T)
llvm::Value * EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3541
llvm::Value * EmitSMEReadWrite(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3832
llvm::Type * SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags)
SVEBuiltinMemEltTy - Returns the memory element type for this memory access builtin.
Definition ARM.cpp:3268
llvm::Value * EmitSVEScatterStore(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3495
llvm::Value * EmitSVEMaskedStore(const CallExpr *, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
Definition ARM.cpp:3743
llvm::Value * EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Definition ARM.cpp:4404
void GetAArch64SVEProcessedOperands(unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, SVETypeFlags TypeFlags)
Definition ARM.cpp:3971
llvm::Value * EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3453
llvm::Function * LookupNeonLLVMIntrinsic(unsigned IntrinsicID, unsigned Modifier, llvm::Type *ArgTy, const CallExpr *E)
Definition ARM.cpp:1076
llvm::Type * getEltType(const SVETypeFlags &TypeFlags)
Definition ARM.cpp:3284
llvm::Value * EmitCommonNeonBuiltinExpr(unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch)
Definition ARM.cpp:1184
llvm::Value * EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx, const llvm::ElementCount &Count)
llvm::Value * EmitSVEDupX(llvm::Value *Scalar)
const TargetInfo & getTarget() const
llvm::Value * EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Definition ARM.cpp:4014
llvm::Value * EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0, llvm::Type *Ty1, bool Extract, SmallVectorImpl< llvm::Value * > &Ops, const CallExpr *E, const char *name)
Definition ARM.cpp:493
llvm::Value * EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition ARM.cpp:2164
llvm::ScalableVectorType * getSVEType(const SVETypeFlags &TypeFlags)
Definition ARM.cpp:3357
llvm::Value * EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Definition ARM.cpp:7229
llvm::Value * EmitSMELdrStr(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3854
llvm::Value * EmitSVETupleCreate(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
Definition ARM.cpp:3959
llvm::Value * EmitSVEPMull(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
Definition ARM.cpp:3640
llvm::Value * EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition ARM.cpp:3001
AggValueSlot CreateAggTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateAggTemp - Create a temporary memory object for the given aggregate type.
llvm::Value * EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt, llvm::Type *Ty, bool usgn, const char *name)
Definition ARM.cpp:509
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
SmallVector< llvm::Type *, 2 > getSVEOverloadTypes(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
Definition ARM.cpp:3921
const TargetCodeGenInfo & getTargetHooks() const
RawAddress CreateMemTempWithoutCast(QualType T, const Twine &Name="tmp")
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment without...
Definition CGExpr.cpp:231
llvm::Value * EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty, bool negateForRightShift)
Definition ARM.cpp:487
bool IsInPreservedAIRegion
True if CodeGen currently emits code inside a preserved access index region.
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch)
Definition ARM.cpp:4490
llvm::Value * EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E)
llvm::Value * EmitFP8NeonFDOTCall(unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy, SmallVectorImpl< llvm::Value * > &Ops, const CallExpr *E, const char *name)
Definition ARM.cpp:456
llvm::Value * vectorWrapScalar16(llvm::Value *Op)
Definition ARM.cpp:3256
llvm::Value * EmitARMCDEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition ARM.cpp:3102
llvm::Value * EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty, const llvm::CmpInst::Predicate Pred, const llvm::Twine &Name="")
Definition ARM.cpp:1905
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
Definition CGExpr.cpp:309
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitSVEMovl(const SVETypeFlags &TypeFlags, llvm::ArrayRef< llvm::Value * > Ops, unsigned BuiltinID)
Definition ARM.cpp:3658
llvm::Value * EmitSVEPredicateTupleCast(llvm::Value *PredTuple, llvm::StructType *Ty)
Definition ARM.cpp:3437
llvm::Value * EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
Definition ARM.cpp:3665
llvm::Value * EmitSMEZero(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3844
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
Definition CGExpr.cpp:1598
llvm::Value * EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3592
llvm::Value * EmitSMELd1St1(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3797
void EmitAggExpr(const Expr *E, AggValueSlot AS)
EmitAggExpr - Emit the computation of the specified expression of aggregate type.
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
llvm::Value * EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty)
Definition ARM.cpp:3885
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
Definition CGExpr.cpp:1714
llvm::LLVMContext & getLLVMContext()
llvm::ScalableVectorType * getSVEPredType(const SVETypeFlags &TypeFlags)
Definition ARM.cpp:3322
llvm::Value * EmitNeonCall(llvm::Function *F, SmallVectorImpl< llvm::Value * > &O, const char *name, unsigned shift=0, bool rightshift=false)
Definition ARM.cpp:427
llvm::Value * EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags, ArrayRef< llvm::Value * > Ops)
Definition ARM.cpp:3948
This class organizes the cross-function state that is used while generating LLVM code.
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false, bool AssumeConvergent=false)
Create or return a runtime function declaration with the specified type and name.
ASTContext & getContext() const
llvm::LLVMContext & getLLVMContext()
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys={})
llvm::Value * getRawBitFieldPointer(CodeGenFunction &CGF) const
Definition CGValue.h:441
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
ReturnValueSlot - Contains the address where the return value of a function can be stored,...
Definition CGCall.h:382
This represents one expression.
Definition Expr.h:112
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer,...
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition Expr.cpp:3104
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
Definition Expr.cpp:3095
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx) const
getIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
ExprObjectKind getObjectKind() const
getObjectKind - The object kind that this expression produces.
Definition Expr.h:454
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition Expr.cpp:283
QualType getType() const
Definition Expr.h:144
Represents a function declaration or definition.
Definition Decl.h:2018
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition Decl.h:301
Flags to identify the types for overloaded Neon builtins.
EltType getEltType() const
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition TypeBase.h:3392
QualType getPointeeType() const
Definition TypeBase.h:3402
A (possibly-)qualified type.
Definition TypeBase.h:937
The collection of all-type qualifiers we support.
Definition TypeBase.h:331
Flags to identify the types for overloaded SVE builtins.
bool isZExtReturn() const
bool isReverseUSDOT() const
bool isOverloadNone() const
MemEltType getMemEltType() const
bool isGatherLoad() const
EltType getEltType() const
bool isOverloadFirstandLast() const
bool isOverloadDefault() const
bool isPrefetch() const
bool isOverloadWhileRW() const
bool isTupleSet() const
bool isReverseMergeAnyAccOp() const
bool isReductionQV() const
bool isTupleGet() const
bool isInsertOp1SVALL() const
bool isAppendSVALL() const
bool isReverseMergeAnyBinOp() const
bool isStructStore() const
bool isOverloadDefaultAndOp0() const
bool isTupleCreate() const
bool isGatherPrefetch() const
bool hasSplatOperand() const
MergeType getMergeType() const
bool isByteIndexed() const
bool isStructLoad() const
bool isOverloadWhileOrMultiVecCvt() const
unsigned getSplatOperand() const
bool isScatterStore() const
bool isReverseCompare() const
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
virtual bool hasFastHalfType() const
Determine whether the target has fast native support for operations on half types.
Definition TargetInfo.h:712
bool isBigEndian() const
The base class of the type hierarchy.
Definition TypeBase.h:1875
const T * castAs() const
Member-template castAs<specific type>.
Definition TypeBase.h:9344
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:790
QualType getType() const
Definition Decl.h:723
QualType getType() const
Definition Value.cpp:238
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
Definition CGValue.h:155
const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[]
const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[]
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
Definition Address.h:330
@ OK_BitField
A bitfield object is a bitfield on a C or C++ record.
Definition Specifiers.h:155
@ Result
The result type of a method or function.
Definition TypeBase.h:905
U cast(CodeGen::Address addr)
Definition Address.h:327
@ Enumerator
Enumerator value with fixed underlying type.
Definition Sema.h:840
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 __packed_splat2 __packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 uint32_t
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::Type * HalfTy
half, bfloat, float, double
EvalResult is a struct with detailed info about an evaluated expression.
Definition Expr.h:648
#define trunc(__x)
Definition tgmath.h:1216
#define round(__x)
Definition tgmath.h:1148
#define rint(__x)
Definition tgmath.h:1131
#define floor(__x)
Definition tgmath.h:722
#define ceil(__x)
Definition tgmath.h:601