clang 23.0.0git
ARM.cpp
Go to the documentation of this file.
1//===---------- ARM.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ABIInfo.h"
14#include "CGBuiltin.h"
15#include "CGDebugInfo.h"
16#include "TargetInfo.h"
19#include "llvm/IR/InlineAsm.h"
20#include "llvm/IR/IntrinsicsAArch64.h"
21#include "llvm/IR/IntrinsicsARM.h"
22#include "llvm/IR/IntrinsicsBPF.h"
23#include "llvm/TargetParser/AArch64TargetParser.h"
24
25#include <numeric>
26
27using namespace clang;
28using namespace CodeGen;
29using namespace llvm;
30using namespace clang::aarch64;
31
// Translate an AArch64 builtin ID into the CodeGenFunction::MSVCIntrin
// enumerator that represents it.
//
// Builtin IDs that differ only by their 8/16/64/Pointer name variant share a
// single enumerator, while the _acq, _rel and _nf name suffixes each map to
// their own enumerator. Returns std::nullopt for any builtin ID that is not
// listed in the switch below.
32static std::optional<CodeGenFunction::MSVCIntrin>
33translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
34 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
35 switch (BuiltinID) {
  // Anything not listed below has no entry in this table.
36 default:
37 return std::nullopt;
  // Bit-scan builtins: the plain and 64 forms share one enumerator.
38 case clang::AArch64::BI_BitScanForward:
39 case clang::AArch64::BI_BitScanForward64:
40 return MSVCIntrin::_BitScanForward;
41 case clang::AArch64::BI_BitScanReverse:
42 case clang::AArch64::BI_BitScanReverse64:
43 return MSVCIntrin::_BitScanReverse;
  // 64-suffixed interlocked builtins without an _acq/_rel/_nf suffix.
44 case clang::AArch64::BI_InterlockedAnd64:
45 return MSVCIntrin::_InterlockedAnd;
46 case clang::AArch64::BI_InterlockedExchange64:
47 return MSVCIntrin::_InterlockedExchange;
48 case clang::AArch64::BI_InterlockedExchangeAdd64:
49 return MSVCIntrin::_InterlockedExchangeAdd;
50 case clang::AArch64::BI_InterlockedExchangeSub64:
51 return MSVCIntrin::_InterlockedExchangeSub;
52 case clang::AArch64::BI_InterlockedOr64:
53 return MSVCIntrin::_InterlockedOr;
54 case clang::AArch64::BI_InterlockedXor64:
55 return MSVCIntrin::_InterlockedXor;
56 case clang::AArch64::BI_InterlockedDecrement64:
57 return MSVCIntrin::_InterlockedDecrement;
58 case clang::AArch64::BI_InterlockedIncrement64:
59 return MSVCIntrin::_InterlockedIncrement;
  // _acq/_rel/_nf suffixed builtins, grouped per operation and suffix.
60 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
61 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
62 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
63 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
64 return MSVCIntrin::_InterlockedExchangeAdd_acq;
65 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
66 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
67 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
68 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
69 return MSVCIntrin::_InterlockedExchangeAdd_rel;
70 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
71 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
72 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
73 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
74 return MSVCIntrin::_InterlockedExchangeAdd_nf;
75 case clang::AArch64::BI_InterlockedExchange8_acq:
76 case clang::AArch64::BI_InterlockedExchange16_acq:
77 case clang::AArch64::BI_InterlockedExchange_acq:
78 case clang::AArch64::BI_InterlockedExchange64_acq:
79 case clang::AArch64::BI_InterlockedExchangePointer_acq:
80 return MSVCIntrin::_InterlockedExchange_acq;
81 case clang::AArch64::BI_InterlockedExchange8_rel:
82 case clang::AArch64::BI_InterlockedExchange16_rel:
83 case clang::AArch64::BI_InterlockedExchange_rel:
84 case clang::AArch64::BI_InterlockedExchange64_rel:
85 case clang::AArch64::BI_InterlockedExchangePointer_rel:
86 return MSVCIntrin::_InterlockedExchange_rel;
87 case clang::AArch64::BI_InterlockedExchange8_nf:
88 case clang::AArch64::BI_InterlockedExchange16_nf:
89 case clang::AArch64::BI_InterlockedExchange_nf:
90 case clang::AArch64::BI_InterlockedExchange64_nf:
91 case clang::AArch64::BI_InterlockedExchangePointer_nf:
92 return MSVCIntrin::_InterlockedExchange_nf;
93 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
94 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
95 case clang::AArch64::BI_InterlockedCompareExchange_acq:
96 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
97 case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
98 return MSVCIntrin::_InterlockedCompareExchange_acq;
99 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
100 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
101 case clang::AArch64::BI_InterlockedCompareExchange_rel:
102 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
103 case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
104 return MSVCIntrin::_InterlockedCompareExchange_rel;
  // Note: unlike the _acq/_rel groups above, no Pointer_nf form is listed here.
105 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
106 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
107 case clang::AArch64::BI_InterlockedCompareExchange_nf:
108 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
109 return MSVCIntrin::_InterlockedCompareExchange_nf;
  // The 128 compare-exchange builtins each have a dedicated enumerator.
110 case clang::AArch64::BI_InterlockedCompareExchange128:
111 return MSVCIntrin::_InterlockedCompareExchange128;
112 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
113 return MSVCIntrin::_InterlockedCompareExchange128_acq;
114 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
115 return MSVCIntrin::_InterlockedCompareExchange128_nf;
116 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
117 return MSVCIntrin::_InterlockedCompareExchange128_rel;
118 case clang::AArch64::BI_InterlockedOr8_acq:
119 case clang::AArch64::BI_InterlockedOr16_acq:
120 case clang::AArch64::BI_InterlockedOr_acq:
121 case clang::AArch64::BI_InterlockedOr64_acq:
122 return MSVCIntrin::_InterlockedOr_acq;
123 case clang::AArch64::BI_InterlockedOr8_rel:
124 case clang::AArch64::BI_InterlockedOr16_rel:
125 case clang::AArch64::BI_InterlockedOr_rel:
126 case clang::AArch64::BI_InterlockedOr64_rel:
127 return MSVCIntrin::_InterlockedOr_rel;
128 case clang::AArch64::BI_InterlockedOr8_nf:
129 case clang::AArch64::BI_InterlockedOr16_nf:
130 case clang::AArch64::BI_InterlockedOr_nf:
131 case clang::AArch64::BI_InterlockedOr64_nf:
132 return MSVCIntrin::_InterlockedOr_nf;
133 case clang::AArch64::BI_InterlockedXor8_acq:
134 case clang::AArch64::BI_InterlockedXor16_acq:
135 case clang::AArch64::BI_InterlockedXor_acq:
136 case clang::AArch64::BI_InterlockedXor64_acq:
137 return MSVCIntrin::_InterlockedXor_acq;
138 case clang::AArch64::BI_InterlockedXor8_rel:
139 case clang::AArch64::BI_InterlockedXor16_rel:
140 case clang::AArch64::BI_InterlockedXor_rel:
141 case clang::AArch64::BI_InterlockedXor64_rel:
142 return MSVCIntrin::_InterlockedXor_rel;
143 case clang::AArch64::BI_InterlockedXor8_nf:
144 case clang::AArch64::BI_InterlockedXor16_nf:
145 case clang::AArch64::BI_InterlockedXor_nf:
146 case clang::AArch64::BI_InterlockedXor64_nf:
147 return MSVCIntrin::_InterlockedXor_nf;
148 case clang::AArch64::BI_InterlockedAnd8_acq:
149 case clang::AArch64::BI_InterlockedAnd16_acq:
150 case clang::AArch64::BI_InterlockedAnd_acq:
151 case clang::AArch64::BI_InterlockedAnd64_acq:
152 return MSVCIntrin::_InterlockedAnd_acq;
153 case clang::AArch64::BI_InterlockedAnd8_rel:
154 case clang::AArch64::BI_InterlockedAnd16_rel:
155 case clang::AArch64::BI_InterlockedAnd_rel:
156 case clang::AArch64::BI_InterlockedAnd64_rel:
157 return MSVCIntrin::_InterlockedAnd_rel;
158 case clang::AArch64::BI_InterlockedAnd8_nf:
159 case clang::AArch64::BI_InterlockedAnd16_nf:
160 case clang::AArch64::BI_InterlockedAnd_nf:
161 case clang::AArch64::BI_InterlockedAnd64_nf:
162 return MSVCIntrin::_InterlockedAnd_nf;
163 case clang::AArch64::BI_InterlockedIncrement16_acq:
164 case clang::AArch64::BI_InterlockedIncrement_acq:
165 case clang::AArch64::BI_InterlockedIncrement64_acq:
166 return MSVCIntrin::_InterlockedIncrement_acq;
167 case clang::AArch64::BI_InterlockedIncrement16_rel:
168 case clang::AArch64::BI_InterlockedIncrement_rel:
169 case clang::AArch64::BI_InterlockedIncrement64_rel:
170 return MSVCIntrin::_InterlockedIncrement_rel;
171 case clang::AArch64::BI_InterlockedIncrement16_nf:
172 case clang::AArch64::BI_InterlockedIncrement_nf:
173 case clang::AArch64::BI_InterlockedIncrement64_nf:
174 return MSVCIntrin::_InterlockedIncrement_nf;
175 case clang::AArch64::BI_InterlockedDecrement16_acq:
176 case clang::AArch64::BI_InterlockedDecrement_acq:
177 case clang::AArch64::BI_InterlockedDecrement64_acq:
178 return MSVCIntrin::_InterlockedDecrement_acq;
179 case clang::AArch64::BI_InterlockedDecrement16_rel:
180 case clang::AArch64::BI_InterlockedDecrement_rel:
181 case clang::AArch64::BI_InterlockedDecrement64_rel:
182 return MSVCIntrin::_InterlockedDecrement_rel;
183 case clang::AArch64::BI_InterlockedDecrement16_nf:
184 case clang::AArch64::BI_InterlockedDecrement_nf:
185 case clang::AArch64::BI_InterlockedDecrement64_nf:
186 return MSVCIntrin::_InterlockedDecrement_nf;
187 }
  // Every switch path above returns, so control never reaches this point.
188 llvm_unreachable("must return from switch");
189}
190
// Translate a 32-bit ARM builtin ID into the CodeGenFunction::MSVCIntrin
// enumerator that represents it.
//
// Builtin IDs that differ only by their 8/16/64/Pointer name variant share a
// single enumerator, while the _acq, _rel and _nf name suffixes each map to
// their own enumerator. Returns std::nullopt for any builtin ID that is not
// listed in the switch below.
191static std::optional<CodeGenFunction::MSVCIntrin>
192translateArmToMsvcIntrin(unsigned BuiltinID) {
193 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
194 switch (BuiltinID) {
  // Anything not listed below has no entry in this table.
195 default:
196 return std::nullopt;
  // Bit-scan builtins: the plain and 64 forms share one enumerator.
197 case clang::ARM::BI_BitScanForward:
198 case clang::ARM::BI_BitScanForward64:
199 return MSVCIntrin::_BitScanForward;
200 case clang::ARM::BI_BitScanReverse:
201 case clang::ARM::BI_BitScanReverse64:
202 return MSVCIntrin::_BitScanReverse;
  // 64-suffixed interlocked builtins without an _acq/_rel/_nf suffix.
203 case clang::ARM::BI_InterlockedAnd64:
204 return MSVCIntrin::_InterlockedAnd;
205 case clang::ARM::BI_InterlockedExchange64:
206 return MSVCIntrin::_InterlockedExchange;
207 case clang::ARM::BI_InterlockedExchangeAdd64:
208 return MSVCIntrin::_InterlockedExchangeAdd;
209 case clang::ARM::BI_InterlockedExchangeSub64:
210 return MSVCIntrin::_InterlockedExchangeSub;
211 case clang::ARM::BI_InterlockedOr64:
212 return MSVCIntrin::_InterlockedOr;
213 case clang::ARM::BI_InterlockedXor64:
214 return MSVCIntrin::_InterlockedXor;
215 case clang::ARM::BI_InterlockedDecrement64:
216 return MSVCIntrin::_InterlockedDecrement;
217 case clang::ARM::BI_InterlockedIncrement64:
218 return MSVCIntrin::_InterlockedIncrement;
  // _acq/_rel/_nf suffixed builtins, grouped per operation and suffix.
219 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
220 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
221 case clang::ARM::BI_InterlockedExchangeAdd_acq:
222 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
223 return MSVCIntrin::_InterlockedExchangeAdd_acq;
224 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
225 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
226 case clang::ARM::BI_InterlockedExchangeAdd_rel:
227 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
228 return MSVCIntrin::_InterlockedExchangeAdd_rel;
229 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
230 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
231 case clang::ARM::BI_InterlockedExchangeAdd_nf:
232 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
233 return MSVCIntrin::_InterlockedExchangeAdd_nf;
234 case clang::ARM::BI_InterlockedExchange8_acq:
235 case clang::ARM::BI_InterlockedExchange16_acq:
236 case clang::ARM::BI_InterlockedExchange_acq:
237 case clang::ARM::BI_InterlockedExchange64_acq:
238 case clang::ARM::BI_InterlockedExchangePointer_acq:
239 return MSVCIntrin::_InterlockedExchange_acq;
240 case clang::ARM::BI_InterlockedExchange8_rel:
241 case clang::ARM::BI_InterlockedExchange16_rel:
242 case clang::ARM::BI_InterlockedExchange_rel:
243 case clang::ARM::BI_InterlockedExchange64_rel:
244 case clang::ARM::BI_InterlockedExchangePointer_rel:
245 return MSVCIntrin::_InterlockedExchange_rel;
246 case clang::ARM::BI_InterlockedExchange8_nf:
247 case clang::ARM::BI_InterlockedExchange16_nf:
248 case clang::ARM::BI_InterlockedExchange_nf:
249 case clang::ARM::BI_InterlockedExchange64_nf:
250 case clang::ARM::BI_InterlockedExchangePointer_nf:
251 return MSVCIntrin::_InterlockedExchange_nf;
252 case clang::ARM::BI_InterlockedCompareExchange8_acq:
253 case clang::ARM::BI_InterlockedCompareExchange16_acq:
254 case clang::ARM::BI_InterlockedCompareExchange_acq:
255 case clang::ARM::BI_InterlockedCompareExchange64_acq:
256 case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
257 return MSVCIntrin::_InterlockedCompareExchange_acq;
258 case clang::ARM::BI_InterlockedCompareExchange8_rel:
259 case clang::ARM::BI_InterlockedCompareExchange16_rel:
260 case clang::ARM::BI_InterlockedCompareExchange_rel:
261 case clang::ARM::BI_InterlockedCompareExchange64_rel:
262 case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
263 return MSVCIntrin::_InterlockedCompareExchange_rel;
  // Note: unlike the _acq/_rel groups above, no Pointer_nf form is listed here.
264 case clang::ARM::BI_InterlockedCompareExchange8_nf:
265 case clang::ARM::BI_InterlockedCompareExchange16_nf:
266 case clang::ARM::BI_InterlockedCompareExchange_nf:
267 case clang::ARM::BI_InterlockedCompareExchange64_nf:
268 return MSVCIntrin::_InterlockedCompareExchange_nf;
269 case clang::ARM::BI_InterlockedOr8_acq:
270 case clang::ARM::BI_InterlockedOr16_acq:
271 case clang::ARM::BI_InterlockedOr_acq:
272 case clang::ARM::BI_InterlockedOr64_acq:
273 return MSVCIntrin::_InterlockedOr_acq;
274 case clang::ARM::BI_InterlockedOr8_rel:
275 case clang::ARM::BI_InterlockedOr16_rel:
276 case clang::ARM::BI_InterlockedOr_rel:
277 case clang::ARM::BI_InterlockedOr64_rel:
278 return MSVCIntrin::_InterlockedOr_rel;
279 case clang::ARM::BI_InterlockedOr8_nf:
280 case clang::ARM::BI_InterlockedOr16_nf:
281 case clang::ARM::BI_InterlockedOr_nf:
282 case clang::ARM::BI_InterlockedOr64_nf:
283 return MSVCIntrin::_InterlockedOr_nf;
284 case clang::ARM::BI_InterlockedXor8_acq:
285 case clang::ARM::BI_InterlockedXor16_acq:
286 case clang::ARM::BI_InterlockedXor_acq:
287 case clang::ARM::BI_InterlockedXor64_acq:
288 return MSVCIntrin::_InterlockedXor_acq;
289 case clang::ARM::BI_InterlockedXor8_rel:
290 case clang::ARM::BI_InterlockedXor16_rel:
291 case clang::ARM::BI_InterlockedXor_rel:
292 case clang::ARM::BI_InterlockedXor64_rel:
293 return MSVCIntrin::_InterlockedXor_rel;
294 case clang::ARM::BI_InterlockedXor8_nf:
295 case clang::ARM::BI_InterlockedXor16_nf:
296 case clang::ARM::BI_InterlockedXor_nf:
297 case clang::ARM::BI_InterlockedXor64_nf:
298 return MSVCIntrin::_InterlockedXor_nf;
299 case clang::ARM::BI_InterlockedAnd8_acq:
300 case clang::ARM::BI_InterlockedAnd16_acq:
301 case clang::ARM::BI_InterlockedAnd_acq:
302 case clang::ARM::BI_InterlockedAnd64_acq:
303 return MSVCIntrin::_InterlockedAnd_acq;
304 case clang::ARM::BI_InterlockedAnd8_rel:
305 case clang::ARM::BI_InterlockedAnd16_rel:
306 case clang::ARM::BI_InterlockedAnd_rel:
307 case clang::ARM::BI_InterlockedAnd64_rel:
308 return MSVCIntrin::_InterlockedAnd_rel;
309 case clang::ARM::BI_InterlockedAnd8_nf:
310 case clang::ARM::BI_InterlockedAnd16_nf:
311 case clang::ARM::BI_InterlockedAnd_nf:
312 case clang::ARM::BI_InterlockedAnd64_nf:
313 return MSVCIntrin::_InterlockedAnd_nf;
314 case clang::ARM::BI_InterlockedIncrement16_acq:
315 case clang::ARM::BI_InterlockedIncrement_acq:
316 case clang::ARM::BI_InterlockedIncrement64_acq:
317 return MSVCIntrin::_InterlockedIncrement_acq;
318 case clang::ARM::BI_InterlockedIncrement16_rel:
319 case clang::ARM::BI_InterlockedIncrement_rel:
320 case clang::ARM::BI_InterlockedIncrement64_rel:
321 return MSVCIntrin::_InterlockedIncrement_rel;
322 case clang::ARM::BI_InterlockedIncrement16_nf:
323 case clang::ARM::BI_InterlockedIncrement_nf:
324 case clang::ARM::BI_InterlockedIncrement64_nf:
325 return MSVCIntrin::_InterlockedIncrement_nf;
326 case clang::ARM::BI_InterlockedDecrement16_acq:
327 case clang::ARM::BI_InterlockedDecrement_acq:
328 case clang::ARM::BI_InterlockedDecrement64_acq:
329 return MSVCIntrin::_InterlockedDecrement_acq;
330 case clang::ARM::BI_InterlockedDecrement16_rel:
331 case clang::ARM::BI_InterlockedDecrement_rel:
332 case clang::ARM::BI_InterlockedDecrement64_rel:
333 return MSVCIntrin::_InterlockedDecrement_rel;
334 case clang::ARM::BI_InterlockedDecrement16_nf:
335 case clang::ARM::BI_InterlockedDecrement_nf:
336 case clang::ARM::BI_InterlockedDecrement64_nf:
337 return MSVCIntrin::_InterlockedDecrement_nf;
338 }
  // Every switch path above returns, so control never reaches this point.
339 llvm_unreachable("must return from switch");
340}
341
342// Emit an intrinsic where all operands are of the same type as the result.
343// Depending on mode, this may be a constrained floating-point intrinsic.
//
// When CGF.Builder reports constrained-FP mode, ConstrainedIntrinsicID is
// looked up (with Ty as its type argument) and called through
// CreateConstrainedFPCall; otherwise IntrinsicID is looked up the same way and
// called through CreateCall. Args is passed unchanged to either call.
// NOTE(review): the line carrying the return type, the function name and the
// first parameter is not visible in this listing; the body uses it as `CGF`.
345 unsigned IntrinsicID,
346 unsigned ConstrainedIntrinsicID,
347 llvm::Type *Ty,
348 ArrayRef<Value *> Args) {
349 Function *F;
  // Select the intrinsic declaration that matches the builder's FP mode.
350 if (CGF.Builder.getIsFPConstrained())
351 F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
352 else
353 F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
354
  // The same mode check picks the matching call-creation routine.
355 if (CGF.Builder.getIsFPConstrained())
356 return CGF.Builder.CreateConstrainedFPCall(F, Args);
357
358 return CGF.Builder.CreateCall(F, Args);
359}
360
// Build the llvm::FixedVectorType for the NEON type described by TypeFlags.
//
// The element type is chosen by TypeFlags.getEltType(). Apart from the fixed
// 16 x Int8Ty return, the lane count is 1 when V1Ty is set; otherwise it is a
// per-element base count (8, 4, 2 or 1) shifted left by TypeFlags.isQuad().
// When HasFastHalfType is false, Int16Ty lanes are returned in place of HalfTy;
// when AllowBFloatArgsAndRet is false, Int16Ty lanes are returned in place of
// BFloatTy.
// NOTE(review): the switch's case labels are not visible in this listing, so
// which element kind reaches each return cannot be confirmed from here.
361static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
362 NeonTypeFlags TypeFlags,
363 bool HasFastHalfType = true,
364 bool V1Ty = false,
365 bool AllowBFloatArgsAndRet = true) {
366 int IsQuad = TypeFlags.isQuad();
367 switch (TypeFlags.getEltType()) {
371 return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
374 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
376 if (AllowBFloatArgsAndRet)
377 return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
378 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
380 if (HasFastHalfType)
381 return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
382 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
384 return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
387 return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
389 // FIXME: i128 and f128 are not fully supported in Clang and LLVM.
390 // Many i128 and f128 APIs are missing,
391 // so we use v16i8 to represent poly128 and get pattern matched.
392 return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
394 return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
396 return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
397 }
398 llvm_unreachable("Unknown vector element type!");
399}
400
// Build a floating-point llvm::FixedVectorType selected by
// IntTypeFlags.getEltType(): HalfTy with 4 lanes, FloatTy with 2 lanes or
// DoubleTy with 1 lane, where each lane count is shifted left by
// IntTypeFlags.isQuad(). Any other element type reaches llvm_unreachable.
// NOTE(review): the case labels preceding the three returns are not visible in
// this listing, so the element kind handled by each return is unconfirmed.
401static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
402 NeonTypeFlags IntTypeFlags) {
403 int IsQuad = IntTypeFlags.isQuad();
404 switch (IntTypeFlags.getEltType()) {
406 return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
408 return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
410 return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
411 default:
412 llvm_unreachable("Type can't be converted to floating-point!");
413 }
414}
415
// Builds a shuffle mask by splatting the constant C across Count elements and
// shuffles V with itself using that mask; the result value is named "lane".
// NOTE(review): the first line of this signature (return type, name and the
// V and C parameters used below) is not visible in this listing.
417 const ElementCount &Count) {
418 Value *SV = llvm::ConstantVector::getSplat(Count, C);
419 return Builder.CreateShuffleVector(V, V, SV, "lane");
420}
421
// Convenience form: reads the element count from V's vector type and forwards
// to the three-argument EmitNeonSplat with that count.
// NOTE(review): the signature line is not visible in this listing.
423 ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
424 return EmitNeonSplat(V, C, EC);
425}
426
// Converts each entry of Ops to the type of the matching parameter of F, in
// place, and then emits a call to F named `name`.
//
// When `shift` is non-zero, the operand at index `shift` is converted with
// EmitNeonShiftVector (passing `rightshift` through); every other operand is
// bitcast to the parameter type. The call is emitted with
// CreateConstrainedFPCall when F is a constrained FP intrinsic and with
// CreateCall otherwise.
// NOTE(review): the first line of this signature (return type, name and the
// F and Ops parameters used below) is not visible in this listing.
428 const char *name,
429 unsigned shift, bool rightshift) {
  // j indexes Ops in lock-step with F's parameter list.
430 unsigned j = 0;
431 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
432 ai != ae; ++ai, ++j) {
    // Metadata-typed parameters of constrained FP intrinsics are skipped; the
    // corresponding Ops[j] is left untouched.
433 if (F->isConstrainedFPIntrinsic())
434 if (ai->getType()->isMetadataTy())
435 continue;
436 if (shift > 0 && shift == j)
437 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
438 else
439 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
440 }
441
442 if (F->isConstrainedFPIntrinsic())
443 return Builder.CreateConstrainedFPCall(F, Ops, name);
444 return Builder.CreateCall(F, Ops, name);
445}
446
// Removes the last entry of Ops and passes it to a call of the
// aarch64_set_fpmr intrinsic, then emits intrinsic IID (looked up with Tys)
// over the remaining operands through EmitNeonCall. E is not used in the
// visible body.
// NOTE(review): the first lines of this signature (return type, name and the
// IID, Tys and Ops parameters used below) are not visible in this listing.
450 const CallExpr *E, const char *name) {
451 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
452 Ops.pop_back_val());
453 return EmitNeonCall(CGM.getIntrinsic(IID, Tys), Ops, name);
454}
455
// Emits intrinsic IID through EmitFP8NeonCall using two type arguments: a
// fixed vector of RetTy elements and the type of Ops[1].
//
// The vector's element count is the bit size of Ops[0]'s type divided by the
// bit size of RetTy. When ExtendLaneArg is set, Ops[2] is first replaced by a
// 16 x Int8Ty vector that has the old Ops[2] inserted at index 0 of a poison
// value.
// NOTE(review): the line with the return type and function name is not visible
// in this listing.
457 unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
458 SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
459
460 const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
461 RetTy->getPrimitiveSizeInBits();
462 llvm::Type *Tys[] = {llvm::FixedVectorType::get(RetTy, ElemCount),
463 Ops[1]->getType()};
464 if (ExtendLaneArg) {
465 auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
466 Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
467 uint64_t(0));
468 }
469 return EmitFP8NeonCall(IID, Tys, Ops, E, name);
470}
471
// Emits intrinsic IID through EmitFP8NeonCall using a single type argument: a
// fixed vector of RetTy elements whose count is the bit size of Ops[0]'s type
// divided by the bit size of RetTy.
//
// When ExtendLaneArg is set, Ops[2] is first replaced by a 16 x Int8Ty vector
// that has the old Ops[2] inserted at index 0 of a poison value.
// NOTE(review): the line with the return type and function name is not visible
// in this listing.
473 unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
474 SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
475
476 if (ExtendLaneArg) {
477 auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
478 Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
479 uint64_t(0));
480 }
481 const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
482 RetTy->getPrimitiveSizeInBits();
483 return EmitFP8NeonCall(IID, {llvm::FixedVectorType::get(RetTy, ElemCount)},
484 Ops, E, name);
485}
486
// Returns a signed constant of type Ty holding V's sign-extended value,
// negated when `neg` is set. V is accessed through cast<ConstantInt>, so it
// is expected to be a ConstantInt.
// NOTE(review): the first line of this signature (return type, name and the
// V and Ty parameters used below) is not visible in this listing.
488 bool neg) {
489 int SV = cast<ConstantInt>(V)->getSExtValue();
490 return ConstantInt::getSigned(Ty, neg ? -SV : SV);
491}
492
// Emits intrinsic IID through EmitFP8NeonCall with the type arguments
// {Ty0, Ty1}.
//
// When Extract is set, the second type argument becomes an 8 x Int8Ty vector
// and Ops[0] is replaced by the sub-vector of that type extracted from it at
// index 0.
// NOTE(review): the parameter line declaring `Ops` is not visible in this
// listing.
493Value *CodeGenFunction::EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
494 llvm::Type *Ty1, bool Extract,
496 const CallExpr *E,
497 const char *name) {
498 llvm::Type *Tys[] = {Ty0, Ty1};
499 if (Extract) {
500 // Op[0] is mfloat8x16_t, but the intrinsic converts only the lower part of
501 // the vector.
502 Tys[1] = llvm::FixedVectorType::get(Int8Ty, 8);
503 Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], uint64_t(0));
504 }
505 return EmitFP8NeonCall(IID, Tys, Ops, E, name);
506}
507
508// Right-shift a vector by a constant.
//
// Vec is bitcast to Ty, and Shift (accessed through cast<ConstantInt>) is
// turned into a shift vector with EmitNeonShiftVector. `usgn` selects a
// logical shift (CreateLShr); otherwise an arithmetic shift (CreateAShr) is
// emitted. The result is named `name`.
// NOTE(review): the first line of this signature (return type, name and the
// Vec and Shift parameters used below) is not visible in this listing.
510 llvm::Type *Ty, bool usgn,
511 const char *name) {
512 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
513
514 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
515 int EltSize = VTy->getScalarSizeInBits();
516
517 Vec = Builder.CreateBitCast(Vec, Ty);
518
519 // lshr/ashr are undefined when the shift amount is equal to the vector
520 // element size.
521 if (ShiftAmt == EltSize) {
522 if (usgn) {
523 // Right-shifting an unsigned value by its size yields 0.
524 return llvm::ConstantAggregateZero::get(VTy);
525 } else {
526 // Right-shifting a signed value by its size is equivalent
527 // to a shift of size-1.
528 --ShiftAmt;
529 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
530 }
531 }
532
533 Shift = EmitNeonShiftVector(Shift, Ty, false);
534 if (usgn)
535 return Builder.CreateLShr(Vec, Shift, name);
536 return Builder.CreateAShr(Vec, Shift, name);
537}
538
539// clang-format off
541 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
542 NEONMAP0(splat_lane_v),
543 NEONMAP0(splat_laneq_v),
544 NEONMAP0(splatq_lane_v),
545 NEONMAP0(splatq_laneq_v),
546 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
547 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
548 NEONMAP1(vabs_v, arm_neon_vabs, 0),
549 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
550 NEONMAP0(vadd_v),
551 NEONMAP0(vaddhn_v),
552 NEONMAP0(vaddq_v),
553 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
554 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
555 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
556 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
557 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
558 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
559 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
560 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
561 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
562 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
563 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
564 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
565 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
566 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
567 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
568 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
569 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
570 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
571 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
572 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
573 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
574 NEONMAP1(vcage_v, arm_neon_vacge, 0),
575 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
576 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
577 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
578 NEONMAP1(vcale_v, arm_neon_vacge, 0),
579 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
580 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
581 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
582 NEONMAP0(vceqz_v),
583 NEONMAP0(vceqzq_v),
584 NEONMAP0(vcgez_v),
585 NEONMAP0(vcgezq_v),
586 NEONMAP0(vcgtz_v),
587 NEONMAP0(vcgtzq_v),
588 NEONMAP0(vclez_v),
589 NEONMAP0(vclezq_v),
590 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
591 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
592 NEONMAP0(vcltz_v),
593 NEONMAP0(vcltzq_v),
594 NEONMAP1(vclz_v, ctlz, Add1ArgType),
595 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
596 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
597 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
598 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
599 NEONMAP0(vcvt_f16_s16),
600 NEONMAP0(vcvt_f16_u16),
601 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
602 NEONMAP0(vcvt_f32_v),
603 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
604 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
605 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
606 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
607 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
608 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
609 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
610 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
611 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
612 NEONMAP0(vcvt_s16_f16),
613 NEONMAP0(vcvt_s32_v),
614 NEONMAP0(vcvt_s64_v),
615 NEONMAP0(vcvt_u16_f16),
616 NEONMAP0(vcvt_u32_v),
617 NEONMAP0(vcvt_u64_v),
618 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
619 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
620 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
621 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
622 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
623 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
624 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
625 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
626 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
627 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
628 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
629 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
630 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
631 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
632 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
633 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
634 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
635 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
636 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
637 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
638 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
639 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
640 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
641 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
642 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
643 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
644 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
645 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
646 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
647 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
648 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
649 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
650 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
651 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
652 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
653 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
654 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
655 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
656 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
657 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
658 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
659 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
660 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
661 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
662 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
663 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
664 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
665 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
666 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
667 NEONMAP0(vcvtq_f16_s16),
668 NEONMAP0(vcvtq_f16_u16),
669 NEONMAP0(vcvtq_f32_v),
670 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
671 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
672 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
673 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
674 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
675 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
676 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
677 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
678 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
679 NEONMAP0(vcvtq_s16_f16),
680 NEONMAP0(vcvtq_s32_v),
681 NEONMAP0(vcvtq_s64_v),
682 NEONMAP0(vcvtq_u16_f16),
683 NEONMAP0(vcvtq_u32_v),
684 NEONMAP0(vcvtq_u64_v),
685 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
686 NEONMAP1(vdot_u32, arm_neon_udot, 0),
687 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
688 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
689 NEONMAP0(vext_v),
690 NEONMAP0(vextq_v),
691 NEONMAP0(vfma_v),
692 NEONMAP0(vfmaq_v),
693 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
694 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
695 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
696 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
697 NEONMAP0(vld1_dup_v),
698 NEONMAP1(vld1_v, arm_neon_vld1, 0),
699 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
700 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
701 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
702 NEONMAP0(vld1q_dup_v),
703 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
704 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
705 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
706 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
707 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
708 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
709 NEONMAP1(vld2_v, arm_neon_vld2, 0),
710 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
711 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
712 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
713 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
714 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
715 NEONMAP1(vld3_v, arm_neon_vld3, 0),
716 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
717 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
718 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
719 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
720 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
721 NEONMAP1(vld4_v, arm_neon_vld4, 0),
722 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
723 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
724 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
725 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
726 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
727 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
728 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
729 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
730 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
731 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
732 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
733 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
734 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
735 NEONMAP0(vmovl_v),
736 NEONMAP0(vmovn_v),
737 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
738 NEONMAP0(vmull_v),
739 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
740 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
741 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
742 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
743 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
744 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
745 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
746 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
747 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
748 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
749 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
750 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
751 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
752 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
753 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
754 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
755 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
756 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
757 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
758 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
759 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
760 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
761 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
762 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
763 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
764 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
765 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
766 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
767 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
768 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
769 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
770 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
771 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
772 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
773 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
774 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
775 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
776 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
777 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
778 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
779 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
780 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
781 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
782 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
783 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
784 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
785 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
786 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
787 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
788 NEONMAP1(vrnd_v, trunc, Add1ArgType),
789 NEONMAP1(vrnda_v, round, Add1ArgType),
790 NEONMAP1(vrndaq_v, round, Add1ArgType),
791 NEONMAP0(vrndi_v),
792 NEONMAP0(vrndiq_v),
793 NEONMAP1(vrndm_v, floor, Add1ArgType),
794 NEONMAP1(vrndmq_v, floor, Add1ArgType),
795 NEONMAP1(vrndn_v, roundeven, Add1ArgType),
796 NEONMAP1(vrndnq_v, roundeven, Add1ArgType),
797 NEONMAP1(vrndp_v, ceil, Add1ArgType),
798 NEONMAP1(vrndpq_v, ceil, Add1ArgType),
799 NEONMAP1(vrndq_v, trunc, Add1ArgType),
800 NEONMAP1(vrndx_v, rint, Add1ArgType),
801 NEONMAP1(vrndxq_v, rint, Add1ArgType),
802 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
803 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
804 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
805 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
806 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
807 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
808 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
809 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
810 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
811 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
812 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
813 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
814 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
815 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
816 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
817 NEONMAP0(vshl_n_v),
818 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
819 NEONMAP0(vshll_n_v),
820 NEONMAP0(vshlq_n_v),
821 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
822 NEONMAP0(vshr_n_v),
823 NEONMAP0(vshrn_n_v),
824 NEONMAP0(vshrq_n_v),
825 NEONMAP1(vst1_v, arm_neon_vst1, 0),
826 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
827 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
828 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
829 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
830 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
831 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
832 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
833 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
834 NEONMAP1(vst2_v, arm_neon_vst2, 0),
835 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
836 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
837 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
838 NEONMAP1(vst3_v, arm_neon_vst3, 0),
839 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
840 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
841 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
842 NEONMAP1(vst4_v, arm_neon_vst4, 0),
843 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
844 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
845 NEONMAP0(vsubhn_v),
846 NEONMAP0(vtrn_v),
847 NEONMAP0(vtrnq_v),
848 NEONMAP0(vtst_v),
849 NEONMAP0(vtstq_v),
850 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
851 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
852 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
853 NEONMAP0(vuzp_v),
854 NEONMAP0(vuzpq_v),
855 NEONMAP0(vzip_v),
856 NEONMAP0(vzipq_v)
857};
858
859// clang-format on
860
861// Some intrinsics are equivalent for codegen.
862static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
863 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
864 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
865 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
866 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
867 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
868 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
869 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
870 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
871 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
872 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
873 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
874 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
875 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
876 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
877 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
878 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
879 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
880 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
881 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
882 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
883 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
884 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
885 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
886 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
887 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
888 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
889 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
890 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
891 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
892 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
893 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
894 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
895 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
896 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
897 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
898 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
899 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
900 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
901 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
902 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
903 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
904 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
905 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
906 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
907 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
908 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
909 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
910 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
911 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
912 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
913 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
914 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
915 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
916 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
917 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
918 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
919 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
920 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
921 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
922 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
923 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
924 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
925 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
926 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
927 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
928 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
929 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
930 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
931 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
932 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
933 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
934 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
935 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
936 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
937 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
938 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
939 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
940 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
941 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
942 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
943 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
944 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
945 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
946 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
947 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
948 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
949 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
950 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
951 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
952 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
953 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
954 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
955 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
956 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
957 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
958 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
959 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
960 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
961 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
962 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
963 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
964 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
965 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
966 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
967 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
968 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
969 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
970 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
971 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
972 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
973 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
974 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
975 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
976 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
977 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
978 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
979 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
980 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
981 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
982 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
983 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
984 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
985 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
986 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
987 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
988 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
989 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
990 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
991 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
992 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
993 // arbitrary one to be handled as the canonical variation.
994 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
995 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
996 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
997 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
998 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
999 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1000 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1001 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1002 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1003 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1004 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1005 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1006};
1007
1008#undef NEONMAP0
1009#undef NEONMAP1
1010#undef NEONMAP2
1011
// SVEMAP1 expands to one brace-initialized table entry for an SVE builtin.
// In order, the entry holds: the stringified NameBase, the builtin ID
// SVE::BI__builtin_sve_<NameBase>, Intrinsic::<LLVMIntrinsic>, a literal 0,
// and TypeModifier.
// NOTE(review): the literal 0 presumably fills an "alternate intrinsic" slot
// of the entry struct, which is declared outside this section -- confirm.
1012#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
1013 { \
1014 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
1015 TypeModifier \
1016 }
1017
// SVEMAP2 expands to the same entry shape as SVEMAP1, but with 0 in both
// numeric slots that follow the builtin ID (no LLVM intrinsic is named).
// NOTE(review): presumably used for builtins whose lowering is not a direct
// intrinsic lookup -- confirm against the code that consumes the table.
1018#define SVEMAP2(NameBase, TypeModifier) \
1019 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
1021#define GET_SVE_LLVM_INTRINSIC_MAP
1022#include "clang/Basic/arm_sve_builtin_cg.inc"
1023#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
1024#undef GET_SVE_LLVM_INTRINSIC_MAP
1025};
1026
1027#undef SVEMAP1
1028#undef SVEMAP2
1029
// SMEMAP1 expands to one brace-initialized table entry for an SME builtin.
// In order, the entry holds: the stringified NameBase, the builtin ID
// SME::BI__builtin_sme_<NameBase>, Intrinsic::<LLVMIntrinsic>, a literal 0,
// and TypeModifier.
// NOTE(review): the literal 0 presumably fills an "alternate intrinsic" slot
// of the entry struct, which is declared outside this section -- confirm.
1030#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
1031 { \
1032 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
1033 TypeModifier \
1034 }
1035
// SMEMAP2 expands to the same entry shape as SMEMAP1, but with 0 in both
// numeric slots that follow the builtin ID (no LLVM intrinsic is named).
// NOTE(review): presumably used for builtins whose lowering is not a direct
// intrinsic lookup -- confirm against the code that consumes the table.
1036#define SMEMAP2(NameBase, TypeModifier) \
1037 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
1039#define GET_SME_LLVM_INTRINSIC_MAP
1040#include "clang/Basic/arm_sme_builtin_cg.inc"
1041#undef GET_SME_LLVM_INTRINSIC_MAP
1042};
1043
1044#undef SMEMAP1
1045#undef SMEMAP2
1046
1048
1053
1054// Check if builtin `BuiltinID` is present in `IntrinsicMap`. If yes, returns
1055// a pointer to the corresponding info struct.
// Otherwise returns nullptr.
//
// `MapProvenSorted` is a caller-owned flag. In builds with assertions enabled
// it is used to run the sortedness check on `IntrinsicMap` only once per flag:
// the check is skipped when the flag is already true and the flag is set to
// true after the check passes. In NDEBUG builds the flag is not touched.
//
// NOTE(review): the declarator line that carries the function name and the
// `IntrinsicMap` parameter is not present in this listing.
1056static const ARMVectorIntrinsicInfo *
1058 unsigned BuiltinID, bool &MapProvenSorted) {
1059
1060#ifndef NDEBUG
// The lookup below is a lower_bound search, which is only meaningful on a
// sorted range; verify that precondition once per map.
1061 if (!MapProvenSorted) {
1062 assert(llvm::is_sorted(IntrinsicMap));
1063 MapProvenSorted = true;
1064 }
1065#endif
1066
// Find the first entry that does not order before BuiltinID.
// NOTE(review): the line declaring `Builtin`, which receives this result, is
// not present in this listing; the entry-vs-ID ordering it relies on is
// defined outside this section.
1068 llvm::lower_bound(IntrinsicMap, BuiltinID);
1069
// lower_bound may land on a different, larger ID or on end(); accept only an
// exact match.
1070 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
1071 return Builtin;
1072
1073 return nullptr;
1074}
1075
// Builds the list of overload types (`Tys`) selected by the `Modifier` bit
// flags and returns the declaration obtained from
// CGM.getIntrinsic(IntrinsicID, Tys).
//
//   Modifier - bitmask of the type-modifier flags tested below.
//   ArgType  - type used for the argument overload slot(s); wrapped in a
//              fixed vector first when VectorizeArgTypes is set.
//   E        - the builtin call; only its return type is consulted, and only
//              when AddRetType is set.
//
// NOTE(review): the first declarator line (function name and the IntrinsicID
// parameter) and the declaration of `Tys` are not present in this listing.
1077 unsigned Modifier,
1078 llvm::Type *ArgType,
1079 const CallExpr *E) {
// Vector width in bits requested by the modifier; 0 means neither width flag
// is set, in which case any vectorized type below gets exactly one element.
1080 int VectorSize = 0;
1081 if (Modifier & Use64BitVectors)
1082 VectorSize = 64;
1083 else if (Modifier & Use128BitVectors)
1084 VectorSize = 128;
1085
1086 // Return type.
1088 if (Modifier & AddRetType) {
1089 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
// Optionally widen the return type to a fixed vector whose element count
// is VectorSize divided by the element's bit width.
1090 if (Modifier & VectorizeRetType)
1091 Ty = llvm::FixedVectorType::get(
1092 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
1093
1094 Tys.push_back(Ty);
1095 }
1096
1097 // Arguments.
1098 if (Modifier & VectorizeArgTypes) {
1099 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
1100 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
1101 }
1102
// ArgType is appended once when either Add1ArgType or Add2ArgTypes is set...
1103 if (Modifier & (Add1ArgType | Add2ArgTypes))
1104 Tys.push_back(ArgType);
1105
// ...and a second time when Add2ArgTypes is set.
1106 if (Modifier & Add2ArgTypes)
1107 Tys.push_back(ArgType);
1108
// InventFloatType appends FloatTy as an extra overload type regardless of
// ArgType.
1109 if (Modifier & InventFloatType)
1110 Tys.push_back(FloatTy);
1111
1112 return CGM.getIntrinsic(IntrinsicID, Tys);
1113}
1114
1115//===----------------------------------------------------------------------===//
1116// Emit-helpers
1117//===----------------------------------------------------------------------===//
// Emits a call to the LLVM intrinsic recorded in `SISDInfo` for a scalar
// NEON builtin call `E`, using the already-evaluated operands in `Ops`.
//
// Steps, as implemented below:
//   1. Swap the first two operands for the listed "le"/"lt" comparison
//      builtins.
//   2. Outside strict-FP mode, replace aarch64_neon_fcvtzs / fcvtzu with
//      fptosi_sat / fptoui_sat.
//   3. For each intrinsic parameter whose bit width differs from the matching
//      operand, place the scalar operand in lane 0 of a poison vector.
//   4. Return lane 0 of the call result when the builtin's result type is
//      narrower than the intrinsic's; otherwise bitcast to the result type.
//
// `Ops` is modified in place (swap and vector wrapping).
// NOTE(review): the declarator line carrying the function name and return
// type is not present in this listing.
1119 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
1120 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
1121 assert(SISDInfo.LLVMIntrinsic && "Generic code assumes a valid intrinsic");
1122
1123 switch (SISDInfo.BuiltinID) {
1124 case NEON::BI__builtin_neon_vcled_s64:
1125 case NEON::BI__builtin_neon_vcled_u64:
1126 case NEON::BI__builtin_neon_vcles_f32:
1127 case NEON::BI__builtin_neon_vcled_f64:
1128 case NEON::BI__builtin_neon_vcltd_s64:
1129 case NEON::BI__builtin_neon_vcltd_u64:
1130 case NEON::BI__builtin_neon_vclts_f32:
1131 case NEON::BI__builtin_neon_vcltd_f64:
1132 case NEON::BI__builtin_neon_vcales_f32:
1133 case NEON::BI__builtin_neon_vcaled_f64:
1134 case NEON::BI__builtin_neon_vcalts_f32:
1135 case NEON::BI__builtin_neon_vcaltd_f64:
1136 // Only one direction of comparisons actually exist, cmle is actually a cmge
1137 // with swapped operands. The table gives us the right intrinsic but we
1138 // still need to do the swap.
1139 std::swap(Ops[0], Ops[1]);
1140 break;
1141 }
1142
1143 // Use fptosi.sat/fptoui.sat unless under strict FP.
1144 unsigned LLVMIntrinsic = SISDInfo.LLVMIntrinsic;
1145 if (!CGF.Builder.getIsFPConstrained()) {
1146 if (LLVMIntrinsic == Intrinsic::aarch64_neon_fcvtzs)
1147 LLVMIntrinsic = Intrinsic::fptosi_sat;
1148 else if (LLVMIntrinsic == Intrinsic::aarch64_neon_fcvtzu)
1149 LLVMIntrinsic = Intrinsic::fptoui_sat;
1150 }
// The overload types are derived from the type of the builtin's first
// argument plus the table entry's TypeModifier.
1151 llvm::Type *ArgTy = CGF.ConvertType(E->getArg(0)->getType());
1152 Function *F = CGF.LookupNeonLLVMIntrinsic(LLVMIntrinsic,
1153 SISDInfo.TypeModifier, ArgTy, E);
1154
// Walk the intrinsic's formal parameters and Ops in lockstep (index j).
// C0 is the lane index 0 used for both the insert and the extract below.
// NOTE(review): assumes Ops has at least as many entries as F has
// parameters -- nothing here checks it.
1155 int j = 0;
1156 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
1157 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
1158 ai != ae; ++ai, ++j) {
// Note: this ArgTy shadows the outer ArgTy; here it is the type of the
// intrinsic's j-th parameter.
1159 llvm::Type *ArgTy = ai->getType();
// Operand already has the same bit width as the parameter: leave it as is.
1160 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
1161 ArgTy->getPrimitiveSizeInBits())
1162 continue;
1163 assert(
1164 ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy() &&
1165 "Expecting vector LLVM intrinsic type and scalar Clang builtin type!");
1166
1167 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
1168 // it before inserting.
1169 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
1170 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
// Place the scalar in lane 0 of an otherwise-poison vector of the
// parameter's type.
1171 Ops[j] =
1172 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
1173 }
1174
1175 Value *Result = CGF.EmitNeonCall(F, Ops, SISDInfo.NameHint);
1176 llvm::Type *ResultType = CGF.ConvertType(E->getType());
// If the intrinsic returned something wider than the builtin's result type,
// the scalar result is taken from lane 0.
1177 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
1178 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
1179 return CGF.Builder.CreateExtractElement(Result, C0);
1180
// Otherwise only reinterpret the result as the builtin's result type.
1181 return CGF.Builder.CreateBitCast(Result, ResultType, SISDInfo.NameHint);
1182}
1183
1185 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
1186 const char *NameHint, unsigned Modifier, const CallExpr *E,
1187 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
1188 llvm::Triple::ArchType Arch) {
1189
1190 // Extract the trailing immediate argument that encodes the type discriminator
1191 // for this overloaded intrinsic.
1192 // TODO: Move to the parent code that takes care of argument processing.
1193 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
1194 std::optional<llvm::APSInt> NeonTypeConst =
1196 if (!NeonTypeConst)
1197 return nullptr;
1198
1199 // Determine the type of this overloaded NEON intrinsic.
1200 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
1201 const bool Usgn = Type.isUnsigned();
1202 const bool Quad = Type.isQuad();
1203 const bool Floating = Type.isFloatingPoint();
1204 const bool HasFastHalfType = getTarget().hasFastHalfType();
1205 const bool AllowBFloatArgsAndRet =
1206 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
1207
1208 llvm::FixedVectorType *VTy =
1209 GetNeonType(this, Type, HasFastHalfType, false, AllowBFloatArgsAndRet);
1210 llvm::Type *Ty = VTy;
1211 if (!Ty)
1212 return nullptr;
1213
1214 auto getAlignmentValue32 = [&](Address addr) -> Value* {
1215 return Builder.getInt32(addr.getAlignment().getQuantity());
1216 };
1217
1218 unsigned Int = LLVMIntrinsic;
1219 if ((Modifier & UnsignedAlts) && !Usgn)
1220 Int = AltLLVMIntrinsic;
1221
1222 switch (BuiltinID) {
1223 default: break;
1224 case NEON::BI__builtin_neon_splat_lane_v:
1225 case NEON::BI__builtin_neon_splat_laneq_v:
1226 case NEON::BI__builtin_neon_splatq_lane_v:
1227 case NEON::BI__builtin_neon_splatq_laneq_v: {
1228 auto NumElements = VTy->getElementCount();
1229 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
1230 NumElements = NumElements * 2;
1231 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
1232 NumElements = NumElements.divideCoefficientBy(2);
1233
1234 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
1235 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
1236 }
1237 case NEON::BI__builtin_neon_vpadd_v:
1238 case NEON::BI__builtin_neon_vpaddq_v:
1239 // We don't allow fp/int overloading of intrinsics.
1240 if (VTy->getElementType()->isFloatingPointTy() &&
1241 Int == Intrinsic::aarch64_neon_addp)
1242 Int = Intrinsic::aarch64_neon_faddp;
1243 break;
1244 case NEON::BI__builtin_neon_vabs_v:
1245 case NEON::BI__builtin_neon_vabsq_v:
1246 if (VTy->getElementType()->isFloatingPointTy())
1247 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
1248 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
1249 case NEON::BI__builtin_neon_vadd_v:
1250 case NEON::BI__builtin_neon_vaddq_v: {
1251 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
1252 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
1253 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
1254 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
1255 return Builder.CreateBitCast(Ops[0], Ty);
1256 }
1257 case NEON::BI__builtin_neon_vaddhn_v: {
1258 llvm::FixedVectorType *SrcTy =
1259 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1260
1261 // %sum = add <4 x i32> %lhs, %rhs
1262 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1263 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
1264 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
1265
1266 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
1267 Constant *ShiftAmt =
1268 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
1269 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
1270
1271 // %res = trunc <4 x i32> %high to <4 x i16>
1272 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
1273 }
1274 case NEON::BI__builtin_neon_vcale_v:
1275 case NEON::BI__builtin_neon_vcaleq_v:
1276 case NEON::BI__builtin_neon_vcalt_v:
1277 case NEON::BI__builtin_neon_vcaltq_v:
1278 std::swap(Ops[0], Ops[1]);
1279 [[fallthrough]];
1280 case NEON::BI__builtin_neon_vcage_v:
1281 case NEON::BI__builtin_neon_vcageq_v:
1282 case NEON::BI__builtin_neon_vcagt_v:
1283 case NEON::BI__builtin_neon_vcagtq_v: {
1284 llvm::Type *Ty;
1285 switch (VTy->getScalarSizeInBits()) {
1286 default: llvm_unreachable("unexpected type");
1287 case 32:
1288 Ty = FloatTy;
1289 break;
1290 case 64:
1291 Ty = DoubleTy;
1292 break;
1293 case 16:
1294 Ty = HalfTy;
1295 break;
1296 }
1297 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
1298 llvm::Type *Tys[] = { VTy, VecFlt };
1299 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1300 return EmitNeonCall(F, Ops, NameHint);
1301 }
1302 case NEON::BI__builtin_neon_vceqz_v:
1303 case NEON::BI__builtin_neon_vceqzq_v:
1305 Ops[0], Ty, Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ, "vceqz");
1306 case NEON::BI__builtin_neon_vcgez_v:
1307 case NEON::BI__builtin_neon_vcgezq_v:
1309 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
1310 "vcgez");
1311 case NEON::BI__builtin_neon_vclez_v:
1312 case NEON::BI__builtin_neon_vclezq_v:
1314 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
1315 "vclez");
1316 case NEON::BI__builtin_neon_vcgtz_v:
1317 case NEON::BI__builtin_neon_vcgtzq_v:
1319 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
1320 "vcgtz");
1321 case NEON::BI__builtin_neon_vcltz_v:
1322 case NEON::BI__builtin_neon_vcltzq_v:
1324 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
1325 "vcltz");
1326 case NEON::BI__builtin_neon_vclz_v:
1327 case NEON::BI__builtin_neon_vclzq_v:
1328 // We generate target-independent intrinsic, which needs a second argument
1329 // for whether or not clz of zero is undefined; on ARM it isn't.
1330 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
1331 break;
1332 case NEON::BI__builtin_neon_vcvt_f32_v:
1333 case NEON::BI__builtin_neon_vcvtq_f32_v:
1334 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1335 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
1336 HasFastHalfType);
1337 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1338 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1339 case NEON::BI__builtin_neon_vcvt_f16_s16:
1340 case NEON::BI__builtin_neon_vcvt_f16_u16:
1341 case NEON::BI__builtin_neon_vcvtq_f16_s16:
1342 case NEON::BI__builtin_neon_vcvtq_f16_u16:
1343 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1344 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
1345 HasFastHalfType);
1346 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1347 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1348 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
1349 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
1350 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
1351 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
1352 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
1353 Function *F = CGM.getIntrinsic(Int, Tys);
1354 return EmitNeonCall(F, Ops, "vcvt_n");
1355 }
1356 case NEON::BI__builtin_neon_vcvt_n_f32_v:
1357 case NEON::BI__builtin_neon_vcvt_n_f64_v:
1358 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
1359 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
1360 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
1361 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
1362 Function *F = CGM.getIntrinsic(Int, Tys);
1363 return EmitNeonCall(F, Ops, "vcvt_n");
1364 }
1365 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
1366 case NEON::BI__builtin_neon_vcvt_n_s32_v:
1367 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
1368 case NEON::BI__builtin_neon_vcvt_n_u32_v:
1369 case NEON::BI__builtin_neon_vcvt_n_s64_v:
1370 case NEON::BI__builtin_neon_vcvt_n_u64_v:
1371 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
1372 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
1373 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
1374 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
1375 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
1376 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
1377 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
1378 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1379 return EmitNeonCall(F, Ops, "vcvt_n");
1380 }
1381 case NEON::BI__builtin_neon_vcvt_s32_v:
1382 case NEON::BI__builtin_neon_vcvt_u32_v:
1383 case NEON::BI__builtin_neon_vcvt_s64_v:
1384 case NEON::BI__builtin_neon_vcvt_u64_v:
1385 case NEON::BI__builtin_neon_vcvt_s16_f16:
1386 case NEON::BI__builtin_neon_vcvt_u16_f16:
1387 case NEON::BI__builtin_neon_vcvtq_s32_v:
1388 case NEON::BI__builtin_neon_vcvtq_u32_v:
1389 case NEON::BI__builtin_neon_vcvtq_s64_v:
1390 case NEON::BI__builtin_neon_vcvtq_u64_v:
1391 case NEON::BI__builtin_neon_vcvtq_s16_f16:
1392 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
1393 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
1394 if (Int) {
1395 // AArch64: use fptosi.sat/fptoui.sat unless under strict FP.
1396 if (!Builder.getIsFPConstrained())
1397 Int = Usgn ? Intrinsic::fptoui_sat : Intrinsic::fptosi_sat;
1398 llvm::Type *Tys[2] = {Ty, Ops[0]->getType()};
1399 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
1400 }
1401 // FIXME: ARM uses plain fptoui/fptosi which have UB on out-of-range
1402 // values. These should also use saturating intrinsics.
1403 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
1404 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
1405 }
1406 case NEON::BI__builtin_neon_vcvta_s16_f16:
1407 case NEON::BI__builtin_neon_vcvta_s32_v:
1408 case NEON::BI__builtin_neon_vcvta_s64_v:
1409 case NEON::BI__builtin_neon_vcvta_u16_f16:
1410 case NEON::BI__builtin_neon_vcvta_u32_v:
1411 case NEON::BI__builtin_neon_vcvta_u64_v:
1412 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
1413 case NEON::BI__builtin_neon_vcvtaq_s32_v:
1414 case NEON::BI__builtin_neon_vcvtaq_s64_v:
1415 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
1416 case NEON::BI__builtin_neon_vcvtaq_u32_v:
1417 case NEON::BI__builtin_neon_vcvtaq_u64_v:
1418 case NEON::BI__builtin_neon_vcvtn_s16_f16:
1419 case NEON::BI__builtin_neon_vcvtn_s32_v:
1420 case NEON::BI__builtin_neon_vcvtn_s64_v:
1421 case NEON::BI__builtin_neon_vcvtn_u16_f16:
1422 case NEON::BI__builtin_neon_vcvtn_u32_v:
1423 case NEON::BI__builtin_neon_vcvtn_u64_v:
1424 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
1425 case NEON::BI__builtin_neon_vcvtnq_s32_v:
1426 case NEON::BI__builtin_neon_vcvtnq_s64_v:
1427 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
1428 case NEON::BI__builtin_neon_vcvtnq_u32_v:
1429 case NEON::BI__builtin_neon_vcvtnq_u64_v:
1430 case NEON::BI__builtin_neon_vcvtp_s16_f16:
1431 case NEON::BI__builtin_neon_vcvtp_s32_v:
1432 case NEON::BI__builtin_neon_vcvtp_s64_v:
1433 case NEON::BI__builtin_neon_vcvtp_u16_f16:
1434 case NEON::BI__builtin_neon_vcvtp_u32_v:
1435 case NEON::BI__builtin_neon_vcvtp_u64_v:
1436 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
1437 case NEON::BI__builtin_neon_vcvtpq_s32_v:
1438 case NEON::BI__builtin_neon_vcvtpq_s64_v:
1439 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
1440 case NEON::BI__builtin_neon_vcvtpq_u32_v:
1441 case NEON::BI__builtin_neon_vcvtpq_u64_v:
1442 case NEON::BI__builtin_neon_vcvtm_s16_f16:
1443 case NEON::BI__builtin_neon_vcvtm_s32_v:
1444 case NEON::BI__builtin_neon_vcvtm_s64_v:
1445 case NEON::BI__builtin_neon_vcvtm_u16_f16:
1446 case NEON::BI__builtin_neon_vcvtm_u32_v:
1447 case NEON::BI__builtin_neon_vcvtm_u64_v:
1448 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
1449 case NEON::BI__builtin_neon_vcvtmq_s32_v:
1450 case NEON::BI__builtin_neon_vcvtmq_s64_v:
1451 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
1452 case NEON::BI__builtin_neon_vcvtmq_u32_v:
1453 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
1454 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
1455 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
1456 }
1457 case NEON::BI__builtin_neon_vcvtx_f32_v: {
1458 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
1459 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
1460
1461 }
1462 case NEON::BI__builtin_neon_vext_v:
1463 case NEON::BI__builtin_neon_vextq_v: {
1464 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
1465 SmallVector<int, 16> Indices;
1466 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1467 Indices.push_back(i+CV);
1468
1469 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1470 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1471 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
1472 }
1473 case NEON::BI__builtin_neon_vfma_v:
1474 case NEON::BI__builtin_neon_vfmaq_v: {
1475 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1476 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1477 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1478
1479 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
1481 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
1482 {Ops[1], Ops[2], Ops[0]});
1483 }
1484 case NEON::BI__builtin_neon_vld1_v:
1485 case NEON::BI__builtin_neon_vld1q_v: {
1486 llvm::Type *Tys[] = {Ty, Int8PtrTy};
1487 Ops.push_back(getAlignmentValue32(PtrOp0));
1488 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
1489 }
1490 case NEON::BI__builtin_neon_vld1_x2_v:
1491 case NEON::BI__builtin_neon_vld1q_x2_v:
1492 case NEON::BI__builtin_neon_vld1_x3_v:
1493 case NEON::BI__builtin_neon_vld1q_x3_v:
1494 case NEON::BI__builtin_neon_vld1_x4_v:
1495 case NEON::BI__builtin_neon_vld1q_x4_v: {
1496 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
1497 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1498 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
1499 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
1500 }
1501 case NEON::BI__builtin_neon_vld2_v:
1502 case NEON::BI__builtin_neon_vld2q_v:
1503 case NEON::BI__builtin_neon_vld3_v:
1504 case NEON::BI__builtin_neon_vld3q_v:
1505 case NEON::BI__builtin_neon_vld4_v:
1506 case NEON::BI__builtin_neon_vld4q_v:
1507 case NEON::BI__builtin_neon_vld2_dup_v:
1508 case NEON::BI__builtin_neon_vld2q_dup_v:
1509 case NEON::BI__builtin_neon_vld3_dup_v:
1510 case NEON::BI__builtin_neon_vld3q_dup_v:
1511 case NEON::BI__builtin_neon_vld4_dup_v:
1512 case NEON::BI__builtin_neon_vld4q_dup_v: {
1513 llvm::Type *Tys[] = {Ty, Int8PtrTy};
1514 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1515 Value *Align = getAlignmentValue32(PtrOp1);
1516 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
1517 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
1518 }
1519 case NEON::BI__builtin_neon_vld1_dup_v:
1520 case NEON::BI__builtin_neon_vld1q_dup_v: {
1521 Value *V = PoisonValue::get(Ty);
1522 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
1523 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
1524 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
1525 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
1526 return EmitNeonSplat(Ops[0], CI);
1527 }
1528 case NEON::BI__builtin_neon_vld2_lane_v:
1529 case NEON::BI__builtin_neon_vld2q_lane_v:
1530 case NEON::BI__builtin_neon_vld3_lane_v:
1531 case NEON::BI__builtin_neon_vld3q_lane_v:
1532 case NEON::BI__builtin_neon_vld4_lane_v:
1533 case NEON::BI__builtin_neon_vld4q_lane_v: {
1534 llvm::Type *Tys[] = {Ty, Int8PtrTy};
1535 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1536 for (unsigned I = 2; I < Ops.size() - 1; ++I)
1537 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
1538 Ops.push_back(getAlignmentValue32(PtrOp1));
1539 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
1540 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
1541 }
1542 case NEON::BI__builtin_neon_vmovl_v: {
1543 llvm::FixedVectorType *DTy =
1544 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
1545 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
1546 if (Usgn)
1547 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
1548 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
1549 }
1550 case NEON::BI__builtin_neon_vmovn_v: {
1551 llvm::FixedVectorType *QTy =
1552 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1553 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
1554 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
1555 }
1556 case NEON::BI__builtin_neon_vmull_v:
1557 // FIXME: the integer vmull operations could be emitted in terms of pure
1558 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
1559 // hoisting the exts outside loops. Until global ISel comes along that can
1560 // see through such movement this leads to bad CodeGen. So we need an
1561 // intrinsic for now.
1562 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
1563 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
1564 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
1565 case NEON::BI__builtin_neon_vpadal_v:
1566 case NEON::BI__builtin_neon_vpadalq_v: {
1567 // The source operand type has twice as many elements of half the size.
1568 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
1569 llvm::Type *EltTy =
1570 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
1571 auto *NarrowTy =
1572 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
1573 llvm::Type *Tys[2] = { Ty, NarrowTy };
1574 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
1575 }
1576 case NEON::BI__builtin_neon_vpaddl_v:
1577 case NEON::BI__builtin_neon_vpaddlq_v: {
1578 // The source operand type has twice as many elements of half the size.
1579 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
1580 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
1581 auto *NarrowTy =
1582 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
1583 llvm::Type *Tys[2] = { Ty, NarrowTy };
1584 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
1585 }
1586 case NEON::BI__builtin_neon_vqdmlal_v:
1587 case NEON::BI__builtin_neon_vqdmlsl_v: {
1588 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
1589 Ops[1] =
1590 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
1591 Ops.resize(2);
1592 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
1593 }
1594 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
1595 case NEON::BI__builtin_neon_vqdmulh_lane_v:
1596 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
1597 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
1598 auto *RTy = cast<llvm::FixedVectorType>(Ty);
1599 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
1600 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
1601 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
1602 RTy->getNumElements() * 2);
1603 llvm::Type *Tys[2] = {
1604 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
1605 /*isQuad*/ false))};
1606 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
1607 }
1608 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
1609 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
1610 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
1611 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
1612 llvm::Type *Tys[2] = {
1613 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
1614 /*isQuad*/ true))};
1615 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
1616 }
1617 case NEON::BI__builtin_neon_vqshl_n_v:
1618 case NEON::BI__builtin_neon_vqshlq_n_v:
1619 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
1620 1, false);
1621 case NEON::BI__builtin_neon_vqshlu_n_v:
1622 case NEON::BI__builtin_neon_vqshluq_n_v:
1623 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
1624 1, false);
1625 case NEON::BI__builtin_neon_vrecpe_v:
1626 case NEON::BI__builtin_neon_vrecpeq_v:
1627 case NEON::BI__builtin_neon_vrsqrte_v:
1628 case NEON::BI__builtin_neon_vrsqrteq_v:
1629 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
1630 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
1631 case NEON::BI__builtin_neon_vrndi_v:
1632 case NEON::BI__builtin_neon_vrndiq_v:
1633 Int = Builder.getIsFPConstrained()
1634 ? Intrinsic::experimental_constrained_nearbyint
1635 : Intrinsic::nearbyint;
1636 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
1637 case NEON::BI__builtin_neon_vrshr_n_v:
1638 case NEON::BI__builtin_neon_vrshrq_n_v:
1639 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
1640 1, true);
1641 case NEON::BI__builtin_neon_vsha512hq_u64:
1642 case NEON::BI__builtin_neon_vsha512h2q_u64:
1643 case NEON::BI__builtin_neon_vsha512su0q_u64:
1644 case NEON::BI__builtin_neon_vsha512su1q_u64: {
1645 Function *F = CGM.getIntrinsic(Int);
1646 return EmitNeonCall(F, Ops, "");
1647 }
1648 case NEON::BI__builtin_neon_vshl_n_v:
1649 case NEON::BI__builtin_neon_vshlq_n_v:
1650 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
1651 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
1652 "vshl_n");
1653 case NEON::BI__builtin_neon_vshll_n_v: {
1654 llvm::FixedVectorType *SrcTy =
1655 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
1656 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1657 if (Usgn)
1658 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
1659 else
1660 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
1661 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
1662 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
1663 }
1664 case NEON::BI__builtin_neon_vshrn_n_v: {
1665 llvm::FixedVectorType *SrcTy =
1666 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1667 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1668 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
1669 if (Usgn)
1670 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
1671 else
1672 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
1673 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
1674 }
1675 case NEON::BI__builtin_neon_vshr_n_v:
1676 case NEON::BI__builtin_neon_vshrq_n_v:
1677 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
1678 case NEON::BI__builtin_neon_vst1_v:
1679 case NEON::BI__builtin_neon_vst1q_v:
1680 case NEON::BI__builtin_neon_vst2_v:
1681 case NEON::BI__builtin_neon_vst2q_v:
1682 case NEON::BI__builtin_neon_vst3_v:
1683 case NEON::BI__builtin_neon_vst3q_v:
1684 case NEON::BI__builtin_neon_vst4_v:
1685 case NEON::BI__builtin_neon_vst4q_v:
1686 case NEON::BI__builtin_neon_vst2_lane_v:
1687 case NEON::BI__builtin_neon_vst2q_lane_v:
1688 case NEON::BI__builtin_neon_vst3_lane_v:
1689 case NEON::BI__builtin_neon_vst3q_lane_v:
1690 case NEON::BI__builtin_neon_vst4_lane_v:
1691 case NEON::BI__builtin_neon_vst4q_lane_v: {
1692 llvm::Type *Tys[] = {Int8PtrTy, Ty};
1693 Ops.push_back(getAlignmentValue32(PtrOp0));
1694 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
1695 }
1696 case NEON::BI__builtin_neon_vsm3partw1q_u32:
1697 case NEON::BI__builtin_neon_vsm3partw2q_u32:
1698 case NEON::BI__builtin_neon_vsm3ss1q_u32:
1699 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
1700 case NEON::BI__builtin_neon_vsm4eq_u32: {
1701 Function *F = CGM.getIntrinsic(Int);
1702 return EmitNeonCall(F, Ops, "");
1703 }
1704 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
1705 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
1706 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
1707 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
1708 Function *F = CGM.getIntrinsic(Int);
1709 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
1710 return EmitNeonCall(F, Ops, "");
1711 }
1712 case NEON::BI__builtin_neon_vst1_x2_v:
1713 case NEON::BI__builtin_neon_vst1q_x2_v:
1714 case NEON::BI__builtin_neon_vst1_x3_v:
1715 case NEON::BI__builtin_neon_vst1q_x3_v:
1716 case NEON::BI__builtin_neon_vst1_x4_v:
1717 case NEON::BI__builtin_neon_vst1q_x4_v: {
1718 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
1719 // in AArch64 it comes last. We may want to stick to one or another.
1720 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
1721 Arch == llvm::Triple::aarch64_32) {
1722 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
1723 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
1724 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
1725 }
1726 llvm::Type *Tys[2] = {DefaultPtrTy, VTy};
1727 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
1728 }
1729 case NEON::BI__builtin_neon_vsubhn_v: {
1730 llvm::FixedVectorType *SrcTy =
1731 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1732
1733 // %sum = add <4 x i32> %lhs, %rhs
1734 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1735 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
1736 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
1737
1738 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
1739 Constant *ShiftAmt =
1740 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
1741 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
1742
1743 // %res = trunc <4 x i32> %high to <4 x i16>
1744 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
1745 }
1746 case NEON::BI__builtin_neon_vtrn_v:
1747 case NEON::BI__builtin_neon_vtrnq_v: {
1748 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1749 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1750 Value *SV = nullptr;
1751
1752 for (unsigned vi = 0; vi != 2; ++vi) {
1753 SmallVector<int, 16> Indices;
1754 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1755 Indices.push_back(i+vi);
1756 Indices.push_back(i+e+vi);
1757 }
1758 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
1759 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
1760 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
1761 }
1762 return SV;
1763 }
1764 case NEON::BI__builtin_neon_vtst_v:
1765 case NEON::BI__builtin_neon_vtstq_v: {
1766 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1767 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1768 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
1769 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
1770 ConstantAggregateZero::get(Ty));
1771 return Builder.CreateSExt(Ops[0], Ty, "vtst");
1772 }
1773 case NEON::BI__builtin_neon_vuzp_v:
1774 case NEON::BI__builtin_neon_vuzpq_v: {
1775 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1776 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1777 Value *SV = nullptr;
1778
1779 for (unsigned vi = 0; vi != 2; ++vi) {
1780 SmallVector<int, 16> Indices;
1781 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1782 Indices.push_back(2*i+vi);
1783
1784 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
1785 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
1786 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
1787 }
1788 return SV;
1789 }
1790 case NEON::BI__builtin_neon_vxarq_u64: {
1791 Function *F = CGM.getIntrinsic(Int);
1792 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
1793 return EmitNeonCall(F, Ops, "");
1794 }
1795 case NEON::BI__builtin_neon_vzip_v:
1796 case NEON::BI__builtin_neon_vzipq_v: {
1797 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1798 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1799 Value *SV = nullptr;
1800
1801 for (unsigned vi = 0; vi != 2; ++vi) {
1802 SmallVector<int, 16> Indices;
1803 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1804 Indices.push_back((i + vi*e) >> 1);
1805 Indices.push_back(((i + vi*e) >> 1)+e);
1806 }
1807 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
1808 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
1809 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
1810 }
1811 return SV;
1812 }
1813 case NEON::BI__builtin_neon_vdot_s32:
1814 case NEON::BI__builtin_neon_vdot_u32:
1815 case NEON::BI__builtin_neon_vdotq_s32:
1816 case NEON::BI__builtin_neon_vdotq_u32: {
1817 auto *InputTy =
1818 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1819 llvm::Type *Tys[2] = { Ty, InputTy };
1820 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
1821 }
1822 case NEON::BI__builtin_neon_vfmlal_low_f16:
1823 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
1824 auto *InputTy =
1825 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1826 llvm::Type *Tys[2] = { Ty, InputTy };
1827 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
1828 }
1829 case NEON::BI__builtin_neon_vfmlsl_low_f16:
1830 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
1831 auto *InputTy =
1832 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1833 llvm::Type *Tys[2] = { Ty, InputTy };
1834 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
1835 }
1836 case NEON::BI__builtin_neon_vfmlal_high_f16:
1837 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
1838 auto *InputTy =
1839 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1840 llvm::Type *Tys[2] = { Ty, InputTy };
1841 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
1842 }
1843 case NEON::BI__builtin_neon_vfmlsl_high_f16:
1844 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
1845 auto *InputTy =
1846 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1847 llvm::Type *Tys[2] = { Ty, InputTy };
1848 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
1849 }
1850 case NEON::BI__builtin_neon_vmmlaq_s32:
1851 case NEON::BI__builtin_neon_vmmlaq_u32: {
1852 auto *InputTy =
1853 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1854 llvm::Type *Tys[2] = { Ty, InputTy };
1855 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
1856 }
1857 case NEON::BI__builtin_neon_vmmlaq_f16_f16:
1858 case NEON::BI__builtin_neon_vmmlaq_f32_f16: {
1859 auto *InputTy =
1860 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1861 llvm::Type *Tys[2] = {Ty, InputTy};
1862 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fmmla");
1863 }
1864 case NEON::BI__builtin_neon_vusmmlaq_s32: {
1865 auto *InputTy =
1866 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1867 llvm::Type *Tys[2] = { Ty, InputTy };
1868 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
1869 }
1870 case NEON::BI__builtin_neon_vusdot_s32:
1871 case NEON::BI__builtin_neon_vusdotq_s32: {
1872 auto *InputTy =
1873 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1874 llvm::Type *Tys[2] = { Ty, InputTy };
1875 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
1876 }
1877 case NEON::BI__builtin_neon_vbfdot_f32:
1878 case NEON::BI__builtin_neon_vbfdotq_f32: {
1879 llvm::Type *InputTy =
1880 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
1881 llvm::Type *Tys[2] = { Ty, InputTy };
1882 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
1883 }
1884 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
1885 llvm::Type *Tys[1] = { Ty };
1886 Function *F = CGM.getIntrinsic(Int, Tys);
1887 return EmitNeonCall(F, Ops, "vcvtfp2bf");
1888 }
1889
1890 }
1891
1892 assert(Int && "Expected valid intrinsic number");
1893
1894 // Determine the type(s) of this overloaded AArch64 intrinsic.
1895 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
1896
1897 Value *Result = EmitNeonCall(F, Ops, NameHint);
1898 llvm::Type *ResultType = ConvertType(E->getType());
1899 // AArch64 intrinsic one-element vector type cast to
1900 // scalar type expected by the builtin
1901 return Builder.CreateBitCast(Result, ResultType, NameHint);
1902}
1903
/// Compares Op against an all-zero constant of Op's own type using Pred and
/// returns the comparison result sign-extended to an integer type.
///
/// When Ty is a fixed vector type, Op is first bitcast to Ty, and the result
/// is an integer vector with the same element count and element bit width as
/// Ty. Otherwise the result is sign-extended to Ty itself. Name is given to
/// the final sign-extend instruction.
///
/// NOTE(review): the declarator line carrying the function name and the
/// Op/Ty parameters is not present in this listing.
1904Value *
1906 const CmpInst::Predicate Pred,
1907 const Twine &Name) {
1908
1909 if (isa<FixedVectorType>(Ty)) {
1910 // Vector types are cast to i8 vectors. Recover original type.
1911 Op = Builder.CreateBitCast(Op, Ty);
1912 }
1913
// The right-hand side of every comparison is the null value of Op's type.
1914 Constant *zero = Constant::getNullValue(Op->getType());
1915
1916 if (CmpInst::isFPPredicate(Pred)) {
// FCMP_OEQ is emitted through CreateFCmp; every other floating-point
// predicate goes through CreateFCmpS.
// NOTE(review): presumably equality is the only quiet compare here and the
// rest are signaling -- confirm against the builder documentation.
1917 if (Pred == CmpInst::FCMP_OEQ)
1918 Op = Builder.CreateFCmp(Pred, Op, zero);
1919 else
1920 Op = Builder.CreateFCmpS(Pred, Op, zero);
1921 } else {
1922 Op = Builder.CreateICmp(Pred, Op, zero);
1923 }
1924
// For a fixed vector Ty the result type is the integer vector with the same
// lane count and lane width; for any other Ty the result type is Ty.
1925 llvm::Type *ResTy = Ty;
1926 if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
1927 ResTy = FixedVectorType::get(
1928 IntegerType::get(getLLVMContext(), VTy->getScalarSizeInBits()),
1929 VTy->getNumElements());
1930
1931 return Builder.CreateSExt(Op, ResTy, Name);
1932}
1933
1935 Value *ExtOp, Value *IndexOp,
1936 llvm::Type *ResTy, unsigned IntID,
1937 const char *Name) {
// Packs the table vectors in Ops pairwise into vectors with twice as many
// elements, then calls the intrinsic IntID (overloaded on ResTy) with operands
// [ExtOp if non-null, packed tables..., IndexOp].
// NOTE(review): the declarator line (function name, CGF and Ops parameters)
// and the declaration of TblOps are not present in this listing.
1939 if (ExtOp)
1940 TblOps.push_back(ExtOp);
1941
1942 // Build the identity shuffle mask 0, 1, ..., 2*N-1, where N is the element
// count of one table operand; applied to two operands it concatenates them.
1943 SmallVector<int, 16> Indices;
1944 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
1945 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
1946 Indices.push_back(2*i);
1947 Indices.push_back(2*i+1);
1948 }
1949
// Concatenate consecutive pairs (Ops[0], Ops[1]), (Ops[2], Ops[3]), ...
// End is the index of the last operand, so an unpaired trailing operand is
// left for the block below.
1950 int PairPos = 0, End = Ops.size() - 1;
1951 while (PairPos < End) {
1952 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
1953 Ops[PairPos+1], Indices,
1954 Name));
1955 PairPos += 2;
1956 }
1957
1958 // If there's an odd number of 64-bit lookup table, fill the high 64-bit
1959 // of the 128-bit lookup table with zero.
1960 if (PairPos == End) {
1961 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
1962 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
1963 ZeroTbl, Indices, Name));
1964 }
1965
// The index operand goes last, after all packed tables.
1966 Function *TblF;
1967 TblOps.push_back(IndexOp);
1968 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
1969
1970 return CGF.EmitNeonCall(TblF, TblOps, Name);
1971}
1972
1973Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
1974 unsigned Value;
1975 switch (BuiltinID) {
1976 default:
1977 return nullptr;
1978 case clang::ARM::BI__builtin_arm_nop:
1979 Value = 0;
1980 break;
1981 case clang::ARM::BI__builtin_arm_yield:
1982 case clang::ARM::BI__yield:
1983 Value = 1;
1984 break;
1985 case clang::ARM::BI__builtin_arm_wfe:
1986 case clang::ARM::BI__wfe:
1987 Value = 2;
1988 break;
1989 case clang::ARM::BI__builtin_arm_wfi:
1990 case clang::ARM::BI__wfi:
1991 Value = 3;
1992 break;
1993 case clang::ARM::BI__builtin_arm_sev:
1994 case clang::ARM::BI__sev:
1995 Value = 4;
1996 break;
1997 case clang::ARM::BI__builtin_arm_sevl:
1998 case clang::ARM::BI__sevl:
1999 Value = 5;
2000 break;
2001 }
2002
2003 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
2004 llvm::ConstantInt::get(Int32Ty, Value));
2005}
2006
2012
2013// Generates the IR for the read/write special register builtin.
2014// ValueType is the type of the value that is to be written or read,
2015// RegisterType is the type of the register being written to or read from.
// AccessKind selects between the read_register, read_volatile_register and
// write_register intrinsics. When SysReg is empty, the register name is taken
// from the string literal passed as the builtin's first argument. For a write,
// the value written is the builtin's second argument.
// NOTE(review): the declarator line naming this function is not present in
// this listing.
2017 const CallExpr *E,
2018 llvm::Type *RegisterType,
2019 llvm::Type *ValueType,
2020 SpecialRegisterAccessKind AccessKind,
2021 StringRef SysReg = "") {
2022 // The register read/write intrinsics are only emitted here for 32, 64 and 128 bit registers.
2023 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
2024 RegisterType->isIntegerTy(128)) &&
2025 "Unsupported size for register.");
2026
2027 CodeGen::CGBuilderTy &Builder = CGF.Builder;
2028 CodeGen::CodeGenModule &CGM = CGF.CGM;
2029 LLVMContext &Context = CGM.getLLVMContext();
2030
// No explicit register name was supplied: read it from argument 0, which is
// cast<> to a string literal (so it must be one).
2031 if (SysReg.empty()) {
2032 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
2033 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
2034 }
2035
// The register name is passed to the intrinsic as a metadata string wrapped
// in a single-operand MDNode.
2036 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
2037 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
2038 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
2039
// The intrinsics are overloaded on the register type only.
2040 llvm::Type *Types[] = { RegisterType };
2041
// MixedTypes: a 32-bit value is exchanged with a 64-bit register, so the value
// is truncated after a read or zero-extended before a write.
2042 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
2043 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
2044 && "Can't fit 64-bit value in 32-bit register");
2045
2046 if (AccessKind != Write) {
2047 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
2048 llvm::Function *F = CGM.getIntrinsic(
2049 AccessKind == VolatileRead ? Intrinsic::read_volatile_register
2050 : Intrinsic::read_register,
2051 Types);
2052 llvm::Value *Call = Builder.CreateCall(F, Metadata);
2053
2054 if (MixedTypes)
2055 // Read into 64 bit register and then truncate result to 32 bit.
2056 return Builder.CreateTrunc(Call, ValueType);
2057
2058 if (ValueType->isPointerTy())
2059 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
2060 return Builder.CreateIntToPtr(Call, ValueType);
2061
2062 return Call;
2063 }
2064
// Write path: the value to write is the builtin's second argument.
2065 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
2066 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
2067 if (MixedTypes) {
2068 // Extend 32 bit write value to 64 bit to pass to write.
2069 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
2070 return Builder.CreateCall(F, { Metadata, ArgValue });
2071 }
2072
2073 if (ValueType->isPointerTy()) {
2074 // Have a pointer ArgValue but write_register takes the integer RegisterType.
2075 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
2076 return Builder.CreateCall(F, { Metadata, ArgValue });
2077 }
2078
2079 return Builder.CreateCall(F, { Metadata, ArgValue });
2080}
2081
2082static Value *EmitRangePrefetchBuiltin(CodeGenFunction &CGF, unsigned BuiltinID,
2083 const CallExpr *E) {
2084 CodeGen::CGBuilderTy &Builder = CGF.Builder;
2085 CodeGen::CodeGenModule &CGM = CGF.CGM;
2087
2088 auto getIntArg = [&](unsigned ArgNo) {
2090 if (!E->getArg(ArgNo)->EvaluateAsInt(Result, CGM.getContext()))
2091 llvm_unreachable("Expected constant argument to range prefetch.");
2092 return Result.Val.getInt().getExtValue();
2093 };
2094
2095 Ops.push_back(CGF.EmitScalarExpr(E->getArg(0))); /*Addr*/
2096 Ops.push_back(CGF.EmitScalarExpr(E->getArg(1))); /*Access Kind*/
2097 Ops.push_back(CGF.EmitScalarExpr(E->getArg(2))); /*Policy*/
2098
2099 if (BuiltinID == clang::AArch64::BI__builtin_arm_range_prefetch_x) {
2100 auto Length = getIntArg(3);
2101 auto Count = getIntArg(4) - 1;
2102 auto Stride = getIntArg(5);
2103 auto Distance = getIntArg(6);
2104
2105 // Map ReuseDistance given in bytes to four bits representing decreasing
2106 // powers of two in the range 512MiB (0b0001) to 32KiB (0b1111). Values
2107 // are rounded up to the nearest power of 2, starting at 32KiB. Any value
2108 // over the maximum is represented by 0 (distance not known).
2109 if (Distance > 0) {
2110 Distance = llvm::Log2_32_Ceil(Distance);
2111 if (Distance < 15)
2112 Distance = 15;
2113 else if (Distance > 29)
2114 Distance = 0;
2115 else
2116 Distance = 30 - Distance;
2117 }
2118
2119 uint64_t Mask22 = (1ULL << 22) - 1;
2120 uint64_t Mask16 = (1ULL << 16) - 1;
2121 uint64_t Metadata = (Distance << 60) | ((Stride & Mask22) << 38) |
2122 ((Count & Mask16) << 22) | (Length & Mask22);
2123
2124 Ops.push_back(llvm::ConstantInt::get(Builder.getInt64Ty(), Metadata));
2125 } else
2126 Ops.push_back(CGF.EmitScalarExpr(E->getArg(3)));
2127
2128 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_range_prefetch),
2129 Ops);
2130}
2131
2132/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
2133/// argument that specifies the vector type. The additional argument is meant
2134/// for Sema checking (see `CheckNeonBuiltinFunctionCall`) and this function
2135/// should be kept consistent with the logic in Sema.
2136/// TODO: Make this return false for SISD builtins.
2137static bool HasExtraNeonArgument(unsigned BuiltinID) {
2138 // Required by the headers included below, but not in this particular
2139 // function.
2140 [[maybe_unused]] int PtrArgNum = -1;
2141 [[maybe_unused]] bool HasConstPtr = false;
2142
2143 // The mask encodes the type. We don't care about the actual value. Instead,
2144 // we just check whether its been set.
2145 uint64_t mask = 0;
2146 switch (BuiltinID) {
2147#define GET_NEON_OVERLOAD_CHECK
2148#include "clang/Basic/arm_fp16.inc"
2149#include "clang/Basic/arm_neon.inc"
2150#undef GET_NEON_OVERLOAD_CHECK
2151 // Non-neon builtins for controling VFP that take extra argument for
2152 // discriminating the type.
2153 case ARM::BI__builtin_arm_vcvtr_f:
2154 case ARM::BI__builtin_arm_vcvtr_d:
2155 mask = 1;
2156 }
2157
2158 if (mask)
2159 return true;
2160
2161 return false;
2162}
2163
2165 const CallExpr *E,
2167 llvm::Triple::ArchType Arch) {
2168 if (auto Hint = GetValueForARMHint(BuiltinID))
2169 return Hint;
2170
2171 if (BuiltinID == clang::ARM::BI__emit) {
2172 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
2173 llvm::FunctionType *FTy =
2174 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
2175
2177 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
2178 llvm_unreachable("Sema will ensure that the parameter is constant");
2179
2180 llvm::APSInt Value = Result.Val.getInt();
2181 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
2182
2183 llvm::InlineAsm *Emit =
2184 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
2185 /*hasSideEffects=*/true)
2186 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
2187 /*hasSideEffects=*/true);
2188
2189 return Builder.CreateCall(Emit);
2190 }
2191
2192 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
2193 Value *Option = EmitScalarExpr(E->getArg(0));
2194 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
2195 }
2196
2197 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
2199 Value *RW = EmitScalarExpr(E->getArg(1));
2200 Value *IsData = EmitScalarExpr(E->getArg(2));
2201
2202 // Locality is not supported on ARM target
2203 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
2204
2205 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
2206 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
2207 }
2208
2209 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
2210 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2211 return Builder.CreateCall(
2212 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
2213 }
2214
2215 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
2216 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
2217 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2218 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
2219 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
2220 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
2221 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
2222 return Res;
2223 }
2224
2225
2226 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
2227 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2228 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
2229 }
2230 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
2231 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2232 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
2233 "cls");
2234 }
2235
2236 if (BuiltinID == clang::ARM::BI__clear_cache) {
2237 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
2238 const FunctionDecl *FD = E->getDirectCallee();
2239 Value *Ops[2];
2240 for (unsigned i = 0; i < 2; i++)
2241 Ops[i] = EmitScalarExpr(E->getArg(i));
2242 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
2243 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
2244 StringRef Name = FD->getName();
2245 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
2246 }
2247
2248 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
2249 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
2250 Function *F;
2251
2252 switch (BuiltinID) {
2253 default: llvm_unreachable("unexpected builtin");
2254 case clang::ARM::BI__builtin_arm_mcrr:
2255 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
2256 break;
2257 case clang::ARM::BI__builtin_arm_mcrr2:
2258 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
2259 break;
2260 }
2261
2262 // MCRR{2} instruction has 5 operands but
2263 // the intrinsic has 4 because Rt and Rt2
2264 // are represented as a single unsigned 64
2265 // bit integer in the intrinsic definition
2266 // but internally it's represented as 2 32
2267 // bit integers.
2268
2269 Value *Coproc = EmitScalarExpr(E->getArg(0));
2270 Value *Opc1 = EmitScalarExpr(E->getArg(1));
2271 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
2272 Value *CRm = EmitScalarExpr(E->getArg(3));
2273
2274 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
2275 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
2276 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
2277 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
2278
2279 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
2280 }
2281
2282 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
2283 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
2284 Function *F;
2285
2286 switch (BuiltinID) {
2287 default: llvm_unreachable("unexpected builtin");
2288 case clang::ARM::BI__builtin_arm_mrrc:
2289 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
2290 break;
2291 case clang::ARM::BI__builtin_arm_mrrc2:
2292 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
2293 break;
2294 }
2295
2296 Value *Coproc = EmitScalarExpr(E->getArg(0));
2297 Value *Opc1 = EmitScalarExpr(E->getArg(1));
2298 Value *CRm = EmitScalarExpr(E->getArg(2));
2299 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
2300
2301 // Returns an unsigned 64 bit integer, represented
2302 // as two 32 bit integers.
2303
2304 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
2305 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
2306 Rt = Builder.CreateZExt(Rt, Int64Ty);
2307 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
2308
2309 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
2310 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
2311 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
2312
2313 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
2314 }
2315
2316 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
2317 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2318 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
2319 getContext().getTypeSize(E->getType()) == 64) ||
2320 BuiltinID == clang::ARM::BI__ldrexd) {
2321 Function *F;
2322
2323 switch (BuiltinID) {
2324 default: llvm_unreachable("unexpected builtin");
2325 case clang::ARM::BI__builtin_arm_ldaex:
2326 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
2327 break;
2328 case clang::ARM::BI__builtin_arm_ldrexd:
2329 case clang::ARM::BI__builtin_arm_ldrex:
2330 case clang::ARM::BI__ldrexd:
2331 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
2332 break;
2333 }
2334
2335 Value *LdPtr = EmitScalarExpr(E->getArg(0));
2336 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
2337
2338 Value *Val0 = Builder.CreateExtractValue(Val, 1);
2339 Value *Val1 = Builder.CreateExtractValue(Val, 0);
2340 Val0 = Builder.CreateZExt(Val0, Int64Ty);
2341 Val1 = Builder.CreateZExt(Val1, Int64Ty);
2342
2343 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
2344 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
2345 Val = Builder.CreateOr(Val, Val1);
2346 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
2347 }
2348
2349 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2350 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
2351 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
2352
2353 QualType Ty = E->getType();
2354 llvm::Type *RealResTy = ConvertType(Ty);
2355 llvm::Type *IntTy =
2356 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
2357
2358 Function *F = CGM.getIntrinsic(
2359 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
2360 : Intrinsic::arm_ldrex,
2361 DefaultPtrTy);
2362 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
2363 Val->addParamAttr(
2364 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
2365
2366 if (RealResTy->isPointerTy())
2367 return Builder.CreateIntToPtr(Val, RealResTy);
2368 else {
2369 llvm::Type *IntResTy = llvm::IntegerType::get(
2370 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
2371 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
2372 RealResTy);
2373 }
2374 }
2375
2376 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
2377 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
2378 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
2379 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
2380 Function *F = CGM.getIntrinsic(
2381 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
2382 : Intrinsic::arm_strexd);
2383 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
2384
2386 Value *Val = EmitScalarExpr(E->getArg(0));
2387 Builder.CreateStore(Val, Tmp);
2388
2389 Address LdPtr = Tmp.withElementType(STy);
2390 Val = Builder.CreateLoad(LdPtr);
2391
2392 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
2393 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
2394 Value *StPtr = EmitScalarExpr(E->getArg(1));
2395 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
2396 }
2397
2398 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
2399 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
2400 Value *StoreVal = EmitScalarExpr(E->getArg(0));
2401 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
2402
2403 QualType Ty = E->getArg(0)->getType();
2404 llvm::Type *StoreTy =
2405 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
2406
2407 if (StoreVal->getType()->isPointerTy())
2408 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
2409 else {
2410 llvm::Type *IntTy = llvm::IntegerType::get(
2412 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
2413 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
2414 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
2415 }
2416
2417 Function *F = CGM.getIntrinsic(
2418 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
2419 : Intrinsic::arm_strex,
2420 StoreAddr->getType());
2421
2422 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
2423 CI->addParamAttr(
2424 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
2425 return CI;
2426 }
2427
2428 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
2429 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
2430 return Builder.CreateCall(F);
2431 }
2432
2433 // CRC32
2434 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
2435 switch (BuiltinID) {
2436 case clang::ARM::BI__builtin_arm_crc32b:
2437 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
2438 case clang::ARM::BI__builtin_arm_crc32cb:
2439 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
2440 case clang::ARM::BI__builtin_arm_crc32h:
2441 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
2442 case clang::ARM::BI__builtin_arm_crc32ch:
2443 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
2444 case clang::ARM::BI__builtin_arm_crc32w:
2445 case clang::ARM::BI__builtin_arm_crc32d:
2446 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
2447 case clang::ARM::BI__builtin_arm_crc32cw:
2448 case clang::ARM::BI__builtin_arm_crc32cd:
2449 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
2450 }
2451
2452 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
2453 Value *Arg0 = EmitScalarExpr(E->getArg(0));
2454 Value *Arg1 = EmitScalarExpr(E->getArg(1));
2455
2456 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
2457 // intrinsics, hence we need different codegen for these cases.
2458 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
2459 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
2460 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
2461 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
2462 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
2463 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
2464
2465 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
2466 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
2467 return Builder.CreateCall(F, {Res, Arg1b});
2468 } else {
2469 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
2470
2471 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
2472 return Builder.CreateCall(F, {Arg0, Arg1});
2473 }
2474 }
2475
2476 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
2477 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2478 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
2479 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
2480 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
2481 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
2482
2483 SpecialRegisterAccessKind AccessKind = Write;
2484 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
2485 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2486 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
2487 AccessKind = VolatileRead;
2488
2489 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
2490 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
2491
2492 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2493 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
2494
2495 llvm::Type *ValueType;
2496 llvm::Type *RegisterType;
2497 if (IsPointerBuiltin) {
2498 ValueType = VoidPtrTy;
2500 } else if (Is64Bit) {
2501 ValueType = RegisterType = Int64Ty;
2502 } else {
2503 ValueType = RegisterType = Int32Ty;
2504 }
2505
2506 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
2507 AccessKind);
2508 }
2509
2510 if (BuiltinID == ARM::BI__builtin_sponentry) {
2511 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
2512 return Builder.CreateCall(F);
2513 }
2514
2515 // Handle MSVC intrinsics before argument evaluation to prevent double
2516 // evaluation.
2517 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
2518 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
2519
2520 // Deal with MVE builtins
2521 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
2522 return Result;
2523 // Handle CDE builtins
2524 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
2525 return Result;
2526
2527 // Some intrinsics are equivalent - if they are, use the base intrinsic ID.
2528 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
2529 return P.first == BuiltinID;
2530 });
2531 if (It != end(NEONEquivalentIntrinsicMap))
2532 BuiltinID = It->second;
2533
2534 // Find out if any arguments are required to be integer constant
2535 // expressions.
2536 unsigned ICEArguments = 0;
2538 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2539 assert(Error == ASTContext::GE_None && "Should not codegen an error");
2540
2541 auto getAlignmentValue32 = [&](Address addr) -> Value* {
2542 return Builder.getInt32(addr.getAlignment().getQuantity());
2543 };
2544
2545 Address PtrOp0 = Address::invalid();
2546 Address PtrOp1 = Address::invalid();
2548 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
2549 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
2550 for (unsigned i = 0, e = NumArgs; i != e; i++) {
2551 if (i == 0) {
2552 switch (BuiltinID) {
2553 case NEON::BI__builtin_neon_vld1_v:
2554 case NEON::BI__builtin_neon_vld1q_v:
2555 case NEON::BI__builtin_neon_vld1q_lane_v:
2556 case NEON::BI__builtin_neon_vld1_lane_v:
2557 case NEON::BI__builtin_neon_vld1_dup_v:
2558 case NEON::BI__builtin_neon_vld1q_dup_v:
2559 case NEON::BI__builtin_neon_vst1_v:
2560 case NEON::BI__builtin_neon_vst1q_v:
2561 case NEON::BI__builtin_neon_vst1q_lane_v:
2562 case NEON::BI__builtin_neon_vst1_lane_v:
2563 case NEON::BI__builtin_neon_vst2_v:
2564 case NEON::BI__builtin_neon_vst2q_v:
2565 case NEON::BI__builtin_neon_vst2_lane_v:
2566 case NEON::BI__builtin_neon_vst2q_lane_v:
2567 case NEON::BI__builtin_neon_vst3_v:
2568 case NEON::BI__builtin_neon_vst3q_v:
2569 case NEON::BI__builtin_neon_vst3_lane_v:
2570 case NEON::BI__builtin_neon_vst3q_lane_v:
2571 case NEON::BI__builtin_neon_vst4_v:
2572 case NEON::BI__builtin_neon_vst4q_v:
2573 case NEON::BI__builtin_neon_vst4_lane_v:
2574 case NEON::BI__builtin_neon_vst4q_lane_v:
2575 // Get the alignment for the argument in addition to the value;
2576 // we'll use it later.
2577 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
2578 Ops.push_back(PtrOp0.emitRawPointer(*this));
2579 continue;
2580 }
2581 }
2582 if (i == 1) {
2583 switch (BuiltinID) {
2584 case NEON::BI__builtin_neon_vld2_v:
2585 case NEON::BI__builtin_neon_vld2q_v:
2586 case NEON::BI__builtin_neon_vld3_v:
2587 case NEON::BI__builtin_neon_vld3q_v:
2588 case NEON::BI__builtin_neon_vld4_v:
2589 case NEON::BI__builtin_neon_vld4q_v:
2590 case NEON::BI__builtin_neon_vld2_lane_v:
2591 case NEON::BI__builtin_neon_vld2q_lane_v:
2592 case NEON::BI__builtin_neon_vld3_lane_v:
2593 case NEON::BI__builtin_neon_vld3q_lane_v:
2594 case NEON::BI__builtin_neon_vld4_lane_v:
2595 case NEON::BI__builtin_neon_vld4q_lane_v:
2596 case NEON::BI__builtin_neon_vld2_dup_v:
2597 case NEON::BI__builtin_neon_vld2q_dup_v:
2598 case NEON::BI__builtin_neon_vld3_dup_v:
2599 case NEON::BI__builtin_neon_vld3q_dup_v:
2600 case NEON::BI__builtin_neon_vld4_dup_v:
2601 case NEON::BI__builtin_neon_vld4q_dup_v:
2602 // Get the alignment for the argument in addition to the value;
2603 // we'll use it later.
2604 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
2605 Ops.push_back(PtrOp1.emitRawPointer(*this));
2606 continue;
2607 }
2608 }
2609
2610 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
2611 }
2612
2613 switch (BuiltinID) {
2614 default: break;
2615
2616 case NEON::BI__builtin_neon_vget_lane_i8:
2617 case NEON::BI__builtin_neon_vget_lane_i16:
2618 case NEON::BI__builtin_neon_vget_lane_i32:
2619 case NEON::BI__builtin_neon_vget_lane_i64:
2620 case NEON::BI__builtin_neon_vget_lane_bf16:
2621 case NEON::BI__builtin_neon_vget_lane_f32:
2622 case NEON::BI__builtin_neon_vgetq_lane_i8:
2623 case NEON::BI__builtin_neon_vgetq_lane_i16:
2624 case NEON::BI__builtin_neon_vgetq_lane_i32:
2625 case NEON::BI__builtin_neon_vgetq_lane_i64:
2626 case NEON::BI__builtin_neon_vgetq_lane_bf16:
2627 case NEON::BI__builtin_neon_vgetq_lane_f32:
2628 case NEON::BI__builtin_neon_vduph_lane_bf16:
2629 case NEON::BI__builtin_neon_vduph_laneq_bf16:
2630 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
2631
2632 case NEON::BI__builtin_neon_vrndns_f32: {
2633 Value *Arg = EmitScalarExpr(E->getArg(0));
2634 llvm::Type *Tys[] = {Arg->getType()};
2635 Function *F = CGM.getIntrinsic(Intrinsic::roundeven, Tys);
2636 return Builder.CreateCall(F, {Arg}, "vrndn"); }
2637
2638 case NEON::BI__builtin_neon_vset_lane_i8:
2639 case NEON::BI__builtin_neon_vset_lane_i16:
2640 case NEON::BI__builtin_neon_vset_lane_i32:
2641 case NEON::BI__builtin_neon_vset_lane_i64:
2642 case NEON::BI__builtin_neon_vset_lane_bf16:
2643 case NEON::BI__builtin_neon_vset_lane_f32:
2644 case NEON::BI__builtin_neon_vsetq_lane_i8:
2645 case NEON::BI__builtin_neon_vsetq_lane_i16:
2646 case NEON::BI__builtin_neon_vsetq_lane_i32:
2647 case NEON::BI__builtin_neon_vsetq_lane_i64:
2648 case NEON::BI__builtin_neon_vsetq_lane_bf16:
2649 case NEON::BI__builtin_neon_vsetq_lane_f32:
2650 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
2651
2652 case NEON::BI__builtin_neon_vsha1h_u32:
2653 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
2654 "vsha1h");
2655 case NEON::BI__builtin_neon_vsha1cq_u32:
2656 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
2657 "vsha1h");
2658 case NEON::BI__builtin_neon_vsha1pq_u32:
2659 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
2660 "vsha1h");
2661 case NEON::BI__builtin_neon_vsha1mq_u32:
2662 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
2663 "vsha1h");
2664
2665 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
2666 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
2667 "vcvtbfp2bf");
2668 }
2669
2670 // The ARM _MoveToCoprocessor builtins put the input register value as
2671 // the first argument, but the LLVM intrinsic expects it as the third one.
2672 case clang::ARM::BI_MoveToCoprocessor:
2673 case clang::ARM::BI_MoveToCoprocessor2: {
2674 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
2675 ? Intrinsic::arm_mcr
2676 : Intrinsic::arm_mcr2);
2677 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
2678 Ops[3], Ops[4], Ops[5]});
2679 }
2680 }
2681
2682 // Get the last argument, which specifies the vector type.
2683 assert(HasExtraArg);
2684 const Expr *Arg = E->getArg(E->getNumArgs()-1);
2685 std::optional<llvm::APSInt> Result =
2687 if (!Result)
2688 return nullptr;
2689
2690 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
2691 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
2692 // Determine the overloaded type of this builtin.
2693 llvm::Type *Ty;
2694 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
2695 Ty = FloatTy;
2696 else
2697 Ty = DoubleTy;
2698
2699 // Determine whether this is an unsigned conversion or not.
2700 bool usgn = Result->getZExtValue() == 1;
2701 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
2702
2703 // Call the appropriate intrinsic.
2704 Function *F = CGM.getIntrinsic(Int, Ty);
2705 return Builder.CreateCall(F, Ops, "vcvtr");
2706 }
2707
2708 // Determine the type of this overloaded NEON intrinsic.
2709 NeonTypeFlags Type = Result->getZExtValue();
2710 bool usgn = Type.isUnsigned();
2711 bool rightShift = false;
2712
2713 llvm::FixedVectorType *VTy =
2714 GetNeonType(this, Type, getTarget().hasFastHalfType(), false,
2715 getTarget().hasBFloat16Type());
2716 llvm::Type *Ty = VTy;
2717 if (!Ty)
2718 return nullptr;
2719
2720 // Many NEON builtins have identical semantics and uses in ARM and
2721 // AArch64. Emit these in a single function.
2722 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
2724 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
2725 if (Builtin)
2727 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
2728 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
2729
2730 unsigned Int;
2731 switch (BuiltinID) {
2732 default: return nullptr;
2733 case NEON::BI__builtin_neon_vld1q_lane_v:
2734 // Handle 64-bit integer elements as a special case. Use shuffles of
2735 // one-element vectors to avoid poor code for i64 in the backend.
2736 if (VTy->getElementType()->isIntegerTy(64)) {
2737 // Extract the other lane.
2738 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2739 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
2740 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
2741 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
2742 // Load the value as a one-element vector.
2743 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
2744 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2745 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
2746 Value *Align = getAlignmentValue32(PtrOp0);
2747 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
2748 // Combine them.
2749 int Indices[] = {1 - Lane, Lane};
2750 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
2751 }
2752 [[fallthrough]];
2753 case NEON::BI__builtin_neon_vld1_lane_v: {
2754 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2755 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
2756 Value *Ld = Builder.CreateLoad(PtrOp0);
2757 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
2758 }
2759 case NEON::BI__builtin_neon_vqrshrn_n_v:
2760 Int =
2761 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
2762 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
2763 1, true);
2764 case NEON::BI__builtin_neon_vqrshrun_n_v:
2765 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
2766 Ops, "vqrshrun_n", 1, true);
2767 case NEON::BI__builtin_neon_vqshrn_n_v:
2768 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
2769 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
2770 1, true);
2771 case NEON::BI__builtin_neon_vqshrun_n_v:
2772 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
2773 Ops, "vqshrun_n", 1, true);
2774 case NEON::BI__builtin_neon_vrecpe_v:
2775 case NEON::BI__builtin_neon_vrecpeq_v:
2776 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
2777 Ops, "vrecpe");
2778 case NEON::BI__builtin_neon_vrshrn_n_v:
2779 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
2780 Ops, "vrshrn_n", 1, true);
2781 case NEON::BI__builtin_neon_vrsra_n_v:
2782 case NEON::BI__builtin_neon_vrsraq_n_v:
2783 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2784 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2785 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
2786 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
2787 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
2788 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
2789 case NEON::BI__builtin_neon_vsri_n_v:
2790 case NEON::BI__builtin_neon_vsriq_n_v:
2791 rightShift = true;
2792 [[fallthrough]];
2793 case NEON::BI__builtin_neon_vsli_n_v:
2794 case NEON::BI__builtin_neon_vsliq_n_v:
2795 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
2796 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
2797 Ops, "vsli_n");
2798 case NEON::BI__builtin_neon_vsra_n_v:
2799 case NEON::BI__builtin_neon_vsraq_n_v:
2800 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2801 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
2802 return Builder.CreateAdd(Ops[0], Ops[1]);
2803 case NEON::BI__builtin_neon_vst1q_lane_v:
2804 // Handle 64-bit integer elements as a special case. Use a shuffle to get
2805 // a one-element vector and avoid poor code for i64 in the backend.
2806 if (VTy->getElementType()->isIntegerTy(64)) {
2807 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2808 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
2809 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
2810 Ops[2] = getAlignmentValue32(PtrOp0);
2811 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
2812 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
2813 Tys), Ops);
2814 }
2815 [[fallthrough]];
2816 case NEON::BI__builtin_neon_vst1_lane_v: {
2817 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2818 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
2819 return Builder.CreateStore(Ops[1],
2820 PtrOp0.withElementType(Ops[1]->getType()));
2821 }
2822 case NEON::BI__builtin_neon_vtbl1_v:
2823 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
2824 Ops, "vtbl1");
2825 case NEON::BI__builtin_neon_vtbl2_v:
2826 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
2827 Ops, "vtbl2");
2828 case NEON::BI__builtin_neon_vtbl3_v:
2829 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
2830 Ops, "vtbl3");
2831 case NEON::BI__builtin_neon_vtbl4_v:
2832 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
2833 Ops, "vtbl4");
2834 case NEON::BI__builtin_neon_vtbx1_v:
2835 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
2836 Ops, "vtbx1");
2837 case NEON::BI__builtin_neon_vtbx2_v:
2838 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
2839 Ops, "vtbx2");
2840 case NEON::BI__builtin_neon_vtbx3_v:
2841 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
2842 Ops, "vtbx3");
2843 case NEON::BI__builtin_neon_vtbx4_v:
2844 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
2845 Ops, "vtbx4");
2846 }
2847}
2848
2849template<typename Integer>
2851 return E->getIntegerConstantExpr(Context)->getExtValue();
2852}
2853
2854static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
2855 llvm::Type *T, bool Unsigned) {
2856 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
2857 // which finds it convenient to specify signed/unsigned as a boolean flag.
2858 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
2859}
2860
2861static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
2862 uint32_t Shift, bool Unsigned) {
2863 // MVE helper function for integer shift right. This must handle signed vs
2864 // unsigned, and also deal specially with the case where the shift count is
2865 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
2866 // undefined behavior, but in MVE it's legal, so we must convert it to code
2867 // that is not undefined in IR.
2868 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
2869 ->getElementType()
2870 ->getPrimitiveSizeInBits();
2871 if (Shift == LaneBits) {
2872 // An unsigned shift of the full lane size always generates zero, so we can
2873 // simply emit a zero vector. A signed shift of the full lane size does the
2874 // same thing as shifting by one bit fewer.
2875 if (Unsigned)
2876 return llvm::Constant::getNullValue(V->getType());
2877 else
2878 --Shift;
2879 }
2880 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
2881}
2882
2883static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
2884 // MVE-specific helper function for a vector splat, which infers the element
2885 // count of the output vector by knowing that MVE vectors are all 128 bits
2886 // wide.
2887 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
2888 return Builder.CreateVectorSplat(Elements, V);
2889}
2890
2891static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
2892 CodeGenFunction *CGF,
2893 llvm::Value *V,
2894 llvm::Type *DestType) {
2895 // Convert one MVE vector type into another by reinterpreting its in-register
2896 // format.
2897 //
2898 // Little-endian, this is identical to a bitcast (which reinterprets the
2899 // memory format). But big-endian, they're not necessarily the same, because
2900 // the register and memory formats map to each other differently depending on
2901 // the lane size.
2902 //
2903 // We generate a bitcast whenever we can (if we're little-endian, or if the
2904 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
2905 // that performs the different kind of reinterpretation.
2906 if (CGF->getTarget().isBigEndian() &&
2907 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
2908 return Builder.CreateCall(
2909 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
2910 {DestType, V->getType()}),
2911 V);
2912 } else {
2913 return Builder.CreateBitCast(V, DestType);
2914 }
2915}
2916
2917static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
2918 // Make a shufflevector that extracts every other element of a vector (evens
2919 // or odds, as desired).
2920 SmallVector<int, 16> Indices;
2921 unsigned InputElements =
2922 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
2923 for (unsigned i = 0; i < InputElements; i += 2)
2924 Indices.push_back(i + Odd);
2925 return Builder.CreateShuffleVector(V, Indices);
2926}
2927
2928static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
2929 llvm::Value *V1) {
2930 // Make a shufflevector that interleaves two vectors element by element.
2931 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
2932 SmallVector<int, 16> Indices;
2933 unsigned InputElements =
2934 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
2935 for (unsigned i = 0; i < InputElements; i++) {
2936 Indices.push_back(i);
2937 Indices.push_back(i + InputElements);
2938 }
2939 return Builder.CreateShuffleVector(V0, V1, Indices);
2940}
2941
2942template<unsigned HighBit, unsigned OtherBits>
2943static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
2944 // MVE-specific helper function to make a vector splat of a constant such as
2945 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
2946 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
2947 unsigned LaneBits = T->getPrimitiveSizeInBits();
2948 uint32_t Value = HighBit << (LaneBits - 1);
2949 if (OtherBits)
2950 Value |= (1UL << (LaneBits - 1)) - 1;
2951 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
2952 return ARMMVEVectorSplat(Builder, Lane);
2953}
2954
2955static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
2956 llvm::Value *V,
2957 unsigned ReverseWidth) {
2958 // MVE-specific helper function which reverses the elements of a
2959 // vector within every (ReverseWidth)-bit collection of lanes.
2960 SmallVector<int, 16> Indices;
2961 unsigned LaneSize = V->getType()->getScalarSizeInBits();
2962 unsigned Elements = 128 / LaneSize;
2963 unsigned Mask = ReverseWidth / LaneSize - 1;
2964 for (unsigned i = 0; i < Elements; i++)
2965 Indices.push_back(i ^ Mask);
2966 return Builder.CreateShuffleVector(V, Indices);
2967}
2968
2969static llvm::Value *ARMMVECreateSIToFP(CGBuilderTy &Builder,
2970 CodeGenFunction *CGF, llvm::Value *V,
2971 llvm::Type *Ty) {
2972 return Builder.CreateCall(
2973 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_fp_int, {Ty, V->getType()}),
2974 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 0)});
2975}
2976
2977static llvm::Value *ARMMVECreateUIToFP(CGBuilderTy &Builder,
2978 CodeGenFunction *CGF, llvm::Value *V,
2979 llvm::Type *Ty) {
2980 return Builder.CreateCall(
2981 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_fp_int, {Ty, V->getType()}),
2982 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)});
2983}
2984
2985static llvm::Value *ARMMVECreateFPToSI(CGBuilderTy &Builder,
2986 CodeGenFunction *CGF, llvm::Value *V,
2987 llvm::Type *Ty) {
2988 return Builder.CreateCall(
2989 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_int_fp, {Ty, V->getType()}),
2990 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 0)});
2991}
2992
2993static llvm::Value *ARMMVECreateFPToUI(CGBuilderTy &Builder,
2994 CodeGenFunction *CGF, llvm::Value *V,
2995 llvm::Type *Ty) {
2996 return Builder.CreateCall(
2997 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_int_fp, {Ty, V->getType()}),
2998 {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)});
2999}
3000
3002 const CallExpr *E,
3004 llvm::Triple::ArchType Arch) {
3005 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
3006 Intrinsic::ID IRIntr;
3007 unsigned NumVectors;
3008
3009 // Code autogenerated by Tablegen will handle all the simple builtins.
3010 switch (BuiltinID) {
3011 #include "clang/Basic/arm_mve_builtin_cg.inc"
3012
3013 // If we didn't match an MVE builtin id at all, go back to the
3014 // main EmitARMBuiltinExpr.
3015 default:
3016 return nullptr;
3017 }
3018
3019 // Anything that breaks from that switch is an MVE builtin that
3020 // needs handwritten code to generate.
3021
3022 switch (CustomCodeGenType) {
3023
3024 case CustomCodeGen::VLD24: {
3027
3028 auto MvecCType = E->getType();
3029 auto MvecLType = ConvertType(MvecCType);
3030 assert(MvecLType->isStructTy() &&
3031 "Return type for vld[24]q should be a struct");
3032 assert(MvecLType->getStructNumElements() == 1 &&
3033 "Return-type struct for vld[24]q should have one element");
3034 auto MvecLTypeInner = MvecLType->getStructElementType(0);
3035 assert(MvecLTypeInner->isArrayTy() &&
3036 "Return-type struct for vld[24]q should contain an array");
3037 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3038 "Array member of return-type struct vld[24]q has wrong length");
3039 auto VecLType = MvecLTypeInner->getArrayElementType();
3040
3041 Tys.push_back(VecLType);
3042
3043 auto Addr = E->getArg(0);
3044 Ops.push_back(EmitScalarExpr(Addr));
3045 Tys.push_back(ConvertType(Addr->getType()));
3046
3047 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
3048 Value *LoadResult = Builder.CreateCall(F, Ops);
3049 Value *MvecOut = PoisonValue::get(MvecLType);
3050 for (unsigned i = 0; i < NumVectors; ++i) {
3051 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
3052 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
3053 }
3054
3055 if (ReturnValue.isNull())
3056 return MvecOut;
3057 else
3058 return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
3059 }
3060
3061 case CustomCodeGen::VST24: {
3064
3065 auto Addr = E->getArg(0);
3066 Ops.push_back(EmitScalarExpr(Addr));
3067 Tys.push_back(ConvertType(Addr->getType()));
3068
3069 auto MvecCType = E->getArg(1)->getType();
3070 auto MvecLType = ConvertType(MvecCType);
3071 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
3072 assert(MvecLType->getStructNumElements() == 1 &&
3073 "Data-type struct for vst2q should have one element");
3074 auto MvecLTypeInner = MvecLType->getStructElementType(0);
3075 assert(MvecLTypeInner->isArrayTy() &&
3076 "Data-type struct for vst2q should contain an array");
3077 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3078 "Array member of return-type struct vld[24]q has wrong length");
3079 auto VecLType = MvecLTypeInner->getArrayElementType();
3080
3081 Tys.push_back(VecLType);
3082
3083 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
3084 EmitAggExpr(E->getArg(1), MvecSlot);
3085 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
3086 for (unsigned i = 0; i < NumVectors; i++)
3087 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
3088
3089 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
3090 Value *ToReturn = nullptr;
3091 for (unsigned i = 0; i < NumVectors; i++) {
3092 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
3093 ToReturn = Builder.CreateCall(F, Ops);
3094 Ops.pop_back();
3095 }
3096 return ToReturn;
3097 }
3098 }
3099 llvm_unreachable("unknown custom codegen type.");
3100}
3101
// NOTE(review): the opening line(s) of this signature are missing from this
// listing. The visible body dispatches on BuiltinID: the handled cases come
// from the included arm_cde_builtin_cg.inc, and any other ID returns nullptr.
3103 const CallExpr *E,
3105 llvm::Triple::ArchType Arch) {
3106 switch (BuiltinID) {
3107 default:
3108 return nullptr;
3109#include "clang/Basic/arm_cde_builtin_cg.inc"
3110 }
3111}
3112
3113static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
3114 const CallExpr *E,
3116 llvm::Triple::ArchType Arch) {
3117 unsigned int Int = 0;
3118 const char *s = nullptr;
3119
3120 switch (BuiltinID) {
3121 default:
3122 return nullptr;
3123 case NEON::BI__builtin_neon_vtbl1_v:
3124 case NEON::BI__builtin_neon_vqtbl1_v:
3125 case NEON::BI__builtin_neon_vqtbl1q_v:
3126 case NEON::BI__builtin_neon_vtbl2_v:
3127 case NEON::BI__builtin_neon_vqtbl2_v:
3128 case NEON::BI__builtin_neon_vqtbl2q_v:
3129 case NEON::BI__builtin_neon_vtbl3_v:
3130 case NEON::BI__builtin_neon_vqtbl3_v:
3131 case NEON::BI__builtin_neon_vqtbl3q_v:
3132 case NEON::BI__builtin_neon_vtbl4_v:
3133 case NEON::BI__builtin_neon_vqtbl4_v:
3134 case NEON::BI__builtin_neon_vqtbl4q_v:
3135 break;
3136 case NEON::BI__builtin_neon_vtbx1_v:
3137 case NEON::BI__builtin_neon_vqtbx1_v:
3138 case NEON::BI__builtin_neon_vqtbx1q_v:
3139 case NEON::BI__builtin_neon_vtbx2_v:
3140 case NEON::BI__builtin_neon_vqtbx2_v:
3141 case NEON::BI__builtin_neon_vqtbx2q_v:
3142 case NEON::BI__builtin_neon_vtbx3_v:
3143 case NEON::BI__builtin_neon_vqtbx3_v:
3144 case NEON::BI__builtin_neon_vqtbx3q_v:
3145 case NEON::BI__builtin_neon_vtbx4_v:
3146 case NEON::BI__builtin_neon_vqtbx4_v:
3147 case NEON::BI__builtin_neon_vqtbx4q_v:
3148 break;
3149 }
3150
3151 assert(E->getNumArgs() >= 3);
3152
3153 // Get the last argument, which specifies the vector type.
3154 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3155 std::optional<llvm::APSInt> Result =
3157 if (!Result)
3158 return nullptr;
3159
3160 // Determine the type of this overloaded NEON intrinsic.
3161 NeonTypeFlags Type = Result->getZExtValue();
3162 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
3163 if (!Ty)
3164 return nullptr;
3165
3166 CodeGen::CGBuilderTy &Builder = CGF.Builder;
3167
3168 // AArch64 scalar builtins are not overloaded, they do not have an extra
3169 // argument that specifies the vector type, need to handle each case.
3170 switch (BuiltinID) {
3171 case NEON::BI__builtin_neon_vtbl1_v: {
3172 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
3173 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3174 }
3175 case NEON::BI__builtin_neon_vtbl2_v: {
3176 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
3177 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3178 }
3179 case NEON::BI__builtin_neon_vtbl3_v: {
3180 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
3181 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3182 }
3183 case NEON::BI__builtin_neon_vtbl4_v: {
3184 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
3185 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3186 }
3187 case NEON::BI__builtin_neon_vtbx1_v: {
3188 Value *TblRes =
3189 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
3190 Intrinsic::aarch64_neon_tbl1, "vtbl1");
3191
3192 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
3193 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
3194 CmpRes = Builder.CreateSExt(CmpRes, Ty);
3195
3196 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
3197 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
3198 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
3199 }
3200 case NEON::BI__builtin_neon_vtbx2_v: {
3201 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
3202 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
3203 }
3204 case NEON::BI__builtin_neon_vtbx3_v: {
3205 Value *TblRes =
3206 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
3207 Intrinsic::aarch64_neon_tbl2, "vtbl2");
3208
3209 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
3210 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
3211 TwentyFourV);
3212 CmpRes = Builder.CreateSExt(CmpRes, Ty);
3213
3214 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
3215 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
3216 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
3217 }
3218 case NEON::BI__builtin_neon_vtbx4_v: {
3219 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
3220 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
3221 }
3222 case NEON::BI__builtin_neon_vqtbl1_v:
3223 case NEON::BI__builtin_neon_vqtbl1q_v:
3224 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
3225 case NEON::BI__builtin_neon_vqtbl2_v:
3226 case NEON::BI__builtin_neon_vqtbl2q_v: {
3227 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
3228 case NEON::BI__builtin_neon_vqtbl3_v:
3229 case NEON::BI__builtin_neon_vqtbl3q_v:
3230 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
3231 case NEON::BI__builtin_neon_vqtbl4_v:
3232 case NEON::BI__builtin_neon_vqtbl4q_v:
3233 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
3234 case NEON::BI__builtin_neon_vqtbx1_v:
3235 case NEON::BI__builtin_neon_vqtbx1q_v:
3236 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
3237 case NEON::BI__builtin_neon_vqtbx2_v:
3238 case NEON::BI__builtin_neon_vqtbx2q_v:
3239 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
3240 case NEON::BI__builtin_neon_vqtbx3_v:
3241 case NEON::BI__builtin_neon_vqtbx3q_v:
3242 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
3243 case NEON::BI__builtin_neon_vqtbx4_v:
3244 case NEON::BI__builtin_neon_vqtbx4q_v:
3245 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
3246 }
3247 }
3248
3249 if (!Int)
3250 return nullptr;
3251
3252 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
3253 return CGF.EmitNeonCall(F, Ops, s);
3254}
3255
// NOTE(review): the signature line of this definition is missing from this
// listing; only the body is visible. The body bitcasts Op to Int16Ty and
// inserts it at index 0 of a poison 4-element Int16Ty vector, which is
// returned.
3257 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
3258 Op = Builder.CreateBitCast(Op, Int16Ty);
3259 Value *V = PoisonValue::get(VTy);
3260 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3261 Op = Builder.CreateInsertElement(V, Op, CI);
3262 return Op;
3263}
3264
3265/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
3266/// access builtin. Only required if it can't be inferred from the base pointer
3267/// operand.
// NOTE(review): the signature line of this definition is missing from this
// listing; only the body is visible.
3269 switch (TypeFlags.getMemEltType()) {
3270 case SVETypeFlags::MemEltTyDefault:
// No explicit memory element type: use the element type from getEltType().
3271 return getEltType(TypeFlags);
3272 case SVETypeFlags::MemEltTyInt8:
3273 return Builder.getInt8Ty();
3274 case SVETypeFlags::MemEltTyInt16:
3275 return Builder.getInt16Ty();
3276 case SVETypeFlags::MemEltTyInt32:
3277 return Builder.getInt32Ty();
3278 case SVETypeFlags::MemEltTyInt64:
3279 return Builder.getInt64Ty();
3280 }
3281 llvm_unreachable("Unknown MemEltType");
3282}
3283
3284llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
3285 switch (TypeFlags.getEltType()) {
3286 default:
3287 llvm_unreachable("Invalid SVETypeFlag!");
3288
3289 case SVETypeFlags::EltTyMFloat8:
3290 case SVETypeFlags::EltTyInt8:
3291 return Builder.getInt8Ty();
3292 case SVETypeFlags::EltTyInt16:
3293 return Builder.getInt16Ty();
3294 case SVETypeFlags::EltTyInt32:
3295 return Builder.getInt32Ty();
3296 case SVETypeFlags::EltTyInt64:
3297 return Builder.getInt64Ty();
3298 case SVETypeFlags::EltTyInt128:
3299 return Builder.getInt128Ty();
3300
3301 case SVETypeFlags::EltTyFloat16:
3302 return Builder.getHalfTy();
3303 case SVETypeFlags::EltTyFloat32:
3304 return Builder.getFloatTy();
3305 case SVETypeFlags::EltTyFloat64:
3306 return Builder.getDoubleTy();
3307
3308 case SVETypeFlags::EltTyBFloat16:
3309 return Builder.getBFloatTy();
3310
3311 case SVETypeFlags::EltTyBool8:
3312 case SVETypeFlags::EltTyBool16:
3313 case SVETypeFlags::EltTyBool32:
3314 case SVETypeFlags::EltTyBool64:
3315 return Builder.getInt1Ty();
3316 }
3317}
3318
3319// Return the llvm predicate vector type corresponding to the specified element
3320// TypeFlags.
// NOTE(review): the line carrying the function name and parameters is missing
// from this listing. Every case below yields a scalable vector of i1; only the
// minimum element count (16, 8, 4 or 2) depends on the element kind. Element
// kinds without a case hit the unreachable default.
3321llvm::ScalableVectorType *
3323 switch (TypeFlags.getEltType()) {
3324 default: llvm_unreachable("Unhandled SVETypeFlag!");
3325
3326 case SVETypeFlags::EltTyInt8:
3327 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3328 case SVETypeFlags::EltTyInt16:
3329 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3330 case SVETypeFlags::EltTyInt32:
3331 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3332 case SVETypeFlags::EltTyInt64:
3333 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3334
3335 case SVETypeFlags::EltTyBFloat16:
3336 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3337 case SVETypeFlags::EltTyFloat16:
3338 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3339 case SVETypeFlags::EltTyFloat32:
3340 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3341 case SVETypeFlags::EltTyFloat64:
3342 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3343
3344 case SVETypeFlags::EltTyBool8:
3345 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3346 case SVETypeFlags::EltTyBool16:
3347 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3348 case SVETypeFlags::EltTyBool32:
3349 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3350 case SVETypeFlags::EltTyBool64:
3351 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3352 }
3353}
3354
3355// Return the llvm vector type corresponding to the specified element TypeFlags.
// NOTE(review): the line carrying the function name and parameters is missing
// from this listing. In every case the minimum element count times the element
// width is 128 bits, except the bool kinds, which use i1 elements with counts
// 16, 8, 4 and 2. Element kinds without a case hit the unreachable default.
3356llvm::ScalableVectorType *
3358 switch (TypeFlags.getEltType()) {
3359 default:
3360 llvm_unreachable("Invalid SVETypeFlag!");
3361
3362 case SVETypeFlags::EltTyInt8:
3363 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
3364 case SVETypeFlags::EltTyInt16:
3365 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
3366 case SVETypeFlags::EltTyInt32:
3367 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
3368 case SVETypeFlags::EltTyInt64:
3369 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
3370
// MFloat8 vectors use the same i8 x 16 layout as EltTyInt8.
3371 case SVETypeFlags::EltTyMFloat8:
3372 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
3373 case SVETypeFlags::EltTyFloat16:
3374 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
3375 case SVETypeFlags::EltTyBFloat16:
3376 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
3377 case SVETypeFlags::EltTyFloat32:
3378 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
3379 case SVETypeFlags::EltTyFloat64:
3380 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
3381
3382 case SVETypeFlags::EltTyBool8:
3383 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3384 case SVETypeFlags::EltTyBool16:
3385 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3386 case SVETypeFlags::EltTyBool32:
3387 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3388 case SVETypeFlags::EltTyBool64:
3389 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3390 }
3391}
3392
// NOTE(review): the line carrying the function name and parameters is missing
// from this listing. The body calls the aarch64_sve_ptrue intrinsic,
// overloaded on getSVEPredType(TypeFlags), with the i32 pattern operand 31
// (annotated SV_ALL below).
3393llvm::Value *
3395 Function *Ptrue =
3396 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
3397 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
3398}
3399
3400constexpr unsigned SVEBitsPerBlock = 128;
3401
3402static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
3403 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
3404 return llvm::ScalableVectorType::get(EltTy, NumElts);
3405}
3406
3407// Reinterpret the input predicate so that it can be used to correctly isolate
3408// the elements of the specified datatype.
// NOTE(review): the first line of the signature (function name and the Pred
// parameter) is missing from this listing.
3410 llvm::ScalableVectorType *VTy) {
3411
// A value of the target extension type "aarch64.svcount" is returned as is.
3412 if (isa<TargetExtType>(Pred->getType()) &&
3413 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
3414 return Pred;
3415
// RTy is the i1 vector type derived from VTy; nothing to do if Pred already
// has that type.
3416 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
3417 if (Pred->getType() == RTy)
3418 return Pred;
3419
// Pick the conversion intrinsic from VTy's minimum element count: counts below
// 16 use convert_from_svbool (overloaded on the result type RTy), a count of
// 16 uses convert_to_svbool (overloaded on the input predicate type).
3420 unsigned IntID;
3421 llvm::Type *IntrinsicTy;
3422 switch (VTy->getMinNumElements()) {
3423 default:
3424 llvm_unreachable("unsupported element count!");
3425 case 1:
3426 case 2:
3427 case 4:
3428 case 8:
3429 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
3430 IntrinsicTy = RTy;
3431 break;
3432 case 16:
3433 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
3434 IntrinsicTy = Pred->getType();
3435 break;
3436 }
3437
3438 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
3439 Value *C = Builder.CreateCall(F, Pred);
3440 assert(C->getType() == RTy && "Unexpected return type!");
3441 return C;
3442}
3443
// NOTE(review): the first line of the signature (function name and the
// PredTuple parameter) is missing from this listing.
// If PredTuple already has type Ty it is returned unchanged. Otherwise each
// element is extracted, converted with EmitSVEPredicateCast to the matching
// element type of Ty, and inserted into a new tuple that starts out as poison.
3445 llvm::StructType *Ty) {
3446 if (PredTuple->getType() == Ty)
3447 return PredTuple;
3448
3449 Value *Ret = llvm::PoisonValue::get(Ty);
3450 for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
3451 Value *Pred = Builder.CreateExtractValue(PredTuple, I);
3452 Pred = EmitSVEPredicateCast(
3453 Pred, cast<llvm::ScalableVectorType>(Ty->getTypeAtIndex(I)));
3454 Ret = Builder.CreateInsertValue(Ret, Pred, I);
3455 }
3456
3457 return Ret;
3458}
3459
// NOTE(review): the opening signature lines (function name and the TypeFlags
// and Ops parameters used below) are missing from this listing.
// Operand layout as used by the body: Ops[0] is the predicate, Ops[1] the base
// (its type is the second intrinsic overload type), Ops[2] the optional
// offset/index.
3462 unsigned IntID) {
3463 auto *ResultTy = getSVEType(TypeFlags);
// OverloadedTy has the memory element type with ResultTy's element count.
3464 auto *OverloadedTy =
3465 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
3466 Function *F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
3467
3468 // At the ACLE level there's only one predicate type, svbool_t, which is
3469 // mapped to <n x 16 x i1>. However, this might be incompatible with the
3470 // actual type being loaded. For example, when loading doubles (i64) the
3471 // predicate should be <n x 2 x i1> instead. At the IR level the type of
3472 // the predicate and the data being loaded must match. Cast to the type
3473 // expected by the intrinsic. The intrinsic itself should be defined in
3474 // a way that enforces relations between parameter types.
3475 Ops[0] = EmitSVEPredicateCast(
3476 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
3477
3478 // Pass 0 when the offset is missing. This can only be applied when using
3479 // the "vector base" addressing mode for which ACLE allows no offset. The
3480 // corresponding LLVM IR always requires an offset.
3481 if (Ops.size() == 2) {
3482 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
3483 Ops.push_back(ConstantInt::get(Int64Ty, 0));
3484 }
3485
3486 // For "vector base, scalar index" scale the index so that it becomes a
3487 // scalar offset.
3488 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
3489 unsigned BytesPerElt =
3490 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
// Multiply by the element size via a left shift by log2(BytesPerElt).
3491 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
3492 }
3493
3494 Value *Call = Builder.CreateCall(F, Ops);
3495
3496 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
3497 // other cases it's folded into a nop.
3498 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
3499 : Builder.CreateSExt(Call, ResultTy);
3500}
3501
// NOTE(review): the opening signature lines (function name and the TypeFlags
// and Ops parameters used below) are missing from this listing.
// After the reordering below, Ops[0] is the data to store, Ops[1] the
// predicate, Ops[2] the base and Ops[3] the offset/index.
3504 unsigned IntID) {
3505 auto *SrcDataTy = getSVEType(TypeFlags);
// OverloadedTy has the memory element type with SrcDataTy's element count.
3506 auto *OverloadedTy =
3507 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
3508
3509 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
3510 // it's the first argument. Move it accordingly.
3511 Ops.insert(Ops.begin(), Ops.pop_back_val());
3512
3513 Function *F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
3514
3515 // Pass 0 when the offset is missing. This can only be applied when using
3516 // the "vector base" addressing mode for which ACLE allows no offset. The
3517 // corresponding LLVM IR always requires an offset.
3518 if (Ops.size() == 3) {
3519 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
3520 Ops.push_back(ConstantInt::get(Int64Ty, 0));
3521 }
3522
3523 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
3524 // folded into a nop.
3525 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
3526
3527 // At the ACLE level there's only one predicate type, svbool_t, which is
3528 // mapped to <n x 16 x i1>. However, this might be incompatible with the
3529 // actual type being stored. For example, when storing doubles (i64) the
3530 // predicate should be <n x 2 x i1> instead. At the IR level the type of
3531 // the predicate and the data being stored must match. Cast to the type
3532 // expected by the intrinsic. The intrinsic itself should be defined in
3533 // a way that enforces relations between parameter types.
3534 Ops[1] = EmitSVEPredicateCast(
3535 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
3536
3537 // For "vector base, scalar index" scale the index so that it becomes a
3538 // scalar offset.
3539 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
3540 unsigned BytesPerElt =
3541 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
// Multiply by the element size via a left shift by log2(BytesPerElt).
3542 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
3543 }
3544
3545 return Builder.CreateCall(F, Ops);
3546}
3547
// NOTE(review): the opening signature lines (function name and the TypeFlags
// and Ops parameters used below) are missing from this listing.
3550 unsigned IntID) {
3551 // The gather prefetches are overloaded on the vector input - this can either
3552 // be the vector of base addresses or vector of offsets.
// Use Ops[1]'s type when it is a scalable vector; otherwise Ops[2] must be one.
3553 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
3554 if (!OverloadedTy)
3555 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
3556
3557 // Cast the predicate from svbool_t to the right number of elements.
3558 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
3559
3560 // vector + imm addressing modes
3561 if (Ops[1]->getType()->isVectorTy()) {
3562 if (Ops.size() == 3) {
3563 // Pass 0 for 'vector+imm' when the index is omitted.
3564 Ops.push_back(ConstantInt::get(Int64Ty, 0));
3565
3566 // The sv_prfop is the last operand in the builtin and IR intrinsic.
3567 std::swap(Ops[2], Ops[3]);
3568 } else {
3569 // Index needs to be passed as scaled offset.
3570 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
3571 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
// Single-byte elements need no scaling, so the shift is skipped.
3572 if (BytesPerElt > 1)
3573 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
3574 }
3575
3576 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
3577 return Builder.CreateCall(F, Ops);
3578 }
3579
// Ops[1] is not a vector here: the intrinsic is overloaded on both Ops[1]'s
// type and the vector type taken from Ops[2].
3580 Function *F = CGM.getIntrinsic(IntID, {Ops[1]->getType(), OverloadedTy});
3581 return Builder.CreateCall(F, Ops);
3582}
3583
// NOTE(review): the opening signature lines (function name and the TypeFlags
// and Ops parameters used below) are missing from this listing.
// Ops[0] is the predicate, Ops[1] the base pointer and Ops[2], when present,
// an index applied to the base in units of the vector type VTy.
3586 unsigned IntID) {
3587 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
3588 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
3589 Value *BasePtr = Ops[1];
3590
3591 // Does the load have an offset?
3592 if (Ops.size() > 2)
3593 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
3594
3595 Function *F = CGM.getIntrinsic(IntID, {VTy, BasePtr->getType()});
3596 return Builder.CreateCall(F, {Predicate, BasePtr});
3597}
3598
3601 unsigned IntID) {
3602 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
3603
3604 unsigned N;
3605 switch (IntID) {
3606 case Intrinsic::aarch64_sve_st2:
3607 case Intrinsic::aarch64_sve_st1_pn_x2:
3608 case Intrinsic::aarch64_sve_stnt1_pn_x2:
3609 case Intrinsic::aarch64_sve_st2q:
3610 N = 2;
3611 break;
3612 case Intrinsic::aarch64_sve_st3:
3613 case Intrinsic::aarch64_sve_st3q:
3614 N = 3;
3615 break;
3616 case Intrinsic::aarch64_sve_st4:
3617 case Intrinsic::aarch64_sve_st1_pn_x4:
3618 case Intrinsic::aarch64_sve_stnt1_pn_x4:
3619 case Intrinsic::aarch64_sve_st4q:
3620 N = 4;
3621 break;
3622 default:
3623 llvm_unreachable("unknown intrinsic!");
3624 }
3625
3626 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
3627 Value *BasePtr = Ops[1];
3628
3629 // Does the store have an offset?
3630 if (Ops.size() > (2 + N))
3631 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
3632
3633 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
3634 // need to break up the tuple vector.
3636 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
3637 Operands.push_back(Ops[I]);
3638 Operands.append({Predicate, BasePtr});
3639 Function *F = CGM.getIntrinsic(IntID, {VTy, BasePtr->getType()});
3640
3641 return Builder.CreateCall(F, Operands);
3642}
3643
3644// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
3645// svpmullt_pair intrinsics, with the exception that their results are bitcast
3646// to a wider type.
// NOTE(review): the opening signature lines (function name and the TypeFlags
// and Ops parameters used below) are missing from this listing. BuiltinID is
// passed to CGM.getIntrinsic below, i.e. it is used as an intrinsic ID.
3649 unsigned BuiltinID) {
3650 // Splat scalar operand to vector (intrinsics with _n infix)
3651 if (TypeFlags.hasSplatOperand()) {
3652 unsigned OpNo = TypeFlags.getSplatOperand();
3653 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
3654 }
3655
3656 // The pair-wise function has a narrower overloaded type.
3657 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
3658 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
3659
3660 // Now bitcast to the wider result type.
3661 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
3662 return EmitSVEReinterpret(Call, Ty);
3663}
3664
// NOTE(review): the first line of the signature (function name and the
// TypeFlags parameter) is missing from this listing.
// Calls the intrinsic identified by BuiltinID, overloaded on
// getSVEType(TypeFlags), with Ops[0] and an i32 constant 0 as operands.
3666 ArrayRef<Value *> Ops, unsigned BuiltinID) {
3667 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
3668 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
3669 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
3670}
3671
// NOTE(review): the opening signature lines (function name and the TypeFlags
// and Ops parameters used below) are missing from this listing. BuiltinID is
// passed to CGM.getIntrinsic below, i.e. it is used as an intrinsic ID.
// Ops[0] is the predicate, Ops[1] the base pointer, the last operand the
// prefetch operation, and Ops[2] an index when more than three operands exist.
3674 unsigned BuiltinID) {
3675 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
3676 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
3677 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
3678
3679 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
3680 Value *BasePtr = Ops[1];
3681
3682 // Implement the index operand if not omitted.
3683 if (Ops.size() > 3)
3684 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
3685
3686 Value *PrfOp = Ops.back();
3687
3688 llvm::Type *Tys[2] = {Predicate->getType(), BasePtr->getType()};
3689 Function *F = CGM.getIntrinsic(BuiltinID, Tys);
3690 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
3691}
3692
// NOTE(review): parts of the signature are missing from this listing (the
// function name and the declarations of E and Ops, both used below).
// Ops[0] is the predicate, Ops[1] the base pointer and Ops[2], when present,
// an index applied in units of MemoryTy.
3694 llvm::Type *ReturnTy,
3696 unsigned IntrinsicID,
3697 bool IsZExtReturn) {
// The memory element type is the LLVM type of what argument 1 points to.
3698 QualType LangPTy = E->getArg(1)->getType();
3699 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
3700 LangPTy->castAs<PointerType>()->getPointeeType());
3701
3702 // Mfloat8 types are stored as a vector, so extra work is needed
3703 // to extract the scalar element type.
3704 if (MemEltTy->isVectorTy()) {
3705 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
3706 "Only <1 x i8> expected");
3707 MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
3708 }
3709
3710 // The vector type that is returned may be different from the
3711 // eventual type loaded from memory.
3712 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
3713 llvm::ScalableVectorType *MemoryTy = nullptr;
3714 llvm::ScalableVectorType *PredTy = nullptr;
3715 bool IsQuadLoad = false;
3716 switch (IntrinsicID) {
// ld1uwq/ld1udq: memory and predicate types use a minimum element count of 1.
3717 case Intrinsic::aarch64_sve_ld1uwq:
3718 case Intrinsic::aarch64_sve_ld1udq:
3719 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
3720 PredTy = llvm::ScalableVectorType::get(
3721 llvm::Type::getInt1Ty(getLLVMContext()), 1);
3722 IsQuadLoad = true;
3723 break;
3724 default:
3725 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
3726 PredTy = MemoryTy;
3727 break;
3728 }
3729
3730 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
3731 Value *BasePtr = Ops[1];
3732
3733 // Does the load have an offset?
3734 if (Ops.size() > 2)
3735 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
3736
// Quad loads are overloaded on the returned vector type, others on MemoryTy.
3737 llvm::Type *Tys[2] = {IsQuadLoad ? VectorTy : MemoryTy, BasePtr->getType()};
3738 Function *F = CGM.getIntrinsic(IntrinsicID, Tys);
3739 auto *Load = Builder.CreateCall(F, {Predicate, BasePtr});
3740 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
3741 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
3742
3743 if (IsQuadLoad)
3744 return Load;
3745
// Extend the loaded value to the returned vector type, zero- or sign-extending
// as selected by IsZExtReturn.
3746 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
3747 : Builder.CreateSExt(Load, VectorTy);
3748}
3749
// NOTE(review): the opening signature lines are missing from this listing (the
// function name and the declarations of E and Ops, both used below).
// Ops[0] is the predicate, Ops[1] the base pointer, the last operand the data
// to store, and Ops[2] an index when exactly four operands are present.
3752 unsigned IntrinsicID) {
// The memory element type is the LLVM type of what argument 1 points to.
3753 QualType LangPTy = E->getArg(1)->getType();
3754 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
3755 LangPTy->castAs<PointerType>()->getPointeeType());
3756
3757 // Mfloat8 types are stored as a vector, so extra work is needed
3758 // to extract the scalar element type.
3759 if (MemEltTy->isVectorTy()) {
3760 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
3761 "Only <1 x i8> expected");
3762 MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
3763 }
3764
3765 // The type of the vector operand being stored may be different from the
3766 // eventual type stored to memory.
3767 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
3768 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
3769
3770 auto PredTy = MemoryTy;
3771 auto AddrMemoryTy = MemoryTy;
3772 bool IsQuadStore = false;
3773
3774 switch (IntrinsicID) {
// st1wq/st1dq: addressing and predicate types use a minimum element count of 1.
3775 case Intrinsic::aarch64_sve_st1wq:
3776 case Intrinsic::aarch64_sve_st1dq:
3777 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
3778 PredTy =
3779 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
3780 IsQuadStore = true;
3781 break;
3782 default:
3783 break;
3784 }
3785 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
3786 Value *BasePtr = Ops[1];
3787
3788 // Does the store have an offset?
3789 if (Ops.size() == 4)
3790 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
3791
3792 // Last value is always the data
// Quad stores pass the data through unchanged; others truncate it to MemoryTy.
3793 Value *Val =
3794 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
3795
3796 llvm::Type *Tys[2] = {IsQuadStore ? VectorTy : MemoryTy, BasePtr->getType()};
3797 Function *F = CGM.getIntrinsic(IntrinsicID, Tys);
3798 auto *Store = Builder.CreateCall(F, {Val, Predicate, BasePtr});
3799 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
3800 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
3801 return Store;
3802}
3803
3806 unsigned IntID) {
3807 Ops[2] = EmitSVEPredicateCast(
3809
3810 SmallVector<Value *> NewOps;
3811 NewOps.push_back(Ops[2]);
3812
3813 llvm::Value *BasePtr = Ops[3];
3814 llvm::Value *RealSlice = Ops[1];
3815 // If the intrinsic contains the vnum parameter, multiply it with the vector
3816 // size in bytes.
3817 if (Ops.size() == 5) {
3818 Function *StreamingVectorLength =
3819 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd);
3820 llvm::Value *StreamingVectorLengthCall =
3821 Builder.CreateMul(Builder.CreateCall(StreamingVectorLength),
3822 llvm::ConstantInt::get(Int64Ty, 8), "svl",
3823 /* HasNUW */ true, /* HasNSW */ true);
3824 llvm::Value *Mulvl =
3825 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
3826 // The type of the ptr parameter is void *, so use Int8Ty here.
3827 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
3828 RealSlice = Builder.CreateZExt(RealSlice, Int64Ty);
3829 RealSlice = Builder.CreateAdd(RealSlice, Ops[4]);
3830 RealSlice = Builder.CreateTrunc(RealSlice, Int32Ty);
3831 }
3832 NewOps.push_back(BasePtr);
3833 NewOps.push_back(Ops[0]);
3834 NewOps.push_back(RealSlice);
3835 Function *F = CGM.getIntrinsic(IntID, BasePtr->getType());
3836 return Builder.CreateCall(F, NewOps);
3837}
3838
// NOTE(review): the opening signature lines (function name and the TypeFlags
// and Ops parameters used below) are missing from this listing.
// Calls the IntID intrinsic overloaded on getSVEType(TypeFlags). The predicate
// operand is cast to match that vector type first: it is Ops[1] when the flags
// report a ZA read and Ops[2] when they report a ZA write.
3841 unsigned IntID) {
3842 auto *VecTy = getSVEType(TypeFlags);
3843 Function *F = CGM.getIntrinsic(IntID, VecTy);
3844 if (TypeFlags.isReadZA())
3845 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
3846 else if (TypeFlags.isWriteZA())
3847 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
3848 return Builder.CreateCall(F, Ops);
3849}
3850
// NOTE(review): the opening signature lines (function name and the Ops
// parameter used below) are missing from this listing.
3853 unsigned IntID) {
3854 // svzero_za() intrinsic zeros the entire za tile and has no parameters.
// With no operands supplied, pass the i32 constant 255 as the only operand.
3855 if (Ops.size() == 0)
3856 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
3857 Function *F = CGM.getIntrinsic(IntID, {});
3858 return Builder.CreateCall(F, Ops);
3859}
3860
// NOTE(review): the opening signature lines (function name and the Ops
// parameter used below) are missing from this listing.
// Ensures a third i32 operand exists: an i32 0 is appended when only two
// operands were given, otherwise Ops[2] is cast to Int32Ty as a signed value.
// The intrinsic is overloaded on the type of Ops[1].
3863 unsigned IntID) {
3864 if (Ops.size() == 2)
3865 Ops.push_back(Builder.getInt32(0));
3866 else
3867 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
3868 Function *F = CGM.getIntrinsic(IntID, Ops[1]->getType());
3869 return Builder.CreateCall(F, Ops);
3870}
3871
3872// Limit the usage of scalable llvm IR generated by the ACLE by using the
3873// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
3874Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
3875 return Builder.CreateVectorSplat(
3876 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
3877}
3878
// NOTE(review): the signature line of this function is missing from this
// listing; the body reads a single `Scalar` value and forwards to the
// two-argument EmitSVEDupX, so it is presumably the one-argument overload.
//
// Splats Scalar across the SVE vector type whose element type is Scalar's own
// type. A vector-typed input is first reduced to its element 0; debug builds
// assert that such an input is exactly <1 x i8>.
3880 if (auto *Ty = Scalar->getType(); Ty->isVectorTy()) {
3881#ifndef NDEBUG
3882 auto *VecTy = cast<llvm::VectorType>(Ty);
3883 ElementCount EC = VecTy->getElementCount();
3884 assert(EC.isScalar() && VecTy->getElementType() == Int8Ty &&
3885 "Only <1 x i8> expected");
3886#endif
3887 Scalar = Builder.CreateExtractElement(Scalar, uint64_t(0));
3888 }
// Scalar->getType() is evaluated after the extraction above, so the result
// vector uses the extracted element's type.
3889 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
3890}
3891
// NOTE(review): the signature line of this function is missing from this
// listing. The call `EmitSVEReinterpret(Val, Ty)` further down suggests this
// is that function, taking the value to convert and the destination type.
//
// Bitcasts Val to Ty. When Ty is a struct type, the conversion is done member
// by member and the results are reassembled into a new struct value.
3893 // FIXME: For big endian this needs an additional REV, or needs a separate
3894 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
3895 // instruction is defined as 'bitwise' equivalent from memory point of
3896 // view (when storing/reloading), whereas the svreinterpret builtin
3897 // implements bitwise equivalent cast from register point of view.
3898 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
3899
3900 if (auto *StructTy = dyn_cast<StructType>(Ty)) {
// Start from a poison struct and fill in each member with the bitcast of the
// corresponding member of Val.
3901 Value *Tuple = llvm::PoisonValue::get(Ty);
3902
3903 for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
3904 Value *In = Builder.CreateExtractValue(Val, I);
3905 Value *Out = Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
3906 Tuple = Builder.CreateInsertValue(Tuple, Out, I);
3907 }
3908
3909 return Tuple;
3910 }
3911
// Non-struct destination: a single bitcast is enough.
3912 return Builder.CreateBitCast(Val, Ty);
3913}
3914
// Prepends the null (all-zero) constant of type Ty to the front of Ops.
// NOTE(review): the second line of this signature (which declares Ops) is
// missing from this listing. Builder is not used by the visible body.
3915static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
3917 auto *SplatZero = Constant::getNullValue(Ty);
3918 Ops.insert(Ops.begin(), SplatZero);
3919}
3920
// Prepends an undef value of type Ty to the front of Ops.
// NOTE(review): the second line of this signature (which declares Ops) is
// missing from this listing. Builder is not used by the visible body.
3921static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
3923 auto *SplatUndef = UndefValue::get(Ty);
3924 Ops.insert(Ops.begin(), SplatUndef);
3925}
3926
// Chooses the list of overload types for an intrinsic lookup, based on the
// overload kind recorded in TypeFlags. The list is empty for
// isOverloadNone(); otherwise it holds one or two types taken from
// getSVEType(TypeFlags), getSVEPredType(TypeFlags), ResultType and the types
// of individual operands in Ops. The checks are tried in the order written
// and the first one that matches wins.
// NOTE(review): the signature line carrying the function name and the
// TypeFlags parameter is missing from this listing; the call
// `getSVEOverloadTypes(TypeFlags, Ty, Ops)` further down suggests the name.
3927SmallVector<llvm::Type *, 2>
3929 llvm::Type *ResultType,
3930 ArrayRef<Value *> Ops) {
3931 if (TypeFlags.isOverloadNone())
3932 return {};
3933
3934 llvm::Type *DefaultType = getSVEType(TypeFlags);
3935
3936 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
3937 return {DefaultType, Ops[1]->getType()};
3938
3939 if (TypeFlags.isOverloadWhileRW())
3940 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
3941
3942 if (TypeFlags.isOverloadDefaultAndOp0())
3943 return {DefaultType, Ops[0]->getType()};
3944
3945 if (TypeFlags.isOverloadFirstandLast())
3946 return {Ops[0]->getType(), Ops.back()->getType()};
3947
3948 if (TypeFlags.isReductionQV())
3949 return {ResultType, Ops[1]->getType()};
3950
// Anything left must be the plain "default type only" overload.
3951 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
3952 return {DefaultType};
3953}
3954
3956 ArrayRef<Value *> Ops) {
// NOTE(review): the first line of this signature is missing from this
// listing. The call `EmitSVETupleSetOrGet(TypeFlags, Ops)` further down
// suggests this is that function.
//
// Implements the tuple set/get builtins on an aggregate value:
//   set: returns Ops[0] with member Idx replaced by Ops[2];
//   get: returns member Idx of Ops[0].
// Ops[1] must be a ConstantInt; its zero-extended value is the member index.
3957 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
3958 "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
3959 unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();
3960
3961 if (TypeFlags.isTupleSet())
3962 return Builder.CreateInsertValue(Ops[0], Ops[2], Idx);
3963 return Builder.CreateExtractValue(Ops[0], Idx);
3964}
3965
// NOTE(review): the first line of this signature is missing from this
// listing. The call `EmitSVETupleCreate(TypeFlags, Ty, Ops)` further down
// suggests this is that function.
3967 llvm::Type *Ty,
3968 ArrayRef<Value *> Ops) {
// Builds an aggregate value of type Ty whose member I is Ops[I], for every
// operand in Ops.
3969 assert(TypeFlags.isTupleCreate() && "Expects TypleFlag isTupleCreate");
3970
// Begin with a poison aggregate and insert the operands one by one.
3971 Value *Tuple = llvm::PoisonValue::get(Ty);
3972 for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
3973 Tuple = Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
3974
3975 return Tuple;
3976}
3977
// NOTE(review): the first line of this signature and two statements (the
// declaration of `Error` and the initializer of `Result`) are missing from
// this listing. The call `GetAArch64SVEProcessedOperands(BuiltinID, E, Ops,
// TypeFlags)` further down suggests the function name.
//
// Appends the emitted arguments of call E to Ops:
//   - arguments flagged as integer constant expressions become 32-bit
//     ConstantInt values;
//   - struct-typed arguments are expanded into one operand per member, except
//     for tuple set/get builtins, which keep the struct whole;
//   - every other argument is appended unchanged.
3979 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
3980 SVETypeFlags TypeFlags) {
3981 // Find out if any arguments are required to be integer constant expressions.
3982 unsigned ICEArguments = 0;
3984 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3985 assert(Error == ASTContext::GE_None && "Should not codegen an error");
3986
3987 // Tuple set/get only requires one insert/extract vector, which is
3988 // created by EmitSVETupleSetOrGet.
3989 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
3990
3991 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
// Bit i of ICEArguments marks argument i as a required constant.
3992 bool IsICE = ICEArguments & (1 << i);
3993 Value *Arg = EmitScalarExpr(E->getArg(i));
3994
3995 if (IsICE) {
3996 // If this is required to be a constant, constant fold it so that we know
3997 // that the generated intrinsic gets a ConstantInt.
3998 std::optional<llvm::APSInt> Result =
4000 assert(Result && "Expected argument to be a constant");
4001
4002 // Immediates for SVE llvm intrinsics are always 32bit. We can safely
4003 // truncate because the immediate has been range checked and no valid
4004 // immediate requires more than a handful of bits.
4005 *Result = Result->extOrTrunc(32);
4006 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
4007 continue;
4008 }
4009
4010 if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
// Flatten the struct: each member becomes its own operand.
4011 for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
4012 Ops.push_back(Builder.CreateExtractValue(Arg, I));
4013
4014 continue;
4015 }
4016
4017 Ops.push_back(Arg);
4018 }
4019}
4020
4022 const CallExpr *E) {
4023 llvm::Type *Ty = ConvertType(E->getType());
4024 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
4025 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
4026 Value *Val = EmitScalarExpr(E->getArg(0));
4027 return EmitSVEReinterpret(Val, Ty);
4028 }
4029
4032
4034 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4035 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4036
4037 if (TypeFlags.isLoad())
4038 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
4039 TypeFlags.isZExtReturn());
4040 if (TypeFlags.isStore())
4041 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
4042 if (TypeFlags.isGatherLoad())
4043 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4044 if (TypeFlags.isScatterStore())
4045 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4046 if (TypeFlags.isPrefetch())
4047 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4048 if (TypeFlags.isGatherPrefetch())
4049 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4050 if (TypeFlags.isStructLoad())
4051 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4052 if (TypeFlags.isStructStore())
4053 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4054 if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
4055 return EmitSVETupleSetOrGet(TypeFlags, Ops);
4056 if (TypeFlags.isTupleCreate())
4057 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
4058 if (TypeFlags.isUndef())
4059 return UndefValue::get(Ty);
4060
4061 // Handle built-ins for which there is a corresponding LLVM Intrinsic.
4062 // -------------------------------------------------------------------
4063 if (Builtin->LLVMIntrinsic != 0) {
4064 // Emit set FPMR for intrinsics that require it
4065 if (TypeFlags.setsFPMR())
4066 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
4067 Ops.pop_back_val());
4068 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
4070
4071 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
4073
4074 // Some ACLE builtins leave out the argument to specify the predicate
4075 // pattern, which is expected to be expanded to an SV_ALL pattern.
4076 if (TypeFlags.isAppendSVALL())
4077 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
4078 if (TypeFlags.isInsertOp1SVALL())
4079 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
4080
4081 // Predicates must match the main datatype.
4082 for (Value *&Op : Ops)
4083 if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
4084 if (PredTy->getElementType()->isIntegerTy(1))
4085 Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));
4086
4087 // Splat scalar operand to vector (intrinsics with _n infix)
4088 if (TypeFlags.hasSplatOperand()) {
4089 unsigned OpNo = TypeFlags.getSplatOperand();
4090 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
4091 }
4092
4093 if (TypeFlags.isReverseCompare())
4094 std::swap(Ops[1], Ops[2]);
4095 else if (TypeFlags.isReverseUSDOT())
4096 std::swap(Ops[1], Ops[2]);
4097 else if (TypeFlags.isReverseMergeAnyBinOp() &&
4098 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4099 std::swap(Ops[1], Ops[2]);
4100 else if (TypeFlags.isReverseMergeAnyAccOp() &&
4101 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4102 std::swap(Ops[1], Ops[3]);
4103
4104 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
4105 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
4106 llvm::Type *OpndTy = Ops[1]->getType();
4107 auto *SplatZero = Constant::getNullValue(OpndTy);
4108 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
4109 }
4110
4111 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
4112 getSVEOverloadTypes(TypeFlags, Ty, Ops));
4113 Value *Call = Builder.CreateCall(F, Ops);
4114
4115 if (Call->getType() == Ty)
4116 return Call;
4117
4118 // Predicate results must be converted to svbool_t.
4119 if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ty))
4120 return EmitSVEPredicateCast(Call, PredTy);
4121 if (auto PredTupleTy = dyn_cast<llvm::StructType>(Ty))
4122 return EmitSVEPredicateTupleCast(Call, PredTupleTy);
4123
4124 llvm_unreachable("unsupported element count!");
4125 }
4126
4127 switch (BuiltinID) {
4128 default:
4129 return nullptr;
4130
4131 case SVE::BI__builtin_sve_svreinterpret_b: {
4132 auto SVCountTy =
4133 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4134 Function *CastFromSVCountF =
4135 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4136 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
4137 }
4138 case SVE::BI__builtin_sve_svreinterpret_c: {
4139 auto SVCountTy =
4140 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4141 Function *CastToSVCountF =
4142 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4143 return Builder.CreateCall(CastToSVCountF, Ops[0]);
4144 }
4145
4146 case SVE::BI__builtin_sve_svpsel_lane_b8:
4147 case SVE::BI__builtin_sve_svpsel_lane_b16:
4148 case SVE::BI__builtin_sve_svpsel_lane_b32:
4149 case SVE::BI__builtin_sve_svpsel_lane_b64:
4150 case SVE::BI__builtin_sve_svpsel_lane_c8:
4151 case SVE::BI__builtin_sve_svpsel_lane_c16:
4152 case SVE::BI__builtin_sve_svpsel_lane_c32:
4153 case SVE::BI__builtin_sve_svpsel_lane_c64: {
4154 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
4155 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
4156 "aarch64.svcount")) &&
4157 "Unexpected TargetExtType");
4158 auto SVCountTy =
4159 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4160 Function *CastFromSVCountF =
4161 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4162 Function *CastToSVCountF =
4163 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4164
4165 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
4166 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
4167 llvm::Value *Ops0 =
4168 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
4169 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
4170 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
4171 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
4172 }
4173 case SVE::BI__builtin_sve_svmov_b_z: {
4174 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
4175 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4176 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4177 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
4178 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
4179 }
4180
4181 case SVE::BI__builtin_sve_svnot_b_z: {
4182 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
4183 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4184 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4185 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
4186 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
4187 }
4188
4189 case SVE::BI__builtin_sve_svmovlb_u16:
4190 case SVE::BI__builtin_sve_svmovlb_u32:
4191 case SVE::BI__builtin_sve_svmovlb_u64:
4192 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
4193
4194 case SVE::BI__builtin_sve_svmovlb_s16:
4195 case SVE::BI__builtin_sve_svmovlb_s32:
4196 case SVE::BI__builtin_sve_svmovlb_s64:
4197 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
4198
4199 case SVE::BI__builtin_sve_svmovlt_u16:
4200 case SVE::BI__builtin_sve_svmovlt_u32:
4201 case SVE::BI__builtin_sve_svmovlt_u64:
4202 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
4203
4204 case SVE::BI__builtin_sve_svmovlt_s16:
4205 case SVE::BI__builtin_sve_svmovlt_s32:
4206 case SVE::BI__builtin_sve_svmovlt_s64:
4207 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
4208
4209 case SVE::BI__builtin_sve_svpmullt_u16:
4210 case SVE::BI__builtin_sve_svpmullt_u64:
4211 case SVE::BI__builtin_sve_svpmullt_n_u16:
4212 case SVE::BI__builtin_sve_svpmullt_n_u64:
4213 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
4214
4215 case SVE::BI__builtin_sve_svpmullb_u16:
4216 case SVE::BI__builtin_sve_svpmullb_u64:
4217 case SVE::BI__builtin_sve_svpmullb_n_u16:
4218 case SVE::BI__builtin_sve_svpmullb_n_u64:
4219 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
4220
4221 case SVE::BI__builtin_sve_svdup_n_b8:
4222 case SVE::BI__builtin_sve_svdup_n_b16:
4223 case SVE::BI__builtin_sve_svdup_n_b32:
4224 case SVE::BI__builtin_sve_svdup_n_b64: {
4225 Value *CmpNE =
4226 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
4227 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
4228 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
4230 }
4231
4232 case SVE::BI__builtin_sve_svdupq_n_b8:
4233 case SVE::BI__builtin_sve_svdupq_n_b16:
4234 case SVE::BI__builtin_sve_svdupq_n_b32:
4235 case SVE::BI__builtin_sve_svdupq_n_b64:
4236 case SVE::BI__builtin_sve_svdupq_n_u8:
4237 case SVE::BI__builtin_sve_svdupq_n_s8:
4238 case SVE::BI__builtin_sve_svdupq_n_u64:
4239 case SVE::BI__builtin_sve_svdupq_n_f64:
4240 case SVE::BI__builtin_sve_svdupq_n_s64:
4241 case SVE::BI__builtin_sve_svdupq_n_u16:
4242 case SVE::BI__builtin_sve_svdupq_n_f16:
4243 case SVE::BI__builtin_sve_svdupq_n_bf16:
4244 case SVE::BI__builtin_sve_svdupq_n_s16:
4245 case SVE::BI__builtin_sve_svdupq_n_u32:
4246 case SVE::BI__builtin_sve_svdupq_n_f32:
4247 case SVE::BI__builtin_sve_svdupq_n_s32: {
4248 // These builtins are implemented by inserting a fixed vector of the operands
4249 // into a poison scalable vector and calling the dupq_lane intrinsic on lane 0.
4250 unsigned NumOpnds = Ops.size();
4251
4252 bool IsBoolTy =
4253 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
4254
4255 // For svdupq_n_b* the element type is an integer of width 128/numelts,
4256 // so that the compare can use the width that is natural for the expected
4257 // number of predicate lanes.
4258 llvm::Type *EltTy = Ops[0]->getType();
4259 if (IsBoolTy)
4260 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
4261
4263 for (unsigned I = 0; I < NumOpnds; ++I)
4264 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
4265 Value *Vec = BuildVector(VecOps);
4266
4267 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
4268 Value *InsertSubVec = Builder.CreateInsertVector(
4269 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, uint64_t(0));
4270
4271 Function *F =
4272 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
4273 Value *DupQLane =
4274 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
4275
4276 if (!IsBoolTy)
4277 return DupQLane;
4278
4279 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4280 Value *Pred = EmitSVEAllTruePred(TypeFlags);
4281
4282 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
4283 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
4284 : Intrinsic::aarch64_sve_cmpne_wide,
4285 OverloadedTy);
4286 Value *Call = Builder.CreateCall(
4287 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
4289 }
4290
4291 case SVE::BI__builtin_sve_svpfalse_b:
4292 return ConstantInt::getFalse(Ty);
4293
4294 case SVE::BI__builtin_sve_svpfalse_c: {
4295 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
4296 Function *CastToSVCountF =
4297 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
4298 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
4299 }
4300
4301 case SVE::BI__builtin_sve_svlen_bf16:
4302 case SVE::BI__builtin_sve_svlen_f16:
4303 case SVE::BI__builtin_sve_svlen_f32:
4304 case SVE::BI__builtin_sve_svlen_f64:
4305 case SVE::BI__builtin_sve_svlen_s8:
4306 case SVE::BI__builtin_sve_svlen_s16:
4307 case SVE::BI__builtin_sve_svlen_s32:
4308 case SVE::BI__builtin_sve_svlen_s64:
4309 case SVE::BI__builtin_sve_svlen_u8:
4310 case SVE::BI__builtin_sve_svlen_u16:
4311 case SVE::BI__builtin_sve_svlen_u32:
4312 case SVE::BI__builtin_sve_svlen_u64: {
4313 SVETypeFlags TF(Builtin->TypeModifier);
4314 return Builder.CreateElementCount(Ty, getSVEType(TF)->getElementCount());
4315 }
4316
4317 case SVE::BI__builtin_sve_svtbl2_u8:
4318 case SVE::BI__builtin_sve_svtbl2_s8:
4319 case SVE::BI__builtin_sve_svtbl2_u16:
4320 case SVE::BI__builtin_sve_svtbl2_s16:
4321 case SVE::BI__builtin_sve_svtbl2_u32:
4322 case SVE::BI__builtin_sve_svtbl2_s32:
4323 case SVE::BI__builtin_sve_svtbl2_u64:
4324 case SVE::BI__builtin_sve_svtbl2_s64:
4325 case SVE::BI__builtin_sve_svtbl2_f16:
4326 case SVE::BI__builtin_sve_svtbl2_bf16:
4327 case SVE::BI__builtin_sve_svtbl2_f32:
4328 case SVE::BI__builtin_sve_svtbl2_f64: {
4329 SVETypeFlags TF(Builtin->TypeModifier);
4330 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, getSVEType(TF));
4331 return Builder.CreateCall(F, Ops);
4332 }
4333
4334 case SVE::BI__builtin_sve_svset_neonq_s8:
4335 case SVE::BI__builtin_sve_svset_neonq_s16:
4336 case SVE::BI__builtin_sve_svset_neonq_s32:
4337 case SVE::BI__builtin_sve_svset_neonq_s64:
4338 case SVE::BI__builtin_sve_svset_neonq_u8:
4339 case SVE::BI__builtin_sve_svset_neonq_u16:
4340 case SVE::BI__builtin_sve_svset_neonq_u32:
4341 case SVE::BI__builtin_sve_svset_neonq_u64:
4342 case SVE::BI__builtin_sve_svset_neonq_f16:
4343 case SVE::BI__builtin_sve_svset_neonq_f32:
4344 case SVE::BI__builtin_sve_svset_neonq_f64:
4345 case SVE::BI__builtin_sve_svset_neonq_bf16: {
4346 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], uint64_t(0));
4347 }
4348
4349 case SVE::BI__builtin_sve_svget_neonq_s8:
4350 case SVE::BI__builtin_sve_svget_neonq_s16:
4351 case SVE::BI__builtin_sve_svget_neonq_s32:
4352 case SVE::BI__builtin_sve_svget_neonq_s64:
4353 case SVE::BI__builtin_sve_svget_neonq_u8:
4354 case SVE::BI__builtin_sve_svget_neonq_u16:
4355 case SVE::BI__builtin_sve_svget_neonq_u32:
4356 case SVE::BI__builtin_sve_svget_neonq_u64:
4357 case SVE::BI__builtin_sve_svget_neonq_f16:
4358 case SVE::BI__builtin_sve_svget_neonq_f32:
4359 case SVE::BI__builtin_sve_svget_neonq_f64:
4360 case SVE::BI__builtin_sve_svget_neonq_bf16: {
4361 return Builder.CreateExtractVector(Ty, Ops[0], uint64_t(0));
4362 }
4363
4364 case SVE::BI__builtin_sve_svdup_neonq_s8:
4365 case SVE::BI__builtin_sve_svdup_neonq_s16:
4366 case SVE::BI__builtin_sve_svdup_neonq_s32:
4367 case SVE::BI__builtin_sve_svdup_neonq_s64:
4368 case SVE::BI__builtin_sve_svdup_neonq_u8:
4369 case SVE::BI__builtin_sve_svdup_neonq_u16:
4370 case SVE::BI__builtin_sve_svdup_neonq_u32:
4371 case SVE::BI__builtin_sve_svdup_neonq_u64:
4372 case SVE::BI__builtin_sve_svdup_neonq_f16:
4373 case SVE::BI__builtin_sve_svdup_neonq_f32:
4374 case SVE::BI__builtin_sve_svdup_neonq_f64:
4375 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
4376 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
4377 uint64_t(0));
4378 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
4379 {Insert, Builder.getInt64(0)});
4380 }
4381 }
4382
4383 /// Should not happen
4384 return nullptr;
4385}
4386
// For the SME builtins listed in the switch, exchanges two adjacent groups of
// MultiVec operands in Ops: Ops[1 .. MultiVec] is swapped element-wise with
// Ops[1 + MultiVec .. 2 * MultiVec]. Ops[0] is left in place. Any other
// builtin returns without touching Ops.
// NOTE(review): the second line of this signature (which declares Ops) is
// missing from this listing.
4387static void swapCommutativeSMEOperands(unsigned BuiltinID,
4389 unsigned MultiVec;
4390 switch (BuiltinID) {
4391 default:
4392 return;
4393 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
4394 MultiVec = 1;
4395 break;
4396 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
4397 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
4398 MultiVec = 2;
4399 break;
4400 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
4401 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
4402 MultiVec = 4;
4403 break;
4404 }
4405
// Every path that reaches this point assigned MultiVec a value of 1, 2 or 4
// (the default case returned early), so this check always passes.
4406 if (MultiVec > 0)
4407 for (unsigned I = 0; I < MultiVec; ++I)
4408 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
4409}
4410
4412 const CallExpr *E) {
// NOTE(review): the first line of this signature and the statements that
// declare `Builtin` and `Ops` are missing from this listing. The call
// `EmitAArch64SMEBuiltinExpr(BuiltinID, E)` further down suggests the name.
//
// Emits IR for an SME builtin call. Loads/stores, ZA reads/writes, the
// zeroing builtins and the ldr/str builtins are handed to dedicated helpers;
// the svcnts* builtins are computed from the cntsd intrinsic; everything else
// becomes a direct call to Builtin->LLVMIntrinsic.
4415
4417 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4418 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4419
4420 if (TypeFlags.isLoad() || TypeFlags.isStore())
4421 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4422 if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
4423 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4424 if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
4425 BuiltinID == SME::BI__builtin_sme_svzero_za)
4426 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4427 if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
4428 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
4429 BuiltinID == SME::BI__builtin_sme_svldr_za ||
4430 BuiltinID == SME::BI__builtin_sme_svstr_za)
4431 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4432
4433 // Emit set FPMR for intrinsics that require it
// The last operand is removed from Ops and passed to the set_fpmr intrinsic.
4434 if (TypeFlags.setsFPMR())
4435 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
4436 Ops.pop_back_val());
4437 // Handle builtins which require their multi-vector operands to be swapped
4438 swapCommutativeSMEOperands(BuiltinID, Ops);
4439
// Returns the multiplier applied to the cntsd result for svcntsb/h/w, or 0
// when BuiltinID is not one of those builtins.
4440 auto isCntsBuiltin = [&]() {
4441 switch (BuiltinID) {
4442 default:
4443 return 0;
4444 case SME::BI__builtin_sme_svcntsb:
4445 return 8;
4446 case SME::BI__builtin_sme_svcntsh:
4447 return 4;
4448 case SME::BI__builtin_sme_svcntsw:
4449 return 2;
4450 }
4451 };
4452
4453 if (auto Mul = isCntsBuiltin()) {
4454 llvm::Value *Cntd =
4455 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd));
4456 return Builder.CreateMul(Cntd, llvm::ConstantInt::get(Int64Ty, Mul),
4457 "mulsvl", /* HasNUW */ true, /* HasNSW */ true);
4458 }
4459
4460 // Should not happen!
4461 if (Builtin->LLVMIntrinsic == 0)
4462 return nullptr;
4463
4464 // Predicates must match the main datatype.
// Every vector operand with i1 elements is cast to the SVE type from TypeFlags.
4465 for (Value *&Op : Ops)
4466 if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
4467 if (PredTy->getElementType()->isIntegerTy(1))
4468 Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));
4469
// The ZT ldr/str builtins are overloaded on the type of their second operand.
4470 if (BuiltinID == SME::BI__builtin_sme_svldr_zt ||
4471 BuiltinID == SME::BI__builtin_sme_svstr_zt) {
4472 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic, Ops[1]->getType());
4473 return Builder.CreateCall(F, Ops);
4474 }
4475
// Generic case: no overload types, or a single overload on the SVE type.
4476 Function *F =
4477 TypeFlags.isOverloadNone()
4478 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
4479 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
4480
4481 return Builder.CreateCall(F, Ops);
4482}
4483
4484/// Helper for the read/write/add/inc X18 builtins: read the X18 register and
4485/// return it as an i8 pointer.
// NOTE(review): the signature line of this helper is missing from this
// listing; the body uses a `CGF` object, presumably the CodeGenFunction
// passed in by the caller -- confirm against the real file.
4487 LLVMContext &Context = CGF.CGM.getLLVMContext();
// The register is identified by a metadata node holding the string "x18".
4488 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
4489 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4490 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
// Read it as a 64-bit integer through the read_register intrinsic ...
4491 llvm::Function *F =
4492 CGF.CGM.getIntrinsic(Intrinsic::read_register, {CGF.Int64Ty});
4493 llvm::Value *X18 = CGF.Builder.CreateCall(F, Metadata);
// ... and convert that integer to a pointer of type Int8PtrTy.
4494 return CGF.Builder.CreateIntToPtr(X18, CGF.Int8PtrTy);
4495}
4496
4498 const CallExpr *E,
4499 llvm::Triple::ArchType Arch) {
4500 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
4501 BuiltinID <= clang::AArch64::LastSVEBuiltin)
4502 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
4503
4504 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
4505 BuiltinID <= clang::AArch64::LastSMEBuiltin)
4506 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
4507
4508 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
4509 return EmitAArch64CpuSupports(E);
4510
4511 unsigned HintID = static_cast<unsigned>(-1);
4512 switch (BuiltinID) {
4513 default: break;
4514 case clang::AArch64::BI__builtin_arm_nop:
4515 HintID = 0;
4516 break;
4517 case clang::AArch64::BI__builtin_arm_yield:
4518 case clang::AArch64::BI__yield:
4519 HintID = 1;
4520 break;
4521 case clang::AArch64::BI__builtin_arm_wfe:
4522 case clang::AArch64::BI__wfe:
4523 HintID = 2;
4524 break;
4525 case clang::AArch64::BI__builtin_arm_wfi:
4526 case clang::AArch64::BI__wfi:
4527 HintID = 3;
4528 break;
4529 case clang::AArch64::BI__builtin_arm_sev:
4530 case clang::AArch64::BI__sev:
4531 HintID = 4;
4532 break;
4533 case clang::AArch64::BI__builtin_arm_sevl:
4534 case clang::AArch64::BI__sevl:
4535 HintID = 5;
4536 break;
4537 }
4538
4539 if (HintID != static_cast<unsigned>(-1)) {
4540 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
4541 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
4542 }
4543
4544 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
4545 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
4546 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4547 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
4548 }
4549
4550 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
4551 // Create call to __arm_sme_state and store the results to the two pointers.
4552 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
4553 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
4554 false),
4555 "__arm_sme_state"));
4556 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
4557 "aarch64_pstate_sm_compatible");
4558 CI->setAttributes(Attrs);
4559 CI->setCallingConv(
4560 llvm::CallingConv::
4561 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
4562 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
4564 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
4566 }
4567
4568 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
4569 assert((getContext().getTypeSize(E->getType()) == 32) &&
4570 "rbit of unusual size!");
4571 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4572 return Builder.CreateCall(
4573 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4574 }
4575 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
4576 assert((getContext().getTypeSize(E->getType()) == 64) &&
4577 "rbit of unusual size!");
4578 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4579 return Builder.CreateCall(
4580 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4581 }
4582
4583 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
4584 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
4585 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4586 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
4587 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
4588 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
4589 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
4590 return Res;
4591 }
4592
4593 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
4594 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4595 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
4596 "cls");
4597 }
4598 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
4599 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4600 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
4601 "cls");
4602 }
4603
4604 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
4605 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
4606 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4607 llvm::Type *Ty = Arg->getType();
4608 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
4609 Arg, "frint32z");
4610 }
4611
4612 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
4613 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
4614 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4615 llvm::Type *Ty = Arg->getType();
4616 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
4617 Arg, "frint64z");
4618 }
4619
4620 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
4621 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
4622 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4623 llvm::Type *Ty = Arg->getType();
4624 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
4625 Arg, "frint32x");
4626 }
4627
4628 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
4629 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
4630 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4631 llvm::Type *Ty = Arg->getType();
4632 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
4633 Arg, "frint64x");
4634 }
4635
4636 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
4637 assert((getContext().getTypeSize(E->getType()) == 32) &&
4638 "__jcvt of unusual size!");
4639 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4640 return Builder.CreateCall(
4641 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
4642 }
4643
4644 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
4645 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
4646 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
4647 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
4648 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
4649 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
4650
4651 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
4652 // Load from the address via an LLVM intrinsic, receiving a
4653 // tuple of 8 i64 words, and store each one to ValPtr.
4654 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
4655 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
4656 llvm::Value *ToRet;
4657 for (size_t i = 0; i < 8; i++) {
4658 llvm::Value *ValOffsetPtr =
4659 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
4660 Address Addr =
4661 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
4662 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
4663 }
4664 return ToRet;
4665 }
4666
4667 // Load 8 i64 words from ValPtr, and store them to the address
4668 // via an LLVM intrinsic.
4670 Args.push_back(MemAddr);
4671 for (size_t i = 0; i < 8; i++) {
4672 llvm::Value *ValOffsetPtr =
4673 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
4674 Address Addr = Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
4675 Args.push_back(Builder.CreateLoad(Addr));
4676 }
4677
4678 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
4679 ? Intrinsic::aarch64_st64b
4680 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
4681 ? Intrinsic::aarch64_st64bv
4682 : Intrinsic::aarch64_st64bv0);
4683 Function *F = CGM.getIntrinsic(Intr);
4684 return Builder.CreateCall(F, Args);
4685 }
4686
4687 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
4688 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
4689
4690 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
4691 ? Intrinsic::aarch64_rndr
4692 : Intrinsic::aarch64_rndrrs);
4693 Function *F = CGM.getIntrinsic(Intr);
4694 llvm::Value *Val = Builder.CreateCall(F);
4695 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
4696 Value *Status = Builder.CreateExtractValue(Val, 1);
4697
4698 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
4699 Builder.CreateStore(RandomValue, MemAddress);
4700 Status = Builder.CreateZExt(Status, Int32Ty);
4701 return Status;
4702 }
4703
4704 if (BuiltinID == clang::AArch64::BI__clear_cache) {
4705 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4706 const FunctionDecl *FD = E->getDirectCallee();
4707 Value *Ops[2];
4708 for (unsigned i = 0; i < 2; i++)
4709 Ops[i] = EmitScalarExpr(E->getArg(i));
4710 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4711 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4712 StringRef Name = FD->getName();
4713 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4714 }
4715
4716 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
4717 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
4718 getContext().getTypeSize(E->getType()) == 128) {
4719 Function *F =
4720 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
4721 ? Intrinsic::aarch64_ldaxp
4722 : Intrinsic::aarch64_ldxp);
4723
4724 Value *LdPtr = EmitScalarExpr(E->getArg(0));
4725 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
4726
4727 Value *Val0 = Builder.CreateExtractValue(Val, 1);
4728 Value *Val1 = Builder.CreateExtractValue(Val, 0);
4729 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
4730 Val0 = Builder.CreateZExt(Val0, Int128Ty);
4731 Val1 = Builder.CreateZExt(Val1, Int128Ty);
4732
4733 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
4734 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4735 Val = Builder.CreateOr(Val, Val1);
4736 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4737 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
4738 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
4739 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4740
4741 QualType Ty = E->getType();
4742 llvm::Type *RealResTy = ConvertType(Ty);
4743 llvm::Type *IntTy =
4744 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
4745
4746 Function *F =
4747 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
4748 ? Intrinsic::aarch64_ldaxr
4749 : Intrinsic::aarch64_ldxr,
4750 DefaultPtrTy);
4751 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
4752 Val->addParamAttr(
4753 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
4754
4755 if (RealResTy->isPointerTy())
4756 return Builder.CreateIntToPtr(Val, RealResTy);
4757
4758 llvm::Type *IntResTy = llvm::IntegerType::get(
4759 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
4760 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
4761 RealResTy);
4762 }
4763
4764 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
4765 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
4766 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
4767 Function *F =
4768 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
4769 ? Intrinsic::aarch64_stlxp
4770 : Intrinsic::aarch64_stxp);
4771 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
4772
4774 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
4775
4776 Tmp = Tmp.withElementType(STy);
4777 llvm::Value *Val = Builder.CreateLoad(Tmp);
4778
4779 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4780 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4781 Value *StPtr = EmitScalarExpr(E->getArg(1));
4782 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
4783 }
4784
4785 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
4786 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
4787 Value *StoreVal = EmitScalarExpr(E->getArg(0));
4788 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4789
4790 QualType Ty = E->getArg(0)->getType();
4791 llvm::Type *StoreTy =
4792 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
4793
4794 if (StoreVal->getType()->isPointerTy())
4795 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
4796 else {
4797 llvm::Type *IntTy = llvm::IntegerType::get(
4799 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
4800 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
4801 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
4802 }
4803
4804 Function *F =
4805 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
4806 ? Intrinsic::aarch64_stlxr
4807 : Intrinsic::aarch64_stxr,
4808 StoreAddr->getType());
4809 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
4810 CI->addParamAttr(
4811 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
4812 return CI;
4813 }
4814
4815 if (BuiltinID == clang::AArch64::BI__getReg ||
4816 BuiltinID == clang::AArch64::BI__setReg) {
4818 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
4819 llvm_unreachable("Sema will ensure that the parameter is constant");
4820
4821 llvm::APSInt Value = Result.Val.getInt();
4822 LLVMContext &Context = CGM.getLLVMContext();
4823 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
4824
4825 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
4826 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4827 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4828
4829 CallInst *CI;
4830 if (BuiltinID == clang::AArch64::BI__getReg) {
4831 llvm::Function *F =
4832 CGM.getIntrinsic(Intrinsic::read_volatile_register, {Int64Ty});
4833 CI = Builder.CreateCall(F, Metadata);
4834 } else {
4835 llvm::Function *F =
4836 CGM.getIntrinsic(Intrinsic::write_volatile_register, {Int64Ty});
4837 CI = Builder.CreateCall(F, {Metadata, EmitScalarExpr(E->getArg(1))});
4838 }
4839 return CI;
4840 }
4841
4842 if (BuiltinID == clang::AArch64::BI__getRegFp ||
4843 BuiltinID == clang::AArch64::BI__setRegFp) {
4845 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
4846 llvm_unreachable("Sema will ensure that the parameter is constant");
4847
4848 llvm::APSInt Value = Result.Val.getInt();
4849 LLVMContext &Context = CGM.getLLVMContext();
4850 std::string Reg = "d" + toString(Value, 10);
4851
4852 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
4853 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4854 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4855
4856 llvm::Value *Ret;
4857 if (BuiltinID == clang::AArch64::BI__getRegFp) {
4858 llvm::Function *F =
4859 CGM.getIntrinsic(Intrinsic::read_volatile_register, {Int64Ty});
4860 llvm::Value *Bits = Builder.CreateCall(F, Metadata);
4861 Ret = Builder.CreateBitCast(Bits, llvm::Type::getDoubleTy(Context));
4862 } else {
4863 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
4864 llvm::Value *Bits = Builder.CreateBitCast(Val, Int64Ty);
4865 llvm::Function *F =
4866 CGM.getIntrinsic(Intrinsic::write_volatile_register, {Int64Ty});
4867 Ret = Builder.CreateCall(F, {Metadata, Bits});
4868 }
4869 return Ret;
4870 }
4871
4872 if (BuiltinID == clang::AArch64::BI__break) {
4874 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
4875 llvm_unreachable("Sema will ensure that the parameter is constant");
4876
4877 llvm::Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
4878 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
4879 }
4880
4881 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
4882 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
4883 return Builder.CreateCall(F);
4884 }
4885
4886 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
4887 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
4888 llvm::SyncScope::SingleThread);
4889
4890 // CRC32
4891 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4892 switch (BuiltinID) {
4893 case clang::AArch64::BI__builtin_arm_crc32b:
4894 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
4895 case clang::AArch64::BI__builtin_arm_crc32cb:
4896 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
4897 case clang::AArch64::BI__builtin_arm_crc32h:
4898 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
4899 case clang::AArch64::BI__builtin_arm_crc32ch:
4900 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
4901 case clang::AArch64::BI__builtin_arm_crc32w:
4902 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
4903 case clang::AArch64::BI__builtin_arm_crc32cw:
4904 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
4905 case clang::AArch64::BI__builtin_arm_crc32d:
4906 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
4907 case clang::AArch64::BI__builtin_arm_crc32cd:
4908 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
4909 }
4910
4911 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4912 Value *Arg0 = EmitScalarExpr(E->getArg(0));
4913 Value *Arg1 = EmitScalarExpr(E->getArg(1));
4914 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4915
4916 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
4917 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
4918
4919 return Builder.CreateCall(F, {Arg0, Arg1});
4920 }
4921
4922 // Memory Operations (MOPS)
4923 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
4924 Value *Dst = EmitScalarExpr(E->getArg(0));
4925 Value *Val = EmitScalarExpr(E->getArg(1));
4926 Value *Size = EmitScalarExpr(E->getArg(2));
4927 Val = Builder.CreateTrunc(Val, Int8Ty);
4928 Size = Builder.CreateIntCast(Size, Int64Ty, false);
4929 return Builder.CreateCall(
4930 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
4931 }
4932
4933 if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch ||
4934 BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x)
4935 return EmitRangePrefetchBuiltin(*this, BuiltinID, E);
4936
4937 // Memory Tagging Extensions (MTE) Intrinsics
4938 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
4939 switch (BuiltinID) {
4940 case clang::AArch64::BI__builtin_arm_irg:
4941 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
4942 case clang::AArch64::BI__builtin_arm_addg:
4943 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
4944 case clang::AArch64::BI__builtin_arm_gmi:
4945 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
4946 case clang::AArch64::BI__builtin_arm_ldg:
4947 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
4948 case clang::AArch64::BI__builtin_arm_stg:
4949 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
4950 case clang::AArch64::BI__builtin_arm_subp:
4951 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
4952 }
4953
4954 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
4955 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
4957 Value *Mask = EmitScalarExpr(E->getArg(1));
4958 assert(Mask->getType()->getScalarSizeInBits() == 64 &&
4959 "SemaARM::BuiltinARMMemoryTaggingCall() enforces this");
4960 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
4961 {Pointer, Mask});
4962 }
4963 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
4965 Value *TagOffset = EmitScalarExpr(E->getArg(1));
4966
4967 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
4968 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
4969 {Pointer, TagOffset});
4970 }
4971 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
4973 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
4974 assert(ExcludedMask->getType()->getScalarSizeInBits() == 64 &&
4975 "SemaARM::BuiltinARMMemoryTaggingCall() enforces this");
4976 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
4977 {Pointer, ExcludedMask});
4978 }
4979 // Although it is possible to supply a different return
4980 // address (first arg) to this intrinsic, for now we set
4981 // the return address to be the same as the input address.
4982 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
4983 Value *TagAddress = EmitScalarExpr(E->getArg(0));
4984 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
4985 {TagAddress, TagAddress});
4986 }
4987 // Although it is possible to supply a different tag (to set)
4988 // to this intrinsic (as first arg), for now we supply
4989 // the tag that is in input address arg (common use case).
4990 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
4991 Value *TagAddress = EmitScalarExpr(E->getArg(0));
4992 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
4993 {TagAddress, TagAddress});
4994 }
4995 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
4996 Value *PointerA = EmitScalarExpr(E->getArg(0));
4997 Value *PointerB = EmitScalarExpr(E->getArg(1));
4998 return Builder.CreateCall(
4999 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
5000 }
5001 }
5002
5003 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5004 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
5005 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5006 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
5007 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
5008 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
5009 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
5010 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
5011
5012 SpecialRegisterAccessKind AccessKind = Write;
5013 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5014 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
5015 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5016 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
5017 AccessKind = VolatileRead;
5018
5019 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
5020 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
5021
5022 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5023 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
5024
5025 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5026 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
5027
5028 llvm::Type *ValueType;
5029 llvm::Type *RegisterType = Int64Ty;
5030 if (Is32Bit) {
5031 ValueType = Int32Ty;
5032 } else if (Is128Bit) {
5033 llvm::Type *Int128Ty =
5034 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
5035 ValueType = Int128Ty;
5036 RegisterType = Int128Ty;
5037 } else if (IsPointerBuiltin) {
5038 ValueType = VoidPtrTy;
5039 } else {
5040 ValueType = Int64Ty;
5041 };
5042
5043 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
5044 AccessKind);
5045 }
5046
5047 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5048 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
5049 LLVMContext &Context = CGM.getLLVMContext();
5050
5051 unsigned SysReg =
5052 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
5053
5054 std::string SysRegStr;
5055 llvm::raw_string_ostream(SysRegStr)
5056 << (0b10 | SysReg >> 14) << ":" << ((SysReg >> 11) & 7) << ":"
5057 << ((SysReg >> 7) & 15) << ":" << ((SysReg >> 3) & 15) << ":"
5058 << (SysReg & 7);
5059
5060 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
5061 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5062 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5063
5064 llvm::Type *RegisterType = Int64Ty;
5065 llvm::Type *Types[] = { RegisterType };
5066
5067 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
5068 llvm::Function *F = CGM.getIntrinsic(Intrinsic::read_register, Types);
5069
5070 return Builder.CreateCall(F, Metadata);
5071 }
5072
5073 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
5074 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
5075 llvm::Value *Result = Builder.CreateCall(F, {Metadata, ArgValue});
5076
5077 return Result;
5078 }
5079
5080 if (BuiltinID == clang::AArch64::BI__sys) {
5081 unsigned SysReg =
5082 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
5083 const unsigned Op1 = SysReg >> 11;
5084 const unsigned CRn = (SysReg >> 7) & 0xf;
5085 const unsigned CRm = (SysReg >> 3) & 0xf;
5086 const unsigned Op2 = SysReg & 0x7;
5087
5088 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_sys),
5089 {Builder.getInt32(Op1), Builder.getInt32(CRn),
5090 Builder.getInt32(CRm), Builder.getInt32(Op2),
5091 EmitScalarExpr(E->getArg(1))});
5092
5093 // Return 0 for convenience, even though MSVC returns some other undefined
5094 // value.
5095 return ConstantInt::get(Builder.getInt32Ty(), 0);
5096 }
5097
5098 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
5099 llvm::Function *F =
5100 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
5101 return Builder.CreateCall(F);
5102 }
5103
5104 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
5105 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
5106 return Builder.CreateCall(F);
5107 }
5108
5109 if (BuiltinID == clang::AArch64::BI__mulh ||
5110 BuiltinID == clang::AArch64::BI__umulh) {
5111 llvm::Type *ResType = ConvertType(E->getType());
5112 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5113
5114 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
5115 Value *LHS =
5116 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
5117 Value *RHS =
5118 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
5119
5120 Value *MulResult, *HigherBits;
5121 if (IsSigned) {
5122 MulResult = Builder.CreateNSWMul(LHS, RHS);
5123 HigherBits = Builder.CreateAShr(MulResult, 64);
5124 } else {
5125 MulResult = Builder.CreateNUWMul(LHS, RHS);
5126 HigherBits = Builder.CreateLShr(MulResult, 64);
5127 }
5128 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
5129
5130 return HigherBits;
5131 }
5132
5133 if (BuiltinID == AArch64::BI__writex18byte ||
5134 BuiltinID == AArch64::BI__writex18word ||
5135 BuiltinID == AArch64::BI__writex18dword ||
5136 BuiltinID == AArch64::BI__writex18qword) {
5137 // Process the args first
5138 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5139 Value *DataArg = EmitScalarExpr(E->getArg(1));
5140
5141 // Read x18 as i8*
5142 llvm::Value *X18 = readX18AsPtr(*this);
5143
5144 // Store val at x18 + offset
5145 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5146 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5147 StoreInst *Store =
5148 Builder.CreateAlignedStore(DataArg, Ptr, CharUnits::One());
5149 return Store;
5150 }
5151
5152 if (BuiltinID == AArch64::BI__readx18byte ||
5153 BuiltinID == AArch64::BI__readx18word ||
5154 BuiltinID == AArch64::BI__readx18dword ||
5155 BuiltinID == AArch64::BI__readx18qword) {
5156 // Process the args first
5157 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5158
5159 // Read x18 as i8*
5160 llvm::Value *X18 = readX18AsPtr(*this);
5161
5162 // Load x18 + offset
5163 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5164 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5165 llvm::Type *IntTy = ConvertType(E->getType());
5166 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
5167 return Load;
5168 }
5169
5170 if (BuiltinID == AArch64::BI__addx18byte ||
5171 BuiltinID == AArch64::BI__addx18word ||
5172 BuiltinID == AArch64::BI__addx18dword ||
5173 BuiltinID == AArch64::BI__addx18qword ||
5174 BuiltinID == AArch64::BI__incx18byte ||
5175 BuiltinID == AArch64::BI__incx18word ||
5176 BuiltinID == AArch64::BI__incx18dword ||
5177 BuiltinID == AArch64::BI__incx18qword) {
5178 llvm::Type *IntTy;
5179 bool isIncrement;
5180 switch (BuiltinID) {
5181 case AArch64::BI__incx18byte:
5182 IntTy = Int8Ty;
5183 isIncrement = true;
5184 break;
5185 case AArch64::BI__incx18word:
5186 IntTy = Int16Ty;
5187 isIncrement = true;
5188 break;
5189 case AArch64::BI__incx18dword:
5190 IntTy = Int32Ty;
5191 isIncrement = true;
5192 break;
5193 case AArch64::BI__incx18qword:
5194 IntTy = Int64Ty;
5195 isIncrement = true;
5196 break;
5197 default:
5198 IntTy = ConvertType(E->getArg(1)->getType());
5199 isIncrement = false;
5200 break;
5201 }
5202 // Process the args first
5203 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5204 Value *ValToAdd =
5205 isIncrement ? ConstantInt::get(IntTy, 1) : EmitScalarExpr(E->getArg(1));
5206
5207 // Read x18 as i8*
5208 llvm::Value *X18 = readX18AsPtr(*this);
5209
5210 // Load x18 + offset
5211 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5212 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5213 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
5214
5215 // Add values
5216 Value *AddResult = Builder.CreateAdd(Load, ValToAdd);
5217
5218 // Store val at x18 + offset
5219 StoreInst *Store =
5220 Builder.CreateAlignedStore(AddResult, Ptr, CharUnits::One());
5221 return Store;
5222 }
5223
5224 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
5225 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
5226 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
5227 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
5228 Value *Arg = EmitScalarExpr(E->getArg(0));
5229 llvm::Type *RetTy = ConvertType(E->getType());
5230 return Builder.CreateBitCast(Arg, RetTy);
5231 }
5232
5233 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5234 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5235 BuiltinID == AArch64::BI_CountLeadingZeros ||
5236 BuiltinID == AArch64::BI_CountLeadingZeros64) {
5237 Value *Arg = EmitScalarExpr(E->getArg(0));
5238 llvm::Type *ArgType = Arg->getType();
5239
5240 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5241 BuiltinID == AArch64::BI_CountLeadingOnes64)
5242 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
5243
5244 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
5245 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
5246
5247 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5248 BuiltinID == AArch64::BI_CountLeadingZeros64)
5249 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5250 return Result;
5251 }
5252
5253 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
5254 BuiltinID == AArch64::BI_CountLeadingSigns64) {
5255 Value *Arg = EmitScalarExpr(E->getArg(0));
5256
5257 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
5258 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
5259 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
5260
5261 Value *Result = Builder.CreateCall(F, Arg, "cls");
5262 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
5263 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5264 return Result;
5265 }
5266
5267 if (BuiltinID == AArch64::BI_CountOneBits ||
5268 BuiltinID == AArch64::BI_CountOneBits64) {
5269 Value *ArgValue = EmitScalarExpr(E->getArg(0));
5270 llvm::Type *ArgType = ArgValue->getType();
5271 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
5272
5273 Value *Result = Builder.CreateCall(F, ArgValue);
5274 if (BuiltinID == AArch64::BI_CountOneBits64)
5275 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5276 return Result;
5277 }
5278
5279 if (BuiltinID == AArch64::BI_CountTrailingZeros ||
5280 BuiltinID == AArch64::BI_CountTrailingZeros64) {
5281 Value *ArgValue = EmitScalarExpr(E->getArg(0));
5282 llvm::Type *ArgType = ArgValue->getType();
5283 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
5284
5285 // MSVC leaves 0 undefined; use false for predictable codegen
5286 Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getInt1(false)});
5287 if (BuiltinID == AArch64::BI_CountTrailingZeros64)
5288 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5289 return Result;
5290 }
5291
5292 if (BuiltinID == AArch64::BI__prefetch) {
5294 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
5295 Value *Locality = ConstantInt::get(Int32Ty, 3);
5296 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
5297 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
5298 return Builder.CreateCall(F, {Address, RW, Locality, Data});
5299 }
5300
5301 if (BuiltinID == AArch64::BI__prefetch2) {
5303 llvm::APSInt PrfOp = E->getArg(1)->EvaluateKnownConstInt(CGM.getContext());
5304 // Decode 5-bit PRFM encoding: bits[4:3]=type, bits[2:1]=target,
5305 // bit[0]=policy
5306 // type: PLD=0(load), PLI=1(instr), PST=2(store)
5307 // target: L1=0, L2=1, L3=2
5308 // policy: KEEP=0, STRM=1
5309 uint64_t Op = PrfOp.getZExtValue();
5310 uint64_t Type = (Op >> 3) & 0x3;
5311 uint64_t Target = (Op >> 1) & 0x3;
5312 uint64_t Policy = Op & 0x1;
5313 Value *RW = Builder.getInt32(Type == 2 ? 1 : 0);
5314 Value *Local = Builder.getInt32(Target);
5315 Value *IsStream = Builder.getInt32(Policy);
5316 Value *IsData = Builder.getInt32(Type == 1 ? 0 : 1);
5317 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_prefetch);
5318 return Builder.CreateCall(F, {Address, RW, Local, IsStream, IsData});
5319 }
5320
5321 if (BuiltinID == AArch64::BI__hlt) {
5322 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
5323 Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
5324
5325 // Return 0 for convenience, even though MSVC returns some other undefined
5326 // value.
5327 return ConstantInt::get(Builder.getInt32Ty(), 0);
5328 }
5329
5330 if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
5331 return Builder.CreateFPTrunc(
5332 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
5333 Builder.getFloatTy()),
5334 Builder.getBFloatTy());
5335
5336 // Handle MSVC intrinsics before argument evaluation to prevent double
5337 // evaluation.
5338 if (std::optional<MSVCIntrin> MsvcIntId =
5340 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
5341
5342 // Some intrinsics are equivalent; if this is one, use the base intrinsic ID.
5343 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
5344 return P.first == BuiltinID;
5345 });
5346 if (It != end(NEONEquivalentIntrinsicMap))
5347 BuiltinID = It->second;
5348
5349 // Check whether this is an SISD builtin.
5350 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
5352 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5353 bool IsSISD = (Builtin != nullptr);
5354
5355 // Find out if any arguments are required to be integer constant
5356 // expressions.
5357 unsigned ICEArguments = 0;
5359 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5360 assert(Error == ASTContext::GE_None && "Should not codegen an error");
5361
5363 Address PtrOp0 = Address::invalid();
5364 // Note the assumption that SISD intrinsics do not contain extra arguments.
5365 // TODO: Fold this into a single function call instead of, effectively, two
5366 // separate checks.
5367 bool HasExtraArg = !IsSISD && HasExtraNeonArgument(BuiltinID);
5368 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
5369 for (unsigned i = 0, e = NumArgs; i != e; i++) {
5370 if (i == 0) {
5371 switch (BuiltinID) {
5372 case NEON::BI__builtin_neon_vld1_v:
5373 case NEON::BI__builtin_neon_vld1q_v:
5374 case NEON::BI__builtin_neon_vld1_dup_v:
5375 case NEON::BI__builtin_neon_vld1q_dup_v:
5376 case NEON::BI__builtin_neon_vld1_lane_v:
5377 case NEON::BI__builtin_neon_vld1q_lane_v:
5378 case NEON::BI__builtin_neon_vst1_v:
5379 case NEON::BI__builtin_neon_vst1q_v:
5380 case NEON::BI__builtin_neon_vst1_lane_v:
5381 case NEON::BI__builtin_neon_vst1q_lane_v:
5382 case NEON::BI__builtin_neon_vldap1_lane_s64:
5383 case NEON::BI__builtin_neon_vldap1q_lane_s64:
5384 case NEON::BI__builtin_neon_vstl1_lane_s64:
5385 case NEON::BI__builtin_neon_vstl1q_lane_s64:
5386 // Get the alignment for the argument in addition to the value;
5387 // we'll use it later.
5388 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
5389 Ops.push_back(PtrOp0.emitRawPointer(*this));
5390 continue;
5391 }
5392 }
5393 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
5394 }
5395
5396 if (Builtin) {
5398 assert(Result && "SISD intrinsic should have been handled");
5399 return Result;
5400 }
5401
5402 const Expr *Arg = E->getArg(E->getNumArgs()-1);
5404 if (std::optional<llvm::APSInt> Result =
5406 // Determine the type of this overloaded NEON intrinsic.
5407 Type = NeonTypeFlags(Result->getZExtValue());
5408
5409 bool usgn = Type.isUnsigned();
5410 bool quad = Type.isQuad();
5411 unsigned Int;
5412
5413 // Not all intrinsics handled by the common case work for AArch64 yet, so only
5414 // defer to common code if it's been added to our special map.
5417
5418 if (Builtin)
5420 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
5421 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
5422 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
5423
5424 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
5425 return V;
5426
5427 // Handle non-overloaded intrinsics first.
5428 switch (BuiltinID) {
5429 default: break;
5430 case NEON::BI__builtin_neon_vabsh_f16:
5431 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
5432 case NEON::BI__builtin_neon_vaddq_p128: {
5433 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
5434 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5435 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5436 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
5437 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5438 return Builder.CreateBitCast(Ops[0], Int128Ty);
5439 }
5440 case NEON::BI__builtin_neon_vldrq_p128: {
5441 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5442 return Builder.CreateAlignedLoad(Int128Ty, Ops[0],
5444 }
5445 case NEON::BI__builtin_neon_vstrq_p128: {
5446 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5447 }
5448 case NEON::BI__builtin_neon_vcvts_f32_u32:
5449 case NEON::BI__builtin_neon_vcvtd_f64_u64:
5450 usgn = true;
5451 [[fallthrough]];
5452 case NEON::BI__builtin_neon_vcvts_f32_s32:
5453 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5454 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5455 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5456 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5457 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5458 if (usgn)
5459 return Builder.CreateUIToFP(Ops[0], FTy);
5460 return Builder.CreateSIToFP(Ops[0], FTy);
5461 }
5462 case NEON::BI__builtin_neon_vcvth_f16_u16:
5463 case NEON::BI__builtin_neon_vcvth_f16_u32:
5464 case NEON::BI__builtin_neon_vcvth_f16_u64:
5465 usgn = true;
5466 [[fallthrough]];
5467 case NEON::BI__builtin_neon_vcvth_f16_s16:
5468 case NEON::BI__builtin_neon_vcvth_f16_s32:
5469 case NEON::BI__builtin_neon_vcvth_f16_s64: {
5470 llvm::Type *FTy = HalfTy;
5471 llvm::Type *InTy;
5472 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
5473 InTy = Int64Ty;
5474 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
5475 InTy = Int32Ty;
5476 else
5477 InTy = Int16Ty;
5478 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5479 if (usgn)
5480 return Builder.CreateUIToFP(Ops[0], FTy);
5481 return Builder.CreateSIToFP(Ops[0], FTy);
5482 }
5483 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5484 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5485 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5486 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5487 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5488 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5489 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5490 case NEON::BI__builtin_neon_vcvtph_s16_f16: {
5491 llvm::Type *InTy = Int16Ty;
5492 llvm::Type* FTy = HalfTy;
5493 llvm::Type *Tys[2] = {InTy, FTy};
5494 switch (BuiltinID) {
5495 default: llvm_unreachable("missing builtin ID in switch!");
5496 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5497 Int = Intrinsic::aarch64_neon_fcvtau; break;
5498 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5499 Int = Intrinsic::aarch64_neon_fcvtmu; break;
5500 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5501 Int = Intrinsic::aarch64_neon_fcvtnu; break;
5502 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5503 Int = Intrinsic::aarch64_neon_fcvtpu; break;
5504 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5505 Int = Intrinsic::aarch64_neon_fcvtas; break;
5506 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5507 Int = Intrinsic::aarch64_neon_fcvtms; break;
5508 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5509 Int = Intrinsic::aarch64_neon_fcvtns; break;
5510 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5511 Int = Intrinsic::aarch64_neon_fcvtps; break;
5512 }
5513 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
5514 }
5515 case NEON::BI__builtin_neon_vcaleh_f16:
5516 case NEON::BI__builtin_neon_vcalth_f16:
5517 case NEON::BI__builtin_neon_vcageh_f16:
5518 case NEON::BI__builtin_neon_vcagth_f16: {
5519 llvm::Type* InTy = Int32Ty;
5520 llvm::Type* FTy = HalfTy;
5521 llvm::Type *Tys[2] = {InTy, FTy};
5522 switch (BuiltinID) {
5523 default: llvm_unreachable("missing builtin ID in switch!");
5524 case NEON::BI__builtin_neon_vcageh_f16:
5525 Int = Intrinsic::aarch64_neon_facge; break;
5526 case NEON::BI__builtin_neon_vcagth_f16:
5527 Int = Intrinsic::aarch64_neon_facgt; break;
5528 case NEON::BI__builtin_neon_vcaleh_f16:
5529 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
5530 case NEON::BI__builtin_neon_vcalth_f16:
5531 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
5532 }
5533 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
5534 return Builder.CreateTrunc(Ops[0], Int16Ty);
5535 }
5536 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5537 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
5538 llvm::Type* InTy = Int32Ty;
5539 llvm::Type* FTy = HalfTy;
5540 llvm::Type *Tys[2] = {InTy, FTy};
5541 switch (BuiltinID) {
5542 default: llvm_unreachable("missing builtin ID in switch!");
5543 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5544 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
5545 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
5546 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
5547 }
5548 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
5549 return Builder.CreateTrunc(Ops[0], Int16Ty);
5550 }
5551 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5552 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
5553 llvm::Type* FTy = HalfTy;
5554 llvm::Type* InTy = Int32Ty;
5555 llvm::Type *Tys[2] = {FTy, InTy};
5556 switch (BuiltinID) {
5557 default: llvm_unreachable("missing builtin ID in switch!");
5558 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5559 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
5560 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
5561 break;
5562 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
5563 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
5564 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
5565 break;
5566 }
5567 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
5568 }
5569 case NEON::BI__builtin_neon_vpaddd_s64: {
5570 // TODO: Isn't this handled by
5571 // EmitCommonNeonSISDBuiltinExpr?
5572 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
5573 // The vector is v2i64, so make sure it's bitcast to that.
5574 Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2i64");
5575 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5576 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5577 Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
5578 Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
5579 // Pairwise addition of a v2i64 into a scalar i64.
5580 return Builder.CreateAdd(Op0, Op1, "vpaddd");
5581 }
5582 case NEON::BI__builtin_neon_vpaddd_f64: {
5583 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
5584 // The vector is v2f64, so make sure it's bitcast to that.
5585 Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2f64");
5586 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5587 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5588 Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
5589 Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
5590 // Pairwise addition of a v2f64 into a scalar f64.
5591 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5592 }
5593 case NEON::BI__builtin_neon_vpadds_f32: {
5594 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
5595 // The vector is v2f32, so make sure it's bitcast to that.
5596 Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2f32");
5597 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5598 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5599 Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
5600 Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
5601 // Pairwise addition of a v2f32 into a scalar f32.
5602 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5603 }
5604 case NEON::BI__builtin_neon_vceqzd_s64:
5607 ICmpInst::ICMP_EQ, "vceqz");
5608 case NEON::BI__builtin_neon_vceqzd_f64:
5609 case NEON::BI__builtin_neon_vceqzs_f32:
5610 case NEON::BI__builtin_neon_vceqzh_f16:
5613 ICmpInst::FCMP_OEQ, "vceqz");
5614 case NEON::BI__builtin_neon_vcgezd_s64:
5617 ICmpInst::ICMP_SGE, "vcgez");
5618 case NEON::BI__builtin_neon_vcgezd_f64:
5619 case NEON::BI__builtin_neon_vcgezs_f32:
5620 case NEON::BI__builtin_neon_vcgezh_f16:
5623 ICmpInst::FCMP_OGE, "vcgez");
5624 case NEON::BI__builtin_neon_vclezd_s64:
5627 ICmpInst::ICMP_SLE, "vclez");
5628 case NEON::BI__builtin_neon_vclezd_f64:
5629 case NEON::BI__builtin_neon_vclezs_f32:
5630 case NEON::BI__builtin_neon_vclezh_f16:
5633 ICmpInst::FCMP_OLE, "vclez");
5634 case NEON::BI__builtin_neon_vcgtzd_s64:
5637 ICmpInst::ICMP_SGT, "vcgtz");
5638 case NEON::BI__builtin_neon_vcgtzd_f64:
5639 case NEON::BI__builtin_neon_vcgtzs_f32:
5640 case NEON::BI__builtin_neon_vcgtzh_f16:
5643 ICmpInst::FCMP_OGT, "vcgtz");
5644 case NEON::BI__builtin_neon_vcltzd_s64:
5647 ICmpInst::ICMP_SLT, "vcltz");
5648
5649 case NEON::BI__builtin_neon_vcltzd_f64:
5650 case NEON::BI__builtin_neon_vcltzs_f32:
5651 case NEON::BI__builtin_neon_vcltzh_f16:
5654 ICmpInst::FCMP_OLT, "vcltz");
5655
5656 case NEON::BI__builtin_neon_vceqzd_u64: {
5659 ICmpInst::ICMP_EQ, "vceqzd");
5660 }
5661 case NEON::BI__builtin_neon_vceqd_f64:
5662 case NEON::BI__builtin_neon_vcled_f64:
5663 case NEON::BI__builtin_neon_vcltd_f64:
5664 case NEON::BI__builtin_neon_vcged_f64:
5665 case NEON::BI__builtin_neon_vcgtd_f64: {
5666 llvm::CmpInst::Predicate P;
5667 switch (BuiltinID) {
5668 default: llvm_unreachable("missing builtin ID in switch!");
5669 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
5670 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
5671 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
5672 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
5673 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
5674 }
5675 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5676 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5677 if (P == llvm::FCmpInst::FCMP_OEQ)
5678 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5679 else
5680 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
5681 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
5682 }
5683 case NEON::BI__builtin_neon_vceqs_f32:
5684 case NEON::BI__builtin_neon_vcles_f32:
5685 case NEON::BI__builtin_neon_vclts_f32:
5686 case NEON::BI__builtin_neon_vcges_f32:
5687 case NEON::BI__builtin_neon_vcgts_f32: {
5688 llvm::CmpInst::Predicate P;
5689 switch (BuiltinID) {
5690 default: llvm_unreachable("missing builtin ID in switch!");
5691 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
5692 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
5693 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
5694 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
5695 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
5696 }
5697 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
5698 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
5699 if (P == llvm::FCmpInst::FCMP_OEQ)
5700 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5701 else
5702 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
5703 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
5704 }
5705 case NEON::BI__builtin_neon_vceqh_f16:
5706 case NEON::BI__builtin_neon_vcleh_f16:
5707 case NEON::BI__builtin_neon_vclth_f16:
5708 case NEON::BI__builtin_neon_vcgeh_f16:
5709 case NEON::BI__builtin_neon_vcgth_f16: {
5710 llvm::CmpInst::Predicate P;
5711 switch (BuiltinID) {
5712 default: llvm_unreachable("missing builtin ID in switch!");
5713 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
5714 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
5715 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
5716 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
5717 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
5718 }
5719 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
5720 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
5721 if (P == llvm::FCmpInst::FCMP_OEQ)
5722 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5723 else
5724 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
5725 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
5726 }
5727 case NEON::BI__builtin_neon_vceqd_s64:
5728 case NEON::BI__builtin_neon_vceqd_u64:
5729 case NEON::BI__builtin_neon_vcgtd_s64:
5730 case NEON::BI__builtin_neon_vcgtd_u64:
5731 case NEON::BI__builtin_neon_vcltd_s64:
5732 case NEON::BI__builtin_neon_vcltd_u64:
5733 case NEON::BI__builtin_neon_vcged_u64:
5734 case NEON::BI__builtin_neon_vcged_s64:
5735 case NEON::BI__builtin_neon_vcled_u64:
5736 case NEON::BI__builtin_neon_vcled_s64: {
5737 llvm::CmpInst::Predicate P;
5738 switch (BuiltinID) {
5739 default: llvm_unreachable("missing builtin ID in switch!");
5740 case NEON::BI__builtin_neon_vceqd_s64:
5741 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
5742 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
5743 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
5744 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
5745 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
5746 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
5747 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
5748 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
5749 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
5750 }
5751 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5752 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5753 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
5754 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
5755 }
5756 case NEON::BI__builtin_neon_vnegd_s64:
5757 return Builder.CreateNeg(Ops[0], "vnegd");
5758 case NEON::BI__builtin_neon_vnegh_f16:
5759 return Builder.CreateFNeg(Ops[0], "vnegh");
5760 case NEON::BI__builtin_neon_vtstd_s64:
5761 case NEON::BI__builtin_neon_vtstd_u64: {
5762 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5763 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5764 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
5765 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5766 llvm::Constant::getNullValue(Int64Ty));
5767 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
5768 }
5769 case NEON::BI__builtin_neon_vset_lane_i8:
5770 case NEON::BI__builtin_neon_vset_lane_i16:
5771 case NEON::BI__builtin_neon_vset_lane_i32:
5772 case NEON::BI__builtin_neon_vset_lane_i64:
5773 case NEON::BI__builtin_neon_vset_lane_bf16:
5774 case NEON::BI__builtin_neon_vset_lane_f32:
5775 case NEON::BI__builtin_neon_vsetq_lane_i8:
5776 case NEON::BI__builtin_neon_vsetq_lane_i16:
5777 case NEON::BI__builtin_neon_vsetq_lane_i32:
5778 case NEON::BI__builtin_neon_vsetq_lane_i64:
5779 case NEON::BI__builtin_neon_vsetq_lane_bf16:
5780 case NEON::BI__builtin_neon_vsetq_lane_f32:
5781 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5782 case NEON::BI__builtin_neon_vset_lane_f64:
5783 // The vector type needs a cast for the v1f64 variant.
5784 Ops[1] =
5785 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
5786 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5787 case NEON::BI__builtin_neon_vset_lane_mf8:
5788 case NEON::BI__builtin_neon_vsetq_lane_mf8:
5789 // The input vector type needs a cast to scalar type.
5790 Ops[0] =
5791 Builder.CreateBitCast(Ops[0], llvm::Type::getInt8Ty(getLLVMContext()));
5792 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5793 case NEON::BI__builtin_neon_vsetq_lane_f64:
5794 // The vector type needs a cast for the v2f64 variant.
5795 Ops[1] =
5796 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
5797 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5798
5799 case NEON::BI__builtin_neon_vget_lane_i8:
5800 case NEON::BI__builtin_neon_vdupb_lane_i8:
5801 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5802 case NEON::BI__builtin_neon_vgetq_lane_i8:
5803 case NEON::BI__builtin_neon_vdupb_laneq_i8:
5804 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5805 case NEON::BI__builtin_neon_vget_lane_mf8:
5806 case NEON::BI__builtin_neon_vdupb_lane_mf8:
5807 case NEON::BI__builtin_neon_vgetq_lane_mf8:
5808 case NEON::BI__builtin_neon_vdupb_laneq_mf8:
5809 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5810 case NEON::BI__builtin_neon_vget_lane_i16:
5811 case NEON::BI__builtin_neon_vduph_lane_i16:
5812 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5813 case NEON::BI__builtin_neon_vgetq_lane_i16:
5814 case NEON::BI__builtin_neon_vduph_laneq_i16:
5815 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5816 case NEON::BI__builtin_neon_vget_lane_i32:
5817 case NEON::BI__builtin_neon_vdups_lane_i32:
5818 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5819 case NEON::BI__builtin_neon_vdups_lane_f32:
5820 return Builder.CreateExtractElement(Ops[0], Ops[1], "vdups_lane");
5821 case NEON::BI__builtin_neon_vgetq_lane_i32:
5822 case NEON::BI__builtin_neon_vdups_laneq_i32:
5823 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5824 case NEON::BI__builtin_neon_vget_lane_i64:
5825 case NEON::BI__builtin_neon_vdupd_lane_i64:
5826 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5827 case NEON::BI__builtin_neon_vdupd_lane_f64:
5828 return Builder.CreateExtractElement(Ops[0], Ops[1], "vdupd_lane");
5829 case NEON::BI__builtin_neon_vgetq_lane_i64:
5830 case NEON::BI__builtin_neon_vdupd_laneq_i64:
5831 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5832 case NEON::BI__builtin_neon_vget_lane_f32:
5833 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5834 case NEON::BI__builtin_neon_vget_lane_f64:
5835 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5836 case NEON::BI__builtin_neon_vgetq_lane_f32:
5837 case NEON::BI__builtin_neon_vdups_laneq_f32:
5838 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5839 case NEON::BI__builtin_neon_vgetq_lane_f64:
5840 case NEON::BI__builtin_neon_vdupd_laneq_f64:
5841 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
5842 case NEON::BI__builtin_neon_vaddh_f16:
5843 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
5844 case NEON::BI__builtin_neon_vsubh_f16:
5845 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
5846 case NEON::BI__builtin_neon_vmulh_f16:
5847 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
5848 case NEON::BI__builtin_neon_vdivh_f16:
5849 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
5850 case NEON::BI__builtin_neon_vfmah_f16:
5851 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
5853 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
5854 {Ops[1], Ops[2], Ops[0]});
5855 case NEON::BI__builtin_neon_vfmsh_f16: {
5856 Value *Neg = Builder.CreateFNeg(Ops[1], "vsubh");
5857
5858 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
5860 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
5861 {Neg, Ops[2], Ops[0]});
5862 }
5863 case NEON::BI__builtin_neon_vaddd_s64:
5864 case NEON::BI__builtin_neon_vaddd_u64:
5865 return Builder.CreateAdd(Ops[0], Ops[1], "vaddd");
5866 case NEON::BI__builtin_neon_vsubd_s64:
5867 case NEON::BI__builtin_neon_vsubd_u64:
5868 return Builder.CreateSub(Ops[0], Ops[1], "vsubd");
5869 case NEON::BI__builtin_neon_vqdmlalh_s16:
5870 case NEON::BI__builtin_neon_vqdmlslh_s16: {
5871 SmallVector<Value *, 2> ProductOps;
5872 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5873 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
5874 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
5875 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5876 ProductOps, "vqdmlXl");
5877 Constant *CI = ConstantInt::get(SizeTy, 0);
5878 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5879
5880 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
5881 ? Intrinsic::aarch64_neon_sqadd
5882 : Intrinsic::aarch64_neon_sqsub;
5883 // Drop the 2nd multiplication argument before the accumulation
5884 Ops.pop_back();
5885 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
5886 }
5887 case NEON::BI__builtin_neon_vqshlud_n_s64: {
5888 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5889 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
5890 Ops, "vqshlu_n");
5891 }
5892 case NEON::BI__builtin_neon_vqshld_n_u64:
5893 case NEON::BI__builtin_neon_vqshld_n_s64: {
5894 Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
5895 ? Intrinsic::aarch64_neon_uqshl
5896 : Intrinsic::aarch64_neon_sqshl;
5897 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5898 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
5899 }
5900 case NEON::BI__builtin_neon_vrshrd_n_u64:
5901 case NEON::BI__builtin_neon_vrshrd_n_s64: {
5902 Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
5903 ? Intrinsic::aarch64_neon_urshl
5904 : Intrinsic::aarch64_neon_srshl;
5905 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
5906 Ops[1] = ConstantInt::get(Int64Ty, -SV);
5907 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
5908 }
5909 case NEON::BI__builtin_neon_vrsrad_n_u64:
5910 case NEON::BI__builtin_neon_vrsrad_n_s64: {
5911 Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
5912 ? Intrinsic::aarch64_neon_urshl
5913 : Intrinsic::aarch64_neon_srshl;
5914 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5915 Ops[2] = Builder.CreateNeg(Ops[2]);
5916 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
5917 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
5918 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
5919 }
5920 case NEON::BI__builtin_neon_vshld_n_s64:
5921 case NEON::BI__builtin_neon_vshld_n_u64: {
5922 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[1]);
5923 return Builder.CreateShl(
5924 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
5925 }
5926 case NEON::BI__builtin_neon_vshrd_n_s64: {
5927 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[1]);
5928 return Builder.CreateAShr(
5929 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5930 Amt->getZExtValue())),
5931 "shrd_n");
5932 }
5933 case NEON::BI__builtin_neon_vshrd_n_u64: {
5934 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[1]);
5935 uint64_t ShiftAmt = Amt->getZExtValue();
5936 // Right-shifting an unsigned value by its size yields 0.
5937 if (ShiftAmt == 64)
5938 return ConstantInt::get(Int64Ty, 0);
5939 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
5940 "shrd_n");
5941 }
5942 case NEON::BI__builtin_neon_vsrad_n_s64: {
5943 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[2]);
5944 Ops[1] = Builder.CreateAShr(
5945 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5946 Amt->getZExtValue())),
5947 "shrd_n");
5948 return Builder.CreateAdd(Ops[0], Ops[1]);
5949 }
5950 case NEON::BI__builtin_neon_vsrad_n_u64: {
5951 llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[2]);
5952 uint64_t ShiftAmt = Amt->getZExtValue();
5953 // Right-shifting an unsigned value by its size yields 0.
5954 // As Op + 0 = Op, return Ops[0] directly.
5955 if (ShiftAmt == 64)
5956 return Ops[0];
5957 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
5958 "shrd_n");
5959 return Builder.CreateAdd(Ops[0], Ops[1]);
5960 }
5961 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
5962 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
5963 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
5964 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
5965 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "lane");
5966 SmallVector<Value *, 2> ProductOps;
5967 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5968 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
5969 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
5970 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5971 ProductOps, "vqdmlXl");
5972 Constant *CI = ConstantInt::get(SizeTy, 0);
5973 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5974 // Drop lane-selection and the corresponding vector argument (these have
5975 // already been used)
5976 Ops.pop_back_n(2);
5977
5978 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
5979 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
5980 ? Intrinsic::aarch64_neon_sqadd
5981 : Intrinsic::aarch64_neon_sqsub;
5982 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
5983 }
5984 case NEON::BI__builtin_neon_vqdmlals_s32:
5985 case NEON::BI__builtin_neon_vqdmlsls_s32: {
5986 SmallVector<Value *, 2> ProductOps;
5987 ProductOps.push_back(Ops[1]);
5988 ProductOps.push_back(Ops[2]);
5989 Ops[1] =
5990 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5991 ProductOps, "vqdmlXl");
5992
5993 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
5994 ? Intrinsic::aarch64_neon_sqadd
5995 : Intrinsic::aarch64_neon_sqsub;
5996 // Drop the 2nd multiplication argument before the accumulation
5997 Ops.pop_back();
5998 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
5999 }
6000 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
6001 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
6002 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
6003 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
6004 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "lane");
6005 SmallVector<Value *, 2> ProductOps;
6006 ProductOps.push_back(Ops[1]);
6007 ProductOps.push_back(Ops[2]);
6008 Ops[1] =
6009 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6010 ProductOps, "vqdmlXl");
6011 // Drop lane-selection and the corresponding vector argument (these have
6012 // already been used)
6013 Ops.pop_back_n(2);
6014
6015 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
6016 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
6017 ? Intrinsic::aarch64_neon_sqadd
6018 : Intrinsic::aarch64_neon_sqsub;
6019 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
6020 }
6021 case NEON::BI__builtin_neon_vget_lane_bf16:
6022 case NEON::BI__builtin_neon_vduph_lane_bf16:
6023 case NEON::BI__builtin_neon_vduph_lane_f16: {
6024 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
6025 }
6026 case NEON::BI__builtin_neon_vgetq_lane_bf16:
6027 case NEON::BI__builtin_neon_vduph_laneq_bf16:
6028 case NEON::BI__builtin_neon_vduph_laneq_f16: {
6029 return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
6030 }
6031 case NEON::BI__builtin_neon_vcvt_bf16_f32: {
6032 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6033 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6034 return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6035 }
6036 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
6037 SmallVector<int, 16> ConcatMask(8);
6038 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6039 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6040 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6041 llvm::Value *Trunc =
6042 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6043 return Builder.CreateShuffleVector(
6044 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
6045 }
6046 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
6047 SmallVector<int, 16> ConcatMask(8);
6048 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6049 SmallVector<int, 16> LoMask(4);
6050 std::iota(LoMask.begin(), LoMask.end(), 0);
6051 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6052 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6053 llvm::Type *V8BF16 = FixedVectorType::get(Builder.getBFloatTy(), 8);
6054 llvm::Value *Inactive = Builder.CreateShuffleVector(
6055 Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
6056 llvm::Value *Trunc =
6057 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
6058 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
6059 }
6060
6061 case clang::AArch64::BI_InterlockedAdd:
6062 case clang::AArch64::BI_InterlockedAdd_acq:
6063 case clang::AArch64::BI_InterlockedAdd_rel:
6064 case clang::AArch64::BI_InterlockedAdd_nf:
6065 case clang::AArch64::BI_InterlockedAdd64:
6066 case clang::AArch64::BI_InterlockedAdd64_acq:
6067 case clang::AArch64::BI_InterlockedAdd64_rel:
6068 case clang::AArch64::BI_InterlockedAdd64_nf: {
6069 Address DestAddr = CheckAtomicAlignment(*this, E);
6070 Value *Val = Ops[1];
6071 llvm::AtomicOrdering Ordering;
6072 switch (BuiltinID) {
6073 case clang::AArch64::BI_InterlockedAdd:
6074 case clang::AArch64::BI_InterlockedAdd64:
6075 Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
6076 break;
6077 case clang::AArch64::BI_InterlockedAdd_acq:
6078 case clang::AArch64::BI_InterlockedAdd64_acq:
6079 Ordering = llvm::AtomicOrdering::Acquire;
6080 break;
6081 case clang::AArch64::BI_InterlockedAdd_rel:
6082 case clang::AArch64::BI_InterlockedAdd64_rel:
6083 Ordering = llvm::AtomicOrdering::Release;
6084 break;
6085 case clang::AArch64::BI_InterlockedAdd_nf:
6086 case clang::AArch64::BI_InterlockedAdd64_nf:
6087 Ordering = llvm::AtomicOrdering::Monotonic;
6088 break;
6089 default:
6090 llvm_unreachable("missing builtin ID in switch!");
6091 }
6092 AtomicRMWInst *RMWI =
6093 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val, Ordering);
6094 return Builder.CreateAdd(RMWI, Val);
6095 }
6096 }
6097
6098 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
6099 llvm::Type *Ty = VTy;
6100 if (!Ty)
6101 return nullptr;
6102
6103 bool ExtractLow = false;
6104 bool ExtendLaneArg = false;
6105 switch (BuiltinID) {
6106 default: return nullptr;
6107 case NEON::BI__builtin_neon_vbsl_v:
6108 case NEON::BI__builtin_neon_vbslq_v: {
6109 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6110 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
6111 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
6112 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
6113
6114 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
6115 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
6116 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
6117 return Builder.CreateBitCast(Ops[0], Ty);
6118 }
6119 case NEON::BI__builtin_neon_vfma_lane_v:
6120 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6121 // The ARM builtins (and instructions) have the addend as the first
6122 // operand, but the 'fma' intrinsics have it last. Swap it around here.
6123 Value *Addend = Ops[0];
6124 Value *Multiplicand = Ops[1];
6125 Value *LaneSource = Ops[2];
6126 Ops[0] = Multiplicand;
6127 Ops[1] = LaneSource;
6128 Ops[2] = Addend;
6129
6130 // Now adjust things to handle the lane access.
6131 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
6132 ? llvm::FixedVectorType::get(VTy->getElementType(),
6133 VTy->getNumElements() / 2)
6134 : VTy;
6135 llvm::Constant *cst = cast<Constant>(Ops[3]);
6136 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
6137 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
6138 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
6139
6140 Ops.pop_back();
6141 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
6142 : Intrinsic::fma;
6143 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
6144 }
6145 case NEON::BI__builtin_neon_vfma_laneq_v: {
6146 auto *VTy = cast<llvm::FixedVectorType>(Ty);
6147 // v1f64 fma should be mapped to Neon scalar f64 fma
6148 if (VTy && VTy->getElementType() == DoubleTy) {
6149 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6150 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6151 llvm::FixedVectorType *VTy =
6153 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
6154 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6155 Value *Result;
6157 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
6158 DoubleTy, {Ops[1], Ops[2], Ops[0]});
6159 return Builder.CreateBitCast(Result, Ty);
6160 }
6161 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6162 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6163
6164 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
6165 VTy->getNumElements() * 2);
6166 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
6167 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
6168 cast<ConstantInt>(Ops[3]));
6169 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6170
6172 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6173 {Ops[2], Ops[1], Ops[0]});
6174 }
6175 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6176 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6177 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6178
6179 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6180 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6182 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6183 {Ops[2], Ops[1], Ops[0]});
6184 }
6185 case NEON::BI__builtin_neon_vfmah_lane_f16:
6186 case NEON::BI__builtin_neon_vfmas_lane_f32:
6187 case NEON::BI__builtin_neon_vfmah_laneq_f16:
6188 case NEON::BI__builtin_neon_vfmas_laneq_f32:
6189 case NEON::BI__builtin_neon_vfmad_lane_f64:
6190 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6191 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6192 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6194 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6195 {Ops[1], Ops[2], Ops[0]});
6196 }
6197 case NEON::BI__builtin_neon_vmull_v:
6198 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6199 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6200 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6201 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6202 case NEON::BI__builtin_neon_vmax_v:
6203 case NEON::BI__builtin_neon_vmaxq_v:
6204 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6205 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6206 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6207 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6208 case NEON::BI__builtin_neon_vmaxh_f16: {
6209 Int = Intrinsic::aarch64_neon_fmax;
6210 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
6211 }
6212 case NEON::BI__builtin_neon_vmin_v:
6213 case NEON::BI__builtin_neon_vminq_v:
6214 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6215 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6216 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6217 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
6218 case NEON::BI__builtin_neon_vminh_f16: {
6219 Int = Intrinsic::aarch64_neon_fmin;
6220 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
6221 }
6222 case NEON::BI__builtin_neon_vabd_v:
6223 case NEON::BI__builtin_neon_vabdq_v:
6224 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6225 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6226 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6227 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
6228 case NEON::BI__builtin_neon_vpadal_v:
6229 case NEON::BI__builtin_neon_vpadalq_v: {
6230 unsigned ArgElts = VTy->getNumElements();
6231 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6232 unsigned BitWidth = EltTy->getBitWidth();
6233 auto *ArgTy = llvm::FixedVectorType::get(
6234 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
6235 llvm::Type* Tys[2] = { VTy, ArgTy };
6236 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6238 TmpOps.push_back(Ops[1]);
6239 Function *F = CGM.getIntrinsic(Int, Tys);
6240 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
6241 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6242 return Builder.CreateAdd(tmp, addend);
6243 }
6244 case NEON::BI__builtin_neon_vpmin_v:
6245 case NEON::BI__builtin_neon_vpminq_v:
6246 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6247 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6248 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6249 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
6250 case NEON::BI__builtin_neon_vpmax_v:
6251 case NEON::BI__builtin_neon_vpmaxq_v:
6252 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6253 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6254 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6255 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
6256 case NEON::BI__builtin_neon_vminnm_v:
6257 case NEON::BI__builtin_neon_vminnmq_v:
6258 Int = Intrinsic::aarch64_neon_fminnm;
6259 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
6260 case NEON::BI__builtin_neon_vminnmh_f16:
6261 Int = Intrinsic::aarch64_neon_fminnm;
6262 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
6263 case NEON::BI__builtin_neon_vmaxnm_v:
6264 case NEON::BI__builtin_neon_vmaxnmq_v:
6265 Int = Intrinsic::aarch64_neon_fmaxnm;
6266 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
6267 case NEON::BI__builtin_neon_vmaxnmh_f16:
6268 Int = Intrinsic::aarch64_neon_fmaxnm;
6269 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
6270 case NEON::BI__builtin_neon_vrecpss_f32: {
6271 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6272 Ops, "vrecps");
6273 }
6274 case NEON::BI__builtin_neon_vrecpsd_f64:
6275 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6276 Ops, "vrecps");
6277 case NEON::BI__builtin_neon_vrecpsh_f16:
6278 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
6279 Ops, "vrecps");
6280 case NEON::BI__builtin_neon_vqshrun_n_v:
6281 Int = Intrinsic::aarch64_neon_sqshrun;
6282 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
6283 case NEON::BI__builtin_neon_vqrshrun_n_v:
6284 Int = Intrinsic::aarch64_neon_sqrshrun;
6285 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
6286 case NEON::BI__builtin_neon_vqshrn_n_v:
6287 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6288 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
6289 case NEON::BI__builtin_neon_vrshrn_n_v:
6290 Int = Intrinsic::aarch64_neon_rshrn;
6291 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
6292 case NEON::BI__builtin_neon_vqrshrn_n_v:
6293 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6294 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
6295 case NEON::BI__builtin_neon_vrndah_f16: {
6296 Int = Builder.getIsFPConstrained()
6297 ? Intrinsic::experimental_constrained_round
6298 : Intrinsic::round;
6299 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
6300 }
6301 case NEON::BI__builtin_neon_vrnda_v:
6302 case NEON::BI__builtin_neon_vrndaq_v: {
6303 Int = Builder.getIsFPConstrained()
6304 ? Intrinsic::experimental_constrained_round
6305 : Intrinsic::round;
6306 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
6307 }
6308 case NEON::BI__builtin_neon_vrndih_f16: {
6309 Int = Builder.getIsFPConstrained()
6310 ? Intrinsic::experimental_constrained_nearbyint
6311 : Intrinsic::nearbyint;
6312 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
6313 }
6314 case NEON::BI__builtin_neon_vrndmh_f16: {
6315 Int = Builder.getIsFPConstrained()
6316 ? Intrinsic::experimental_constrained_floor
6317 : Intrinsic::floor;
6318 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
6319 }
6320 case NEON::BI__builtin_neon_vrndm_v:
6321 case NEON::BI__builtin_neon_vrndmq_v: {
6322 Int = Builder.getIsFPConstrained()
6323 ? Intrinsic::experimental_constrained_floor
6324 : Intrinsic::floor;
6325 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6326 }
6327 case NEON::BI__builtin_neon_vrndnh_f16: {
6328 Int = Builder.getIsFPConstrained()
6329 ? Intrinsic::experimental_constrained_roundeven
6330 : Intrinsic::roundeven;
6331 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
6332 }
6333 case NEON::BI__builtin_neon_vrndn_v:
6334 case NEON::BI__builtin_neon_vrndnq_v: {
6335 Int = Builder.getIsFPConstrained()
6336 ? Intrinsic::experimental_constrained_roundeven
6337 : Intrinsic::roundeven;
6338 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6339 }
6340 case NEON::BI__builtin_neon_vrndns_f32: {
6341 Int = Builder.getIsFPConstrained()
6342 ? Intrinsic::experimental_constrained_roundeven
6343 : Intrinsic::roundeven;
6344 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
6345 }
6346 case NEON::BI__builtin_neon_vrndph_f16: {
6347 Int = Builder.getIsFPConstrained()
6348 ? Intrinsic::experimental_constrained_ceil
6349 : Intrinsic::ceil;
6350 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
6351 }
6352 case NEON::BI__builtin_neon_vrndp_v:
6353 case NEON::BI__builtin_neon_vrndpq_v: {
6354 Int = Builder.getIsFPConstrained()
6355 ? Intrinsic::experimental_constrained_ceil
6356 : Intrinsic::ceil;
6357 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6358 }
6359 case NEON::BI__builtin_neon_vrndxh_f16: {
6360 Int = Builder.getIsFPConstrained()
6361 ? Intrinsic::experimental_constrained_rint
6362 : Intrinsic::rint;
6363 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
6364 }
6365 case NEON::BI__builtin_neon_vrndx_v:
6366 case NEON::BI__builtin_neon_vrndxq_v: {
6367 Int = Builder.getIsFPConstrained()
6368 ? Intrinsic::experimental_constrained_rint
6369 : Intrinsic::rint;
6370 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6371 }
6372 case NEON::BI__builtin_neon_vrndh_f16: {
6373 Int = Builder.getIsFPConstrained()
6374 ? Intrinsic::experimental_constrained_trunc
6375 : Intrinsic::trunc;
6376 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
6377 }
6378 case NEON::BI__builtin_neon_vrnd32x_f32:
6379 case NEON::BI__builtin_neon_vrnd32xq_f32:
6380 case NEON::BI__builtin_neon_vrnd32x_f64:
6381 case NEON::BI__builtin_neon_vrnd32xq_f64: {
6382 Int = Intrinsic::aarch64_neon_frint32x;
6383 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
6384 }
6385 case NEON::BI__builtin_neon_vrnd32z_f32:
6386 case NEON::BI__builtin_neon_vrnd32zq_f32:
6387 case NEON::BI__builtin_neon_vrnd32z_f64:
6388 case NEON::BI__builtin_neon_vrnd32zq_f64: {
6389 Int = Intrinsic::aarch64_neon_frint32z;
6390 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
6391 }
6392 case NEON::BI__builtin_neon_vrnd64x_f32:
6393 case NEON::BI__builtin_neon_vrnd64xq_f32:
6394 case NEON::BI__builtin_neon_vrnd64x_f64:
6395 case NEON::BI__builtin_neon_vrnd64xq_f64: {
6396 Int = Intrinsic::aarch64_neon_frint64x;
6397 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
6398 }
6399 case NEON::BI__builtin_neon_vrnd64z_f32:
6400 case NEON::BI__builtin_neon_vrnd64zq_f32:
6401 case NEON::BI__builtin_neon_vrnd64z_f64:
6402 case NEON::BI__builtin_neon_vrnd64zq_f64: {
6403 Int = Intrinsic::aarch64_neon_frint64z;
6404 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
6405 }
6406 case NEON::BI__builtin_neon_vrnd_v:
6407 case NEON::BI__builtin_neon_vrndq_v: {
6408 Int = Builder.getIsFPConstrained()
6409 ? Intrinsic::experimental_constrained_trunc
6410 : Intrinsic::trunc;
6411 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
6412 }
6413 case NEON::BI__builtin_neon_vcvt_f64_v:
6414 case NEON::BI__builtin_neon_vcvtq_f64_v:
6415 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6416 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6417 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6418 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6419 case NEON::BI__builtin_neon_vcvt_f64_f32: {
6420 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6421 "unexpected vcvt_f64_f32 builtin");
6422 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6423 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6424
6425 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
6426 }
6427 case NEON::BI__builtin_neon_vcvt_f32_f64: {
6428 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6429 "unexpected vcvt_f32_f64 builtin");
6430 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6431 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6432
6433 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
6434 }
6435 case NEON::BI__builtin_neon_vcvta_s16_f16:
6436 case NEON::BI__builtin_neon_vcvta_u16_f16:
6437 case NEON::BI__builtin_neon_vcvta_s32_v:
6438 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
6439 case NEON::BI__builtin_neon_vcvtaq_s32_v:
6440 case NEON::BI__builtin_neon_vcvta_u32_v:
6441 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
6442 case NEON::BI__builtin_neon_vcvtaq_u32_v:
6443 case NEON::BI__builtin_neon_vcvta_s64_v:
6444 case NEON::BI__builtin_neon_vcvtaq_s64_v:
6445 case NEON::BI__builtin_neon_vcvta_u64_v:
6446 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6447 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6448 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6449 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
6450 }
6451 case NEON::BI__builtin_neon_vcvtm_s16_f16:
6452 case NEON::BI__builtin_neon_vcvtm_s32_v:
6453 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
6454 case NEON::BI__builtin_neon_vcvtmq_s32_v:
6455 case NEON::BI__builtin_neon_vcvtm_u16_f16:
6456 case NEON::BI__builtin_neon_vcvtm_u32_v:
6457 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
6458 case NEON::BI__builtin_neon_vcvtmq_u32_v:
6459 case NEON::BI__builtin_neon_vcvtm_s64_v:
6460 case NEON::BI__builtin_neon_vcvtmq_s64_v:
6461 case NEON::BI__builtin_neon_vcvtm_u64_v:
6462 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6463 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6464 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6465 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
6466 }
6467 case NEON::BI__builtin_neon_vcvtn_s16_f16:
6468 case NEON::BI__builtin_neon_vcvtn_s32_v:
6469 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
6470 case NEON::BI__builtin_neon_vcvtnq_s32_v:
6471 case NEON::BI__builtin_neon_vcvtn_u16_f16:
6472 case NEON::BI__builtin_neon_vcvtn_u32_v:
6473 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
6474 case NEON::BI__builtin_neon_vcvtnq_u32_v:
6475 case NEON::BI__builtin_neon_vcvtn_s64_v:
6476 case NEON::BI__builtin_neon_vcvtnq_s64_v:
6477 case NEON::BI__builtin_neon_vcvtn_u64_v:
6478 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6479 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6480 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6481 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
6482 }
6483 case NEON::BI__builtin_neon_vcvtp_s16_f16:
6484 case NEON::BI__builtin_neon_vcvtp_s32_v:
6485 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
6486 case NEON::BI__builtin_neon_vcvtpq_s32_v:
6487 case NEON::BI__builtin_neon_vcvtp_u16_f16:
6488 case NEON::BI__builtin_neon_vcvtp_u32_v:
6489 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
6490 case NEON::BI__builtin_neon_vcvtpq_u32_v:
6491 case NEON::BI__builtin_neon_vcvtp_s64_v:
6492 case NEON::BI__builtin_neon_vcvtpq_s64_v:
6493 case NEON::BI__builtin_neon_vcvtp_u64_v:
6494 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6495 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6496 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6497 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
6498 }
6499 case NEON::BI__builtin_neon_vmulx_v:
6500 case NEON::BI__builtin_neon_vmulxq_v: {
6501 Int = Intrinsic::aarch64_neon_fmulx;
6502 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
6503 }
6504 case NEON::BI__builtin_neon_vmulxh_lane_f16:
6505 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
6506 // vmulx_lane should be mapped to Neon scalar mulx after
6507 // extracting the scalar element
6508 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6509 Ops.pop_back();
6510 Int = Intrinsic::aarch64_neon_fmulx;
6511 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
6512 }
6513 case NEON::BI__builtin_neon_vmul_lane_v:
6514 case NEON::BI__builtin_neon_vmul_laneq_v: {
6515 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
6516 bool Quad = false;
6517 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
6518 Quad = true;
6519 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6520 llvm::FixedVectorType *VTy =
6522 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6523 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6524 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
6525 return Builder.CreateBitCast(Result, Ty);
6526 }
6527 case NEON::BI__builtin_neon_vpmaxnm_v:
6528 case NEON::BI__builtin_neon_vpmaxnmq_v: {
6529 Int = Intrinsic::aarch64_neon_fmaxnmp;
6530 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
6531 }
6532 case NEON::BI__builtin_neon_vpminnm_v:
6533 case NEON::BI__builtin_neon_vpminnmq_v: {
6534 Int = Intrinsic::aarch64_neon_fminnmp;
6535 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
6536 }
6537 case NEON::BI__builtin_neon_vsqrth_f16: {
6538 Int = Builder.getIsFPConstrained()
6539 ? Intrinsic::experimental_constrained_sqrt
6540 : Intrinsic::sqrt;
6541 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
6542 }
6543 case NEON::BI__builtin_neon_vsqrt_v:
6544 case NEON::BI__builtin_neon_vsqrtq_v: {
6545 Int = Builder.getIsFPConstrained()
6546 ? Intrinsic::experimental_constrained_sqrt
6547 : Intrinsic::sqrt;
6548 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6549 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
6550 }
6551 case NEON::BI__builtin_neon_vrbit_v:
6552 case NEON::BI__builtin_neon_vrbitq_v: {
6553 Int = Intrinsic::bitreverse;
6554 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
6555 }
6556 case NEON::BI__builtin_neon_vmaxv_f16: {
6557 Int = Intrinsic::aarch64_neon_fmaxv;
6558 Ty = HalfTy;
6559 VTy = llvm::FixedVectorType::get(HalfTy, 4);
6560 llvm::Type *Tys[2] = {Ty, VTy};
6561 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6562 }
6563 case NEON::BI__builtin_neon_vmaxvq_f16: {
6564 Int = Intrinsic::aarch64_neon_fmaxv;
6565 Ty = HalfTy;
6566 VTy = llvm::FixedVectorType::get(HalfTy, 8);
6567 llvm::Type *Tys[2] = {Ty, VTy};
6568 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6569 }
6570 case NEON::BI__builtin_neon_vminv_f16: {
6571 Int = Intrinsic::aarch64_neon_fminv;
6572 Ty = HalfTy;
6573 VTy = llvm::FixedVectorType::get(HalfTy, 4);
6574 llvm::Type *Tys[2] = {Ty, VTy};
6575 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6576 }
6577 case NEON::BI__builtin_neon_vminvq_f16: {
6578 Int = Intrinsic::aarch64_neon_fminv;
6579 Ty = HalfTy;
6580 VTy = llvm::FixedVectorType::get(HalfTy, 8);
6581 llvm::Type *Tys[2] = {Ty, VTy};
6582 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6583 }
6584 case NEON::BI__builtin_neon_vmaxnmv_f16: {
6585 Int = Intrinsic::aarch64_neon_fmaxnmv;
6586 Ty = HalfTy;
6587 VTy = llvm::FixedVectorType::get(HalfTy, 4);
6588 llvm::Type *Tys[2] = {Ty, VTy};
6589 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
6590 }
6591 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
6592 Int = Intrinsic::aarch64_neon_fmaxnmv;
6593 Ty = HalfTy;
6594 VTy = llvm::FixedVectorType::get(HalfTy, 8);
6595 llvm::Type *Tys[2] = {Ty, VTy};
6596 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
6597 }
6598 case NEON::BI__builtin_neon_vminnmv_f16: {
6599 Int = Intrinsic::aarch64_neon_fminnmv;
6600 Ty = HalfTy;
6601 VTy = llvm::FixedVectorType::get(HalfTy, 4);
6602 llvm::Type *Tys[2] = {Ty, VTy};
6603 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
6604 return Builder.CreateTrunc(Ops[0], HalfTy);
6605 }
6606 case NEON::BI__builtin_neon_vminnmvq_f16: {
6607 Int = Intrinsic::aarch64_neon_fminnmv;
6608 Ty = HalfTy;
6609 VTy = llvm::FixedVectorType::get(HalfTy, 8);
6610 llvm::Type *Tys[2] = {Ty, VTy};
6611 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
6612 }
6613 case NEON::BI__builtin_neon_vmul_n_f64: {
6614 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6615 Value *RHS = Builder.CreateBitCast(Ops[1], DoubleTy);
6616 return Builder.CreateFMul(Ops[0], RHS);
6617 }
6618 case NEON::BI__builtin_neon_vaddlv_u8:
6619 case NEON::BI__builtin_neon_vaddlvq_u8:
6620 case NEON::BI__builtin_neon_vaddlv_u16:
6621 case NEON::BI__builtin_neon_vaddlvq_u16: {
6622 Int = Intrinsic::aarch64_neon_uaddlv;
6623 Ty = Int32Ty;
6624 VTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
6625 llvm::Type *Tys[2] = {Ty, VTy};
6626 Value *Result = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6627 if (VTy->getElementType()->getPrimitiveSizeInBits() == 8)
6628 return Builder.CreateTrunc(Result, Int16Ty);
6629 return Result;
6630 }
6631 case NEON::BI__builtin_neon_vaddlv_s8:
6632 case NEON::BI__builtin_neon_vaddlvq_s8:
6633 case NEON::BI__builtin_neon_vaddlv_s16:
6634 case NEON::BI__builtin_neon_vaddlvq_s16: {
6635 Int = Intrinsic::aarch64_neon_saddlv;
6636 Ty = Int32Ty;
6637 VTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
6638 llvm::Type *Tys[2] = {Ty, VTy};
6639 Value *Result = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6640 if (VTy->getElementType()->getPrimitiveSizeInBits() == 8)
6641 return Builder.CreateTrunc(Result, Int16Ty);
6642 return Result;
6643 }
6644 case NEON::BI__builtin_neon_vsri_n_v:
6645 case NEON::BI__builtin_neon_vsriq_n_v: {
6646 Int = Intrinsic::aarch64_neon_vsri;
6647 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6648 return EmitNeonCall(Intrin, Ops, "vsri_n");
6649 }
6650 case NEON::BI__builtin_neon_vsli_n_v:
6651 case NEON::BI__builtin_neon_vsliq_n_v: {
6652 Int = Intrinsic::aarch64_neon_vsli;
6653 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6654 return EmitNeonCall(Intrin, Ops, "vsli_n");
6655 }
6656 case NEON::BI__builtin_neon_vsra_n_v:
6657 case NEON::BI__builtin_neon_vsraq_n_v:
6658 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6659 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6660 return Builder.CreateAdd(Ops[0], Ops[1]);
6661 case NEON::BI__builtin_neon_vrsra_n_v:
6662 case NEON::BI__builtin_neon_vrsraq_n_v: {
6663 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6665 TmpOps.push_back(Ops[1]);
6666 TmpOps.push_back(Ops[2]);
6667 Function* F = CGM.getIntrinsic(Int, Ty);
6668 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
6669 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
6670 return Builder.CreateAdd(Ops[0], tmp);
6671 }
6672 case NEON::BI__builtin_neon_vld1_v:
6673 case NEON::BI__builtin_neon_vld1q_v: {
6674 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
6675 }
6676 case NEON::BI__builtin_neon_vst1_v:
6677 case NEON::BI__builtin_neon_vst1q_v:
6678 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6679 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
6680 case NEON::BI__builtin_neon_vld1_lane_v:
6681 case NEON::BI__builtin_neon_vld1q_lane_v: {
6682 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6683 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
6684 PtrOp0.getAlignment());
6685 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
6686 }
6687 case NEON::BI__builtin_neon_vldap1_lane_s64:
6688 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
6689 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6690 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
6691 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
6692 LI->setAtomic(llvm::AtomicOrdering::Acquire);
6693 Ops[0] = LI;
6694 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
6695 }
6696 case NEON::BI__builtin_neon_vld1_dup_v:
6697 case NEON::BI__builtin_neon_vld1q_dup_v: {
6698 Value *V = PoisonValue::get(Ty);
6699 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
6700 PtrOp0.getAlignment());
6701 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
6702 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
6703 return EmitNeonSplat(Ops[0], CI);
6704 }
6705 case NEON::BI__builtin_neon_vst1_lane_v:
6706 case NEON::BI__builtin_neon_vst1q_lane_v:
6707 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6708 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6709 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
6710 case NEON::BI__builtin_neon_vstl1_lane_s64:
6711 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
6712 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6713 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6714 llvm::StoreInst *SI =
6715 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
6716 SI->setAtomic(llvm::AtomicOrdering::Release);
6717 return SI;
6718 }
6719 case NEON::BI__builtin_neon_vld2_v:
6720 case NEON::BI__builtin_neon_vld2q_v: {
6721 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6722 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
6723 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6724 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6725 }
6726 case NEON::BI__builtin_neon_vld3_v:
6727 case NEON::BI__builtin_neon_vld3q_v: {
6728 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6729 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
6730 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6731 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6732 }
6733 case NEON::BI__builtin_neon_vld4_v:
6734 case NEON::BI__builtin_neon_vld4q_v: {
6735 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6736 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
6737 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6738 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6739 }
6740 case NEON::BI__builtin_neon_vld2_dup_v:
6741 case NEON::BI__builtin_neon_vld2q_dup_v: {
6742 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6743 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
6744 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6745 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6746 }
6747 case NEON::BI__builtin_neon_vld3_dup_v:
6748 case NEON::BI__builtin_neon_vld3q_dup_v: {
6749 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6750 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
6751 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6752 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6753 }
6754 case NEON::BI__builtin_neon_vld4_dup_v:
6755 case NEON::BI__builtin_neon_vld4q_dup_v: {
6756 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6757 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
6758 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6759 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6760 }
6761 case NEON::BI__builtin_neon_vld2_lane_v:
6762 case NEON::BI__builtin_neon_vld2q_lane_v: {
6763 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6764 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
6765 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
6766 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6767 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6768 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6769 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
6770 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6771 }
6772 case NEON::BI__builtin_neon_vld3_lane_v:
6773 case NEON::BI__builtin_neon_vld3q_lane_v: {
6774 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6775 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
6776 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
6777 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6778 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6779 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6780 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6781 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
6782 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6783 }
6784 case NEON::BI__builtin_neon_vld4_lane_v:
6785 case NEON::BI__builtin_neon_vld4q_lane_v: {
6786 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6787 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
6788 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
6789 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6790 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6791 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6792 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
6793 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
6794 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
6795 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6796 }
6797 case NEON::BI__builtin_neon_vst2_v:
6798 case NEON::BI__builtin_neon_vst2q_v: {
6799 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6800 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
6801 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
6802 Ops, "");
6803 }
6804 case NEON::BI__builtin_neon_vst2_lane_v:
6805 case NEON::BI__builtin_neon_vst2q_lane_v: {
6806 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6807 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
6808 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6809 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
6810 Ops, "");
6811 }
6812 case NEON::BI__builtin_neon_vst3_v:
6813 case NEON::BI__builtin_neon_vst3q_v: {
6814 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6815 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6816 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
6817 Ops, "");
6818 }
6819 case NEON::BI__builtin_neon_vst3_lane_v:
6820 case NEON::BI__builtin_neon_vst3q_lane_v: {
6821 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6822 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6823 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6824 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
6825 Ops, "");
6826 }
6827 case NEON::BI__builtin_neon_vst4_v:
6828 case NEON::BI__builtin_neon_vst4q_v: {
6829 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6830 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6831 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
6832 Ops, "");
6833 }
6834 case NEON::BI__builtin_neon_vst4_lane_v:
6835 case NEON::BI__builtin_neon_vst4q_lane_v: {
6836 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6837 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6838 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
6839 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
6840 Ops, "");
6841 }
6842 case NEON::BI__builtin_neon_vtrn_v:
6843 case NEON::BI__builtin_neon_vtrnq_v: {
6844 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6845 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6846 Value *SV = nullptr;
6847
6848 for (unsigned vi = 0; vi != 2; ++vi) {
6849 SmallVector<int, 16> Indices;
6850 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6851 Indices.push_back(i+vi);
6852 Indices.push_back(i+e+vi);
6853 }
6854 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6855 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
6856 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6857 }
6858 return SV;
6859 }
6860 case NEON::BI__builtin_neon_vuzp_v:
6861 case NEON::BI__builtin_neon_vuzpq_v: {
6862 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6863 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6864 Value *SV = nullptr;
6865
6866 for (unsigned vi = 0; vi != 2; ++vi) {
6867 SmallVector<int, 16> Indices;
6868 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
6869 Indices.push_back(2*i+vi);
6870
6871 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6872 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
6873 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6874 }
6875 return SV;
6876 }
6877 case NEON::BI__builtin_neon_vzip_v:
6878 case NEON::BI__builtin_neon_vzipq_v: {
6879 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6880 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6881 Value *SV = nullptr;
6882
6883 for (unsigned vi = 0; vi != 2; ++vi) {
6884 SmallVector<int, 16> Indices;
6885 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6886 Indices.push_back((i + vi*e) >> 1);
6887 Indices.push_back(((i + vi*e) >> 1)+e);
6888 }
6889 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6890 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
6891 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6892 }
6893 return SV;
6894 }
6895 case NEON::BI__builtin_neon_vqtbl1q_v: {
6896 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
6897 Ops, "vtbl1");
6898 }
6899 case NEON::BI__builtin_neon_vqtbl2q_v: {
6900 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
6901 Ops, "vtbl2");
6902 }
6903 case NEON::BI__builtin_neon_vqtbl3q_v: {
6904 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
6905 Ops, "vtbl3");
6906 }
6907 case NEON::BI__builtin_neon_vqtbl4q_v: {
6908 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
6909 Ops, "vtbl4");
6910 }
6911 case NEON::BI__builtin_neon_vqtbx1q_v: {
6912 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
6913 Ops, "vtbx1");
6914 }
6915 case NEON::BI__builtin_neon_vqtbx2q_v: {
6916 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
6917 Ops, "vtbx2");
6918 }
6919 case NEON::BI__builtin_neon_vqtbx3q_v: {
6920 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
6921 Ops, "vtbx3");
6922 }
6923 case NEON::BI__builtin_neon_vqtbx4q_v: {
6924 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
6925 Ops, "vtbx4");
6926 }
6927 case NEON::BI__builtin_neon_vsqadd_v:
6928 case NEON::BI__builtin_neon_vsqaddq_v: {
6929 Int = Intrinsic::aarch64_neon_usqadd;
6930 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
6931 }
6932 case NEON::BI__builtin_neon_vuqadd_v:
6933 case NEON::BI__builtin_neon_vuqaddq_v: {
6934 Int = Intrinsic::aarch64_neon_suqadd;
6935 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
6936 }
6937
6938 case NEON::BI__builtin_neon_vluti2_laneq_mf8:
6939 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
6940 case NEON::BI__builtin_neon_vluti2_laneq_f16:
6941 case NEON::BI__builtin_neon_vluti2_laneq_p16:
6942 case NEON::BI__builtin_neon_vluti2_laneq_p8:
6943 case NEON::BI__builtin_neon_vluti2_laneq_s16:
6944 case NEON::BI__builtin_neon_vluti2_laneq_s8:
6945 case NEON::BI__builtin_neon_vluti2_laneq_u16:
6946 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
6947 Int = Intrinsic::aarch64_neon_vluti2_laneq;
6948 llvm::Type *Tys[2];
6949 Tys[0] = Ty;
6950 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
6951 /*isQuad*/ false));
6952 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
6953 }
6954 case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
6955 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
6956 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
6957 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
6958 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
6959 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
6960 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
6961 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
6962 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
6963 Int = Intrinsic::aarch64_neon_vluti2_laneq;
6964 llvm::Type *Tys[2];
6965 Tys[0] = Ty;
6966 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
6967 /*isQuad*/ true));
6968 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
6969 }
6970 case NEON::BI__builtin_neon_vluti2_lane_mf8:
6971 case NEON::BI__builtin_neon_vluti2_lane_bf16:
6972 case NEON::BI__builtin_neon_vluti2_lane_f16:
6973 case NEON::BI__builtin_neon_vluti2_lane_p16:
6974 case NEON::BI__builtin_neon_vluti2_lane_p8:
6975 case NEON::BI__builtin_neon_vluti2_lane_s16:
6976 case NEON::BI__builtin_neon_vluti2_lane_s8:
6977 case NEON::BI__builtin_neon_vluti2_lane_u16:
6978 case NEON::BI__builtin_neon_vluti2_lane_u8: {
6979 Int = Intrinsic::aarch64_neon_vluti2_lane;
6980 llvm::Type *Tys[2];
6981 Tys[0] = Ty;
6982 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
6983 /*isQuad*/ false));
6984 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
6985 }
6986 case NEON::BI__builtin_neon_vluti2q_lane_mf8:
6987 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
6988 case NEON::BI__builtin_neon_vluti2q_lane_f16:
6989 case NEON::BI__builtin_neon_vluti2q_lane_p16:
6990 case NEON::BI__builtin_neon_vluti2q_lane_p8:
6991 case NEON::BI__builtin_neon_vluti2q_lane_s16:
6992 case NEON::BI__builtin_neon_vluti2q_lane_s8:
6993 case NEON::BI__builtin_neon_vluti2q_lane_u16:
6994 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
6995 Int = Intrinsic::aarch64_neon_vluti2_lane;
6996 llvm::Type *Tys[2];
6997 Tys[0] = Ty;
6998 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
6999 /*isQuad*/ true));
7000 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
7001 }
7002 case NEON::BI__builtin_neon_vluti4q_lane_mf8:
7003 case NEON::BI__builtin_neon_vluti4q_lane_p8:
7004 case NEON::BI__builtin_neon_vluti4q_lane_s8:
7005 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
7006 Int = Intrinsic::aarch64_neon_vluti4q_lane;
7007 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane");
7008 }
7009 case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
7010 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
7011 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
7012 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
7013 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
7014 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq");
7015 }
7016 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
7017 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
7018 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
7019 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
7020 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
7021 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
7022 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
7023 }
7024 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
7025 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
7026 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
7027 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
7028 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
7029 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
7030 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
7031 }
7032 case NEON::BI__builtin_neon_vmmlaq_f16_mf8_fpm:
7033 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla,
7034 {llvm::FixedVectorType::get(HalfTy, 8),
7035 llvm::FixedVectorType::get(Int8Ty, 16)},
7036 Ops, E, "fmmla");
7037 case NEON::BI__builtin_neon_vmmlaq_f32_mf8_fpm:
7038 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla,
7039 {llvm::FixedVectorType::get(FloatTy, 4),
7040 llvm::FixedVectorType::get(Int8Ty, 16)},
7041 Ops, E, "fmmla");
7042 case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
7043 ExtractLow = true;
7044 [[fallthrough]];
7045 case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
7046 case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
7047 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
7048 llvm::FixedVectorType::get(BFloatTy, 8),
7049 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7050 case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
7051 ExtractLow = true;
7052 [[fallthrough]];
7053 case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
7054 case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
7055 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
7056 llvm::FixedVectorType::get(BFloatTy, 8),
7057 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7058 case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
7059 ExtractLow = true;
7060 [[fallthrough]];
7061 case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
7062 case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
7063 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
7064 llvm::FixedVectorType::get(HalfTy, 8),
7065 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7066 case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
7067 ExtractLow = true;
7068 [[fallthrough]];
7069 case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
7070 case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
7071 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
7072 llvm::FixedVectorType::get(HalfTy, 8),
7073 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7074 case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
7075 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7076 llvm::FixedVectorType::get(Int8Ty, 8),
7077 Ops[0]->getType(), false, Ops, E, "vfcvtn");
7078 case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
7079 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7080 llvm::FixedVectorType::get(Int8Ty, 8),
7081 llvm::FixedVectorType::get(HalfTy, 4), false, Ops,
7082 E, "vfcvtn");
7083 case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
7084 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7085 llvm::FixedVectorType::get(Int8Ty, 16),
7086 llvm::FixedVectorType::get(HalfTy, 8), false, Ops,
7087 E, "vfcvtn");
7088 case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
7089 llvm::Type *Ty = llvm::FixedVectorType::get(Int8Ty, 16);
7090 Ops[0] = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
7091 uint64_t(0));
7092 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn2, Ty,
7093 Ops[1]->getType(), false, Ops, E, "vfcvtn2");
7094 }
7095
7096 case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
7097 case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
7098 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2, false, HalfTy,
7099 Ops, E, "fdot2");
7100 case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
7101 case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
7102 ExtendLaneArg = true;
7103 [[fallthrough]];
7104 case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
7105 case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
7106 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2_lane,
7107 ExtendLaneArg, HalfTy, Ops, E, "fdot2_lane");
7108 case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
7109 case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
7110 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4, false,
7111 FloatTy, Ops, E, "fdot4");
7112 case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
7113 case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
7114 ExtendLaneArg = true;
7115 [[fallthrough]];
7116 case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
7117 case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
7118 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4_lane,
7119 ExtendLaneArg, FloatTy, Ops, E, "fdot4_lane");
7120
7121 case NEON::BI__builtin_neon_vdot_f32_f16:
7122 case NEON::BI__builtin_neon_vdotq_f32_f16: {
7123 llvm::Type *InputTy =
7124 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
7125 llvm::Type *Tys[2] = {Ty, InputTy};
7126 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_fdot, Tys),
7127 Ops, "vdot");
7128 }
7129
7130 case NEON::BI__builtin_neon_vdot_lane_f32_f16:
7131 case NEON::BI__builtin_neon_vdot_laneq_f32_f16:
7132 case NEON::BI__builtin_neon_vdotq_lane_f32_f16:
7133 case NEON::BI__builtin_neon_vdotq_laneq_f32_f16: {
7134 llvm::FixedVectorType *InputTy =
7135 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
7136 llvm::FixedVectorType *LaneTy = llvm::FixedVectorType::get(
7137 HalfTy, Ops[2]->getType()->getPrimitiveSizeInBits() / 16);
7138 // Treat the lane argument as a splat and use non-lane version of the
7139 // intrinsic.
7140 Ops[2] = Builder.CreateBitCast(Ops[2], LaneTy);
7141 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]),
7142 InputTy->getElementCount());
7143 llvm::Type *Tys[2] = {Ty, InputTy};
7144 Ops.pop_back();
7145 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_fdot, Tys),
7146 Ops, "vdot");
7147 }
7148
7149 case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
7150 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalb,
7151 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7152 "vmlal");
7153 case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
7154 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalt,
7155 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7156 "vmlal");
7157 case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
7158 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbb,
7159 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7160 "vmlall");
7161 case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
7162 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbt,
7163 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7164 "vmlall");
7165 case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
7166 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltb,
7167 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7168 "vmlall");
7169 case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
7170 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltt,
7171 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7172 "vmlall");
7173 case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
7174 ExtendLaneArg = true;
7175 [[fallthrough]];
7176 case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
7177 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalb_lane,
7178 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7179 case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
7180 ExtendLaneArg = true;
7181 [[fallthrough]];
7182 case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
7183 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalt_lane,
7184 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7185 case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
7186 ExtendLaneArg = true;
7187 [[fallthrough]];
7188 case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
7189 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbb_lane,
7190 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7191 case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
7192 ExtendLaneArg = true;
7193 [[fallthrough]];
7194 case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
7195 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbt_lane,
7196 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7197 case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
7198 ExtendLaneArg = true;
7199 [[fallthrough]];
7200 case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
7201 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltb_lane,
7202 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7203 case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
7204 ExtendLaneArg = true;
7205 [[fallthrough]];
7206 case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
7207 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltt_lane,
7208 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7209 case NEON::BI__builtin_neon_vamin_f16:
7210 case NEON::BI__builtin_neon_vaminq_f16:
7211 case NEON::BI__builtin_neon_vamin_f32:
7212 case NEON::BI__builtin_neon_vaminq_f32:
7213 case NEON::BI__builtin_neon_vaminq_f64: {
7214 Int = Intrinsic::aarch64_neon_famin;
7215 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
7216 }
7217 case NEON::BI__builtin_neon_vamax_f16:
7218 case NEON::BI__builtin_neon_vamaxq_f16:
7219 case NEON::BI__builtin_neon_vamax_f32:
7220 case NEON::BI__builtin_neon_vamaxq_f32:
7221 case NEON::BI__builtin_neon_vamaxq_f64: {
7222 Int = Intrinsic::aarch64_neon_famax;
7223 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
7224 }
7225 case NEON::BI__builtin_neon_vscale_f16:
7226 case NEON::BI__builtin_neon_vscaleq_f16:
7227 case NEON::BI__builtin_neon_vscale_f32:
7228 case NEON::BI__builtin_neon_vscaleq_f32:
7229 case NEON::BI__builtin_neon_vscaleq_f64: {
7230 Int = Intrinsic::aarch64_neon_fp8_fscale;
7231 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fscale");
7232 }
7233 }
7234}
7235
// Emits LLVM IR for the BPF builtins __builtin_preserve_field_info,
// __builtin_btf_type_id, __builtin_preserve_type_info and
// __builtin_preserve_enum_value; any other BuiltinID trips the assert below.
// Returns the emitted intrinsic call. When no debug info is available an
// error is reported; the field-info case then still returns the raw field
// pointer, while the other cases return nullptr.
// NOTE(review): the first line of the signature (listing line 7236) is absent
// from this extract; the index at the end of the listing gives it as
// EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E).
7237 const CallExpr *E) {
7238 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
7239 BuiltinID == BPF::BI__builtin_btf_type_id ||
7240 BuiltinID == BPF::BI__builtin_preserve_type_info ||
7241 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
7242 "unexpected BPF builtin");
7243
7244 // A sequence number, injected into IR builtin functions, to
7245 // prevent CSE given the only difference of the function
7246 // may just be the debuginfo metadata.
     // Function-local static: it is post-incremented once for every
     // btf_type_id / preserve_type_info / preserve_enum_value call emitted
     // below and passed as the first operand of that call.
7247 static uint32_t BuiltinSeqNum;
7248
7249 switch (BuiltinID) {
7250 default:
7251 llvm_unreachable("Unexpected BPF builtin");
7252 case BPF::BI__builtin_preserve_field_info: {
     // Argument 0 is the field access; it is emitted as an lvalue, using the
     // bit-field pointer accessor when the expression denotes a bit-field.
7253 const Expr *Arg = E->getArg(0);
7254 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
7255
     // Without debug info: report the error, but still hand back the field
     // address rather than a null value.
7256 if (!getDebugInfo()) {
7257 CGM.Error(E->getExprLoc(),
7258 "using __builtin_preserve_field_info() without -g");
7259 return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
7260 : EmitLValue(Arg).emitRawPointer(*this);
7261 }
7262
7263 // Enable underlying preserve_*_access_index() generation.
     // The flag is forced on only while the lvalue is emitted and then
     // restored to its previous value.
7264 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
7265 IsInPreservedAIRegion = true;
7266 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
7267 : EmitLValue(Arg).emitRawPointer(*this);
7268 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
7269
     // Argument 1 is cast<> to ConstantInt and re-created as an Int64Ty
     // constant. NOTE(review): presumably an earlier check guarantees it is a
     // constant — confirm.
7270 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7271 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
7272
7273 // Build the IR for the preserve_field_info intrinsic.
     // The intrinsic declaration is overloaded on the field address type.
7274 llvm::Function *FnGetFieldInfo = Intrinsic::getOrInsertDeclaration(
7275 &CGM.getModule(), Intrinsic::bpf_preserve_field_info,
7276 {FieldAddr->getType()});
7277 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
7278 }
7279 case BPF::BI__builtin_btf_type_id:
7280 case BPF::BI__builtin_preserve_type_info: {
7281 if (!getDebugInfo()) {
7282 CGM.Error(E->getExprLoc(), "using builtin function without -g");
7283 return nullptr;
7284 }
7285
     // Only the type of argument 0 is used: its standalone debug-info type is
     // attached to the call as metadata below.
7286 const Expr *Arg0 = E->getArg(0);
7287 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
7288 Arg0->getType(), Arg0->getExprLoc());
7289
7290 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7291 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
7292 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
7293
     // Both builtins take the same operands; only the intrinsic differs.
7294 llvm::Function *FnDecl;
7295 if (BuiltinID == BPF::BI__builtin_btf_type_id)
7296 FnDecl = Intrinsic::getOrInsertDeclaration(
7297 &CGM.getModule(), Intrinsic::bpf_btf_type_id, {});
7298 else
7299 FnDecl = Intrinsic::getOrInsertDeclaration(
7300 &CGM.getModule(), Intrinsic::bpf_preserve_type_info, {});
7301 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
7302 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
7303 return Fn;
7304 }
7305 case BPF::BI__builtin_preserve_enum_value: {
7306 if (!getDebugInfo()) {
7307 CGM.Error(E->getExprLoc(), "using builtin function without -g");
7308 return nullptr;
7309 }
7310
7311 const Expr *Arg0 = E->getArg(0);
7312 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
7313 Arg0->getType(), Arg0->getExprLoc());
7314
7315 // Find enumerator
     // The cast<> chain expects argument 0 to be a UnaryOperator whose operand
     // is a CStyleCastExpr of a DeclRefExpr naming an EnumConstantDecl.
     // NOTE(review): presumably this shape is enforced before codegen —
     // confirm.
7316 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
7317 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
7318 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
7319 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
7320
     // Build the string "<enumerator name>:<value>". The value is printed via
     // getSExtValue() when it is negative or compares greater than INT64_MAX,
     // and via getZExtValue() otherwise.
7321 auto InitVal = Enumerator->getInitVal();
7322 std::string InitValStr;
7323 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
7324 InitValStr = std::to_string(InitVal.getSExtValue());
7325 else
7326 InitValStr = std::to_string(InitVal.getZExtValue());
7327 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
7328 Value *EnumStrVal = Builder.CreateGlobalString(EnumStr);
7329
7330 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7331 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
7332 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
7333
     // Operands are {sequence number, enumerator string, flag}; the debug-info
     // type is attached as preserve_access_index metadata.
7334 llvm::Function *IntrinsicFn = Intrinsic::getOrInsertDeclaration(
7335 &CGM.getModule(), Intrinsic::bpf_preserve_enum_value, {});
7336 CallInst *Fn =
7337 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
7338 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
7339 return Fn;
7340 }
7341 }
7342}
7343
// Builds a vector value out of the scalar values in Ops: a ConstantVector
// when every element is a Constant, otherwise a chain of insertelement
// instructions.
// NOTE(review): the signature (listing lines 7344-7345) is absent from this
// extract; the index at the end of the listing gives it as
// BuildVector(ArrayRef<llvm::Value *> Ops).
     // (n & (n - 1)) == 0 holds for powers of two; note that it also holds
     // for n == 0.
7346 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
7347 "Not a power-of-two sized vector!");
     // The scan stops at the first non-constant element.
7348 bool AllConstants = true;
7349 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
7350 AllConstants &= isa<Constant>(Ops[i]);
7351
7352 // If this is a constant vector, create a ConstantVector.
7353 if (AllConstants) {
     // NOTE(review): the declaration of CstOps (listing line 7354) is absent
     // from this extract — restore it from the original file.
7355 for (llvm::Value *Op : Ops)
7356 CstOps.push_back(cast<Constant>(Op));
7357 return llvm::ConstantVector::get(CstOps);
7358 }
7359
7360 // Otherwise, insertelement the values to build the vector.
     // Start from a poison vector whose element type is that of Ops[0] and
     // whose length is Ops.size().
7361 Value *Result = llvm::PoisonValue::get(
7362 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
7363
     // Element i goes into lane i; the lane index is emitted as a 64-bit
     // integer constant.
7364 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7365 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
7366
7367 return Result;
7368}
7369
7370Value *CodeGenFunction::EmitAArch64CpuInit() {
7371 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
7372 llvm::FunctionCallee Func =
7373 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
7374 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
7375 cast<llvm::GlobalValue>(Func.getCallee())
7376 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
7377 return Builder.CreateCall(Func);
7378}
7379
// Emits the feature test for a call whose first argument is a string literal
// listing feature names separated by '+'. Returns a constant false as soon as
// one name is rejected by parseFMVExtension; otherwise forwards the collected
// names to the ArrayRef<StringRef> overload.
7380Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
     // Parentheses and casts around the argument are ignored before it is
     // cast<> to StringLiteral.
7381 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
7382 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
     // NOTE(review): the declarations of OrigFeatures and Features (listing
     // lines 7383 and 7385) are absent from this extract — restore them from
     // the original file.
7384 ArgStr.split(OrigFeatures, "+");
7386 for (StringRef Feature : OrigFeatures) {
     // Surrounding whitespace in each name is dropped before validation.
7387 Feature = Feature.trim();
7388 if (!llvm::AArch64::parseFMVExtension(Feature))
7389 return Builder.getFalse();
     // "default" passes validation but is not added to the tested features.
7390 if (Feature != "default")
7391 Features.push_back(Feature);
7392 }
7393 return EmitAArch64CpuSupports(Features);
7394}
7395
// Emits an i1 value that is true when every bit of the mask computed by
// getCpuSupportsMask(FeaturesStrs) is set in the first field of the runtime
// variable `__aarch64_cpu_features`. When the mask is zero no load is emitted
// and the result is the constant true.
7396llvm::Value *
7397CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
7398 llvm::APInt FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
7399 Value *Result = Builder.getTrue();
7400 if (FeaturesMask != 0) {
7401 // Get features from structure in runtime library
7402 // struct {
7403 // unsigned long long features;
7404 // } __aarch64_cpu_features;
7405 llvm::Type *STy = llvm::StructType::get(Int64Ty);
7406 llvm::Constant *AArch64CPUFeatures =
7407 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
7408 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
     // GEP with indices {0, 0} addresses the struct's first (only) field.
7409 llvm::Value *CpuFeatures = Builder.CreateGEP(
7410 STy, AArch64CPUFeatures,
7411 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
7412 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
     // NOTE(review): the rest of this call (listing line 7413, which carries
     // the remaining argument(s)) is absent from this extract — restore it
     // from the original file.
     // The mask is truncated to 64 bits to match the loaded field; the
     // equality test requires all requested bits to be present.
7414 Value *Mask = Builder.getInt(FeaturesMask.trunc(64));
7415 Value *Bitset = Builder.CreateAnd(Features, Mask);
7416 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
7417 Result = Builder.CreateAnd(Result, Cmp);
7418 }
7419 return Result;
7420}
Utilities used for generating code for AArch64 that are shared between the classic and ClangIR code-g...
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)
#define NEONMAP0(NameBase)
#define V(N, I)
Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E)
static cir::VectorType getSVEVectorForElementType(CIRGenModule &cgm, mlir::Type eltTy)
static const ARMVectorIntrinsicInfo * findARMVectorIntrinsicInMap(ArrayRef< ARMVectorIntrinsicInfo > intrinsicMap, unsigned builtinID, bool &mapProvenSorted)
static Value * EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, SpecialRegisterAccessKind AccessKind, StringRef SysReg="")
Definition ARM.cpp:2016
static llvm::Value * ARMMVEVectorReinterpret(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *DestType)
Definition ARM.cpp:2891
static llvm::VectorType * GetFloatNeonType(CodeGenFunction *CGF, NeonTypeFlags IntTypeFlags)
Definition ARM.cpp:401
static llvm::Value * MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, uint32_t Shift, bool Unsigned)
Definition ARM.cpp:2861
static llvm::Value * SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, llvm::Type *T, bool Unsigned)
Definition ARM.cpp:2854
static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
Definition ARM.cpp:3915
static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[]
Definition ARM.cpp:1038
static Value * EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< Value * > &Ops, llvm::Triple::ArchType Arch)
Definition ARM.cpp:3113
static void swapCommutativeSMEOperands(unsigned BuiltinID, SmallVectorImpl< Value * > &Ops)
Definition ARM.cpp:4387
static bool AArch64SISDIntrinsicsProvenSorted
Definition ARM.cpp:1050
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[]
Definition ARM.cpp:1020
static llvm::Value * ARMMVECreateFPToSI(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *Ty)
Definition ARM.cpp:2985
static bool HasExtraNeonArgument(unsigned BuiltinID)
Return true if BuiltinID is an overloaded Neon intrinsic with an extra argument that specifies the ve...
Definition ARM.cpp:2137
static llvm::Value * ARMMVECreateFPToUI(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *Ty)
Definition ARM.cpp:2993
static llvm::Value * ARMMVECreateSIToFP(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *Ty)
Definition ARM.cpp:2969
static bool AArch64SVEIntrinsicsProvenSorted
Definition ARM.cpp:1051
static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
Definition ARM.cpp:3921
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context)
Definition ARM.cpp:2850
static bool AArch64SMEIntrinsicsProvenSorted
Definition ARM.cpp:1052
static llvm::Value * VectorZip(CGBuilderTy &Builder, llvm::Value *V0, llvm::Value *V1)
Definition ARM.cpp:2928
constexpr unsigned SVEBitsPerBlock
Definition ARM.cpp:3400
static const std::pair< unsigned, unsigned > NEONEquivalentIntrinsicMap[]
Definition ARM.cpp:862
static llvm::FixedVectorType * GetNeonType(CodeGenFunction *CGF, NeonTypeFlags TypeFlags, bool HasFastHalfType=true, bool V1Ty=false, bool AllowBFloatArgsAndRet=true)
Definition ARM.cpp:361
Value * readX18AsPtr(CodeGenFunction &CGF)
Helper for the read/write/add/inc X18 builtins: read the X18 register and return it as an i8 pointer.
Definition ARM.cpp:4486
static llvm::Value * ARMMVEVectorElementReverse(CGBuilderTy &Builder, llvm::Value *V, unsigned ReverseWidth)
Definition ARM.cpp:2955
static std::optional< CodeGenFunction::MSVCIntrin > translateAarch64ToMsvcIntrin(unsigned BuiltinID)
Definition ARM.cpp:33
static std::optional< CodeGenFunction::MSVCIntrin > translateArmToMsvcIntrin(unsigned BuiltinID)
Definition ARM.cpp:192
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[]
Definition ARM.cpp:540
static llvm::Value * ARMMVECreateUIToFP(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *Ty)
Definition ARM.cpp:2977
static llvm::Value * VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd)
Definition ARM.cpp:2917
static llvm::Value * ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT)
Definition ARM.cpp:2943
SpecialRegisterAccessKind
Definition ARM.cpp:2007
@ VolatileRead
Definition ARM.cpp:2009
@ NormalRead
Definition ARM.cpp:2008
@ Write
Definition ARM.cpp:2010
static llvm::Value * ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V)
Definition ARM.cpp:2883
static bool NEONSIMDIntrinsicsProvenSorted
Definition ARM.cpp:1047
static Value * EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo, SmallVectorImpl< Value * > &Ops, const CallExpr *E)
Definition ARM.cpp:1118
static Value * emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID, llvm::Type *Ty, ArrayRef< Value * > Args)
Definition ARM.cpp:344
static Value * EmitRangePrefetchBuiltin(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Definition ARM.cpp:2082
static Value * packTBLDVectorList(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Value *ExtOp, Value *IndexOp, llvm::Type *ResTy, unsigned IntID, const char *Name)
Definition ARM.cpp:1934
static bool AArch64SIMDIntrinsicsProvenSorted
Definition ARM.cpp:1049
TokenType getType() const
Returns the token's type, e.g.
Result
Implement __builtin_bit_cast and related operations.
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
HLSLResourceBindingAttr::RegisterType RegisterType
Definition SemaHLSL.cpp:57
Enumerates target-specific builtins in their own namespaces within namespace clang.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:226
QualType GetBuiltinType(unsigned ID, GetBuiltinTypeError &Error, unsigned *IntegerConstantArgs=nullptr) const
Return the type for the specified builtin.
@ GE_None
No error.
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition Expr.h:2946
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition Expr.h:3150
FunctionDecl * getDirectCallee()
If the callee is a FunctionDecl, return it. Otherwise return null.
Definition Expr.h:3129
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition Expr.h:3137
QualType getCallReturnType(const ASTContext &Ctx) const
getCallReturnType - Get the return type of the call expr.
Definition Expr.cpp:1608
static CharUnits One()
One - Construct a CharUnits quantity of one.
Definition CharUnits.h:58
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition CharUnits.h:63
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition Address.h:128
static Address invalid()
Definition Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition Address.h:253
CharUnits getAlignment() const
Definition Address.h:194
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition Address.h:276
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition Address.h:204
An aggregate value slot.
Definition CGValue.h:551
Address getAddress() const
Definition CGValue.h:691
llvm::DIType * getOrCreateStandaloneType(QualType Ty, SourceLocation Loc)
Emit standalone debug info for a type.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Value * EmitSVEPredicateCast(llvm::Value *Pred, llvm::ScalableVectorType *VTy)
Definition ARM.cpp:3409
llvm::Value * EmitFP8NeonFMLACall(unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy, SmallVectorImpl< llvm::Value * > &Ops, const CallExpr *E, const char *name)
Definition ARM.cpp:472
llvm::Value * BuildVector(ArrayRef< llvm::Value * > Ops)
Definition ARM.cpp:7345
llvm::Value * EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, const CallExpr *E)
llvm::Value * EmitSVEStructLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3584
llvm::Value * EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID, bool IsZExtReturn)
Definition ARM.cpp:3693
llvm::Value * EmitFP8NeonCall(unsigned IID, ArrayRef< llvm::Type * > Tys, SmallVectorImpl< llvm::Value * > &O, const CallExpr *E, const char *name)
Definition ARM.cpp:447
llvm::Type * ConvertType(QualType T)
llvm::Value * EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3548
llvm::Value * EmitSMEReadWrite(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3839
llvm::Type * SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags)
SVEBuiltinMemEltTy - Returns the memory element type for this memory access builtin.
Definition ARM.cpp:3268
llvm::Value * EmitSVEScatterStore(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3502
llvm::Value * EmitSVEMaskedStore(const CallExpr *, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
Definition ARM.cpp:3750
llvm::Value * EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Definition ARM.cpp:4411
void GetAArch64SVEProcessedOperands(unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, SVETypeFlags TypeFlags)
Definition ARM.cpp:3978
llvm::Value * EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3460
llvm::Function * LookupNeonLLVMIntrinsic(unsigned IntrinsicID, unsigned Modifier, llvm::Type *ArgTy, const CallExpr *E)
Definition ARM.cpp:1076
llvm::Type * getEltType(const SVETypeFlags &TypeFlags)
Definition ARM.cpp:3284
llvm::Value * EmitCommonNeonBuiltinExpr(unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch)
Definition ARM.cpp:1184
llvm::Value * EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx, const llvm::ElementCount &Count)
llvm::Value * EmitSVEDupX(llvm::Value *Scalar)
const TargetInfo & getTarget() const
llvm::Value * EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Definition ARM.cpp:4021
llvm::Value * EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0, llvm::Type *Ty1, bool Extract, SmallVectorImpl< llvm::Value * > &Ops, const CallExpr *E, const char *name)
Definition ARM.cpp:493
llvm::Value * EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition ARM.cpp:2164
llvm::ScalableVectorType * getSVEType(const SVETypeFlags &TypeFlags)
Definition ARM.cpp:3357
llvm::Value * EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Definition ARM.cpp:7236
llvm::Value * EmitSMELdrStr(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3861
llvm::Value * EmitSVETupleCreate(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
Definition ARM.cpp:3966
llvm::Value * EmitSVEPMull(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
Definition ARM.cpp:3647
llvm::Value * EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition ARM.cpp:3001
AggValueSlot CreateAggTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateAggTemp - Create a temporary memory object for the given aggregate type.
llvm::Value * EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt, llvm::Type *Ty, bool usgn, const char *name)
Definition ARM.cpp:509
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
SmallVector< llvm::Type *, 2 > getSVEOverloadTypes(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
Definition ARM.cpp:3928
const TargetCodeGenInfo & getTargetHooks() const
RawAddress CreateMemTempWithoutCast(QualType T, const Twine &Name="tmp")
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment without...
Definition CGExpr.cpp:231
llvm::Value * EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty, bool negateForRightShift)
Definition ARM.cpp:487
bool IsInPreservedAIRegion
True if CodeGen currently emits code inside a preserved access index region.
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch)
Definition ARM.cpp:4497
llvm::Value * EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E)
llvm::Value * EmitFP8NeonFDOTCall(unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy, SmallVectorImpl< llvm::Value * > &Ops, const CallExpr *E, const char *name)
Definition ARM.cpp:456
llvm::Value * vectorWrapScalar16(llvm::Value *Op)
Definition ARM.cpp:3256
llvm::Value * EmitARMCDEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition ARM.cpp:3102
llvm::Value * EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty, const llvm::CmpInst::Predicate Pred, const llvm::Twine &Name="")
Definition ARM.cpp:1905
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
Definition CGExpr.cpp:309
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitSVEMovl(const SVETypeFlags &TypeFlags, llvm::ArrayRef< llvm::Value * > Ops, unsigned BuiltinID)
Definition ARM.cpp:3665
llvm::Value * EmitSVEPredicateTupleCast(llvm::Value *PredTuple, llvm::StructType *Ty)
Definition ARM.cpp:3444
llvm::Value * EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
Definition ARM.cpp:3672
llvm::Value * EmitSMEZero(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3851
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
Definition CGExpr.cpp:1598
llvm::Value * EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3599
llvm::Value * EmitSMELd1St1(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Definition ARM.cpp:3804
void EmitAggExpr(const Expr *E, AggValueSlot AS)
EmitAggExpr - Emit the computation of the specified expression of aggregate type.
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
llvm::Value * EmitSVEAllTruePred(const SVETypeFlags &TypeFlags)
Definition ARM.cpp:3394
llvm::Value * EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty)
Definition ARM.cpp:3892
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
Definition CGExpr.cpp:1714
llvm::LLVMContext & getLLVMContext()
llvm::ScalableVectorType * getSVEPredType(const SVETypeFlags &TypeFlags)
Definition ARM.cpp:3322
llvm::Value * EmitNeonCall(llvm::Function *F, SmallVectorImpl< llvm::Value * > &O, const char *name, unsigned shift=0, bool rightshift=false)
Definition ARM.cpp:427
llvm::Value * EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags, ArrayRef< llvm::Value * > Ops)
Definition ARM.cpp:3955
This class organizes the cross-function state that is used while generating LLVM code.
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false, bool AssumeConvergent=false)
Create or return a runtime function declaration with the specified type and name.
ASTContext & getContext() const
llvm::LLVMContext & getLLVMContext()
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys={})
llvm::Value * getRawBitFieldPointer(CodeGenFunction &CGF) const
Definition CGValue.h:441
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
ReturnValueSlot - Contains the address where the return value of a function can be stored,...
Definition CGCall.h:382
This represents one expression.
Definition Expr.h:112
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer,...
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition Expr.cpp:3102
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
Definition Expr.cpp:3093
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx) const
getIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
ExprObjectKind getObjectKind() const
getObjectKind - The object kind that this expression produces.
Definition Expr.h:454
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition Expr.cpp:282
QualType getType() const
Definition Expr.h:144
Represents a function declaration or definition.
Definition Decl.h:2018
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition Decl.h:301
Flags to identify the types for overloaded Neon builtins.
EltType getEltType() const
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition TypeBase.h:3390
QualType getPointeeType() const
Definition TypeBase.h:3400
A (possibly-)qualified type.
Definition TypeBase.h:937
The collection of all-type qualifiers we support.
Definition TypeBase.h:331
Flags to identify the types for overloaded SVE builtins.
bool isZExtReturn() const
bool isReverseUSDOT() const
bool isOverloadNone() const
MemEltType getMemEltType() const
bool isGatherLoad() const
EltType getEltType() const
bool isOverloadFirstandLast() const
bool isOverloadDefault() const
bool isPrefetch() const
bool isOverloadWhileRW() const
bool isTupleSet() const
bool isReverseMergeAnyAccOp() const
bool isReductionQV() const
bool isTupleGet() const
bool isInsertOp1SVALL() const
bool isAppendSVALL() const
bool isReverseMergeAnyBinOp() const
bool isStructStore() const
bool isOverloadDefaultAndOp0() const
bool isTupleCreate() const
bool isGatherPrefetch() const
bool hasSplatOperand() const
MergeType getMergeType() const
bool isByteIndexed() const
bool isStructLoad() const
bool isOverloadWhileOrMultiVecCvt() const
unsigned getSplatOperand() const
bool isScatterStore() const
bool isReverseCompare() const
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
virtual bool hasFastHalfType() const
Determine whether the target has fast native support for operations on half types.
Definition TargetInfo.h:712
bool isBigEndian() const
The base class of the type hierarchy.
Definition TypeBase.h:1875
const T * castAs() const
Member-template castAs<specific type>.
Definition TypeBase.h:9337
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:790
QualType getType() const
Definition Decl.h:723
QualType getType() const
Definition Value.cpp:237
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
Definition CGValue.h:155
const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[]
const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[]
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
Definition Address.h:330
@ OK_BitField
A bitfield object is a bitfield on a C or C++ record.
Definition Specifiers.h:155
@ Result
The result type of a method or function.
Definition TypeBase.h:905
U cast(CodeGen::Address addr)
Definition Address.h:327
@ Enumerator
Enumerator value with fixed underlying type.
Definition Sema.h:840
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::Type * HalfTy
half, bfloat, float, double
EvalResult is a struct with detailed info about an evaluated expression.
Definition Expr.h:648
#define trunc(__x)
Definition tgmath.h:1216
#define round(__x)
Definition tgmath.h:1148
#define rint(__x)
Definition tgmath.h:1131
#define floor(__x)
Definition tgmath.h:722
#define ceil(__x)
Definition tgmath.h:601