1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGObjCRuntime.h"
16 #include "CGOpenCLRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "CodeGenModule.h"
19 #include "ConstantEmitter.h"
20 #include "TargetInfo.h"
21 #include "clang/AST/ASTContext.h"
22 #include "clang/AST/Decl.h"
23 #include "clang/Analysis/Analyses/OSLog.h"
24 #include "clang/Basic/TargetBuiltins.h"
25 #include "clang/Basic/TargetInfo.h"
26 #include "clang/CodeGen/CGFunctionInfo.h"
27 #include "llvm/ADT/StringExtras.h"
28 #include "llvm/IR/CallSite.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/InlineAsm.h"
31 #include "llvm/IR/Intrinsics.h"
32 #include "llvm/IR/MDBuilder.h"
33 #include "llvm/Support/ScopedPrinter.h"
34 #include "llvm/Support/ConvertUTF.h"
35 #include <sstream>
36 
37 using namespace clang;
38 using namespace CodeGen;
39 using namespace llvm;
40 
41 static
42 int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
43  return std::min(High, std::max(Low, Value));
44 }
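// e.g. clamp(100, 0, 63) == 63 and clamp(-5, 0, 63) == 0.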
45 
46 /// getBuiltinLibFunction - Given a builtin id for a function like
47 /// "__builtin_fabsf", return a Function* for "fabsf".
48 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
49  unsigned BuiltinID) {
50  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
51 
52  // Get the name, skip over the __builtin_ prefix (if necessary).
53  StringRef Name;
54  GlobalDecl D(FD);
55 
56  // If the builtin has been declared explicitly with an assembler label,
57  // use the mangled name. This differs from the plain label on platforms
58  // that prefix labels.
59  if (FD->hasAttr<AsmLabelAttr>())
60  Name = getMangledName(D);
61  else
62  Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
63 
64  llvm::FunctionType *Ty =
65  cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
66 
67  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
68 }
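// Note: getName() returns e.g. "__builtin_fabsf"; the "+ 10" above skips the
// 10-character "__builtin_" prefix, leaving "fabsf" as the library name.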
69 
70 /// Emit the conversions required to turn the given value into an
71 /// integer of the given size.
72 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
73  QualType T, llvm::IntegerType *IntType) {
74  V = CGF.EmitToMemory(V, T);
75 
76  if (V->getType()->isPointerTy())
77  return CGF.Builder.CreatePtrToInt(V, IntType);
78 
79  assert(V->getType() == IntType);
80  return V;
81 }
82 
83 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
84  QualType T, llvm::Type *ResultType) {
85  V = CGF.EmitFromMemory(V, T);
86 
87  if (ResultType->isPointerTy())
88  return CGF.Builder.CreateIntToPtr(V, ResultType);
89 
90  assert(V->getType() == ResultType);
91  return V;
92 }
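// EmitToInt/EmitFromInt let the atomic helpers below work on plain iN values:
// pointer operands are converted with ptrtoint/inttoptr, everything else is
// expected to already carry the matching integer type.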
93 
94 /// Utility to insert an atomic instruction based on Intrinsic::ID
95 /// and the expression node.
96 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
97  llvm::AtomicRMWInst::BinOp Kind,
98  const CallExpr *E) {
99  QualType T = E->getType();
100  assert(E->getArg(0)->getType()->isPointerType());
101  assert(CGF.getContext().hasSameUnqualifiedType(T,
102  E->getArg(0)->getType()->getPointeeType()));
103  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
104 
105  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
106  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
107 
108  llvm::IntegerType *IntType =
109  llvm::IntegerType::get(CGF.getLLVMContext(),
110  CGF.getContext().getTypeSize(T));
111  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
112 
113  llvm::Value *Args[2];
114  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
115  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
116  llvm::Type *ValueType = Args[1]->getType();
117  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
118 
119  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
120  Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
121  return EmitFromInt(CGF, Result, T, ValueType);
122 }
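// Used directly by the MSVC _Interlocked* builtins below and, through
// EmitBinaryAtomic, by the GCC __sync_fetch_and_* builtins; the emitted IR is
// roughly "atomicrmw <op> iN* %ptr, iN %val seq_cst", with the old value
// converted back to the source type by EmitFromInt.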
123 
124 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
125  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
126  Value *Address = CGF.EmitScalarExpr(E->getArg(1));
127 
128  // Convert the type of the pointer to a pointer to the stored type.
129  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
130  Value *BC = CGF.Builder.CreateBitCast(
131  Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
132  LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
133  LV.setNontemporal(true);
134  CGF.EmitStoreOfScalar(Val, LV, false);
135  return nullptr;
136 }
137 
138 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
139  Value *Address = CGF.EmitScalarExpr(E->getArg(0));
140 
141  LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
142  LV.setNontemporal(true);
143  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
144 }
145 
146 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
147  llvm::AtomicRMWInst::BinOp Kind,
148  const CallExpr *E) {
149  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
150 }
151 
152 /// Utility to insert an atomic instruction based on Intrinsic::ID and
153 /// the expression node, where the return value is the result of the
154 /// operation.
155 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
156  llvm::AtomicRMWInst::BinOp Kind,
157  const CallExpr *E,
158  Instruction::BinaryOps Op,
159  bool Invert = false) {
160  QualType T = E->getType();
161  assert(E->getArg(0)->getType()->isPointerType());
162  assert(CGF.getContext().hasSameUnqualifiedType(T,
163  E->getArg(0)->getType()->getPointeeType()));
164  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
165 
166  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
167  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
168 
169  llvm::IntegerType *IntType =
170  llvm::IntegerType::get(CGF.getLLVMContext(),
171  CGF.getContext().getTypeSize(T));
172  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
173 
174  llvm::Value *Args[2];
175  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
176  llvm::Type *ValueType = Args[1]->getType();
177  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
178  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
179 
180  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
181  Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
182  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
183  if (Invert)
184  Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
185  llvm::ConstantInt::get(IntType, -1));
186  Result = EmitFromInt(CGF, Result, T, ValueType);
187  return RValue::get(Result);
188 }
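// Note the atomicrmw above returns the value memory held *before* the
// operation, so the extra CreateBinOp recomputes the post-operation value
// (op_and_fetch semantics); Invert flips the bits for nand-style builtins.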
189 
190 /// @brief Utility to insert an atomic cmpxchg instruction.
191 ///
192 /// @param CGF The current codegen function.
193 /// @param E Builtin call expression to convert to cmpxchg.
194 /// arg0 - address to operate on
195 /// arg1 - value to compare with
196 /// arg2 - new value
197 /// @param ReturnBool Specifies whether to return success flag of
198 /// cmpxchg result or the old value.
199 ///
200 /// @returns result of cmpxchg, according to ReturnBool
201 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
202  bool ReturnBool) {
203  QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
204  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
205  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
206 
207  llvm::IntegerType *IntType = llvm::IntegerType::get(
208  CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
209  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
210 
211  Value *Args[3];
212  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
213  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
214  llvm::Type *ValueType = Args[1]->getType();
215  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
216  Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
217 
218  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
219  Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
220  llvm::AtomicOrdering::SequentiallyConsistent);
221  if (ReturnBool)
222  // Extract boolean success flag and zext it to int.
223  return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
224  CGF.ConvertType(E->getType()));
225  else
226  // Extract old value and emit it using the same type as compare value.
227  return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
228  ValueType);
229 }
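// With ReturnBool=true this yields the i1 success flag zero-extended to the
// call's result type (bool_compare_and_swap style); with ReturnBool=false it
// yields the old value read from memory (val_compare_and_swap style).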
230 
231 // Emit a simple mangled intrinsic that has 1 argument and a return type
232 // matching the argument type.
233 static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
234  const CallExpr *E,
235  unsigned IntrinsicID) {
236  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
237 
238  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
239  return CGF.Builder.CreateCall(F, Src0);
240 }
241 
242 // Emit an intrinsic that has 2 operands of the same type as its result.
243 static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
244  const CallExpr *E,
245  unsigned IntrinsicID) {
246  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
247  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
248 
249  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
250  return CGF.Builder.CreateCall(F, { Src0, Src1 });
251 }
252 
253 // Emit an intrinsic that has 3 operands of the same type as its result.
254 static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
255  const CallExpr *E,
256  unsigned IntrinsicID) {
257  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
258  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
259  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
260 
261  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
262  return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
263 }
264 
265 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
266 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
267  const CallExpr *E,
268  unsigned IntrinsicID) {
269  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
270  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
271 
272  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
273  return CGF.Builder.CreateCall(F, {Src0, Src1});
274 }
275 
276 /// EmitFAbs - Emit a call to @llvm.fabs().
277 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
278  Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
279  llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
280  Call->setDoesNotAccessMemory();
281  return Call;
282 }
283 
284 /// Emit the computation of the sign bit for a floating point value. Returns
285 /// the i1 sign bit value.
286 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
287  LLVMContext &C = CGF.CGM.getLLVMContext();
288 
289  llvm::Type *Ty = V->getType();
290  int Width = Ty->getPrimitiveSizeInBits();
291  llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
292  V = CGF.Builder.CreateBitCast(V, IntTy);
293  if (Ty->isPPC_FP128Ty()) {
294  // We want the sign bit of the higher-order double. The bitcast we just
295  // did works as if the double-double was stored to memory and then
296  // read as an i128. The "store" will put the higher-order double in the
297  // lower address in both little- and big-Endian modes, but the "load"
298  // will treat those bits as a different part of the i128: the low bits in
299  // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
300  // we need to shift the high bits down to the low before truncating.
301  Width >>= 1;
302  if (CGF.getTarget().isBigEndian()) {
303  Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
304  V = CGF.Builder.CreateLShr(V, ShiftCst);
305  }
306  // We are truncating value in order to extract the higher-order
307  // double, which we will be using to extract the sign from.
308  IntTy = llvm::IntegerType::get(C, Width);
309  V = CGF.Builder.CreateTrunc(V, IntTy);
310  }
311  Value *Zero = llvm::Constant::getNullValue(IntTy);
312  return CGF.Builder.CreateICmpSLT(V, Zero);
313 }
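// For ordinary IEEE types this reduces to a bitcast to iN followed by
// "icmp slt iN %bits, 0"; the PPC double-double case above only needs the
// extra shifting to isolate the higher-order double first.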
314 
315 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
316  const CallExpr *E, llvm::Constant *calleeValue) {
317  CGCallee callee = CGCallee::forDirect(calleeValue, FD);
318  return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
319 }
320 
321 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
322 /// depending on IntrinsicID.
323 ///
324 /// \arg CGF The current codegen function.
325 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
326 /// \arg X The first argument to the llvm.*.with.overflow.*.
327 /// \arg Y The second argument to the llvm.*.with.overflow.*.
328 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
329 /// \returns The result (i.e. sum/product) returned by the intrinsic.
330 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
331  const llvm::Intrinsic::ID IntrinsicID,
332  llvm::Value *X, llvm::Value *Y,
333  llvm::Value *&Carry) {
334  // Make sure we have integers of the same width.
335  assert(X->getType() == Y->getType() &&
336  "Arguments must be the same type. (Did you forget to make sure both "
337  "arguments have the same integer width?)");
338 
339  llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
340  llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
341  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
342  return CGF.Builder.CreateExtractValue(Tmp, 0);
343 }
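// Sketch: with IntrinsicID = llvm::Intrinsic::uadd_with_overflow this emits
//   %pair = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
// returning the i32 sum and reporting the i1 overflow bit through Carry.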
344 
345 static Value *EmitRangedBuiltin(CodeGenFunction &CGF,
346  unsigned IntrinsicID,
347  int low, int high) {
348  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
349  llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
350  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
351  llvm::Instruction *Call = CGF.Builder.CreateCall(F);
352  Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
353  return Call;
354 }
355 
356 namespace {
357  struct WidthAndSignedness {
358  unsigned Width;
359  bool Signed;
360  };
361 }
362 
363 static WidthAndSignedness
364 getIntegerWidthAndSignedness(const clang::ASTContext &context,
365  const clang::QualType Type) {
366  assert(Type->isIntegerType() && "Given type is not an integer.");
367  unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
368  bool Signed = Type->isSignedIntegerType();
369  return {Width, Signed};
370 }
371 
372 // Given one or more integer types, this function produces an integer type that
373 // encompasses them: any value in one of the given types could be expressed in
374 // the encompassing type.
375 static struct WidthAndSignedness
376 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
377  assert(Types.size() > 0 && "Empty list of types.");
378 
379  // If any of the given types is signed, we must return a signed type.
380  bool Signed = false;
381  for (const auto &Type : Types) {
382  Signed |= Type.Signed;
383  }
384 
385  // The encompassing type must have a width greater than or equal to the width
386  // of the specified types. Additionally, if the encompassing type is signed,
387  // its width must be strictly greater than the width of any unsigned types
388  // given.
389  unsigned Width = 0;
390  for (const auto &Type : Types) {
391  unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
392  if (Width < MinWidth) {
393  Width = MinWidth;
394  }
395  }
396 
397  return {Width, Signed};
398 }
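// e.g. encompassing {unsigned 32-bit, signed 32-bit} yields a signed 33-bit
// type: signed because one input is signed, 33 bits wide so every unsigned
// 32-bit value still fits.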
399 
400 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
401  llvm::Type *DestType = Int8PtrTy;
402  if (ArgValue->getType() != DestType)
403  ArgValue =
404  Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
405 
406  Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
407  return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
408 }
409 
410 /// Checks if using the result of __builtin_object_size(p, @p From) in place of
411 /// __builtin_object_size(p, @p To) is correct
412 static bool areBOSTypesCompatible(int From, int To) {
413  // Note: Our __builtin_object_size implementation currently treats Type=0 and
414  // Type=2 identically. Encoding this implementation detail here may make
415  // improving __builtin_object_size difficult in the future, so it's omitted.
416  return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
417 }
418 
419 static llvm::Value *
420 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
421  return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
422 }
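// "Type & 2" selects the minimum variants (types 2 and 3), whose unknown-size
// answer is 0; types 0 and 1 report "unknown" as -1 (i.e. SIZE_MAX).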
423 
424 llvm::Value *
425 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
426  llvm::IntegerType *ResType,
427  llvm::Value *EmittedE) {
428  uint64_t ObjectSize;
429  if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
430  return emitBuiltinObjectSize(E, Type, ResType, EmittedE);
431  return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
432 }
433 
434 /// Returns a Value corresponding to the size of the given expression.
435 /// This Value may be either of the following:
436 /// - A llvm::Argument (if E is a param with the pass_object_size attribute on
437 /// it)
438 /// - A call to the @llvm.objectsize intrinsic
439 ///
440 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
441 /// and we wouldn't otherwise try to reference a pass_object_size parameter,
442 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
443 llvm::Value *
444 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
445  llvm::IntegerType *ResType,
446  llvm::Value *EmittedE) {
447  // We need to reference an argument if the pointer is a parameter with the
448  // pass_object_size attribute.
449  if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
450  auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
451  auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
452  if (Param != nullptr && PS != nullptr &&
453  areBOSTypesCompatible(PS->getType(), Type)) {
454  auto Iter = SizeArguments.find(Param);
455  assert(Iter != SizeArguments.end());
456 
457  const ImplicitParamDecl *D = Iter->second;
458  auto DIter = LocalDeclMap.find(D);
459  assert(DIter != LocalDeclMap.end());
460 
461  return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
462  getContext().getSizeType(), E->getLocStart());
463  }
464  }
465 
466  // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
467  // evaluate E for side-effects. In either case, we shouldn't lower to
468  // @llvm.objectsize.
469  if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
470  return getDefaultBuiltinObjectSizeResult(Type, ResType);
471 
472  Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
473  assert(Ptr->getType()->isPointerTy() &&
474  "Non-pointer passed to __builtin_object_size?");
475 
476  Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
477 
478  // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
479  Value *Min = Builder.getInt1((Type & 2) != 0);
480  // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
481  Value *NullIsUnknown = Builder.getTrue();
482  return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
483 }
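// Note: @llvm.objectsize here takes (ptr, i1 min, i1 nullunknown); Min=true
// requests the minimum remaining size (types 2/3), and NullIsUnknown=true
// matches GCC's treatment of null pointers noted above.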
484 
485 // Many of the MSVC builtins are available on both x64 and ARM; to avoid
486 // repeating code, we handle them here.
487 enum class MSVCIntrin {
488  _BitScanForward,
489  _BitScanReverse,
490  _InterlockedAnd,
491  _InterlockedDecrement,
492  _InterlockedExchange,
493  _InterlockedExchangeAdd,
494  _InterlockedExchangeSub,
495  _InterlockedIncrement,
496  _InterlockedOr,
497  _InterlockedXor,
498  _interlockedbittestandset,
499  __fastfail,
500 };
501 
502 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
503  const CallExpr *E) {
504  switch (BuiltinID) {
505  case MSVCIntrin::_BitScanForward:
506  case MSVCIntrin::_BitScanReverse: {
507  Value *ArgValue = EmitScalarExpr(E->getArg(1));
508 
509  llvm::Type *ArgType = ArgValue->getType();
510  llvm::Type *IndexType =
511  EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
512  llvm::Type *ResultType = ConvertType(E->getType());
513 
514  Value *ArgZero = llvm::Constant::getNullValue(ArgType);
515  Value *ResZero = llvm::Constant::getNullValue(ResultType);
516  Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
517 
518  BasicBlock *Begin = Builder.GetInsertBlock();
519  BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
520  Builder.SetInsertPoint(End);
521  PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
522 
523  Builder.SetInsertPoint(Begin);
524  Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
525  BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
526  Builder.CreateCondBr(IsZero, End, NotZero);
527  Result->addIncoming(ResZero, Begin);
528 
529  Builder.SetInsertPoint(NotZero);
530  Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));
531 
532  if (BuiltinID == MSVCIntrin::_BitScanForward) {
533  Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
534  Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
535  ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
536  Builder.CreateStore(ZeroCount, IndexAddress, false);
537  } else {
538  unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
539  Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
540 
541  Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
542  Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
543  ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
544  Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
545  Builder.CreateStore(Index, IndexAddress, false);
546  }
547  Builder.CreateBr(End);
548  Result->addIncoming(ResOne, NotZero);
549 
550  Builder.SetInsertPoint(End);
551  return Result;
552  }
553  case MSVCIntrin::_InterlockedAnd:
554  return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
555  case MSVCIntrin::_InterlockedExchange:
556  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
557  case MSVCIntrin::_InterlockedExchangeAdd:
558  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
559  case MSVCIntrin::_InterlockedExchangeSub:
560  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
561  case MSVCIntrin::_InterlockedOr:
562  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
563  case MSVCIntrin::_InterlockedXor:
564  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
565 
566  case MSVCIntrin::_interlockedbittestandset: {
567  llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
568  llvm::Value *Bit = EmitScalarExpr(E->getArg(1));
569  AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
570  AtomicRMWInst::Or, Addr,
571  Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit),
572  llvm::AtomicOrdering::SequentiallyConsistent);
573  // Shift the relevant bit to the least significant position, truncate to
574  // the result type, and test the low bit.
575  llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit);
576  llvm::Value *Truncated =
577  Builder.CreateTrunc(Shifted, ConvertType(E->getType()));
578  return Builder.CreateAnd(Truncated,
579  ConstantInt::get(Truncated->getType(), 1));
580  }
581 
582  case MSVCIntrin::_InterlockedDecrement: {
583  llvm::Type *IntTy = ConvertType(E->getType());
584  AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
585  AtomicRMWInst::Sub,
586  EmitScalarExpr(E->getArg(0)),
587  ConstantInt::get(IntTy, 1),
588  llvm::AtomicOrdering::SequentiallyConsistent);
589  return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1));
590  }
591  case MSVCIntrin::_InterlockedIncrement: {
592  llvm::Type *IntTy = ConvertType(E->getType());
593  AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
594  AtomicRMWInst::Add,
595  EmitScalarExpr(E->getArg(0)),
596  ConstantInt::get(IntTy, 1),
597  llvm::AtomicOrdering::SequentiallyConsistent);
598  return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
599  }
600 
601  case MSVCIntrin::__fastfail: {
602  // Request immediate process termination from the kernel. The instruction
603  // sequences to do this are documented on MSDN:
604  // https://msdn.microsoft.com/en-us/library/dn774154.aspx
605  llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
606  StringRef Asm, Constraints;
607  switch (ISA) {
608  default:
609  ErrorUnsupported(E, "__fastfail call for this architecture");
610  break;
611  case llvm::Triple::x86:
612  case llvm::Triple::x86_64:
613  Asm = "int $$0x29";
614  Constraints = "{cx}";
615  break;
616  case llvm::Triple::thumb:
617  Asm = "udf #251";
618  Constraints = "{r0}";
619  break;
620  }
621  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
622  llvm::InlineAsm *IA =
623  llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
624  llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
625  getLLVMContext(), llvm::AttributeList::FunctionIndex,
626  llvm::Attribute::NoReturn);
627  CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
628  CS.setAttributes(NoReturnAttr);
629  return CS.getInstruction();
630  }
631  }
632  llvm_unreachable("Incorrect MSVC intrinsic!");
633 }
634 
635 namespace {
636 // ARC cleanup for __builtin_os_log_format
637 struct CallObjCArcUse final : EHScopeStack::Cleanup {
638  CallObjCArcUse(llvm::Value *object) : object(object) {}
639  llvm::Value *object;
640 
641  void Emit(CodeGenFunction &CGF, Flags flags) override {
642  CGF.EmitARCIntrinsicUse(object);
643  }
644 };
645 }
646 
647 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
648  BuiltinCheckKind Kind) {
649  assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
650  && "Unsupported builtin check kind");
651 
652  Value *ArgValue = EmitScalarExpr(E);
653  if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef())
654  return ArgValue;
655 
656  SanitizerScope SanScope(this);
657  Value *Cond = Builder.CreateICmpNE(
658  ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
659  EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
660  SanitizerHandler::InvalidBuiltin,
661  {EmitCheckSourceLocation(E->getExprLoc()),
662  llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
663  None);
664  return ArgValue;
665 }
666 
667 /// Get the argument type for arguments to os_log_helper.
668 static CanQualType getOSLogArgType(ASTContext &C, int Size) {
669  QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
670  return C.getCanonicalType(UnsignedTy);
671 }
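// e.g. a 4-byte buffer item is passed to the helper as a 32-bit unsigned
// integer (typically 'unsigned int'), an 8-byte item as a 64-bit unsigned one.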
672 
673 llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
674  const analyze_os_log::OSLogBufferLayout &Layout,
675  CharUnits BufferAlignment) {
676  ASTContext &Ctx = getContext();
677 
678  llvm::SmallString<64> Name;
679  {
680  raw_svector_ostream OS(Name);
681  OS << "__os_log_helper";
682  OS << "_" << BufferAlignment.getQuantity();
683  OS << "_" << int(Layout.getSummaryByte());
684  OS << "_" << int(Layout.getNumArgsByte());
685  for (const auto &Item : Layout.Items)
686  OS << "_" << int(Item.getSizeByte()) << "_"
687  << int(Item.getDescriptorByte());
688  }
689 
690  if (llvm::Function *F = CGM.getModule().getFunction(Name))
691  return F;
692 
693  llvm::SmallVector<ImplicitParamDecl, 4> Params;
694  Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"),
695  Ctx.VoidPtrTy, ImplicitParamDecl::Other);
696 
697  for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
698  char Size = Layout.Items[I].getSizeByte();
699  if (!Size)
700  continue;
701 
702  Params.emplace_back(
703  Ctx, nullptr, SourceLocation(),
704  &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)),
705  getOSLogArgType(Ctx, Size), ImplicitParamDecl::Other);
706  }
707 
708  FunctionArgList Args;
709  for (auto &P : Params)
710  Args.push_back(&P);
711 
712  // The helper function has linkonce_odr linkage to enable the linker to merge
713  // identical functions. To ensure the merging always happens, 'noinline' is
714  // attached to the function when compiling with -Oz.
715  const CGFunctionInfo &FI =
716  CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
717  llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
718  llvm::Function *Fn = llvm::Function::Create(
719  FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
720  Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
721  CGM.SetLLVMFunctionAttributes(nullptr, FI, Fn);
722  CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
723 
724  // Attach 'noinline' at -Oz.
725  if (CGM.getCodeGenOpts().OptimizeSize == 2)
726  Fn->addFnAttr(llvm::Attribute::NoInline);
727 
728  auto NL = ApplyDebugLocation::CreateEmpty(*this);
729  IdentifierInfo *II = &Ctx.Idents.get(Name);
730  FunctionDecl *FD = FunctionDecl::Create(
731  Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
732  Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false);
733 
734  StartFunction(FD, Ctx.VoidTy, Fn, FI, Args);
735 
736  // Create a scope with an artificial location for the body of this function.
737  auto AL = ApplyDebugLocation::CreateArtificial(*this);
738 
739  CharUnits Offset;
740  Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(&Params[0]), "buf"),
741  BufferAlignment);
742  Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
743  Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
744  Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
745  Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
746 
747  unsigned I = 1;
748  for (const auto &Item : Layout.Items) {
749  Builder.CreateStore(
750  Builder.getInt8(Item.getDescriptorByte()),
751  Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
752  Builder.CreateStore(
753  Builder.getInt8(Item.getSizeByte()),
754  Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
755 
756  CharUnits Size = Item.size();
757  if (!Size.getQuantity())
758  continue;
759 
760  Address Arg = GetAddrOfLocalVar(&Params[I]);
761  Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
762  Addr = Builder.CreateBitCast(Addr, Arg.getPointer()->getType(),
763  "argDataCast");
764  Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
765  Offset += Size;
766  ++I;
767  }
768 
769  FinishFunction();
770 
771  return Fn;
772 }
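// The helper produced above serializes the buffer as: one summary byte, one
// byte holding the argument count, then for each item a descriptor byte, a
// size byte, and the raw argument data copied at the running offset.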
773 
774 RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
775  assert(E.getNumArgs() >= 2 &&
776  "__builtin_os_log_format takes at least 2 arguments");
777  ASTContext &Ctx = getContext();
778  analyze_os_log::OSLogBufferLayout Layout;
779  analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
780  Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
781  llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
782 
783  // Ignore argument 1, the format string. It is not currently used.
784  CallArgList Args;
785  Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy);
786 
787  for (const auto &Item : Layout.Items) {
788  int Size = Item.getSizeByte();
789  if (!Size)
790  continue;
791 
792  llvm::Value *ArgVal;
793 
794  if (const Expr *TheExpr = Item.getExpr()) {
795  ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
796 
797  // Check if this is a retainable type.
798  if (TheExpr->getType()->isObjCRetainableType()) {
799  assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
800  "Only scalar can be a ObjC retainable type");
801  // Check if the object is constant, if not, save it in
802  // RetainableOperands.
803  if (!isa<Constant>(ArgVal))
804  RetainableOperands.push_back(ArgVal);
805  }
806  } else {
807  ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
808  }
809 
810  unsigned ArgValSize =
811  CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
812  llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
813  ArgValSize);
814  ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
815  CanQualType ArgTy = getOSLogArgType(Ctx, Size);
816  // If ArgVal has type x86_fp80, zero-extend ArgVal.
817  ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
818  Args.add(RValue::get(ArgVal), ArgTy);
819  }
820 
821  const CGFunctionInfo &FI =
822  CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
823  llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
824  Layout, BufAddr.getAlignment());
825  EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
826 
827  // Push a clang.arc.use cleanup for each object in RetainableOperands. The
828  // cleanup will cause the use to appear after the final log call, keeping
829  // the object valid while it's held in the log buffer. Note that if there's
830  // a release cleanup on the object, it will already be active; since
831  // cleanups are emitted in reverse order, the use will occur before the
832  // object is released.
833  if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
834  CGM.getCodeGenOpts().OptimizationLevel != 0)
835  for (llvm::Value *Object : RetainableOperands)
836  pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), Object);
837 
838  return RValue::get(BufAddr.getPointer());
839 }
840 
841 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
842  unsigned BuiltinID, const CallExpr *E,
843  ReturnValueSlot ReturnValue) {
844  // See if we can constant fold this builtin. If so, don't emit it at all.
845  Expr::EvalResult Result;
846  if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
847  !Result.hasSideEffects()) {
848  if (Result.Val.isInt())
849  return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
850  Result.Val.getInt()));
851  if (Result.Val.isFloat())
852  return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
853  Result.Val.getFloat()));
854  }
855 
856  switch (BuiltinID) {
857  default: break; // Handle intrinsics and libm functions below.
858  case Builtin::BI__builtin___CFStringMakeConstantString:
859  case Builtin::BI__builtin___NSStringMakeConstantString:
860  return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
861  case Builtin::BI__builtin_stdarg_start:
862  case Builtin::BI__builtin_va_start:
863  case Builtin::BI__va_start:
864  case Builtin::BI__builtin_va_end:
865  return RValue::get(
866  EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
867  ? EmitScalarExpr(E->getArg(0))
868  : EmitVAListRef(E->getArg(0)).getPointer(),
869  BuiltinID != Builtin::BI__builtin_va_end));
870  case Builtin::BI__builtin_va_copy: {
871  Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
872  Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
873 
874  llvm::Type *Type = Int8PtrTy;
875 
876  DstPtr = Builder.CreateBitCast(DstPtr, Type);
877  SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
878  return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
879  {DstPtr, SrcPtr}));
880  }
881  case Builtin::BI__builtin_abs:
882  case Builtin::BI__builtin_labs:
883  case Builtin::BI__builtin_llabs: {
884  Value *ArgValue = EmitScalarExpr(E->getArg(0));
885 
886  Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
887  Value *CmpResult =
888  Builder.CreateICmpSGE(ArgValue,
889  llvm::Constant::getNullValue(ArgValue->getType()),
890  "abscond");
891  Value *Result =
892  Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
893 
894  return RValue::get(Result);
895  }
896  case Builtin::BI__builtin_fabs:
897  case Builtin::BI__builtin_fabsf:
898  case Builtin::BI__builtin_fabsl: {
899  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
900  }
901  case Builtin::BI__builtin_fmod:
902  case Builtin::BI__builtin_fmodf:
903  case Builtin::BI__builtin_fmodl: {
904  Value *Arg1 = EmitScalarExpr(E->getArg(0));
905  Value *Arg2 = EmitScalarExpr(E->getArg(1));
906  Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
907  return RValue::get(Result);
908  }
909  case Builtin::BI__builtin_copysign:
910  case Builtin::BI__builtin_copysignf:
911  case Builtin::BI__builtin_copysignl: {
912  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
913  }
914  case Builtin::BI__builtin_ceil:
915  case Builtin::BI__builtin_ceilf:
916  case Builtin::BI__builtin_ceill: {
917  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
918  }
919  case Builtin::BI__builtin_floor:
920  case Builtin::BI__builtin_floorf:
921  case Builtin::BI__builtin_floorl: {
922  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
923  }
924  case Builtin::BI__builtin_trunc:
925  case Builtin::BI__builtin_truncf:
926  case Builtin::BI__builtin_truncl: {
927  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
928  }
929  case Builtin::BI__builtin_rint:
930  case Builtin::BI__builtin_rintf:
931  case Builtin::BI__builtin_rintl: {
932  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
933  }
934  case Builtin::BI__builtin_nearbyint:
935  case Builtin::BI__builtin_nearbyintf:
936  case Builtin::BI__builtin_nearbyintl: {
937  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
938  }
939  case Builtin::BI__builtin_round:
940  case Builtin::BI__builtin_roundf:
941  case Builtin::BI__builtin_roundl: {
942  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
943  }
944  case Builtin::BI__builtin_fmin:
945  case Builtin::BI__builtin_fminf:
946  case Builtin::BI__builtin_fminl: {
947  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
948  }
949  case Builtin::BI__builtin_fmax:
950  case Builtin::BI__builtin_fmaxf:
951  case Builtin::BI__builtin_fmaxl: {
952  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
953  }
954  case Builtin::BI__builtin_conj:
955  case Builtin::BI__builtin_conjf:
956  case Builtin::BI__builtin_conjl: {
957  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
958  Value *Real = ComplexVal.first;
959  Value *Imag = ComplexVal.second;
960  Value *Zero =
961  Imag->getType()->isFPOrFPVectorTy()
962  ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
963  : llvm::Constant::getNullValue(Imag->getType());
964 
965  Imag = Builder.CreateFSub(Zero, Imag, "sub");
966  return RValue::getComplex(std::make_pair(Real, Imag));
967  }
968  case Builtin::BI__builtin_creal:
969  case Builtin::BI__builtin_crealf:
970  case Builtin::BI__builtin_creall:
971  case Builtin::BIcreal:
972  case Builtin::BIcrealf:
973  case Builtin::BIcreall: {
974  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
975  return RValue::get(ComplexVal.first);
976  }
977 
978  case Builtin::BI__builtin_cimag:
979  case Builtin::BI__builtin_cimagf:
980  case Builtin::BI__builtin_cimagl:
981  case Builtin::BIcimag:
982  case Builtin::BIcimagf:
983  case Builtin::BIcimagl: {
984  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
985  return RValue::get(ComplexVal.second);
986  }
987 
988  case Builtin::BI__builtin_ctzs:
989  case Builtin::BI__builtin_ctz:
990  case Builtin::BI__builtin_ctzl:
991  case Builtin::BI__builtin_ctzll: {
992  Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
993 
994  llvm::Type *ArgType = ArgValue->getType();
995  Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
996 
997  llvm::Type *ResultType = ConvertType(E->getType());
998  Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
999  Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
1000  if (Result->getType() != ResultType)
1001  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1002  "cast");
1003  return RValue::get(Result);
1004  }
1005  case Builtin::BI__builtin_clzs:
1006  case Builtin::BI__builtin_clz:
1007  case Builtin::BI__builtin_clzl:
1008  case Builtin::BI__builtin_clzll: {
1009  Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
1010 
1011  llvm::Type *ArgType = ArgValue->getType();
1012  Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1013 
1014  llvm::Type *ResultType = ConvertType(E->getType());
1015  Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
1016  Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
1017  if (Result->getType() != ResultType)
1018  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1019  "cast");
1020  return RValue::get(Result);
1021  }
1022  case Builtin::BI__builtin_ffs:
1023  case Builtin::BI__builtin_ffsl:
1024  case Builtin::BI__builtin_ffsll: {
1025  // ffs(x) -> x ? cttz(x) + 1 : 0
1026  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1027 
1028  llvm::Type *ArgType = ArgValue->getType();
1029  Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1030 
1031  llvm::Type *ResultType = ConvertType(E->getType());
1032  Value *Tmp =
1033  Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
1034  llvm::ConstantInt::get(ArgType, 1));
1035  Value *Zero = llvm::Constant::getNullValue(ArgType);
1036  Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
1037  Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
1038  if (Result->getType() != ResultType)
1039  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1040  "cast");
1041  return RValue::get(Result);
1042  }
1043  case Builtin::BI__builtin_parity:
1044  case Builtin::BI__builtin_parityl:
1045  case Builtin::BI__builtin_parityll: {
1046  // parity(x) -> ctpop(x) & 1
1047  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1048 
1049  llvm::Type *ArgType = ArgValue->getType();
1050  Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
1051 
1052  llvm::Type *ResultType = ConvertType(E->getType());
1053  Value *Tmp = Builder.CreateCall(F, ArgValue);
1054  Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
1055  if (Result->getType() != ResultType)
1056  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1057  "cast");
1058  return RValue::get(Result);
1059  }
1060  case Builtin::BI__popcnt16:
1061  case Builtin::BI__popcnt:
1062  case Builtin::BI__popcnt64:
1063  case Builtin::BI__builtin_popcount:
1064  case Builtin::BI__builtin_popcountl:
1065  case Builtin::BI__builtin_popcountll: {
1066  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1067 
1068  llvm::Type *ArgType = ArgValue->getType();
1069  Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
1070 
1071  llvm::Type *ResultType = ConvertType(E->getType());
1072  Value *Result = Builder.CreateCall(F, ArgValue);
1073  if (Result->getType() != ResultType)
1074  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1075  "cast");
1076  return RValue::get(Result);
1077  }
1078  case Builtin::BI_rotr8:
1079  case Builtin::BI_rotr16:
1080  case Builtin::BI_rotr:
1081  case Builtin::BI_lrotr:
1082  case Builtin::BI_rotr64: {
1083  Value *Val = EmitScalarExpr(E->getArg(0));
1084  Value *Shift = EmitScalarExpr(E->getArg(1));
1085 
1086  llvm::Type *ArgType = Val->getType();
1087  Shift = Builder.CreateIntCast(Shift, ArgType, false);
1088  unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
1089  Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
1090  Value *ArgZero = llvm::Constant::getNullValue(ArgType);
1091 
1092  Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
1093  Shift = Builder.CreateAnd(Shift, Mask);
1094  Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift);
1095 
1096  Value *RightShifted = Builder.CreateLShr(Val, Shift);
1097  Value *LeftShifted = Builder.CreateShl(Val, LeftShift);
1098  Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
1099 
1100  Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
1101  Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
1102  return RValue::get(Result);
1103  }
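// The rotate-left cases below mirror this: both mask the shift amount to
// (width - 1) and use a select for a shift of zero, since shifting by the
// full bit width would give an undefined (poison) result in LLVM IR.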
1104  case Builtin::BI_rotl8:
1105  case Builtin::BI_rotl16:
1106  case Builtin::BI_rotl:
1107  case Builtin::BI_lrotl:
1108  case Builtin::BI_rotl64: {
1109  Value *Val = EmitScalarExpr(E->getArg(0));
1110  Value *Shift = EmitScalarExpr(E->getArg(1));
1111 
1112  llvm::Type *ArgType = Val->getType();
1113  Shift = Builder.CreateIntCast(Shift, ArgType, false);
1114  unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
1115  Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
1116  Value *ArgZero = llvm::Constant::getNullValue(ArgType);
1117 
1118  Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
1119  Shift = Builder.CreateAnd(Shift, Mask);
1120  Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift);
1121 
1122  Value *LeftShifted = Builder.CreateShl(Val, Shift);
1123  Value *RightShifted = Builder.CreateLShr(Val, RightShift);
1124  Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
1125 
1126  Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
1127  Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
1128  return RValue::get(Result);
1129  }
1130  case Builtin::BI__builtin_unpredictable: {
1131  // Always return the argument of __builtin_unpredictable. LLVM does not
1132  // handle this builtin. Metadata for this builtin should be added directly
1133  // to instructions such as branches or switches that use it.
1134  return RValue::get(EmitScalarExpr(E->getArg(0)));
1135  }
1136  case Builtin::BI__builtin_expect: {
1137  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1138  llvm::Type *ArgType = ArgValue->getType();
1139 
1140  Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
1141  // Don't generate llvm.expect on -O0 as the backend won't use it for
1142  // anything.
1143  // Note, we still IRGen ExpectedValue because it could have side-effects.
1144  if (CGM.getCodeGenOpts().OptimizationLevel == 0)
1145  return RValue::get(ArgValue);
1146 
1147  Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
1148  Value *Result =
1149  Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
1150  return RValue::get(Result);
1151  }
1152  case Builtin::BI__builtin_assume_aligned: {
1153  Value *PtrValue = EmitScalarExpr(E->getArg(0));
1154  Value *OffsetValue =
1155  (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
1156 
1157  Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
1158  ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
1159  unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
1160 
1161  EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
1162  return RValue::get(PtrValue);
1163  }
1164  case Builtin::BI__assume:
1165  case Builtin::BI__builtin_assume: {
1166  if (E->getArg(0)->HasSideEffects(getContext()))
1167  return RValue::get(nullptr);
1168 
1169  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1170  Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
1171  return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
1172  }
1173  case Builtin::BI__builtin_bswap16:
1174  case Builtin::BI__builtin_bswap32:
1175  case Builtin::BI__builtin_bswap64: {
1176  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
1177  }
1178  case Builtin::BI__builtin_bitreverse8:
1179  case Builtin::BI__builtin_bitreverse16:
1180  case Builtin::BI__builtin_bitreverse32:
1181  case Builtin::BI__builtin_bitreverse64: {
1182  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
1183  }
1184  case Builtin::BI__builtin_object_size: {
1185  unsigned Type =
1186  E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
1187  auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
1188 
1189  // We pass this builtin onto the optimizer so that it can figure out the
1190  // object size in more complex cases.
1191  return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
1192  /*EmittedE=*/nullptr));
1193  }
1194  case Builtin::BI__builtin_prefetch: {
1195  Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
1196  // FIXME: Technically these constants should be of type 'int', yes?
1197  RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
1198  llvm::ConstantInt::get(Int32Ty, 0);
1199  Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
1200  llvm::ConstantInt::get(Int32Ty, 3);
1201  Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
1202  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
1203  return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
1204  }
1205  case Builtin::BI__builtin_readcyclecounter: {
1206  Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
1207  return RValue::get(Builder.CreateCall(F));
1208  }
1209  case Builtin::BI__builtin___clear_cache: {
1210  Value *Begin = EmitScalarExpr(E->getArg(0));
1211  Value *End = EmitScalarExpr(E->getArg(1));
1212  Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
1213  return RValue::get(Builder.CreateCall(F, {Begin, End}));
1214  }
1215  case Builtin::BI__builtin_trap:
1216  return RValue::get(EmitTrapCall(Intrinsic::trap));
1217  case Builtin::BI__debugbreak:
1218  return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
1219  case Builtin::BI__builtin_unreachable: {
1220  if (SanOpts.has(SanitizerKind::Unreachable)) {
1221  SanitizerScope SanScope(this);
1222  EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
1223  SanitizerKind::Unreachable),
1224  SanitizerHandler::BuiltinUnreachable,
1225  EmitCheckSourceLocation(E->getExprLoc()), None);
1226  } else
1227  Builder.CreateUnreachable();
1228 
1229  // We do need to preserve an insertion point.
1230  EmitBlock(createBasicBlock("unreachable.cont"));
1231 
1232  return RValue::get(nullptr);
1233  }
1234 
1235  case Builtin::BI__builtin_powi:
1236  case Builtin::BI__builtin_powif:
1237  case Builtin::BI__builtin_powil: {
1238  Value *Base = EmitScalarExpr(E->getArg(0));
1239  Value *Exponent = EmitScalarExpr(E->getArg(1));
1240  llvm::Type *ArgType = Base->getType();
1241  Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
1242  return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1243  }
1244 
1245  case Builtin::BI__builtin_isgreater:
1246  case Builtin::BI__builtin_isgreaterequal:
1247  case Builtin::BI__builtin_isless:
1248  case Builtin::BI__builtin_islessequal:
1249  case Builtin::BI__builtin_islessgreater:
1250  case Builtin::BI__builtin_isunordered: {
1251  // Ordered comparisons: we know the arguments to these are matching scalar
1252  // floating point values.
1253  Value *LHS = EmitScalarExpr(E->getArg(0));
1254  Value *RHS = EmitScalarExpr(E->getArg(1));
1255 
1256  switch (BuiltinID) {
1257  default: llvm_unreachable("Unknown ordered comparison");
1258  case Builtin::BI__builtin_isgreater:
1259  LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
1260  break;
1261  case Builtin::BI__builtin_isgreaterequal:
1262  LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
1263  break;
1264  case Builtin::BI__builtin_isless:
1265  LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
1266  break;
1267  case Builtin::BI__builtin_islessequal:
1268  LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
1269  break;
1270  case Builtin::BI__builtin_islessgreater:
1271  LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
1272  break;
1273  case Builtin::BI__builtin_isunordered:
1274  LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
1275  break;
1276  }
1277  // ZExt bool to int type.
1278  return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
1279  }
1280  case Builtin::BI__builtin_isnan: {
1281  Value *V = EmitScalarExpr(E->getArg(0));
1282  V = Builder.CreateFCmpUNO(V, V, "cmp");
1283  return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1284  }
1285 
1286  case Builtin::BIfinite:
1287  case Builtin::BI__finite:
1288  case Builtin::BIfinitef:
1289  case Builtin::BI__finitef:
1290  case Builtin::BIfinitel:
1291  case Builtin::BI__finitel:
1292  case Builtin::BI__builtin_isinf:
1293  case Builtin::BI__builtin_isfinite: {
1294  // isinf(x) --> fabs(x) == infinity
1295  // isfinite(x) --> fabs(x) != infinity
1296  // x != NaN via the ordered compare in either case.
1297  Value *V = EmitScalarExpr(E->getArg(0));
1298  Value *Fabs = EmitFAbs(*this, V);
1299  Constant *Infinity = ConstantFP::getInfinity(V->getType());
1300  CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
1301  ? CmpInst::FCMP_OEQ
1302  : CmpInst::FCMP_ONE;
1303  Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
1304  return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
1305  }
1306 
1307  case Builtin::BI__builtin_isinf_sign: {
1308  // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
1309  Value *Arg = EmitScalarExpr(E->getArg(0));
1310  Value *AbsArg = EmitFAbs(*this, Arg);
1311  Value *IsInf = Builder.CreateFCmpOEQ(
1312  AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
1313  Value *IsNeg = EmitSignBit(*this, Arg);
1314 
1315  llvm::Type *IntTy = ConvertType(E->getType());
1316  Value *Zero = Constant::getNullValue(IntTy);
1317  Value *One = ConstantInt::get(IntTy, 1);
1318  Value *NegativeOne = ConstantInt::get(IntTy, -1);
1319  Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
1320  Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
1321  return RValue::get(Result);
1322  }
1323 
1324  case Builtin::BI__builtin_isnormal: {
1325  // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
1326  Value *V = EmitScalarExpr(E->getArg(0));
1327  Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
1328 
1329  Value *Abs = EmitFAbs(*this, V);
1330  Value *IsLessThanInf =
1331  Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
1332  APFloat Smallest = APFloat::getSmallestNormalized(
1333  getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
1334  Value *IsNormal =
1335  Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
1336  "isnormal");
1337  V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
1338  V = Builder.CreateAnd(V, IsNormal, "and");
1339  return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1340  }
1341 
1342  case Builtin::BI__builtin_fpclassify: {
1343  Value *V = EmitScalarExpr(E->getArg(5));
1344  llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
1345 
1346  // Create Result
1347  BasicBlock *Begin = Builder.GetInsertBlock();
1348  BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
1349  Builder.SetInsertPoint(End);
1350  PHINode *Result =
1351  Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
1352  "fpclassify_result");
1353 
1354  // if (V==0) return FP_ZERO
1355  Builder.SetInsertPoint(Begin);
1356  Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
1357  "iszero");
1358  Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
1359  BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
1360  Builder.CreateCondBr(IsZero, End, NotZero);
1361  Result->addIncoming(ZeroLiteral, Begin);
1362 
1363  // if (V != V) return FP_NAN
1364  Builder.SetInsertPoint(NotZero);
1365  Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
1366  Value *NanLiteral = EmitScalarExpr(E->getArg(0));
1367  BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
1368  Builder.CreateCondBr(IsNan, End, NotNan);
1369  Result->addIncoming(NanLiteral, NotZero);
1370 
1371  // if (fabs(V) == infinity) return FP_INFINITY
1372  Builder.SetInsertPoint(NotNan);
1373  Value *VAbs = EmitFAbs(*this, V);
1374  Value *IsInf =
1375  Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
1376  "isinf");
1377  Value *InfLiteral = EmitScalarExpr(E->getArg(1));
1378  BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
1379  Builder.CreateCondBr(IsInf, End, NotInf);
1380  Result->addIncoming(InfLiteral, NotNan);
1381 
1382  // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
1383  Builder.SetInsertPoint(NotInf);
1384  APFloat Smallest = APFloat::getSmallestNormalized(
1385  getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
1386  Value *IsNormal =
1387  Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
1388  "isnormal");
1389  Value *NormalResult =
1390  Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
1391  EmitScalarExpr(E->getArg(3)));
1392  Builder.CreateBr(End);
1393  Result->addIncoming(NormalResult, NotInf);
1394 
1395  // return Result
1396  Builder.SetInsertPoint(End);
1397  return RValue::get(Result);
1398  }
1399 
1400  case Builtin::BIalloca:
1401  case Builtin::BI_alloca:
1402  case Builtin::BI__builtin_alloca: {
1403  Value *Size = EmitScalarExpr(E->getArg(0));
1404  const TargetInfo &TI = getContext().getTargetInfo();
1405  // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
1406  unsigned SuitableAlignmentInBytes =
1407  CGM.getContext()
1408  .toCharUnitsFromBits(TI.getSuitableAlign())
1409  .getQuantity();
1410  AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1411  AI->setAlignment(SuitableAlignmentInBytes);
1412  return RValue::get(AI);
1413  }
1414 
1415  case Builtin::BI__builtin_alloca_with_align: {
1416  Value *Size = EmitScalarExpr(E->getArg(0));
1417  Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
1418  auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
1419  unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
1420  unsigned AlignmentInBytes =
1421  CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
1422  AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1423  AI->setAlignment(AlignmentInBytes);
1424  return RValue::get(AI);
1425  }
1426 
1427  case Builtin::BIbzero:
1428  case Builtin::BI__builtin_bzero: {
1429  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1430  Value *SizeVal = EmitScalarExpr(E->getArg(1));
1431  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1432  E->getArg(0)->getExprLoc(), FD, 0);
1433  Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
1434  return RValue::get(Dest.getPointer());
1435  }
1436  case Builtin::BImemcpy:
1437  case Builtin::BI__builtin_memcpy: {
1438  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1439  Address Src = EmitPointerWithAlignment(E->getArg(1));
1440  Value *SizeVal = EmitScalarExpr(E->getArg(2));
1441  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1442  E->getArg(0)->getExprLoc(), FD, 0);
1443  EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1444  E->getArg(1)->getExprLoc(), FD, 1);
1445  Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1446  return RValue::get(Dest.getPointer());
1447  }
1448 
1449  case Builtin::BI__builtin_char_memchr:
1450  BuiltinID = Builtin::BI__builtin_memchr;
1451  break;
1452 
1453  case Builtin::BI__builtin___memcpy_chk: {
1454  // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
1455  llvm::APSInt Size, DstSize;
1456  if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1457  !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1458  break;
1459  if (Size.ugt(DstSize))
1460  break;
1461  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1462  Address Src = EmitPointerWithAlignment(E->getArg(1));
1463  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1464  Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1465  return RValue::get(Dest.getPointer());
1466  }
1467 
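 // Illustrative sketch (not from the original source): with constant sizes,
 //   __builtin___memcpy_chk(dst, src, 16, __builtin_object_size(dst, 0));
 // folds to a plain llvm.memcpy of 16 bytes whenever the computed object size
 // is a constant >= 16; otherwise the break above leaves the call to be
 // emitted as an ordinary __memcpy_chk library call by the generic path.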
1468  case Builtin::BI__builtin_objc_memmove_collectable: {
1469  Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
1470  Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
1471  Value *SizeVal = EmitScalarExpr(E->getArg(2));
1472  CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
1473  DestAddr, SrcAddr, SizeVal);
1474  return RValue::get(DestAddr.getPointer());
1475  }
1476 
1477  case Builtin::BI__builtin___memmove_chk: {
1478  // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
1479  llvm::APSInt Size, DstSize;
1480  if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1481  !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1482  break;
1483  if (Size.ugt(DstSize))
1484  break;
1485  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1486  Address Src = EmitPointerWithAlignment(E->getArg(1));
1487  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1488  Builder.CreateMemMove(Dest, Src, SizeVal, false);
1489  return RValue::get(Dest.getPointer());
1490  }
1491 
1492  case Builtin::BImemmove:
1493  case Builtin::BI__builtin_memmove: {
1494  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1495  Address Src = EmitPointerWithAlignment(E->getArg(1));
1496  Value *SizeVal = EmitScalarExpr(E->getArg(2));
1497  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1498  E->getArg(0)->getExprLoc(), FD, 0);
1499  EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1500  E->getArg(1)->getExprLoc(), FD, 1);
1501  Builder.CreateMemMove(Dest, Src, SizeVal, false);
1502  return RValue::get(Dest.getPointer());
1503  }
1504  case Builtin::BImemset:
1505  case Builtin::BI__builtin_memset: {
1506  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1507  Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1508  Builder.getInt8Ty());
1509  Value *SizeVal = EmitScalarExpr(E->getArg(2));
1510  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1511  E->getArg(0)->getExprLoc(), FD, 0);
1512  Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1513  return RValue::get(Dest.getPointer());
1514  }
1515  case Builtin::BI__builtin___memset_chk: {
1516  // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
1517  llvm::APSInt Size, DstSize;
1518  if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1519  !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1520  break;
1521  if (Size.ugt(DstSize))
1522  break;
1523  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1524  Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1525  Builder.getInt8Ty());
1526  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1527  Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1528  return RValue::get(Dest.getPointer());
1529  }
1530  case Builtin::BI__builtin_dwarf_cfa: {
1531  // The offset in bytes from the first argument to the CFA.
1532  //
1533  // Why on earth is this in the frontend? Is there any reason at
1534  // all that the backend can't reasonably determine this while
1535  // lowering llvm.eh.dwarf.cfa()?
1536  //
1537  // TODO: If there's a satisfactory reason, add a target hook for
1538  // this instead of hard-coding 0, which is correct for most targets.
1539  int32_t Offset = 0;
1540 
1541  Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
1542  return RValue::get(Builder.CreateCall(F,
1543  llvm::ConstantInt::get(Int32Ty, Offset)));
1544  }
1545  case Builtin::BI__builtin_return_address: {
1546  Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
1547  getContext().UnsignedIntTy);
1548  Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1549  return RValue::get(Builder.CreateCall(F, Depth));
1550  }
1551  case Builtin::BI_ReturnAddress: {
1552  Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1553  return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
1554  }
1555  case Builtin::BI__builtin_frame_address: {
1556  Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
1557  getContext().UnsignedIntTy);
1558  Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
1559  return RValue::get(Builder.CreateCall(F, Depth));
1560  }
1561  case Builtin::BI__builtin_extract_return_addr: {
1562  Value *Address = EmitScalarExpr(E->getArg(0));
1563  Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1564  return RValue::get(Result);
1565  }
1566  case Builtin::BI__builtin_frob_return_addr: {
1567  Value *Address = EmitScalarExpr(E->getArg(0));
1568  Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1569  return RValue::get(Result);
1570  }
1571  case Builtin::BI__builtin_dwarf_sp_column: {
1572  llvm::IntegerType *Ty
1573  = cast<llvm::IntegerType>(ConvertType(E->getType()));
1574  int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
1575  if (Column == -1) {
1576  CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1577  return RValue::get(llvm::UndefValue::get(Ty));
1578  }
1579  return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1580  }
1581  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
1582  Value *Address = EmitScalarExpr(E->getArg(0));
1583  if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1584  CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1585  return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1586  }
1587  case Builtin::BI__builtin_eh_return: {
1588  Value *Int = EmitScalarExpr(E->getArg(0));
1589  Value *Ptr = EmitScalarExpr(E->getArg(1));
1590 
1591  llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1592  assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1593  "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
1594  Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1595  ? Intrinsic::eh_return_i32
1596  : Intrinsic::eh_return_i64);
1597  Builder.CreateCall(F, {Int, Ptr});
1598  Builder.CreateUnreachable();
1599 
1600  // We do need to preserve an insertion point.
1601  EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1602 
1603  return RValue::get(nullptr);
1604  }
1605  case Builtin::BI__builtin_unwind_init: {
1606  Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1607  return RValue::get(Builder.CreateCall(F));
1608  }
1609  case Builtin::BI__builtin_extend_pointer: {
1610  // Extends a pointer to the size of an _Unwind_Word, which is
1611  // uint64_t on all platforms. Generally this gets poked into a
1612  // register and eventually used as an address, so if the
1613  // addressing registers are wider than pointers and the platform
1614  // doesn't implicitly ignore high-order bits when doing
1615  // addressing, we need to make sure we zext / sext based on
1616  // the platform's expectations.
1617  //
1618  // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
1619 
1620  // Cast the pointer to intptr_t.
1621  Value *Ptr = EmitScalarExpr(E->getArg(0));
1622  Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1623 
1624  // If that's 64 bits, we're done.
1625  if (IntPtrTy->getBitWidth() == 64)
1626  return RValue::get(Result);
1627 
1628  // Otherwise, ask the target hooks whether to sign- or zero-extend.
1629  if (getTargetHooks().extendPointerWithSExt())
1630  return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1631  else
1632  return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1633  }
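 // For illustration (not part of the original source): on a 32-bit target
 // the pointer is first converted and then widened, e.g.
 //   %cast = ptrtoint i8* %p to i32
 //   %wide = zext i32 %cast to i64    ; or sext, if the target hooks say so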
1634  case Builtin::BI__builtin_setjmp: {
1635  // Buffer is a void**.
1636  Address Buf = EmitPointerWithAlignment(E->getArg(0));
1637 
1638  // Store the frame pointer to the setjmp buffer.
1639  Value *FrameAddr =
1640  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1641  ConstantInt::get(Int32Ty, 0));
1642  Builder.CreateStore(FrameAddr, Buf);
1643 
1644  // Store the stack pointer to the setjmp buffer.
1645  Value *StackAddr =
1646  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
1647  Address StackSaveSlot =
1648  Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
1649  Builder.CreateStore(StackAddr, StackSaveSlot);
1650 
1651  // Call LLVM's EH setjmp, which is lightweight.
1652  Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
1653  Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1654  return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
1655  }
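 // Buffer layout used above (sketch, not in the original source): slot 0 of
 // the void** buffer receives the frame address, slot 2 the saved stack
 // pointer, and llvm.eh.sjlj.setjmp fills in the remaining target-specific
 // slots (typically the resume address).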
1656  case Builtin::BI__builtin_longjmp: {
1657  Value *Buf = EmitScalarExpr(E->getArg(0));
1658  Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1659 
1660  // Call LLVM's EH longjmp, which is lightweight.
1661  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1662 
1663  // longjmp doesn't return; mark this as unreachable.
1664  Builder.CreateUnreachable();
1665 
1666  // We do need to preserve an insertion point.
1667  EmitBlock(createBasicBlock("longjmp.cont"));
1668 
1669  return RValue::get(nullptr);
1670  }
1671  case Builtin::BI__sync_fetch_and_add:
1672  case Builtin::BI__sync_fetch_and_sub:
1673  case Builtin::BI__sync_fetch_and_or:
1674  case Builtin::BI__sync_fetch_and_and:
1675  case Builtin::BI__sync_fetch_and_xor:
1676  case Builtin::BI__sync_fetch_and_nand:
1677  case Builtin::BI__sync_add_and_fetch:
1678  case Builtin::BI__sync_sub_and_fetch:
1679  case Builtin::BI__sync_and_and_fetch:
1680  case Builtin::BI__sync_or_and_fetch:
1681  case Builtin::BI__sync_xor_and_fetch:
1682  case Builtin::BI__sync_nand_and_fetch:
1683  case Builtin::BI__sync_val_compare_and_swap:
1684  case Builtin::BI__sync_bool_compare_and_swap:
1685  case Builtin::BI__sync_lock_test_and_set:
1686  case Builtin::BI__sync_lock_release:
1687  case Builtin::BI__sync_swap:
1688  llvm_unreachable("Shouldn't make it through sema");
1689  case Builtin::BI__sync_fetch_and_add_1:
1690  case Builtin::BI__sync_fetch_and_add_2:
1691  case Builtin::BI__sync_fetch_and_add_4:
1692  case Builtin::BI__sync_fetch_and_add_8:
1693  case Builtin::BI__sync_fetch_and_add_16:
1694  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
1695  case Builtin::BI__sync_fetch_and_sub_1:
1696  case Builtin::BI__sync_fetch_and_sub_2:
1697  case Builtin::BI__sync_fetch_and_sub_4:
1698  case Builtin::BI__sync_fetch_and_sub_8:
1699  case Builtin::BI__sync_fetch_and_sub_16:
1700  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
1701  case Builtin::BI__sync_fetch_and_or_1:
1702  case Builtin::BI__sync_fetch_and_or_2:
1703  case Builtin::BI__sync_fetch_and_or_4:
1704  case Builtin::BI__sync_fetch_and_or_8:
1705  case Builtin::BI__sync_fetch_and_or_16:
1706  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
1707  case Builtin::BI__sync_fetch_and_and_1:
1708  case Builtin::BI__sync_fetch_and_and_2:
1709  case Builtin::BI__sync_fetch_and_and_4:
1710  case Builtin::BI__sync_fetch_and_and_8:
1711  case Builtin::BI__sync_fetch_and_and_16:
1712  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
1713  case Builtin::BI__sync_fetch_and_xor_1:
1714  case Builtin::BI__sync_fetch_and_xor_2:
1715  case Builtin::BI__sync_fetch_and_xor_4:
1716  case Builtin::BI__sync_fetch_and_xor_8:
1717  case Builtin::BI__sync_fetch_and_xor_16:
1718  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
1719  case Builtin::BI__sync_fetch_and_nand_1:
1720  case Builtin::BI__sync_fetch_and_nand_2:
1721  case Builtin::BI__sync_fetch_and_nand_4:
1722  case Builtin::BI__sync_fetch_and_nand_8:
1723  case Builtin::BI__sync_fetch_and_nand_16:
1724  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
1725 
1726  // Clang extensions: not overloaded yet.
1727  case Builtin::BI__sync_fetch_and_min:
1728  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
1729  case Builtin::BI__sync_fetch_and_max:
1730  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
1731  case Builtin::BI__sync_fetch_and_umin:
1732  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
1733  case Builtin::BI__sync_fetch_and_umax:
1734  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1735 
1736  case Builtin::BI__sync_add_and_fetch_1:
1737  case Builtin::BI__sync_add_and_fetch_2:
1738  case Builtin::BI__sync_add_and_fetch_4:
1739  case Builtin::BI__sync_add_and_fetch_8:
1740  case Builtin::BI__sync_add_and_fetch_16:
1741  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
1742  llvm::Instruction::Add);
1743  case Builtin::BI__sync_sub_and_fetch_1:
1744  case Builtin::BI__sync_sub_and_fetch_2:
1745  case Builtin::BI__sync_sub_and_fetch_4:
1746  case Builtin::BI__sync_sub_and_fetch_8:
1747  case Builtin::BI__sync_sub_and_fetch_16:
1748  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
1749  llvm::Instruction::Sub);
1750  case Builtin::BI__sync_and_and_fetch_1:
1751  case Builtin::BI__sync_and_and_fetch_2:
1752  case Builtin::BI__sync_and_and_fetch_4:
1753  case Builtin::BI__sync_and_and_fetch_8:
1754  case Builtin::BI__sync_and_and_fetch_16:
1755  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
1756  llvm::Instruction::And);
1757  case Builtin::BI__sync_or_and_fetch_1:
1758  case Builtin::BI__sync_or_and_fetch_2:
1759  case Builtin::BI__sync_or_and_fetch_4:
1760  case Builtin::BI__sync_or_and_fetch_8:
1761  case Builtin::BI__sync_or_and_fetch_16:
1762  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
1763  llvm::Instruction::Or);
1764  case Builtin::BI__sync_xor_and_fetch_1:
1765  case Builtin::BI__sync_xor_and_fetch_2:
1766  case Builtin::BI__sync_xor_and_fetch_4:
1767  case Builtin::BI__sync_xor_and_fetch_8:
1768  case Builtin::BI__sync_xor_and_fetch_16:
1769  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
1770  llvm::Instruction::Xor);
1771  case Builtin::BI__sync_nand_and_fetch_1:
1772  case Builtin::BI__sync_nand_and_fetch_2:
1773  case Builtin::BI__sync_nand_and_fetch_4:
1774  case Builtin::BI__sync_nand_and_fetch_8:
1775  case Builtin::BI__sync_nand_and_fetch_16:
1776  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
1777  llvm::Instruction::And, true);
1778 
1779  case Builtin::BI__sync_val_compare_and_swap_1:
1780  case Builtin::BI__sync_val_compare_and_swap_2:
1781  case Builtin::BI__sync_val_compare_and_swap_4:
1782  case Builtin::BI__sync_val_compare_and_swap_8:
1783  case Builtin::BI__sync_val_compare_and_swap_16:
1784  return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
1785 
1786  case Builtin::BI__sync_bool_compare_and_swap_1:
1787  case Builtin::BI__sync_bool_compare_and_swap_2:
1788  case Builtin::BI__sync_bool_compare_and_swap_4:
1789  case Builtin::BI__sync_bool_compare_and_swap_8:
1790  case Builtin::BI__sync_bool_compare_and_swap_16:
1791  return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
1792 
1793  case Builtin::BI__sync_swap_1:
1794  case Builtin::BI__sync_swap_2:
1795  case Builtin::BI__sync_swap_4:
1796  case Builtin::BI__sync_swap_8:
1797  case Builtin::BI__sync_swap_16:
1798  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1799 
1800  case Builtin::BI__sync_lock_test_and_set_1:
1801  case Builtin::BI__sync_lock_test_and_set_2:
1802  case Builtin::BI__sync_lock_test_and_set_4:
1803  case Builtin::BI__sync_lock_test_and_set_8:
1804  case Builtin::BI__sync_lock_test_and_set_16:
1805  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1806 
1807  case Builtin::BI__sync_lock_release_1:
1808  case Builtin::BI__sync_lock_release_2:
1809  case Builtin::BI__sync_lock_release_4:
1810  case Builtin::BI__sync_lock_release_8:
1811  case Builtin::BI__sync_lock_release_16: {
1812  Value *Ptr = EmitScalarExpr(E->getArg(0));
1813  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1814  CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1815  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1816  StoreSize.getQuantity() * 8);
1817  Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1818  llvm::StoreInst *Store =
1819  Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
1820  StoreSize);
1821  Store->setAtomic(llvm::AtomicOrdering::Release);
1822  return RValue::get(nullptr);
1823  }
1824 
1825  case Builtin::BI__sync_synchronize: {
1826  // We assume this is supposed to correspond to a C++0x-style
1827  // sequentially-consistent fence (i.e. this is only usable for
1828  // synchronization, not device I/O or anything like that). This intrinsic
1829  // is really badly designed in the sense that in theory, there isn't
1830  // any way to safely use it... but in practice, it mostly works
1831  // to use it with non-atomic loads and stores to get acquire/release
1832  // semantics.
1833  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
1834  return RValue::get(nullptr);
1835  }
1836 
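 // Rough equivalence (illustrative, not from the original source):
 //   __sync_synchronize();
 // emits a single
 //   fence seq_cst
 // instruction, comparable to std::atomic_thread_fence(std::memory_order_seq_cst).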
1837  case Builtin::BI__builtin_nontemporal_load:
1838  return RValue::get(EmitNontemporalLoad(*this, E));
1839  case Builtin::BI__builtin_nontemporal_store:
1840  return RValue::get(EmitNontemporalStore(*this, E));
1841  case Builtin::BI__c11_atomic_is_lock_free:
1842  case Builtin::BI__atomic_is_lock_free: {
1843  // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1844  // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1845  // _Atomic(T) is always properly-aligned.
1846  const char *LibCallName = "__atomic_is_lock_free";
1847  CallArgList Args;
1848  Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1849  getContext().getSizeType());
1850  if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1851  Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1852  getContext().VoidPtrTy);
1853  else
1854  Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1855  getContext().VoidPtrTy);
1856  const CGFunctionInfo &FuncInfo =
1857  CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
1858  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1859  llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1860  return EmitCall(FuncInfo, CGCallee::forDirect(Func),
1861  ReturnValueSlot(), Args);
1862  }
1863 
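 // Sketch of the generated call (not in the original source): for
 //   __c11_atomic_is_lock_free(sizeof(T))
 // the runtime call is effectively
 //   __atomic_is_lock_free(sizeof(T), (void *)0)
 // because an _Atomic(T) object is assumed to be suitably aligned.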
1864  case Builtin::BI__atomic_test_and_set: {
1865  // Look at the argument type to determine whether this is a volatile
1866  // operation. The parameter type is always volatile.
1867  QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1868  bool Volatile =
1869  PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1870 
1871  Value *Ptr = EmitScalarExpr(E->getArg(0));
1872  unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1873  Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1874  Value *NewVal = Builder.getInt8(1);
1875  Value *Order = EmitScalarExpr(E->getArg(1));
1876  if (isa<llvm::ConstantInt>(Order)) {
1877  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1878  AtomicRMWInst *Result = nullptr;
1879  switch (ord) {
1880  case 0: // memory_order_relaxed
1881  default: // invalid order
1882  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1883  llvm::AtomicOrdering::Monotonic);
1884  break;
1885  case 1: // memory_order_consume
1886  case 2: // memory_order_acquire
1887  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1888  llvm::AtomicOrdering::Acquire);
1889  break;
1890  case 3: // memory_order_release
1891  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1892  llvm::AtomicOrdering::Release);
1893  break;
1894  case 4: // memory_order_acq_rel
1895 
1896  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1897  llvm::AtomicOrdering::AcquireRelease);
1898  break;
1899  case 5: // memory_order_seq_cst
1900  Result = Builder.CreateAtomicRMW(
1901  llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1902  llvm::AtomicOrdering::SequentiallyConsistent);
1903  break;
1904  }
1905  Result->setVolatile(Volatile);
1906  return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1907  }
1908 
1909  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1910 
1911  llvm::BasicBlock *BBs[5] = {
1912  createBasicBlock("monotonic", CurFn),
1913  createBasicBlock("acquire", CurFn),
1914  createBasicBlock("release", CurFn),
1915  createBasicBlock("acqrel", CurFn),
1916  createBasicBlock("seqcst", CurFn)
1917  };
1918  llvm::AtomicOrdering Orders[5] = {
1919  llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
1920  llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
1921  llvm::AtomicOrdering::SequentiallyConsistent};
1922 
1923  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1924  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1925 
1926  Builder.SetInsertPoint(ContBB);
1927  PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1928 
1929  for (unsigned i = 0; i < 5; ++i) {
1930  Builder.SetInsertPoint(BBs[i]);
1931  AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1932  Ptr, NewVal, Orders[i]);
1933  RMW->setVolatile(Volatile);
1934  Result->addIncoming(RMW, BBs[i]);
1935  Builder.CreateBr(ContBB);
1936  }
1937 
1938  SI->addCase(Builder.getInt32(0), BBs[0]);
1939  SI->addCase(Builder.getInt32(1), BBs[1]);
1940  SI->addCase(Builder.getInt32(2), BBs[1]);
1941  SI->addCase(Builder.getInt32(3), BBs[2]);
1942  SI->addCase(Builder.getInt32(4), BBs[3]);
1943  SI->addCase(Builder.getInt32(5), BBs[4]);
1944 
1945  Builder.SetInsertPoint(ContBB);
1946  return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1947  }
1948 
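 // Illustration (not from the original source): with a constant ordering,
 //   __atomic_test_and_set(p, __ATOMIC_ACQUIRE);
 // becomes a single 'atomicrmw xchg i8* %p, i8 1 acquire' whose result is
 // compared against zero; a non-constant ordering instead goes through the
 // switch over the five ordering blocks built above.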
1949  case Builtin::BI__atomic_clear: {
1950  QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1951  bool Volatile =
1952  PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1953 
1954  Address Ptr = EmitPointerWithAlignment(E->getArg(0));
1955  unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
1956  Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1957  Value *NewVal = Builder.getInt8(0);
1958  Value *Order = EmitScalarExpr(E->getArg(1));
1959  if (isa<llvm::ConstantInt>(Order)) {
1960  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1961  StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1962  switch (ord) {
1963  case 0: // memory_order_relaxed
1964  default: // invalid order
1965  Store->setOrdering(llvm::AtomicOrdering::Monotonic);
1966  break;
1967  case 3: // memory_order_release
1968  Store->setOrdering(llvm::AtomicOrdering::Release);
1969  break;
1970  case 5: // memory_order_seq_cst
1971  Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
1972  break;
1973  }
1974  return RValue::get(nullptr);
1975  }
1976 
1977  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1978 
1979  llvm::BasicBlock *BBs[3] = {
1980  createBasicBlock("monotonic", CurFn),
1981  createBasicBlock("release", CurFn),
1982  createBasicBlock("seqcst", CurFn)
1983  };
1984  llvm::AtomicOrdering Orders[3] = {
1985  llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
1986  llvm::AtomicOrdering::SequentiallyConsistent};
1987 
1988  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1989  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1990 
1991  for (unsigned i = 0; i < 3; ++i) {
1992  Builder.SetInsertPoint(BBs[i]);
1993  StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1994  Store->setOrdering(Orders[i]);
1995  Builder.CreateBr(ContBB);
1996  }
1997 
1998  SI->addCase(Builder.getInt32(0), BBs[0]);
1999  SI->addCase(Builder.getInt32(3), BBs[1]);
2000  SI->addCase(Builder.getInt32(5), BBs[2]);
2001 
2002  Builder.SetInsertPoint(ContBB);
2003  return RValue::get(nullptr);
2004  }
2005 
2006  case Builtin::BI__atomic_thread_fence:
2007  case Builtin::BI__atomic_signal_fence:
2008  case Builtin::BI__c11_atomic_thread_fence:
2009  case Builtin::BI__c11_atomic_signal_fence: {
2010  llvm::SyncScope::ID SSID;
2011  if (BuiltinID == Builtin::BI__atomic_signal_fence ||
2012  BuiltinID == Builtin::BI__c11_atomic_signal_fence)
2013  SSID = llvm::SyncScope::SingleThread;
2014  else
2015  SSID = llvm::SyncScope::System;
2016  Value *Order = EmitScalarExpr(E->getArg(0));
2017  if (isa<llvm::ConstantInt>(Order)) {
2018  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
2019  switch (ord) {
2020  case 0: // memory_order_relaxed
2021  default: // invalid order
2022  break;
2023  case 1: // memory_order_consume
2024  case 2: // memory_order_acquire
2025  Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
2026  break;
2027  case 3: // memory_order_release
2028  Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
2029  break;
2030  case 4: // memory_order_acq_rel
2031  Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
2032  break;
2033  case 5: // memory_order_seq_cst
2034  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
2035  break;
2036  }
2037  return RValue::get(nullptr);
2038  }
2039 
2040  llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
2041  AcquireBB = createBasicBlock("acquire", CurFn);
2042  ReleaseBB = createBasicBlock("release", CurFn);
2043  AcqRelBB = createBasicBlock("acqrel", CurFn);
2044  SeqCstBB = createBasicBlock("seqcst", CurFn);
2045  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
2046 
2047  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
2048  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
2049 
2050  Builder.SetInsertPoint(AcquireBB);
2051  Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
2052  Builder.CreateBr(ContBB);
2053  SI->addCase(Builder.getInt32(1), AcquireBB);
2054  SI->addCase(Builder.getInt32(2), AcquireBB);
2055 
2056  Builder.SetInsertPoint(ReleaseBB);
2057  Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
2058  Builder.CreateBr(ContBB);
2059  SI->addCase(Builder.getInt32(3), ReleaseBB);
2060 
2061  Builder.SetInsertPoint(AcqRelBB);
2062  Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
2063  Builder.CreateBr(ContBB);
2064  SI->addCase(Builder.getInt32(4), AcqRelBB);
2065 
2066  Builder.SetInsertPoint(SeqCstBB);
2067  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
2068  Builder.CreateBr(ContBB);
2069  SI->addCase(Builder.getInt32(5), SeqCstBB);
2070 
2071  Builder.SetInsertPoint(ContBB);
2072  return RValue::get(nullptr);
2073  }
2074 
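 // Mapping used above (illustrative, not part of the original source):
 //   memory_order_relaxed          -> no fence
 //   memory_order_consume/acquire  -> fence acquire
 //   memory_order_release          -> fence release
 //   memory_order_acq_rel          -> fence acq_rel
 //   memory_order_seq_cst          -> fence seq_cst
 // __atomic_signal_fence and __c11_atomic_signal_fence additionally restrict
 // the fence to the single-thread sync scope.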
2075  // Library functions with special handling.
2076  case Builtin::BIsqrt:
2077  case Builtin::BIsqrtf:
2078  case Builtin::BIsqrtl: {
2079  // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
2080  // in finite- or unsafe-math mode (the intrinsic has different semantics
2081  // for handling negative numbers compared to the library function, so
2082  // -fmath-errno=0 is not enough).
2083  if (!FD->hasAttr<ConstAttr>())
2084  break;
2085  if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
2086  CGM.getCodeGenOpts().NoNaNsFPMath))
2087  break;
2088  Value *Arg0 = EmitScalarExpr(E->getArg(0));
2089  llvm::Type *ArgType = Arg0->getType();
2090  Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
2091  return RValue::get(Builder.CreateCall(F, Arg0));
2092  }
2093 
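 // Example of the guard above (not in the original source): when the call is
 // 'const' (e.g. under -fno-math-errno) and unsafe or no-NaNs math is enabled,
 //   double r = sqrt(x);
 // may become 'call double @llvm.sqrt.f64(double %x)'; otherwise the plain
 // libm call is kept so that sqrt(-1.0) still sets errno.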
2094  case Builtin::BI__builtin_pow:
2095  case Builtin::BI__builtin_powf:
2096  case Builtin::BI__builtin_powl:
2097  case Builtin::BIpow:
2098  case Builtin::BIpowf:
2099  case Builtin::BIpowl: {
2100  // Transform a call to pow* into a @llvm.pow.* intrinsic call.
2101  if (!FD->hasAttr<ConstAttr>())
2102  break;
2103  Value *Base = EmitScalarExpr(E->getArg(0));
2104  Value *Exponent = EmitScalarExpr(E->getArg(1));
2105  llvm::Type *ArgType = Base->getType();
2106  Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
2107  return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
2108  }
2109 
2110  case Builtin::BIfma:
2111  case Builtin::BIfmaf:
2112  case Builtin::BIfmal:
2113  case Builtin::BI__builtin_fma:
2114  case Builtin::BI__builtin_fmaf:
2115  case Builtin::BI__builtin_fmal: {
2116  // Rewrite fma to intrinsic.
2117  Value *FirstArg = EmitScalarExpr(E->getArg(0));
2118  llvm::Type *ArgType = FirstArg->getType();
2119  Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
2120  return RValue::get(
2121  Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
2122  EmitScalarExpr(E->getArg(2))}));
2123  }
2124 
2125  case Builtin::BI__builtin_signbit:
2126  case Builtin::BI__builtin_signbitf:
2127  case Builtin::BI__builtin_signbitl: {
2128  return RValue::get(
2129  Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
2130  ConvertType(E->getType())));
2131  }
2132  case Builtin::BI__annotation: {
2133  // Re-encode each wide string to UTF8 and make an MDString.
2134  SmallVector<Metadata *, 1> Strings;
2135  for (const Expr *Arg : E->arguments()) {
2136  const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
2137  assert(Str->getCharByteWidth() == 2);
2138  StringRef WideBytes = Str->getBytes();
2139  std::string StrUtf8;
2140  if (!convertUTF16ToUTF8String(
2141  makeArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
2142  CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
2143  continue;
2144  }
2145  Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
2146  }
2147 
2148  // Build an MDTuple of MDStrings and emit the intrinsic call.
2149  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
2150  MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
2151  Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
2152  return RValue::getIgnored();
2153  }
2154  case Builtin::BI__builtin_annotation: {
2155  llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
2156  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
2157  AnnVal->getType());
2158 
2159  // Get the annotation string, go through casts. Sema requires this to be a
2160  // non-wide string literal, potentially cast, so the cast<> is safe.
2161  const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
2162  StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
2163  return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
2164  }
2165  case Builtin::BI__builtin_addcb:
2166  case Builtin::BI__builtin_addcs:
2167  case Builtin::BI__builtin_addc:
2168  case Builtin::BI__builtin_addcl:
2169  case Builtin::BI__builtin_addcll:
2170  case Builtin::BI__builtin_subcb:
2171  case Builtin::BI__builtin_subcs:
2172  case Builtin::BI__builtin_subc:
2173  case Builtin::BI__builtin_subcl:
2174  case Builtin::BI__builtin_subcll: {
2175 
2176  // We translate all of these builtins from expressions of the form:
2177  // int x = ..., y = ..., carryin = ..., carryout, result;
2178  // result = __builtin_addc(x, y, carryin, &carryout);
2179  //
2180  // to LLVM IR of the form:
2181  //
2182  // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
2183  // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
2184  // %carry1 = extractvalue {i32, i1} %tmp1, 1
2185  // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
2186  // i32 %carryin)
2187  // %result = extractvalue {i32, i1} %tmp2, 0
2188  // %carry2 = extractvalue {i32, i1} %tmp2, 1
2189  // %tmp3 = or i1 %carry1, %carry2
2190  // %tmp4 = zext i1 %tmp3 to i32
2191  // store i32 %tmp4, i32* %carryout
2192 
2193  // Scalarize our inputs.
2194  llvm::Value *X = EmitScalarExpr(E->getArg(0));
2195  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2196  llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
2197  Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
2198 
2199  // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
2200  llvm::Intrinsic::ID IntrinsicId;
2201  switch (BuiltinID) {
2202  default: llvm_unreachable("Unknown multiprecision builtin id.");
2203  case Builtin::BI__builtin_addcb:
2204  case Builtin::BI__builtin_addcs:
2205  case Builtin::BI__builtin_addc:
2206  case Builtin::BI__builtin_addcl:
2207  case Builtin::BI__builtin_addcll:
2208  IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2209  break;
2210  case Builtin::BI__builtin_subcb:
2211  case Builtin::BI__builtin_subcs:
2212  case Builtin::BI__builtin_subc:
2213  case Builtin::BI__builtin_subcl:
2214  case Builtin::BI__builtin_subcll:
2215  IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2216  break;
2217  }
2218 
2219  // Construct our resulting LLVM IR expression.
2220  llvm::Value *Carry1;
2221  llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
2222  X, Y, Carry1);
2223  llvm::Value *Carry2;
2224  llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
2225  Sum1, Carryin, Carry2);
2226  llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
2227  X->getType());
2228  Builder.CreateStore(CarryOut, CarryOutPtr);
2229  return RValue::get(Sum2);
2230  }
2231 
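 // Usage sketch (hypothetical operands, not in the original source): a
 // two-word add with carry propagation can be written as
 //   unsigned long carry, lo, hi;
 //   lo = __builtin_addcl(a_lo, b_lo, 0UL, &carry);
 //   hi = __builtin_addcl(a_hi, b_hi, carry, &carry);
 // with each call expanding to the pair of uadd.with.overflow intrinsics
 // shown in the comment above.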
2232  case Builtin::BI__builtin_add_overflow:
2233  case Builtin::BI__builtin_sub_overflow:
2234  case Builtin::BI__builtin_mul_overflow: {
2235  const clang::Expr *LeftArg = E->getArg(0);
2236  const clang::Expr *RightArg = E->getArg(1);
2237  const clang::Expr *ResultArg = E->getArg(2);
2238 
2239  clang::QualType ResultQTy =
2240  ResultArg->getType()->castAs<PointerType>()->getPointeeType();
2241 
2242  WidthAndSignedness LeftInfo =
2243  getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
2244  WidthAndSignedness RightInfo =
2245  getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
2246  WidthAndSignedness ResultInfo =
2247  getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
2248  WidthAndSignedness EncompassingInfo =
2249  EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
2250 
2251  llvm::Type *EncompassingLLVMTy =
2252  llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
2253 
2254  llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
2255 
2256  llvm::Intrinsic::ID IntrinsicId;
2257  switch (BuiltinID) {
2258  default:
2259  llvm_unreachable("Unknown overflow builtin id.");
2260  case Builtin::BI__builtin_add_overflow:
2261  IntrinsicId = EncompassingInfo.Signed
2262  ? llvm::Intrinsic::sadd_with_overflow
2263  : llvm::Intrinsic::uadd_with_overflow;
2264  break;
2265  case Builtin::BI__builtin_sub_overflow:
2266  IntrinsicId = EncompassingInfo.Signed
2267  ? llvm::Intrinsic::ssub_with_overflow
2268  : llvm::Intrinsic::usub_with_overflow;
2269  break;
2270  case Builtin::BI__builtin_mul_overflow:
2271  IntrinsicId = EncompassingInfo.Signed
2272  ? llvm::Intrinsic::smul_with_overflow
2273  : llvm::Intrinsic::umul_with_overflow;
2274  break;
2275  }
2276 
2277  llvm::Value *Left = EmitScalarExpr(LeftArg);
2278  llvm::Value *Right = EmitScalarExpr(RightArg);
2279  Address ResultPtr = EmitPointerWithAlignment(ResultArg);
2280 
2281  // Extend each operand to the encompassing type.
2282  Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
2283  Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
2284 
2285  // Perform the operation on the extended values.
2286  llvm::Value *Overflow, *Result;
2287  Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
2288 
2289  if (EncompassingInfo.Width > ResultInfo.Width) {
2290  // The encompassing type is wider than the result type, so we need to
2291  // truncate it.
2292  llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
2293 
2294  // To see if the truncation caused an overflow, we will extend
2295  // the result and then compare it to the original result.
2296  llvm::Value *ResultTruncExt = Builder.CreateIntCast(
2297  ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
2298  llvm::Value *TruncationOverflow =
2299  Builder.CreateICmpNE(Result, ResultTruncExt);
2300 
2301  Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
2302  Result = ResultTrunc;
2303  }
2304 
2305  // Finally, store the result using the pointer.
2306  bool isVolatile =
2307  ResultArg->getType()->getPointeeType().isVolatileQualified();
2308  Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
2309 
2310  return RValue::get(Overflow);
2311  }
2312 
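 // Illustration (not from the original source): for mixed-width operands,
 //   int r; bool ov = __builtin_add_overflow((long long)a, (unsigned)b, &r);
 // both operands are extended to a common encompassing signed 64-bit type,
 // llvm.sadd.with.overflow.i64 is emitted at that width, and an extra compare
 // catches any overflow introduced by the final truncation back to 'int'.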
2313  case Builtin::BI__builtin_uadd_overflow:
2314  case Builtin::BI__builtin_uaddl_overflow:
2315  case Builtin::BI__builtin_uaddll_overflow:
2316  case Builtin::BI__builtin_usub_overflow:
2317  case Builtin::BI__builtin_usubl_overflow:
2318  case Builtin::BI__builtin_usubll_overflow:
2319  case Builtin::BI__builtin_umul_overflow:
2320  case Builtin::BI__builtin_umull_overflow:
2321  case Builtin::BI__builtin_umulll_overflow:
2322  case Builtin::BI__builtin_sadd_overflow:
2323  case Builtin::BI__builtin_saddl_overflow:
2324  case Builtin::BI__builtin_saddll_overflow:
2325  case Builtin::BI__builtin_ssub_overflow:
2326  case Builtin::BI__builtin_ssubl_overflow:
2327  case Builtin::BI__builtin_ssubll_overflow:
2328  case Builtin::BI__builtin_smul_overflow:
2329  case Builtin::BI__builtin_smull_overflow:
2330  case Builtin::BI__builtin_smulll_overflow: {
2331 
2332  // We translate all of these builtins directly to the relevant LLVM IR node.
2333 
2334  // Scalarize our inputs.
2335  llvm::Value *X = EmitScalarExpr(E->getArg(0));
2336  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2337  Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
2338 
2339  // Decide which of the overflow intrinsics we are lowering to:
2340  llvm::Intrinsic::ID IntrinsicId;
2341  switch (BuiltinID) {
2342  default: llvm_unreachable("Unknown overflow builtin id.");
2343  case Builtin::BI__builtin_uadd_overflow:
2344  case Builtin::BI__builtin_uaddl_overflow:
2345  case Builtin::BI__builtin_uaddll_overflow:
2346  IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2347  break;
2348  case Builtin::BI__builtin_usub_overflow:
2349  case Builtin::BI__builtin_usubl_overflow:
2350  case Builtin::BI__builtin_usubll_overflow:
2351  IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2352  break;
2353  case Builtin::BI__builtin_umul_overflow:
2354  case Builtin::BI__builtin_umull_overflow:
2355  case Builtin::BI__builtin_umulll_overflow:
2356  IntrinsicId = llvm::Intrinsic::umul_with_overflow;
2357  break;
2358  case Builtin::BI__builtin_sadd_overflow:
2359  case Builtin::BI__builtin_saddl_overflow:
2360  case Builtin::BI__builtin_saddll_overflow:
2361  IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
2362  break;
2363  case Builtin::BI__builtin_ssub_overflow:
2364  case Builtin::BI__builtin_ssubl_overflow:
2365  case Builtin::BI__builtin_ssubll_overflow:
2366  IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
2367  break;
2368  case Builtin::BI__builtin_smul_overflow:
2369  case Builtin::BI__builtin_smull_overflow:
2370  case Builtin::BI__builtin_smulll_overflow:
2371  IntrinsicId = llvm::Intrinsic::smul_with_overflow;
2372  break;
2373  }
2374 
2375 
2376  llvm::Value *Carry;
2377  llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
2378  Builder.CreateStore(Sum, SumOutPtr);
2379 
2380  return RValue::get(Carry);
2381  }
2382  case Builtin::BI__builtin_addressof:
2383  return RValue::get(EmitLValue(E->getArg(0)).getPointer());
2384  case Builtin::BI__builtin_operator_new:
2385  return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2386  E->getArg(0), false);
2387  case Builtin::BI__builtin_operator_delete:
2388  return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2389  E->getArg(0), true);
2390  case Builtin::BI__noop:
2391  // __noop always evaluates to an integer literal zero.
2392  return RValue::get(ConstantInt::get(IntTy, 0));
2393  case Builtin::BI__builtin_call_with_static_chain: {
2394  const CallExpr *Call = cast<CallExpr>(E->getArg(0));
2395  const Expr *Chain = E->getArg(1);
2396  return EmitCall(Call->getCallee()->getType(),
2397  EmitCallee(Call->getCallee()), Call, ReturnValue,
2398  EmitScalarExpr(Chain));
2399  }
2400  case Builtin::BI_InterlockedExchange8:
2401  case Builtin::BI_InterlockedExchange16:
2402  case Builtin::BI_InterlockedExchange:
2403  case Builtin::BI_InterlockedExchangePointer:
2404  return RValue::get(
2405  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
2406  case Builtin::BI_InterlockedCompareExchangePointer: {
2407  llvm::Type *RTy;
2408  llvm::IntegerType *IntType =
2409  IntegerType::get(getLLVMContext(),
2410  getContext().getTypeSize(E->getType()));
2411  llvm::Type *IntPtrType = IntType->getPointerTo();
2412 
2413  llvm::Value *Destination =
2414  Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
2415 
2416  llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
2417  RTy = Exchange->getType();
2418  Exchange = Builder.CreatePtrToInt(Exchange, IntType);
2419 
2420  llvm::Value *Comparand =
2421  Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
2422 
2423  auto Result =
2424  Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
2425  AtomicOrdering::SequentiallyConsistent,
2426  AtomicOrdering::SequentiallyConsistent);
2427  Result->setVolatile(true);
2428 
2429  return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
2430  0),
2431  RTy));
2432  }
2433  case Builtin::BI_InterlockedCompareExchange8:
2434  case Builtin::BI_InterlockedCompareExchange16:
2435  case Builtin::BI_InterlockedCompareExchange:
2436  case Builtin::BI_InterlockedCompareExchange64: {
2437  AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
2438  EmitScalarExpr(E->getArg(0)),
2439  EmitScalarExpr(E->getArg(2)),
2440  EmitScalarExpr(E->getArg(1)),
2441  AtomicOrdering::SequentiallyConsistent,
2442  AtomicOrdering::SequentiallyConsistent);
2443  CXI->setVolatile(true);
2444  return RValue::get(Builder.CreateExtractValue(CXI, 0));
2445  }
2446  case Builtin::BI_InterlockedIncrement16:
2447  case Builtin::BI_InterlockedIncrement:
2448  return RValue::get(
2449  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
2450  case Builtin::BI_InterlockedDecrement16:
2451  case Builtin::BI_InterlockedDecrement:
2452  return RValue::get(
2453  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
2454  case Builtin::BI_InterlockedAnd8:
2455  case Builtin::BI_InterlockedAnd16:
2456  case Builtin::BI_InterlockedAnd:
2457  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
2458  case Builtin::BI_InterlockedExchangeAdd8:
2459  case Builtin::BI_InterlockedExchangeAdd16:
2460  case Builtin::BI_InterlockedExchangeAdd:
2461  return RValue::get(
2462  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
2463  case Builtin::BI_InterlockedExchangeSub8:
2464  case Builtin::BI_InterlockedExchangeSub16:
2465  case Builtin::BI_InterlockedExchangeSub:
2466  return RValue::get(
2467  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
2468  case Builtin::BI_InterlockedOr8:
2469  case Builtin::BI_InterlockedOr16:
2470  case Builtin::BI_InterlockedOr:
2471  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
2472  case Builtin::BI_InterlockedXor8:
2473  case Builtin::BI_InterlockedXor16:
2474  case Builtin::BI_InterlockedXor:
2475  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
2476  case Builtin::BI_interlockedbittestandset:
2477  return RValue::get(
2478  EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E));
2479 
2480  case Builtin::BI__exception_code:
2481  case Builtin::BI_exception_code:
2482  return RValue::get(EmitSEHExceptionCode());
2483  case Builtin::BI__exception_info:
2484  case Builtin::BI_exception_info:
2485  return RValue::get(EmitSEHExceptionInfo());
2486  case Builtin::BI__abnormal_termination:
2487  case Builtin::BI_abnormal_termination:
2488  return RValue::get(EmitSEHAbnormalTermination());
2489  case Builtin::BI_setjmpex: {
2490  if (getTarget().getTriple().isOSMSVCRT()) {
2491  llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2492  llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2493  getLLVMContext(), llvm::AttributeList::FunctionIndex,
2494  llvm::Attribute::ReturnsTwice);
2495  llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
2496  llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2497  "_setjmpex", ReturnsTwiceAttr, /*Local=*/true);
2498  llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2499  EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2500  llvm::Value *FrameAddr =
2501  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2502  ConstantInt::get(Int32Ty, 0));
2503  llvm::Value *Args[] = {Buf, FrameAddr};
2504  llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
2505  CS.setAttributes(ReturnsTwiceAttr);
2506  return RValue::get(CS.getInstruction());
2507  }
2508  break;
2509  }
2510  case Builtin::BI_setjmp: {
2511  if (getTarget().getTriple().isOSMSVCRT()) {
2512  llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2513  getLLVMContext(), llvm::AttributeList::FunctionIndex,
2514  llvm::Attribute::ReturnsTwice);
2515  llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2516  EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2517  llvm::CallSite CS;
2518  if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2519  llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
2520  llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
2521  llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
2522  "_setjmp3", ReturnsTwiceAttr, /*Local=*/true);
2523  llvm::Value *Count = ConstantInt::get(IntTy, 0);
2524  llvm::Value *Args[] = {Buf, Count};
2525  CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
2526  } else {
2527  llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2528  llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
2529  llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2530  "_setjmp", ReturnsTwiceAttr, /*Local=*/true);
2531  llvm::Value *FrameAddr =
2532  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2533  ConstantInt::get(Int32Ty, 0));
2534  llvm::Value *Args[] = {Buf, FrameAddr};
2535  CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
2536  }
2537  CS.setAttributes(ReturnsTwiceAttr);
2538  return RValue::get(CS.getInstruction());
2539  }
2540  break;
2541  }
2542 
2543  case Builtin::BI__GetExceptionInfo: {
2544  if (llvm::GlobalVariable *GV =
2545  CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
2546  return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
2547  break;
2548  }
2549 
2550  case Builtin::BI__fastfail:
2551  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
2552 
2553  case Builtin::BI__builtin_coro_size: {
2554  auto & Context = getContext();
2555  auto SizeTy = Context.getSizeType();
2556  auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
2557  Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
2558  return RValue::get(Builder.CreateCall(F));
2559  }
2560 
2561  case Builtin::BI__builtin_coro_id:
2562  return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
2563  case Builtin::BI__builtin_coro_promise:
2564  return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
2565  case Builtin::BI__builtin_coro_resume:
2566  return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
2567  case Builtin::BI__builtin_coro_frame:
2568  return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
2569  case Builtin::BI__builtin_coro_free:
2570  return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
2571  case Builtin::BI__builtin_coro_destroy:
2572  return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
2573  case Builtin::BI__builtin_coro_done:
2574  return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
2575  case Builtin::BI__builtin_coro_alloc:
2576  return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
2577  case Builtin::BI__builtin_coro_begin:
2578  return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
2579  case Builtin::BI__builtin_coro_end:
2580  return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
2581  case Builtin::BI__builtin_coro_suspend:
2582  return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
2583  case Builtin::BI__builtin_coro_param:
2584  return EmitCoroutineIntrinsic(E, Intrinsic::coro_param);
2585 
2586  // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
2587  case Builtin::BIread_pipe:
2588  case Builtin::BIwrite_pipe: {
2589  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2590  *Arg1 = EmitScalarExpr(E->getArg(1));
2591  CGOpenCLRuntime OpenCLRT(CGM);
2592  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2593  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2594 
2595  // Type of the generic packet parameter.
2596  unsigned GenericAS =
2597  getContext().getTargetAddressSpace(LangAS::opencl_generic);
2598  llvm::Type *I8PTy = llvm::PointerType::get(
2599  llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
2600 
2601  // Determine which overloaded version to generate the call for.
2602  if (2U == E->getNumArgs()) {
2603  const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
2604  : "__write_pipe_2";
2605  // Create a generic function type so the call works with any builtin or
2606  // user-defined type.
2607  llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
2608  llvm::FunctionType *FTy = llvm::FunctionType::get(
2609  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2610  Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
2611  return RValue::get(
2612  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2613  {Arg0, BCast, PacketSize, PacketAlign}));
2614  } else {
2615  assert(4 == E->getNumArgs() &&
2616  "Illegal number of parameters to pipe function");
2617  const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
2618  : "__write_pipe_4";
2619 
2620  llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
2621  Int32Ty, Int32Ty};
2622  Value *Arg2 = EmitScalarExpr(E->getArg(2)),
2623  *Arg3 = EmitScalarExpr(E->getArg(3));
2624  llvm::FunctionType *FTy = llvm::FunctionType::get(
2625  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2626  Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
2627  // We know the third argument is an integer type, but we may need to cast
2628  // it to i32.
2629  if (Arg2->getType() != Int32Ty)
2630  Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
2631  return RValue::get(Builder.CreateCall(
2632  CGM.CreateRuntimeFunction(FTy, Name),
2633  {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
2634  }
2635  }
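 // Lowering sketch (not in the original source): in OpenCL C,
 //   read_pipe(p, &val);
 // becomes a call to the runtime entry point
 //   int __read_pipe_2(pipe, generic void *, i32 size, i32 align)
 // while the four-argument reserved form maps to __read_pipe_4 with the
 // reservation id and packet index passed through (and likewise for writes).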
2636  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
2637  // functions
2638  case Builtin::BIreserve_read_pipe:
2639  case Builtin::BIreserve_write_pipe:
2640  case Builtin::BIwork_group_reserve_read_pipe:
2641  case Builtin::BIwork_group_reserve_write_pipe:
2642  case Builtin::BIsub_group_reserve_read_pipe:
2643  case Builtin::BIsub_group_reserve_write_pipe: {
2644  // Composing the mangled name for the function.
2645  const char *Name;
2646  if (BuiltinID == Builtin::BIreserve_read_pipe)
2647  Name = "__reserve_read_pipe";
2648  else if (BuiltinID == Builtin::BIreserve_write_pipe)
2649  Name = "__reserve_write_pipe";
2650  else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
2651  Name = "__work_group_reserve_read_pipe";
2652  else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
2653  Name = "__work_group_reserve_write_pipe";
2654  else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
2655  Name = "__sub_group_reserve_read_pipe";
2656  else
2657  Name = "__sub_group_reserve_write_pipe";
2658 
2659  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2660  *Arg1 = EmitScalarExpr(E->getArg(1));
2661  llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
2662  CGOpenCLRuntime OpenCLRT(CGM);
2663  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2664  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2665 
2666  // Building the generic function prototype.
2667  llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
2668  llvm::FunctionType *FTy = llvm::FunctionType::get(
2669  ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2670  // We know the second argument is an integer type, but we may need to cast
2671  // it to i32.
2672  if (Arg1->getType() != Int32Ty)
2673  Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
2674  return RValue::get(
2675  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2676  {Arg0, Arg1, PacketSize, PacketAlign}));
2677  }
2678  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
2679  // functions
2680  case Builtin::BIcommit_read_pipe:
2681  case Builtin::BIcommit_write_pipe:
2682  case Builtin::BIwork_group_commit_read_pipe:
2683  case Builtin::BIwork_group_commit_write_pipe:
2684  case Builtin::BIsub_group_commit_read_pipe:
2685  case Builtin::BIsub_group_commit_write_pipe: {
2686  const char *Name;
2687  if (BuiltinID == Builtin::BIcommit_read_pipe)
2688  Name = "__commit_read_pipe";
2689  else if (BuiltinID == Builtin::BIcommit_write_pipe)
2690  Name = "__commit_write_pipe";
2691  else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
2692  Name = "__work_group_commit_read_pipe";
2693  else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
2694  Name = "__work_group_commit_write_pipe";
2695  else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
2696  Name = "__sub_group_commit_read_pipe";
2697  else
2698  Name = "__sub_group_commit_write_pipe";
2699 
2700  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2701  *Arg1 = EmitScalarExpr(E->getArg(1));
2702  CGOpenCLRuntime OpenCLRT(CGM);
2703  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2704  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2705 
2706  // Building the generic function prototype.
2707  llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
2708  llvm::FunctionType *FTy =
2709  llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
2710  llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2711 
2712  return RValue::get(
2713  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2714  {Arg0, Arg1, PacketSize, PacketAlign}));
2715  }
2716  // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
2717  case Builtin::BIget_pipe_num_packets:
2718  case Builtin::BIget_pipe_max_packets: {
2719  const char *Name;
2720  if (BuiltinID == Builtin::BIget_pipe_num_packets)
2721  Name = "__get_pipe_num_packets";
2722  else
2723  Name = "__get_pipe_max_packets";
2724 
2725  // Building the generic function prototype.
2726  Value *Arg0 = EmitScalarExpr(E->getArg(0));
2727  CGOpenCLRuntime OpenCLRT(CGM);
2728  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2729  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2730  llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
2731  llvm::FunctionType *FTy = llvm::FunctionType::get(
2732  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2733 
2734  return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2735  {Arg0, PacketSize, PacketAlign}));
2736  }
2737 
2738  // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
2739  case Builtin::BIto_global:
2740  case Builtin::BIto_local:
2741  case Builtin::BIto_private: {
2742  auto Arg0 = EmitScalarExpr(E->getArg(0));
2743  auto NewArgT = llvm::PointerType::get(Int8Ty,
2744  CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2745  auto NewRetT = llvm::PointerType::get(Int8Ty,
2746  CGM.getContext().getTargetAddressSpace(
2747  E->getType()->getPointeeType().getQualifiers().getAddressSpace()));
2748  auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
2749  llvm::Value *NewArg;
2750  if (Arg0->getType()->getPointerAddressSpace() !=
2751  NewArgT->getPointerAddressSpace())
2752  NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
2753  else
2754  NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
2755  auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
2756  auto NewCall =
2757  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
2758  return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
2759  ConvertType(E->getType())));
2760  }
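// For example, to_global(p) is lowered above to (roughly)
//   (__global char *)__to_global((__generic char *)p)
// i.e. the argument is cast to a generic i8*, the runtime function named
// "__" + the builtin's name is called, and the result is cast back to the
// call expression's declared result type.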
2761 
2762  // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
2763  // It contains four different overload formats specified in Table 6.13.17.1.
2764  case Builtin::BIenqueue_kernel: {
2765  StringRef Name; // Generated function call name
2766  unsigned NumArgs = E->getNumArgs();
2767 
2768  llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
2769  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2770  getContext().getTargetAddressSpace(LangAS::opencl_generic));
2771 
2772  llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
2773  llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
2774  LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
2775  llvm::Value *Range = NDRangeL.getAddress().getPointer();
2776  llvm::Type *RangeTy = NDRangeL.getAddress().getType();
2777 
2778  if (NumArgs == 4) {
2779  // The most basic form of the call with parameters:
2780  // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
2781  Name = "__enqueue_kernel_basic";
2782  llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
2783  GenericVoidPtrTy};
2784  llvm::FunctionType *FTy = llvm::FunctionType::get(
2785  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2786 
2787  auto Info =
2788  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
2789  llvm::Value *Kernel =
2790  Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
2791  llvm::Value *Block =
2792  Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
2793 
2794  AttrBuilder B;
2795  B.addAttribute(Attribute::ByVal);
2796  llvm::AttributeList ByValAttrSet =
2797  llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
2798 
2799  auto RTCall =
2800  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
2801  {Queue, Flags, Range, Kernel, Block});
2802  RTCall->setAttributes(ByValAttrSet);
2803  return RValue::get(RTCall);
2804  }
2805  assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
2806 
2807  // Create a temporary array to hold the sizes of local pointer arguments
2808  // for the block. \p First is the position of the first size argument.
2809  auto CreateArrayForSizeVar = [=](unsigned First) {
2810  auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First);
2811  auto *Arr = Builder.CreateAlloca(AT);
2812  llvm::Value *Ptr;
2813  // Each of the following arguments specifies the size of the corresponding
2814  // argument passed to the enqueued block.
2815  auto *Zero = llvm::ConstantInt::get(IntTy, 0);
2816  for (unsigned I = First; I < NumArgs; ++I) {
2817  auto *Index = llvm::ConstantInt::get(IntTy, I - First);
2818  auto *GEP = Builder.CreateGEP(Arr, {Zero, Index});
2819  if (I == First)
2820  Ptr = GEP;
2821  auto *V =
2822  Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
2823  Builder.CreateAlignedStore(
2824  V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy));
2825  }
2826  return Ptr;
2827  };
2828 
2829  // Could have events and/or vaargs.
2830  if (E->getArg(3)->getType()->isBlockPointerType()) {
2831  // No events passed, but has variadic arguments.
2832  Name = "__enqueue_kernel_vaargs";
2833  auto Info =
2834  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
2835  llvm::Value *Kernel =
2836  Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
2837  auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
2838  auto *PtrToSizeArray = CreateArrayForSizeVar(4);
2839 
2840  // Create a vector of the arguments, as well as a constant value to
2841  // express to the runtime the number of variadic arguments.
2842  std::vector<llvm::Value *> Args = {
2843  Queue, Flags, Range,
2844  Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4),
2845  PtrToSizeArray};
2846  std::vector<llvm::Type *> ArgTys = {
2847  QueueTy, IntTy, RangeTy,
2848  GenericVoidPtrTy, GenericVoidPtrTy, IntTy,
2849  PtrToSizeArray->getType()};
2850 
2851  llvm::FunctionType *FTy = llvm::FunctionType::get(
2852  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2853  return RValue::get(
2854  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
 2855  llvm::ArrayRef<llvm::Value *>(Args)));
 2856  }
 2857  // Any remaining calls have event arguments passed.
2858  if (NumArgs >= 7) {
2859  llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
2860  llvm::Type *EventPtrTy = EventTy->getPointerTo(
2861  CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2862 
2863  llvm::Value *NumEvents =
2864  Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
2865  llvm::Value *EventList =
2866  E->getArg(4)->getType()->isArrayType()
2867  ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
2868  : EmitScalarExpr(E->getArg(4));
2869  llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
2870  // Convert to generic address space.
2871  EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
2872  ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
2873  auto Info =
2874  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
2875  llvm::Value *Kernel =
2876  Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
2877  llvm::Value *Block =
2878  Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
2879 
2880  std::vector<llvm::Type *> ArgTys = {
2881  QueueTy, Int32Ty, RangeTy, Int32Ty,
2882  EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
2883 
2884  std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents,
2885  EventList, ClkEvent, Kernel, Block};
2886 
2887  if (NumArgs == 7) {
2888  // Has events but no variadics.
2889  Name = "__enqueue_kernel_basic_events";
2890  llvm::FunctionType *FTy = llvm::FunctionType::get(
2891  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2892  return RValue::get(
2893  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
 2894  llvm::ArrayRef<llvm::Value *>(Args)));
 2895  }
2896  // Has event info and variadics
2897  // Pass the number of variadics to the runtime function too.
2898  Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
2899  ArgTys.push_back(Int32Ty);
2900  Name = "__enqueue_kernel_events_vaargs";
2901 
2902  auto *PtrToSizeArray = CreateArrayForSizeVar(7);
2903  Args.push_back(PtrToSizeArray);
2904  ArgTys.push_back(PtrToSizeArray->getType());
2905 
2906  llvm::FunctionType *FTy = llvm::FunctionType::get(
2907  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2908  return RValue::get(
2909  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
 2910  llvm::ArrayRef<llvm::Value *>(Args)));
 2911  }
2912  LLVM_FALLTHROUGH;
2913  }
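// To summarize the forms handled above: a 4-argument enqueue_kernel call maps
// to __enqueue_kernel_basic, a call with local-size arguments but no events
// maps to __enqueue_kernel_vaargs, a 7-argument call with events maps to
// __enqueue_kernel_basic_events, and a call with both events and local-size
// arguments maps to __enqueue_kernel_events_vaargs.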
2914  // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
2915  // parameter.
2916  case Builtin::BIget_kernel_work_group_size: {
2917  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2918  getContext().getTargetAddressSpace(LangAS::opencl_generic));
2919  auto Info =
2920  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
2921  Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
2922  Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
2923  return RValue::get(Builder.CreateCall(
2924  CGM.CreateRuntimeFunction(
2925  llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
2926  false),
2927  "__get_kernel_work_group_size_impl"),
2928  {Kernel, Arg}));
2929  }
2930  case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
2931  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2932  getContext().getTargetAddressSpace(LangAS::opencl_generic));
2933  auto Info =
2934  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
2935  Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
2936  Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
2937  return RValue::get(Builder.CreateCall(
2938  CGM.CreateRuntimeFunction(
2939  llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
2940  false),
2941  "__get_kernel_preferred_work_group_multiple_impl"),
2942  {Kernel, Arg}));
2943  }
2944  case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
2945  case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
2946  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2947  getContext().getTargetAddressSpace(LangAS::opencl_generic));
2948  LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
2949  llvm::Value *NDRange = NDRangeL.getAddress().getPointer();
2950  auto Info =
2951  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
2952  Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
2953  Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
2954  const char *Name =
2955  BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
2956  ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
2957  : "__get_kernel_sub_group_count_for_ndrange_impl";
2958  return RValue::get(Builder.CreateCall(
2959  CGM.CreateRuntimeFunction(
2960  llvm::FunctionType::get(
2961  IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
2962  false),
2963  Name),
2964  {NDRange, Kernel, Block}));
2965  }
2966 
2967  case Builtin::BI__builtin_store_half:
2968  case Builtin::BI__builtin_store_halff: {
2969  Value *Val = EmitScalarExpr(E->getArg(0));
2970  Address Address = EmitPointerWithAlignment(E->getArg(1));
2971  Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
2972  return RValue::get(Builder.CreateStore(HalfVal, Address));
2973  }
2974  case Builtin::BI__builtin_load_half: {
2975  Address Address = EmitPointerWithAlignment(E->getArg(0));
2976  Value *HalfVal = Builder.CreateLoad(Address);
2977  return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
2978  }
2979  case Builtin::BI__builtin_load_halff: {
2980  Address Address = EmitPointerWithAlignment(E->getArg(0));
2981  Value *HalfVal = Builder.CreateLoad(Address);
2982  return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
2983  }
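// For example, __builtin_store_halff(f, p) truncates the float to half and
// stores it, while __builtin_load_half(p) and __builtin_load_halff(p) load a
// half and extend it to double and float respectively, as emitted above.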
2984  case Builtin::BIprintf:
2985  if (getTarget().getTriple().isNVPTX())
2986  return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
2987  break;
2988  case Builtin::BI__builtin_canonicalize:
2989  case Builtin::BI__builtin_canonicalizef:
2990  case Builtin::BI__builtin_canonicalizel:
2991  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
2992 
2993  case Builtin::BI__builtin_thread_pointer: {
2994  if (!getContext().getTargetInfo().isTLSSupported())
2995  CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
2996  // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
2997  break;
2998  }
2999  case Builtin::BI__builtin_os_log_format:
3000  return emitBuiltinOSLogFormat(*E);
3001 
3002  case Builtin::BI__builtin_os_log_format_buffer_size: {
 3003  analyze_os_log::OSLogBufferLayout Layout;
 3004  analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
3005  return RValue::get(ConstantInt::get(ConvertType(E->getType()),
3006  Layout.size().getQuantity()));
3007  }
3008 
3009  case Builtin::BI__xray_customevent: {
3010  if (!ShouldXRayInstrumentFunction())
3011  return RValue::getIgnored();
3012  if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) {
3013  if (XRayAttr->neverXRayInstrument())
3014  return RValue::getIgnored();
3015  }
3016  Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
3017  auto FTy = F->getFunctionType();
3018  auto Arg0 = E->getArg(0);
3019  auto Arg0Val = EmitScalarExpr(Arg0);
3020  auto Arg0Ty = Arg0->getType();
3021  auto PTy0 = FTy->getParamType(0);
3022  if (PTy0 != Arg0Val->getType()) {
3023  if (Arg0Ty->isArrayType())
3024  Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
3025  else
3026  Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
3027  }
3028  auto Arg1 = EmitScalarExpr(E->getArg(1));
3029  auto PTy1 = FTy->getParamType(1);
3030  if (PTy1 != Arg1->getType())
3031  Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
3032  return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
3033  }
3034 
3035  case Builtin::BI__builtin_ms_va_start:
3036  case Builtin::BI__builtin_ms_va_end:
3037  return RValue::get(
3038  EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
3039  BuiltinID == Builtin::BI__builtin_ms_va_start));
3040 
3041  case Builtin::BI__builtin_ms_va_copy: {
3042  // Lower this manually. We can't reliably determine whether or not any
3043  // given va_copy() is for a Win64 va_list from the calling convention
3044  // alone, because it's legal to do this from a System V ABI function.
3045  // With opaque pointer types, we won't have enough information in LLVM
3046  // IR to determine this from the argument types, either. Best to do it
3047  // now, while we have enough information.
3048  Address DestAddr = EmitMSVAListRef(E->getArg(0));
3049  Address SrcAddr = EmitMSVAListRef(E->getArg(1));
3050 
3051  llvm::Type *BPP = Int8PtrPtrTy;
3052 
3053  DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
3054  DestAddr.getAlignment());
3055  SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
3056  SrcAddr.getAlignment());
3057 
3058  Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
3059  return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
3060  }
3061  }
3062 
3063  // If this is an alias for a lib function (e.g. __builtin_sin), emit
3064  // the call using the normal call path, but using the unmangled
3065  // version of the function name.
3066  if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
3067  return emitLibraryCall(*this, FD, E,
3068  CGM.getBuiltinLibFunction(FD, BuiltinID));
3069 
3070  // If this is a predefined lib function (e.g. malloc), emit the call
3071  // using exactly the normal call path.
3072  if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
3073  return emitLibraryCall(*this, FD, E,
3074  cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
3075 
3076  // Check that a call to a target specific builtin has the correct target
3077  // features.
 3078  // This is down here to avoid checking non-target-specific builtins; however,
 3079  // if generic builtins start to require generic target features then we
 3080  // can move this up to the beginning of the function.
3081  checkTargetFeatures(E, FD);
3082 
3083  // See if we have a target specific intrinsic.
3084  const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
3085  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
3086  StringRef Prefix =
3087  llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
3088  if (!Prefix.empty()) {
3089  IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
 3090  // NOTE: we don't need to perform a compatibility flag check here since the
 3091  // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters
 3092  // the MS builtins via ALL_MS_LANGUAGES, so they are filtered out earlier.
3093  if (IntrinsicID == Intrinsic::not_intrinsic)
3094  IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
3095  }
3096 
 3097  if (IntrinsicID != Intrinsic::not_intrinsic) {
 3098  SmallVector<Value*, 16> Args;
 3099 
3100  // Find out if any arguments are required to be integer constant
3101  // expressions.
3102  unsigned ICEArguments = 0;
 3103  ASTContext::GetBuiltinTypeError Error;
 3104  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3105  assert(Error == ASTContext::GE_None && "Should not codegen an error");
3106 
3107  Function *F = CGM.getIntrinsic(IntrinsicID);
3108  llvm::FunctionType *FTy = F->getFunctionType();
3109 
3110  for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
3111  Value *ArgValue;
3112  // If this is a normal argument, just emit it as a scalar.
3113  if ((ICEArguments & (1 << i)) == 0) {
3114  ArgValue = EmitScalarExpr(E->getArg(i));
3115  } else {
3116  // If this is required to be a constant, constant fold it so that we
3117  // know that the generated intrinsic gets a ConstantInt.
3118  llvm::APSInt Result;
3119  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
3120  assert(IsConst && "Constant arg isn't actually constant?");
3121  (void)IsConst;
3122  ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
3123  }
3124 
3125  // If the intrinsic arg type is different from the builtin arg type
3126  // we need to do a bit cast.
3127  llvm::Type *PTy = FTy->getParamType(i);
3128  if (PTy != ArgValue->getType()) {
3129  assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
3130  "Must be able to losslessly bit cast to param");
3131  ArgValue = Builder.CreateBitCast(ArgValue, PTy);
3132  }
3133 
3134  Args.push_back(ArgValue);
3135  }
3136 
3137  Value *V = Builder.CreateCall(F, Args);
3138  QualType BuiltinRetType = E->getType();
3139 
3140  llvm::Type *RetTy = VoidTy;
3141  if (!BuiltinRetType->isVoidType())
3142  RetTy = ConvertType(BuiltinRetType);
3143 
3144  if (RetTy != V->getType()) {
3145  assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
3146  "Must be able to losslessly bit cast result type");
3147  V = Builder.CreateBitCast(V, RetTy);
3148  }
3149 
3150  return RValue::get(V);
3151  }
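// For example, a target builtin whose GCC or MS name maps directly onto an
// LLVM intrinsic is handled entirely by the block above: arguments flagged in
// ICEArguments (immediates that must be integer constant expressions) are
// folded to ConstantInts, mismatched argument and result types are bitcast to
// the intrinsic's signature, and the call result is returned.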
3152 
3153  // See if we have a target specific builtin that needs to be lowered.
3154  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
3155  return RValue::get(V);
3156 
3157  ErrorUnsupported(E, "builtin function");
3158 
3159  // Unknown builtin, for now just dump it out and return undef.
3160  return GetUndefRValue(E->getType());
3161 }
3162 
 3163 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
 3164  unsigned BuiltinID, const CallExpr *E,
3165  llvm::Triple::ArchType Arch) {
3166  switch (Arch) {
3167  case llvm::Triple::arm:
3168  case llvm::Triple::armeb:
3169  case llvm::Triple::thumb:
3170  case llvm::Triple::thumbeb:
3171  return CGF->EmitARMBuiltinExpr(BuiltinID, E);
3172  case llvm::Triple::aarch64:
3173  case llvm::Triple::aarch64_be:
3174  return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
3175  case llvm::Triple::x86:
3176  case llvm::Triple::x86_64:
3177  return CGF->EmitX86BuiltinExpr(BuiltinID, E);
3178  case llvm::Triple::ppc:
3179  case llvm::Triple::ppc64:
3180  case llvm::Triple::ppc64le:
3181  return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
3182  case llvm::Triple::r600:
3183  case llvm::Triple::amdgcn:
3184  return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
3185  case llvm::Triple::systemz:
3186  return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
3187  case llvm::Triple::nvptx:
3188  case llvm::Triple::nvptx64:
3189  return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
3190  case llvm::Triple::wasm32:
3191  case llvm::Triple::wasm64:
3192  return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
3193  default:
3194  return nullptr;
3195  }
3196 }
3197 
 3198 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
 3199  const CallExpr *E) {
3200  if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
3201  assert(getContext().getAuxTargetInfo() && "Missing aux target info");
 3202  return EmitTargetArchBuiltinExpr(
 3203  this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
3204  getContext().getAuxTargetInfo()->getTriple().getArch());
3205  }
3206 
3207  return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
3208  getTarget().getTriple().getArch());
3209 }
3210 
3211 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
3212  NeonTypeFlags TypeFlags,
3213  bool V1Ty=false) {
3214  int IsQuad = TypeFlags.isQuad();
3215  switch (TypeFlags.getEltType()) {
3216  case NeonTypeFlags::Int8:
3217  case NeonTypeFlags::Poly8:
3218  return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
3219  case NeonTypeFlags::Int16:
3220  case NeonTypeFlags::Poly16:
 3221  case NeonTypeFlags::Float16:
 3222  return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
3223  case NeonTypeFlags::Int32:
3224  return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
3225  case NeonTypeFlags::Int64:
3226  case NeonTypeFlags::Poly64:
3227  return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
 3228  case NeonTypeFlags::Poly128:
 3229  // FIXME: i128 and f128 don't get full support in Clang and llvm.
 3230  // There is a lot of i128 and f128 API missing,
 3231  // so we use v16i8 to represent poly128 and get it pattern matched.
 3232  return llvm::VectorType::get(CGF->Int8Ty, 16);
 3233  case NeonTypeFlags::Float32:
 3234  return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
 3235  case NeonTypeFlags::Float64:
 3236  return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
3237  }
3238  llvm_unreachable("Unknown vector element type!");
3239 }
3240 
3241 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
3242  NeonTypeFlags IntTypeFlags) {
3243  int IsQuad = IntTypeFlags.isQuad();
3244  switch (IntTypeFlags.getEltType()) {
3245  case NeonTypeFlags::Int32:
3246  return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
3247  case NeonTypeFlags::Int64:
3248  return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
3249  default:
3250  llvm_unreachable("Type can't be converted to floating-point!");
3251  }
3252 }
3253 
 3254 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
 3255  unsigned nElts = V->getType()->getVectorNumElements();
3256  Value* SV = llvm::ConstantVector::getSplat(nElts, C);
3257  return Builder.CreateShuffleVector(V, V, SV, "lane");
3258 }
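// For example, splatting lane 1 of a <4 x i32> value produces a shufflevector
// with the constant mask <1, 1, 1, 1>, broadcasting that lane to every
// element of the result.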
3259 
 3260 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
 3261  const char *name,
3262  unsigned shift, bool rightshift) {
3263  unsigned j = 0;
3264  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3265  ai != ae; ++ai, ++j)
3266  if (shift > 0 && shift == j)
3267  Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
3268  else
3269  Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
3270 
3271  return Builder.CreateCall(F, Ops, name);
3272 }
3273 
 3274 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
 3275  bool neg) {
3276  int SV = cast<ConstantInt>(V)->getSExtValue();
3277  return ConstantInt::get(Ty, neg ? -SV : SV);
3278 }
3279 
3280 // \brief Right-shift a vector by a constant.
 3281 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
 3282  llvm::Type *Ty, bool usgn,
3283  const char *name) {
3284  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
3285 
3286  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
3287  int EltSize = VTy->getScalarSizeInBits();
3288 
3289  Vec = Builder.CreateBitCast(Vec, Ty);
3290 
3291  // lshr/ashr are undefined when the shift amount is equal to the vector
3292  // element size.
3293  if (ShiftAmt == EltSize) {
3294  if (usgn) {
3295  // Right-shifting an unsigned value by its size yields 0.
3296  return llvm::ConstantAggregateZero::get(VTy);
3297  } else {
3298  // Right-shifting a signed value by its size is equivalent
3299  // to a shift of size-1.
3300  --ShiftAmt;
3301  Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
3302  }
3303  }
3304 
3305  Shift = EmitNeonShiftVector(Shift, Ty, false);
3306  if (usgn)
3307  return Builder.CreateLShr(Vec, Shift, name);
3308  else
3309  return Builder.CreateAShr(Vec, Shift, name);
3310 }
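// For example, an unsigned right shift of a <4 x i32> vector by 32 folds to a
// zero vector, while the signed form is clamped to a shift by 31; this mirrors
// what the NEON instructions do for a shift by the full element width.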
3311 
3312 enum {
3313  AddRetType = (1 << 0),
3314  Add1ArgType = (1 << 1),
3315  Add2ArgTypes = (1 << 2),
3316 
3317  VectorizeRetType = (1 << 3),
3318  VectorizeArgTypes = (1 << 4),
3319 
3320  InventFloatType = (1 << 5),
3321  UnsignedAlts = (1 << 6),
3322 
3323  Use64BitVectors = (1 << 7),
3324  Use128BitVectors = (1 << 8),
 3325 
 3326  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
 3327  VectorRet = AddRetType | VectorizeRetType,
 3328  VectorRetGetArgs01 =
 3329  AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
 3330  FpCmpzModifiers =
 3331  AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
 3332 };
3333 
3334 namespace {
3335 struct NeonIntrinsicInfo {
3336  const char *NameHint;
3337  unsigned BuiltinID;
3338  unsigned LLVMIntrinsic;
3339  unsigned AltLLVMIntrinsic;
3340  unsigned TypeModifier;
3341 
3342  bool operator<(unsigned RHSBuiltinID) const {
3343  return BuiltinID < RHSBuiltinID;
3344  }
3345  bool operator<(const NeonIntrinsicInfo &TE) const {
3346  return BuiltinID < TE.BuiltinID;
3347  }
3348 };
3349 } // end anonymous namespace
3350 
3351 #define NEONMAP0(NameBase) \
3352  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
3353 
3354 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
 3355  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, \
3356  Intrinsic::LLVMIntrinsic, 0, TypeModifier }
3357 
3358 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
 3359  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, \
3360  Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
3361  TypeModifier }
3362 
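// For example, NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds,
// Add1ArgType | UnsignedAlts) expands to an entry that maps
// __builtin_neon_vhadd_v to Intrinsic::arm_neon_vhaddu, with
// Intrinsic::arm_neon_vhadds as the signed alternative chosen later based on
// the UnsignedAlts type modifier.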
3363 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
3364  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3365  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3366  NEONMAP1(vabs_v, arm_neon_vabs, 0),
3367  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
3368  NEONMAP0(vaddhn_v),
3369  NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
3370  NEONMAP1(vaeseq_v, arm_neon_aese, 0),
3371  NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
3372  NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
3373  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
3374  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
3375  NEONMAP1(vcage_v, arm_neon_vacge, 0),
3376  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
3377  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
3378  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
3379  NEONMAP1(vcale_v, arm_neon_vacge, 0),
3380  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
3381  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
3382  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
3383  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
3384  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
3385  NEONMAP1(vclz_v, ctlz, Add1ArgType),
3386  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3387  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3388  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3389  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
3390  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
3391  NEONMAP0(vcvt_f32_v),
3392  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3393  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3394  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3395  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3396  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3397  NEONMAP0(vcvt_s32_v),
3398  NEONMAP0(vcvt_s64_v),
3399  NEONMAP0(vcvt_u32_v),
3400  NEONMAP0(vcvt_u64_v),
3401  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
3402  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
3403  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
3404  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
3405  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
3406  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
3407  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
3408  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
3409  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
3410  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
3411  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
3412  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
3413  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
3414  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
3415  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
3416  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
3417  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
3418  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
3419  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
3420  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
3421  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
3422  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
3423  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
3424  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
3425  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
3426  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
3427  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
3428  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
3429  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
3430  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
3431  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
3432  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
3433  NEONMAP0(vcvtq_f32_v),
3434  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3435  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3436  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3437  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3438  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3439  NEONMAP0(vcvtq_s32_v),
3440  NEONMAP0(vcvtq_s64_v),
3441  NEONMAP0(vcvtq_u32_v),
3442  NEONMAP0(vcvtq_u64_v),
3443  NEONMAP0(vext_v),
3444  NEONMAP0(vextq_v),
3445  NEONMAP0(vfma_v),
3446  NEONMAP0(vfmaq_v),
3447  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3448  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3449  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3450  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3451  NEONMAP0(vld1_dup_v),
3452  NEONMAP1(vld1_v, arm_neon_vld1, 0),
3453  NEONMAP0(vld1q_dup_v),
3454  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
3455  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
3456  NEONMAP1(vld2_v, arm_neon_vld2, 0),
3457  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
3458  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
3459  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
3460  NEONMAP1(vld3_v, arm_neon_vld3, 0),
3461  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
3462  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
3463  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
3464  NEONMAP1(vld4_v, arm_neon_vld4, 0),
3465  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
3466  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
3467  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3468  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
3469  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
3470  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3471  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3472  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
3473  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
3474  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3475  NEONMAP0(vmovl_v),
3476  NEONMAP0(vmovn_v),
3477  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
3478  NEONMAP0(vmull_v),
3479  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
3480  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3481  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3482  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
3483  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3484  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3485  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
3486  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
3487  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
3488  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
3489  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
3490  NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3491  NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3492  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
3493  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
3494  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
3495  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
3496  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
3497  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
3498  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
3499  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
3500  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
3501  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
3502  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
3503  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3504  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3505  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3506  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3507  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3508  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3509  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
3510  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
3511  NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3512  NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3513  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
3514  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3515  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3516  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
3517  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
3518  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3519  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3520  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
3521  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
3522  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
3523  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
3524  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
3525  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
3526  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
3527  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
3528  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
3529  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
3530  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
3531  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
3532  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3533  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3534  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3535  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3536  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3537  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3538  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
3539  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
3540  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
3541  NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
3542  NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
3543  NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
3544  NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
3545  NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
3546  NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
3547  NEONMAP0(vshl_n_v),
3548  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3549  NEONMAP0(vshll_n_v),
3550  NEONMAP0(vshlq_n_v),
3551  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3552  NEONMAP0(vshr_n_v),
3553  NEONMAP0(vshrn_n_v),
3554  NEONMAP0(vshrq_n_v),
3555  NEONMAP1(vst1_v, arm_neon_vst1, 0),
3556  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
3557  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
3558  NEONMAP1(vst2_v, arm_neon_vst2, 0),
3559  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
3560  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
3561  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
3562  NEONMAP1(vst3_v, arm_neon_vst3, 0),
3563  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
3564  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
3565  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
3566  NEONMAP1(vst4_v, arm_neon_vst4, 0),
3567  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
3568  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
3569  NEONMAP0(vsubhn_v),
3570  NEONMAP0(vtrn_v),
3571  NEONMAP0(vtrnq_v),
3572  NEONMAP0(vtst_v),
3573  NEONMAP0(vtstq_v),
3574  NEONMAP0(vuzp_v),
3575  NEONMAP0(vuzpq_v),
3576  NEONMAP0(vzip_v),
3577  NEONMAP0(vzipq_v)
3578 };
3579 
3580 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
3581  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
3582  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
3583  NEONMAP0(vaddhn_v),
3584  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
3585  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
3586  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
3587  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
3588  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
3589  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
3590  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
3591  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
3592  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
3593  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
3594  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
3595  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
3596  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
3597  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
3598  NEONMAP1(vclz_v, ctlz, Add1ArgType),
3599  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3600  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3601  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3602  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
3603  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
3604  NEONMAP0(vcvt_f32_v),
3605  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3606  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3607  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3608  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3609  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3610  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3611  NEONMAP0(vcvtq_f32_v),
3612  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3613  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3614  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3615  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3616  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3617  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3618  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
3619  NEONMAP0(vext_v),
3620  NEONMAP0(vextq_v),
3621  NEONMAP0(vfma_v),
3622  NEONMAP0(vfmaq_v),
3623  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3624  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3625  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3626  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3627  NEONMAP0(vmovl_v),
3628  NEONMAP0(vmovn_v),
3629  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
3630  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
3631  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
3632  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3633  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3634  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
3635  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
3636  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
3637  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3638  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3639  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
3640  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
3641  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
3642  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
3643  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
3644  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
3645  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
3646  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
3647  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
3648  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
3649  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
3650  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3651  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3652  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
3653  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3654  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
3655  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3656  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
3657  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
3658  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3659  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3660  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
3661  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3662  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3663  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
3664  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
3665  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3666  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3667  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3668  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3669  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3670  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3671  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3672  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3673  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
3674  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
3675  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
3676  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
3677  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
3678  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
3679  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
3680  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
3681  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
3682  NEONMAP0(vshl_n_v),
3683  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3684  NEONMAP0(vshll_n_v),
3685  NEONMAP0(vshlq_n_v),
3686  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3687  NEONMAP0(vshr_n_v),
3688  NEONMAP0(vshrn_n_v),
3689  NEONMAP0(vshrq_n_v),
3690  NEONMAP0(vsubhn_v),
3691  NEONMAP0(vtst_v),
3692  NEONMAP0(vtstq_v),
3693 };
3694 
3695 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
3696  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
3697  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
3698  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
3699  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3700  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3701  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3702  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3703  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3704  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3705  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3706  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3707  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
3708  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3709  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
3710  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3711  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3712  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3713  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3714  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3715  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3716  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3717  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3718  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3719  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3720  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3721  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3722  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3723  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3724  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3725  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3726  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3727  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3728  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3729  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3730  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3731  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3732  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3733  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3734  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3735  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3736  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3737  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3738  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3739  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3740  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3741  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3742  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3743  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3744  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
3745  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3746  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3747  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3748  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3749  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3750  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3751  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3752  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3753  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3754  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3755  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3756  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3757  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3758  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3759  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3760  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3761  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3762  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3763  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3764  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3765  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
3766  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
3767  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
3768  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3769  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3770  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3771  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3772  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3773  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3774  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3775  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3776  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3777  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3778  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3779  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
3780  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3781  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
3782  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3783  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3784  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
3785  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
3786  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3787  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3788  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
3789  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
3790  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
3791  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
3792  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
3793  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
3794  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
3795  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
3796  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3797  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3798  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3799  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3800  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
3801  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3802  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3803  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3804  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
3805  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3806  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
3807  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
3808  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
3809  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3810  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3811  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
3812  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
3813  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3814  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3815  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
3816  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
3817  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
3818  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
3819  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3820  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3821  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3822  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3823  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
3824  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3825  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3826  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3827  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3828  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3829  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3830  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
3831  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
3832  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3833  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3834  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3835  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3836  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
3837  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
3838  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
3839  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
3840  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3841  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3842  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
3843  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
3844  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
3845  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3846  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3847  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3848  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3849  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
3850  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3851  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3852  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3853  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3854  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
3855  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
3856  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3857  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3858  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
3859  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
3860  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
3861  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
3862  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
3863  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
3864  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
3865  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
3866  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
3867  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
3868  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
3869  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
3870  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
3871  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
3872  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
3873  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
3874  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
3875  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
3876  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
3877  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
3878  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3879  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
3880  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3881  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
3882  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
3883  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
3884  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3885  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
3886  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3887  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
3888 };
3889 
3890 #undef NEONMAP0
3891 #undef NEONMAP1
3892 #undef NEONMAP2
 3893 
 3894 static bool NEONSIMDIntrinsicsProvenSorted = false;
 3895 
 3896 static bool AArch64SIMDIntrinsicsProvenSorted = false;
 3897 static bool AArch64SISDIntrinsicsProvenSorted = false;
 3898 
 3899 
3900 static const NeonIntrinsicInfo *
 3901 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
 3902  unsigned BuiltinID, bool &MapProvenSorted) {
3903 
3904 #ifndef NDEBUG
3905  if (!MapProvenSorted) {
3906  assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
3907  MapProvenSorted = true;
3908  }
3909 #endif
3910 
3911  const NeonIntrinsicInfo *Builtin =
3912  std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
3913 
3914  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
3915  return Builtin;
3916 
3917  return nullptr;
3918 }
3919 
3920 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
3921  unsigned Modifier,
3922  llvm::Type *ArgType,
3923  const CallExpr *E) {
3924  int VectorSize = 0;
3925  if (Modifier & Use64BitVectors)
3926  VectorSize = 64;
3927  else if (Modifier & Use128BitVectors)
3928  VectorSize = 128;
3929 
3930  // Return type.
3931  SmallVector<llvm::Type *, 3> Tys;
3932  if (Modifier & AddRetType) {
3933  llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
3934  if (Modifier & VectorizeRetType)
3935  Ty = llvm::VectorType::get(
3936  Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
3937 
3938  Tys.push_back(Ty);
3939  }
3940 
3941  // Arguments.
3942  if (Modifier & VectorizeArgTypes) {
3943  int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
3944  ArgType = llvm::VectorType::get(ArgType, Elts);
3945  }
3946 
3947  if (Modifier & (Add1ArgType | Add2ArgTypes))
3948  Tys.push_back(ArgType);
3949 
3950  if (Modifier & Add2ArgTypes)
3951  Tys.push_back(ArgType);
3952 
3953  if (Modifier & InventFloatType)
3954  Tys.push_back(FloatTy);
3955 
3956  return CGM.getIntrinsic(IntrinsicID, Tys);
3957 }
3958 
3959 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
3960  const NeonIntrinsicInfo &SISDInfo,
3961  SmallVectorImpl<Value *> &Ops,
3962  const CallExpr *E) {
3963  unsigned BuiltinID = SISDInfo.BuiltinID;
3964  unsigned int Int = SISDInfo.LLVMIntrinsic;
3965  unsigned Modifier = SISDInfo.TypeModifier;
3966  const char *s = SISDInfo.NameHint;
3967 
3968  switch (BuiltinID) {
3969  case NEON::BI__builtin_neon_vcled_s64:
3970  case NEON::BI__builtin_neon_vcled_u64:
3971  case NEON::BI__builtin_neon_vcles_f32:
3972  case NEON::BI__builtin_neon_vcled_f64:
3973  case NEON::BI__builtin_neon_vcltd_s64:
3974  case NEON::BI__builtin_neon_vcltd_u64:
3975  case NEON::BI__builtin_neon_vclts_f32:
3976  case NEON::BI__builtin_neon_vcltd_f64:
3977  case NEON::BI__builtin_neon_vcales_f32:
3978  case NEON::BI__builtin_neon_vcaled_f64:
3979  case NEON::BI__builtin_neon_vcalts_f32:
3980  case NEON::BI__builtin_neon_vcaltd_f64:
3981  // Only one direction of comparisons actually exists: cmle is actually a
3982  // cmge with swapped operands. The table gives us the right intrinsic, but
3983  // we still need to do the swap.
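  // E.g. vcled_s64(a, b) is true iff a <= b, which is the same as b >= a, so
  // after swapping the operands the table's "cmge"-style intrinsic computes
  // the desired result.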
3984  std::swap(Ops[0], Ops[1]);
3985  break;
3986  }
3987 
3988  assert(Int && "Generic code assumes a valid intrinsic");
3989 
3990  // Determine the type(s) of this overloaded AArch64 intrinsic.
3991  const Expr *Arg = E->getArg(0);
3992  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
3993  Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
3994 
3995  int j = 0;
3996  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
3997  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3998  ai != ae; ++ai, ++j) {
3999  llvm::Type *ArgTy = ai->getType();
4000  if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
4001  ArgTy->getPrimitiveSizeInBits())
4002  continue;
4003 
4004  assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
4005  // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
4006  // it before inserting.
4007  Ops[j] =
4008  CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
4009  Ops[j] =
4010  CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
4011  }
4012 
4013  Value *Result = CGF.EmitNeonCall(F, Ops, s);
4014  llvm::Type *ResultType = CGF.ConvertType(E->getType());
4015  if (ResultType->getPrimitiveSizeInBits() <
4016  Result->getType()->getPrimitiveSizeInBits())
4017  return CGF.Builder.CreateExtractElement(Result, C0);
4018 
4019  return CGF.Builder.CreateBitCast(Result, ResultType, s);
4020 }
4021 
4022 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
4023  unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
4024  const char *NameHint, unsigned Modifier, const CallExpr *E,
4025  SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
4026  // Get the last argument, which specifies the vector type.
4027  llvm::APSInt NeonTypeConst;
4028  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
4029  if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
4030  return nullptr;
4031 
4032  // Determine the type of this overloaded NEON intrinsic.
4033  NeonTypeFlags Type(NeonTypeConst.getZExtValue());
4034  bool Usgn = Type.isUnsigned();
4035  bool Quad = Type.isQuad();
4036 
4037  llvm::VectorType *VTy = GetNeonType(this, Type);
4038  llvm::Type *Ty = VTy;
4039  if (!Ty)
4040  return nullptr;
4041 
4042  auto getAlignmentValue32 = [&](Address addr) -> Value* {
4043  return Builder.getInt32(addr.getAlignment().getQuantity());
4044  };
4045 
4046  unsigned Int = LLVMIntrinsic;
4047  if ((Modifier & UnsignedAlts) && !Usgn)
4048  Int = AltLLVMIntrinsic;
4049 
4050  switch (BuiltinID) {
4051  default: break;
4052  case NEON::BI__builtin_neon_vabs_v:
4053  case NEON::BI__builtin_neon_vabsq_v:
4054  if (VTy->getElementType()->isFloatingPointTy())
4055  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
4056  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
4057  case NEON::BI__builtin_neon_vaddhn_v: {
4058  llvm::VectorType *SrcTy =
4059  llvm::VectorType::getExtendedElementVectorType(VTy);
4060 
4061  // %sum = add <4 x i32> %lhs, %rhs
4062  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4063  Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
4064  Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
4065 
4066  // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
4067  Constant *ShiftAmt =
4068  ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
4069  Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
4070 
4071  // %res = trunc <4 x i32> %high to <4 x i16>
4072  return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
4073  }
4074  case NEON::BI__builtin_neon_vcale_v:
4075  case NEON::BI__builtin_neon_vcaleq_v:
4076  case NEON::BI__builtin_neon_vcalt_v:
4077  case NEON::BI__builtin_neon_vcaltq_v:
4078  std::swap(Ops[0], Ops[1]);
4079  LLVM_FALLTHROUGH;
4080  case NEON::BI__builtin_neon_vcage_v:
4081  case NEON::BI__builtin_neon_vcageq_v:
4082  case NEON::BI__builtin_neon_vcagt_v:
4083  case NEON::BI__builtin_neon_vcagtq_v: {
4084  llvm::Type *VecFlt = llvm::VectorType::get(
4085  VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
4086  VTy->getNumElements());
4087  llvm::Type *Tys[] = { VTy, VecFlt };
4088  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4089  return EmitNeonCall(F, Ops, NameHint);
4090  }
4091  case NEON::BI__builtin_neon_vclz_v:
4092  case NEON::BI__builtin_neon_vclzq_v:
4093  // We generate a target-independent intrinsic, which needs a second argument
4094  // saying whether or not clz of zero is undefined; on ARM it isn't.
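  // E.g. for vclz_s32 this roughly becomes
  //   %vclz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)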
4095  Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
4096  break;
4097  case NEON::BI__builtin_neon_vcvt_f32_v:
4098  case NEON::BI__builtin_neon_vcvtq_f32_v:
4099  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4100  Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
4101  return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
4102  : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
4103  case NEON::BI__builtin_neon_vcvt_n_f32_v:
4104  case NEON::BI__builtin_neon_vcvt_n_f64_v:
4105  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
4106  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
4107  llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
4108  Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
4109  Function *F = CGM.getIntrinsic(Int, Tys);
4110  return EmitNeonCall(F, Ops, "vcvt_n");
4111  }
4112  case NEON::BI__builtin_neon_vcvt_n_s32_v:
4113  case NEON::BI__builtin_neon_vcvt_n_u32_v:
4114  case NEON::BI__builtin_neon_vcvt_n_s64_v:
4115  case NEON::BI__builtin_neon_vcvt_n_u64_v:
4116  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
4117  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
4118  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
4119  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
4120  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
4121  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4122  return EmitNeonCall(F, Ops, "vcvt_n");
4123  }
4124  case NEON::BI__builtin_neon_vcvt_s32_v:
4125  case NEON::BI__builtin_neon_vcvt_u32_v:
4126  case NEON::BI__builtin_neon_vcvt_s64_v:
4127  case NEON::BI__builtin_neon_vcvt_u64_v:
4128  case NEON::BI__builtin_neon_vcvtq_s32_v:
4129  case NEON::BI__builtin_neon_vcvtq_u32_v:
4130  case NEON::BI__builtin_neon_vcvtq_s64_v:
4131  case NEON::BI__builtin_neon_vcvtq_u64_v: {
4132  Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
4133  return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
4134  : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
4135  }
4136  case NEON::BI__builtin_neon_vcvta_s32_v:
4137  case NEON::BI__builtin_neon_vcvta_s64_v:
4138  case NEON::BI__builtin_neon_vcvta_u32_v:
4139  case NEON::BI__builtin_neon_vcvta_u64_v:
4140  case NEON::BI__builtin_neon_vcvtaq_s32_v:
4141  case NEON::BI__builtin_neon_vcvtaq_s64_v:
4142  case NEON::BI__builtin_neon_vcvtaq_u32_v:
4143  case NEON::BI__builtin_neon_vcvtaq_u64_v:
4144  case NEON::BI__builtin_neon_vcvtn_s32_v:
4145  case NEON::BI__builtin_neon_vcvtn_s64_v:
4146  case NEON::BI__builtin_neon_vcvtn_u32_v:
4147  case NEON::BI__builtin_neon_vcvtn_u64_v:
4148  case NEON::BI__builtin_neon_vcvtnq_s32_v:
4149  case NEON::BI__builtin_neon_vcvtnq_s64_v:
4150  case NEON::BI__builtin_neon_vcvtnq_u32_v:
4151  case NEON::BI__builtin_neon_vcvtnq_u64_v:
4152  case NEON::BI__builtin_neon_vcvtp_s32_v:
4153  case NEON::BI__builtin_neon_vcvtp_s64_v:
4154  case NEON::BI__builtin_neon_vcvtp_u32_v:
4155  case NEON::BI__builtin_neon_vcvtp_u64_v:
4156  case NEON::BI__builtin_neon_vcvtpq_s32_v:
4157  case NEON::BI__builtin_neon_vcvtpq_s64_v:
4158  case NEON::BI__builtin_neon_vcvtpq_u32_v:
4159  case NEON::BI__builtin_neon_vcvtpq_u64_v:
4160  case NEON::BI__builtin_neon_vcvtm_s32_v:
4161  case NEON::BI__builtin_neon_vcvtm_s64_v:
4162  case NEON::BI__builtin_neon_vcvtm_u32_v:
4163  case NEON::BI__builtin_neon_vcvtm_u64_v:
4164  case NEON::BI__builtin_neon_vcvtmq_s32_v:
4165  case NEON::BI__builtin_neon_vcvtmq_s64_v:
4166  case NEON::BI__builtin_neon_vcvtmq_u32_v:
4167  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
4168  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
4169  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
4170  }
4171  case NEON::BI__builtin_neon_vext_v:
4172  case NEON::BI__builtin_neon_vextq_v: {
4173  int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
4174  SmallVector<uint32_t, 16> Indices;
4175  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
4176  Indices.push_back(i+CV);
4177 
4178  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4179  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4180  return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
4181  }
4182  case NEON::BI__builtin_neon_vfma_v:
4183  case NEON::BI__builtin_neon_vfmaq_v: {
4184  Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
4185  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4186  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4187  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4188 
4189  // NEON intrinsic puts accumulator first, unlike the LLVM fma.
4190  return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
4191  }
4192  case NEON::BI__builtin_neon_vld1_v:
4193  case NEON::BI__builtin_neon_vld1q_v: {
4194  llvm::Type *Tys[] = {Ty, Int8PtrTy};
4195  Ops.push_back(getAlignmentValue32(PtrOp0));
4196  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
4197  }
4198  case NEON::BI__builtin_neon_vld2_v:
4199  case NEON::BI__builtin_neon_vld2q_v:
4200  case NEON::BI__builtin_neon_vld3_v:
4201  case NEON::BI__builtin_neon_vld3q_v:
4202  case NEON::BI__builtin_neon_vld4_v:
4203  case NEON::BI__builtin_neon_vld4q_v: {
4204  llvm::Type *Tys[] = {Ty, Int8PtrTy};
4205  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4206  Value *Align = getAlignmentValue32(PtrOp1);
4207  Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
4208  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4209  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4210  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4211  }
4212  case NEON::BI__builtin_neon_vld1_dup_v:
4213  case NEON::BI__builtin_neon_vld1q_dup_v: {
4214  Value *V = UndefValue::get(Ty);
4215  Ty = llvm::PointerType::getUnqual(VTy->getElementType());
4216  PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
4217  LoadInst *Ld = Builder.CreateLoad(PtrOp0);
4218  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
4219  Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
4220  return EmitNeonSplat(Ops[0], CI);
4221  }
4222  case NEON::BI__builtin_neon_vld2_lane_v:
4223  case NEON::BI__builtin_neon_vld2q_lane_v:
4224  case NEON::BI__builtin_neon_vld3_lane_v:
4225  case NEON::BI__builtin_neon_vld3q_lane_v:
4226  case NEON::BI__builtin_neon_vld4_lane_v:
4227  case NEON::BI__builtin_neon_vld4q_lane_v: {
4228  llvm::Type *Tys[] = {Ty, Int8PtrTy};
4229  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4230  for (unsigned I = 2; I < Ops.size() - 1; ++I)
4231  Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
4232  Ops.push_back(getAlignmentValue32(PtrOp1));
4233  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
4234  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4235  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4236  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4237  }
4238  case NEON::BI__builtin_neon_vmovl_v: {
4239  llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
4240  Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
4241  if (Usgn)
4242  return Builder.CreateZExt(Ops[0], Ty, "vmovl");
4243  return Builder.CreateSExt(Ops[0], Ty, "vmovl");
4244  }
4245  case NEON::BI__builtin_neon_vmovn_v: {
4246  llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4247  Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
4248  return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
4249  }
4250  case NEON::BI__builtin_neon_vmull_v:
4251  // FIXME: the integer vmull operations could be emitted in terms of pure
4252  // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
4253  // hoisting the exts outside loops. Until global ISel comes along that can
4254  // see through such movement this leads to bad CodeGen. So we need an
4255  // intrinsic for now.
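  // The pure-IR form alluded to above would be roughly:
  //   %lhs.ext = sext <4 x i16> %lhs to <4 x i32>
  //   %rhs.ext = sext <4 x i16> %rhs to <4 x i32>
  //   %prod    = mul <4 x i32> %lhs.ext, %rhs.ext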
4256  Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
4257  Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
4258  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
4259  case NEON::BI__builtin_neon_vpadal_v:
4260  case NEON::BI__builtin_neon_vpadalq_v: {
4261  // The source operand type has twice as many elements of half the size.
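  // E.g. for vpadal_s8 the accumulator/result type is <4 x i16> while the
  // operand being pairwise-added is <8 x i8>.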
4262  unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4263  llvm::Type *EltTy =
4264  llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4265  llvm::Type *NarrowTy =
4266  llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4267  llvm::Type *Tys[2] = { Ty, NarrowTy };
4268  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
4269  }
4270  case NEON::BI__builtin_neon_vpaddl_v:
4271  case NEON::BI__builtin_neon_vpaddlq_v: {
4272  // The source operand type has twice as many elements of half the size.
4273  unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4274  llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4275  llvm::Type *NarrowTy =
4276  llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4277  llvm::Type *Tys[2] = { Ty, NarrowTy };
4278  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
4279  }
4280  case NEON::BI__builtin_neon_vqdmlal_v:
4281  case NEON::BI__builtin_neon_vqdmlsl_v: {
4282  SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
4283  Ops[1] =
4284  EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
4285  Ops.resize(2);
4286  return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
4287  }
4288  case NEON::BI__builtin_neon_vqshl_n_v:
4289  case NEON::BI__builtin_neon_vqshlq_n_v:
4290  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
4291  1, false);
4292  case NEON::BI__builtin_neon_vqshlu_n_v:
4293  case NEON::BI__builtin_neon_vqshluq_n_v:
4294  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
4295  1, false);
4296  case NEON::BI__builtin_neon_vrecpe_v:
4297  case NEON::BI__builtin_neon_vrecpeq_v:
4298  case NEON::BI__builtin_neon_vrsqrte_v:
4299  case NEON::BI__builtin_neon_vrsqrteq_v:
4300  Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
4301  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
4302 
4303  case NEON::BI__builtin_neon_vrshr_n_v:
4304  case NEON::BI__builtin_neon_vrshrq_n_v:
4305  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
4306  1, true);
4307  case NEON::BI__builtin_neon_vshl_n_v:
4308  case NEON::BI__builtin_neon_vshlq_n_v:
4309  Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
4310  return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
4311  "vshl_n");
4312  case NEON::BI__builtin_neon_vshll_n_v: {
4313  llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
4314  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4315  if (Usgn)
4316  Ops[0] = Builder.CreateZExt(Ops[0], VTy);
4317  else
4318  Ops[0] = Builder.CreateSExt(Ops[0], VTy);
4319  Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
4320  return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
4321  }
4322  case NEON::BI__builtin_neon_vshrn_n_v: {
4323  llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4324  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4325  Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
4326  if (Usgn)
4327  Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
4328  else
4329  Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
4330  return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
4331  }
4332  case NEON::BI__builtin_neon_vshr_n_v:
4333  case NEON::BI__builtin_neon_vshrq_n_v:
4334  return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
4335  case NEON::BI__builtin_neon_vst1_v:
4336  case NEON::BI__builtin_neon_vst1q_v:
4337  case NEON::BI__builtin_neon_vst2_v:
4338  case NEON::BI__builtin_neon_vst2q_v:
4339  case NEON::BI__builtin_neon_vst3_v:
4340  case NEON::BI__builtin_neon_vst3q_v:
4341  case NEON::BI__builtin_neon_vst4_v:
4342  case NEON::BI__builtin_neon_vst4q_v:
4343  case NEON::BI__builtin_neon_vst2_lane_v:
4344  case NEON::BI__builtin_neon_vst2q_lane_v:
4345  case NEON::BI__builtin_neon_vst3_lane_v:
4346  case NEON::BI__builtin_neon_vst3q_lane_v:
4347  case NEON::BI__builtin_neon_vst4_lane_v:
4348  case NEON::BI__builtin_neon_vst4q_lane_v: {
4349  llvm::Type *Tys[] = {Int8PtrTy, Ty};
4350  Ops.push_back(getAlignmentValue32(PtrOp0));
4351  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
4352  }
4353  case NEON::BI__builtin_neon_vsubhn_v: {
4354  llvm::VectorType *SrcTy =
4355  llvm::VectorType::getExtendedElementVectorType(VTy);
4356 
4357  // %diff = sub <4 x i32> %lhs, %rhs
4358  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4359  Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
4360  Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
4361 
4362  // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
4363  Constant *ShiftAmt =
4364  ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
4365  Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
4366 
4367  // %res = trunc <4 x i32> %high to <4 x i16>
4368  return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
4369  }
4370  case NEON::BI__builtin_neon_vtrn_v:
4371  case NEON::BI__builtin_neon_vtrnq_v: {
4372  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4373  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4374  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4375  Value *SV = nullptr;
4376 
4377  for (unsigned vi = 0; vi != 2; ++vi) {
4378  SmallVector<uint32_t, 16> Indices;
4379  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4380  Indices.push_back(i+vi);
4381  Indices.push_back(i+e+vi);
4382  }
4383  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4384  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
4385  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4386  }
4387  return SV;
4388  }
4389  case NEON::BI__builtin_neon_vtst_v:
4390  case NEON::BI__builtin_neon_vtstq_v: {
4391  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4392  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4393  Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
4394  Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
4395  ConstantAggregateZero::get(Ty));
4396  return Builder.CreateSExt(Ops[0], Ty, "vtst");
4397  }
4398  case NEON::BI__builtin_neon_vuzp_v:
4399  case NEON::BI__builtin_neon_vuzpq_v: {
4400  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4401  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4402  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4403  Value *SV = nullptr;
4404 
4405  for (unsigned vi = 0; vi != 2; ++vi) {
4406  SmallVector<uint32_t, 16> Indices;
4407  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
4408  Indices.push_back(2*i+vi);
4409 
4410  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4411  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
4412  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4413  }
4414  return SV;
4415  }
4416  case NEON::BI__builtin_neon_vzip_v:
4417  case NEON::BI__builtin_neon_vzipq_v: {
4418  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4419  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4420  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4421  Value *SV = nullptr;
4422 
4423  for (unsigned vi = 0; vi != 2; ++vi) {
4424  SmallVector<uint32_t, 16> Indices;
4425  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4426  Indices.push_back((i + vi*e) >> 1);
4427  Indices.push_back(((i + vi*e) >> 1)+e);
4428  }
4429  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4430  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
4431  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4432  }
4433  return SV;
4434  }
4435  }
4436 
4437  assert(Int && "Expected valid intrinsic number");
4438 
4439  // Determine the type(s) of this overloaded NEON intrinsic.
4440  Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
4441 
4442  Value *Result = EmitNeonCall(F, Ops, NameHint);
4443  llvm::Type *ResultType = ConvertType(E->getType());
4444  // An AArch64 intrinsic that returns a one-element vector is cast back to
4445  // the scalar type expected by the builtin.
4446  return Builder.CreateBitCast(Result, ResultType, NameHint);
4447 }
4448 
4449 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
4450  Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
4451  const CmpInst::Predicate Ip, const Twine &Name) {
4452  llvm::Type *OTy = Op->getType();
4453 
4454  // FIXME: this is utterly horrific. We should not be looking at previous
4455  // codegen context to find out what needs doing. Unfortunately TableGen
4456  // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
4457  // (etc).
4458  if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
4459  OTy = BI->getOperand(0)->getType();
4460 
4461  Op = Builder.CreateBitCast(Op, OTy);
4462  if (OTy->getScalarType()->isFloatingPointTy()) {
4463  Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
4464  } else {
4465  Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
4466  }
4467  return Builder.CreateSExt(Op, Ty, Name);
4468 }
4469 
4470 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
4471  Value *ExtOp, Value *IndexOp,
4472  llvm::Type *ResTy, unsigned IntID,
4473  const char *Name) {
4474  SmallVector<Value *, 2> TblOps;
4475  if (ExtOp)
4476  TblOps.push_back(ExtOp);
4477 
4478  // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
4479  SmallVector<uint32_t, 16> Indices;
4480  llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
4481  for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
4482  Indices.push_back(2*i);
4483  Indices.push_back(2*i+1);
4484  }
4485 
4486  int PairPos = 0, End = Ops.size() - 1;
4487  while (PairPos < End) {
4488  TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4489  Ops[PairPos+1], Indices,
4490  Name));
4491  PairPos += 2;
4492  }
4493 
4494  // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
4495  // of the last 128-bit lookup table with zero.
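  // E.g. for a three-register table (vtbl3/vtbx3), the third 64-bit register
  // is paired with an all-zero vector to form the second 128-bit table.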
4496  if (PairPos == End) {
4497  Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
4498  TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4499  ZeroTbl, Indices, Name));
4500  }
4501 
4502  Function *TblF;
4503  TblOps.push_back(IndexOp);
4504  TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
4505 
4506  return CGF.EmitNeonCall(TblF, TblOps, Name);
4507 }
4508 
4509 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
4510  unsigned Value;
4511  switch (BuiltinID) {
4512  default:
4513  return nullptr;
4514  case ARM::BI__builtin_arm_nop:
4515  Value = 0;
4516  break;
4517  case ARM::BI__builtin_arm_yield:
4518  case ARM::BI__yield:
4519  Value = 1;
4520  break;
4521  case ARM::BI__builtin_arm_wfe:
4522  case ARM::BI__wfe:
4523  Value = 2;
4524  break;
4525  case ARM::BI__builtin_arm_wfi:
4526  case ARM::BI__wfi:
4527  Value = 3;
4528  break;
4529  case ARM::BI__builtin_arm_sev:
4530  case ARM::BI__sev:
4531  Value = 4;
4532  break;
4533  case ARM::BI__builtin_arm_sevl:
4534  case ARM::BI__sevl:
4535  Value = 5;
4536  break;
4537  }
4538 
4539  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
4540  llvm::ConstantInt::get(Int32Ty, Value));
4541 }
4542 
4543 // Generates the IR for the read/write special register builtin.
4544 // ValueType is the type of the value that is to be written or read;
4545 // RegisterType is the type of the register being written to or read from.
4546 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
4547  const CallExpr *E,
4548  llvm::Type *RegisterType,
4549  llvm::Type *ValueType,
4550  bool IsRead,
4551  StringRef SysReg = "") {
4552  // The read/write register intrinsics only support 32- and 64-bit operations.
4553  assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
4554  && "Unsupported size for register.");
4555 
4556  CodeGen::CGBuilderTy &Builder = CGF.Builder;
4557  CodeGen::CodeGenModule &CGM = CGF.CGM;
4558  LLVMContext &Context = CGM.getLLVMContext();
4559 
4560  if (SysReg.empty()) {
4561  const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
4562  SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
4563  }
4564 
4565  llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
4566  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4567  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4568 
4569  llvm::Type *Types[] = { RegisterType };
4570 
4571  bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
4572  assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
4573  && "Can't fit 64-bit value in 32-bit register");
4574 
4575  if (IsRead) {
4576  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
4577  llvm::Value *Call = Builder.CreateCall(F, Metadata);
4578 
4579  if (MixedTypes)
4580  // Read into 64 bit register and then truncate result to 32 bit.
4581  return Builder.CreateTrunc(Call, ValueType);
4582 
4583  if (ValueType->isPointerTy())
4584  // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
4585  return Builder.CreateIntToPtr(Call, ValueType);
4586 
4587  return Call;
4588  }
4589 
4590  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
4591  llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
4592  if (MixedTypes) {
4593  // Extend 32 bit write value to 64 bit to pass to write.
4594  ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
4595  return Builder.CreateCall(F, { Metadata, ArgValue });
4596  }
4597 
4598  if (ValueType->isPointerTy()) {
4599  // Have VoidPtrTy ArgValue but want to return an i32/i64.
4600  ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
4601  return Builder.CreateCall(F, { Metadata, ArgValue });
4602  }
4603 
4604  return Builder.CreateCall(F, { Metadata, ArgValue });
4605 }
4606 
4607 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
4608 /// argument that specifies the vector type.
4609 static bool HasExtraNeonArgument(unsigned BuiltinID) {
4610  switch (BuiltinID) {
4611  default: break;
4612  case NEON::BI__builtin_neon_vget_lane_i8:
4613  case NEON::BI__builtin_neon_vget_lane_i16:
4614  case NEON::BI__builtin_neon_vget_lane_i32:
4615  case NEON::BI__builtin_neon_vget_lane_i64:
4616  case NEON::BI__builtin_neon_vget_lane_f32:
4617  case NEON::BI__builtin_neon_vgetq_lane_i8:
4618  case NEON::BI__builtin_neon_vgetq_lane_i16:
4619  case NEON::BI__builtin_neon_vgetq_lane_i32:
4620  case NEON::BI__builtin_neon_vgetq_lane_i64:
4621  case NEON::BI__builtin_neon_vgetq_lane_f32:
4622  case NEON::BI__builtin_neon_vset_lane_i8:
4623  case NEON::BI__builtin_neon_vset_lane_i16:
4624  case NEON::BI__builtin_neon_vset_lane_i32:
4625  case NEON::BI__builtin_neon_vset_lane_i64:
4626  case NEON::BI__builtin_neon_vset_lane_f32:
4627  case NEON::BI__builtin_neon_vsetq_lane_i8:
4628  case NEON::BI__builtin_neon_vsetq_lane_i16:
4629  case NEON::BI__builtin_neon_vsetq_lane_i32:
4630  case NEON::BI__builtin_neon_vsetq_lane_i64:
4631  case NEON::BI__builtin_neon_vsetq_lane_f32:
4632  case NEON::BI__builtin_neon_vsha1h_u32:
4633  case NEON::BI__builtin_neon_vsha1cq_u32:
4634  case NEON::BI__builtin_neon_vsha1pq_u32:
4635  case NEON::BI__builtin_neon_vsha1mq_u32:
4636  case ARM::BI_MoveToCoprocessor:
4637  case ARM::BI_MoveToCoprocessor2:
4638  return false;
4639  }
4640  return true;
4641 }
4642 
4643 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
4644  const CallExpr *E) {
4645  if (auto Hint = GetValueForARMHint(BuiltinID))
4646  return Hint;
4647 
4648  if (BuiltinID == ARM::BI__emit) {
4649  bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
4650  llvm::FunctionType *FTy =
4651  llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
4652 
4653  APSInt Value;
4654  if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
4655  llvm_unreachable("Sema will ensure that the parameter is constant");
4656 
4657  uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
4658 
4659  llvm::InlineAsm *Emit =
4660  IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
4661  /*SideEffects=*/true)
4662  : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
4663  /*SideEffects=*/true);
4664 
4665  return Builder.CreateCall(Emit);
4666  }
4667 
4668  if (BuiltinID == ARM::BI__builtin_arm_dbg) {
4669  Value *Option = EmitScalarExpr(E->getArg(0));
4670  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
4671  }
4672 
4673  if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
4674  Value *Address = EmitScalarExpr(E->getArg(0));
4675  Value *RW = EmitScalarExpr(E->getArg(1));
4676  Value *IsData = EmitScalarExpr(E->getArg(2));
4677 
4678  // Locality is not supported on the ARM target.
4679  Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
4680 
4681  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4682  return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4683  }
4684 
4685  if (BuiltinID == ARM::BI__builtin_arm_rbit) {
4686  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4687  return Builder.CreateCall(
4688  CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4689  }
4690 
4691  if (BuiltinID == ARM::BI__clear_cache) {
4692  assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4693  const FunctionDecl *FD = E->getDirectCallee();
4694  Value *Ops[2];
4695  for (unsigned i = 0; i < 2; i++)
4696  Ops[i] = EmitScalarExpr(E->getArg(i));
4697  llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4698  llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4699  StringRef Name = FD->getName();
4700  return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4701  }
4702 
4703  if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
4704  BuiltinID == ARM::BI__builtin_arm_mcrr2) {
4705  Function *F;
4706 
4707  switch (BuiltinID) {
4708  default: llvm_unreachable("unexpected builtin");
4709  case ARM::BI__builtin_arm_mcrr:
4710  F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
4711  break;
4712  case ARM::BI__builtin_arm_mcrr2:
4713  F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
4714  break;
4715  }
4716 
4717  // The MCRR{2} instruction has 5 operands, but
4718  // the intrinsic has only 4 because Rt and Rt2
4719  // are represented as a single unsigned 64-bit
4720  // integer in the intrinsic definition, even
4721  // though internally they are two separate
4722  // 32-bit integers.
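  // E.g. __builtin_arm_mcrr(coproc, opc1, v, crm) is emitted with
  // Rt = trunc(v) (low 32 bits) and Rt2 = trunc(v >> 32) (high 32 bits),
  // matching the split performed below.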
4723 
4724  Value *Coproc = EmitScalarExpr(E->getArg(0));
4725  Value *Opc1 = EmitScalarExpr(E->getArg(1));
4726  Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
4727  Value *CRm = EmitScalarExpr(E->getArg(3));
4728 
4729  Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4730  Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
4731  Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
4732  Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
4733 
4734  return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
4735  }
4736 
4737  if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
4738  BuiltinID == ARM::BI__builtin_arm_mrrc2) {
4739  Function *F;
4740 
4741  switch (BuiltinID) {
4742  default: llvm_unreachable("unexpected builtin");
4743  case ARM::BI__builtin_arm_mrrc:
4744  F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
4745  break;
4746  case ARM::BI__builtin_arm_mrrc2:
4747  F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
4748  break;
4749  }
4750 
4751  Value *Coproc = EmitScalarExpr(E->getArg(0));
4752  Value *Opc1 = EmitScalarExpr(E->getArg(1));
4753  Value *CRm = EmitScalarExpr(E->getArg(2));
4754  Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
4755 
4756  // Returns an unsigned 64 bit integer, represented
4757  // as two 32 bit integers.
4758 
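  // E.g. the two extracted 32-bit halves are recombined below as
  //   result = (zext(Rt) << 32) | zext(Rt1)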
4759  Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
4760  Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
4761  Rt = Builder.CreateZExt(Rt, Int64Ty);
4762  Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
4763 
4764  Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
4765  RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
4766  RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
4767 
4768  return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
4769  }
4770 
4771  if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
4772  ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
4773  BuiltinID == ARM::BI__builtin_arm_ldaex) &&
4774  getContext().getTypeSize(E->getType()) == 64) ||
4775  BuiltinID == ARM::BI__ldrexd) {
4776  Function *F;
4777 
4778  switch (BuiltinID) {
4779  default: llvm_unreachable("unexpected builtin");
4780  case ARM::BI__builtin_arm_ldaex:
4781  F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
4782  break;
4783  case ARM::BI__builtin_arm_ldrexd:
4784  case ARM::BI__builtin_arm_ldrex:
4785  case ARM::BI__ldrexd:
4786  F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
4787  break;
4788  }
4789 
4790  Value *LdPtr = EmitScalarExpr(E->getArg(0));
4791  Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4792  "ldrexd");
4793 
4794  Value *Val0 = Builder.CreateExtractValue(Val, 1);
4795  Value *Val1 = Builder.CreateExtractValue(Val, 0);
4796  Val0 = Builder.CreateZExt(Val0, Int64Ty);
4797  Val1 = Builder.CreateZExt(Val1, Int64Ty);
4798 
4799  Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
4800  Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4801  Val = Builder.CreateOr(Val, Val1);
4802  return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4803  }
4804 
4805  if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
4806  BuiltinID == ARM::BI__builtin_arm_ldaex) {
4807  Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4808 
4809  QualType Ty = E->getType();
4810  llvm::Type *RealResTy = ConvertType(Ty);
4811  llvm::Type *PtrTy = llvm::IntegerType::get(
4812  getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
4813  LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
4814 
4815  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
4816  ? Intrinsic::arm_ldaex
4817  : Intrinsic::arm_ldrex,
4818  PtrTy);
4819  Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
4820 
4821  if (RealResTy->isPointerTy())
4822  return Builder.CreateIntToPtr(Val, RealResTy);
4823  else {
4824  llvm::Type *IntResTy = llvm::IntegerType::get(
4825  getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
4826  Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4827  return Builder.CreateBitCast(Val, RealResTy);
4828  }
4829  }
4830 
4831  if (BuiltinID == ARM::BI__builtin_arm_strexd ||
4832  ((BuiltinID == ARM::BI__builtin_arm_stlex ||
4833  BuiltinID == ARM::BI__builtin_arm_strex) &&
4834  getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
4835  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4836  ? Intrinsic::arm_stlexd
4837  : Intrinsic::arm_strexd);
4838  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
4839 
4840  Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4841  Value *Val = EmitScalarExpr(E->getArg(0));
4842  Builder.CreateStore(Val, Tmp);
4843 
4844  Address LdPtr = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
4845  Val = Builder.CreateLoad(LdPtr);
4846 
4847  Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4848  Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4849  Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
4850  return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
4851  }
4852 
4853  if (BuiltinID == ARM::BI__builtin_arm_strex ||
4854  BuiltinID == ARM::BI__builtin_arm_stlex) {
4855  Value *StoreVal = EmitScalarExpr(E->getArg(0));
4856  Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4857 
4858  QualType Ty = E->getArg(0)->getType();
4859  llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4860  getContext().getTypeSize(Ty));
4861  StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4862 
4863  if (StoreVal->getType()->isPointerTy())
4864  StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
4865  else {
4866  llvm::Type *IntTy = llvm::IntegerType::get(
4867  getLLVMContext(),
4868  CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
4869  StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
4870  StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
4871  }
4872 
4873  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4874  ? Intrinsic::arm_stlex
4875  : Intrinsic::arm_strex,
4876  StoreAddr->getType());
4877  return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
4878  }
4879 
4880  switch (BuiltinID) {
4881  case ARM::BI__iso_volatile_load8:
4882  case ARM::BI__iso_volatile_load16:
4883  case ARM::BI__iso_volatile_load32:
4884  case ARM::BI__iso_volatile_load64: {
4885  Value *Ptr = EmitScalarExpr(E->getArg(0));
4886  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4887  CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
4888  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4889  LoadSize.getQuantity() * 8);
4890  Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4891  llvm::LoadInst *Load =
4892  Builder.CreateAlignedLoad(Ptr, LoadSize);
4893  Load->setVolatile(true);
4894  return Load;
4895  }
4896  case ARM::BI__iso_volatile_store8:
4897  case ARM::BI__iso_volatile_store16:
4898  case ARM::BI__iso_volatile_store32:
4899  case ARM::BI__iso_volatile_store64: {
4900  Value *Ptr = EmitScalarExpr(E->getArg(0));
4901  Value *Value = EmitScalarExpr(E->getArg(1));
4902  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4903  CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
4904  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4905  StoreSize.getQuantity() * 8);
4906  Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4907  llvm::StoreInst *Store =
4908  Builder.CreateAlignedStore(Value, Ptr,
4909  StoreSize);
4910  Store->setVolatile(true);
4911  return Store;
4912  }
4913  }
4914 
4915  if (BuiltinID == ARM::BI__builtin_arm_clrex) {
4916  Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
4917  return Builder.CreateCall(F);
4918  }
4919 
4920  // CRC32
4921  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4922  switch (BuiltinID) {
4923  case ARM::BI__builtin_arm_crc32b:
4924  CRCIntrinsicID = Intrinsic::arm_crc32b; break;
4925  case ARM::BI__builtin_arm_crc32cb:
4926  CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
4927  case ARM::BI__builtin_arm_crc32h:
4928  CRCIntrinsicID = Intrinsic::arm_crc32h; break;
4929  case ARM::BI__builtin_arm_crc32ch:
4930  CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
4931  case ARM::BI__builtin_arm_crc32w:
4932  case ARM::BI__builtin_arm_crc32d:
4933  CRCIntrinsicID = Intrinsic::arm_crc32w; break;
4934  case ARM::BI__builtin_arm_crc32cw:
4935  case ARM::BI__builtin_arm_crc32cd:
4936  CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
4937  }
4938 
4939  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4940  Value *Arg0 = EmitScalarExpr(E->getArg(0));
4941  Value *Arg1 = EmitScalarExpr(E->getArg(1));
4942 
4943  // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
4944  // intrinsics, hence we need different codegen for these cases.
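  // Conceptually: crc32d(crc, x) == crc32w(crc32w(crc, lo32(x)), hi32(x)),
  // which is exactly the pair of chained calls emitted below.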
4945  if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
4946  BuiltinID == ARM::BI__builtin_arm_crc32cd) {
4947  Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4948  Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
4949  Value *Arg1b = Builder.CreateLShr(Arg1, C1);
4950  Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
4951 
4952  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4953  Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
4954  return Builder.CreateCall(F, {Res, Arg1b});
4955  } else {
4956  Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
4957 
4958  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4959  return Builder.CreateCall(F, {Arg0, Arg1});
4960  }
4961  }
4962 
4963  if (BuiltinID == ARM::BI__builtin_arm_rsr ||
4964  BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4965  BuiltinID == ARM::BI__builtin_arm_rsrp ||
4966  BuiltinID == ARM::BI__builtin_arm_wsr ||
4967  BuiltinID == ARM::BI__builtin_arm_wsr64 ||
4968  BuiltinID == ARM::BI__builtin_arm_wsrp) {
4969 
4970  bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
4971  BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4972  BuiltinID == ARM::BI__builtin_arm_rsrp;
4973 
4974  bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
4975  BuiltinID == ARM::BI__builtin_arm_wsrp;
4976 
4977  bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4978  BuiltinID == ARM::BI__builtin_arm_wsr64;
4979 
4980  llvm::Type *ValueType;
4981  llvm::Type *RegisterType;
4982  if (IsPointerBuiltin) {
4983  ValueType = VoidPtrTy;
4984  RegisterType = Int32Ty;
4985  } else if (Is64Bit) {
4986  ValueType = RegisterType = Int64Ty;
4987  } else {
4988  ValueType = RegisterType = Int32Ty;
4989  }
4990 
4991  return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
4992  }
4993 
4994  // Find out if any arguments are required to be integer constant
4995  // expressions.
4996  unsigned ICEArguments = 0;
4997  ASTContext::GetBuiltinTypeError Error;
4998  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4999  assert(Error == ASTContext::GE_None && "Should not codegen an error");
5000 
5001  auto getAlignmentValue32 = [&](Address addr) -> Value* {
5002  return Builder.getInt32(addr.getAlignment().getQuantity());
5003  };
5004 
5005  Address PtrOp0 = Address::invalid();
5006  Address PtrOp1 = Address::invalid();
5007  SmallVector<Value*, 4> Ops;
5008  bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
5009  unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
5010  for (unsigned i = 0, e = NumArgs; i != e; i++) {
5011  if (i == 0) {
5012  switch (BuiltinID) {
5013  case NEON::BI__builtin_neon_vld1_v:
5014  case NEON::BI__builtin_neon_vld1q_v:
5015  case NEON::BI__builtin_neon_vld1q_lane_v:
5016  case NEON::BI__builtin_neon_vld1_lane_v:
5017  case NEON::BI__builtin_neon_vld1_dup_v:
5018  case NEON::BI__builtin_neon_vld1q_dup_v:
5019  case NEON::BI__builtin_neon_vst1_v:
5020  case NEON::BI__builtin_neon_vst1q_v:
5021  case NEON::BI__builtin_neon_vst1q_lane_v:
5022  case NEON::BI__builtin_neon_vst1_lane_v:
5023  case NEON::BI__builtin_neon_vst2_v:
5024  case NEON::BI__builtin_neon_vst2q_v:
5025  case NEON::BI__builtin_neon_vst2_lane_v:
5026  case NEON::BI__builtin_neon_vst2q_lane_v:
5027  case NEON::BI__builtin_neon_vst3_v:
5028  case NEON::BI__builtin_neon_vst3q_v:
5029  case NEON::BI__builtin_neon_vst3_lane_v:
5030  case NEON::BI__builtin_neon_vst3q_lane_v:
5031  case NEON::BI__builtin_neon_vst4_v:
5032  case NEON::BI__builtin_neon_vst4q_v:
5033  case NEON::BI__builtin_neon_vst4_lane_v:
5034  case NEON::BI__builtin_neon_vst4q_lane_v:
5035  // Get the alignment for the argument in addition to the value;
5036  // we'll use it later.
5037  PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
5038  Ops.push_back(PtrOp0.getPointer());
5039  continue;
5040  }
5041  }
5042  if (i == 1) {
5043  switch (BuiltinID) {
5044  case NEON::BI__builtin_neon_vld2_v:
5045  case NEON::BI__builtin_neon_vld2q_v:
5046  case NEON::BI__builtin_neon_vld3_v:
5047  case NEON::BI__builtin_neon_vld3q_v:
5048  case NEON::BI__builtin_neon_vld4_v:
5049  case NEON::BI__builtin_neon_vld4q_v:
5050  case NEON::BI__builtin_neon_vld2_lane_v:
5051  case NEON::BI__builtin_neon_vld2q_lane_v:
5052  case NEON::BI__builtin_neon_vld3_lane_v:
5053  case NEON::BI__builtin_neon_vld3q_lane_v:
5054  case NEON::BI__builtin_neon_vld4_lane_v:
5055  case NEON::BI__builtin_neon_vld4q_lane_v:
5056  case NEON::BI__builtin_neon_vld2_dup_v:
5057  case NEON::BI__builtin_neon_vld3_dup_v:
5058  case NEON::BI__builtin_neon_vld4_dup_v:
5059  // Get the alignment for the argument in addition to the value;
5060  // we'll use it later.
5061  PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
5062  Ops.push_back(PtrOp1.getPointer());
5063  continue;
5064  }
5065  }
5066 
5067  if ((ICEArguments & (1 << i)) == 0) {
5068  Ops.push_back(EmitScalarExpr(E->getArg(i)));
5069  } else {
5070  // If this is required to be a constant, constant fold it so that we know
5071  // that the generated intrinsic gets a ConstantInt.
5072  llvm::APSInt Result;
5073  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
5074  assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
5075  Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
5076  }
5077  }
5078 
5079  switch (BuiltinID) {
5080  default: break;
5081 
5082  case NEON::BI__builtin_neon_vget_lane_i8:
5083  case NEON::BI__builtin_neon_vget_lane_i16:
5084  case NEON::BI__builtin_neon_vget_lane_i32:
5085  case NEON::BI__builtin_neon_vget_lane_i64:
5086  case NEON::BI__builtin_neon_vget_lane_f32:
5087  case NEON::BI__builtin_neon_vgetq_lane_i8:
5088  case NEON::BI__builtin_neon_vgetq_lane_i16:
5089  case NEON::BI__builtin_neon_vgetq_lane_i32:
5090  case NEON::BI__builtin_neon_vgetq_lane_i64:
5091  case NEON::BI__builtin_neon_vgetq_lane_f32:
5092  return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5093 
5094  case NEON::BI__builtin_neon_vset_lane_i8:
5095  case NEON::BI__builtin_neon_vset_lane_i16:
5096  case NEON::BI__builtin_neon_vset_lane_i32:
5097  case NEON::BI__builtin_neon_vset_lane_i64:
5098  case NEON::BI__builtin_neon_vset_lane_f32:
5099  case NEON::BI__builtin_neon_vsetq_lane_i8:
5100  case NEON::BI__builtin_neon_vsetq_lane_i16:
5101  case NEON::BI__builtin_neon_vsetq_lane_i32:
5102  case NEON::BI__builtin_neon_vsetq_lane_i64:
5103  case NEON::BI__builtin_neon_vsetq_lane_f32:
5104  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5105 
5106  case NEON::BI__builtin_neon_vsha1h_u32:
5107  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
5108  "vsha1h");
5109  case NEON::BI__builtin_neon_vsha1cq_u32:
5110  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
5111  "vsha1h");
5112  case NEON::BI__builtin_neon_vsha1pq_u32:
5113  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
5114  "vsha1h");
5115  case NEON::BI__builtin_neon_vsha1mq_u32:
5116  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
5117  "vsha1h");
5118 
5119  // The ARM _MoveToCoprocessor builtins put the input register value as
5120  // the first argument, but the LLVM intrinsic expects it as the third one.
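  // I.e. the register value passed as the first builtin argument (Ops[0]) is
  // moved to the third position in the intrinsic call below.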
5121  case ARM::BI_MoveToCoprocessor:
5122  case ARM::BI_MoveToCoprocessor2: {
5123  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
5124  Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
5125  return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
5126  Ops[3], Ops[4], Ops[5]});
5127  }
5128  case ARM::BI_BitScanForward:
5129  case ARM::BI_BitScanForward64:
5130  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
5131  case ARM::BI_BitScanReverse:
5132  case ARM::BI_BitScanReverse64:
5133  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
5134 
5135  case ARM::BI_InterlockedAnd64:
5136  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
5137  case ARM::BI_InterlockedExchange64:
5138  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
5139  case ARM::BI_InterlockedExchangeAdd64:
5140  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
5141  case ARM::BI_InterlockedExchangeSub64:
5142  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
5143  case ARM::BI_InterlockedOr64:
5144  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
5145  case ARM::BI_InterlockedXor64:
5146  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
5147  case ARM::BI_InterlockedDecrement64:
5148  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
5149  case ARM::BI_InterlockedIncrement64:
5150  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
5151  }
5152 
5153  // Get the last argument, which specifies the vector type.
5154  assert(HasExtraArg);
5155  llvm::APSInt Result;
5156  const Expr *Arg = E->getArg(E->getNumArgs()-1);
5157  if (!Arg->isIntegerConstantExpr(Result, getContext()))
5158  return nullptr;
5159 
5160  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
5161  BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
5162  // Determine the overloaded type of this builtin.
5163  llvm::Type *Ty;
5164  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
5165  Ty = FloatTy;
5166  else
5167  Ty = DoubleTy;
5168 
5169  // Determine whether this is an unsigned conversion or not.
5170  bool usgn = Result.getZExtValue() == 1;
5171  unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
5172 
5173  // Call the appropriate intrinsic.
5174  Function *F = CGM.getIntrinsic(Int, Ty);
5175  return Builder.CreateCall(F, Ops, "vcvtr");
5176  }
5177 
5178  // Determine the type of this overloaded NEON intrinsic.
5179  NeonTypeFlags Type(Result.getZExtValue());
5180  bool usgn = Type.isUnsigned();
5181  bool rightShift = false;
5182 
5183  llvm::VectorType *VTy = GetNeonType(this, Type);
5184  llvm::Type *Ty = VTy;
5185  if (!Ty)
5186  return nullptr;
5187 
5188  // Many NEON builtins have identical semantics and uses in ARM and
5189  // AArch64. Emit these in a single function.
5190  auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
5191  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
5192  IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
5193  if (Builtin)
5194  return EmitCommonNeonBuiltinExpr(
5195  Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
5196  Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
5197 
5198  unsigned Int;
5199  switch (BuiltinID) {
5200  default: return nullptr;
5201  case NEON::BI__builtin_neon_vld1q_lane_v:
5202  // Handle 64-bit integer elements as a special case. Use shuffles of
5203  // one-element vectors to avoid poor code for i64 in the backend.
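  // Roughly: keep the untouched lane via a one-element shuffle, load the new
  // element with vld1 as a <1 x i64>, then shuffle the two back together.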
5204  if (VTy->getElementType()->isIntegerTy(64)) {
5205  // Extract the other lane.
5206  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5207  uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
5208  Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
5209  Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
5210  // Load the value as a one-element vector.
5211  Ty = llvm::VectorType::get(VTy->getElementType(), 1);
5212  llvm::Type *Tys[] = {Ty, Int8PtrTy};
5213  Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
5214  Value *Align = getAlignmentValue32(PtrOp0);
5215  Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
5216  // Combine them.
5217  uint32_t Indices[] = {1 - Lane, Lane};
5218  SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
5219  return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
5220  }
5221  // fall through
5222  case NEON::BI__builtin_neon_vld1_lane_v: {
5223  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5224  PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
5225  Value *Ld = Builder.CreateLoad(PtrOp0);
5226  return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
5227  }
5228  case NEON::BI__builtin_neon_vld2_dup_v:
5229  case NEON::BI__builtin_neon_vld3_dup_v:
5230  case NEON::BI__builtin_neon_vld4_dup_v: {
5231  // Handle 64-bit elements as a special-case. There is no "dup" needed.
5232  if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
5233  switch (BuiltinID) {
5234  case NEON::BI__builtin_neon_vld2_dup_v:
5235  Int = Intrinsic::arm_neon_vld2;
5236  break;
5237  case NEON::BI__builtin_neon_vld3_dup_v:
5238  Int = Intrinsic::arm_neon_vld3;
5239  break;
5240  case NEON::BI__builtin_neon_vld4_dup_v:
5241  Int = Intrinsic::arm_neon_vld4;
5242  break;
5243  default: llvm_unreachable("unknown vld_dup intrinsic?");
5244  }
5245  llvm::Type *Tys[] = {Ty, Int8PtrTy};
5246  Function *F = CGM.getIntrinsic(Int, Tys);
5247  llvm::Value *Align = getAlignmentValue32(PtrOp1);
5248  Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
5249  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5250  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5251  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5252  }
5253  switch (BuiltinID) {
5254  case NEON::BI__builtin_neon_vld2_dup_v:
5255  Int = Intrinsic::arm_neon_vld2lane;
5256  break;
5257  case NEON::BI__builtin_neon_vld3_dup_v:
5258  Int = Intrinsic::arm_neon_vld3lane;
5259  break;
5260  case NEON::BI__builtin_neon_vld4_dup_v:
5261  Int = Intrinsic::arm_neon_vld4lane;
5262  break;
5263  default: llvm_unreachable("unknown vld_dup intrinsic?");
5264  }
5265  llvm::Type *Tys[] = {Ty, Int8PtrTy};
5266  Function *F = CGM.getIntrinsic(Int, Tys);
5267  llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
5268 
5269  SmallVector<Value*, 6> Args;
5270  Args.push_back(Ops[1]);
5271  Args.append(STy->getNumElements(), UndefValue::get(Ty));
5272 
5273  llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
5274  Args.push_back(CI);
5275  Args.push_back(getAlignmentValue32(PtrOp1));
5276 
5277  Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
5278  // Splat lane 0 to all elements in each vector of the result.
5279  for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
5280  Value *Val = Builder.CreateExtractValue(Ops[1], i);
5281  Value *Elt = Builder.CreateBitCast(Val, Ty);
5282  Elt = EmitNeonSplat(Elt, CI);
5283  Elt = Builder.CreateBitCast(Elt, Val->getType());
5284  Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
5285  }
5286  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5287  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5288  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5289  }
5290  case NEON::BI__builtin_neon_vqrshrn_n_v:
5291  Int =
5292  usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
5293  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
5294  1, true);
5295  case NEON::BI__builtin_neon_vqrshrun_n_v:
5296  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
5297  Ops, "vqrshrun_n", 1, true);
5298  case NEON::BI__builtin_neon_vqshrn_n_v:
5299  Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
5300  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
5301  1, true);
5302  case NEON::BI__builtin_neon_vqshrun_n_v:
5303  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
5304  Ops, "vqshrun_n", 1, true);
5305  case NEON::BI__builtin_neon_vrecpe_v:
5306  case NEON::BI__builtin_neon_vrecpeq_v:
5307  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
5308  Ops, "vrecpe");
5309  case NEON::BI__builtin_neon_vrshrn_n_v:
5310  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
5311  Ops, "vrshrn_n", 1, true);
5312  case NEON::BI__builtin_neon_vrsra_n_v:
5313  case NEON::BI__builtin_neon_vrsraq_n_v:
5314  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5315  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5316  Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
5317  Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
5318  Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
5319  return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
5320  case NEON::BI__builtin_neon_vsri_n_v:
5321  case NEON::BI__builtin_neon_vsriq_n_v:
5322  rightShift = true;
5323  LLVM_FALLTHROUGH;
5324  case NEON::BI__builtin_neon_vsli_n_v:
5325  case NEON::BI__builtin_neon_vsliq_n_v:
5326  Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
5327  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
5328  Ops, "vsli_n");
5329  case NEON::BI__builtin_neon_vsra_n_v:
5330  case NEON::BI__builtin_neon_vsraq_n_v:
5331  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5332  Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
5333  return Builder.CreateAdd(Ops[0], Ops[1]);
5334  case NEON::BI__builtin_neon_vst1q_lane_v:
5335  // Handle 64-bit integer elements as a special case. Use a shuffle to get
5336  // a one-element vector and avoid poor code for i64 in the backend.
5337  if (VTy->getElementType()->isIntegerTy(64)) {
5338  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5339  Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
5340  Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
5341  Ops[2] = getAlignmentValue32(PtrOp0);
5342  llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
5343  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
5344  Tys), Ops);
5345  }
5346  // fall through
5347  case NEON::BI__builtin_neon_vst1_lane_v: {
5348  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5349  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
5350  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5351  auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
5352  return St;
5353  }
5354  case NEON::BI__builtin_neon_vtbl1_v:
5355  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
5356  Ops, "vtbl1");
5357