clang 6.0.0svn
CGBuiltin.cpp
1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGObjCRuntime.h"
16 #include "CGOpenCLRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "CodeGenModule.h"
19 #include "ConstantEmitter.h"
20 #include "TargetInfo.h"
21 #include "clang/AST/ASTContext.h"
22 #include "clang/AST/Decl.h"
23 #include "clang/Analysis/Analyses/OSLog.h"
24 #include "clang/Basic/TargetBuiltins.h"
25 #include "clang/Basic/TargetInfo.h"
26 #include "clang/CodeGen/CGFunctionInfo.h"
27 #include "llvm/ADT/StringExtras.h"
28 #include "llvm/IR/CallSite.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/InlineAsm.h"
31 #include "llvm/IR/Intrinsics.h"
32 #include "llvm/IR/MDBuilder.h"
33 #include "llvm/Support/ConvertUTF.h"
34 #include "llvm/Support/ScopedPrinter.h"
35 #include "llvm/Support/TargetParser.h"
36 #include <sstream>
37 
38 using namespace clang;
39 using namespace CodeGen;
40 using namespace llvm;
41 
42 static
43 int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
44  return std::min(High, std::max(Low, Value));
45 }
46 
47 /// getBuiltinLibFunction - Given a builtin id for a function like
48 /// "__builtin_fabsf", return a Function* for "fabsf".
49 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
50  unsigned BuiltinID) {
51  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
52 
53  // Get the name, skip over the __builtin_ prefix (if necessary).
54  StringRef Name;
55  GlobalDecl D(FD);
56 
57  // If the builtin has been declared explicitly with an assembler label,
58  // use the mangled name. This differs from the plain label on platforms
59  // that prefix labels.
60  if (FD->hasAttr<AsmLabelAttr>())
61  Name = getMangledName(D);
62  else
63  Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
64 
65  llvm::FunctionType *Ty =
66  cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
67 
68  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
69 }
70 
71 /// Emit the conversions required to turn the given value into an
72 /// integer of the given size.
73 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
74  QualType T, llvm::IntegerType *IntType) {
75  V = CGF.EmitToMemory(V, T);
76 
77  if (V->getType()->isPointerTy())
78  return CGF.Builder.CreatePtrToInt(V, IntType);
79 
80  assert(V->getType() == IntType);
81  return V;
82 }
83 
84 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
85  QualType T, llvm::Type *ResultType) {
86  V = CGF.EmitFromMemory(V, T);
87 
88  if (ResultType->isPointerTy())
89  return CGF.Builder.CreateIntToPtr(V, ResultType);
90 
91  assert(V->getType() == ResultType);
92  return V;
93 }
94 
95 /// Utility to insert an atomic instruction based on Intrinsic::ID
96 /// and the expression node.
97 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
98  llvm::AtomicRMWInst::BinOp Kind,
99  const CallExpr *E) {
100  QualType T = E->getType();
101  assert(E->getArg(0)->getType()->isPointerType());
102  assert(CGF.getContext().hasSameUnqualifiedType(T,
103  E->getArg(0)->getType()->getPointeeType()));
104  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
105 
106  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
107  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
108 
109  llvm::IntegerType *IntType =
110  llvm::IntegerType::get(CGF.getLLVMContext(),
111  CGF.getContext().getTypeSize(T));
112  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
113 
114  llvm::Value *Args[2];
115  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
116  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
117  llvm::Type *ValueType = Args[1]->getType();
118  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
119 
120  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
121  Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
122  return EmitFromInt(CGF, Result, T, ValueType);
123 }
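// Illustrative note (not in the original source): for a call such as
// __sync_fetch_and_add(&x, 1) on an 'int', this helper emits roughly
//   %old = atomicrmw add i32* %x, i32 1 seq_cst
// with the destination pointer bitcast to the matching integer-pointer type
// and the result converted back through EmitFromInt.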
124 
125 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
126  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
127  Value *Address = CGF.EmitScalarExpr(E->getArg(1));
128 
129  // Convert the type of the pointer to a pointer to the stored type.
130  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
131  Value *BC = CGF.Builder.CreateBitCast(
132  Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
133  LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
134  LV.setNontemporal(true);
135  CGF.EmitStoreOfScalar(Val, LV, false);
136  return nullptr;
137 }
138 
139 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
140  Value *Address = CGF.EmitScalarExpr(E->getArg(0));
141 
142  LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
143  LV.setNontemporal(true);
144  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
145 }
146 
147 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
148  llvm::AtomicRMWInst::BinOp Kind,
149  const CallExpr *E) {
150  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
151 }
152 
153 /// Utility to insert an atomic instruction based on Intrinsic::ID and
154 /// the expression node, where the return value is the result of the
155 /// operation.
156 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
157  llvm::AtomicRMWInst::BinOp Kind,
158  const CallExpr *E,
159  Instruction::BinaryOps Op,
160  bool Invert = false) {
161  QualType T = E->getType();
162  assert(E->getArg(0)->getType()->isPointerType());
163  assert(CGF.getContext().hasSameUnqualifiedType(T,
164  E->getArg(0)->getType()->getPointeeType()));
165  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
166 
167  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
168  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
169 
170  llvm::IntegerType *IntType =
171  llvm::IntegerType::get(CGF.getLLVMContext(),
172  CGF.getContext().getTypeSize(T));
173  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
174 
175  llvm::Value *Args[2];
176  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
177  llvm::Type *ValueType = Args[1]->getType();
178  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
179  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
180 
181  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
182  Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
183  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
184  if (Invert)
185  Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
186  llvm::ConstantInt::get(IntType, -1));
187  Result = EmitFromInt(CGF, Result, T, ValueType);
188  return RValue::get(Result);
189 }
190 
191 /// @brief Utility to insert an atomic cmpxchg instruction.
192 ///
193 /// @param CGF The current codegen function.
194 /// @param E Builtin call expression to convert to cmpxchg.
195 /// arg0 - address to operate on
196 /// arg1 - value to compare with
197 /// arg2 - new value
198 /// @param ReturnBool Specifies whether to return success flag of
199 /// cmpxchg result or the old value.
200 ///
201 /// @returns result of cmpxchg, according to ReturnBool
202 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
203  bool ReturnBool) {
204  QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
205  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
206  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
207 
208  llvm::IntegerType *IntType = llvm::IntegerType::get(
209  CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
210  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
211 
212  Value *Args[3];
213  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
214  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
215  llvm::Type *ValueType = Args[1]->getType();
216  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
217  Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
218 
219  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
220  Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
221  llvm::AtomicOrdering::SequentiallyConsistent);
222  if (ReturnBool)
223  // Extract boolean success flag and zext it to int.
224  return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
225  CGF.ConvertType(E->getType()));
226  else
227  // Extract old value and emit it using the same type as compare value.
228  return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
229  ValueType);
230 }
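// Illustrative lowering (not in the original source): with ReturnBool=true,
// e.g. for __sync_bool_compare_and_swap(&x, old, new), this produces roughly
//   %pair = cmpxchg i32* %x, i32 %old, i32 %new seq_cst seq_cst
//   %ok   = extractvalue { i32, i1 } %pair, 1
// zero-extended to the call's result type; with ReturnBool=false the old
// value (element 0 of the pair) is returned instead.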
231 
232 // Emit a simple mangled intrinsic that has 1 argument and a return type
233 // matching the argument type.
234 static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
235  const CallExpr *E,
236  unsigned IntrinsicID) {
237  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
238 
239  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
240  return CGF.Builder.CreateCall(F, Src0);
241 }
242 
243 // Emit an intrinsic that has 2 operands of the same type as its result.
244 static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
245  const CallExpr *E,
246  unsigned IntrinsicID) {
247  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
248  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
249 
250  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
251  return CGF.Builder.CreateCall(F, { Src0, Src1 });
252 }
253 
254 // Emit an intrinsic that has 3 operands of the same type as its result.
255 static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
256  const CallExpr *E,
257  unsigned IntrinsicID) {
258  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
259  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
260  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
261 
262  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
263  return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
264 }
265 
266 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
267 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
268  const CallExpr *E,
269  unsigned IntrinsicID) {
270  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
271  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
272 
273  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
274  return CGF.Builder.CreateCall(F, {Src0, Src1});
275 }
276 
277 /// EmitFAbs - Emit a call to @llvm.fabs().
278 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
279  Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
280  llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
281  Call->setDoesNotAccessMemory();
282  return Call;
283 }
284 
285 /// Emit the computation of the sign bit for a floating point value. Returns
286 /// the i1 sign bit value.
287 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
288  LLVMContext &C = CGF.CGM.getLLVMContext();
289 
290  llvm::Type *Ty = V->getType();
291  int Width = Ty->getPrimitiveSizeInBits();
292  llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
293  V = CGF.Builder.CreateBitCast(V, IntTy);
294  if (Ty->isPPC_FP128Ty()) {
295  // We want the sign bit of the higher-order double. The bitcast we just
296  // did works as if the double-double was stored to memory and then
297  // read as an i128. The "store" will put the higher-order double in the
298  // lower address in both little- and big-Endian modes, but the "load"
299  // will treat those bits as a different part of the i128: the low bits in
300  // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
301  // we need to shift the high bits down to the low before truncating.
302  Width >>= 1;
303  if (CGF.getTarget().isBigEndian()) {
304  Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
305  V = CGF.Builder.CreateLShr(V, ShiftCst);
306  }
307  // We are truncating value in order to extract the higher-order
308  // double, which we will be using to extract the sign from.
309  IntTy = llvm::IntegerType::get(C, Width);
310  V = CGF.Builder.CreateTrunc(V, IntTy);
311  }
312  Value *Zero = llvm::Constant::getNullValue(IntTy);
313  return CGF.Builder.CreateICmpSLT(V, Zero);
314 }
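// Illustrative example (not in the original source): for a plain 'double',
// this reduces to a bitcast to i64 followed by 'icmp slt i64 %bits, 0'; the
// ppc_fp128 path above merely isolates the higher-order double first.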
315 
316 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
317  const CallExpr *E, llvm::Constant *calleeValue) {
318  CGCallee callee = CGCallee::forDirect(calleeValue, FD);
319  return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
320 }
321 
322 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
323 /// depending on IntrinsicID.
324 ///
325 /// \arg CGF The current codegen function.
326 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
327 /// \arg X The first argument to the llvm.*.with.overflow.*.
328 /// \arg Y The second argument to the llvm.*.with.overflow.*.
329 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
330 /// \returns The result (i.e. sum/product) returned by the intrinsic.
331 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
332  const llvm::Intrinsic::ID IntrinsicID,
333  llvm::Value *X, llvm::Value *Y,
334  llvm::Value *&Carry) {
335  // Make sure we have integers of the same width.
336  assert(X->getType() == Y->getType() &&
337  "Arguments must be the same type. (Did you forget to make sure both "
338  "arguments have the same integer width?)");
339 
340  llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
341  llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
342  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
343  return CGF.Builder.CreateExtractValue(Tmp, 0);
344 }
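// Illustrative sketch (not in the original source): with IntrinsicID set to
// llvm::Intrinsic::sadd_with_overflow and 32-bit operands, this emits roughly
//   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
// returning element 0 as the sum and element 1 through Carry.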
345 
346 static Value *emitRangedBuiltin(CodeGenFunction &CGF,
347  unsigned IntrinsicID,
348  int low, int high) {
349  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
350  llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
351  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
352  llvm::Instruction *Call = CGF.Builder.CreateCall(F);
353  Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
354  return Call;
355 }
356 
357 namespace {
358  struct WidthAndSignedness {
359  unsigned Width;
360  bool Signed;
361  };
362 }
363 
364 static WidthAndSignedness
365 getIntegerWidthAndSignedness(const clang::ASTContext &context,
366  const clang::QualType Type) {
367  assert(Type->isIntegerType() && "Given type is not an integer.");
368  unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
369  bool Signed = Type->isSignedIntegerType();
370  return {Width, Signed};
371 }
372 
373 // Given one or more integer types, this function produces an integer type that
374 // encompasses them: any value in one of the given types could be expressed in
375 // the encompassing type.
376 static struct WidthAndSignedness
377 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
378  assert(Types.size() > 0 && "Empty list of types.");
379 
380  // If any of the given types is signed, we must return a signed type.
381  bool Signed = false;
382  for (const auto &Type : Types) {
383  Signed |= Type.Signed;
384  }
385 
386  // The encompassing type must have a width greater than or equal to the width
387  // of the specified types. Additionally, if the encompassing type is signed,
388  // its width must be strictly greater than the width of any unsigned types
389  // given.
390  unsigned Width = 0;
391  for (const auto &Type : Types) {
392  unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
393  if (Width < MinWidth) {
394  Width = MinWidth;
395  }
396  }
397 
398  return {Width, Signed};
399 }
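// Worked example (not in the original source): encompassing
// {unsigned 32-bit, signed 16-bit} must be signed because one input is signed,
// and needs Width >= 33 so that every unsigned 32-bit value is still
// representable, giving {33, true}.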
400 
401 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
402  llvm::Type *DestType = Int8PtrTy;
403  if (ArgValue->getType() != DestType)
404  ArgValue =
405  Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
406 
407  Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
408  return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
409 }
410 
411 /// Checks if using the result of __builtin_object_size(p, @p From) in place of
412 /// __builtin_object_size(p, @p To) is correct
413 static bool areBOSTypesCompatible(int From, int To) {
414  // Note: Our __builtin_object_size implementation currently treats Type=0 and
415  // Type=2 identically. Encoding this implementation detail here may make
416  // improving __builtin_object_size difficult in the future, so it's omitted.
417  return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
418 }
419 
420 static llvm::Value *
421 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
422  return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
423 }
424 
425 llvm::Value *
426 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
427  llvm::IntegerType *ResType,
428  llvm::Value *EmittedE) {
429  uint64_t ObjectSize;
430  if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
431  return emitBuiltinObjectSize(E, Type, ResType, EmittedE);
432  return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
433 }
434 
435 /// Returns a Value corresponding to the size of the given expression.
436 /// This Value may be either of the following:
437 /// - A llvm::Argument (if E is a param with the pass_object_size attribute on
438 /// it)
439 /// - A call to the @llvm.objectsize intrinsic
440 ///
441 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
442 /// and we wouldn't otherwise try to reference a pass_object_size parameter,
443 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
444 llvm::Value *
445 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
446  llvm::IntegerType *ResType,
447  llvm::Value *EmittedE) {
448  // We need to reference an argument if the pointer is a parameter with the
449  // pass_object_size attribute.
450  if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
451  auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
452  auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
453  if (Param != nullptr && PS != nullptr &&
454  areBOSTypesCompatible(PS->getType(), Type)) {
455  auto Iter = SizeArguments.find(Param);
456  assert(Iter != SizeArguments.end());
457 
458  const ImplicitParamDecl *D = Iter->second;
459  auto DIter = LocalDeclMap.find(D);
460  assert(DIter != LocalDeclMap.end());
461 
462  return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
463  getContext().getSizeType(), E->getLocStart());
464  }
465  }
466 
467  // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
468  // evaluate E for side-effects. In either case, we shouldn't lower to
469  // @llvm.objectsize.
470  if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
471  return getDefaultBuiltinObjectSizeResult(Type, ResType);
472 
473  Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
474  assert(Ptr->getType()->isPointerTy() &&
475  "Non-pointer passed to __builtin_object_size?");
476 
477  Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
478 
479  // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
480  Value *Min = Builder.getInt1((Type & 2) != 0);
481  // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
482  Value *NullIsUnknown = Builder.getTrue();
483  return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
484 }
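// Illustrative lowering (not in the original source): when no pass_object_size
// parameter applies, __builtin_object_size(p, 2) becomes roughly
//   call i64 @llvm.objectsize.i64.p0i8(i8* %p, i1 true, i1 true)
// where the first i1 selects the minimum result and the second makes a null
// pointer report an unknown size, matching GCC.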
485 
486 // Many of the MSVC builtins are on both x64 and ARM; to avoid repeating code, we
487 // handle them here.
488 enum class CodeGenFunction::MSVCIntrin {
489  _BitScanForward,
490  _BitScanReverse,
491  _InterlockedAnd,
492  _InterlockedDecrement,
493  _InterlockedExchange,
494  _InterlockedExchangeAdd,
495  _InterlockedExchangeSub,
496  _InterlockedIncrement,
497  _InterlockedOr,
498  _InterlockedXor,
499  _interlockedbittestandset,
500  __fastfail,
501 };
502 
503 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
504  const CallExpr *E) {
505  switch (BuiltinID) {
506  case MSVCIntrin::_BitScanForward:
507  case MSVCIntrin::_BitScanReverse: {
508  Value *ArgValue = EmitScalarExpr(E->getArg(1));
509 
510  llvm::Type *ArgType = ArgValue->getType();
511  llvm::Type *IndexType =
512  EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
513  llvm::Type *ResultType = ConvertType(E->getType());
514 
515  Value *ArgZero = llvm::Constant::getNullValue(ArgType);
516  Value *ResZero = llvm::Constant::getNullValue(ResultType);
517  Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
518 
519  BasicBlock *Begin = Builder.GetInsertBlock();
520  BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
521  Builder.SetInsertPoint(End);
522  PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
523 
524  Builder.SetInsertPoint(Begin);
525  Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
526  BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
527  Builder.CreateCondBr(IsZero, End, NotZero);
528  Result->addIncoming(ResZero, Begin);
529 
530  Builder.SetInsertPoint(NotZero);
531  Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));
532 
533  if (BuiltinID == MSVCIntrin::_BitScanForward) {
534  Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
535  Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
536  ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
537  Builder.CreateStore(ZeroCount, IndexAddress, false);
538  } else {
539  unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
540  Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
541 
542  Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
543  Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
544  ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
545  Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
546  Builder.CreateStore(Index, IndexAddress, false);
547  }
548  Builder.CreateBr(End);
549  Result->addIncoming(ResOne, NotZero);
550 
551  Builder.SetInsertPoint(End);
552  return Result;
553  }
554  case MSVCIntrin::_InterlockedAnd:
555  return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
556  case MSVCIntrin::_InterlockedExchange:
557  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
558  case MSVCIntrin::_InterlockedExchangeAdd:
559  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
560  case MSVCIntrin::_InterlockedExchangeSub:
561  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
562  case MSVCIntrin::_InterlockedOr:
563  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
564  case MSVCIntrin::_InterlockedXor:
565  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
566 
567  case MSVCIntrin::_interlockedbittestandset: {
568  llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
569  llvm::Value *Bit = EmitScalarExpr(E->getArg(1));
570  AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
571  AtomicRMWInst::Or, Addr,
572  Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit),
573  llvm::AtomicOrdering::SequentiallyConsistent);
574  // Shift the relevant bit to the least significant position, truncate to
575  // the result type, and test the low bit.
576  llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit);
577  llvm::Value *Truncated =
578  Builder.CreateTrunc(Shifted, ConvertType(E->getType()));
579  return Builder.CreateAnd(Truncated,
580  ConstantInt::get(Truncated->getType(), 1));
581  }
582 
583  case MSVCIntrin::_InterlockedDecrement: {
584  llvm::Type *IntTy = ConvertType(E->getType());
585  AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
586  AtomicRMWInst::Sub,
587  EmitScalarExpr(E->getArg(0)),
588  ConstantInt::get(IntTy, 1),
589  llvm::AtomicOrdering::SequentiallyConsistent);
590  return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1));
591  }
592  case MSVCIntrin::_InterlockedIncrement: {
593  llvm::Type *IntTy = ConvertType(E->getType());
594  AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
595  AtomicRMWInst::Add,
596  EmitScalarExpr(E->getArg(0)),
597  ConstantInt::get(IntTy, 1),
598  llvm::AtomicOrdering::SequentiallyConsistent);
599  return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
600  }
601 
602  case MSVCIntrin::__fastfail: {
603  // Request immediate process termination from the kernel. The instruction
604  // sequences to do this are documented on MSDN:
605  // https://msdn.microsoft.com/en-us/library/dn774154.aspx
606  llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
607  StringRef Asm, Constraints;
608  switch (ISA) {
609  default:
610  ErrorUnsupported(E, "__fastfail call for this architecture");
611  break;
612  case llvm::Triple::x86:
613  case llvm::Triple::x86_64:
614  Asm = "int $$0x29";
615  Constraints = "{cx}";
616  break;
617  case llvm::Triple::thumb:
618  Asm = "udf #251";
619  Constraints = "{r0}";
620  break;
621  }
622  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
623  llvm::InlineAsm *IA =
624  llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
625  llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
626  getLLVMContext(), llvm::AttributeList::FunctionIndex,
627  llvm::Attribute::NoReturn);
628  CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
629  CS.setAttributes(NoReturnAttr);
630  return CS.getInstruction();
631  }
632  }
633  llvm_unreachable("Incorrect MSVC intrinsic!");
634 }
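// Usage note (not in the original source): e.g. _BitScanForward(&Index, Mask)
// follows the two-block CFG built above -- it yields 0 when Mask is zero and
// otherwise stores cttz(Mask) to *Index and yields 1.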
635 
636 namespace {
637 // ARC cleanup for __builtin_os_log_format
638 struct CallObjCArcUse final : EHScopeStack::Cleanup {
639  CallObjCArcUse(llvm::Value *object) : object(object) {}
640  llvm::Value *object;
641 
642  void Emit(CodeGenFunction &CGF, Flags flags) override {
643  CGF.EmitARCIntrinsicUse(object);
644  }
645 };
646 }
647 
648 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
649  BuiltinCheckKind Kind) {
650  assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
651  && "Unsupported builtin check kind");
652 
653  Value *ArgValue = EmitScalarExpr(E);
654  if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef())
655  return ArgValue;
656 
657  SanitizerScope SanScope(this);
658  Value *Cond = Builder.CreateICmpNE(
659  ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
660  EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
661  SanitizerHandler::InvalidBuiltin,
662  {EmitCheckSourceLocation(E->getExprLoc()),
663  llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
664  None);
665  return ArgValue;
666 }
667 
668 /// Get the argument type for arguments to os_log_helper.
669 static CanQualType getOSLogArgType(ASTContext &C, int Size) {
670  QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
671  return C.getCanonicalType(UnsignedTy);
672 }
673 
674 llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
675  const analyze_os_log::OSLogBufferLayout &Layout,
676  CharUnits BufferAlignment) {
677  ASTContext &Ctx = getContext();
678 
679  llvm::SmallString<64> Name;
680  {
681  raw_svector_ostream OS(Name);
682  OS << "__os_log_helper";
683  OS << "_" << BufferAlignment.getQuantity();
684  OS << "_" << int(Layout.getSummaryByte());
685  OS << "_" << int(Layout.getNumArgsByte());
686  for (const auto &Item : Layout.Items)
687  OS << "_" << int(Item.getSizeByte()) << "_"
688  << int(Item.getDescriptorByte());
689  }
690 
691  if (llvm::Function *F = CGM.getModule().getFunction(Name))
692  return F;
693 
694  llvm::SmallVector<ImplicitParamDecl, 4> Params;
695  Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"),
696  Ctx.VoidPtrTy, ImplicitParamDecl::Other);
697 
698  for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
699  char Size = Layout.Items[I].getSizeByte();
700  if (!Size)
701  continue;
702 
703  Params.emplace_back(
704  Ctx, nullptr, SourceLocation(),
705  &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)),
706  getOSLogArgType(Ctx, Size), ImplicitParamDecl::Other);
707  }
708 
709  FunctionArgList Args;
710  for (auto &P : Params)
711  Args.push_back(&P);
712 
713  // The helper function has linkonce_odr linkage to enable the linker to merge
714  // identical functions. To ensure the merging always happens, 'noinline' is
715  // attached to the function when compiling with -Oz.
716  const CGFunctionInfo &FI =
717  CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
718  llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
719  llvm::Function *Fn = llvm::Function::Create(
720  FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
721  Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
722  CGM.SetLLVMFunctionAttributes(nullptr, FI, Fn);
723  CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
724 
725  // Attach 'noinline' at -Oz.
726  if (CGM.getCodeGenOpts().OptimizeSize == 2)
727  Fn->addFnAttr(llvm::Attribute::NoInline);
728 
729  auto NL = ApplyDebugLocation::CreateEmpty(*this);
730  IdentifierInfo *II = &Ctx.Idents.get(Name);
731  FunctionDecl *FD = FunctionDecl::Create(
732  Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
733  Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false);
734 
735  StartFunction(FD, Ctx.VoidTy, Fn, FI, Args);
736 
737  // Create a scope with an artificial location for the body of this function.
738  auto AL = ApplyDebugLocation::CreateArtificial(*this);
739 
741  Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(&Params[0]), "buf"),
742  BufferAlignment);
743  Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
744  Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
745  Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
746  Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
747 
748  unsigned I = 1;
749  for (const auto &Item : Layout.Items) {
750  Builder.CreateStore(
751  Builder.getInt8(Item.getDescriptorByte()),
752  Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
753  Builder.CreateStore(
754  Builder.getInt8(Item.getSizeByte()),
755  Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
756 
757  CharUnits Size = Item.size();
758  if (!Size.getQuantity())
759  continue;
760 
761  Address Arg = GetAddrOfLocalVar(&Params[I]);
762  Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
763  Addr = Builder.CreateBitCast(Addr, Arg.getPointer()->getType(),
764  "argDataCast");
765  Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
766  Offset += Size;
767  ++I;
768  }
769 
770  FinishFunction();
771 
772  return Fn;
773 }
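// Illustrative example (not in the original source): a buffer at 8-byte
// alignment holding one 4-byte argument yields a helper named along the lines
// of "__os_log_helper_8_<summary>_<numArgs>_4_<descriptor>", following the
// mangling assembled above; the exact digits depend on the layout bytes.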
774 
775 RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
776  assert(E.getNumArgs() >= 2 &&
777  "__builtin_os_log_format takes at least 2 arguments");
778  ASTContext &Ctx = getContext();
779  analyze_os_log::OSLogBufferLayout Layout;
780  analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
781  Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
782  llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
783 
784  // Ignore argument 1, the format string. It is not currently used.
785  CallArgList Args;
786  Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy);
787 
788  for (const auto &Item : Layout.Items) {
789  int Size = Item.getSizeByte();
790  if (!Size)
791  continue;
792 
793  llvm::Value *ArgVal;
794 
795  if (const Expr *TheExpr = Item.getExpr()) {
796  ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
797 
798  // Check if this is a retainable type.
799  if (TheExpr->getType()->isObjCRetainableType()) {
800  assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
801  "Only scalar can be a ObjC retainable type");
802  // Check if the object is constant, if not, save it in
803  // RetainableOperands.
804  if (!isa<Constant>(ArgVal))
805  RetainableOperands.push_back(ArgVal);
806  }
807  } else {
808  ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
809  }
810 
811  unsigned ArgValSize =
812  CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
813  llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
814  ArgValSize);
815  ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
816  CanQualType ArgTy = getOSLogArgType(Ctx, Size);
817  // If ArgVal has type x86_fp80, zero-extend ArgVal.
818  ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
819  Args.add(RValue::get(ArgVal), ArgTy);
820  }
821 
822  const CGFunctionInfo &FI =
823  CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
824  llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
825  Layout, BufAddr.getAlignment());
826  EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
827 
828  // Push a clang.arc.use cleanup for each object in RetainableOperands. The
829  // cleanup will cause the use to appear after the final log call, keeping
830  // the object valid while it’s held in the log buffer. Note that if there’s
831  // a release cleanup on the object, it will already be active; since
832  // cleanups are emitted in reverse order, the use will occur before the
833  // object is released.
834  if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
835  CGM.getCodeGenOpts().OptimizationLevel != 0)
836  for (llvm::Value *Object : RetainableOperands)
837  pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), Object);
838 
839  return RValue::get(BufAddr.getPointer());
840 }
841 
842 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
843  unsigned BuiltinID, const CallExpr *E,
844  ReturnValueSlot ReturnValue) {
845  // See if we can constant fold this builtin. If so, don't emit it at all.
846  Expr::EvalResult Result;
847  if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
848  !Result.hasSideEffects()) {
849  if (Result.Val.isInt())
850  return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
851  Result.Val.getInt()));
852  if (Result.Val.isFloat())
853  return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
854  Result.Val.getFloat()));
855  }
856 
857  // There are LLVM math intrinsics/instructions corresponding to math library
858  // functions except the LLVM op will never set errno while the math library
859  // might. Also, math builtins have the same semantics as their math library
860  // twins. Thus, we can transform math library and builtin calls to their
861  // LLVM counterparts if the call is marked 'const' (known to never set errno).
862  if (FD->hasAttr<ConstAttr>()) {
863  switch (BuiltinID) {
864  case Builtin::BIceil:
865  case Builtin::BIceilf:
866  case Builtin::BIceill:
867  case Builtin::BI__builtin_ceil:
868  case Builtin::BI__builtin_ceilf:
869  case Builtin::BI__builtin_ceill:
870  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
871 
872  case Builtin::BIcopysign:
873  case Builtin::BIcopysignf:
874  case Builtin::BIcopysignl:
875  case Builtin::BI__builtin_copysign:
876  case Builtin::BI__builtin_copysignf:
877  case Builtin::BI__builtin_copysignl:
878  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
879 
880  case Builtin::BIcos:
881  case Builtin::BIcosf:
882  case Builtin::BIcosl:
883  case Builtin::BI__builtin_cos:
884  case Builtin::BI__builtin_cosf:
885  case Builtin::BI__builtin_cosl:
886  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::cos));
887 
888  case Builtin::BIexp:
889  case Builtin::BIexpf:
890  case Builtin::BIexpl:
891  case Builtin::BI__builtin_exp:
892  case Builtin::BI__builtin_expf:
893  case Builtin::BI__builtin_expl:
894  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp));
895 
896  case Builtin::BIexp2:
897  case Builtin::BIexp2f:
898  case Builtin::BIexp2l:
899  case Builtin::BI__builtin_exp2:
900  case Builtin::BI__builtin_exp2f:
901  case Builtin::BI__builtin_exp2l:
902  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp2));
903 
904  case Builtin::BIfabs:
905  case Builtin::BIfabsf:
906  case Builtin::BIfabsl:
907  case Builtin::BI__builtin_fabs:
908  case Builtin::BI__builtin_fabsf:
909  case Builtin::BI__builtin_fabsl:
910  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
911 
912  case Builtin::BIfloor:
913  case Builtin::BIfloorf:
914  case Builtin::BIfloorl:
915  case Builtin::BI__builtin_floor:
916  case Builtin::BI__builtin_floorf:
917  case Builtin::BI__builtin_floorl:
918  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
919 
920  case Builtin::BIfma:
921  case Builtin::BIfmaf:
922  case Builtin::BIfmal:
923  case Builtin::BI__builtin_fma:
924  case Builtin::BI__builtin_fmaf:
925  case Builtin::BI__builtin_fmal:
926  return RValue::get(emitTernaryBuiltin(*this, E, Intrinsic::fma));
927 
928  case Builtin::BIfmax:
929  case Builtin::BIfmaxf:
930  case Builtin::BIfmaxl:
931  case Builtin::BI__builtin_fmax:
932  case Builtin::BI__builtin_fmaxf:
933  case Builtin::BI__builtin_fmaxl:
934  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
935 
936  case Builtin::BIfmin:
937  case Builtin::BIfminf:
938  case Builtin::BIfminl:
939  case Builtin::BI__builtin_fmin:
940  case Builtin::BI__builtin_fminf:
941  case Builtin::BI__builtin_fminl:
942  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
943 
944  // fmod() is a special-case. It maps to the frem instruction rather than an
945  // LLVM intrinsic.
946  case Builtin::BIfmod:
947  case Builtin::BIfmodf:
948  case Builtin::BIfmodl:
949  case Builtin::BI__builtin_fmod:
950  case Builtin::BI__builtin_fmodf:
951  case Builtin::BI__builtin_fmodl: {
952  Value *Arg1 = EmitScalarExpr(E->getArg(0));
953  Value *Arg2 = EmitScalarExpr(E->getArg(1));
954  return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
955  }
956 
957  case Builtin::BIlog:
958  case Builtin::BIlogf:
959  case Builtin::BIlogl:
960  case Builtin::BI__builtin_log:
961  case Builtin::BI__builtin_logf:
962  case Builtin::BI__builtin_logl:
963  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log));
964 
965  case Builtin::BIlog10:
966  case Builtin::BIlog10f:
967  case Builtin::BIlog10l:
968  case Builtin::BI__builtin_log10:
969  case Builtin::BI__builtin_log10f:
970  case Builtin::BI__builtin_log10l:
971  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log10));
972 
973  case Builtin::BIlog2:
974  case Builtin::BIlog2f:
975  case Builtin::BIlog2l:
976  case Builtin::BI__builtin_log2:
977  case Builtin::BI__builtin_log2f:
978  case Builtin::BI__builtin_log2l:
979  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log2));
980 
981  case Builtin::BInearbyint:
982  case Builtin::BInearbyintf:
983  case Builtin::BInearbyintl:
984  case Builtin::BI__builtin_nearbyint:
985  case Builtin::BI__builtin_nearbyintf:
986  case Builtin::BI__builtin_nearbyintl:
987  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
988 
989  case Builtin::BIpow:
990  case Builtin::BIpowf:
991  case Builtin::BIpowl:
992  case Builtin::BI__builtin_pow:
993  case Builtin::BI__builtin_powf:
994  case Builtin::BI__builtin_powl:
995  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::pow));
996 
997  case Builtin::BIrint:
998  case Builtin::BIrintf:
999  case Builtin::BIrintl:
1000  case Builtin::BI__builtin_rint:
1001  case Builtin::BI__builtin_rintf:
1002  case Builtin::BI__builtin_rintl:
1003  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
1004 
1005  case Builtin::BIround:
1006  case Builtin::BIroundf:
1007  case Builtin::BIroundl:
1008  case Builtin::BI__builtin_round:
1009  case Builtin::BI__builtin_roundf:
1010  case Builtin::BI__builtin_roundl:
1011  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
1012 
1013  case Builtin::BIsin:
1014  case Builtin::BIsinf:
1015  case Builtin::BIsinl:
1016  case Builtin::BI__builtin_sin:
1017  case Builtin::BI__builtin_sinf:
1018  case Builtin::BI__builtin_sinl:
1019  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sin));
1020 
1021  case Builtin::BIsqrt:
1022  case Builtin::BIsqrtf:
1023  case Builtin::BIsqrtl:
1024  case Builtin::BI__builtin_sqrt:
1025  case Builtin::BI__builtin_sqrtf:
1026  case Builtin::BI__builtin_sqrtl:
1027  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sqrt));
1028 
1029  case Builtin::BItrunc:
1030  case Builtin::BItruncf:
1031  case Builtin::BItruncl:
1032  case Builtin::BI__builtin_trunc:
1033  case Builtin::BI__builtin_truncf:
1034  case Builtin::BI__builtin_truncl:
1035  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
1036 
1037  default:
1038  break;
1039  }
1040  }
1041 
1042  switch (BuiltinID) {
1043  default: break;
1044  case Builtin::BI__builtin___CFStringMakeConstantString:
1045  case Builtin::BI__builtin___NSStringMakeConstantString:
1046  return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
1047  case Builtin::BI__builtin_stdarg_start:
1048  case Builtin::BI__builtin_va_start:
1049  case Builtin::BI__va_start:
1050  case Builtin::BI__builtin_va_end:
1051  return RValue::get(
1052  EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
1053  ? EmitScalarExpr(E->getArg(0))
1054  : EmitVAListRef(E->getArg(0)).getPointer(),
1055  BuiltinID != Builtin::BI__builtin_va_end));
1056  case Builtin::BI__builtin_va_copy: {
1057  Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
1058  Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
1059 
1060  llvm::Type *Type = Int8PtrTy;
1061 
1062  DstPtr = Builder.CreateBitCast(DstPtr, Type);
1063  SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
1064  return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
1065  {DstPtr, SrcPtr}));
1066  }
1067  case Builtin::BI__builtin_abs:
1068  case Builtin::BI__builtin_labs:
1069  case Builtin::BI__builtin_llabs: {
1070  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1071 
1072  Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
1073  Value *CmpResult =
1074  Builder.CreateICmpSGE(ArgValue,
1075  llvm::Constant::getNullValue(ArgValue->getType()),
1076  "abscond");
1077  Value *Result =
1078  Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
1079 
1080  return RValue::get(Result);
1081  }
1082  case Builtin::BI__builtin_conj:
1083  case Builtin::BI__builtin_conjf:
1084  case Builtin::BI__builtin_conjl: {
1085  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1086  Value *Real = ComplexVal.first;
1087  Value *Imag = ComplexVal.second;
1088  Value *Zero =
1089  Imag->getType()->isFPOrFPVectorTy()
1090  ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
1091  : llvm::Constant::getNullValue(Imag->getType());
1092 
1093  Imag = Builder.CreateFSub(Zero, Imag, "sub");
1094  return RValue::getComplex(std::make_pair(Real, Imag));
1095  }
1096  case Builtin::BI__builtin_creal:
1097  case Builtin::BI__builtin_crealf:
1098  case Builtin::BI__builtin_creall:
1099  case Builtin::BIcreal:
1100  case Builtin::BIcrealf:
1101  case Builtin::BIcreall: {
1102  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1103  return RValue::get(ComplexVal.first);
1104  }
1105 
1106  case Builtin::BI__builtin_cimag:
1107  case Builtin::BI__builtin_cimagf:
1108  case Builtin::BI__builtin_cimagl:
1109  case Builtin::BIcimag:
1110  case Builtin::BIcimagf:
1111  case Builtin::BIcimagl: {
1112  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1113  return RValue::get(ComplexVal.second);
1114  }
1115 
1116  case Builtin::BI__builtin_ctzs:
1117  case Builtin::BI__builtin_ctz:
1118  case Builtin::BI__builtin_ctzl:
1119  case Builtin::BI__builtin_ctzll: {
1120  Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
1121 
1122  llvm::Type *ArgType = ArgValue->getType();
1123  Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1124 
1125  llvm::Type *ResultType = ConvertType(E->getType());
1126  Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
1127  Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
1128  if (Result->getType() != ResultType)
1129  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1130  "cast");
1131  return RValue::get(Result);
1132  }
1133  case Builtin::BI__builtin_clzs:
1134  case Builtin::BI__builtin_clz:
1135  case Builtin::BI__builtin_clzl:
1136  case Builtin::BI__builtin_clzll: {
1137  Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
1138 
1139  llvm::Type *ArgType = ArgValue->getType();
1140  Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1141 
1142  llvm::Type *ResultType = ConvertType(E->getType());
1143  Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
1144  Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
1145  if (Result->getType() != ResultType)
1146  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1147  "cast");
1148  return RValue::get(Result);
1149  }
1150  case Builtin::BI__builtin_ffs:
1151  case Builtin::BI__builtin_ffsl:
1152  case Builtin::BI__builtin_ffsll: {
1153  // ffs(x) -> x ? cttz(x) + 1 : 0
1154  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1155 
1156  llvm::Type *ArgType = ArgValue->getType();
1157  Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1158 
1159  llvm::Type *ResultType = ConvertType(E->getType());
1160  Value *Tmp =
1161  Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
1162  llvm::ConstantInt::get(ArgType, 1));
1163  Value *Zero = llvm::Constant::getNullValue(ArgType);
1164  Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
1165  Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
1166  if (Result->getType() != ResultType)
1167  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1168  "cast");
1169  return RValue::get(Result);
1170  }
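  // Illustrative lowering (not in the original source) for __builtin_ffs(x)
  // with a 32-bit argument, roughly:
  //   %c = call i32 @llvm.cttz.i32(i32 %x, i1 true)
  //   %t = add i32 %c, 1
  //   %z = icmp eq i32 %x, 0
  //   %r = select i1 %z, i32 0, i32 %t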
1171  case Builtin::BI__builtin_parity:
1172  case Builtin::BI__builtin_parityl:
1173  case Builtin::BI__builtin_parityll: {
1174  // parity(x) -> ctpop(x) & 1
1175  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1176 
1177  llvm::Type *ArgType = ArgValue->getType();
1178  Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
1179 
1180  llvm::Type *ResultType = ConvertType(E->getType());
1181  Value *Tmp = Builder.CreateCall(F, ArgValue);
1182  Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
1183  if (Result->getType() != ResultType)
1184  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1185  "cast");
1186  return RValue::get(Result);
1187  }
1188  case Builtin::BI__popcnt16:
1189  case Builtin::BI__popcnt:
1190  case Builtin::BI__popcnt64:
1191  case Builtin::BI__builtin_popcount:
1192  case Builtin::BI__builtin_popcountl:
1193  case Builtin::BI__builtin_popcountll: {
1194  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1195 
1196  llvm::Type *ArgType = ArgValue->getType();
1197  Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
1198 
1199  llvm::Type *ResultType = ConvertType(E->getType());
1200  Value *Result = Builder.CreateCall(F, ArgValue);
1201  if (Result->getType() != ResultType)
1202  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1203  "cast");
1204  return RValue::get(Result);
1205  }
1206  case Builtin::BI_rotr8:
1207  case Builtin::BI_rotr16:
1208  case Builtin::BI_rotr:
1209  case Builtin::BI_lrotr:
1210  case Builtin::BI_rotr64: {
1211  Value *Val = EmitScalarExpr(E->getArg(0));
1212  Value *Shift = EmitScalarExpr(E->getArg(1));
1213 
1214  llvm::Type *ArgType = Val->getType();
1215  Shift = Builder.CreateIntCast(Shift, ArgType, false);
1216  unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
1217  Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
1218  Value *ArgZero = llvm::Constant::getNullValue(ArgType);
1219 
1220  Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
1221  Shift = Builder.CreateAnd(Shift, Mask);
1222  Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift);
1223 
1224  Value *RightShifted = Builder.CreateLShr(Val, Shift);
1225  Value *LeftShifted = Builder.CreateShl(Val, LeftShift);
1226  Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
1227 
1228  Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
1229  Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
1230  return RValue::get(Result);
1231  }
1232  case Builtin::BI_rotl8:
1233  case Builtin::BI_rotl16:
1234  case Builtin::BI_rotl:
1235  case Builtin::BI_lrotl:
1236  case Builtin::BI_rotl64: {
1237  Value *Val = EmitScalarExpr(E->getArg(0));
1238  Value *Shift = EmitScalarExpr(E->getArg(1));
1239 
1240  llvm::Type *ArgType = Val->getType();
1241  Shift = Builder.CreateIntCast(Shift, ArgType, false);
1242  unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
1243  Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
1244  Value *ArgZero = llvm::Constant::getNullValue(ArgType);
1245 
1246  Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
1247  Shift = Builder.CreateAnd(Shift, Mask);
1248  Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift);
1249 
1250  Value *LeftShifted = Builder.CreateShl(Val, Shift);
1251  Value *RightShifted = Builder.CreateLShr(Val, RightShift);
1252  Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
1253 
1254  Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
1255  Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
1256  return RValue::get(Result);
1257  }
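  // Illustrative semantics (not in the original source) for the two rotate
  // cases above, with w = bit width and s = Shift & (w - 1):
  //   rotr(x, s) = (x >> s) | (x << (w - s))
  //   rotl(x, s) = (x << s) | (x >> (w - s))
  // The final select guards s == 0, where (w - s) == w would otherwise be an
  // out-of-range shift.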
1258  case Builtin::BI__builtin_unpredictable: {
1259  // Always return the argument of __builtin_unpredictable. LLVM does not
1260  // handle this builtin. Metadata for this builtin should be added directly
1261  // to instructions such as branches or switches that use it.
1262  return RValue::get(EmitScalarExpr(E->getArg(0)));
1263  }
1264  case Builtin::BI__builtin_expect: {
1265  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1266  llvm::Type *ArgType = ArgValue->getType();
1267 
1268  Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
1269  // Don't generate llvm.expect on -O0 as the backend won't use it for
1270  // anything.
1271  // Note, we still IRGen ExpectedValue because it could have side-effects.
1272  if (CGM.getCodeGenOpts().OptimizationLevel == 0)
1273  return RValue::get(ArgValue);
1274 
1275  Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
1276  Value *Result =
1277  Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
1278  return RValue::get(Result);
1279  }
1280  case Builtin::BI__builtin_assume_aligned: {
1281  Value *PtrValue = EmitScalarExpr(E->getArg(0));
1282  Value *OffsetValue =
1283  (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
1284 
1285  Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
1286  ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
1287  unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
1288 
1289  EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
1290  return RValue::get(PtrValue);
1291  }
1292  case Builtin::BI__assume:
1293  case Builtin::BI__builtin_assume: {
1294  if (E->getArg(0)->HasSideEffects(getContext()))
1295  return RValue::get(nullptr);
1296 
1297  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1298  Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
1299  return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
1300  }
1301  case Builtin::BI__builtin_bswap16:
1302  case Builtin::BI__builtin_bswap32:
1303  case Builtin::BI__builtin_bswap64: {
1304  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
1305  }
1306  case Builtin::BI__builtin_bitreverse8:
1307  case Builtin::BI__builtin_bitreverse16:
1308  case Builtin::BI__builtin_bitreverse32:
1309  case Builtin::BI__builtin_bitreverse64: {
1310  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
1311  }
1312  case Builtin::BI__builtin_object_size: {
1313  unsigned Type =
1314  E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
1315  auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
1316 
1317  // We pass this builtin onto the optimizer so that it can figure out the
1318  // object size in more complex cases.
1319  return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
1320  /*EmittedE=*/nullptr));
1321  }
1322  case Builtin::BI__builtin_prefetch: {
1323  Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
1324  // FIXME: Technically these constants should be of type 'int', yes?
1325  RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
1326  llvm::ConstantInt::get(Int32Ty, 0);
1327  Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
1328  llvm::ConstantInt::get(Int32Ty, 3);
1329  Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
1330  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
1331  return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
1332  }
1333  case Builtin::BI__builtin_readcyclecounter: {
1334  Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
1335  return RValue::get(Builder.CreateCall(F));
1336  }
1337  case Builtin::BI__builtin___clear_cache: {
1338  Value *Begin = EmitScalarExpr(E->getArg(0));
1339  Value *End = EmitScalarExpr(E->getArg(1));
1340  Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
1341  return RValue::get(Builder.CreateCall(F, {Begin, End}));
1342  }
1343  case Builtin::BI__builtin_trap:
1344  return RValue::get(EmitTrapCall(Intrinsic::trap));
1345  case Builtin::BI__debugbreak:
1346  return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
1347  case Builtin::BI__builtin_unreachable: {
1348  if (SanOpts.has(SanitizerKind::Unreachable)) {
1349  SanitizerScope SanScope(this);
1350  EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
1351  SanitizerKind::Unreachable),
1352  SanitizerHandler::BuiltinUnreachable,
1353  EmitCheckSourceLocation(E->getExprLoc()), None);
1354  } else
1355  Builder.CreateUnreachable();
1356 
1357  // We do need to preserve an insertion point.
1358  EmitBlock(createBasicBlock("unreachable.cont"));
1359 
1360  return RValue::get(nullptr);
1361  }
1362 
1363  case Builtin::BI__builtin_powi:
1364  case Builtin::BI__builtin_powif:
1365  case Builtin::BI__builtin_powil: {
1366  Value *Base = EmitScalarExpr(E->getArg(0));
1367  Value *Exponent = EmitScalarExpr(E->getArg(1));
1368  llvm::Type *ArgType = Base->getType();
1369  Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
1370  return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1371  }
1372 
1373  case Builtin::BI__builtin_isgreater:
1374  case Builtin::BI__builtin_isgreaterequal:
1375  case Builtin::BI__builtin_isless:
1376  case Builtin::BI__builtin_islessequal:
1377  case Builtin::BI__builtin_islessgreater:
1378  case Builtin::BI__builtin_isunordered: {
1379  // Ordered comparisons: we know the arguments to these are matching scalar
1380  // floating point values.
1381  Value *LHS = EmitScalarExpr(E->getArg(0));
1382  Value *RHS = EmitScalarExpr(E->getArg(1));
1383 
1384  switch (BuiltinID) {
1385  default: llvm_unreachable("Unknown ordered comparison");
1386  case Builtin::BI__builtin_isgreater:
1387  LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
1388  break;
1389  case Builtin::BI__builtin_isgreaterequal:
1390  LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
1391  break;
1392  case Builtin::BI__builtin_isless:
1393  LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
1394  break;
1395  case Builtin::BI__builtin_islessequal:
1396  LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
1397  break;
1398  case Builtin::BI__builtin_islessgreater:
1399  LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
1400  break;
1401  case Builtin::BI__builtin_isunordered:
1402  LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
1403  break;
1404  }
1405  // ZExt bool to int type.
1406  return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
1407  }
1408  case Builtin::BI__builtin_isnan: {
1409  Value *V = EmitScalarExpr(E->getArg(0));
1410  V = Builder.CreateFCmpUNO(V, V, "cmp");
1411  return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1412  }
1413 
1414  case Builtin::BIfinite:
1415  case Builtin::BI__finite:
1416  case Builtin::BIfinitef:
1417  case Builtin::BI__finitef:
1418  case Builtin::BIfinitel:
1419  case Builtin::BI__finitel:
1420  case Builtin::BI__builtin_isinf:
1421  case Builtin::BI__builtin_isfinite: {
1422  // isinf(x) --> fabs(x) == infinity
1423  // isfinite(x) --> fabs(x) != infinity
1424  // x != NaN via the ordered compare in either case.
1425  Value *V = EmitScalarExpr(E->getArg(0));
1426  Value *Fabs = EmitFAbs(*this, V);
1427  Constant *Infinity = ConstantFP::getInfinity(V->getType());
1428  CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
1429  ? CmpInst::FCMP_OEQ
1430  : CmpInst::FCMP_ONE;
1431  Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
1432  return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
1433  }
1434 
1435  case Builtin::BI__builtin_isinf_sign: {
1436  // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
1437  Value *Arg = EmitScalarExpr(E->getArg(0));
1438  Value *AbsArg = EmitFAbs(*this, Arg);
1439  Value *IsInf = Builder.CreateFCmpOEQ(
1440  AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
1441  Value *IsNeg = EmitSignBit(*this, Arg);
1442 
1443  llvm::Type *IntTy = ConvertType(E->getType());
1444  Value *Zero = Constant::getNullValue(IntTy);
1445  Value *One = ConstantInt::get(IntTy, 1);
1446  Value *NegativeOne = ConstantInt::get(IntTy, -1);
1447  Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
1448  Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
1449  return RValue::get(Result);
1450  }
1451 
1452  case Builtin::BI__builtin_isnormal: {
1453  // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
1454  Value *V = EmitScalarExpr(E->getArg(0));
1455  Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
1456 
1457  Value *Abs = EmitFAbs(*this, V);
1458  Value *IsLessThanInf =
1459  Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
1460  APFloat Smallest = APFloat::getSmallestNormalized(
1461  getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
1462  Value *IsNormal =
1463  Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
1464  "isnormal");
1465  V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
1466  V = Builder.CreateAnd(V, IsNormal, "and");
1467  return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1468  }
1469 
1470  case Builtin::BI__builtin_fpclassify: {
1471  Value *V = EmitScalarExpr(E->getArg(5));
1472  llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
1473 
1474  // Create Result
1475  BasicBlock *Begin = Builder.GetInsertBlock();
1476  BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
1477  Builder.SetInsertPoint(End);
1478  PHINode *Result =
1479  Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
1480  "fpclassify_result");
1481 
1482  // if (V==0) return FP_ZERO
1483  Builder.SetInsertPoint(Begin);
1484  Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
1485  "iszero");
1486  Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
1487  BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
1488  Builder.CreateCondBr(IsZero, End, NotZero);
1489  Result->addIncoming(ZeroLiteral, Begin);
1490 
1491  // if (V != V) return FP_NAN
1492  Builder.SetInsertPoint(NotZero);
1493  Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
1494  Value *NanLiteral = EmitScalarExpr(E->getArg(0));
1495  BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
1496  Builder.CreateCondBr(IsNan, End, NotNan);
1497  Result->addIncoming(NanLiteral, NotZero);
1498 
1499  // if (fabs(V) == infinity) return FP_INFINITY
1500  Builder.SetInsertPoint(NotNan);
1501  Value *VAbs = EmitFAbs(*this, V);
1502  Value *IsInf =
1503  Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
1504  "isinf");
1505  Value *InfLiteral = EmitScalarExpr(E->getArg(1));
1506  BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
1507  Builder.CreateCondBr(IsInf, End, NotInf);
1508  Result->addIncoming(InfLiteral, NotNan);
1509 
1510  // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
1511  Builder.SetInsertPoint(NotInf);
1512  APFloat Smallest = APFloat::getSmallestNormalized(
1513  getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
1514  Value *IsNormal =
1515  Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
1516  "isnormal");
1517  Value *NormalResult =
1518  Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
1519  EmitScalarExpr(E->getArg(3)));
1520  Builder.CreateBr(End);
1521  Result->addIncoming(NormalResult, NotInf);
1522 
1523  // return Result
1524  Builder.SetInsertPoint(End);
1525  return RValue::get(Result);
1526  }
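  // Usage sketch (illustrative, not taken from a test): a call such as
  //   int k = __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL,
  //                                FP_SUBNORMAL, FP_ZERO, x);
  // is lowered to the compare/branch chain above; the PHI in fpclassify_end
  // picks whichever of the five literal arguments matches the class of x.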
1527 
1528  case Builtin::BIalloca:
1529  case Builtin::BI_alloca:
1530  case Builtin::BI__builtin_alloca: {
1531  Value *Size = EmitScalarExpr(E->getArg(0));
1532  const TargetInfo &TI = getContext().getTargetInfo();
1533  // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
1534  unsigned SuitableAlignmentInBytes =
1535  CGM.getContext()
1536  .toCharUnitsFromBits(TI.getSuitableAlign())
1537  .getQuantity();
1538  AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1539  AI->setAlignment(SuitableAlignmentInBytes);
1540  return RValue::get(AI);
1541  }
1542 
1543  case Builtin::BI__builtin_alloca_with_align: {
1544  Value *Size = EmitScalarExpr(E->getArg(0));
1545  Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
1546  auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
1547  unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
1548  unsigned AlignmentInBytes =
1549  CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
1550  AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1551  AI->setAlignment(AlignmentInBytes);
1552  return RValue::get(AI);
1553  }
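  // Note that the second argument is an alignment in bits, which is why it is
  // run through toCharUnitsFromBits above. Illustrative example:
  //   void *p = __builtin_alloca_with_align(n, 256);  // 256 bits = 32 bytes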
1554 
1555  case Builtin::BIbzero:
1556  case Builtin::BI__builtin_bzero: {
1557  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1558  Value *SizeVal = EmitScalarExpr(E->getArg(1));
1559  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1560  E->getArg(0)->getExprLoc(), FD, 0);
1561  Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
1562  return RValue::get(nullptr);
1563  }
1564  case Builtin::BImemcpy:
1565  case Builtin::BI__builtin_memcpy: {
1566  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1567  Address Src = EmitPointerWithAlignment(E->getArg(1));
1568  Value *SizeVal = EmitScalarExpr(E->getArg(2));
1569  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1570  E->getArg(0)->getExprLoc(), FD, 0);
1571  EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1572  E->getArg(1)->getExprLoc(), FD, 1);
1573  Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1574  return RValue::get(Dest.getPointer());
1575  }
1576 
1577  case Builtin::BI__builtin_char_memchr:
1578  BuiltinID = Builtin::BI__builtin_memchr;
1579  break;
1580 
1581  case Builtin::BI__builtin___memcpy_chk: {
1582  // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
1583  llvm::APSInt Size, DstSize;
1584  if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1585  !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1586  break;
1587  if (Size.ugt(DstSize))
1588  break;
1589  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1590  Address Src = EmitPointerWithAlignment(E->getArg(1));
1591  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1592  Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1593  return RValue::get(Dest.getPointer());
1594  }
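  // Illustrative example: with constant sizes, a call like
  //   __builtin___memcpy_chk(dst, src, 16, 32);
  // folds to a plain 16-byte memcpy; if either size is non-constant or the
  // copy size exceeds the destination size, the break above falls through to
  // the generic handling for the builtin.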
1595 
1596  case Builtin::BI__builtin_objc_memmove_collectable: {
1597  Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
1598  Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
1599  Value *SizeVal = EmitScalarExpr(E->getArg(2));
1600  CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
1601  DestAddr, SrcAddr, SizeVal);
1602  return RValue::get(DestAddr.getPointer());
1603  }
1604 
1605  case Builtin::BI__builtin___memmove_chk: {
1606  // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
1607  llvm::APSInt Size, DstSize;
1608  if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1609  !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1610  break;
1611  if (Size.ugt(DstSize))
1612  break;
1613  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1614  Address Src = EmitPointerWithAlignment(E->getArg(1));
1615  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1616  Builder.CreateMemMove(Dest, Src, SizeVal, false);
1617  return RValue::get(Dest.getPointer());
1618  }
1619 
1620  case Builtin::BImemmove:
1621  case Builtin::BI__builtin_memmove: {
1622  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1623  Address Src = EmitPointerWithAlignment(E->getArg(1));
1624  Value *SizeVal = EmitScalarExpr(E->getArg(2));
1625  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1626  E->getArg(0)->getExprLoc(), FD, 0);
1627  EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1628  E->getArg(1)->getExprLoc(), FD, 1);
1629  Builder.CreateMemMove(Dest, Src, SizeVal, false);
1630  return RValue::get(Dest.getPointer());
1631  }
1632  case Builtin::BImemset:
1633  case Builtin::BI__builtin_memset: {
1634  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1635  Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1636  Builder.getInt8Ty());
1637  Value *SizeVal = EmitScalarExpr(E->getArg(2));
1638  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1639  E->getArg(0)->getExprLoc(), FD, 0);
1640  Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1641  return RValue::get(Dest.getPointer());
1642  }
1643  case Builtin::BI__builtin___memset_chk: {
1644  // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
1645  llvm::APSInt Size, DstSize;
1646  if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1647  !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1648  break;
1649  if (Size.ugt(DstSize))
1650  break;
1651  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1652  Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1653  Builder.getInt8Ty());
1654  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1655  Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1656  return RValue::get(Dest.getPointer());
1657  }
1658  case Builtin::BI__builtin_dwarf_cfa: {
1659  // The offset in bytes from the first argument to the CFA.
1660  //
1661  // Why on earth is this in the frontend? Is there any reason at
1662  // all that the backend can't reasonably determine this while
1663  // lowering llvm.eh.dwarf.cfa()?
1664  //
1665  // TODO: If there's a satisfactory reason, add a target hook for
1666  // this instead of hard-coding 0, which is correct for most targets.
1667  int32_t Offset = 0;
1668 
1669  Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
1670  return RValue::get(Builder.CreateCall(F,
1671  llvm::ConstantInt::get(Int32Ty, Offset)));
1672  }
1673  case Builtin::BI__builtin_return_address: {
1674  Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
1675  getContext().UnsignedIntTy);
1676  Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1677  return RValue::get(Builder.CreateCall(F, Depth));
1678  }
1679  case Builtin::BI_ReturnAddress: {
1680  Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1681  return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
1682  }
1683  case Builtin::BI__builtin_frame_address: {
1684  Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
1685  getContext().UnsignedIntTy);
1686  Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
1687  return RValue::get(Builder.CreateCall(F, Depth));
1688  }
1689  case Builtin::BI__builtin_extract_return_addr: {
1690  Value *Address = EmitScalarExpr(E->getArg(0));
1691  Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1692  return RValue::get(Result);
1693  }
1694  case Builtin::BI__builtin_frob_return_addr: {
1695  Value *Address = EmitScalarExpr(E->getArg(0));
1696  Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1697  return RValue::get(Result);
1698  }
1699  case Builtin::BI__builtin_dwarf_sp_column: {
1700  llvm::IntegerType *Ty
1701  = cast<llvm::IntegerType>(ConvertType(E->getType()));
1702  int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
1703  if (Column == -1) {
1704  CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1705  return RValue::get(llvm::UndefValue::get(Ty));
1706  }
1707  return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1708  }
1709  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
1710  Value *Address = EmitScalarExpr(E->getArg(0));
1711  if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1712  CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1713  return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1714  }
1715  case Builtin::BI__builtin_eh_return: {
1716  Value *Int = EmitScalarExpr(E->getArg(0));
1717  Value *Ptr = EmitScalarExpr(E->getArg(1));
1718 
1719  llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1720  assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1721  "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
1722  Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1723  ? Intrinsic::eh_return_i32
1724  : Intrinsic::eh_return_i64);
1725  Builder.CreateCall(F, {Int, Ptr});
1726  Builder.CreateUnreachable();
1727 
1728  // We do need to preserve an insertion point.
1729  EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1730 
1731  return RValue::get(nullptr);
1732  }
1733  case Builtin::BI__builtin_unwind_init: {
1734  Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1735  return RValue::get(Builder.CreateCall(F));
1736  }
1737  case Builtin::BI__builtin_extend_pointer: {
1738  // Extends a pointer to the size of an _Unwind_Word, which is
1739  // uint64_t on all platforms. Generally this gets poked into a
1740  // register and eventually used as an address, so if the
1741  // addressing registers are wider than pointers and the platform
1742  // doesn't implicitly ignore high-order bits when doing
1743  // addressing, we need to make sure we zext / sext based on
1744  // the platform's expectations.
1745  //
1746  // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
1747 
1748  // Cast the pointer to intptr_t.
1749  Value *Ptr = EmitScalarExpr(E->getArg(0));
1750  Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1751 
1752  // If that's 64 bits, we're done.
1753  if (IntPtrTy->getBitWidth() == 64)
1754  return RValue::get(Result);
1755 
1756  // Otherwise, ask the codegen data what to do.
1757  if (getTargetHooks().extendPointerWithSExt())
1758  return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1759  else
1760  return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1761  }
1762  case Builtin::BI__builtin_setjmp: {
1763  // Buffer is a void**.
1764  Address Buf = EmitPointerWithAlignment(E->getArg(0));
1765 
1766  // Store the frame pointer to the setjmp buffer.
1767  Value *FrameAddr =
1768  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1769  ConstantInt::get(Int32Ty, 0));
1770  Builder.CreateStore(FrameAddr, Buf);
1771 
1772  // Store the stack pointer to the setjmp buffer.
1773  Value *StackAddr =
1774  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
1775  Address StackSaveSlot =
1776  Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
1777  Builder.CreateStore(StackAddr, StackSaveSlot);
1778 
1779  // Call LLVM's EH setjmp, which is lightweight.
1780  Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
1781  Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1782  return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
1783  }
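  // Jump buffer layout assumed by this lowering (a sketch): slot 0 holds the
  // caller's frame address, slot 2 holds the saved stack pointer, and the
  // llvm.eh.sjlj.setjmp intrinsic fills in the remaining state itself.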
1784  case Builtin::BI__builtin_longjmp: {
1785  Value *Buf = EmitScalarExpr(E->getArg(0));
1786  Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1787 
1788  // Call LLVM's EH longjmp, which is lightweight.
1789  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1790 
1791  // longjmp doesn't return; mark this as unreachable.
1792  Builder.CreateUnreachable();
1793 
1794  // We do need to preserve an insertion point.
1795  EmitBlock(createBasicBlock("longjmp.cont"));
1796 
1797  return RValue::get(nullptr);
1798  }
1799  case Builtin::BI__sync_fetch_and_add:
1800  case Builtin::BI__sync_fetch_and_sub:
1801  case Builtin::BI__sync_fetch_and_or:
1802  case Builtin::BI__sync_fetch_and_and:
1803  case Builtin::BI__sync_fetch_and_xor:
1804  case Builtin::BI__sync_fetch_and_nand:
1805  case Builtin::BI__sync_add_and_fetch:
1806  case Builtin::BI__sync_sub_and_fetch:
1807  case Builtin::BI__sync_and_and_fetch:
1808  case Builtin::BI__sync_or_and_fetch:
1809  case Builtin::BI__sync_xor_and_fetch:
1810  case Builtin::BI__sync_nand_and_fetch:
1811  case Builtin::BI__sync_val_compare_and_swap:
1812  case Builtin::BI__sync_bool_compare_and_swap:
1813  case Builtin::BI__sync_lock_test_and_set:
1814  case Builtin::BI__sync_lock_release:
1815  case Builtin::BI__sync_swap:
1816  llvm_unreachable("Shouldn't make it through sema");
1817  case Builtin::BI__sync_fetch_and_add_1:
1818  case Builtin::BI__sync_fetch_and_add_2:
1819  case Builtin::BI__sync_fetch_and_add_4:
1820  case Builtin::BI__sync_fetch_and_add_8:
1821  case Builtin::BI__sync_fetch_and_add_16:
1822  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
1823  case Builtin::BI__sync_fetch_and_sub_1:
1824  case Builtin::BI__sync_fetch_and_sub_2:
1825  case Builtin::BI__sync_fetch_and_sub_4:
1826  case Builtin::BI__sync_fetch_and_sub_8:
1827  case Builtin::BI__sync_fetch_and_sub_16:
1828  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
1829  case Builtin::BI__sync_fetch_and_or_1:
1830  case Builtin::BI__sync_fetch_and_or_2:
1831  case Builtin::BI__sync_fetch_and_or_4:
1832  case Builtin::BI__sync_fetch_and_or_8:
1833  case Builtin::BI__sync_fetch_and_or_16:
1834  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
1835  case Builtin::BI__sync_fetch_and_and_1:
1836  case Builtin::BI__sync_fetch_and_and_2:
1837  case Builtin::BI__sync_fetch_and_and_4:
1838  case Builtin::BI__sync_fetch_and_and_8:
1839  case Builtin::BI__sync_fetch_and_and_16:
1840  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
1841  case Builtin::BI__sync_fetch_and_xor_1:
1842  case Builtin::BI__sync_fetch_and_xor_2:
1843  case Builtin::BI__sync_fetch_and_xor_4:
1844  case Builtin::BI__sync_fetch_and_xor_8:
1845  case Builtin::BI__sync_fetch_and_xor_16:
1846  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
1847  case Builtin::BI__sync_fetch_and_nand_1:
1848  case Builtin::BI__sync_fetch_and_nand_2:
1849  case Builtin::BI__sync_fetch_and_nand_4:
1850  case Builtin::BI__sync_fetch_and_nand_8:
1851  case Builtin::BI__sync_fetch_and_nand_16:
1852  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
1853 
1854  // Clang extensions: not overloaded yet.
1855  case Builtin::BI__sync_fetch_and_min:
1856  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
1857  case Builtin::BI__sync_fetch_and_max:
1858  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
1859  case Builtin::BI__sync_fetch_and_umin:
1860  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
1861  case Builtin::BI__sync_fetch_and_umax:
1862  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1863 
1864  case Builtin::BI__sync_add_and_fetch_1:
1865  case Builtin::BI__sync_add_and_fetch_2:
1866  case Builtin::BI__sync_add_and_fetch_4:
1867  case Builtin::BI__sync_add_and_fetch_8:
1868  case Builtin::BI__sync_add_and_fetch_16:
1869  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
1870  llvm::Instruction::Add);
1871  case Builtin::BI__sync_sub_and_fetch_1:
1872  case Builtin::BI__sync_sub_and_fetch_2:
1873  case Builtin::BI__sync_sub_and_fetch_4:
1874  case Builtin::BI__sync_sub_and_fetch_8:
1875  case Builtin::BI__sync_sub_and_fetch_16:
1876  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
1877  llvm::Instruction::Sub);
1878  case Builtin::BI__sync_and_and_fetch_1:
1879  case Builtin::BI__sync_and_and_fetch_2:
1880  case Builtin::BI__sync_and_and_fetch_4:
1881  case Builtin::BI__sync_and_and_fetch_8:
1882  case Builtin::BI__sync_and_and_fetch_16:
1883  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
1884  llvm::Instruction::And);
1885  case Builtin::BI__sync_or_and_fetch_1:
1886  case Builtin::BI__sync_or_and_fetch_2:
1887  case Builtin::BI__sync_or_and_fetch_4:
1888  case Builtin::BI__sync_or_and_fetch_8:
1889  case Builtin::BI__sync_or_and_fetch_16:
1890  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
1891  llvm::Instruction::Or);
1892  case Builtin::BI__sync_xor_and_fetch_1:
1893  case Builtin::BI__sync_xor_and_fetch_2:
1894  case Builtin::BI__sync_xor_and_fetch_4:
1895  case Builtin::BI__sync_xor_and_fetch_8:
1896  case Builtin::BI__sync_xor_and_fetch_16:
1897  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
1898  llvm::Instruction::Xor);
1899  case Builtin::BI__sync_nand_and_fetch_1:
1900  case Builtin::BI__sync_nand_and_fetch_2:
1901  case Builtin::BI__sync_nand_and_fetch_4:
1902  case Builtin::BI__sync_nand_and_fetch_8:
1903  case Builtin::BI__sync_nand_and_fetch_16:
1904  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
1905  llvm::Instruction::And, true);
1906 
1907  case Builtin::BI__sync_val_compare_and_swap_1:
1908  case Builtin::BI__sync_val_compare_and_swap_2:
1909  case Builtin::BI__sync_val_compare_and_swap_4:
1910  case Builtin::BI__sync_val_compare_and_swap_8:
1911  case Builtin::BI__sync_val_compare_and_swap_16:
1912  return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
1913 
1914  case Builtin::BI__sync_bool_compare_and_swap_1:
1915  case Builtin::BI__sync_bool_compare_and_swap_2:
1916  case Builtin::BI__sync_bool_compare_and_swap_4:
1917  case Builtin::BI__sync_bool_compare_and_swap_8:
1918  case Builtin::BI__sync_bool_compare_and_swap_16:
1919  return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
1920 
1921  case Builtin::BI__sync_swap_1:
1922  case Builtin::BI__sync_swap_2:
1923  case Builtin::BI__sync_swap_4:
1924  case Builtin::BI__sync_swap_8:
1925  case Builtin::BI__sync_swap_16:
1926  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1927 
1928  case Builtin::BI__sync_lock_test_and_set_1:
1929  case Builtin::BI__sync_lock_test_and_set_2:
1930  case Builtin::BI__sync_lock_test_and_set_4:
1931  case Builtin::BI__sync_lock_test_and_set_8:
1932  case Builtin::BI__sync_lock_test_and_set_16:
1933  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1934 
1935  case Builtin::BI__sync_lock_release_1:
1936  case Builtin::BI__sync_lock_release_2:
1937  case Builtin::BI__sync_lock_release_4:
1938  case Builtin::BI__sync_lock_release_8:
1939  case Builtin::BI__sync_lock_release_16: {
1940  Value *Ptr = EmitScalarExpr(E->getArg(0));
1941  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1942  CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1943  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1944  StoreSize.getQuantity() * 8);
1945  Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1946  llvm::StoreInst *Store =
1947  Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
1948  StoreSize);
1949  Store->setAtomic(llvm::AtomicOrdering::Release);
1950  return RValue::get(nullptr);
1951  }
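  // For example, __sync_lock_release(&flag) on an int lowers to an atomic
  // release-ordered store of zero through an i32 pointer, as built above.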
1952 
1953  case Builtin::BI__sync_synchronize: {
1954  // We assume this is supposed to correspond to a C++0x-style
1955  // sequentially-consistent fence (i.e. this is only usable for
1956  // synchronization, not device I/O or anything like that). This intrinsic
1957  // is really badly designed in the sense that in theory, there isn't
1958  // any way to safely use it... but in practice, it mostly works
1959  // to use it with non-atomic loads and stores to get acquire/release
1960  // semantics.
1961  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
1962  return RValue::get(nullptr);
1963  }
1964 
1965  case Builtin::BI__builtin_nontemporal_load:
1966  return RValue::get(EmitNontemporalLoad(*this, E));
1967  case Builtin::BI__builtin_nontemporal_store:
1968  return RValue::get(EmitNontemporalStore(*this, E));
1969  case Builtin::BI__c11_atomic_is_lock_free:
1970  case Builtin::BI__atomic_is_lock_free: {
1971  // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1972  // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1973  // _Atomic(T) is always properly-aligned.
1974  const char *LibCallName = "__atomic_is_lock_free";
1975  CallArgList Args;
1976  Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1977  getContext().getSizeType());
1978  if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1979  Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1980  getContext().VoidPtrTy);
1981  else
1982  Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1983  getContext().VoidPtrTy);
1984  const CGFunctionInfo &FuncInfo =
1985  CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
1986  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1987  llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1988  return EmitCall(FuncInfo, CGCallee::forDirect(Func),
1989  ReturnValueSlot(), Args);
1990  }
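  // Sketch of the resulting call: __c11_atomic_is_lock_free(sizeof(T)) becomes
  //   __atomic_is_lock_free(sizeof(T), (void *)0)
  // while the __atomic_is_lock_free builtin forwards its own pointer argument.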
1991 
1992  case Builtin::BI__atomic_test_and_set: {
1993  // Look at the argument type to determine whether this is a volatile
1994  // operation. The parameter type is always volatile.
1995  QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1996  bool Volatile =
1997  PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1998 
1999  Value *Ptr = EmitScalarExpr(E->getArg(0));
2000  unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
2001  Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
2002  Value *NewVal = Builder.getInt8(1);
2003  Value *Order = EmitScalarExpr(E->getArg(1));
2004  if (isa<llvm::ConstantInt>(Order)) {
2005  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
2006  AtomicRMWInst *Result = nullptr;
2007  switch (ord) {
2008  case 0: // memory_order_relaxed
2009  default: // invalid order
2010  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2011  llvm::AtomicOrdering::Monotonic);
2012  break;
2013  case 1: // memory_order_consume
2014  case 2: // memory_order_acquire
2015  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2016  llvm::AtomicOrdering::Acquire);
2017  break;
2018  case 3: // memory_order_release
2019  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2020  llvm::AtomicOrdering::Release);
2021  break;
2022  case 4: // memory_order_acq_rel
2023 
2024  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2025  llvm::AtomicOrdering::AcquireRelease);
2026  break;
2027  case 5: // memory_order_seq_cst
2028  Result = Builder.CreateAtomicRMW(
2029  llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2030  llvm::AtomicOrdering::SequentiallyConsistent);
2031  break;
2032  }
2033  Result->setVolatile(Volatile);
2034  return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
2035  }
2036 
2037  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
2038 
2039  llvm::BasicBlock *BBs[5] = {
2040  createBasicBlock("monotonic", CurFn),
2041  createBasicBlock("acquire", CurFn),
2042  createBasicBlock("release", CurFn),
2043  createBasicBlock("acqrel", CurFn),
2044  createBasicBlock("seqcst", CurFn)
2045  };
2046  llvm::AtomicOrdering Orders[5] = {
2047  llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
2048  llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
2049  llvm::AtomicOrdering::SequentiallyConsistent};
2050 
2051  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
2052  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
2053 
2054  Builder.SetInsertPoint(ContBB);
2055  PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
2056 
2057  for (unsigned i = 0; i < 5; ++i) {
2058  Builder.SetInsertPoint(BBs[i]);
2059  AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
2060  Ptr, NewVal, Orders[i]);
2061  RMW->setVolatile(Volatile);
2062  Result->addIncoming(RMW, BBs[i]);
2063  Builder.CreateBr(ContBB);
2064  }
2065 
2066  SI->addCase(Builder.getInt32(0), BBs[0]);
2067  SI->addCase(Builder.getInt32(1), BBs[1]);
2068  SI->addCase(Builder.getInt32(2), BBs[1]);
2069  SI->addCase(Builder.getInt32(3), BBs[2]);
2070  SI->addCase(Builder.getInt32(4), BBs[3]);
2071  SI->addCase(Builder.getInt32(5), BBs[4]);
2072 
2073  Builder.SetInsertPoint(ContBB);
2074  return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
2075  }
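  // Usage sketch (illustrative): __atomic_test_and_set(&flag, __ATOMIC_ACQUIRE)
  // becomes an "atomicrmw xchg" of the i8 value 1 with acquire ordering; the
  // returned bool is the is-not-null test of the previous value. A
  // non-constant ordering takes the switch-based path above instead.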
2076 
2077  case Builtin::BI__atomic_clear: {
2078  QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
2079  bool Volatile =
2080  PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
2081 
2082  Address Ptr = EmitPointerWithAlignment(E->getArg(0));
2083  unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
2084  Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
2085  Value *NewVal = Builder.getInt8(0);
2086  Value *Order = EmitScalarExpr(E->getArg(1));
2087  if (isa<llvm::ConstantInt>(Order)) {
2088  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
2089  StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
2090  switch (ord) {
2091  case 0: // memory_order_relaxed
2092  default: // invalid order
2093  Store->setOrdering(llvm::AtomicOrdering::Monotonic);
2094  break;
2095  case 3: // memory_order_release
2096  Store->setOrdering(llvm::AtomicOrdering::Release);
2097  break;
2098  case 5: // memory_order_seq_cst
2099  Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
2100  break;
2101  }
2102  return RValue::get(nullptr);
2103  }
2104 
2105  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
2106 
2107  llvm::BasicBlock *BBs[3] = {
2108  createBasicBlock("monotonic", CurFn),
2109  createBasicBlock("release", CurFn),
2110  createBasicBlock("seqcst", CurFn)
2111  };
2112  llvm::AtomicOrdering Orders[3] = {
2113  llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
2114  llvm::AtomicOrdering::SequentiallyConsistent};
2115 
2116  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
2117  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
2118 
2119  for (unsigned i = 0; i < 3; ++i) {
2120  Builder.SetInsertPoint(BBs[i]);
2121  StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
2122  Store->setOrdering(Orders[i]);
2123  Builder.CreateBr(ContBB);
2124  }
2125 
2126  SI->addCase(Builder.getInt32(0), BBs[0]);
2127  SI->addCase(Builder.getInt32(3), BBs[1]);
2128  SI->addCase(Builder.getInt32(5), BBs[2]);
2129 
2130  Builder.SetInsertPoint(ContBB);
2131  return RValue::get(nullptr);
2132  }
2133 
2134  case Builtin::BI__atomic_thread_fence:
2135  case Builtin::BI__atomic_signal_fence:
2136  case Builtin::BI__c11_atomic_thread_fence:
2137  case Builtin::BI__c11_atomic_signal_fence: {
2138  llvm::SyncScope::ID SSID;
2139  if (BuiltinID == Builtin::BI__atomic_signal_fence ||
2140  BuiltinID == Builtin::BI__c11_atomic_signal_fence)
2141  SSID = llvm::SyncScope::SingleThread;
2142  else
2143  SSID = llvm::SyncScope::System;
2144  Value *Order = EmitScalarExpr(E->getArg(0));
2145  if (isa<llvm::ConstantInt>(Order)) {
2146  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
2147  switch (ord) {
2148  case 0: // memory_order_relaxed
2149  default: // invalid order
2150  break;
2151  case 1: // memory_order_consume
2152  case 2: // memory_order_acquire
2153  Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
2154  break;
2155  case 3: // memory_order_release
2156  Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
2157  break;
2158  case 4: // memory_order_acq_rel
2159  Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
2160  break;
2161  case 5: // memory_order_seq_cst
2162  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
2163  break;
2164  }
2165  return RValue::get(nullptr);
2166  }
2167 
2168  llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
2169  AcquireBB = createBasicBlock("acquire", CurFn);
2170  ReleaseBB = createBasicBlock("release", CurFn);
2171  AcqRelBB = createBasicBlock("acqrel", CurFn);
2172  SeqCstBB = createBasicBlock("seqcst", CurFn);
2173  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
2174 
2175  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
2176  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
2177 
2178  Builder.SetInsertPoint(AcquireBB);
2179  Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
2180  Builder.CreateBr(ContBB);
2181  SI->addCase(Builder.getInt32(1), AcquireBB);
2182  SI->addCase(Builder.getInt32(2), AcquireBB);
2183 
2184  Builder.SetInsertPoint(ReleaseBB);
2185  Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
2186  Builder.CreateBr(ContBB);
2187  SI->addCase(Builder.getInt32(3), ReleaseBB);
2188 
2189  Builder.SetInsertPoint(AcqRelBB);
2190  Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
2191  Builder.CreateBr(ContBB);
2192  SI->addCase(Builder.getInt32(4), AcqRelBB);
2193 
2194  Builder.SetInsertPoint(SeqCstBB);
2195  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
2196  Builder.CreateBr(ContBB);
2197  SI->addCase(Builder.getInt32(5), SeqCstBB);
2198 
2199  Builder.SetInsertPoint(ContBB);
2200  return RValue::get(nullptr);
2201  }
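  // For example, __atomic_thread_fence(__ATOMIC_SEQ_CST) emits "fence seq_cst",
  // while __atomic_signal_fence uses the single-thread sync scope, so it only
  // orders operations within the same thread (e.g. against signal handlers).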
2202 
2203  case Builtin::BI__builtin_signbit:
2204  case Builtin::BI__builtin_signbitf:
2205  case Builtin::BI__builtin_signbitl: {
2206  return RValue::get(
2207  Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
2208  ConvertType(E->getType())));
2209  }
2210  case Builtin::BI__annotation: {
2211  // Re-encode each wide string to UTF8 and make an MDString.
2212  SmallVector<Metadata *, 1> Strings;
2213  for (const Expr *Arg : E->arguments()) {
2214  const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
2215  assert(Str->getCharByteWidth() == 2);
2216  StringRef WideBytes = Str->getBytes();
2217  std::string StrUtf8;
2218  if (!convertUTF16ToUTF8String(
2219  makeArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
2220  CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
2221  continue;
2222  }
2223  Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
2224  }
2225 
2226  // Build an MDTuple of MDStrings and emit the intrinsic call.
2227  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
2228  MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
2229  Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
2230  return RValue::getIgnored();
2231  }
2232  case Builtin::BI__builtin_annotation: {
2233  llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
2234  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
2235  AnnVal->getType());
2236 
2237  // Get the annotation string, go through casts. Sema requires this to be a
2238  // non-wide string literal, potentially casted, so the cast<> is safe.
2239  const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
2240  StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
2241  return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
2242  }
2243  case Builtin::BI__builtin_addcb:
2244  case Builtin::BI__builtin_addcs:
2245  case Builtin::BI__builtin_addc:
2246  case Builtin::BI__builtin_addcl:
2247  case Builtin::BI__builtin_addcll:
2248  case Builtin::BI__builtin_subcb:
2249  case Builtin::BI__builtin_subcs:
2250  case Builtin::BI__builtin_subc:
2251  case Builtin::BI__builtin_subcl:
2252  case Builtin::BI__builtin_subcll: {
2253 
2254  // We translate all of these builtins from expressions of the form:
2255  // int x = ..., y = ..., carryin = ..., carryout, result;
2256  // result = __builtin_addc(x, y, carryin, &carryout);
2257  //
2258  // to LLVM IR of the form:
2259  //
2260  // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
2261  // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
2262  // %carry1 = extractvalue {i32, i1} %tmp1, 1
2263  // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
2264  // i32 %carryin)
2265  // %result = extractvalue {i32, i1} %tmp2, 0
2266  // %carry2 = extractvalue {i32, i1} %tmp2, 1
2267  // %tmp3 = or i1 %carry1, %carry2
2268  // %tmp4 = zext i1 %tmp3 to i32
2269  // store i32 %tmp4, i32* %carryout
2270 
2271  // Scalarize our inputs.
2272  llvm::Value *X = EmitScalarExpr(E->getArg(0));
2273  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2274  llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
2275  Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
2276 
2277  // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
2278  llvm::Intrinsic::ID IntrinsicId;
2279  switch (BuiltinID) {
2280  default: llvm_unreachable("Unknown multiprecision builtin id.");
2281  case Builtin::BI__builtin_addcb:
2282  case Builtin::BI__builtin_addcs:
2283  case Builtin::BI__builtin_addc:
2284  case Builtin::BI__builtin_addcl:
2285  case Builtin::BI__builtin_addcll:
2286  IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2287  break;
2288  case Builtin::BI__builtin_subcb:
2289  case Builtin::BI__builtin_subcs:
2290  case Builtin::BI__builtin_subc:
2291  case Builtin::BI__builtin_subcl:
2292  case Builtin::BI__builtin_subcll:
2293  IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2294  break;
2295  }
2296 
2297  // Construct our resulting LLVM IR expression.
2298  llvm::Value *Carry1;
2299  llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
2300  X, Y, Carry1);
2301  llvm::Value *Carry2;
2302  llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
2303  Sum1, Carryin, Carry2);
2304  llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
2305  X->getType());
2306  Builder.CreateStore(CarryOut, CarryOutPtr);
2307  return RValue::get(Sum2);
2308  }
2309 
2310  case Builtin::BI__builtin_add_overflow:
2311  case Builtin::BI__builtin_sub_overflow:
2312  case Builtin::BI__builtin_mul_overflow: {
2313  const clang::Expr *LeftArg = E->getArg(0);
2314  const clang::Expr *RightArg = E->getArg(1);
2315  const clang::Expr *ResultArg = E->getArg(2);
2316 
2317  clang::QualType ResultQTy =
2318  ResultArg->getType()->castAs<PointerType>()->getPointeeType();
2319 
2320  WidthAndSignedness LeftInfo =
2321  getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
2322  WidthAndSignedness RightInfo =
2323  getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
2324  WidthAndSignedness ResultInfo =
2325  getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
2326  WidthAndSignedness EncompassingInfo =
2327  EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
2328 
2329  llvm::Type *EncompassingLLVMTy =
2330  llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
2331 
2332  llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
2333 
2334  llvm::Intrinsic::ID IntrinsicId;
2335  switch (BuiltinID) {
2336  default:
2337  llvm_unreachable("Unknown overflow builtin id.");
2338  case Builtin::BI__builtin_add_overflow:
2339  IntrinsicId = EncompassingInfo.Signed
2340  ? llvm::Intrinsic::sadd_with_overflow
2341  : llvm::Intrinsic::uadd_with_overflow;
2342  break;
2343  case Builtin::BI__builtin_sub_overflow:
2344  IntrinsicId = EncompassingInfo.Signed
2345  ? llvm::Intrinsic::ssub_with_overflow
2346  : llvm::Intrinsic::usub_with_overflow;
2347  break;
2348  case Builtin::BI__builtin_mul_overflow:
2349  IntrinsicId = EncompassingInfo.Signed
2350  ? llvm::Intrinsic::smul_with_overflow
2351  : llvm::Intrinsic::umul_with_overflow;
2352  break;
2353  }
2354 
2355  llvm::Value *Left = EmitScalarExpr(LeftArg);
2356  llvm::Value *Right = EmitScalarExpr(RightArg);
2357  Address ResultPtr = EmitPointerWithAlignment(ResultArg);
2358 
2359  // Extend each operand to the encompassing type.
2360  Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
2361  Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
2362 
2363  // Perform the operation on the extended values.
2364  llvm::Value *Overflow, *Result;
2365  Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
2366 
2367  if (EncompassingInfo.Width > ResultInfo.Width) {
2368  // The encompassing type is wider than the result type, so we need to
2369  // truncate it.
2370  llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
2371 
2372  // To see if the truncation caused an overflow, we will extend
2373  // the result and then compare it to the original result.
2374  llvm::Value *ResultTruncExt = Builder.CreateIntCast(
2375  ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
2376  llvm::Value *TruncationOverflow =
2377  Builder.CreateICmpNE(Result, ResultTruncExt);
2378 
2379  Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
2380  Result = ResultTrunc;
2381  }
2382 
2383  // Finally, store the result using the pointer.
2384  bool isVolatile =
2385  ResultArg->getType()->getPointeeType().isVolatileQualified();
2386  Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
2387 
2388  return RValue::get(Overflow);
2389  }
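  // Usage sketch (illustrative) for mixed operand types:
  //   long long res; bool ovf = __builtin_add_overflow(a_u32, b_s64, &res);
  // Both operands are extended to the encompassing integer type computed
  // above, the overflow intrinsic runs at that width, and any overflow caused
  // by truncating back to the result type is folded into the returned flag.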
2390 
2391  case Builtin::BI__builtin_uadd_overflow:
2392  case Builtin::BI__builtin_uaddl_overflow:
2393  case Builtin::BI__builtin_uaddll_overflow:
2394  case Builtin::BI__builtin_usub_overflow:
2395  case Builtin::BI__builtin_usubl_overflow:
2396  case Builtin::BI__builtin_usubll_overflow:
2397  case Builtin::BI__builtin_umul_overflow:
2398  case Builtin::BI__builtin_umull_overflow:
2399  case Builtin::BI__builtin_umulll_overflow:
2400  case Builtin::BI__builtin_sadd_overflow:
2401  case Builtin::BI__builtin_saddl_overflow:
2402  case Builtin::BI__builtin_saddll_overflow:
2403  case Builtin::BI__builtin_ssub_overflow:
2404  case Builtin::BI__builtin_ssubl_overflow:
2405  case Builtin::BI__builtin_ssubll_overflow:
2406  case Builtin::BI__builtin_smul_overflow:
2407  case Builtin::BI__builtin_smull_overflow:
2408  case Builtin::BI__builtin_smulll_overflow: {
2409 
2410  // We translate all of these builtins directly to the relevant llvm IR node.
2411 
2412  // Scalarize our inputs.
2413  llvm::Value *X = EmitScalarExpr(E->getArg(0));
2414  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2415  Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
2416 
2417  // Decide which of the overflow intrinsics we are lowering to:
2418  llvm::Intrinsic::ID IntrinsicId;
2419  switch (BuiltinID) {
2420  default: llvm_unreachable("Unknown overflow builtin id.");
2421  case Builtin::BI__builtin_uadd_overflow:
2422  case Builtin::BI__builtin_uaddl_overflow:
2423  case Builtin::BI__builtin_uaddll_overflow:
2424  IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2425  break;
2426  case Builtin::BI__builtin_usub_overflow:
2427  case Builtin::BI__builtin_usubl_overflow:
2428  case Builtin::BI__builtin_usubll_overflow:
2429  IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2430  break;
2431  case Builtin::BI__builtin_umul_overflow:
2432  case Builtin::BI__builtin_umull_overflow:
2433  case Builtin::BI__builtin_umulll_overflow:
2434  IntrinsicId = llvm::Intrinsic::umul_with_overflow;
2435  break;
2436  case Builtin::BI__builtin_sadd_overflow:
2437  case Builtin::BI__builtin_saddl_overflow:
2438  case Builtin::BI__builtin_saddll_overflow:
2439  IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
2440  break;
2441  case Builtin::BI__builtin_ssub_overflow:
2442  case Builtin::BI__builtin_ssubl_overflow:
2443  case Builtin::BI__builtin_ssubll_overflow:
2444  IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
2445  break;
2446  case Builtin::BI__builtin_smul_overflow:
2447  case Builtin::BI__builtin_smull_overflow:
2448  case Builtin::BI__builtin_smulll_overflow:
2449  IntrinsicId = llvm::Intrinsic::smul_with_overflow;
2450  break;
2451  }
2452 
2453 
2454  llvm::Value *Carry;
2455  llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
2456  Builder.CreateStore(Sum, SumOutPtr);
2457 
2458  return RValue::get(Carry);
2459  }
2460  case Builtin::BI__builtin_addressof:
2461  return RValue::get(EmitLValue(E->getArg(0)).getPointer());
2462  case Builtin::BI__builtin_operator_new:
2463  return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2464  E->getArg(0), false);
2465  case Builtin::BI__builtin_operator_delete:
2466  return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2467  E->getArg(0), true);
2468  case Builtin::BI__noop:
2469  // __noop always evaluates to an integer literal zero.
2470  return RValue::get(ConstantInt::get(IntTy, 0));
2471  case Builtin::BI__builtin_call_with_static_chain: {
2472  const CallExpr *Call = cast<CallExpr>(E->getArg(0));
2473  const Expr *Chain = E->getArg(1);
2474  return EmitCall(Call->getCallee()->getType(),
2475  EmitCallee(Call->getCallee()), Call, ReturnValue,
2476  EmitScalarExpr(Chain));
2477  }
2478  case Builtin::BI_InterlockedExchange8:
2479  case Builtin::BI_InterlockedExchange16:
2480  case Builtin::BI_InterlockedExchange:
2481  case Builtin::BI_InterlockedExchangePointer:
2482  return RValue::get(
2483  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
2484  case Builtin::BI_InterlockedCompareExchangePointer: {
2485  llvm::Type *RTy;
2486  llvm::IntegerType *IntType =
2487  IntegerType::get(getLLVMContext(),
2488  getContext().getTypeSize(E->getType()));
2489  llvm::Type *IntPtrType = IntType->getPointerTo();
2490 
2491  llvm::Value *Destination =
2492  Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
2493 
2494  llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
2495  RTy = Exchange->getType();
2496  Exchange = Builder.CreatePtrToInt(Exchange, IntType);
2497 
2498  llvm::Value *Comparand =
2499  Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
2500 
2501  auto Result =
2502  Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
2503  AtomicOrdering::SequentiallyConsistent,
2504  AtomicOrdering::SequentiallyConsistent);
2505  Result->setVolatile(true);
2506 
2507  return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
2508  0),
2509  RTy));
2510  }
2511  case Builtin::BI_InterlockedCompareExchange8:
2512  case Builtin::BI_InterlockedCompareExchange16:
2513  case Builtin::BI_InterlockedCompareExchange:
2514  case Builtin::BI_InterlockedCompareExchange64: {
2515  AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
2516  EmitScalarExpr(E->getArg(0)),
2517  EmitScalarExpr(E->getArg(2)),
2518  EmitScalarExpr(E->getArg(1)),
2519  AtomicOrdering::SequentiallyConsistent,
2520  AtomicOrdering::SequentiallyConsistent);
2521  CXI->setVolatile(true);
2522  return RValue::get(Builder.CreateExtractValue(CXI, 0));
2523  }
2524  case Builtin::BI_InterlockedIncrement16:
2525  case Builtin::BI_InterlockedIncrement:
2526  return RValue::get(
2527  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
2528  case Builtin::BI_InterlockedDecrement16:
2529  case Builtin::BI_InterlockedDecrement:
2530  return RValue::get(
2531  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
2532  case Builtin::BI_InterlockedAnd8:
2533  case Builtin::BI_InterlockedAnd16:
2534  case Builtin::BI_InterlockedAnd:
2535  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
2536  case Builtin::BI_InterlockedExchangeAdd8:
2537  case Builtin::BI_InterlockedExchangeAdd16:
2538  case Builtin::BI_InterlockedExchangeAdd:
2539  return RValue::get(
2540  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
2541  case Builtin::BI_InterlockedExchangeSub8:
2542  case Builtin::BI_InterlockedExchangeSub16:
2543  case Builtin::BI_InterlockedExchangeSub:
2544  return RValue::get(
2545  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
2546  case Builtin::BI_InterlockedOr8:
2547  case Builtin::BI_InterlockedOr16:
2548  case Builtin::BI_InterlockedOr:
2549  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
2550  case Builtin::BI_InterlockedXor8:
2551  case Builtin::BI_InterlockedXor16:
2552  case Builtin::BI_InterlockedXor:
2553  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
2554  case Builtin::BI_interlockedbittestandset:
2555  return RValue::get(
2556  EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E));
2557 
2558  case Builtin::BI__exception_code:
2559  case Builtin::BI_exception_code:
2560  return RValue::get(EmitSEHExceptionCode());
2561  case Builtin::BI__exception_info:
2562  case Builtin::BI_exception_info:
2563  return RValue::get(EmitSEHExceptionInfo());
2564  case Builtin::BI__abnormal_termination:
2565  case Builtin::BI_abnormal_termination:
2566  return RValue::get(EmitSEHAbnormalTermination());
2567  case Builtin::BI_setjmpex: {
2568  if (getTarget().getTriple().isOSMSVCRT()) {
2569  llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2570  llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2571  getLLVMContext(), llvm::AttributeList::FunctionIndex,
2572  llvm::Attribute::ReturnsTwice);
2573  llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
2574  llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2575  "_setjmpex", ReturnsTwiceAttr, /*Local=*/true);
2576  llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2577  EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2578  llvm::Value *FrameAddr =
2579  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2580  ConstantInt::get(Int32Ty, 0));
2581  llvm::Value *Args[] = {Buf, FrameAddr};
2582  llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
2583  CS.setAttributes(ReturnsTwiceAttr);
2584  return RValue::get(CS.getInstruction());
2585  }
2586  break;
2587  }
2588  case Builtin::BI_setjmp: {
2589  if (getTarget().getTriple().isOSMSVCRT()) {
2590  llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2591  getLLVMContext(), llvm::AttributeList::FunctionIndex,
2592  llvm::Attribute::ReturnsTwice);
2593  llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2594  EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2595  llvm::CallSite CS;
2596  if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2597  llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
2598  llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
2599  llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
2600  "_setjmp3", ReturnsTwiceAttr, /*Local=*/true);
2601  llvm::Value *Count = ConstantInt::get(IntTy, 0);
2602  llvm::Value *Args[] = {Buf, Count};
2603  CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
2604  } else {
2605  llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2606  llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
2607  llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2608  "_setjmp", ReturnsTwiceAttr, /*Local=*/true);
2609  llvm::Value *FrameAddr =
2610  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2611  ConstantInt::get(Int32Ty, 0));
2612  llvm::Value *Args[] = {Buf, FrameAddr};
2613  CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
2614  }
2615  CS.setAttributes(ReturnsTwiceAttr);
2616  return RValue::get(CS.getInstruction());
2617  }
2618  break;
2619  }
2620 
2621  case Builtin::BI__GetExceptionInfo: {
2622  if (llvm::GlobalVariable *GV =
2623  CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
2624  return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
2625  break;
2626  }
2627 
2628  case Builtin::BI__fastfail:
2629  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
2630 
2631  case Builtin::BI__builtin_coro_size: {
2632  auto & Context = getContext();
2633  auto SizeTy = Context.getSizeType();
2634  auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
2635  Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
2636  return RValue::get(Builder.CreateCall(F));
2637  }
2638 
2639  case Builtin::BI__builtin_coro_id:
2640  return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
2641  case Builtin::BI__builtin_coro_promise:
2642  return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
2643  case Builtin::BI__builtin_coro_resume:
2644  return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
2645  case Builtin::BI__builtin_coro_frame:
2646  return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
2647  case Builtin::BI__builtin_coro_free:
2648  return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
2649  case Builtin::BI__builtin_coro_destroy:
2650  return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
2651  case Builtin::BI__builtin_coro_done:
2652  return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
2653  case Builtin::BI__builtin_coro_alloc:
2654  return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
2655  case Builtin::BI__builtin_coro_begin:
2656  return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
2657  case Builtin::BI__builtin_coro_end:
2658  return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
2659  case Builtin::BI__builtin_coro_suspend:
2660  return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
2661  case Builtin::BI__builtin_coro_param:
2662  return EmitCoroutineIntrinsic(E, Intrinsic::coro_param);
2663 
2664  // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
2665  case Builtin::BIread_pipe:
2666  case Builtin::BIwrite_pipe: {
2667  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2668  *Arg1 = EmitScalarExpr(E->getArg(1));
2669  CGOpenCLRuntime OpenCLRT(CGM);
2670  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2671  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2672 
2673  // Type of the generic packet parameter.
2674  unsigned GenericAS =
2675  getContext().getTargetAddressSpace(LangAS::opencl_generic);
2676  llvm::Type *I8PTy = llvm::PointerType::get(
2677  llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
2678 
2679  // Testing which overloaded version we should generate the call for.
2680  if (2U == E->getNumArgs()) {
2681  const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
2682  : "__write_pipe_2";
2683  // Creating a generic function type to be able to call with any builtin or
2684  // user defined type.
2685  llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
2686  llvm::FunctionType *FTy = llvm::FunctionType::get(
2687  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2688  Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
2689  return RValue::get(
2690  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2691  {Arg0, BCast, PacketSize, PacketAlign}));
2692  } else {
2693  assert(4 == E->getNumArgs() &&
2694  "Illegal number of parameters to pipe function");
2695  const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
2696  : "__write_pipe_4";
2697 
2698  llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
2699  Int32Ty, Int32Ty};
2700  Value *Arg2 = EmitScalarExpr(E->getArg(2)),
2701  *Arg3 = EmitScalarExpr(E->getArg(3));
2702  llvm::FunctionType *FTy = llvm::FunctionType::get(
2703  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2704  Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
2705  // We know the third argument is an integer type, but we may need to cast
2706  // it to i32.
2707  if (Arg2->getType() != Int32Ty)
2708  Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
2709  return RValue::get(Builder.CreateCall(
2710  CGM.CreateRuntimeFunction(FTy, Name),
2711  {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
2712  }
2713  }
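  // Illustrative lowering: read_pipe(p, &val) becomes a call to
  //   __read_pipe_2(p, (generic i8 *)&val, packet_size, packet_align)
  // and the four-argument reserved-id form maps to __read_pipe_4 analogously.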
2714  // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write
2715  // functions
2716  case Builtin::BIreserve_read_pipe:
2717  case Builtin::BIreserve_write_pipe:
2718  case Builtin::BIwork_group_reserve_read_pipe:
2719  case Builtin::BIwork_group_reserve_write_pipe:
2720  case Builtin::BIsub_group_reserve_read_pipe:
2721  case Builtin::BIsub_group_reserve_write_pipe: {
2722  // Composing the mangled name for the function.
2723  const char *Name;
2724  if (BuiltinID == Builtin::BIreserve_read_pipe)
2725  Name = "__reserve_read_pipe";
2726  else if (BuiltinID == Builtin::BIreserve_write_pipe)
2727  Name = "__reserve_write_pipe";
2728  else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
2729  Name = "__work_group_reserve_read_pipe";
2730  else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
2731  Name = "__work_group_reserve_write_pipe";
2732  else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
2733  Name = "__sub_group_reserve_read_pipe";
2734  else
2735  Name = "__sub_group_reserve_write_pipe";
2736 
2737  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2738  *Arg1 = EmitScalarExpr(E->getArg(1));
2739  llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
2740  CGOpenCLRuntime OpenCLRT(CGM);
2741  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2742  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2743 
2744  // Building the generic function prototype.
2745  llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
2746  llvm::FunctionType *FTy = llvm::FunctionType::get(
2747  ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2748  // We know the second argument is an integer type, but we may need to cast
2749  // it to i32.
2750  if (Arg1->getType() != Int32Ty)
2751  Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
2752  return RValue::get(
2753  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2754  {Arg0, Arg1, PacketSize, PacketAlign}));
2755  }
2756  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
2757  // functions
2758  case Builtin::BIcommit_read_pipe:
2759  case Builtin::BIcommit_write_pipe:
2760  case Builtin::BIwork_group_commit_read_pipe:
2761  case Builtin::BIwork_group_commit_write_pipe:
2762  case Builtin::BIsub_group_commit_read_pipe:
2763  case Builtin::BIsub_group_commit_write_pipe: {
2764  const char *Name;
2765  if (BuiltinID == Builtin::BIcommit_read_pipe)
2766  Name = "__commit_read_pipe";
2767  else if (BuiltinID == Builtin::BIcommit_write_pipe)
2768  Name = "__commit_write_pipe";
2769  else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
2770  Name = "__work_group_commit_read_pipe";
2771  else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
2772  Name = "__work_group_commit_write_pipe";
2773  else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
2774  Name = "__sub_group_commit_read_pipe";
2775  else
2776  Name = "__sub_group_commit_write_pipe";
2777 
2778  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2779  *Arg1 = EmitScalarExpr(E->getArg(1));
2780  CGOpenCLRuntime OpenCLRT(CGM);
2781  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2782  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2783 
2784  // Building the generic function prototype.
2785  llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
2786  llvm::FunctionType *FTy =
2787  llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
2788  llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2789 
2790  return RValue::get(
2791  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2792  {Arg0, Arg1, PacketSize, PacketAlign}));
2793  }
2794  // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
2795  case Builtin::BIget_pipe_num_packets:
2796  case Builtin::BIget_pipe_max_packets: {
2797  const char *Name;
2798  if (BuiltinID == Builtin::BIget_pipe_num_packets)
2799  Name = "__get_pipe_num_packets";
2800  else
2801  Name = "__get_pipe_max_packets";
2802 
2803  // Building the generic function prototype.
2804  Value *Arg0 = EmitScalarExpr(E->getArg(0));
2805  CGOpenCLRuntime OpenCLRT(CGM);
2806  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2807  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2808  llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
2809  llvm::FunctionType *FTy = llvm::FunctionType::get(
2810  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2811 
2812  return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2813  {Arg0, PacketSize, PacketAlign}));
2814  }
2815 
2816  // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
2817  case Builtin::BIto_global:
2818  case Builtin::BIto_local:
2819  case Builtin::BIto_private: {
2820  auto Arg0 = EmitScalarExpr(E->getArg(0));
2821  auto NewArgT = llvm::PointerType::get(Int8Ty,
2822  CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2823  auto NewRetT = llvm::PointerType::get(Int8Ty,
2824  CGM.getContext().getTargetAddressSpace(
2825  E->getType()->getPointeeType().getQualifiers().getAddressSpace()));
2826  auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
2827  llvm::Value *NewArg;
2828  if (Arg0->getType()->getPointerAddressSpace() !=
2829  NewArgT->getPointerAddressSpace())
2830  NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
2831  else
2832  NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
2833  auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
2834  auto NewCall =
2835  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
2836  return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
2837  ConvertType(E->getType())));
2838  }
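  // A minimal sketch of the lowering above: the operand is converted to a
  // generic i8* if its address space differs, a runtime helper named after the
  // builtin is called, and the result is cast back to the builtin's declared
  // return type, e.g. roughly
  //   to_global(p)  ->  (__global T *)__to_global((__generic char *)p)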
2839 
2840  // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
2841  // It contains four different overload formats specified in Table 6.13.17.1.
2842  case Builtin::BIenqueue_kernel: {
2843  StringRef Name; // Generated function call name
2844  unsigned NumArgs = E->getNumArgs();
2845 
2846  llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
2847  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2848  getContext().getTargetAddressSpace(LangAS::opencl_generic));
2849 
2850  llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
2851  llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
2852  LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
2853  llvm::Value *Range = NDRangeL.getAddress().getPointer();
2854  llvm::Type *RangeTy = NDRangeL.getAddress().getType();
2855 
2856  if (NumArgs == 4) {
2857  // The most basic form of the call with parameters:
2858  // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
2859  Name = "__enqueue_kernel_basic";
2860  llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
2861  GenericVoidPtrTy};
2862  llvm::FunctionType *FTy = llvm::FunctionType::get(
2863  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2864 
2865  auto Info =
2866  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
2867  llvm::Value *Kernel =
2868  Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
2869  llvm::Value *Block =
2870  Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
2871 
2872  AttrBuilder B;
2873  B.addAttribute(Attribute::ByVal);
2874  llvm::AttributeList ByValAttrSet =
2875  llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
2876 
2877  auto RTCall =
2878  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
2879  {Queue, Flags, Range, Kernel, Block});
2880  RTCall->setAttributes(ByValAttrSet);
2881  return RValue::get(RTCall);
2882  }
2883  assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
2884 
2885  // Create a temporary array to hold the sizes of local pointer arguments
2886  // for the block. \p First is the position of the first size argument.
2887  auto CreateArrayForSizeVar = [=](unsigned First) {
2888  auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First);
2889  auto *Arr = Builder.CreateAlloca(AT);
2890  llvm::Value *Ptr;
2891  // Each of the following arguments specifies the size of the corresponding
2892  // argument passed to the enqueued block.
2893  auto *Zero = llvm::ConstantInt::get(IntTy, 0);
2894  for (unsigned I = First; I < NumArgs; ++I) {
2895  auto *Index = llvm::ConstantInt::get(IntTy, I - First);
2896  auto *GEP = Builder.CreateGEP(Arr, {Zero, Index});
2897  if (I == First)
2898  Ptr = GEP;
2899  auto *V =
2900  Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
2901  Builder.CreateAlignedStore(
2902  V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy));
2903  }
2904  return Ptr;
2905  };
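  // Worked example (hypothetical call, assuming a 64-bit SizeTy): for
  //   enqueue_kernel(q, flags, range, block, 16u, 32u)
  // NumArgs is 6, so CreateArrayForSizeVar(4) emits an alloca of [2 x i64],
  // stores 16 and 32 (zero-extended) into its elements, and returns a pointer
  // to the first element, which is then passed on to the runtime.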
2906 
2907  // Could have events and/or vaargs.
2908  if (E->getArg(3)->getType()->isBlockPointerType()) {
2909  // No events passed, but has variadic arguments.
2910  Name = "__enqueue_kernel_vaargs";
2911  auto Info =
2912  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
2913  llvm::Value *Kernel =
2914  Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
2915  auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
2916  auto *PtrToSizeArray = CreateArrayForSizeVar(4);
2917 
2918  // Create a vector of the arguments, as well as a constant value to
2919  // express to the runtime the number of variadic arguments.
2920  std::vector<llvm::Value *> Args = {
2921  Queue, Flags, Range,
2922  Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4),
2923  PtrToSizeArray};
2924  std::vector<llvm::Type *> ArgTys = {
2925  QueueTy, IntTy, RangeTy,
2926  GenericVoidPtrTy, GenericVoidPtrTy, IntTy,
2927  PtrToSizeArray->getType()};
2928 
2929  llvm::FunctionType *FTy = llvm::FunctionType::get(
2930  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2931  return RValue::get(
2932  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2933  llvm::ArrayRef<llvm::Value *>(Args)));
2934  }
2935  // Any calls now have event arguments passed.
2936  if (NumArgs >= 7) {
2937  llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
2938  llvm::Type *EventPtrTy = EventTy->getPointerTo(
2939  CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2940 
2941  llvm::Value *NumEvents =
2942  Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
2943  llvm::Value *EventList =
2944  E->getArg(4)->getType()->isArrayType()
2945  ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
2946  : EmitScalarExpr(E->getArg(4));
2947  llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
2948  // Convert to generic address space.
2949  EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
2950  ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
2951  auto Info =
2952  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
2953  llvm::Value *Kernel =
2954  Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
2955  llvm::Value *Block =
2956  Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
2957 
2958  std::vector<llvm::Type *> ArgTys = {
2959  QueueTy, Int32Ty, RangeTy, Int32Ty,
2960  EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
2961 
2962  std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents,
2963  EventList, ClkEvent, Kernel, Block};
2964 
2965  if (NumArgs == 7) {
2966  // Has events but no variadics.
2967  Name = "__enqueue_kernel_basic_events";
2968  llvm::FunctionType *FTy = llvm::FunctionType::get(
2969  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2970  return RValue::get(
2971  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2972  llvm::ArrayRef<llvm::Value *>(Args)));
2973  }
2974  // Has event info and variadics
2975  // Pass the number of variadics to the runtime function too.
2976  Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
2977  ArgTys.push_back(Int32Ty);
2978  Name = "__enqueue_kernel_events_vaargs";
2979 
2980  auto *PtrToSizeArray = CreateArrayForSizeVar(7);
2981  Args.push_back(PtrToSizeArray);
2982  ArgTys.push_back(PtrToSizeArray->getType());
2983 
2984  llvm::FunctionType *FTy = llvm::FunctionType::get(
2985  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2986  return RValue::get(
2987  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2988  llvm::ArrayRef<llvm::Value *>(Args)));
2989  }
2990  LLVM_FALLTHROUGH;
2991  }
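  // Summary of the dispatch above: the 4-argument form lowers to
  // __enqueue_kernel_basic; a block with local-size arguments but no events
  // lowers to __enqueue_kernel_vaargs; the 7-argument form with events lowers
  // to __enqueue_kernel_basic_events; and events plus local-size arguments
  // lower to __enqueue_kernel_events_vaargs, with the trailing size array
  // built by CreateArrayForSizeVar in both vaargs cases.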
2992  // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
2993  // parameter.
2994  case Builtin::BIget_kernel_work_group_size: {
2995  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2996  getContext().getTargetAddressSpace(LangAS::opencl_generic));
2997  auto Info =
2998  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
2999  Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3000  Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3001  return RValue::get(Builder.CreateCall(
3002  CGM.CreateRuntimeFunction(
3003  llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
3004  false),
3005  "__get_kernel_work_group_size_impl"),
3006  {Kernel, Arg}));
3007  }
3008  case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
3009  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3010  getContext().getTargetAddressSpace(LangAS::opencl_generic));
3011  auto Info =
3012  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
3013  Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3014  Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3015  return RValue::get(Builder.CreateCall(
3016  CGM.CreateRuntimeFunction(
3017  llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
3018  false),
3019  "__get_kernel_preferred_work_group_multiple_impl"),
3020  {Kernel, Arg}));
3021  }
3022  case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
3023  case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
3024  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3025  getContext().getTargetAddressSpace(LangAS::opencl_generic));
3026  LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
3027  llvm::Value *NDRange = NDRangeL.getAddress().getPointer();
3028  auto Info =
3029  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
3030  Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3031  Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3032  const char *Name =
3033  BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
3034  ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
3035  : "__get_kernel_sub_group_count_for_ndrange_impl";
3036  return RValue::get(Builder.CreateCall(
3037  CGM.CreateRuntimeFunction(
3038  llvm::FunctionType::get(
3039  IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
3040  false),
3041  Name),
3042  {NDRange, Kernel, Block}));
3043  }
3044 
3045  case Builtin::BI__builtin_store_half:
3046  case Builtin::BI__builtin_store_halff: {
3047  Value *Val = EmitScalarExpr(E->getArg(0));
3048  Address Address = EmitPointerWithAlignment(E->getArg(1));
3049  Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
3050  return RValue::get(Builder.CreateStore(HalfVal, Address));
3051  }
3052  case Builtin::BI__builtin_load_half: {
3053  Address Address = EmitPointerWithAlignment(E->getArg(0));
3054  Value *HalfVal = Builder.CreateLoad(Address);
3055  return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
3056  }
3057  case Builtin::BI__builtin_load_halff: {
3058  Address Address = EmitPointerWithAlignment(E->getArg(0));
3059  Value *HalfVal = Builder.CreateLoad(Address);
3060  return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
3061  }
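  // For example, __builtin_store_halff(1.0f, p) truncates the float to half
  // before the store, while __builtin_load_half(p) loads a half and extends
  // it to double (and __builtin_load_halff extends it to float).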
3062  case Builtin::BIprintf:
3063  if (getTarget().getTriple().isNVPTX())
3064  return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
3065  break;
3066  case Builtin::BI__builtin_canonicalize:
3067  case Builtin::BI__builtin_canonicalizef:
3068  case Builtin::BI__builtin_canonicalizel:
3069  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
3070 
3071  case Builtin::BI__builtin_thread_pointer: {
3072  if (!getContext().getTargetInfo().isTLSSupported())
3073  CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
3074  // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
3075  break;
3076  }
3077  case Builtin::BI__builtin_os_log_format:
3078  return emitBuiltinOSLogFormat(*E);
3079 
3080  case Builtin::BI__builtin_os_log_format_buffer_size: {
3081  analyze_os_log::OSLogBufferLayout Layout;
3082  analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
3083  return RValue::get(ConstantInt::get(ConvertType(E->getType()),
3084  Layout.size().getQuantity()));
3085  }
3086 
3087  case Builtin::BI__xray_customevent: {
3088  if (!ShouldXRayInstrumentFunction())
3089  return RValue::getIgnored();
3090  if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
3091  if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
3092  return RValue::getIgnored();
3093 
3094  Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
3095  auto FTy = F->getFunctionType();
3096  auto Arg0 = E->getArg(0);
3097  auto Arg0Val = EmitScalarExpr(Arg0);
3098  auto Arg0Ty = Arg0->getType();
3099  auto PTy0 = FTy->getParamType(0);
3100  if (PTy0 != Arg0Val->getType()) {
3101  if (Arg0Ty->isArrayType())
3102  Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
3103  else
3104  Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
3105  }
3106  auto Arg1 = EmitScalarExpr(E->getArg(1));
3107  auto PTy1 = FTy->getParamType(1);
3108  if (PTy1 != Arg1->getType())
3109  Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
3110  return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
3111  }
3112 
3113  case Builtin::BI__builtin_ms_va_start:
3114  case Builtin::BI__builtin_ms_va_end:
3115  return RValue::get(
3116  EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
3117  BuiltinID == Builtin::BI__builtin_ms_va_start));
3118 
3119  case Builtin::BI__builtin_ms_va_copy: {
3120  // Lower this manually. We can't reliably determine whether or not any
3121  // given va_copy() is for a Win64 va_list from the calling convention
3122  // alone, because it's legal to do this from a System V ABI function.
3123  // With opaque pointer types, we won't have enough information in LLVM
3124  // IR to determine this from the argument types, either. Best to do it
3125  // now, while we have enough information.
3126  Address DestAddr = EmitMSVAListRef(E->getArg(0));
3127  Address SrcAddr = EmitMSVAListRef(E->getArg(1));
3128 
3129  llvm::Type *BPP = Int8PtrPtrTy;
3130 
3131  DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
3132  DestAddr.getAlignment());
3133  SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
3134  SrcAddr.getAlignment());
3135 
3136  Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
3137  return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
3138  }
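  // The net effect of this lowering is a pointer-sized copy: the current
  // argument pointer is loaded from the source va_list slot and stored into
  // the destination slot, i.e. roughly "*(char **)dest = *(char **)src".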
3139  }
3140 
3141  // If this is an alias for a lib function (e.g. __builtin_sin), emit
3142  // the call using the normal call path, but using the unmangled
3143  // version of the function name.
3144  if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
3145  return emitLibraryCall(*this, FD, E,
3146  CGM.getBuiltinLibFunction(FD, BuiltinID));
3147 
3148  // If this is a predefined lib function (e.g. malloc), emit the call
3149  // using exactly the normal call path.
3150  if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
3151  return emitLibraryCall(*this, FD, E,
3152  cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
3153 
3154  // Check that a call to a target specific builtin has the correct target
3155  // features.
3156  // This is placed down here so that non-target-specific builtins skip the
3157  // check; if generic builtins ever start to require target features, this
3158  // can move up to the beginning of the function.
3159  checkTargetFeatures(E, FD);
3160 
3161  // See if we have a target specific intrinsic.
3162  const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
3163  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
3164  StringRef Prefix =
3165  llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
3166  if (!Prefix.empty()) {
3167  IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
3168  // NOTE: we don't need to perform a compatibility flag check here, since the
3169  // intrinsics are declared in Builtins*.def via LANGBUILTIN, which restricts
3170  // the MS builtins via ALL_MS_LANGUAGES, so they are filtered out earlier.
3171  if (IntrinsicID == Intrinsic::not_intrinsic)
3172  IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
3173  }
3174 
3175  if (IntrinsicID != Intrinsic::not_intrinsic) {
3176  SmallVector<Value*, 16> Args;
3177 
3178  // Find out if any arguments are required to be integer constant
3179  // expressions.
3180  unsigned ICEArguments = 0;
3181  ASTContext::GetBuiltinTypeError Error;
3182  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3183  assert(Error == ASTContext::GE_None && "Should not codegen an error");
3184 
3185  Function *F = CGM.getIntrinsic(IntrinsicID);
3186  llvm::FunctionType *FTy = F->getFunctionType();
3187 
3188  for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
3189  Value *ArgValue;
3190  // If this is a normal argument, just emit it as a scalar.
3191  if ((ICEArguments & (1 << i)) == 0) {
3192  ArgValue = EmitScalarExpr(E->getArg(i));
3193  } else {
3194  // If this is required to be a constant, constant fold it so that we
3195  // know that the generated intrinsic gets a ConstantInt.
3196  llvm::APSInt Result;
3197  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
3198  assert(IsConst && "Constant arg isn't actually constant?");
3199  (void)IsConst;
3200  ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
3201  }
3202 
3203  // If the intrinsic arg type is different from the builtin arg type
3204  // we need to do a bit cast.
3205  llvm::Type *PTy = FTy->getParamType(i);
3206  if (PTy != ArgValue->getType()) {
3207  assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
3208  "Must be able to losslessly bit cast to param");
3209  ArgValue = Builder.CreateBitCast(ArgValue, PTy);
3210  }
3211 
3212  Args.push_back(ArgValue);
3213  }
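  // In the loop above, if ICEArguments has bit i set, argument i must be an
  // integer constant expression; it is folded to a ConstantInt here rather
  // than being emitted through EmitScalarExpr.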
3214 
3215  Value *V = Builder.CreateCall(F, Args);
3216  QualType BuiltinRetType = E->getType();
3217 
3218  llvm::Type *RetTy = VoidTy;
3219  if (!BuiltinRetType->isVoidType())
3220  RetTy = ConvertType(BuiltinRetType);
3221 
3222  if (RetTy != V->getType()) {
3223  assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
3224  "Must be able to losslessly bit cast result type");
3225  V = Builder.CreateBitCast(V, RetTy);
3226  }
3227 
3228  return RValue::get(V);
3229  }
3230 
3231  // See if we have a target specific builtin that needs to be lowered.
3232  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
3233  return RValue::get(V);
3234 
3235  ErrorUnsupported(E, "builtin function");
3236 
3237  // Unknown builtin, for now just dump it out and return undef.
3238  return GetUndefRValue(E->getType());
3239 }
3240 
3241 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
3242  unsigned BuiltinID, const CallExpr *E,
3243  llvm::Triple::ArchType Arch) {
3244  switch (Arch) {
3245  case llvm::Triple::arm:
3246  case llvm::Triple::armeb:
3247  case llvm::Triple::thumb:
3248  case llvm::Triple::thumbeb:
3249  return CGF->EmitARMBuiltinExpr(BuiltinID, E);
3250  case llvm::Triple::aarch64:
3251  case llvm::Triple::aarch64_be:
3252  return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
3253  case llvm::Triple::x86:
3254  case llvm::Triple::x86_64:
3255  return CGF->EmitX86BuiltinExpr(BuiltinID, E);
3256  case llvm::Triple::ppc:
3257  case llvm::Triple::ppc64:
3258  case llvm::Triple::ppc64le:
3259  return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
3260  case llvm::Triple::r600:
3261  case llvm::Triple::amdgcn:
3262  return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
3263  case llvm::Triple::systemz:
3264  return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
3265  case llvm::Triple::nvptx:
3266  case llvm::Triple::nvptx64:
3267  return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
3268  case llvm::Triple::wasm32:
3269  case llvm::Triple::wasm64:
3270  return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
3271  default:
3272  return nullptr;
3273  }
3274 }
3275 
3276 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
3277  const CallExpr *E) {
3278  if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
3279  assert(getContext().getAuxTargetInfo() && "Missing aux target info");
3280  return EmitTargetArchBuiltinExpr(
3281  this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
3282  getContext().getAuxTargetInfo()->getTriple().getArch());
3283  }
3284 
3285  return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
3286  getTarget().getTriple().getArch());
3287 }
3288 
3289 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
3290  NeonTypeFlags TypeFlags,
3291  bool V1Ty=false) {
3292  int IsQuad = TypeFlags.isQuad();
3293  switch (TypeFlags.getEltType()) {
3294  case NeonTypeFlags::Int8:
3295  case NeonTypeFlags::Poly8:
3296  return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
3297  case NeonTypeFlags::Int16:
3298  case NeonTypeFlags::Poly16:
3299  case NeonTypeFlags::Float16:
3300  return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
3301  case NeonTypeFlags::Int32:
3302  return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
3303  case NeonTypeFlags::Int64:
3304  case NeonTypeFlags::Poly64:
3305  return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
3306  case NeonTypeFlags::Poly128:
3307  // FIXME: i128 and f128 are not fully supported in Clang and LLVM;
3308  // a lot of the i128 and f128 API is still missing,
3309  // so we use v16i8 to represent poly128 and rely on pattern matching.
3310  return llvm::VectorType::get(CGF->Int8Ty, 16);
3311  case NeonTypeFlags::Float32:
3312  return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
3313  case NeonTypeFlags::Float64:
3314  return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
3315  }
3316  llvm_unreachable("Unknown vector element type!");
3317 }
3318 
3319 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
3320  NeonTypeFlags IntTypeFlags) {
3321  int IsQuad = IntTypeFlags.isQuad();
3322  switch (IntTypeFlags.getEltType()) {
3323  case NeonTypeFlags::Int32:
3324  return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
3325  case NeonTypeFlags::Int64:
3326  return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
3327  default:
3328  llvm_unreachable("Type can't be converted to floating-point!");
3329  }
3330 }
3331 
3332 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
3333  unsigned nElts = V->getType()->getVectorNumElements();
3334  Value* SV = llvm::ConstantVector::getSplat(nElts, C);
3335  return Builder.CreateShuffleVector(V, V, SV, "lane");
3336 }
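// For instance, splatting lane 1 of a <4 x i32> value produces a
// shufflevector with mask <1, 1, 1, 1>, broadcasting that lane to every
// element of the result.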
3337 
3338 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
3339  const char *name,
3340  unsigned shift, bool rightshift) {
3341  unsigned j = 0;
3342  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3343  ai != ae; ++ai, ++j)
3344  if (shift > 0 && shift == j)
3345  Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
3346  else
3347  Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
3348 
3349  return Builder.CreateCall(F, Ops, name);
3350 }
3351 
3352 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
3353  bool neg) {
3354  int SV = cast<ConstantInt>(V)->getSExtValue();
3355  return ConstantInt::get(Ty, neg ? -SV : SV);
3356 }
3357 
3358 // \brief Right-shift a vector by a constant.
3359 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
3360  llvm::Type *Ty, bool usgn,
3361  const char *name) {
3362  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
3363 
3364  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
3365  int EltSize = VTy->getScalarSizeInBits();
3366 
3367  Vec = Builder.CreateBitCast(Vec, Ty);
3368 
3369  // lshr/ashr are undefined when the shift amount is equal to the vector
3370  // element size.
3371  if (ShiftAmt == EltSize) {
3372  if (usgn) {
3373  // Right-shifting an unsigned value by its size yields 0.
3374  return llvm::ConstantAggregateZero::get(VTy);
3375  } else {
3376  // Right-shifting a signed value by its size is equivalent
3377  // to a shift of size-1.
3378  --ShiftAmt;
3379  Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
3380  }
3381  }
3382 
3383  Shift = EmitNeonShiftVector(Shift, Ty, false);
3384  if (usgn)
3385  return Builder.CreateLShr(Vec, Shift, name);
3386  else
3387  return Builder.CreateAShr(Vec, Shift, name);
3388 }
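// Worked example: right-shifting a <4 x i32> vector by 32 (the element size)
// yields a zero vector in the unsigned case, while the signed case is clamped
// to an arithmetic shift by 31, which preserves the sign-fill semantics.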
3389 
3390 enum {
3391  AddRetType = (1 << 0),
3392  Add1ArgType = (1 << 1),
3393  Add2ArgTypes = (1 << 2),
3394 
3395  VectorizeRetType = (1 << 3),
3396  VectorizeArgTypes = (1 << 4),
3397 
3398  InventFloatType = (1 << 5),
3399  UnsignedAlts = (1 << 6),
3400 
3401  Use64BitVectors = (1 << 7),
3402  Use128BitVectors = (1 << 8),
3403 
3404  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
3405  VectorRet = AddRetType | VectorizeRetType,
3406  VectorRetGetArgs01 =
3407  AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
3408  FpCmpzModifiers =
3409  AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
3410 };
3411 
3412 namespace {
3413 struct NeonIntrinsicInfo {
3414  const char *NameHint;
3415  unsigned BuiltinID;
3416  unsigned LLVMIntrinsic;
3417  unsigned AltLLVMIntrinsic;
3418  unsigned TypeModifier;
3419 
3420  bool operator<(unsigned RHSBuiltinID) const {
3421  return BuiltinID < RHSBuiltinID;
3422  }
3423  bool operator<(const NeonIntrinsicInfo &TE) const {
3424  return BuiltinID < TE.BuiltinID;
3425  }
3426 };
3427 } // end anonymous namespace
3428 
3429 #define NEONMAP0(NameBase) \
3430  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
3431 
3432 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
3433  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
3434  Intrinsic::LLVMIntrinsic, 0, TypeModifier }
3435 
3436 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
3437  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
3438  Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
3439  TypeModifier }
3440 
3441 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
3442  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3443  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3444  NEONMAP1(vabs_v, arm_neon_vabs, 0),
3445  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
3446  NEONMAP0(vaddhn_v),
3447  NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
3448  NEONMAP1(vaeseq_v, arm_neon_aese, 0),
3449  NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
3450  NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
3451  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
3452  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
3453  NEONMAP1(vcage_v, arm_neon_vacge, 0),
3454  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
3455  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
3456  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
3457  NEONMAP1(vcale_v, arm_neon_vacge, 0),
3458  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
3459  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
3460  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
3461  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
3462  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
3463  NEONMAP1(vclz_v, ctlz, Add1ArgType),
3464  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3465  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3466  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3467  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
3468  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
3469  NEONMAP0(vcvt_f32_v),
3470  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3471  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3472  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3473  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3474  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3475  NEONMAP0(vcvt_s32_v),
3476  NEONMAP0(vcvt_s64_v),
3477  NEONMAP0(vcvt_u32_v),
3478  NEONMAP0(vcvt_u64_v),
3479  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
3480  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
3481  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
3482  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
3483  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
3484  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
3485  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
3486  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
3487  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
3488  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
3489  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
3490  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
3491  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
3492  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
3493  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
3494  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
3495  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
3496  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
3497  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
3498  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
3499  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
3500  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
3501  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
3502  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
3503  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
3504  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
3505  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
3506  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
3507  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
3508  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
3509  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
3510  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
3511  NEONMAP0(vcvtq_f32_v),
3512  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3513  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3514  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3515  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3516  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3517  NEONMAP0(vcvtq_s32_v),
3518  NEONMAP0(vcvtq_s64_v),
3519  NEONMAP0(vcvtq_u32_v),
3520  NEONMAP0(vcvtq_u64_v),
3521  NEONMAP0(vext_v),
3522  NEONMAP0(vextq_v),
3523  NEONMAP0(vfma_v),
3524  NEONMAP0(vfmaq_v),
3525  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3526  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3527  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3528  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3529  NEONMAP0(vld1_dup_v),
3530  NEONMAP1(vld1_v, arm_neon_vld1, 0),
3531  NEONMAP0(vld1q_dup_v),
3532  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
3533  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
3534  NEONMAP1(vld2_v, arm_neon_vld2, 0),
3535  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
3536  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
3537  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
3538  NEONMAP1(vld3_v, arm_neon_vld3, 0),
3539  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
3540  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
3541  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
3542  NEONMAP1(vld4_v, arm_neon_vld4, 0),
3543  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
3544  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
3545  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3546  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
3547  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
3548  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3549  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3550  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
3551  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
3552  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3553  NEONMAP0(vmovl_v),
3554  NEONMAP0(vmovn_v),
3555  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
3556  NEONMAP0(vmull_v),
3557  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
3558  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3559  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3560  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
3561  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3562  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3563  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
3564  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
3565  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
3566  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
3567  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
3568  NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3569  NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3570  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
3571  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
3572  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
3573  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
3574  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
3575  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
3576  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
3577  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
3578  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
3579  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
3580  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
3581  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3582  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3583  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3584  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3585  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3586  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3587  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
3588  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
3589  NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3590  NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3591  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
3592  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3593  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3594  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
3595  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
3596  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3597  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3598  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
3599  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
3600  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
3601  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
3602  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
3603  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
3604  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
3605  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
3606  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
3607  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
3608  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
3609  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
3610  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3611  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3612  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3613  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3614  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3615  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3616  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
3617  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
3618  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
3619  NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
3620  NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
3621  NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
3622  NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
3623  NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
3624  NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
3625  NEONMAP0(vshl_n_v),
3626  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3627  NEONMAP0(vshll_n_v),
3628  NEONMAP0(vshlq_n_v),
3629  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3630  NEONMAP0(vshr_n_v),
3631  NEONMAP0(vshrn_n_v),
3632  NEONMAP0(vshrq_n_v),
3633  NEONMAP1(vst1_v, arm_neon_vst1, 0),
3634  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
3635  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
3636  NEONMAP1(vst2_v, arm_neon_vst2, 0),
3637  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
3638  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
3639  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
3640  NEONMAP1(vst3_v, arm_neon_vst3, 0),
3641  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
3642  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
3643  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
3644  NEONMAP1(vst4_v, arm_neon_vst4, 0),
3645  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
3646  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
3647  NEONMAP0(vsubhn_v),
3648  NEONMAP0(vtrn_v),
3649  NEONMAP0(vtrnq_v),
3650  NEONMAP0(vtst_v),
3651  NEONMAP0(vtstq_v),
3652  NEONMAP0(vuzp_v),
3653  NEONMAP0(vuzpq_v),
3654  NEONMAP0(vzip_v),
3655  NEONMAP0(vzipq_v)
3656 };
3657 
3658 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
3659  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
3660  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
3661  NEONMAP0(vaddhn_v),
3662  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
3663  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
3664  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
3665  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
3666  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
3667  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
3668  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
3669  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
3670  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
3671  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
3672  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
3673  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
3674  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
3675  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
3676  NEONMAP1(vclz_v, ctlz, Add1ArgType),
3677  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3678  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3679  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3680  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
3681  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
3682  NEONMAP0(vcvt_f32_v),
3683  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3684  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3685  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3686  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3687  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3688  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3689  NEONMAP0(vcvtq_f32_v),
3690  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3691  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3692  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3693  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3694  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3695  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3696  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
3697  NEONMAP0(vext_v),
3698  NEONMAP0(vextq_v),
3699  NEONMAP0(vfma_v),
3700  NEONMAP0(vfmaq_v),
3701  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3702  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3703  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3704  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3705  NEONMAP0(vmovl_v),
3706  NEONMAP0(vmovn_v),
3707  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
3708  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
3709  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
3710  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3711  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3712  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
3713  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
3714  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
3715  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3716  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3717  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
3718  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
3719  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
3720  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
3721  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
3722  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
3723  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
3724  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
3725  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
3726  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
3727  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
3728  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3729  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3730  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
3731  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3732  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
3733  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3734  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
3735  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
3736  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3737  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3738  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
3739  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3740  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3741  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
3742  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
3743  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3744  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3745  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3746  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3747  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3748  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3749  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3750  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3751  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
3752  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
3753  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
3754  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
3755  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
3756  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
3757  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
3758  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
3759  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
3760  NEONMAP0(vshl_n_v),
3761  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3762  NEONMAP0(vshll_n_v),
3763  NEONMAP0(vshlq_n_v),
3764  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3765  NEONMAP0(vshr_n_v),
3766  NEONMAP0(vshrn_n_v),
3767  NEONMAP0(vshrq_n_v),
3768  NEONMAP0(vsubhn_v),
3769  NEONMAP0(vtst_v),
3770  NEONMAP0(vtstq_v),
3771 };
3772 
3773 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
3774  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
3775  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
3776  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
3777  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3778  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3779  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3780  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3781  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3782  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3783  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3784  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3785  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
3786  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3787  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
3788  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3789  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3790  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3791  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3792  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3793  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3794  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3795  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3796  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3797  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3798  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3799  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3800  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3801  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3802  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3803  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3804  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3805  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3806  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3807  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3808  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3809  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3810  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3811  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3812  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3813  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3814  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3815  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3816  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3817  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3818  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3819  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3820  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3821  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3822  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
3823  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3824  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3825  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3826  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3827  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3828  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3829  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3830  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3831  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3832  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3833  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3834  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3835  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3836  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3837  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3838  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3839  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3840  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3841  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3842  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3843  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
3844  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
3845  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
3846  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3847  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3848  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3849  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3850  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3851  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3852  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3853  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3854  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3855  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3856  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3857  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
3858  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3859  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
3860  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3861  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3862  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
3863  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
3864  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3865  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3866  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
3867  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
3868  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
3869  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
3870  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
3871  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
3872  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
3873  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
3874  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3875  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3876  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3877  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3878  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
3879  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3880  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3881  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3882  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
3883  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3884  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
3885  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
3886  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
3887  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3888  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3889  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
3890  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
3891  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3892  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3893  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
3894  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
3895  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
3896  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
3897  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3898  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3899  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3900  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3901  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
3902  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3903  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3904  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3905  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3906  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3907  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3908  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
3909  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
3910  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3911  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3912  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3913  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3914  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
3915  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
3916  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
3917  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
3918  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3919  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3920  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
3921  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
3922  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
3923  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3924  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3925  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3926  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3927  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
3928  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3929  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3930  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3931  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3932  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
3933  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
3934  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3935  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3936  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
3937  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
3938  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
3939  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
3940  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
3941  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
3942  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
3943  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
3944  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
3945  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
3946  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
3947  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
3948  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
3949  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
3950  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
3951  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
3952  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
3953  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
3954  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
3955  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
3956  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3957  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
3958  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3959  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
3960  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
3961  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
3962  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3963  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
3964  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3965  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
3966 };
3967 
3968 #undef NEONMAP0
3969 #undef NEONMAP1
3970 #undef NEONMAP2
3971 
3972 static bool NEONSIMDIntrinsicsProvenSorted = false;
3973 
3974 static bool AArch64SIMDIntrinsicsProvenSorted = false;
3975 static bool AArch64SISDIntrinsicsProvenSorted = false;
3976 
3977 
3978 static const NeonIntrinsicInfo *
3979 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
3980  unsigned BuiltinID, bool &MapProvenSorted) {
3981 
3982 #ifndef NDEBUG
3983  if (!MapProvenSorted) {
3984  assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
3985  MapProvenSorted = true;
3986  }
3987 #endif
3988 
3989  const NeonIntrinsicInfo *Builtin =
3990  std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
3991 
3992  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
3993  return Builtin;
3994 
3995  return nullptr;
3996 }
3997 
3998 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
3999  unsigned Modifier,
4000  llvm::Type *ArgType,
4001  const CallExpr *E) {
4002  int VectorSize = 0;
4003  if (Modifier & Use64BitVectors)
4004  VectorSize = 64;
4005  else if (Modifier & Use128BitVectors)
4006  VectorSize = 128;
4007 
4008  // Return type.
4009  SmallVector<llvm::Type *, 3> Tys;
4010  if (Modifier & AddRetType) {
4011  llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
4012  if (Modifier & VectorizeRetType)
4013  Ty = llvm::VectorType::get(
4014  Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
4015 
4016  Tys.push_back(Ty);
4017  }
4018 
4019  // Arguments.
4020  if (Modifier & VectorizeArgTypes) {
4021  int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
4022  ArgType = llvm::VectorType::get(ArgType, Elts);
4023  }
4024 
4025  if (Modifier & (Add1ArgType | Add2ArgTypes))
4026  Tys.push_back(ArgType);
4027 
4028  if (Modifier & Add2ArgTypes)
4029  Tys.push_back(ArgType);
4030 
4031  if (Modifier & InventFloatType)
4032  Tys.push_back(FloatTy);
4033 
4034  return CGM.getIntrinsic(IntrinsicID, Tys);
4035 }
4036 
4037 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
4038  const NeonIntrinsicInfo &SISDInfo,
4039  SmallVectorImpl<Value *> &Ops,
4040  const CallExpr *E) {
4041  unsigned BuiltinID = SISDInfo.BuiltinID;
4042  unsigned int Int = SISDInfo.LLVMIntrinsic;
4043  unsigned Modifier = SISDInfo.TypeModifier;
4044  const char *s = SISDInfo.NameHint;
4045 
4046  switch (BuiltinID) {
4047  case NEON::BI__builtin_neon_vcled_s64:
4048  case NEON::BI__builtin_neon_vcled_u64:
4049  case NEON::BI__builtin_neon_vcles_f32:
4050  case NEON::BI__builtin_neon_vcled_f64:
4051  case NEON::BI__builtin_neon_vcltd_s64:
4052  case NEON::BI__builtin_neon_vcltd_u64:
4053  case NEON::BI__builtin_neon_vclts_f32:
4054  case NEON::BI__builtin_neon_vcltd_f64:
4055  case NEON::BI__builtin_neon_vcales_f32:
4056  case NEON::BI__builtin_neon_vcaled_f64:
4057  case NEON::BI__builtin_neon_vcalts_f32:
4058  case NEON::BI__builtin_neon_vcaltd_f64:
4059  // Only one direction of these comparisons actually exists: cmle is a cmge
4060  // with swapped operands. The table gives us the right intrinsic, but we
4061  // still need to do the swap.
4062  std::swap(Ops[0], Ops[1]);
4063  break;
4064  }
4065 
4066  assert(Int && "Generic code assumes a valid intrinsic");
4067 
4068  // Determine the type(s) of this overloaded AArch64 intrinsic.
4069  const Expr *Arg = E->getArg(0);
4070  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
4071  Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
4072 
4073  int j = 0;
4074  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
4075  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
4076  ai != ae; ++ai, ++j) {
4077  llvm::Type *ArgTy = ai->getType();
4078  if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
4079  ArgTy->getPrimitiveSizeInBits())
4080  continue;
4081 
4082  assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
4083  // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
4084  // it before inserting.
4085  Ops[j] =
4086  CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
4087  Ops[j] =
4088  CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
4089  }
4090 
4091  Value *Result = CGF.EmitNeonCall(F, Ops, s);
4092  llvm::Type *ResultType = CGF.ConvertType(E->getType());
4093  if (ResultType->getPrimitiveSizeInBits() <
4094  Result->getType()->getPrimitiveSizeInBits())
4095  return CGF.Builder.CreateExtractElement(Result, C0);
4096 
4097  return CGF.Builder.CreateBitCast(Result, ResultType, s);
4098 }
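 
// A minimal sketch of the operand swap performed above for the scalar
// "less-than"/"less-or-equal" comparisons: there is no dedicated cmle/cmlt
// form, so e.g. vcled_s64(a, b) is emitted as the cmge pattern with the
// operands exchanged. Assuming an AArch64 target and <arm_neon.h>, the two
// spellings below produce the same all-ones/all-zeros mask (illustrative
// only, not part of the code generator):
//
//   uint64_t le_mask(int64_t a, int64_t b) {
//     uint64_t via_swap = vcged_s64(b, a);  // what the table plus the swap emit
//     uint64_t direct   = vcled_s64(a, b);  // the user-visible intrinsic
//     return via_swap & direct;             // identical masks
//   }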
4099 
4100 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
4101  unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
4102  const char *NameHint, unsigned Modifier, const CallExpr *E,
4103  SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
4104  // Get the last argument, which specifies the vector type.
4105  llvm::APSInt NeonTypeConst;
4106  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
4107  if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
4108  return nullptr;
4109 
4110  // Determine the type of this overloaded NEON intrinsic.
4111  NeonTypeFlags Type(NeonTypeConst.getZExtValue());
4112  bool Usgn = Type.isUnsigned();
4113  bool Quad = Type.isQuad();
4114 
4115  llvm::VectorType *VTy = GetNeonType(this, Type);
4116  llvm::Type *Ty = VTy;
4117  if (!Ty)
4118  return nullptr;
4119 
4120  auto getAlignmentValue32 = [&](Address addr) -> Value* {
4121  return Builder.getInt32(addr.getAlignment().getQuantity());
4122  };
4123 
4124  unsigned Int = LLVMIntrinsic;
4125  if ((Modifier & UnsignedAlts) && !Usgn)
4126  Int = AltLLVMIntrinsic;
4127 
4128  switch (BuiltinID) {
4129  default: break;
4130  case NEON::BI__builtin_neon_vabs_v:
4131  case NEON::BI__builtin_neon_vabsq_v:
4132  if (VTy->getElementType()->isFloatingPointTy())
4133  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
4134  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
4135  case NEON::BI__builtin_neon_vaddhn_v: {
4136  llvm::VectorType *SrcTy =
4137  llvm::VectorType::getExtendedElementVectorType(VTy);
4138 
4139  // %sum = add <4 x i32> %lhs, %rhs
4140  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4141  Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
4142  Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
4143 
4144  // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
4145  Constant *ShiftAmt =
4146  ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
4147  Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
4148 
4149  // %res = trunc <4 x i32> %high to <4 x i16>
4150  return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
4151  }
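  // Per-lane scalar sketch of the vaddhn lowering above, assuming 32-bit
  // source lanes narrowing to 16 bits (illustrative only):
  //
  //   int16_t addhn_lane(int32_t lhs, int32_t rhs) {
  //     uint32_t sum  = (uint32_t)lhs + (uint32_t)rhs; // %sum  = add (wraps)
  //     uint32_t high = sum >> 16;                     // %high = lshr by half
  //     return (int16_t)high;                          // %res  = trunc
  //   }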
4152  case NEON::BI__builtin_neon_vcale_v:
4153  case NEON::BI__builtin_neon_vcaleq_v:
4154  case NEON::BI__builtin_neon_vcalt_v:
4155  case NEON::BI__builtin_neon_vcaltq_v:
4156  std::swap(Ops[0], Ops[1]);
4157  LLVM_FALLTHROUGH;
4158  case NEON::BI__builtin_neon_vcage_v:
4159  case NEON::BI__builtin_neon_vcageq_v:
4160  case NEON::BI__builtin_neon_vcagt_v:
4161  case NEON::BI__builtin_neon_vcagtq_v: {
4162  llvm::Type *VecFlt = llvm::VectorType::get(
4163  VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
4164  VTy->getNumElements());
4165  llvm::Type *Tys[] = { VTy, VecFlt };
4166  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4167  return EmitNeonCall(F, Ops, NameHint);
4168  }
4169  case NEON::BI__builtin_neon_vclz_v:
4170  case NEON::BI__builtin_neon_vclzq_v:
4171  // We generate a target-independent intrinsic, which needs a second argument
4172  // stating whether or not clz of zero is undefined; on ARM it isn't.
4173  Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
4174  break;
4175  case NEON::BI__builtin_neon_vcvt_f32_v:
4176  case NEON::BI__builtin_neon_vcvtq_f32_v:
4177  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4178  Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
4179  return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
4180  : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
4181  case NEON::BI__builtin_neon_vcvt_n_f32_v:
4182  case NEON::BI__builtin_neon_vcvt_n_f64_v:
4183  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
4184  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
4185  llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
4186  Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
4187  Function *F = CGM.getIntrinsic(Int, Tys);
4188  return EmitNeonCall(F, Ops, "vcvt_n");
4189  }
4190  case NEON::BI__builtin_neon_vcvt_n_s32_v:
4191  case NEON::BI__builtin_neon_vcvt_n_u32_v:
4192  case NEON::BI__builtin_neon_vcvt_n_s64_v:
4193  case NEON::BI__builtin_neon_vcvt_n_u64_v:
4194  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
4195  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
4196  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
4197  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
4198  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
4199  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4200  return EmitNeonCall(F, Ops, "vcvt_n");
4201  }
4202  case NEON::BI__builtin_neon_vcvt_s32_v:
4203  case NEON::BI__builtin_neon_vcvt_u32_v:
4204  case NEON::BI__builtin_neon_vcvt_s64_v:
4205  case NEON::BI__builtin_neon_vcvt_u64_v:
4206  case NEON::BI__builtin_neon_vcvtq_s32_v:
4207  case NEON::BI__builtin_neon_vcvtq_u32_v:
4208  case NEON::BI__builtin_neon_vcvtq_s64_v:
4209  case NEON::BI__builtin_neon_vcvtq_u64_v: {
4210  Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
4211  return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
4212  : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
4213  }
4214  case NEON::BI__builtin_neon_vcvta_s32_v:
4215  case NEON::BI__builtin_neon_vcvta_s64_v:
4216  case NEON::BI__builtin_neon_vcvta_u32_v:
4217  case NEON::BI__builtin_neon_vcvta_u64_v:
4218  case NEON::BI__builtin_neon_vcvtaq_s32_v:
4219  case NEON::BI__builtin_neon_vcvtaq_s64_v:
4220  case NEON::BI__builtin_neon_vcvtaq_u32_v:
4221  case NEON::BI__builtin_neon_vcvtaq_u64_v:
4222  case NEON::BI__builtin_neon_vcvtn_s32_v:
4223  case NEON::BI__builtin_neon_vcvtn_s64_v:
4224  case NEON::BI__builtin_neon_vcvtn_u32_v:
4225  case NEON::BI__builtin_neon_vcvtn_u64_v:
4226  case NEON::BI__builtin_neon_vcvtnq_s32_v:
4227  case NEON::BI__builtin_neon_vcvtnq_s64_v:
4228  case NEON::BI__builtin_neon_vcvtnq_u32_v:
4229  case NEON::BI__builtin_neon_vcvtnq_u64_v:
4230  case NEON::BI__builtin_neon_vcvtp_s32_v:
4231  case NEON::BI__builtin_neon_vcvtp_s64_v:
4232  case NEON::BI__builtin_neon_vcvtp_u32_v:
4233  case NEON::BI__builtin_neon_vcvtp_u64_v:
4234  case NEON::BI__builtin_neon_vcvtpq_s32_v:
4235  case NEON::BI__builtin_neon_vcvtpq_s64_v:
4236  case NEON::BI__builtin_neon_vcvtpq_u32_v:
4237  case NEON::BI__builtin_neon_vcvtpq_u64_v:
4238  case NEON::BI__builtin_neon_vcvtm_s32_v:
4239  case NEON::BI__builtin_neon_vcvtm_s64_v:
4240  case NEON::BI__builtin_neon_vcvtm_u32_v:
4241  case NEON::BI__builtin_neon_vcvtm_u64_v:
4242  case NEON::BI__builtin_neon_vcvtmq_s32_v:
4243  case NEON::BI__builtin_neon_vcvtmq_s64_v:
4244  case NEON::BI__builtin_neon_vcvtmq_u32_v:
4245  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
4246  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
4247  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
4248  }
4249  case NEON::BI__builtin_neon_vext_v:
4250  case NEON::BI__builtin_neon_vextq_v: {
4251  int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
4252  SmallVector<uint32_t, 16> Indices;
4253  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
4254  Indices.push_back(i+CV);
4255 
4256  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4257  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4258  return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
4259  }
4260  case NEON::BI__builtin_neon_vfma_v:
4261  case NEON::BI__builtin_neon_vfmaq_v: {
4262  Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
4263  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4264  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4265  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4266 
4267  // NEON intrinsic puts accumulator first, unlike the LLVM fma.
4268  return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
4269  }
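  // The argument-order note above, as a scalar sketch: the NEON vfma builtins
  // take the accumulator first, while the LLVM fma intrinsic takes it last
  // (illustrative only, assuming <cmath>):
  //
  //   float neon_style_fma(float acc, float x, float y) {
  //     return std::fma(x, y, acc);   // computes x*y + acc, i.e. acc + x*y
  //   }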
4270  case NEON::BI__builtin_neon_vld1_v:
4271  case NEON::BI__builtin_neon_vld1q_v: {
4272  llvm::Type *Tys[] = {Ty, Int8PtrTy};
4273  Ops.push_back(getAlignmentValue32(PtrOp0));
4274  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
4275  }
4276  case NEON::BI__builtin_neon_vld2_v:
4277  case NEON::BI__builtin_neon_vld2q_v:
4278  case NEON::BI__builtin_neon_vld3_v:
4279  case NEON::BI__builtin_neon_vld3q_v:
4280  case NEON::BI__builtin_neon_vld4_v:
4281  case NEON::BI__builtin_neon_vld4q_v: {
4282  llvm::Type *Tys[] = {Ty, Int8PtrTy};
4283  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4284  Value *Align = getAlignmentValue32(PtrOp1);
4285  Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
4286  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4287  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4288  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4289  }
4290  case NEON::BI__builtin_neon_vld1_dup_v:
4291  case NEON::BI__builtin_neon_vld1q_dup_v: {
4292  Value *V = UndefValue::get(Ty);
4293  Ty = llvm::PointerType::getUnqual(VTy->getElementType());
4294  PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
4295  LoadInst *Ld = Builder.CreateLoad(PtrOp0);
4296  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
4297  Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
4298  return EmitNeonSplat(Ops[0], CI);
4299  }
4300  case NEON::BI__builtin_neon_vld2_lane_v:
4301  case NEON::BI__builtin_neon_vld2q_lane_v:
4302  case NEON::BI__builtin_neon_vld3_lane_v:
4303  case NEON::BI__builtin_neon_vld3q_lane_v:
4304  case NEON::BI__builtin_neon_vld4_lane_v:
4305  case NEON::BI__builtin_neon_vld4q_lane_v: {
4306  llvm::Type *Tys[] = {Ty, Int8PtrTy};
4307  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4308  for (unsigned I = 2; I < Ops.size() - 1; ++I)
4309  Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
4310  Ops.push_back(getAlignmentValue32(PtrOp1));
4311  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
4312  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4313  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4314  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4315  }
4316  case NEON::BI__builtin_neon_vmovl_v: {
4317  llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
4318  Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
4319  if (Usgn)
4320  return Builder.CreateZExt(Ops[0], Ty, "vmovl");
4321  return Builder.CreateSExt(Ops[0], Ty, "vmovl");
4322  }
4323  case NEON::BI__builtin_neon_vmovn_v: {
4324  llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4325  Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
4326  return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
4327  }
4328  case NEON::BI__builtin_neon_vmull_v:
4329  // FIXME: the integer vmull operations could be emitted in terms of pure
4330  // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
4331  // hoisting the exts outside loops. Until global ISel comes along that can
4332  // see through such movement this leads to bad CodeGen. So we need an
4333  // intrinsic for now.
4334  Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
4335  Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
4336  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
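  // The "2 exts followed by a mul" form mentioned in the FIXME above, written
  // as a per-lane scalar sketch for signed 16-bit inputs (illustrative only):
  //
  //   int32_t mull_lane_s16(int16_t a, int16_t b) {
  //     return (int32_t)a * (int32_t)b;   // sext both operands, then mul
  //   }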
4337  case NEON::BI__builtin_neon_vpadal_v:
4338  case NEON::BI__builtin_neon_vpadalq_v: {
4339  // The source operand type has twice as many elements of half the size.
4340  unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4341  llvm::Type *EltTy =
4342  llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4343  llvm::Type *NarrowTy =
4344  llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4345  llvm::Type *Tys[2] = { Ty, NarrowTy };
4346  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
4347  }
4348  case NEON::BI__builtin_neon_vpaddl_v:
4349  case NEON::BI__builtin_neon_vpaddlq_v: {
4350  // The source operand type has twice as many elements of half the size.
4351  unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4352  llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4353  llvm::Type *NarrowTy =
4354  llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4355  llvm::Type *Tys[2] = { Ty, NarrowTy };
4356  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
4357  }
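  // Per-lane scalar sketch of the pairwise add-long shape described above:
  // two adjacent narrow lanes fold into one widened result lane (illustrative
  // only, shown for signed 8-bit sources):
  //
  //   int16_t paddl_pair_s8(int8_t lo, int8_t hi) {
  //     return (int16_t)lo + (int16_t)hi;
  //   }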
4358  case NEON::BI__builtin_neon_vqdmlal_v:
4359  case NEON::BI__builtin_neon_vqdmlsl_v: {
4360  SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
4361  Ops[1] =
4362  EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
4363  Ops.resize(2);
4364  return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
4365  }
4366  case NEON::BI__builtin_neon_vqshl_n_v:
4367  case NEON::BI__builtin_neon_vqshlq_n_v:
4368  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
4369  1, false);
4370  case NEON::BI__builtin_neon_vqshlu_n_v:
4371  case NEON::BI__builtin_neon_vqshluq_n_v:
4372  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
4373  1, false);
4374  case NEON::BI__builtin_neon_vrecpe_v:
4375  case NEON::BI__builtin_neon_vrecpeq_v:
4376  case NEON::BI__builtin_neon_vrsqrte_v:
4377  case NEON::BI__builtin_neon_vrsqrteq_v:
4378  Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
4379  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
4380 
4381  case NEON::BI__builtin_neon_vrshr_n_v:
4382  case NEON::BI__builtin_neon_vrshrq_n_v:
4383  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
4384  1, true);
4385  case NEON::BI__builtin_neon_vshl_n_v:
4386  case NEON::BI__builtin_neon_vshlq_n_v:
4387  Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
4388  return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
4389  "vshl_n");
4390  case NEON::BI__builtin_neon_vshll_n_v: {
4391  llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
4392  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4393  if (Usgn)
4394  Ops[0] = Builder.CreateZExt(Ops[0], VTy);
4395  else
4396  Ops[0] = Builder.CreateSExt(Ops[0], VTy);
4397  Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
4398  return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
4399  }
4400  case NEON::BI__builtin_neon_vshrn_n_v: {
4401  llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4402  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4403  Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
4404  if (Usgn)
4405  Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
4406  else
4407  Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
4408  return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
4409  }
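  // Per-lane scalar sketch of the narrowing shift above: shift right by the
  // immediate (arithmetic for signed, logical for unsigned), then truncate
  // (illustrative only):
  //
  //   int16_t shrn_lane_s32(int32_t v, unsigned n) {
  //     return (int16_t)(v >> n);   // signed input, arithmetic shift
  //   }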
4410  case NEON::BI__builtin_neon_vshr_n_v:
4411  case NEON::BI__builtin_neon_vshrq_n_v:
4412  return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
4413  case NEON::BI__builtin_neon_vst1_v:
4414  case NEON::BI__builtin_neon_vst1q_v:
4415  case NEON::BI__builtin_neon_vst2_v:
4416  case NEON::BI__builtin_neon_vst2q_v:
4417  case NEON::BI__builtin_neon_vst3_v:
4418  case NEON::BI__builtin_neon_vst3q_v:
4419  case NEON::BI__builtin_neon_vst4_v:
4420  case NEON::BI__builtin_neon_vst4q_v:
4421  case NEON::BI__builtin_neon_vst2_lane_v:
4422  case NEON::BI__builtin_neon_vst2q_lane_v:
4423  case NEON::BI__builtin_neon_vst3_lane_v:
4424  case NEON::BI__builtin_neon_vst3q_lane_v:
4425  case NEON::BI__builtin_neon_vst4_lane_v:
4426  case NEON::BI__builtin_neon_vst4q_lane_v: {
4427  llvm::Type *Tys[] = {Int8PtrTy, Ty};
4428  Ops.push_back(getAlignmentValue32(PtrOp0));
4429  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
4430  }
4431  case NEON::BI__builtin_neon_vsubhn_v: {
4432  llvm::VectorType *SrcTy =
4433  llvm::VectorType::getExtendedElementVectorType(VTy);
4434 
4435  // %diff = sub <4 x i32> %lhs, %rhs
4436  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4437  Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
4438  Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
4439 
4440  // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
4441  Constant *ShiftAmt =
4442  ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
4443  Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
4444 
4445  // %res = trunc <4 x i32> %high to <4 x i16>
4446  return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
4447  }
4448  case NEON::BI__builtin_neon_vtrn_v:
4449  case NEON::BI__builtin_neon_vtrnq_v: {
4450  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4451  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4452  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4453  Value *SV = nullptr;
4454 
4455  for (unsigned vi = 0; vi != 2; ++vi) {
4456  SmallVector<uint32_t, 16> Indices;
4457  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4458  Indices.push_back(i+vi);
4459  Indices.push_back(i+e+vi);
4460  }
4461  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4462  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
4463  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4464  }
4465  return SV;
4466  }
4467  case NEON::BI__builtin_neon_vtst_v:
4468  case NEON::BI__builtin_neon_vtstq_v: {
4469  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4470  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4471  Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
4472  Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
4473  ConstantAggregateZero::get(Ty));
4474  return Builder.CreateSExt(Ops[0], Ty, "vtst");
4475  }
4476  case NEON::BI__builtin_neon_vuzp_v:
4477  case NEON::BI__builtin_neon_vuzpq_v: {
4478  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4479  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4480  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4481  Value *SV = nullptr;
4482 
4483  for (unsigned vi = 0; vi != 2; ++vi) {
4484  SmallVector<uint32_t, 16> Indices;
4485  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
4486  Indices.push_back(2*i+vi);
4487 
4488  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4489  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
4490  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4491  }
4492  return SV;
4493  }
4494  case NEON::BI__builtin_neon_vzip_v:
4495  case NEON::BI__builtin_neon_vzipq_v: {
4496  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4497  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4498  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4499  Value *SV = nullptr;
4500 
4501  for (unsigned vi = 0; vi != 2; ++vi) {
4502  SmallVector<uint32_t, 16> Indices;
4503  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4504  Indices.push_back((i + vi*e) >> 1);
4505  Indices.push_back(((i + vi*e) >> 1)+e);
4506  }
4507  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4508  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
4509  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4510  }
4511  return SV;
4512  }
4513  }
4514 
4515  assert(Int && "Expected valid intrinsic number");
4516 
4517  // Determine the type(s) of this overloaded AArch64 intrinsic.
4518  Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
4519 
4520  Value *Result = EmitNeonCall(F, Ops, NameHint);
4521  llvm::Type *ResultType = ConvertType(E->getType());
4522  // AArch64 intrinsic one-element vector type cast to
4523  // scalar type expected by the builtin
4524  return Builder.CreateBitCast(Result, ResultType, NameHint);
4525 }
4526 
4527 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
4528  Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
4529  const CmpInst::Predicate Ip, const Twine &Name) {
4530  llvm::Type *OTy = Op->getType();
4531 
4532  // FIXME: this is utterly horrific. We should not be looking at previous
4533  // codegen context to find out what needs doing. Unfortunately TableGen
4534  // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
4535  // (etc).
4536  if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
4537  OTy = BI->getOperand(0)->getType();
4538 
4539  Op = Builder.CreateBitCast(Op, OTy);
4540  if (OTy->getScalarType()->isFloatingPointTy()) {
4541  Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
4542  } else {
4543  Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
4544  }
4545  return Builder.CreateSExt(Op, Ty, Name);
4546 }
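 
// In scalar terms, the compare emitter above produces a compare against zero
// whose i1 result is sign-extended into an all-ones/all-zeros mask of the
// element width. For the equal-to-zero form and a 32-bit lane (illustrative
// only):
//
//   uint32_t ceqz_lane_s32(int32_t v) {
//     return v == 0 ? 0xFFFFFFFFu : 0u;
//   }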
4547 
4548 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
4549  Value *ExtOp, Value *IndexOp,
4550  llvm::Type *ResTy, unsigned IntID,
4551  const char *Name) {
4552  SmallVector<Value *, 2> TblOps;
4553  if (ExtOp)
4554  TblOps.push_back(ExtOp);
4555 
4556  // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
4557  SmallVector<uint32_t, 16> Indices;
4558  llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
4559  for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
4560  Indices.push_back(2*i);
4561  Indices.push_back(2*i+1);
4562  }
4563 
4564  int PairPos = 0, End = Ops.size() - 1;
4565  while (PairPos < End) {
4566  TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4567  Ops[PairPos+1], Indices,
4568  Name));
4569  PairPos += 2;
4570  }
4571 
4572  // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
4573  // of the final 128-bit lookup table with zero.
4574  if (PairPos == End) {
4575  Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
4576  TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4577  ZeroTbl, Indices, Name));
4578  }
4579 
4580  Function *TblF;
4581  TblOps.push_back(IndexOp);
4582  TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
4583 
4584  return CGF.EmitNeonCall(TblF, TblOps, Name);
4585 }
4586 
4587 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
4588  unsigned Value;
4589  switch (BuiltinID) {
4590  default:
4591  return nullptr;
4592  case ARM::BI__builtin_arm_nop:
4593  Value = 0;
4594  break;
4595  case ARM::BI__builtin_arm_yield:
4596  case ARM::BI__yield:
4597  Value = 1;
4598  break;
4599  case ARM::BI__builtin_arm_wfe:
4600  case ARM::BI__wfe:
4601  Value = 2;
4602  break;
4603  case ARM::BI__builtin_arm_wfi:
4604  case ARM::BI__wfi:
4605  Value = 3;
4606  break;
4607  case ARM::BI__builtin_arm_sev:
4608  case ARM::BI__sev:
4609  Value = 4;
4610  break;
4611  case ARM::BI__builtin_arm_sevl:
4612  case ARM::BI__sevl:
4613  Value = 5;
4614  break;
4615  }
4616 
4617  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
4618  llvm::ConstantInt::get(Int32Ty, Value));
4619 }
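 
// For example, a source-level call such as
//
//   void spin_hint(void) { __builtin_arm_yield(); }
//
// reaches this helper and is emitted as llvm.arm.hint(i32 1); likewise
// __builtin_arm_wfe() becomes llvm.arm.hint(i32 2), and so on (assuming an
// ARM target; illustrative only).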
4620 
4621 // Generates the IR for the read/write special register builtins.
4622 // ValueType is the type of the value that is to be written or read;
4623 // RegisterType is the type of the register being written to or read from.
4624 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
4625  const CallExpr *E,
4626  llvm::Type *RegisterType,
4627  llvm::Type *ValueType,
4628  bool IsRead,
4629  StringRef SysReg = "") {
4630  // The read and write register intrinsics only support 32- and 64-bit operations.
4631  assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
4632  && "Unsupported size for register.");
4633 
4634  CodeGen::CGBuilderTy &Builder = CGF.Builder;
4635  CodeGen::CodeGenModule &CGM = CGF.CGM;
4636  LLVMContext &Context = CGM.getLLVMContext();
4637 
4638  if (SysReg.empty()) {
4639  const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
4640  SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
4641  }
4642 
4643  llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
4644  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4645  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4646 
4647  llvm::Type *Types[] = { RegisterType };
4648 
4649  bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
4650  assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
4651  && "Can't fit 64-bit value in 32-bit register");
4652 
4653  if (IsRead) {
4654  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
4655  llvm::Value *Call = Builder.CreateCall(F, Metadata);
4656 
4657  if (MixedTypes)
4658  // Read into 64 bit register and then truncate result to 32 bit.
4659  return Builder.CreateTrunc(Call, ValueType);
4660 
4661  if (ValueType->isPointerTy())
4662  // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
4663  return Builder.CreateIntToPtr(Call, ValueType);
4664 
4665  return Call;
4666  }
4667 
4668  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
4669  llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
4670  if (MixedTypes) {
4671  // Extend 32 bit write value to 64 bit to pass to write.
4672  ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
4673  return Builder.CreateCall(F, { Metadata, ArgValue });
4674  }
4675 
4676  if (ValueType->isPointerTy()) {
4677  // Have VoidPtrTy ArgValue but want to return an i32/i64.
4678  ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
4679  return Builder.CreateCall(F, { Metadata, ArgValue });
4680  }
4681 
4682  return Builder.CreateCall(F, { Metadata, ArgValue });
4683 }
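 
// Typical source-level uses of the builtins served by this helper, assuming
// an ARM target; the register name below is only a placeholder (illustrative
// only):
//
//   unsigned read_reg(void)    { return __builtin_arm_rsr("cp15:0:c13:c0:3"); }
//   void write_reg(unsigned v) { __builtin_arm_wsr("cp15:0:c13:c0:3", v); }
//
// The first becomes a call to llvm.read_register, the second to
// llvm.write_register, with the register name passed as metadata.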
4684 
4685 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
4686 /// argument that specifies the vector type.
4687 static bool HasExtraNeonArgument(unsigned BuiltinID) {
4688  switch (BuiltinID) {
4689  default: break;
4690  case NEON::BI__builtin_neon_vget_lane_i8:
4691  case NEON::BI__builtin_neon_vget_lane_i16:
4692  case NEON::BI__builtin_neon_vget_lane_i32:
4693  case NEON::BI__builtin_neon_vget_lane_i64:
4694  case NEON::BI__builtin_neon_vget_lane_f32:
4695  case NEON::BI__builtin_neon_vgetq_lane_i8:
4696  case NEON::BI__builtin_neon_vgetq_lane_i16:
4697  case NEON::BI__builtin_neon_vgetq_lane_i32:
4698  case NEON::BI__builtin_neon_vgetq_lane_i64:
4699  case NEON::BI__builtin_neon_vgetq_lane_f32:
4700  case NEON::BI__builtin_neon_vset_lane_i8:
4701  case NEON::BI__builtin_neon_vset_lane_i16:
4702  case NEON::BI__builtin_neon_vset_lane_i32:
4703  case NEON::BI__builtin_neon_vset_lane_i64:
4704  case NEON::BI__builtin_neon_vset_lane_f32:
4705  case NEON::BI__builtin_neon_vsetq_lane_i8:
4706  case NEON::BI__builtin_neon_vsetq_lane_i16:
4707  case NEON::BI__builtin_neon_vsetq_lane_i32:
4708  case NEON::BI__builtin_neon_vsetq_lane_i64:
4709  case NEON::BI__builtin_neon_vsetq_lane_f32:
4710  case NEON::BI__builtin_neon_vsha1h_u32:
4711  case NEON::BI__builtin_neon_vsha1cq_u32:
4712  case NEON::BI__builtin_neon_vsha1pq_u32:
4713  case NEON::BI__builtin_neon_vsha1mq_u32:
4714  case clang::ARM::BI_MoveToCoprocessor:
4715  case clang::ARM::BI_MoveToCoprocessor2:
4716  return false;
4717  }
4718  return true;
4719 }
4720 
4721 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
4722  const CallExpr *E) {
4723  if (auto Hint = GetValueForARMHint(BuiltinID))
4724  return Hint;
4725 
4726  if (BuiltinID == ARM::BI__emit) {
4727  bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
4728  llvm::FunctionType *FTy =
4729  llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
4730 
4731  APSInt Value;
4732  if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
4733  llvm_unreachable("Sema will ensure that the parameter is constant");
4734 
4735  uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
4736 
4737  llvm::InlineAsm *Emit =
4738  IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
4739  /*SideEffects=*/true)
4740  : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
4741  /*SideEffects=*/true);
4742 
4743  return Builder.CreateCall(Emit);
4744  }
4745 
4746  if (BuiltinID == ARM::BI__builtin_arm_dbg) {
4747  Value *Option = EmitScalarExpr(E->getArg(0));
4748  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
4749  }
4750 
4751  if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
4752  Value *Address = EmitScalarExpr(E->getArg(0));
4753  Value *RW = EmitScalarExpr(E->getArg(1));
4754  Value *IsData = EmitScalarExpr(E->getArg(2));
4755 
4756  // Locality is not supported on the ARM target.
4757  Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
4758 
4759  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4760  return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4761  }
4762 
4763  if (BuiltinID == ARM::BI__builtin_arm_rbit) {
4764  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4765  return Builder.CreateCall(
4766  CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4767  }
4768 
4769  if (BuiltinID == ARM::BI__clear_cache) {
4770  assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4771  const FunctionDecl *FD = E->getDirectCallee();
4772  Value *Ops[2];
4773  for (unsigned i = 0; i < 2; i++)
4774  Ops[i] = EmitScalarExpr(E->getArg(i));
4775  llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4776  llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4777  StringRef Name = FD->getName();
4778  return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4779  }
4780 
4781  if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
4782  BuiltinID == ARM::BI__builtin_arm_mcrr2) {
4783  Function *F;
4784 
4785  switch (BuiltinID) {
4786  default: llvm_unreachable("unexpected builtin");
4787  case ARM::BI__builtin_arm_mcrr:
4788  F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
4789  break;
4790  case ARM::BI__builtin_arm_mcrr2:
4791  F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
4792  break;
4793  }
4794 
4795  // The MCRR{2} instruction has 5 operands, but
4796  // the builtin has only 4, because Rt and Rt2
4797  // are passed to it as a single unsigned 64-bit
4798  // integer; here that value is split back into
4799  // the two 32-bit halves the LLVM intrinsic
4800  // expects.
4801 
4802  Value *Coproc = EmitScalarExpr(E->getArg(0));
4803  Value *Opc1 = EmitScalarExpr(E->getArg(1));
4804  Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
4805  Value *CRm = EmitScalarExpr(E->getArg(3));
4806 
4807  Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4808  Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
4809  Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
4810  Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
4811 
4812  return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
4813  }
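  // The 64-bit split performed above, in plain C terms (illustrative only):
  //
  //   void split64(uint64_t v, uint32_t *rt, uint32_t *rt2) {
  //     *rt  = (uint32_t)v;          // truncate to the low 32 bits
  //     *rt2 = (uint32_t)(v >> 32);  // high 32 bits
  //   }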
4814 
4815  if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
4816  BuiltinID == ARM::BI__builtin_arm_mrrc2) {
4817  Function *F;
4818 
4819  switch (BuiltinID) {
4820  default: llvm_unreachable("unexpected builtin");
4821  case ARM::BI__builtin_arm_mrrc:
4822  F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
4823  break;
4824  case ARM::BI__builtin_arm_mrrc2:
4825  F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
4826  break;
4827  }
4828 
4829  Value *Coproc = EmitScalarExpr(E->getArg(0));
4830  Value *Opc1 = EmitScalarExpr(E->getArg(1));
4831  Value *CRm = EmitScalarExpr(E->getArg(2));
4832  Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
4833 
4834  // Returns an unsigned 64-bit integer, represented
4835  // as two 32-bit integers.
4836 
4837  Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
4838  Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
4839  Rt = Builder.CreateZExt(Rt, Int64Ty);
4840  Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
4841 
4842  Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
4843  RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
4844  RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
4845 
4846  return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
4847  }
4848 
4849  if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
4850  ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
4851  BuiltinID == ARM::BI__builtin_arm_ldaex) &&
4852  getContext().getTypeSize(E->getType()) == 64) ||
4853  BuiltinID == ARM::BI__ldrexd) {
4854  Function *F;
4855 
4856  switch (BuiltinID) {
4857  default: llvm_unreachable("unexpected builtin");
4858  case ARM::BI__builtin_arm_ldaex:
4859  F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
4860  break;
4861  case ARM::BI__builtin_arm_ldrexd:
4862  case ARM::BI__builtin_arm_ldrex:
4863  case ARM::BI__ldrexd:
4864  F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
4865  break;
4866  }
4867 
4868  Value *LdPtr = EmitScalarExpr(E->getArg(0));
4869  Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4870  "ldrexd");
4871 
4872  Value *Val0 = Builder.CreateExtractValue(Val, 1);
4873  Value *Val1 = Builder.CreateExtractValue(Val, 0);
4874  Val0 = Builder.CreateZExt(Val0, Int64Ty);
4875  Val1 = Builder.CreateZExt(Val1, Int64Ty);
4876 
4877  Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
4878  Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4879  Val = Builder.CreateOr(Val, Val1);
4880  return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4881  }
4882 
4883  if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
4884  BuiltinID == ARM::BI__builtin_arm_ldaex) {
4885  Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4886 
4887  QualType Ty = E->getType();
4888  llvm::Type *RealResTy = ConvertType(Ty);
4889  llvm::Type *PtrTy = llvm::IntegerType::get(
4890  getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
4891  LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
4892 
4893  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
4894  ? Intrinsic::arm_ldaex
4895  : Intrinsic::arm_ldrex,
4896  PtrTy);
4897  Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
4898 
4899  if (RealResTy->isPointerTy())
4900  return Builder.CreateIntToPtr(Val, RealResTy);
4901  else {
4902  llvm::Type *IntResTy = llvm::IntegerType::get(
4903  getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
4904  Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4905  return Builder.CreateBitCast(Val, RealResTy);
4906  }
4907  }
4908 
4909  if (BuiltinID == ARM::BI__builtin_arm_strexd ||
4910  ((BuiltinID == ARM::BI__builtin_arm_stlex ||
4911  BuiltinID == ARM::BI__builtin_arm_strex) &&
4912  getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
4913  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4914  ? Intrinsic::arm_stlexd
4915  : Intrinsic::arm_strexd);
4916  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
4917 
4918  Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4919  Value *Val = EmitScalarExpr(E->getArg(0));
4920  Builder.CreateStore(Val, Tmp);
4921 
4922  Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
4923  Val = Builder.CreateLoad(LdPtr);
4924 
4925  Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4926  Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4927  Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
4928  return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
4929  }
4930 
4931  if (BuiltinID == ARM::BI__builtin_arm_strex ||
4932  BuiltinID == ARM::BI__builtin_arm_stlex) {
4933  Value *StoreVal = EmitScalarExpr(E->getArg(0));
4934  Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4935 
4936  QualType Ty = E->getArg(0)->getType();
4937  llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4938  getContext().getTypeSize(Ty));
4939  StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4940 
4941  if (StoreVal->getType()->isPointerTy())
4942  StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
4943  else {
4944  llvm::Type *IntTy = llvm::IntegerType::get(
4945  getLLVMContext(),
4946  CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
4947  StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
4948  StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
4949  }
4950 
4951  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4952  ? Intrinsic::arm_stlex
4953  : Intrinsic::arm_strex,
4954  StoreAddr->getType());
4955  return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
4956  }
4957 
4958  switch (BuiltinID) {
4959  case ARM::BI__iso_volatile_load8:
4960  case ARM::BI__iso_volatile_load16:
4961  case ARM::BI__iso_volatile_load32:
4962  case ARM::BI__iso_volatile_load64: {
4963  Value *Ptr = EmitScalarExpr(E->getArg(0));
4964  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4965  CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
4966  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4967  LoadSize.getQuantity() * 8);
4968  Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4969  llvm::LoadInst *Load =
4970  Builder.CreateAlignedLoad(Ptr, LoadSize);
4971  Load->setVolatile(true);
4972  return Load;
4973  }
4974  case ARM::BI__iso_volatile_store8:
4975  case ARM::BI__iso_volatile_store16:
4976  case ARM::BI__iso_volatile_store32:
4977  case ARM::BI__iso_volatile_store64: {
4978  Value *Ptr = EmitScalarExpr(E->getArg(0));
4979  Value *Value = EmitScalarExpr(E->getArg(1));
4980  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4981  CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
4982  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4983  StoreSize.getQuantity() * 8);
4984  Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4985  llvm::StoreInst *Store =
4986  Builder.CreateAlignedStore(Value, Ptr,
4987  StoreSize);
4988  Store->setVolatile(true);
4989  return Store;
4990  }
4991  }
4992 
4993  if (BuiltinID == ARM::BI__builtin_arm_clrex) {
4994  Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
4995  return Builder.CreateCall(F);
4996  }
4997 
4998  // CRC32
4999  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5000  switch (BuiltinID) {
5001  case ARM::BI__builtin_arm_crc32b:
5002  CRCIntrinsicID = Intrinsic::arm_crc32b; break;
5003  case ARM::BI__builtin_arm_crc32cb:
5004  CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
5005  case ARM::BI__builtin_arm_crc32h:
5006  CRCIntrinsicID = Intrinsic::arm_crc32h; break;
5007  case ARM::BI__builtin_arm_crc32ch:
5008  CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
5009  case ARM::BI__builtin_arm_crc32w:
5010  case ARM::BI__builtin_arm_crc32d:
5011  CRCIntrinsicID = Intrinsic::arm_crc32w; break;
5012  case ARM::BI__builtin_arm_crc32cw:
5013  case ARM::BI__builtin_arm_crc32cd:
5014  CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
5015  }
5016 
5017  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5018  Value *Arg0 = EmitScalarExpr(E->getArg(0));
5019  Value *Arg1 = EmitScalarExpr(E->getArg(1));
5020 
5021  // The crc32{c,}d intrinsics are implemented as two calls to the crc32{c,}w
5022  // intrinsics, hence we need different codegen for these cases.
5023  if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
5024  BuiltinID == ARM::BI__builtin_arm_crc32cd) {
5025  Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
5026  Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
5027  Value *Arg1b = Builder.CreateLShr(Arg1, C1);
5028  Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
5029 
5030  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5031  Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
5032  return Builder.CreateCall(F, {Res, Arg1b});
5033  } else {
5034  Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
5035 
5036  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5037  return Builder.CreateCall(F, {Arg0, Arg1});
5038  }
5039  }
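  // The two-step crc32{c,}d lowering above, restated at the source level:
  // feed the low word first, then the high word. Assumes an ARM target with
  // the CRC extension (illustrative only):
  //
  //   uint32_t crc32_u64(uint32_t crc, uint64_t data) {
  //     crc = __builtin_arm_crc32w(crc, (uint32_t)data);
  //     return __builtin_arm_crc32w(crc, (uint32_t)(data >> 32));
  //   }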
5040 
5041  if (BuiltinID == ARM::BI__builtin_arm_rsr ||
5042  BuiltinID == ARM::BI__builtin_arm_rsr64 ||
5043  BuiltinID == ARM::BI__builtin_arm_rsrp ||
5044  BuiltinID == ARM::BI__builtin_arm_wsr ||
5045  BuiltinID == ARM::BI__builtin_arm_wsr64 ||
5046  BuiltinID == ARM::BI__builtin_arm_wsrp) {
5047 
5048  bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
5049  BuiltinID == ARM::BI__builtin_arm_rsr64 ||
5050  BuiltinID == ARM::BI__builtin_arm_rsrp;
5051 
5052  bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
5053  BuiltinID == ARM::BI__builtin_arm_wsrp;
5054 
5055  bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
5056  BuiltinID == ARM::BI__builtin_arm_wsr64;
5057 
5058  llvm::Type *ValueType;
5059  llvm::Type *RegisterType;
5060  if (IsPointerBuiltin) {
5061  ValueType = VoidPtrTy;
5062  RegisterType = Int32Ty;
5063  } else if (Is64Bit) {
5064  ValueType = RegisterType = Int64Ty;
5065  } else {
5066  ValueType = RegisterType = Int32Ty;
5067  }
5068 
5069  return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
5070  }
5071 
5072  // Find out if any arguments are required to be integer constant
5073  // expressions.
5074  unsigned ICEArguments = 0;
5075  ASTContext::GetBuiltinTypeError Error;
5076  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5077  assert(Error == ASTContext::GE_None && "Should not codegen an error");
5078 
5079  auto getAlignmentValue32 = [&](Address addr) -> Value* {
5080  return Builder.getInt32(addr.getAlignment().getQuantity());
5081  };
5082 
5083  Address PtrOp0 = Address::invalid();
5084  Address PtrOp1 = Address::invalid();
5085  SmallVector<Value*, 4> Ops;
5086  bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
5087  unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
5088  for (unsigned i = 0, e = NumArgs; i != e; i++) {
5089  if (i == 0) {
5090  switch (BuiltinID) {
5091  case NEON::BI__builtin_neon_vld1_v:
5092  case NEON::BI__builtin_neon_vld1q_v:
5093  case NEON::BI__builtin_neon_vld1q_lane_v:
5094  case NEON::BI__builtin_neon_vld1_lane_v:
5095  case NEON::BI__builtin_neon_vld1_dup_v:
5096  case NEON::BI__builtin_neon_vld1q_dup_v:
5097  case NEON::BI__builtin_neon_vst1_v:
5098  case NEON::BI__builtin_neon_vst1q_v:
5099  case NEON::BI__builtin_neon_vst1q_lane_v:
5100  case NEON::BI__builtin_neon_vst1_lane_v:
5101  case NEON::BI__builtin_neon_vst2_v:
5102  case NEON::BI__builtin_neon_vst2q_v:
5103  case NEON::BI__builtin_neon_vst2_lane_v:
5104  case NEON::BI__builtin_neon_vst2q_lane_v:
5105  case NEON::BI__builtin_neon_vst3_v:
5106  case NEON::BI__builtin_neon_vst3q_v:
5107  case NEON::BI__builtin_neon_vst3_lane_v:
5108  case NEON::BI__builtin_neon_vst3q_lane_v:
5109  case NEON::BI__builtin_neon_vst4_v:
5110  case NEON::BI__builtin_neon_vst4q_v:
5111  case NEON::BI__builtin_neon_vst4_lane_v:
5112  case NEON::BI__builtin_neon_vst4q_lane_v:
5113  // Get the alignment for the argument in addition to the value;
5114  // we'll use it later.
5115  PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
5116  Ops.push_back(PtrOp0.getPointer());
5117  continue;
5118  }
5119  }
5120  if (i == 1) {
5121  switch (BuiltinID) {
5122  case NEON::BI__builtin_neon_vld2_v:
5123  case NEON::BI__builtin_neon_vld2q_v:
5124  case NEON::BI__builtin_neon_vld3_v:
5125  case NEON::BI__builtin_neon_vld3q_v:
5126  case NEON::BI__builtin_neon_vld4_v:
5127  case NEON::BI__builtin_neon_vld4q_v:
5128  case NEON::BI__builtin_neon_vld2_lane_v:
5129  case NEON::BI__builtin_neon_vld2q_lane_v:
5130  case NEON::BI__builtin_neon_vld3_lane_v:
5131  case NEON::BI__builtin_neon_vld3q_lane_v:
5132  case NEON::BI__builtin_neon_vld4_lane_v:
5133  case NEON::BI__builtin_neon_vld4q_lane_v:
5134  case NEON::BI__builtin_neon_vld2_dup_v:
5135  case NEON::BI__builtin_neon_vld3_dup_v:
5136  case NEON::BI__builtin_neon_vld4_dup_v:
5137  // Get the alignment for the argument in addition to the value;
5138  // we'll use it later.
5139  PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
5140  Ops.push_back(PtrOp1.getPointer());
5141  continue;
5142  }
5143  }
5144 
5145  if ((ICEArguments & (1 << i)) == 0) {
5146  Ops.push_back(EmitScalarExpr(E->getArg(i)));
5147  } else {
5148  // If this is required to be a constant, constant fold it so that we know
5149  // that the generated intrinsic gets a ConstantInt.
5150  llvm::APSInt Result;
5151  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
5152  assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
5153  Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
5154  }
5155  }
5156 
5157  switch (BuiltinID) {
5158  default: break;
5159 
5160  case NEON::BI__builtin_neon_vget_lane_i8:
5161  case NEON::BI__builtin_neon_vget_lane_i16:
5162  case NEON::BI__builtin_neon_vget_lane_i32:
5163  case NEON::BI__builtin_neon_vget_lane_i64:
5164  case NEON::BI__builtin_neon_vget_lane_f32:
5165  case NEON::BI__builtin_neon_vgetq_lane_i8:
5166  case NEON::BI__builtin_neon_vgetq_lane_i16:
5167  case NEON::BI__builtin_neon_vgetq_lane_i32:
5168  case NEON::BI__builtin_neon_vgetq_lane_i64:
5169  case NEON::BI__builtin_neon_vgetq_lane_f32:
5170  return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5171 
5172  case NEON::BI__builtin_neon_vset_lane_i8:
5173  case NEON::BI__builtin_neon_vset_lane_i16:
5174  case NEON::BI__builtin_neon_vset_lane_i32:
5175  case NEON::BI__builtin_neon_vset_lane_i64:
5176  case NEON::BI__builtin_neon_vset_lane_f32:
5177  case NEON::BI__builtin_neon_vsetq_lane_i8:
5178  case NEON::BI__builtin_neon_vsetq_lane_i16:
5179  case NEON::BI__builtin_neon_vsetq_lane_i32:
5180  case NEON::BI__builtin_neon_vsetq_lane_i64:
5181  case NEON::BI__builtin_neon_vsetq_lane_f32:
5182  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5183 
5184  case NEON::BI__builtin_neon_vsha1h_u32:
5185  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
5186  "vsha1h");
5187  case NEON::BI__builtin_neon_vsha1cq_u32:
5188  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
5189  "vsha1h");
5190  case NEON::BI__builtin_neon_vsha1pq_u32:
5191  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
5192  "vsha1h");
5193  case NEON::BI__builtin_neon_vsha1mq_u32:
5194  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
5195  "vsha1h");
5196 
5197  // The ARM _MoveToCoprocessor builtins put the input register value as
5198  // the first argument, but the LLVM intrinsic expects it as the third one.
5199  case ARM::BI_MoveToCoprocessor:
5200  case ARM::BI_MoveToCoprocessor2: {
5201  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
5202  Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
5203  return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
5204  Ops[3], Ops[4], Ops[5]});
5205  }
5206  case ARM::BI_BitScanForward:
5207  case ARM::BI_BitScanForward64:
5208  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
5209  case ARM::BI_BitScanReverse:
5210  case ARM::BI_BitScanReverse64:
5211  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
5212 
5213  case ARM::BI_InterlockedAnd64:
5214  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
5215  case ARM::BI_InterlockedExchange64:
5216  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
5217  case ARM::BI_InterlockedExchangeAdd64:
5218  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
5219  case ARM::BI_InterlockedExchangeSub64:
5220  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
5221  case ARM::BI_InterlockedOr64:
5222  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
5223  case ARM::BI_InterlockedXor64:
5224  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
5225  case ARM::BI_InterlockedDecrement64:
5226  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
5227  case ARM::BI_InterlockedIncrement64:
5228  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
5229  }
5230 
5231  // Get the last argument, which specifies the vector type.
5232  assert(HasExtraArg);
5233  llvm::APSInt Result;
5234  const Expr *Arg = E->getArg(E->getNumArgs()-1);
5235  if (!Arg->isIntegerConstantExpr(Result, getContext()))
5236  return nullptr;
5237 
5238  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
5239  BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
5240  // Determine the overloaded type of this builtin.
5241  llvm::Type *Ty;
5242  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
5243  Ty = FloatTy;
5244  else
5245  Ty = DoubleTy;
5246 
5247  // Determine whether this is an unsigned conversion or not.
5248  bool usgn = Result.getZExtValue() == 1;
5249  unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
5250 
5251  // Call the appropriate intrinsic.
5252  Function *F = CGM.getIntrinsic(Int, Ty);
5253  return Builder.CreateCall(F, Ops, "vcvtr");
5254  }
5255 
5256  // Determine the type of this overloaded NEON intrinsic.
5257  NeonTypeFlags Type(Result.getZExtValue());
5258  bool usgn = Type.isUnsigned();
5259  bool rightShift = false;
5260 
5261  llvm::VectorType *VTy = GetNeonType(this, Type);
5262  llvm::Type *Ty = VTy;
5263  if (!Ty)
5264  return nullptr;
5265 
5266  // Many NEON builtins have identical semantics and uses in ARM and
5267  // AArch64. Emit these in a single function.
5268  auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
5269  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
5270  IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
5271  if (Builtin)
5272  return EmitCommonNeonBuiltinExpr(
5273  Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
5274  Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
5275 
5276  unsigned Int;
5277  switch (BuiltinID) {
5278  default: return nullptr;
5279  case NEON::BI__builtin_neon_vld1q_lane_v:
5280  // Handle 64-bit integer elements as a special case. Use shuffles of
5281  // one-element vectors to avoid poor code for i64 in the backend.
5282  if (VTy->getElementType()->isIntegerTy(64)) {
5283  // Extract the other lane.
5284  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5285  uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
5286  Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
5287  Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
5288  // Load the value as a one-element vector.
5289  Ty = llvm::VectorType::get(VTy->getElementType(), 1);
5290  llvm::Type *Tys[] = {Ty, Int8PtrTy};
5291  Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
5292  Value *Align = getAlignmentValue32(PtrOp0);
5293  Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
5294  // Combine them.
5295  uint32_t Indices[] = {1 - Lane, Lane};
5296  SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
5297  return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
5298  }
5299  // fall through
5300  case NEON::BI__builtin_neon_vld1_lane_v: {
5301  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5302  PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
5303  Value *Ld = Builder.CreateLoad(PtrOp0);
5304  return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
5305  }
5306  case NEON::BI__builtin_neon_vld2_dup_v:
5307  case NEON::BI__builtin_neon_vld3_dup_v:
5308  case NEON::BI__builtin_neon_vld4_dup_v: {
5309  // Handle 64-bit elements as a special case; there is no "dup" needed.
5310  if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
5311  switch (BuiltinID) {
5312  case NEON::BI__builtin_neon_vld2_dup_v:
5313  Int = Intrinsic::arm_neon_vld2;
5314  break;
5315  case NEON::BI__builtin_neon_vld3_dup_v:
5316  Int = Intrinsic::arm_neon_vld3;
5317  break;
5318  case NEON::BI__builtin_neon_vld4_dup_v:
5319  Int = Intrinsic::arm_neon_vld4;
5320  break;
5321  default: llvm_unreachable("unknown vld_dup intrinsic?");
5322  }
5323  llvm::Type *Tys[] = {Ty, Int8PtrTy};
5324  Function *F = CGM.getIntrinsic(Int, Tys);
5325  llvm::Value *Align = getAlignmentValue32(PtrOp1);
5326  Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
5327  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5328  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5329  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5330  }
5331  switch (BuiltinID) {
5332  case NEON::BI__builtin_neon_vld2_dup_v:
5333  Int = Intrinsic::arm_neon_vld2lane;
5334  break;
5335  case NEON::BI__builtin_neon_vld3_dup_v:
5336  Int = Intrinsic::arm_neon_vld3lane;
5337  break;
5338  case NEON::BI__builtin_neon_vld4_dup_v:
5339  Int = Intrinsic::arm_neon_vld4lane;
5340  break;
5341  default: llvm_unreachable("unknown vld_dup intrinsic?");
5342  }
5343  llvm::Type *Tys[] = {Ty, Int8PtrTy};
5344  Function *F = CGM.getIntrinsic(Int, Tys);
5345  llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
5346 
5347  SmallVector<Value*, 6> Args;
5348  Args.push_back(Ops[1]);
5349  Args.append(STy->getNumElements(), UndefValue::get(Ty));
5350 
5351  llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
5352  Args.push_back(CI);
5353  Args.push_back(getAlignmentValue32(PtrOp1));
5354 
5355  Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
5356  // Splat lane 0 to all elements in each vector of the result.
5357  for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
5358  Value *Val = Builder.CreateExtractValue(Ops[1], i);
5359