CGBuiltin.cpp (clang 6.0.0svn)
1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGObjCRuntime.h"
16 #include "CGOpenCLRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "CodeGenModule.h"
19 #include "ConstantEmitter.h"
20 #include "TargetInfo.h"
21 #include "clang/AST/ASTContext.h"
22 #include "clang/AST/Decl.h"
23 #include "clang/Analysis/Analyses/OSLog.h"
24 #include "clang/Basic/TargetBuiltins.h"
25 #include "clang/Basic/TargetInfo.h"
26 #include "clang/CodeGen/CGFunctionInfo.h"
27 #include "llvm/ADT/StringExtras.h"
28 #include "llvm/IR/CallSite.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/InlineAsm.h"
31 #include "llvm/IR/Intrinsics.h"
32 #include "llvm/IR/MDBuilder.h"
33 #include "llvm/Support/ConvertUTF.h"
34 #include "llvm/Support/ScopedPrinter.h"
35 #include "llvm/Support/TargetParser.h"
36 #include <sstream>
37 
38 using namespace clang;
39 using namespace CodeGen;
40 using namespace llvm;
41 
42 static
43 int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
44  return std::min(High, std::max(Low, Value));
45 }
46 
47 /// getBuiltinLibFunction - Given a builtin id for a function like
48 /// "__builtin_fabsf", return a Function* for "fabsf".
49 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
50  unsigned BuiltinID) {
51  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
52 
53  // Get the name, skip over the __builtin_ prefix (if necessary).
54  StringRef Name;
55  GlobalDecl D(FD);
56 
57  // If the builtin has been declared explicitly with an assembler label,
58  // use the mangled name. This differs from the plain label on platforms
59  // that prefix labels.
60  if (FD->hasAttr<AsmLabelAttr>())
61  Name = getMangledName(D);
62  else
63  Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
64 
65  llvm::FunctionType *Ty =
66  cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
67 
68  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
69 }
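// Editor's illustrative note (not in the original source): for a call such as
// __builtin_fabsf(x), BuiltinInfo.getName(BuiltinID) returns "__builtin_fabsf";
// skipping the 10-character "__builtin_" prefix yields the library name
// "fabsf", which is then resolved or created as an LLVM declaration using the
// builtin's own function type.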
70 
71 /// Emit the conversions required to turn the given value into an
72 /// integer of the given size.
73 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
74  QualType T, llvm::IntegerType *IntType) {
75  V = CGF.EmitToMemory(V, T);
76 
77  if (V->getType()->isPointerTy())
78  return CGF.Builder.CreatePtrToInt(V, IntType);
79 
80  assert(V->getType() == IntType);
81  return V;
82 }
83 
84 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
85  QualType T, llvm::Type *ResultType) {
86  V = CGF.EmitFromMemory(V, T);
87 
88  if (ResultType->isPointerTy())
89  return CGF.Builder.CreateIntToPtr(V, ResultType);
90 
91  assert(V->getType() == ResultType);
92  return V;
93 }
94 
95 /// Utility to insert an atomic instruction based on Intrinsic::ID
96 /// and the expression node.
97 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
98  llvm::AtomicRMWInst::BinOp Kind,
99  const CallExpr *E) {
100  QualType T = E->getType();
101  assert(E->getArg(0)->getType()->isPointerType());
102  assert(CGF.getContext().hasSameUnqualifiedType(T,
103  E->getArg(0)->getType()->getPointeeType()));
104  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
105 
106  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
107  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
108 
109  llvm::IntegerType *IntType =
110  llvm::IntegerType::get(CGF.getLLVMContext(),
111  CGF.getContext().getTypeSize(T));
112  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
113 
114  llvm::Value *Args[2];
115  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
116  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
117  llvm::Type *ValueType = Args[1]->getType();
118  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
119 
120  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
121  Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
122  return EmitFromInt(CGF, Result, T, ValueType);
123 }
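// Editor's illustrative note: a source-level call such as
//   __sync_fetch_and_add(&counter, 1);
// reaches this helper (via EmitBinaryAtomic below) and lowers to roughly
//   %old = atomicrmw add i32* %p, i32 1 seq_cst
// with the old value converted back to the original argument type.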
124 
125 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
126  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
127  Value *Address = CGF.EmitScalarExpr(E->getArg(1));
128 
129  // Convert the type of the pointer to a pointer to the stored type.
130  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
131  Value *BC = CGF.Builder.CreateBitCast(
132  Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
133  LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
134  LV.setNontemporal(true);
135  CGF.EmitStoreOfScalar(Val, LV, false);
136  return nullptr;
137 }
138 
139 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
140  Value *Address = CGF.EmitScalarExpr(E->getArg(0));
141 
142  LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
143  LV.setNontemporal(true);
144  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
145 }
146 
147 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
148  llvm::AtomicRMWInst::BinOp Kind,
149  const CallExpr *E) {
150  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
151 }
152 
153 /// Utility to insert an atomic instruction based on Intrinsic::ID and
154 /// the expression node, where the return value is the result of the
155 /// operation.
156 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
157  llvm::AtomicRMWInst::BinOp Kind,
158  const CallExpr *E,
159  Instruction::BinaryOps Op,
160  bool Invert = false) {
161  QualType T = E->getType();
162  assert(E->getArg(0)->getType()->isPointerType());
163  assert(CGF.getContext().hasSameUnqualifiedType(T,
164  E->getArg(0)->getType()->getPointeeType()));
165  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
166 
167  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
168  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
169 
170  llvm::IntegerType *IntType =
171  llvm::IntegerType::get(CGF.getLLVMContext(),
172  CGF.getContext().getTypeSize(T));
173  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
174 
175  llvm::Value *Args[2];
176  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
177  llvm::Type *ValueType = Args[1]->getType();
178  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
179  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
180 
181  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
182  Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
183  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
184  if (Invert)
185  Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
186  llvm::ConstantInt::get(IntType, -1));
187  Result = EmitFromInt(CGF, Result, T, ValueType);
188  return RValue::get(Result);
189 }
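// Editor's illustrative note: this "post" variant backs builtins like
// __sync_add_and_fetch, which must return the *new* value. Since atomicrmw
// yields the old value, the binary op is re-applied to the result; the Invert
// flag covers __sync_nand_and_fetch, where the re-applied 'and' is
// complemented (xor with -1) to form the nand result.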
190 
191 /// @brief Utility to insert an atomic cmpxchg instruction.
192 ///
193 /// @param CGF The current codegen function.
194 /// @param E Builtin call expression to convert to cmpxchg.
195 /// arg0 - address to operate on
196 /// arg1 - value to compare with
197 /// arg2 - new value
198 /// @param ReturnBool Specifies whether to return success flag of
199 /// cmpxchg result or the old value.
200 ///
201 /// @returns result of cmpxchg, according to ReturnBool
202 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
203  bool ReturnBool) {
204  QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
205  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
206  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
207 
208  llvm::IntegerType *IntType = llvm::IntegerType::get(
209  CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
210  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
211 
212  Value *Args[3];
213  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
214  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
215  llvm::Type *ValueType = Args[1]->getType();
216  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
217  Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
218 
219  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
220  Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
221  llvm::AtomicOrdering::SequentiallyConsistent);
222  if (ReturnBool)
223  // Extract boolean success flag and zext it to int.
224  return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
225  CGF.ConvertType(E->getType()));
226  else
227  // Extract old value and emit it using the same type as compare value.
228  return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
229  ValueType);
230 }
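// Editor's illustrative note: with ReturnBool=false this produces the pattern
// used for __sync_val_compare_and_swap, returning element 0 of the cmpxchg
// pair (the old value); with ReturnBool=true it matches
// __sync_bool_compare_and_swap, zero-extending the success bit (element 1) to
// the call's integer result type.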
231 
232 // Emit a simple mangled intrinsic that has 1 argument and a return type
233 // matching the argument type.
234 static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
235  const CallExpr *E,
236  unsigned IntrinsicID) {
237  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
238 
239  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
240  return CGF.Builder.CreateCall(F, Src0);
241 }
242 
243 // Emit an intrinsic that has 2 operands of the same type as its result.
244 static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
245  const CallExpr *E,
246  unsigned IntrinsicID) {
247  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
248  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
249 
250  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
251  return CGF.Builder.CreateCall(F, { Src0, Src1 });
252 }
253 
254 // Emit an intrinsic that has 3 operands of the same type as its result.
255 static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
256  const CallExpr *E,
257  unsigned IntrinsicID) {
258  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
259  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
260  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
261 
262  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
263  return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
264 }
265 
266 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
267 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
268  const CallExpr *E,
269  unsigned IntrinsicID) {
270  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
271  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
272 
273  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
274  return CGF.Builder.CreateCall(F, {Src0, Src1});
275 }
276 
277 /// EmitFAbs - Emit a call to @llvm.fabs().
278 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
279  Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
280  llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
281  Call->setDoesNotAccessMemory();
282  return Call;
283 }
284 
285 /// Emit the computation of the sign bit for a floating point value. Returns
286 /// the i1 sign bit value.
287 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
288  LLVMContext &C = CGF.CGM.getLLVMContext();
289 
290  llvm::Type *Ty = V->getType();
291  int Width = Ty->getPrimitiveSizeInBits();
292  llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
293  V = CGF.Builder.CreateBitCast(V, IntTy);
294  if (Ty->isPPC_FP128Ty()) {
295  // We want the sign bit of the higher-order double. The bitcast we just
296  // did works as if the double-double was stored to memory and then
297  // read as an i128. The "store" will put the higher-order double in the
298  // lower address in both little- and big-Endian modes, but the "load"
299  // will treat those bits as a different part of the i128: the low bits in
300  // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
301  // we need to shift the high bits down to the low before truncating.
302  Width >>= 1;
303  if (CGF.getTarget().isBigEndian()) {
304  Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
305  V = CGF.Builder.CreateLShr(V, ShiftCst);
306  }
307  // We are truncating value in order to extract the higher-order
308  // double, which we will be using to extract the sign from.
309  IntTy = llvm::IntegerType::get(C, Width);
310  V = CGF.Builder.CreateTrunc(V, IntTy);
311  }
312  Value *Zero = llvm::Constant::getNullValue(IntTy);
313  return CGF.Builder.CreateICmpSLT(V, Zero);
314 }
315 
316 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
317  const CallExpr *E, llvm::Constant *calleeValue) {
318  CGCallee callee = CGCallee::forDirect(calleeValue, FD);
319  return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
320 }
321 
322 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
323 /// depending on IntrinsicID.
324 ///
325 /// \arg CGF The current codegen function.
326 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
327 /// \arg X The first argument to the llvm.*.with.overflow.*.
328 /// \arg Y The second argument to the llvm.*.with.overflow.*.
329 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
330 /// \returns The result (i.e. sum/product) returned by the intrinsic.
331 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
332  const llvm::Intrinsic::ID IntrinsicID,
333  llvm::Value *X, llvm::Value *Y,
334  llvm::Value *&Carry) {
335  // Make sure we have integers of the same width.
336  assert(X->getType() == Y->getType() &&
337  "Arguments must be the same type. (Did you forget to make sure both "
338  "arguments have the same integer width?)");
339 
340  llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
341  llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
342  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
343  return CGF.Builder.CreateExtractValue(Tmp, 0);
344 }
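// Editor's illustrative note: for IntrinsicID = llvm::Intrinsic::uadd_with_overflow
// and two i32 operands, the emitted call is roughly
//   %pair = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
// and the helper returns the i32 sum while writing the i1 overflow bit to Carry.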
345 
346 static Value *emitRangedBuiltin(CodeGenFunction &CGF,
347  unsigned IntrinsicID,
348  int low, int high) {
349  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
350  llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
351  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
352  llvm::Instruction *Call = CGF.Builder.CreateCall(F);
353  Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
354  return Call;
355 }
356 
357 namespace {
358  struct WidthAndSignedness {
359  unsigned Width;
360  bool Signed;
361  };
362 }
363 
364 static WidthAndSignedness
365 getIntegerWidthAndSignedness(const clang::ASTContext &context,
366  const clang::QualType Type) {
367  assert(Type->isIntegerType() && "Given type is not an integer.");
368  unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
369  bool Signed = Type->isSignedIntegerType();
370  return {Width, Signed};
371 }
372 
373 // Given one or more integer types, this function produces an integer type that
374 // encompasses them: any value in one of the given types could be expressed in
375 // the encompassing type.
376 static struct WidthAndSignedness
377 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
378  assert(Types.size() > 0 && "Empty list of types.");
379 
380  // If any of the given types is signed, we must return a signed type.
381  bool Signed = false;
382  for (const auto &Type : Types) {
383  Signed |= Type.Signed;
384  }
385 
386  // The encompassing type must have a width greater than or equal to the width
387  // of the specified types. Additionally, if the encompassing type is signed,
388  // its width must be strictly greater than the width of any unsigned types
389  // given.
390  unsigned Width = 0;
391  for (const auto &Type : Types) {
392  unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
393  if (Width < MinWidth) {
394  Width = MinWidth;
395  }
396  }
397 
398  return {Width, Signed};
399 }
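// Editor's worked example: for an unsigned 32-bit type and a signed 16-bit
// type, the result must be signed (one input is signed), and a signed type
// needs one extra bit to represent every unsigned 32-bit value, so the
// encompassing type is {Width = 33, Signed = true}.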
400 
401 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
402  llvm::Type *DestType = Int8PtrTy;
403  if (ArgValue->getType() != DestType)
404  ArgValue =
405  Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
406 
407  Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
408  return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
409 }
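// Editor's illustrative note: __builtin_va_start(ap, last) and
// __builtin_va_end(ap) both funnel through this helper, which bitcasts the
// va_list pointer to i8* and calls @llvm.va_start or @llvm.va_end on it.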
410 
411 /// Checks if using the result of __builtin_object_size(p, @p From) in place of
412 /// __builtin_object_size(p, @p To) is correct
413 static bool areBOSTypesCompatible(int From, int To) {
414  // Note: Our __builtin_object_size implementation currently treats Type=0 and
415  // Type=2 identically. Encoding this implementation detail here may make
416  // improving __builtin_object_size difficult in the future, so it's omitted.
417  return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
418 }
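// Editor's illustrative note: per the mapping above, a parameter annotated
// pass_object_size(0) may satisfy a __builtin_object_size(p, 1) query, and
// pass_object_size(3) may satisfy a type-2 query, but not the reverse; every
// other combination requires an exact match.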
419 
420 static llvm::Value *
421 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
422  return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
423 }
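// Editor's note: the bit test above encodes the documented fallback values of
// __builtin_object_size when the size is unknown: the "maximum" variants
// (types 0 and 1) return -1 and the "minimum" variants (types 2 and 3) return 0.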
424 
425 llvm::Value *
426 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
427  llvm::IntegerType *ResType,
428  llvm::Value *EmittedE) {
429  uint64_t ObjectSize;
430  if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
431  return emitBuiltinObjectSize(E, Type, ResType, EmittedE);
432  return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
433 }
434 
435 /// Returns a Value corresponding to the size of the given expression.
436 /// This Value may be either of the following:
437 /// - A llvm::Argument (if E is a param with the pass_object_size attribute on
438 /// it)
439 /// - A call to the @llvm.objectsize intrinsic
440 ///
441 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
442 /// and we wouldn't otherwise try to reference a pass_object_size parameter,
443 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
444 llvm::Value *
445 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
446  llvm::IntegerType *ResType,
447  llvm::Value *EmittedE) {
448  // We need to reference an argument if the pointer is a parameter with the
449  // pass_object_size attribute.
450  if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
451  auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
452  auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
453  if (Param != nullptr && PS != nullptr &&
454  areBOSTypesCompatible(PS->getType(), Type)) {
455  auto Iter = SizeArguments.find(Param);
456  assert(Iter != SizeArguments.end());
457 
458  const ImplicitParamDecl *D = Iter->second;
459  auto DIter = LocalDeclMap.find(D);
460  assert(DIter != LocalDeclMap.end());
461 
462  return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
463  getContext().getSizeType(), E->getLocStart());
464  }
465  }
466 
467  // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
468  // evaluate E for side-effects. In either case, we shouldn't lower to
469  // @llvm.objectsize.
470  if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
471  return getDefaultBuiltinObjectSizeResult(Type, ResType);
472 
473  Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
474  assert(Ptr->getType()->isPointerTy() &&
475  "Non-pointer passed to __builtin_object_size?");
476 
477  Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
478 
479  // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
480  Value *Min = Builder.getInt1((Type & 2) != 0);
481  // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
482  Value *NullIsUnknown = Builder.getTrue();
483  return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
484 }
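// Editor's illustrative note: for __builtin_object_size(p, 0) on a plain i8*
// with no pass_object_size parameter in scope, the code above emits roughly
//   %n = call i64 @llvm.objectsize.i64.p0i8(i8* %p, i1 false, i1 true)
// where the two i1 flags are the "min" bit (Type & 2) and the "NULL is
// unknown size" bit added for GCC compatibility.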
485 
486 // Many of the MSVC builtins are on both x64 and ARM; to avoid repeating code, we
487 // handle them here.
488 enum class CodeGenFunction::MSVCIntrin {
489  _BitScanForward,
490  _BitScanReverse,
491  _InterlockedAnd,
492  _InterlockedDecrement,
493  _InterlockedExchange,
494  _InterlockedExchangeAdd,
495  _InterlockedExchangeSub,
496  _InterlockedIncrement,
497  _InterlockedOr,
498  _InterlockedXor,
499  _interlockedbittestandset,
500  __fastfail,
501 };
502 
503 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
504  const CallExpr *E) {
505  switch (BuiltinID) {
506  case MSVCIntrin::_BitScanForward:
507  case MSVCIntrin::_BitScanReverse: {
508  Value *ArgValue = EmitScalarExpr(E->getArg(1));
509 
510  llvm::Type *ArgType = ArgValue->getType();
511  llvm::Type *IndexType =
512  EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
513  llvm::Type *ResultType = ConvertType(E->getType());
514 
515  Value *ArgZero = llvm::Constant::getNullValue(ArgType);
516  Value *ResZero = llvm::Constant::getNullValue(ResultType);
517  Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
518 
519  BasicBlock *Begin = Builder.GetInsertBlock();
520  BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
521  Builder.SetInsertPoint(End);
522  PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
523 
524  Builder.SetInsertPoint(Begin);
525  Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
526  BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
527  Builder.CreateCondBr(IsZero, End, NotZero);
528  Result->addIncoming(ResZero, Begin);
529 
530  Builder.SetInsertPoint(NotZero);
531  Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));
532 
533  if (BuiltinID == MSVCIntrin::_BitScanForward) {
534  Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
535  Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
536  ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
537  Builder.CreateStore(ZeroCount, IndexAddress, false);
538  } else {
539  unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
540  Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
541 
542  Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
543  Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
544  ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
545  Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
546  Builder.CreateStore(Index, IndexAddress, false);
547  }
548  Builder.CreateBr(End);
549  Result->addIncoming(ResOne, NotZero);
550 
551  Builder.SetInsertPoint(End);
552  return Result;
553  }
554  case MSVCIntrin::_InterlockedAnd:
555  return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
556  case MSVCIntrin::_InterlockedExchange:
557  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
558  case MSVCIntrin::_InterlockedExchangeAdd:
559  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
560  case MSVCIntrin::_InterlockedExchangeSub:
561  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
562  case MSVCIntrin::_InterlockedOr:
563  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
564  case MSVCIntrin::_InterlockedXor:
565  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
566 
567  case MSVCIntrin::_interlockedbittestandset: {
568  llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
569  llvm::Value *Bit = EmitScalarExpr(E->getArg(1));
570  AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
571  AtomicRMWInst::Or, Addr,
572  Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit),
573  llvm::AtomicOrdering::SequentiallyConsistent);
574  // Shift the relevant bit to the least significant position, truncate to
575  // the result type, and test the low bit.
576  llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit);
577  llvm::Value *Truncated =
578  Builder.CreateTrunc(Shifted, ConvertType(E->getType()));
579  return Builder.CreateAnd(Truncated,
580  ConstantInt::get(Truncated->getType(), 1));
581  }
582 
583  case MSVCIntrin::_InterlockedDecrement: {
584  llvm::Type *IntTy = ConvertType(E->getType());
585  AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
586  AtomicRMWInst::Sub,
587  EmitScalarExpr(E->getArg(0)),
588  ConstantInt::get(IntTy, 1),
589  llvm::AtomicOrdering::SequentiallyConsistent);
590  return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1));
591  }
592  case MSVCIntrin::_InterlockedIncrement: {
593  llvm::Type *IntTy = ConvertType(E->getType());
594  AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
595  AtomicRMWInst::Add,
596  EmitScalarExpr(E->getArg(0)),
597  ConstantInt::get(IntTy, 1),
598  llvm::AtomicOrdering::SequentiallyConsistent);
599  return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
600  }
601 
602  case MSVCIntrin::__fastfail: {
603  // Request immediate process termination from the kernel. The instruction
604  // sequences to do this are documented on MSDN:
605  // https://msdn.microsoft.com/en-us/library/dn774154.aspx
606  llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
607  StringRef Asm, Constraints;
608  switch (ISA) {
609  default:
610  ErrorUnsupported(E, "__fastfail call for this architecture");
611  break;
612  case llvm::Triple::x86:
613  case llvm::Triple::x86_64:
614  Asm = "int $$0x29";
615  Constraints = "{cx}";
616  break;
617  case llvm::Triple::thumb:
618  Asm = "udf #251";
619  Constraints = "{r0}";
620  break;
621  }
622  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
623  llvm::InlineAsm *IA =
624  llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
625  llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
626  getLLVMContext(), llvm::AttributeList::FunctionIndex,
627  llvm::Attribute::NoReturn);
628  CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
629  CS.setAttributes(NoReturnAttr);
630  return CS.getInstruction();
631  }
632  }
633  llvm_unreachable("Incorrect MSVC intrinsic!");
634 }
635 
636 namespace {
637 // ARC cleanup for __builtin_os_log_format
638 struct CallObjCArcUse final : EHScopeStack::Cleanup {
639  CallObjCArcUse(llvm::Value *object) : object(object) {}
640  llvm::Value *object;
641 
642  void Emit(CodeGenFunction &CGF, Flags flags) override {
643  CGF.EmitARCIntrinsicUse(object);
644  }
645 };
646 }
647 
648 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
649  BuiltinCheckKind Kind) {
650  assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
651  && "Unsupported builtin check kind");
652 
653  Value *ArgValue = EmitScalarExpr(E);
654  if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef())
655  return ArgValue;
656 
657  SanitizerScope SanScope(this);
658  Value *Cond = Builder.CreateICmpNE(
659  ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
660  EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
661  SanitizerHandler::InvalidBuiltin,
662  {EmitCheckSourceLocation(E->getExprLoc()),
663  llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
664  None);
665  return ArgValue;
666 }
667 
668 /// Get the argument type for arguments to os_log_helper.
669 static CanQualType getOSLogArgType(ASTContext &C, int Size) {
670  QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
671  return C.getCanonicalType(UnsignedTy);
672 }
673 
674 llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
675  const analyze_os_log::OSLogBufferLayout &Layout,
676  CharUnits BufferAlignment) {
677  ASTContext &Ctx = getContext();
678 
679  llvm::SmallString<64> Name;
680  {
681  raw_svector_ostream OS(Name);
682  OS << "__os_log_helper";
683  OS << "_" << BufferAlignment.getQuantity();
684  OS << "_" << int(Layout.getSummaryByte());
685  OS << "_" << int(Layout.getNumArgsByte());
686  for (const auto &Item : Layout.Items)
687  OS << "_" << int(Item.getSizeByte()) << "_"
688  << int(Item.getDescriptorByte());
689  }
690 
691  if (llvm::Function *F = CGM.getModule().getFunction(Name))
692  return F;
693 
694  llvm::SmallVector<ImplicitParamDecl, 4> Params;
695  Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"),
696  Ctx.VoidPtrTy, ImplicitParamDecl::Other);
697 
698  for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
699  char Size = Layout.Items[I].getSizeByte();
700  if (!Size)
701  continue;
702 
703  Params.emplace_back(
704  Ctx, nullptr, SourceLocation(),
705  &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)),
706  getOSLogArgType(Ctx, Size), ImplicitParamDecl::Other);
707  }
708 
709  FunctionArgList Args;
710  for (auto &P : Params)
711  Args.push_back(&P);
712 
713  // The helper function has linkonce_odr linkage to enable the linker to merge
714  // identical functions. To ensure the merging always happens, 'noinline' is
715  // attached to the function when compiling with -Oz.
716  const CGFunctionInfo &FI =
717  CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
718  llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
719  llvm::Function *Fn = llvm::Function::Create(
720  FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
721  Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
722  CGM.SetLLVMFunctionAttributes(nullptr, FI, Fn);
723  CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
724 
725  // Attach 'noinline' at -Oz.
726  if (CGM.getCodeGenOpts().OptimizeSize == 2)
727  Fn->addFnAttr(llvm::Attribute::NoInline);
728 
729  auto NL = ApplyDebugLocation::CreateEmpty(*this);
730  IdentifierInfo *II = &Ctx.Idents.get(Name);
731  FunctionDecl *FD = FunctionDecl::Create(
732  Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
733  Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false);
734 
735  StartFunction(FD, Ctx.VoidTy, Fn, FI, Args);
736 
737  // Create a scope with an artificial location for the body of this function.
738  auto AL = ApplyDebugLocation::CreateArtificial(*this);
739 
740  CharUnits Offset;
741  Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(&Params[0]), "buf"),
742  BufferAlignment);
743  Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
744  Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
745  Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
746  Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
747 
748  unsigned I = 1;
749  for (const auto &Item : Layout.Items) {
750  Builder.CreateStore(
751  Builder.getInt8(Item.getDescriptorByte()),
752  Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
753  Builder.CreateStore(
754  Builder.getInt8(Item.getSizeByte()),
755  Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
756 
757  CharUnits Size = Item.size();
758  if (!Size.getQuantity())
759  continue;
760 
761  Address Arg = GetAddrOfLocalVar(&Params[I]);
762  Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
763  Addr = Builder.CreateBitCast(Addr, Arg.getPointer()->getType(),
764  "argDataCast");
765  Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
766  Offset += Size;
767  ++I;
768  }
769 
770  FinishFunction();
771 
772  return Fn;
773 }
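// Editor's note: the helper's mangled name built above encodes the buffer
// alignment, the summary and argument-count bytes, and each item's
// size/descriptor pair, so os_log call sites with identical layouts share a
// single linkonce_odr helper.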
774 
775 RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
776  assert(E.getNumArgs() >= 2 &&
777  "__builtin_os_log_format takes at least 2 arguments");
778  ASTContext &Ctx = getContext();
779  analyze_os_log::OSLogBufferLayout Layout;
780  analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
781  Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
782  llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
783 
784  // Ignore argument 1, the format string. It is not currently used.
785  CallArgList Args;
786  Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy);
787 
788  for (const auto &Item : Layout.Items) {
789  int Size = Item.getSizeByte();
790  if (!Size)
791  continue;
792 
793  llvm::Value *ArgVal;
794 
795  if (const Expr *TheExpr = Item.getExpr()) {
796  ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
797 
798  // Check if this is a retainable type.
799  if (TheExpr->getType()->isObjCRetainableType()) {
800  assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
801  "Only scalar can be an ObjC retainable type");
802  // Check if the object is constant, if not, save it in
803  // RetainableOperands.
804  if (!isa<Constant>(ArgVal))
805  RetainableOperands.push_back(ArgVal);
806  }
807  } else {
808  ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
809  }
810 
811  unsigned ArgValSize =
812  CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
813  llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
814  ArgValSize);
815  ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
816  CanQualType ArgTy = getOSLogArgType(Ctx, Size);
817  // If ArgVal has type x86_fp80, zero-extend ArgVal.
818  ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
819  Args.add(RValue::get(ArgVal), ArgTy);
820  }
821 
822  const CGFunctionInfo &FI =
823  CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
824  llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
825  Layout, BufAddr.getAlignment());
826  EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
827 
828  // Push a clang.arc.use cleanup for each object in RetainableOperands. The
829  // cleanup will cause the use to appear after the final log call, keeping
830  // the object valid while it’s held in the log buffer. Note that if there’s
831  // a release cleanup on the object, it will already be active; since
832  // cleanups are emitted in reverse order, the use will occur before the
833  // object is released.
834  if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
835  CGM.getCodeGenOpts().OptimizationLevel != 0)
836  for (llvm::Value *Object : RetainableOperands)
837  pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), Object);
838 
839  return RValue::get(BufAddr.getPointer());
840 }
841 
842 /// Determine if a binop is a checked mixed-sign multiply we can specialize.
843 static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
844  WidthAndSignedness Op1Info,
845  WidthAndSignedness Op2Info,
846  WidthAndSignedness ResultInfo) {
847  return BuiltinID == Builtin::BI__builtin_mul_overflow &&
848  Op1Info.Width == Op2Info.Width && Op1Info.Width >= ResultInfo.Width &&
849  Op1Info.Signed != Op2Info.Signed;
850 }
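// Editor's illustrative note: a call such as
//   __builtin_mul_overflow((int64_t)a, (uint64_t)b, &result32);
// qualifies: both operands are 64 bits wide, the operand width is at least the
// result width, and exactly one operand is signed, so the cheaper mixed-sign
// lowering below applies instead of the generic checked-binop path.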
851 
852 /// Emit a checked mixed-sign multiply. This is a cheaper specialization of
853 /// the generic checked-binop irgen.
854 static RValue
855 EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
856  WidthAndSignedness Op1Info, const clang::Expr *Op2,
857  WidthAndSignedness Op2Info,
858  const clang::Expr *ResultArg, QualType ResultQTy,
859  WidthAndSignedness ResultInfo) {
860  assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
861  Op2Info, ResultInfo) &&
862  "Not a mixed-sign multiplication we can specialize");
863 
864  // Emit the signed and unsigned operands.
865  const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
866  const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
867  llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
868  llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
869 
870  llvm::Type *OpTy = Signed->getType();
871  llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
872  Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
873  llvm::Type *ResTy = ResultPtr.getElementType();
874 
875  // Take the absolute value of the signed operand.
876  llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
877  llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
878  llvm::Value *AbsSigned =
879  CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
880 
881  // Perform a checked unsigned multiplication.
882  llvm::Value *UnsignedOverflow;
883  llvm::Value *UnsignedResult =
884  EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
885  Unsigned, UnsignedOverflow);
886 
887  llvm::Value *Overflow, *Result;
888  if (ResultInfo.Signed) {
889  // Signed overflow occurs if the result is greater than INT_MAX or less
890  // than INT_MIN, i.e. when |Result| > (INT_MAX + IsNegative).
891  auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width)
892  .zextOrSelf(Op1Info.Width);
893  llvm::Value *MaxResult =
894  CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
895  CGF.Builder.CreateZExt(IsNegative, OpTy));
896  llvm::Value *SignedOverflow =
897  CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
898  Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
899 
900  // Prepare the signed result (possibly by negating it).
901  llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
902  llvm::Value *SignedResult =
903  CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
904  Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
905  } else {
906  // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
907  llvm::Value *Underflow = CGF.Builder.CreateAnd(
908  IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
909  Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
910  if (ResultInfo.Width < Op1Info.Width) {
911  auto IntMax =
912  llvm::APInt::getMaxValue(ResultInfo.Width).zext(Op1Info.Width);
913  llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
914  UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
915  Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
916  }
917 
918  Result = CGF.Builder.CreateTrunc(UnsignedResult, ResTy);
919  }
920  assert(Overflow && Result && "Missing overflow or result");
921 
922  bool isVolatile =
923  ResultArg->getType()->getPointeeType().isVolatileQualified();
924  CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
925  isVolatile);
926  return RValue::get(Overflow);
927 }
928 
929 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
930  unsigned BuiltinID, const CallExpr *E,
931  ReturnValueSlot ReturnValue) {
932  // See if we can constant fold this builtin. If so, don't emit it at all.
933  Expr::EvalResult Result;
934  if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
935  !Result.hasSideEffects()) {
936  if (Result.Val.isInt())
937  return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
938  Result.Val.getInt()));
939  if (Result.Val.isFloat())
940  return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
941  Result.Val.getFloat()));
942  }
943 
944  // There are LLVM math intrinsics/instructions corresponding to math library
945  // functions except the LLVM op will never set errno while the math library
946  // might. Also, math builtins have the same semantics as their math library
947  // twins. Thus, we can transform math library and builtin calls to their
948  // LLVM counterparts if the call is marked 'const' (known to never set errno).
949  if (FD->hasAttr<ConstAttr>()) {
950  switch (BuiltinID) {
951  case Builtin::BIceil:
952  case Builtin::BIceilf:
953  case Builtin::BIceill:
954  case Builtin::BI__builtin_ceil:
955  case Builtin::BI__builtin_ceilf:
956  case Builtin::BI__builtin_ceill:
957  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
958 
959  case Builtin::BIcopysign:
960  case Builtin::BIcopysignf:
961  case Builtin::BIcopysignl:
962  case Builtin::BI__builtin_copysign:
963  case Builtin::BI__builtin_copysignf:
964  case Builtin::BI__builtin_copysignl:
965  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
966 
967  case Builtin::BIcos:
968  case Builtin::BIcosf:
969  case Builtin::BIcosl:
970  case Builtin::BI__builtin_cos:
971  case Builtin::BI__builtin_cosf:
972  case Builtin::BI__builtin_cosl:
973  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::cos));
974 
975  case Builtin::BIexp:
976  case Builtin::BIexpf:
977  case Builtin::BIexpl:
978  case Builtin::BI__builtin_exp:
979  case Builtin::BI__builtin_expf:
980  case Builtin::BI__builtin_expl:
981  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp));
982 
983  case Builtin::BIexp2:
984  case Builtin::BIexp2f:
985  case Builtin::BIexp2l:
986  case Builtin::BI__builtin_exp2:
987  case Builtin::BI__builtin_exp2f:
988  case Builtin::BI__builtin_exp2l:
989  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp2));
990 
991  case Builtin::BIfabs:
992  case Builtin::BIfabsf:
993  case Builtin::BIfabsl:
994  case Builtin::BI__builtin_fabs:
995  case Builtin::BI__builtin_fabsf:
996  case Builtin::BI__builtin_fabsl:
997  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
998 
999  case Builtin::BIfloor:
1000  case Builtin::BIfloorf:
1001  case Builtin::BIfloorl:
1002  case Builtin::BI__builtin_floor:
1003  case Builtin::BI__builtin_floorf:
1004  case Builtin::BI__builtin_floorl:
1005  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
1006 
1007  case Builtin::BIfma:
1008  case Builtin::BIfmaf:
1009  case Builtin::BIfmal:
1010  case Builtin::BI__builtin_fma:
1011  case Builtin::BI__builtin_fmaf:
1012  case Builtin::BI__builtin_fmal:
1013  return RValue::get(emitTernaryBuiltin(*this, E, Intrinsic::fma));
1014 
1015  case Builtin::BIfmax:
1016  case Builtin::BIfmaxf:
1017  case Builtin::BIfmaxl:
1018  case Builtin::BI__builtin_fmax:
1019  case Builtin::BI__builtin_fmaxf:
1020  case Builtin::BI__builtin_fmaxl:
1021  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
1022 
1023  case Builtin::BIfmin:
1024  case Builtin::BIfminf:
1025  case Builtin::BIfminl:
1026  case Builtin::BI__builtin_fmin:
1027  case Builtin::BI__builtin_fminf:
1028  case Builtin::BI__builtin_fminl:
1029  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
1030 
1031  // fmod() is a special-case. It maps to the frem instruction rather than an
1032  // LLVM intrinsic.
1033  case Builtin::BIfmod:
1034  case Builtin::BIfmodf:
1035  case Builtin::BIfmodl:
1036  case Builtin::BI__builtin_fmod:
1037  case Builtin::BI__builtin_fmodf:
1038  case Builtin::BI__builtin_fmodl: {
1039  Value *Arg1 = EmitScalarExpr(E->getArg(0));
1040  Value *Arg2 = EmitScalarExpr(E->getArg(1));
1041  return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
1042  }
1043 
1044  case Builtin::BIlog:
1045  case Builtin::BIlogf:
1046  case Builtin::BIlogl:
1047  case Builtin::BI__builtin_log:
1048  case Builtin::BI__builtin_logf:
1049  case Builtin::BI__builtin_logl:
1050  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log));
1051 
1052  case Builtin::BIlog10:
1053  case Builtin::BIlog10f:
1054  case Builtin::BIlog10l:
1055  case Builtin::BI__builtin_log10:
1056  case Builtin::BI__builtin_log10f:
1057  case Builtin::BI__builtin_log10l:
1058  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log10));
1059 
1060  case Builtin::BIlog2:
1061  case Builtin::BIlog2f:
1062  case Builtin::BIlog2l:
1063  case Builtin::BI__builtin_log2:
1064  case Builtin::BI__builtin_log2f:
1065  case Builtin::BI__builtin_log2l:
1066  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log2));
1067 
1068  case Builtin::BInearbyint:
1069  case Builtin::BInearbyintf:
1070  case Builtin::BInearbyintl:
1071  case Builtin::BI__builtin_nearbyint:
1072  case Builtin::BI__builtin_nearbyintf:
1073  case Builtin::BI__builtin_nearbyintl:
1074  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
1075 
1076  case Builtin::BIpow:
1077  case Builtin::BIpowf:
1078  case Builtin::BIpowl:
1079  case Builtin::BI__builtin_pow:
1080  case Builtin::BI__builtin_powf:
1081  case Builtin::BI__builtin_powl:
1082  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::pow));
1083 
1084  case Builtin::BIrint:
1085  case Builtin::BIrintf:
1086  case Builtin::BIrintl:
1087  case Builtin::BI__builtin_rint:
1088  case Builtin::BI__builtin_rintf:
1089  case Builtin::BI__builtin_rintl:
1090  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
1091 
1092  case Builtin::BIround:
1093  case Builtin::BIroundf:
1094  case Builtin::BIroundl:
1095  case Builtin::BI__builtin_round:
1096  case Builtin::BI__builtin_roundf:
1097  case Builtin::BI__builtin_roundl:
1098  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
1099 
1100  case Builtin::BIsin:
1101  case Builtin::BIsinf:
1102  case Builtin::BIsinl:
1103  case Builtin::BI__builtin_sin:
1104  case Builtin::BI__builtin_sinf:
1105  case Builtin::BI__builtin_sinl:
1106  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sin));
1107 
1108  case Builtin::BIsqrt:
1109  case Builtin::BIsqrtf:
1110  case Builtin::BIsqrtl:
1111  case Builtin::BI__builtin_sqrt:
1112  case Builtin::BI__builtin_sqrtf:
1113  case Builtin::BI__builtin_sqrtl:
1114  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sqrt));
1115 
1116  case Builtin::BItrunc:
1117  case Builtin::BItruncf:
1118  case Builtin::BItruncl:
1119  case Builtin::BI__builtin_trunc:
1120  case Builtin::BI__builtin_truncf:
1121  case Builtin::BI__builtin_truncl:
1122  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
1123 
1124  default:
1125  break;
1126  }
1127  }
1128 
1129  switch (BuiltinID) {
1130  default: break;
1131  case Builtin::BI__builtin___CFStringMakeConstantString:
1132  case Builtin::BI__builtin___NSStringMakeConstantString:
1133  return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
1134  case Builtin::BI__builtin_stdarg_start:
1135  case Builtin::BI__builtin_va_start:
1136  case Builtin::BI__va_start:
1137  case Builtin::BI__builtin_va_end:
1138  return RValue::get(
1139  EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
1140  ? EmitScalarExpr(E->getArg(0))
1141  : EmitVAListRef(E->getArg(0)).getPointer(),
1142  BuiltinID != Builtin::BI__builtin_va_end));
1143  case Builtin::BI__builtin_va_copy: {
1144  Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
1145  Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
1146 
1147  llvm::Type *Type = Int8PtrTy;
1148 
1149  DstPtr = Builder.CreateBitCast(DstPtr, Type);
1150  SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
1151  return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
1152  {DstPtr, SrcPtr}));
1153  }
1154  case Builtin::BI__builtin_abs:
1155  case Builtin::BI__builtin_labs:
1156  case Builtin::BI__builtin_llabs: {
1157  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1158 
1159  Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
1160  Value *CmpResult =
1161  Builder.CreateICmpSGE(ArgValue,
1162  llvm::Constant::getNullValue(ArgValue->getType()),
1163  "abscond");
1164  Value *Result =
1165  Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
1166 
1167  return RValue::get(Result);
1168  }
1169  case Builtin::BI__builtin_conj:
1170  case Builtin::BI__builtin_conjf:
1171  case Builtin::BI__builtin_conjl: {
1172  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1173  Value *Real = ComplexVal.first;
1174  Value *Imag = ComplexVal.second;
1175  Value *Zero =
1176  Imag->getType()->isFPOrFPVectorTy()
1177  ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
1178  : llvm::Constant::getNullValue(Imag->getType());
1179 
1180  Imag = Builder.CreateFSub(Zero, Imag, "sub");
1181  return RValue::getComplex(std::make_pair(Real, Imag));
1182  }
1183  case Builtin::BI__builtin_creal:
1184  case Builtin::BI__builtin_crealf:
1185  case Builtin::BI__builtin_creall:
1186  case Builtin::BIcreal:
1187  case Builtin::BIcrealf:
1188  case Builtin::BIcreall: {
1189  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1190  return RValue::get(ComplexVal.first);
1191  }
1192 
1193  case Builtin::BI__builtin_cimag:
1194  case Builtin::BI__builtin_cimagf:
1195  case Builtin::BI__builtin_cimagl:
1196  case Builtin::BIcimag:
1197  case Builtin::BIcimagf:
1198  case Builtin::BIcimagl: {
1199  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1200  return RValue::get(ComplexVal.second);
1201  }
1202 
1203  case Builtin::BI__builtin_ctzs:
1204  case Builtin::BI__builtin_ctz:
1205  case Builtin::BI__builtin_ctzl:
1206  case Builtin::BI__builtin_ctzll: {
1207  Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
1208 
1209  llvm::Type *ArgType = ArgValue->getType();
1210  Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1211 
1212  llvm::Type *ResultType = ConvertType(E->getType());
1213  Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
1214  Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
1215  if (Result->getType() != ResultType)
1216  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1217  "cast");
1218  return RValue::get(Result);
1219  }
1220  case Builtin::BI__builtin_clzs:
1221  case Builtin::BI__builtin_clz:
1222  case Builtin::BI__builtin_clzl:
1223  case Builtin::BI__builtin_clzll: {
1224  Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
1225 
1226  llvm::Type *ArgType = ArgValue->getType();
1227  Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1228 
1229  llvm::Type *ResultType = ConvertType(E->getType());
1230  Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
1231  Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
1232  if (Result->getType() != ResultType)
1233  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1234  "cast");
1235  return RValue::get(Result);
1236  }
1237  case Builtin::BI__builtin_ffs:
1238  case Builtin::BI__builtin_ffsl:
1239  case Builtin::BI__builtin_ffsll: {
1240  // ffs(x) -> x ? cttz(x) + 1 : 0
1241  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1242 
1243  llvm::Type *ArgType = ArgValue->getType();
1244  Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1245 
1246  llvm::Type *ResultType = ConvertType(E->getType());
1247  Value *Tmp =
1248  Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
1249  llvm::ConstantInt::get(ArgType, 1));
1250  Value *Zero = llvm::Constant::getNullValue(ArgType);
1251  Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
1252  Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
1253  if (Result->getType() != ResultType)
1254  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1255  "cast");
1256  return RValue::get(Result);
1257  }
1258  case Builtin::BI__builtin_parity:
1259  case Builtin::BI__builtin_parityl:
1260  case Builtin::BI__builtin_parityll: {
1261  // parity(x) -> ctpop(x) & 1
1262  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1263 
1264  llvm::Type *ArgType = ArgValue->getType();
1265  Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
1266 
1267  llvm::Type *ResultType = ConvertType(E->getType());
1268  Value *Tmp = Builder.CreateCall(F, ArgValue);
1269  Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
1270  if (Result->getType() != ResultType)
1271  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1272  "cast");
1273  return RValue::get(Result);
1274  }
1275  case Builtin::BI__popcnt16:
1276  case Builtin::BI__popcnt:
1277  case Builtin::BI__popcnt64:
1278  case Builtin::BI__builtin_popcount:
1279  case Builtin::BI__builtin_popcountl:
1280  case Builtin::BI__builtin_popcountll: {
1281  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1282 
1283  llvm::Type *ArgType = ArgValue->getType();
1284  Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
1285 
1286  llvm::Type *ResultType = ConvertType(E->getType());
1287  Value *Result = Builder.CreateCall(F, ArgValue);
1288  if (Result->getType() != ResultType)
1289  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1290  "cast");
1291  return RValue::get(Result);
1292  }
1293  case Builtin::BI_rotr8:
1294  case Builtin::BI_rotr16:
1295  case Builtin::BI_rotr:
1296  case Builtin::BI_lrotr:
1297  case Builtin::BI_rotr64: {
1298  Value *Val = EmitScalarExpr(E->getArg(0));
1299  Value *Shift = EmitScalarExpr(E->getArg(1));
1300 
1301  llvm::Type *ArgType = Val->getType();
1302  Shift = Builder.CreateIntCast(Shift, ArgType, false);
1303  unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
1304  Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
1305  Value *ArgZero = llvm::Constant::getNullValue(ArgType);
1306 
1307  Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
1308  Shift = Builder.CreateAnd(Shift, Mask);
1309  Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift);
1310 
1311  Value *RightShifted = Builder.CreateLShr(Val, Shift);
1312  Value *LeftShifted = Builder.CreateShl(Val, LeftShift);
1313  Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
1314 
1315  Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
1316  Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
1317  return RValue::get(Result);
1318  }
1319  case Builtin::BI_rotl8:
1320  case Builtin::BI_rotl16:
1321  case Builtin::BI_rotl:
1322  case Builtin::BI_lrotl:
1323  case Builtin::BI_rotl64: {
1324  Value *Val = EmitScalarExpr(E->getArg(0));
1325  Value *Shift = EmitScalarExpr(E->getArg(1));
1326 
1327  llvm::Type *ArgType = Val->getType();
1328  Shift = Builder.CreateIntCast(Shift, ArgType, false);
1329  unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
1330  Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
1331  Value *ArgZero = llvm::Constant::getNullValue(ArgType);
1332 
1333  Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
1334  Shift = Builder.CreateAnd(Shift, Mask);
1335  Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift);
1336 
1337  Value *LeftShifted = Builder.CreateShl(Val, Shift);
1338  Value *RightShifted = Builder.CreateLShr(Val, RightShift);
1339  Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
1340 
1341  Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
1342  Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
1343  return RValue::get(Result);
1344  }
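  // Editor's note on the two rotate cases above: the shift amount is masked to
  // ArgWidth - 1 and a select falls back to the unrotated value when the masked
  // shift is zero, since the complementary shift would otherwise be by ArgWidth
  // and produce an undefined (poison) result in LLVM IR.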
1345  case Builtin::BI__builtin_unpredictable: {
1346  // Always return the argument of __builtin_unpredictable. LLVM does not
1347  // handle this builtin. Metadata for this builtin should be added directly
1348  // to instructions such as branches or switches that use it.
1349  return RValue::get(EmitScalarExpr(E->getArg(0)));
1350  }
1351  case Builtin::BI__builtin_expect: {
1352  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1353  llvm::Type *ArgType = ArgValue->getType();
1354 
1355  Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
1356  // Don't generate llvm.expect on -O0 as the backend won't use it for
1357  // anything.
1358  // Note, we still IRGen ExpectedValue because it could have side-effects.
1359  if (CGM.getCodeGenOpts().OptimizationLevel == 0)
1360  return RValue::get(ArgValue);
1361 
1362  Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
1363  Value *Result =
1364  Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
1365  return RValue::get(Result);
1366  }
1367  case Builtin::BI__builtin_assume_aligned: {
1368  Value *PtrValue = EmitScalarExpr(E->getArg(0));
1369  Value *OffsetValue =
1370  (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
1371 
1372  Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
1373  ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
1374  unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
1375 
1376  EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
1377  return RValue::get(PtrValue);
1378  }
1379  case Builtin::BI__assume:
1380  case Builtin::BI__builtin_assume: {
1381  if (E->getArg(0)->HasSideEffects(getContext()))
1382  return RValue::get(nullptr);
1383 
1384  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1385  Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
1386  return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
1387  }
1388  case Builtin::BI__builtin_bswap16:
1389  case Builtin::BI__builtin_bswap32:
1390  case Builtin::BI__builtin_bswap64: {
1391  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
1392  }
1393  case Builtin::BI__builtin_bitreverse8:
1394  case Builtin::BI__builtin_bitreverse16:
1395  case Builtin::BI__builtin_bitreverse32:
1396  case Builtin::BI__builtin_bitreverse64: {
1397  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
1398  }
1399  case Builtin::BI__builtin_object_size: {
1400  unsigned Type =
1401  E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
1402  auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
1403 
1404  // We pass this builtin onto the optimizer so that it can figure out the
1405  // object size in more complex cases.
1406  return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
1407  /*EmittedE=*/nullptr));
1408  }
1409  case Builtin::BI__builtin_prefetch: {
1410  Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
1411  // FIXME: Technically these constants should be of type 'int', yes?
1412  RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
1413  llvm::ConstantInt::get(Int32Ty, 0);
1414  Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
1415  llvm::ConstantInt::get(Int32Ty, 3);
1416  Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
1417  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
1418  return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
1419  }
1420  case Builtin::BI__builtin_readcyclecounter: {
1421  Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
1422  return RValue::get(Builder.CreateCall(F));
1423  }
1424  case Builtin::BI__builtin___clear_cache: {
1425  Value *Begin = EmitScalarExpr(E->getArg(0));
1426  Value *End = EmitScalarExpr(E->getArg(1));
1427  Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
1428  return RValue::get(Builder.CreateCall(F, {Begin, End}));
1429  }
1430  case Builtin::BI__builtin_trap:
1431  return RValue::get(EmitTrapCall(Intrinsic::trap));
1432  case Builtin::BI__debugbreak:
1433  return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
1434  case Builtin::BI__builtin_unreachable: {
1435  if (SanOpts.has(SanitizerKind::Unreachable)) {
1436  SanitizerScope SanScope(this);
1437  EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
1438  SanitizerKind::Unreachable),
1439  SanitizerHandler::BuiltinUnreachable,
1440  EmitCheckSourceLocation(E->getExprLoc()), None);
1441  } else
1442  Builder.CreateUnreachable();
1443 
1444  // We do need to preserve an insertion point.
1445  EmitBlock(createBasicBlock("unreachable.cont"));
1446 
1447  return RValue::get(nullptr);
1448  }
1449 
1450  case Builtin::BI__builtin_powi:
1451  case Builtin::BI__builtin_powif:
1452  case Builtin::BI__builtin_powil: {
1453  Value *Base = EmitScalarExpr(E->getArg(0));
1454  Value *Exponent = EmitScalarExpr(E->getArg(1));
1455  llvm::Type *ArgType = Base->getType();
1456  Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
1457  return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1458  }
1459 
1460  case Builtin::BI__builtin_isgreater:
1461  case Builtin::BI__builtin_isgreaterequal:
1462  case Builtin::BI__builtin_isless:
1463  case Builtin::BI__builtin_islessequal:
1464  case Builtin::BI__builtin_islessgreater:
1465  case Builtin::BI__builtin_isunordered: {
1466  // Ordered comparisons: we know the arguments to these are matching scalar
1467  // floating point values.
1468  Value *LHS = EmitScalarExpr(E->getArg(0));
1469  Value *RHS = EmitScalarExpr(E->getArg(1));
1470 
1471  switch (BuiltinID) {
1472  default: llvm_unreachable("Unknown ordered comparison");
1473  case Builtin::BI__builtin_isgreater:
1474  LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
1475  break;
1476  case Builtin::BI__builtin_isgreaterequal:
1477  LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
1478  break;
1479  case Builtin::BI__builtin_isless:
1480  LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
1481  break;
1482  case Builtin::BI__builtin_islessequal:
1483  LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
1484  break;
1485  case Builtin::BI__builtin_islessgreater:
1486  LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
1487  break;
1488  case Builtin::BI__builtin_isunordered:
1489  LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
1490  break;
1491  }
1492  // ZExt bool to int type.
1493  return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
1494  }
1495  case Builtin::BI__builtin_isnan: {
1496  Value *V = EmitScalarExpr(E->getArg(0));
1497  V = Builder.CreateFCmpUNO(V, V, "cmp");
1498  return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1499  }
1500 
1501  case Builtin::BIfinite:
1502  case Builtin::BI__finite:
1503  case Builtin::BIfinitef:
1504  case Builtin::BI__finitef:
1505  case Builtin::BIfinitel:
1506  case Builtin::BI__finitel:
1507  case Builtin::BI__builtin_isinf:
1508  case Builtin::BI__builtin_isfinite: {
1509  // isinf(x) --> fabs(x) == infinity
1510  // isfinite(x) --> fabs(x) != infinity
1511  // x != NaN via the ordered compare in either case.
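  // A minimal IR sketch for __builtin_isinf on a double (illustrative):
  //   %fabs = call double @llvm.fabs.f64(double %x)
  //   %cmp  = fcmp oeq double %fabs, 0x7FF0000000000000
  //   %res  = zext i1 %cmp to i32
  // isfinite uses the same shape with an ordered not-equal ("one") compare.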
1512  Value *V = EmitScalarExpr(E->getArg(0));
1513  Value *Fabs = EmitFAbs(*this, V);
1514  Constant *Infinity = ConstantFP::getInfinity(V->getType());
1515  CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
1516  ? CmpInst::FCMP_OEQ
1517  : CmpInst::FCMP_ONE;
1518  Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
1519  return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
1520  }
1521 
1522  case Builtin::BI__builtin_isinf_sign: {
1523  // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
1524  Value *Arg = EmitScalarExpr(E->getArg(0));
1525  Value *AbsArg = EmitFAbs(*this, Arg);
1526  Value *IsInf = Builder.CreateFCmpOEQ(
1527  AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
1528  Value *IsNeg = EmitSignBit(*this, Arg);
1529 
1530  llvm::Type *IntTy = ConvertType(E->getType());
1531  Value *Zero = Constant::getNullValue(IntTy);
1532  Value *One = ConstantInt::get(IntTy, 1);
1533  Value *NegativeOne = ConstantInt::get(IntTy, -1);
1534  Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
1535  Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
1536  return RValue::get(Result);
1537  }
1538 
1539  case Builtin::BI__builtin_isnormal: {
1540  // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
1541  Value *V = EmitScalarExpr(E->getArg(0));
1542  Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
1543 
1544  Value *Abs = EmitFAbs(*this, V);
1545  Value *IsLessThanInf =
1546  Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
1547  APFloat Smallest = APFloat::getSmallestNormalized(
1548  getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
1549  Value *IsNormal =
1550  Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
1551  "isnormal");
1552  V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
1553  V = Builder.CreateAnd(V, IsNormal, "and");
1554  return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1555  }
1556 
1557  case Builtin::BI__builtin_fpclassify: {
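  // Argument layout (as documented for the GNU builtin):
  //   __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
  //                        FP_ZERO, x)
  // so arg(5) is the value being classified and args 0-4 supply the result
  // for each class, selected by the control flow below.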
1558  Value *V = EmitScalarExpr(E->getArg(5));
1559  llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
1560 
1561  // Create Result
1562  BasicBlock *Begin = Builder.GetInsertBlock();
1563  BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
1564  Builder.SetInsertPoint(End);
1565  PHINode *Result =
1566  Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
1567  "fpclassify_result");
1568 
1569  // if (V==0) return FP_ZERO
1570  Builder.SetInsertPoint(Begin);
1571  Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
1572  "iszero");
1573  Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
1574  BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
1575  Builder.CreateCondBr(IsZero, End, NotZero);
1576  Result->addIncoming(ZeroLiteral, Begin);
1577 
1578  // if (V != V) return FP_NAN
1579  Builder.SetInsertPoint(NotZero);
1580  Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
1581  Value *NanLiteral = EmitScalarExpr(E->getArg(0));
1582  BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
1583  Builder.CreateCondBr(IsNan, End, NotNan);
1584  Result->addIncoming(NanLiteral, NotZero);
1585 
1586  // if (fabs(V) == infinity) return FP_INFINITY
1587  Builder.SetInsertPoint(NotNan);
1588  Value *VAbs = EmitFAbs(*this, V);
1589  Value *IsInf =
1590  Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
1591  "isinf");
1592  Value *InfLiteral = EmitScalarExpr(E->getArg(1));
1593  BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
1594  Builder.CreateCondBr(IsInf, End, NotInf);
1595  Result->addIncoming(InfLiteral, NotNan);
1596 
1597  // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
1598  Builder.SetInsertPoint(NotInf);
1599  APFloat Smallest = APFloat::getSmallestNormalized(
1600  getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
1601  Value *IsNormal =
1602  Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
1603  "isnormal");
1604  Value *NormalResult =
1605  Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
1606  EmitScalarExpr(E->getArg(3)));
1607  Builder.CreateBr(End);
1608  Result->addIncoming(NormalResult, NotInf);
1609 
1610  // return Result
1611  Builder.SetInsertPoint(End);
1612  return RValue::get(Result);
1613  }
1614 
1615  case Builtin::BIalloca:
1616  case Builtin::BI_alloca:
1617  case Builtin::BI__builtin_alloca: {
1618  Value *Size = EmitScalarExpr(E->getArg(0));
1619  const TargetInfo &TI = getContext().getTargetInfo();
1620  // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
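  // (On common x86-64 targets, for example, this is 16 bytes; the exact value
  // comes from TargetInfo::getSuitableAlign().)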
1621  unsigned SuitableAlignmentInBytes =
1622  CGM.getContext()
1623  .toCharUnitsFromBits(TI.getSuitableAlign())
1624  .getQuantity();
1625  AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1626  AI->setAlignment(SuitableAlignmentInBytes);
1627  return RValue::get(AI);
1628  }
1629 
1630  case Builtin::BI__builtin_alloca_with_align: {
1631  Value *Size = EmitScalarExpr(E->getArg(0));
1632  Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
1633  auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
1634  unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
1635  unsigned AlignmentInBytes =
1636  CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
1637  AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1638  AI->setAlignment(AlignmentInBytes);
1639  return RValue::get(AI);
1640  }
1641 
1642  case Builtin::BIbzero:
1643  case Builtin::BI__builtin_bzero: {
1644  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1645  Value *SizeVal = EmitScalarExpr(E->getArg(1));
1646  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1647  E->getArg(0)->getExprLoc(), FD, 0);
1648  Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
1649  return RValue::get(nullptr);
1650  }
1651  case Builtin::BImemcpy:
1652  case Builtin::BI__builtin_memcpy: {
1653  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1654  Address Src = EmitPointerWithAlignment(E->getArg(1));
1655  Value *SizeVal = EmitScalarExpr(E->getArg(2));
1656  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1657  E->getArg(0)->getExprLoc(), FD, 0);
1658  EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1659  E->getArg(1)->getExprLoc(), FD, 1);
1660  Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1661  return RValue::get(Dest.getPointer());
1662  }
1663 
1664  case Builtin::BI__builtin_char_memchr:
1665  BuiltinID = Builtin::BI__builtin_memchr;
1666  break;
1667 
1668  case Builtin::BI__builtin___memcpy_chk: {
1669  // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
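  // For instance (illustrative): __builtin___memcpy_chk(d, s, 4, 16) has a
  // constant copy size (4) that fits the known destination size (16), so it
  // is emitted as a plain memcpy; otherwise we break out and emit the normal
  // __memcpy_chk library call.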
1670  llvm::APSInt Size, DstSize;
1671  if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1672  !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1673  break;
1674  if (Size.ugt(DstSize))
1675  break;
1676  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1677  Address Src = EmitPointerWithAlignment(E->getArg(1));
1678  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1679  Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1680  return RValue::get(Dest.getPointer());
1681  }
1682 
1683  case Builtin::BI__builtin_objc_memmove_collectable: {
1684  Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
1685  Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
1686  Value *SizeVal = EmitScalarExpr(E->getArg(2));
1687  CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
1688  DestAddr, SrcAddr, SizeVal);
1689  return RValue::get(DestAddr.getPointer());
1690  }
1691 
1692  case Builtin::BI__builtin___memmove_chk: {
1693  // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
1694  llvm::APSInt Size, DstSize;
1695  if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1696  !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1697  break;
1698  if (Size.ugt(DstSize))
1699  break;
1700  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1701  Address Src = EmitPointerWithAlignment(E->getArg(1));
1702  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1703  Builder.CreateMemMove(Dest, Src, SizeVal, false);
1704  return RValue::get(Dest.getPointer());
1705  }
1706 
1707  case Builtin::BImemmove:
1708  case Builtin::BI__builtin_memmove: {
1709  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1710  Address Src = EmitPointerWithAlignment(E->getArg(1));
1711  Value *SizeVal = EmitScalarExpr(E->getArg(2));
1712  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1713  E->getArg(0)->getExprLoc(), FD, 0);
1714  EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1715  E->getArg(1)->getExprLoc(), FD, 1);
1716  Builder.CreateMemMove(Dest, Src, SizeVal, false);
1717  return RValue::get(Dest.getPointer());
1718  }
1719  case Builtin::BImemset:
1720  case Builtin::BI__builtin_memset: {
1721  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1722  Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1723  Builder.getInt8Ty());
1724  Value *SizeVal = EmitScalarExpr(E->getArg(2));
1725  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1726  E->getArg(0)->getExprLoc(), FD, 0);
1727  Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1728  return RValue::get(Dest.getPointer());
1729  }
1730  case Builtin::BI__builtin___memset_chk: {
1731  // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
1732  llvm::APSInt Size, DstSize;
1733  if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1734  !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1735  break;
1736  if (Size.ugt(DstSize))
1737  break;
1738  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1739  Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1740  Builder.getInt8Ty());
1741  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1742  Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1743  return RValue::get(Dest.getPointer());
1744  }
1745  case Builtin::BI__builtin_dwarf_cfa: {
1746  // The offset in bytes from the first argument to the CFA.
1747  //
1748  // Why on earth is this in the frontend? Is there any reason at
1749  // all that the backend can't reasonably determine this while
1750  // lowering llvm.eh.dwarf.cfa()?
1751  //
1752  // TODO: If there's a satisfactory reason, add a target hook for
1753  // this instead of hard-coding 0, which is correct for most targets.
1754  int32_t Offset = 0;
1755 
1756  Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
1757  return RValue::get(Builder.CreateCall(F,
1758  llvm::ConstantInt::get(Int32Ty, Offset)));
1759  }
1760  case Builtin::BI__builtin_return_address: {
1761  Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
1762  getContext().UnsignedIntTy);
1763  Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1764  return RValue::get(Builder.CreateCall(F, Depth));
1765  }
1766  case Builtin::BI_ReturnAddress: {
1767  Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1768  return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
1769  }
1770  case Builtin::BI__builtin_frame_address: {
1771  Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
1772  getContext().UnsignedIntTy);
1773  Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
1774  return RValue::get(Builder.CreateCall(F, Depth));
1775  }
1776  case Builtin::BI__builtin_extract_return_addr: {
1777  Value *Address = EmitScalarExpr(E->getArg(0));
1778  Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1779  return RValue::get(Result);
1780  }
1781  case Builtin::BI__builtin_frob_return_addr: {
1782  Value *Address = EmitScalarExpr(E->getArg(0));
1783  Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1784  return RValue::get(Result);
1785  }
1786  case Builtin::BI__builtin_dwarf_sp_column: {
1787  llvm::IntegerType *Ty
1788  = cast<llvm::IntegerType>(ConvertType(E->getType()));
1789  int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
1790  if (Column == -1) {
1791  CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1792  return RValue::get(llvm::UndefValue::get(Ty));
1793  }
1794  return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1795  }
1796  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
1797  Value *Address = EmitScalarExpr(E->getArg(0));
1798  if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1799  CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1800  return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1801  }
1802  case Builtin::BI__builtin_eh_return: {
1803  Value *Int = EmitScalarExpr(E->getArg(0));
1804  Value *Ptr = EmitScalarExpr(E->getArg(1));
1805 
1806  llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1807  assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1808  "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
1809  Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1810  ? Intrinsic::eh_return_i32
1811  : Intrinsic::eh_return_i64);
1812  Builder.CreateCall(F, {Int, Ptr});
1813  Builder.CreateUnreachable();
1814 
1815  // We do need to preserve an insertion point.
1816  EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1817 
1818  return RValue::get(nullptr);
1819  }
1820  case Builtin::BI__builtin_unwind_init: {
1821  Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1822  return RValue::get(Builder.CreateCall(F));
1823  }
1824  case Builtin::BI__builtin_extend_pointer: {
1825  // Extends a pointer to the size of an _Unwind_Word, which is
1826  // uint64_t on all platforms. Generally this gets poked into a
1827  // register and eventually used as an address, so if the
1828  // addressing registers are wider than pointers and the platform
1829  // doesn't implicitly ignore high-order bits when doing
1830  // addressing, we need to make sure we zext / sext based on
1831  // the platform's expectations.
1832  //
1833  // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
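  // Sketch on a hypothetical 32-bit target that sign-extends addresses:
  //   %i = ptrtoint i8* %p to i32
  //   %w = sext i32 %i to i64
  // (zext is used instead when the target hook reports zero-extension).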
1834 
1835  // Cast the pointer to intptr_t.
1836  Value *Ptr = EmitScalarExpr(E->getArg(0));
1837  Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1838 
1839  // If that's 64 bits, we're done.
1840  if (IntPtrTy->getBitWidth() == 64)
1841  return RValue::get(Result);
1842 
1843  // Otherwise, ask the target codegen hooks what to do.
1844  if (getTargetHooks().extendPointerWithSExt())
1845  return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1846  else
1847  return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1848  }
1849  case Builtin::BI__builtin_setjmp: {
1850  // Buffer is a void**.
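  // Buffer layout assumed by the EH-setjmp lowering below: word 0 holds the
  // frame pointer, word 1 is reserved for the resume address filled in by
  // @llvm.eh.sjlj.setjmp, and word 2 holds the saved stack pointer.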
1851  Address Buf = EmitPointerWithAlignment(E->getArg(0));
1852 
1853  // Store the frame pointer to the setjmp buffer.
1854  Value *FrameAddr =
1855  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1856  ConstantInt::get(Int32Ty, 0));
1857  Builder.CreateStore(FrameAddr, Buf);
1858 
1859  // Store the stack pointer to the setjmp buffer.
1860  Value *StackAddr =
1861  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
1862  Address StackSaveSlot =
1863  Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
1864  Builder.CreateStore(StackAddr, StackSaveSlot);
1865 
1866  // Call LLVM's EH setjmp, which is lightweight.
1867  Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
1868  Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1869  return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
1870  }
1871  case Builtin::BI__builtin_longjmp: {
1872  Value *Buf = EmitScalarExpr(E->getArg(0));
1873  Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1874 
1875  // Call LLVM's EH longjmp, which is lightweight.
1876  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1877 
1878  // longjmp doesn't return; mark this as unreachable.
1879  Builder.CreateUnreachable();
1880 
1881  // We do need to preserve an insertion point.
1882  EmitBlock(createBasicBlock("longjmp.cont"));
1883 
1884  return RValue::get(nullptr);
1885  }
1886  case Builtin::BI__sync_fetch_and_add:
1887  case Builtin::BI__sync_fetch_and_sub:
1888  case Builtin::BI__sync_fetch_and_or:
1889  case Builtin::BI__sync_fetch_and_and:
1890  case Builtin::BI__sync_fetch_and_xor:
1891  case Builtin::BI__sync_fetch_and_nand:
1892  case Builtin::BI__sync_add_and_fetch:
1893  case Builtin::BI__sync_sub_and_fetch:
1894  case Builtin::BI__sync_and_and_fetch:
1895  case Builtin::BI__sync_or_and_fetch:
1896  case Builtin::BI__sync_xor_and_fetch:
1897  case Builtin::BI__sync_nand_and_fetch:
1898  case Builtin::BI__sync_val_compare_and_swap:
1899  case Builtin::BI__sync_bool_compare_and_swap:
1900  case Builtin::BI__sync_lock_test_and_set:
1901  case Builtin::BI__sync_lock_release:
1902  case Builtin::BI__sync_swap:
1903  llvm_unreachable("Shouldn't make it through sema");
1904  case Builtin::BI__sync_fetch_and_add_1:
1905  case Builtin::BI__sync_fetch_and_add_2:
1906  case Builtin::BI__sync_fetch_and_add_4:
1907  case Builtin::BI__sync_fetch_and_add_8:
1908  case Builtin::BI__sync_fetch_and_add_16:
1909  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
1910  case Builtin::BI__sync_fetch_and_sub_1:
1911  case Builtin::BI__sync_fetch_and_sub_2:
1912  case Builtin::BI__sync_fetch_and_sub_4:
1913  case Builtin::BI__sync_fetch_and_sub_8:
1914  case Builtin::BI__sync_fetch_and_sub_16:
1915  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
1916  case Builtin::BI__sync_fetch_and_or_1:
1917  case Builtin::BI__sync_fetch_and_or_2:
1918  case Builtin::BI__sync_fetch_and_or_4:
1919  case Builtin::BI__sync_fetch_and_or_8:
1920  case Builtin::BI__sync_fetch_and_or_16:
1921  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
1922  case Builtin::BI__sync_fetch_and_and_1:
1923  case Builtin::BI__sync_fetch_and_and_2:
1924  case Builtin::BI__sync_fetch_and_and_4:
1925  case Builtin::BI__sync_fetch_and_and_8:
1926  case Builtin::BI__sync_fetch_and_and_16:
1927  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
1928  case Builtin::BI__sync_fetch_and_xor_1:
1929  case Builtin::BI__sync_fetch_and_xor_2:
1930  case Builtin::BI__sync_fetch_and_xor_4:
1931  case Builtin::BI__sync_fetch_and_xor_8:
1932  case Builtin::BI__sync_fetch_and_xor_16:
1933  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
1934  case Builtin::BI__sync_fetch_and_nand_1:
1935  case Builtin::BI__sync_fetch_and_nand_2:
1936  case Builtin::BI__sync_fetch_and_nand_4:
1937  case Builtin::BI__sync_fetch_and_nand_8:
1938  case Builtin::BI__sync_fetch_and_nand_16:
1939  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
1940 
1941  // Clang extensions: not overloaded yet.
1942  case Builtin::BI__sync_fetch_and_min:
1943  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
1944  case Builtin::BI__sync_fetch_and_max:
1945  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
1946  case Builtin::BI__sync_fetch_and_umin:
1947  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
1948  case Builtin::BI__sync_fetch_and_umax:
1949  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1950 
1951  case Builtin::BI__sync_add_and_fetch_1:
1952  case Builtin::BI__sync_add_and_fetch_2:
1953  case Builtin::BI__sync_add_and_fetch_4:
1954  case Builtin::BI__sync_add_and_fetch_8:
1955  case Builtin::BI__sync_add_and_fetch_16:
1956  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
1957  llvm::Instruction::Add);
1958  case Builtin::BI__sync_sub_and_fetch_1:
1959  case Builtin::BI__sync_sub_and_fetch_2:
1960  case Builtin::BI__sync_sub_and_fetch_4:
1961  case Builtin::BI__sync_sub_and_fetch_8:
1962  case Builtin::BI__sync_sub_and_fetch_16:
1963  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
1964  llvm::Instruction::Sub);
1965  case Builtin::BI__sync_and_and_fetch_1:
1966  case Builtin::BI__sync_and_and_fetch_2:
1967  case Builtin::BI__sync_and_and_fetch_4:
1968  case Builtin::BI__sync_and_and_fetch_8:
1969  case Builtin::BI__sync_and_and_fetch_16:
1970  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
1971  llvm::Instruction::And);
1972  case Builtin::BI__sync_or_and_fetch_1:
1973  case Builtin::BI__sync_or_and_fetch_2:
1974  case Builtin::BI__sync_or_and_fetch_4:
1975  case Builtin::BI__sync_or_and_fetch_8:
1976  case Builtin::BI__sync_or_and_fetch_16:
1977  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
1978  llvm::Instruction::Or);
1979  case Builtin::BI__sync_xor_and_fetch_1:
1980  case Builtin::BI__sync_xor_and_fetch_2:
1981  case Builtin::BI__sync_xor_and_fetch_4:
1982  case Builtin::BI__sync_xor_and_fetch_8:
1983  case Builtin::BI__sync_xor_and_fetch_16:
1984  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
1985  llvm::Instruction::Xor);
1986  case Builtin::BI__sync_nand_and_fetch_1:
1987  case Builtin::BI__sync_nand_and_fetch_2:
1988  case Builtin::BI__sync_nand_and_fetch_4:
1989  case Builtin::BI__sync_nand_and_fetch_8:
1990  case Builtin::BI__sync_nand_and_fetch_16:
1991  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
1992  llvm::Instruction::And, true);
1993 
1994  case Builtin::BI__sync_val_compare_and_swap_1:
1995  case Builtin::BI__sync_val_compare_and_swap_2:
1996  case Builtin::BI__sync_val_compare_and_swap_4:
1997  case Builtin::BI__sync_val_compare_and_swap_8:
1998  case Builtin::BI__sync_val_compare_and_swap_16:
1999  return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
2000 
2001  case Builtin::BI__sync_bool_compare_and_swap_1:
2002  case Builtin::BI__sync_bool_compare_and_swap_2:
2003  case Builtin::BI__sync_bool_compare_and_swap_4:
2004  case Builtin::BI__sync_bool_compare_and_swap_8:
2005  case Builtin::BI__sync_bool_compare_and_swap_16:
2006  return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
2007 
2008  case Builtin::BI__sync_swap_1:
2009  case Builtin::BI__sync_swap_2:
2010  case Builtin::BI__sync_swap_4:
2011  case Builtin::BI__sync_swap_8:
2012  case Builtin::BI__sync_swap_16:
2013  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
2014 
2015  case Builtin::BI__sync_lock_test_and_set_1:
2016  case Builtin::BI__sync_lock_test_and_set_2:
2017  case Builtin::BI__sync_lock_test_and_set_4:
2018  case Builtin::BI__sync_lock_test_and_set_8:
2019  case Builtin::BI__sync_lock_test_and_set_16:
2020  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
2021 
2022  case Builtin::BI__sync_lock_release_1:
2023  case Builtin::BI__sync_lock_release_2:
2024  case Builtin::BI__sync_lock_release_4:
2025  case Builtin::BI__sync_lock_release_8:
2026  case Builtin::BI__sync_lock_release_16: {
2027  Value *Ptr = EmitScalarExpr(E->getArg(0));
2028  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
2029  CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
2030  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
2031  StoreSize.getQuantity() * 8);
2032  Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
2033  llvm::StoreInst *Store =
2034  Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
2035  StoreSize);
2036  Store->setAtomic(llvm::AtomicOrdering::Release);
2037  return RValue::get(nullptr);
2038  }
2039 
2040  case Builtin::BI__sync_synchronize: {
2041  // We assume this is supposed to correspond to a C++0x-style
2042  // sequentially-consistent fence (i.e. this is only usable for
2043  // synchronization, not device I/O or anything like that). This intrinsic
2044  // is really badly designed in the sense that in theory, there isn't
2045  // any way to safely use it... but in practice, it mostly works
2046  // to use it with non-atomic loads and stores to get acquire/release
2047  // semantics.
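  // The generated IR is simply a full barrier:
  //   fence seq_cst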
2048  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
2049  return RValue::get(nullptr);
2050  }
2051 
2052  case Builtin::BI__builtin_nontemporal_load:
2053  return RValue::get(EmitNontemporalLoad(*this, E));
2054  case Builtin::BI__builtin_nontemporal_store:
2055  return RValue::get(EmitNontemporalStore(*this, E));
2056  case Builtin::BI__c11_atomic_is_lock_free:
2057  case Builtin::BI__atomic_is_lock_free: {
2058  // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
2059  // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
2060  // _Atomic(T) is always properly-aligned.
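  // Rough shape of the emitted call (exact types depend on size_t and the ABI):
  //   %r = call zeroext i1 @__atomic_is_lock_free(i64 %size, i8* %ptr)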
2061  const char *LibCallName = "__atomic_is_lock_free";
2062  CallArgList Args;
2063  Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
2064  getContext().getSizeType());
2065  if (BuiltinID == Builtin::BI__atomic_is_lock_free)
2066  Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
2067  getContext().VoidPtrTy);
2068  else
2069  Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
2070  getContext().VoidPtrTy);
2071  const CGFunctionInfo &FuncInfo =
2072  CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
2073  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
2074  llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
2075  return EmitCall(FuncInfo, CGCallee::forDirect(Func),
2076  ReturnValueSlot(), Args);
2077  }
2078 
2079  case Builtin::BI__atomic_test_and_set: {
2080  // Look at the argument type to determine whether this is a volatile
2081  // operation. The parameter type is always volatile.
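  // The core operation is a byte-wide exchange of 1 at the chosen ordering,
  // roughly:
  //   %old = atomicrmw volatile xchg i8* %ptr, i8 1 seq_cst
  //   %set = icmp ne i8 %old, 0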
2082  QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
2083  bool Volatile =
2084  PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
2085 
2086  Value *Ptr = EmitScalarExpr(E->getArg(0));
2087  unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
2088  Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
2089  Value *NewVal = Builder.getInt8(1);
2090  Value *Order = EmitScalarExpr(E->getArg(1));
2091  if (isa<llvm::ConstantInt>(Order)) {
2092  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
2093  AtomicRMWInst *Result = nullptr;
2094  switch (ord) {
2095  case 0: // memory_order_relaxed
2096  default: // invalid order
2097  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2098  llvm::AtomicOrdering::Monotonic);
2099  break;
2100  case 1: // memory_order_consume
2101  case 2: // memory_order_acquire
2102  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2103  llvm::AtomicOrdering::Acquire);
2104  break;
2105  case 3: // memory_order_release
2106  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2107  llvm::AtomicOrdering::Release);
2108  break;
2109  case 4: // memory_order_acq_rel
2110 
2111  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2112  llvm::AtomicOrdering::AcquireRelease);
2113  break;
2114  case 5: // memory_order_seq_cst
2115  Result = Builder.CreateAtomicRMW(
2116  llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2117  llvm::AtomicOrdering::SequentiallyConsistent);
2118  break;
2119  }
2120  Result->setVolatile(Volatile);
2121  return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
2122  }
2123 
2124  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
2125 
2126  llvm::BasicBlock *BBs[5] = {
2127  createBasicBlock("monotonic", CurFn),
2128  createBasicBlock("acquire", CurFn),
2129  createBasicBlock("release", CurFn),
2130  createBasicBlock("acqrel", CurFn),
2131  createBasicBlock("seqcst", CurFn)
2132  };
2133  llvm::AtomicOrdering Orders[5] = {
2134  llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
2135  llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
2136  llvm::AtomicOrdering::SequentiallyConsistent};
2137 
2138  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
2139  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
2140 
2141  Builder.SetInsertPoint(ContBB);
2142  PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
2143 
2144  for (unsigned i = 0; i < 5; ++i) {
2145  Builder.SetInsertPoint(BBs[i]);
2146  AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
2147  Ptr, NewVal, Orders[i]);
2148  RMW->setVolatile(Volatile);
2149  Result->addIncoming(RMW, BBs[i]);
2150  Builder.CreateBr(ContBB);
2151  }
2152 
2153  SI->addCase(Builder.getInt32(0), BBs[0]);
2154  SI->addCase(Builder.getInt32(1), BBs[1]);
2155  SI->addCase(Builder.getInt32(2), BBs[1]);
2156  SI->addCase(Builder.getInt32(3), BBs[2]);
2157  SI->addCase(Builder.getInt32(4), BBs[3]);
2158  SI->addCase(Builder.getInt32(5), BBs[4]);
2159 
2160  Builder.SetInsertPoint(ContBB);
2161  return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
2162  }
2163 
2164  case Builtin::BI__atomic_clear: {
2165  QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
2166  bool Volatile =
2167  PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
2168 
2169  Address Ptr = EmitPointerWithAlignment(E->getArg(0));
2170  unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
2171  Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
2172  Value *NewVal = Builder.getInt8(0);
2173  Value *Order = EmitScalarExpr(E->getArg(1));
2174  if (isa<llvm::ConstantInt>(Order)) {
2175  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
2176  StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
2177  switch (ord) {
2178  case 0: // memory_order_relaxed
2179  default: // invalid order
2180  Store->setOrdering(llvm::AtomicOrdering::Monotonic);
2181  break;
2182  case 3: // memory_order_release
2183  Store->setOrdering(llvm::AtomicOrdering::Release);
2184  break;
2185  case 5: // memory_order_seq_cst
2186  Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
2187  break;
2188  }
2189  return RValue::get(nullptr);
2190  }
2191 
2192  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
2193 
2194  llvm::BasicBlock *BBs[3] = {
2195  createBasicBlock("monotonic", CurFn),
2196  createBasicBlock("release", CurFn),
2197  createBasicBlock("seqcst", CurFn)
2198  };
2199  llvm::AtomicOrdering Orders[3] = {
2200  llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
2201  llvm::AtomicOrdering::SequentiallyConsistent};
2202 
2203  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
2204  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
2205 
2206  for (unsigned i = 0; i < 3; ++i) {
2207  Builder.SetInsertPoint(BBs[i]);
2208  StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
2209  Store->setOrdering(Orders[i]);
2210  Builder.CreateBr(ContBB);
2211  }
2212 
2213  SI->addCase(Builder.getInt32(0), BBs[0]);
2214  SI->addCase(Builder.getInt32(3), BBs[1]);
2215  SI->addCase(Builder.getInt32(5), BBs[2]);
2216 
2217  Builder.SetInsertPoint(ContBB);
2218  return RValue::get(nullptr);
2219  }
2220 
2221  case Builtin::BI__atomic_thread_fence:
2222  case Builtin::BI__atomic_signal_fence:
2223  case Builtin::BI__c11_atomic_thread_fence:
2224  case Builtin::BI__c11_atomic_signal_fence: {
2225  llvm::SyncScope::ID SSID;
2226  if (BuiltinID == Builtin::BI__atomic_signal_fence ||
2227  BuiltinID == Builtin::BI__c11_atomic_signal_fence)
2228  SSID = llvm::SyncScope::SingleThread;
2229  else
2230  SSID = llvm::SyncScope::System;
2231  Value *Order = EmitScalarExpr(E->getArg(0));
2232  if (isa<llvm::ConstantInt>(Order)) {
2233  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
2234  switch (ord) {
2235  case 0: // memory_order_relaxed
2236  default: // invalid order
2237  break;
2238  case 1: // memory_order_consume
2239  case 2: // memory_order_acquire
2240  Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
2241  break;
2242  case 3: // memory_order_release
2243  Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
2244  break;
2245  case 4: // memory_order_acq_rel
2246  Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
2247  break;
2248  case 5: // memory_order_seq_cst
2249  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
2250  break;
2251  }
2252  return RValue::get(nullptr);
2253  }
2254 
2255  llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
2256  AcquireBB = createBasicBlock("acquire", CurFn);
2257  ReleaseBB = createBasicBlock("release", CurFn);
2258  AcqRelBB = createBasicBlock("acqrel", CurFn);
2259  SeqCstBB = createBasicBlock("seqcst", CurFn);
2260  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
2261 
2262  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
2263  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
2264 
2265  Builder.SetInsertPoint(AcquireBB);
2266  Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
2267  Builder.CreateBr(ContBB);
2268  SI->addCase(Builder.getInt32(1), AcquireBB);
2269  SI->addCase(Builder.getInt32(2), AcquireBB);
2270 
2271  Builder.SetInsertPoint(ReleaseBB);
2272  Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
2273  Builder.CreateBr(ContBB);
2274  SI->addCase(Builder.getInt32(3), ReleaseBB);
2275 
2276  Builder.SetInsertPoint(AcqRelBB);
2277  Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
2278  Builder.CreateBr(ContBB);
2279  SI->addCase(Builder.getInt32(4), AcqRelBB);
2280 
2281  Builder.SetInsertPoint(SeqCstBB);
2282  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
2283  Builder.CreateBr(ContBB);
2284  SI->addCase(Builder.getInt32(5), SeqCstBB);
2285 
2286  Builder.SetInsertPoint(ContBB);
2287  return RValue::get(nullptr);
2288  }
2289 
2290  case Builtin::BI__builtin_signbit:
2291  case Builtin::BI__builtin_signbitf:
2292  case Builtin::BI__builtin_signbitl: {
2293  return RValue::get(
2294  Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
2295  ConvertType(E->getType())));
2296  }
2297  case Builtin::BI__annotation: {
2298  // Re-encode each wide string to UTF8 and make an MDString.
2299  SmallVector<Metadata *, 1> Strings;
2300  for (const Expr *Arg : E->arguments()) {
2301  const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
2302  assert(Str->getCharByteWidth() == 2);
2303  StringRef WideBytes = Str->getBytes();
2304  std::string StrUtf8;
2305  if (!convertUTF16ToUTF8String(
2306  makeArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
2307  CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
2308  continue;
2309  }
2310  Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
2311  }
2312 
2313  // Build an MDTuple of MDStrings and emit the intrinsic call.
2314  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
2315  MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
2316  Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
2317  return RValue::getIgnored();
2318  }
2319  case Builtin::BI__builtin_annotation: {
2320  llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
2321  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
2322  AnnVal->getType());
2323 
2324  // Get the annotation string, go through casts. Sema requires this to be a
2325  // non-wide string literal, potentially cast, so the cast<> is safe.
2326  const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
2327  StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
2328  return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
2329  }
2330  case Builtin::BI__builtin_addcb:
2331  case Builtin::BI__builtin_addcs:
2332  case Builtin::BI__builtin_addc:
2333  case Builtin::BI__builtin_addcl:
2334  case Builtin::BI__builtin_addcll:
2335  case Builtin::BI__builtin_subcb:
2336  case Builtin::BI__builtin_subcs:
2337  case Builtin::BI__builtin_subc:
2338  case Builtin::BI__builtin_subcl:
2339  case Builtin::BI__builtin_subcll: {
2340 
2341  // We translate all of these builtins from expressions of the form:
2342  // int x = ..., y = ..., carryin = ..., carryout, result;
2343  // result = __builtin_addc(x, y, carryin, &carryout);
2344  //
2345  // to LLVM IR of the form:
2346  //
2347  // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
2348  // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
2349  // %carry1 = extractvalue {i32, i1} %tmp1, 1
2350  // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
2351  // i32 %carryin)
2352  // %result = extractvalue {i32, i1} %tmp2, 0
2353  // %carry2 = extractvalue {i32, i1} %tmp2, 1
2354  // %tmp3 = or i1 %carry1, %carry2
2355  // %tmp4 = zext i1 %tmp3 to i32
2356  // store i32 %tmp4, i32* %carryout
2357 
2358  // Scalarize our inputs.
2359  llvm::Value *X = EmitScalarExpr(E->getArg(0));
2360  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2361  llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
2362  Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
2363 
2364  // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
2365  llvm::Intrinsic::ID IntrinsicId;
2366  switch (BuiltinID) {
2367  default: llvm_unreachable("Unknown multiprecision builtin id.");
2368  case Builtin::BI__builtin_addcb:
2369  case Builtin::BI__builtin_addcs:
2370  case Builtin::BI__builtin_addc:
2371  case Builtin::BI__builtin_addcl:
2372  case Builtin::BI__builtin_addcll:
2373  IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2374  break;
2375  case Builtin::BI__builtin_subcb:
2376  case Builtin::BI__builtin_subcs:
2377  case Builtin::BI__builtin_subc:
2378  case Builtin::BI__builtin_subcl:
2379  case Builtin::BI__builtin_subcll:
2380  IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2381  break;
2382  }
2383 
2384  // Construct our resulting LLVM IR expression.
2385  llvm::Value *Carry1;
2386  llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
2387  X, Y, Carry1);
2388  llvm::Value *Carry2;
2389  llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
2390  Sum1, Carryin, Carry2);
2391  llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
2392  X->getType());
2393  Builder.CreateStore(CarryOut, CarryOutPtr);
2394  return RValue::get(Sum2);
2395  }
2396 
2397  case Builtin::BI__builtin_add_overflow:
2398  case Builtin::BI__builtin_sub_overflow:
2399  case Builtin::BI__builtin_mul_overflow: {
2400  const clang::Expr *LeftArg = E->getArg(0);
2401  const clang::Expr *RightArg = E->getArg(1);
2402  const clang::Expr *ResultArg = E->getArg(2);
2403 
2404  clang::QualType ResultQTy =
2405  ResultArg->getType()->castAs<PointerType>()->getPointeeType();
2406 
2407  WidthAndSignedness LeftInfo =
2408  getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
2409  WidthAndSignedness RightInfo =
2410  getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
2411  WidthAndSignedness ResultInfo =
2412  getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
2413 
2414  // Handle mixed-sign multiplication as a special case, because adding
2415  // runtime or backend support for our generic irgen would be too expensive.
2416  if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
2417  return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
2418  RightInfo, ResultArg, ResultQTy,
2419  ResultInfo);
2420 
2421  WidthAndSignedness EncompassingInfo =
2422  EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
2423 
2424  llvm::Type *EncompassingLLVMTy =
2425  llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
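  // Worked example (illustrative): for __builtin_add_overflow(int, unsigned,
  // short *), the encompassing type must be signed yet hold every unsigned
  // 32-bit value, so EncompassingInfo is {33, signed} and the arithmetic is
  // performed in i33 before being checked against the 16-bit result type below.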
2426 
2427  llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
2428 
2429  llvm::Intrinsic::ID IntrinsicId;
2430  switch (BuiltinID) {
2431  default:
2432  llvm_unreachable("Unknown overflow builtin id.");
2433  case Builtin::BI__builtin_add_overflow:
2434  IntrinsicId = EncompassingInfo.Signed
2435  ? llvm::Intrinsic::sadd_with_overflow
2436  : llvm::Intrinsic::uadd_with_overflow;
2437  break;
2438  case Builtin::BI__builtin_sub_overflow:
2439  IntrinsicId = EncompassingInfo.Signed
2440  ? llvm::Intrinsic::ssub_with_overflow
2441  : llvm::Intrinsic::usub_with_overflow;
2442  break;
2443  case Builtin::BI__builtin_mul_overflow:
2444  IntrinsicId = EncompassingInfo.Signed
2445  ? llvm::Intrinsic::smul_with_overflow
2446  : llvm::Intrinsic::umul_with_overflow;
2447  break;
2448  }
2449 
2450  llvm::Value *Left = EmitScalarExpr(LeftArg);
2451  llvm::Value *Right = EmitScalarExpr(RightArg);
2452  Address ResultPtr = EmitPointerWithAlignment(ResultArg);
2453 
2454  // Extend each operand to the encompassing type.
2455  Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
2456  Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
2457 
2458  // Perform the operation on the extended values.
2459  llvm::Value *Overflow, *Result;
2460  Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
2461 
2462  if (EncompassingInfo.Width > ResultInfo.Width) {
2463  // The encompassing type is wider than the result type, so we need to
2464  // truncate it.
2465  llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
2466 
2467  // To see if the truncation caused an overflow, we will extend
2468  // the result and then compare it to the original result.
2469  llvm::Value *ResultTruncExt = Builder.CreateIntCast(
2470  ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
2471  llvm::Value *TruncationOverflow =
2472  Builder.CreateICmpNE(Result, ResultTruncExt);
2473 
2474  Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
2475  Result = ResultTrunc;
2476  }
2477 
2478  // Finally, store the result using the pointer.
2479  bool isVolatile =
2480  ResultArg->getType()->getPointeeType().isVolatileQualified();
2481  Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
2482 
2483  return RValue::get(Overflow);
2484  }
2485 
2486  case Builtin::BI__builtin_uadd_overflow:
2487  case Builtin::BI__builtin_uaddl_overflow:
2488  case Builtin::BI__builtin_uaddll_overflow:
2489  case Builtin::BI__builtin_usub_overflow:
2490  case Builtin::BI__builtin_usubl_overflow:
2491  case Builtin::BI__builtin_usubll_overflow:
2492  case Builtin::BI__builtin_umul_overflow:
2493  case Builtin::BI__builtin_umull_overflow:
2494  case Builtin::BI__builtin_umulll_overflow:
2495  case Builtin::BI__builtin_sadd_overflow:
2496  case Builtin::BI__builtin_saddl_overflow:
2497  case Builtin::BI__builtin_saddll_overflow:
2498  case Builtin::BI__builtin_ssub_overflow:
2499  case Builtin::BI__builtin_ssubl_overflow:
2500  case Builtin::BI__builtin_ssubll_overflow:
2501  case Builtin::BI__builtin_smul_overflow:
2502  case Builtin::BI__builtin_smull_overflow:
2503  case Builtin::BI__builtin_smulll_overflow: {
2504 
2505  // We translate all of these builtins directly to the relevant LLVM IR node.
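  // For example (illustrative), __builtin_sadd_overflow on 32-bit ints emits
  //   %pair = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
  //   %sum  = extractvalue {i32, i1} %pair, 0
  //   %ovf  = extractvalue {i32, i1} %pair, 1
  // with %sum stored through the third argument and %ovf returned.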
2506 
2507  // Scalarize our inputs.
2508  llvm::Value *X = EmitScalarExpr(E->getArg(0));
2509  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2510  Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
2511 
2512  // Decide which of the overflow intrinsics we are lowering to:
2513  llvm::Intrinsic::ID IntrinsicId;
2514  switch (BuiltinID) {
2515  default: llvm_unreachable("Unknown overflow builtin id.");
2516  case Builtin::BI__builtin_uadd_overflow:
2517  case Builtin::BI__builtin_uaddl_overflow:
2518  case Builtin::BI__builtin_uaddll_overflow:
2519  IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2520  break;
2521  case Builtin::BI__builtin_usub_overflow:
2522  case Builtin::BI__builtin_usubl_overflow:
2523  case Builtin::BI__builtin_usubll_overflow:
2524  IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2525  break;
2526  case Builtin::BI__builtin_umul_overflow:
2527  case Builtin::BI__builtin_umull_overflow:
2528  case Builtin::BI__builtin_umulll_overflow:
2529  IntrinsicId = llvm::Intrinsic::umul_with_overflow;
2530  break;
2531  case Builtin::BI__builtin_sadd_overflow:
2532  case Builtin::BI__builtin_saddl_overflow:
2533  case Builtin::BI__builtin_saddll_overflow:
2534  IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
2535  break;
2536  case Builtin::BI__builtin_ssub_overflow:
2537  case Builtin::BI__builtin_ssubl_overflow:
2538  case Builtin::BI__builtin_ssubll_overflow:
2539  IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
2540  break;
2541  case Builtin::BI__builtin_smul_overflow:
2542  case Builtin::BI__builtin_smull_overflow:
2543  case Builtin::BI__builtin_smulll_overflow:
2544  IntrinsicId = llvm::Intrinsic::smul_with_overflow;
2545  break;
2546  }
2547 
2548 
2549  llvm::Value *Carry;
2550  llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
2551  Builder.CreateStore(Sum, SumOutPtr);
2552 
2553  return RValue::get(Carry);
2554  }
2555  case Builtin::BI__builtin_addressof:
2556  return RValue::get(EmitLValue(E->getArg(0)).getPointer());
2557  case Builtin::BI__builtin_operator_new:
2558  return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2559  E->getArg(0), false);
2560  case Builtin::BI__builtin_operator_delete:
2561  return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2562  E->getArg(0), true);
2563  case Builtin::BI__noop:
2564  // __noop always evaluates to an integer literal zero.
2565  return RValue::get(ConstantInt::get(IntTy, 0));
2566  case Builtin::BI__builtin_call_with_static_chain: {
2567  const CallExpr *Call = cast<CallExpr>(E->getArg(0));
2568  const Expr *Chain = E->getArg(1);
2569  return EmitCall(Call->getCallee()->getType(),
2570  EmitCallee(Call->getCallee()), Call, ReturnValue,
2571  EmitScalarExpr(Chain));
2572  }
2573  case Builtin::BI_InterlockedExchange8:
2574  case Builtin::BI_InterlockedExchange16:
2575  case Builtin::BI_InterlockedExchange:
2576  case Builtin::BI_InterlockedExchangePointer:
2577  return RValue::get(
2578  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
2579  case Builtin::BI_InterlockedCompareExchangePointer: {
2580  llvm::Type *RTy;
2581  llvm::IntegerType *IntType =
2582  IntegerType::get(getLLVMContext(),
2583  getContext().getTypeSize(E->getType()));
2584  llvm::Type *IntPtrType = IntType->getPointerTo();
2585 
2586  llvm::Value *Destination =
2587  Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
2588 
2589  llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
2590  RTy = Exchange->getType();
2591  Exchange = Builder.CreatePtrToInt(Exchange, IntType);
2592 
2593  llvm::Value *Comparand =
2594  Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
2595 
2596  auto Result =
2597  Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
2598  AtomicOrdering::SequentiallyConsistent,
2599  AtomicOrdering::SequentiallyConsistent);
2600  Result->setVolatile(true);
2601 
2602  return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
2603  0),
2604  RTy));
2605  }
2606  case Builtin::BI_InterlockedCompareExchange8:
2607  case Builtin::BI_InterlockedCompareExchange16:
2608  case Builtin::BI_InterlockedCompareExchange:
2609  case Builtin::BI_InterlockedCompareExchange64: {
2610  AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
2611  EmitScalarExpr(E->getArg(0)),
2612  EmitScalarExpr(E->getArg(2)),
2613  EmitScalarExpr(E->getArg(1)),
2614  AtomicOrdering::SequentiallyConsistent,
2615  AtomicOrdering::SequentiallyConsistent);
2616  CXI->setVolatile(true);
2617  return RValue::get(Builder.CreateExtractValue(CXI, 0));
2618  }
2619  case Builtin::BI_InterlockedIncrement16:
2620  case Builtin::BI_InterlockedIncrement:
2621  return RValue::get(
2622  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
2623  case Builtin::BI_InterlockedDecrement16:
2624  case Builtin::BI_InterlockedDecrement:
2625  return RValue::get(
2626  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
2627  case Builtin::BI_InterlockedAnd8:
2628  case Builtin::BI_InterlockedAnd16:
2629  case Builtin::BI_InterlockedAnd:
2630  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
2631  case Builtin::BI_InterlockedExchangeAdd8:
2632  case Builtin::BI_InterlockedExchangeAdd16:
2633  case Builtin::BI_InterlockedExchangeAdd:
2634  return RValue::get(
2635  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
2636  case Builtin::BI_InterlockedExchangeSub8:
2637  case Builtin::BI_InterlockedExchangeSub16:
2638  case Builtin::BI_InterlockedExchangeSub:
2639  return RValue::get(
2640  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
2641  case Builtin::BI_InterlockedOr8:
2642  case Builtin::BI_InterlockedOr16:
2643  case Builtin::BI_InterlockedOr:
2644  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
2645  case Builtin::BI_InterlockedXor8:
2646  case Builtin::BI_InterlockedXor16:
2647  case Builtin::BI_InterlockedXor:
2648  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
2649  case Builtin::BI_interlockedbittestandset:
2650  return RValue::get(
2651  EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E));
2652 
2653  case Builtin::BI__exception_code:
2654  case Builtin::BI_exception_code:
2655  return RValue::get(EmitSEHExceptionCode());
2656  case Builtin::BI__exception_info:
2657  case Builtin::BI_exception_info:
2658  return RValue::get(EmitSEHExceptionInfo());
2659  case Builtin::BI__abnormal_termination:
2660  case Builtin::BI_abnormal_termination:
2661  return RValue::get(EmitSEHAbnormalTermination());
2662  case Builtin::BI_setjmpex: {
2663  if (getTarget().getTriple().isOSMSVCRT()) {
2664  llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2665  llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2666  getLLVMContext(), llvm::AttributeList::FunctionIndex,
2667  llvm::Attribute::ReturnsTwice);
2668  llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
2669  llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2670  "_setjmpex", ReturnsTwiceAttr, /*Local=*/true);
2671  llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2672  EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2673  llvm::Value *FrameAddr =
2674  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2675  ConstantInt::get(Int32Ty, 0));
2676  llvm::Value *Args[] = {Buf, FrameAddr};
2677  llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
2678  CS.setAttributes(ReturnsTwiceAttr);
2679  return RValue::get(CS.getInstruction());
2680  }
2681  break;
2682  }
2683  case Builtin::BI_setjmp: {
2684  if (getTarget().getTriple().isOSMSVCRT()) {
2685  llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2686  getLLVMContext(), llvm::AttributeList::FunctionIndex,
2687  llvm::Attribute::ReturnsTwice);
2688  llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2689  EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2690  llvm::CallSite CS;
2691  if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2692  llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
2693  llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
2694  llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
2695  "_setjmp3", ReturnsTwiceAttr, /*Local=*/true);
2696  llvm::Value *Count = ConstantInt::get(IntTy, 0);
2697  llvm::Value *Args[] = {Buf, Count};
2698  CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
2699  } else {
2700  llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2701  llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
2702  llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2703  "_setjmp", ReturnsTwiceAttr, /*Local=*/true);
2704  llvm::Value *FrameAddr =
2705  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2706  ConstantInt::get(Int32Ty, 0));
2707  llvm::Value *Args[] = {Buf, FrameAddr};
2708  CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
2709  }
2710  CS.setAttributes(ReturnsTwiceAttr);
2711  return RValue::get(CS.getInstruction());
2712  }
2713  break;
2714  }
2715 
2716  case Builtin::BI__GetExceptionInfo: {
2717  if (llvm::GlobalVariable *GV =
2718  CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
2719  return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
2720  break;
2721  }
2722 
2723  case Builtin::BI__fastfail:
2724  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
2725 
2726  case Builtin::BI__builtin_coro_size: {
2727  auto & Context = getContext();
2728  auto SizeTy = Context.getSizeType();
2729  auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
2730  Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
2731  return RValue::get(Builder.CreateCall(F));
2732  }
2733 
2734  case Builtin::BI__builtin_coro_id:
2735  return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
2736  case Builtin::BI__builtin_coro_promise:
2737  return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
2738  case Builtin::BI__builtin_coro_resume:
2739  return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
2740  case Builtin::BI__builtin_coro_frame:
2741  return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
2742  case Builtin::BI__builtin_coro_free:
2743  return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
2744  case Builtin::BI__builtin_coro_destroy:
2745  return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
2746  case Builtin::BI__builtin_coro_done:
2747  return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
2748  case Builtin::BI__builtin_coro_alloc:
2749  return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
2750  case Builtin::BI__builtin_coro_begin:
2751  return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
2752  case Builtin::BI__builtin_coro_end:
2753  return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
2754  case Builtin::BI__builtin_coro_suspend:
2755  return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
2756  case Builtin::BI__builtin_coro_param:
2757  return EmitCoroutineIntrinsic(E, Intrinsic::coro_param);
2758 
2759  // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
2760  case Builtin::BIread_pipe:
2761  case Builtin::BIwrite_pipe: {
2762  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2763  *Arg1 = EmitScalarExpr(E->getArg(1));
2764  CGOpenCLRuntime OpenCLRT(CGM);
2765  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2766  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2767 
2768  // Type of the generic packet parameter.
2769  unsigned GenericAS =
2770  getContext().getTargetAddressSpace(LangAS::opencl_generic);
2771  llvm::Type *I8PTy = llvm::PointerType::get(
2772  llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
2773 
2774  // Determine which overloaded version of the call to generate.
2775  if (2U == E->getNumArgs()) {
2776  const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
2777  : "__write_pipe_2";
2778  // Create a generic function type so the call works with any builtin or
2779  // user-defined type.
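  // Roughly (illustrative), read_pipe(p, &v) lowers to a runtime call such as
  //   %r = call i32 @__read_pipe_2(<pipe> %p, i8 addrspace(4)* %v,
  //                                i32 <elem size>, i32 <elem align>)
  // where the element size and alignment come from the pipe's element type.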
2780  llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
2781  llvm::FunctionType *FTy = llvm::FunctionType::get(
2782  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2783  Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
2784  return RValue::get(
2785  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2786  {Arg0, BCast, PacketSize, PacketAlign}));
2787  } else {
2788  assert(4 == E->getNumArgs() &&
2789  "Illegal number of parameters to pipe function");
2790  const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
2791  : "__write_pipe_4";
2792 
2793  llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
2794  Int32Ty, Int32Ty};
2795  Value *Arg2 = EmitScalarExpr(E->getArg(2)),
2796  *Arg3 = EmitScalarExpr(E->getArg(3));
2797  llvm::FunctionType *FTy = llvm::FunctionType::get(
2798  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2799  Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
2800  // We know the third argument is an integer type, but we may need to cast
2801  // it to i32.
2802  if (Arg2->getType() != Int32Ty)
2803  Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
2804  return RValue::get(Builder.CreateCall(
2805  CGM.CreateRuntimeFunction(FTy, Name),
2806  {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
2807  }
2808  }
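// Illustrative sketch (assumed OpenCL source, not from this file): a
// two-argument call such as
//   int ok = read_pipe(p, &val);
// takes the first branch above and becomes a call to the runtime helper
//   __read_pipe_2(p, (generic i8 *)&val, packet_size, packet_align)
// with the packet size/alignment constants taken from the pipe element type
// via CGOpenCLRuntime; the four-argument form, which also carries a
// reserve_id_t and an index, maps to __read_pipe_4 (resp. __write_pipe_4).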
2809  // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write
2810  // functions
2811  case Builtin::BIreserve_read_pipe:
2812  case Builtin::BIreserve_write_pipe:
2813  case Builtin::BIwork_group_reserve_read_pipe:
2814  case Builtin::BIwork_group_reserve_write_pipe:
2815  case Builtin::BIsub_group_reserve_read_pipe:
2816  case Builtin::BIsub_group_reserve_write_pipe: {
2817  // Composing the mangled name for the function.
2818  const char *Name;
2819  if (BuiltinID == Builtin::BIreserve_read_pipe)
2820  Name = "__reserve_read_pipe";
2821  else if (BuiltinID == Builtin::BIreserve_write_pipe)
2822  Name = "__reserve_write_pipe";
2823  else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
2824  Name = "__work_group_reserve_read_pipe";
2825  else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
2826  Name = "__work_group_reserve_write_pipe";
2827  else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
2828  Name = "__sub_group_reserve_read_pipe";
2829  else
2830  Name = "__sub_group_reserve_write_pipe";
2831 
2832  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2833  *Arg1 = EmitScalarExpr(E->getArg(1));
2834  llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
2835  CGOpenCLRuntime OpenCLRT(CGM);
2836  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2837  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2838 
2839  // Building the generic function prototype.
2840  llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
2841  llvm::FunctionType *FTy = llvm::FunctionType::get(
2842  ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2843  // We know the second argument is an integer type, but we may need to cast
2844  // it to i32.
2845  if (Arg1->getType() != Int32Ty)
2846  Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
2847  return RValue::get(
2848  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2849  {Arg0, Arg1, PacketSize, PacketAlign}));
2850  }
2851  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
2852  // functions
2853  case Builtin::BIcommit_read_pipe:
2854  case Builtin::BIcommit_write_pipe:
2855  case Builtin::BIwork_group_commit_read_pipe:
2856  case Builtin::BIwork_group_commit_write_pipe:
2857  case Builtin::BIsub_group_commit_read_pipe:
2858  case Builtin::BIsub_group_commit_write_pipe: {
2859  const char *Name;
2860  if (BuiltinID == Builtin::BIcommit_read_pipe)
2861  Name = "__commit_read_pipe";
2862  else if (BuiltinID == Builtin::BIcommit_write_pipe)
2863  Name = "__commit_write_pipe";
2864  else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
2865  Name = "__work_group_commit_read_pipe";
2866  else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
2867  Name = "__work_group_commit_write_pipe";
2868  else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
2869  Name = "__sub_group_commit_read_pipe";
2870  else
2871  Name = "__sub_group_commit_write_pipe";
2872 
2873  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2874  *Arg1 = EmitScalarExpr(E->getArg(1));
2875  CGOpenCLRuntime OpenCLRT(CGM);
2876  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2877  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2878 
2879  // Building the generic function prototype.
2880  llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
2881  llvm::FunctionType *FTy =
2882  llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
2883  llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2884 
2885  return RValue::get(
2886  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2887  {Arg0, Arg1, PacketSize, PacketAlign}));
2888  }
2889  // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
2890  case Builtin::BIget_pipe_num_packets:
2891  case Builtin::BIget_pipe_max_packets: {
2892  const char *Name;
2893  if (BuiltinID == Builtin::BIget_pipe_num_packets)
2894  Name = "__get_pipe_num_packets";
2895  else
2896  Name = "__get_pipe_max_packets";
2897 
2898  // Building the generic function prototype.
2899  Value *Arg0 = EmitScalarExpr(E->getArg(0));
2900  CGOpenCLRuntime OpenCLRT(CGM);
2901  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2902  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2903  llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
2904  llvm::FunctionType *FTy = llvm::FunctionType::get(
2905  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2906 
2907  return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2908  {Arg0, PacketSize, PacketAlign}));
2909  }
2910 
2911  // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
2912  case Builtin::BIto_global:
2913  case Builtin::BIto_local:
2914  case Builtin::BIto_private: {
2915  auto Arg0 = EmitScalarExpr(E->getArg(0));
2916  auto NewArgT = llvm::PointerType::get(Int8Ty,
2917  CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2918  auto NewRetT = llvm::PointerType::get(Int8Ty,
2919  CGM.getContext().getTargetAddressSpace(
2920  E->getType()->getPointeeType().getQualifiers().getAddressSpace()));
2921  auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
2922  llvm::Value *NewArg;
2923  if (Arg0->getType()->getPointerAddressSpace() !=
2924  NewArgT->getPointerAddressSpace())
2925  NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
2926  else
2927  NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
2928  auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
2929  auto NewCall =
2930  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
2931  return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
2932  ConvertType(E->getType())));
2933  }
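// Illustrative sketch (assumed usage, not from this file): for
//   global int *g = to_global(p);
// the generic pointer p is cast (with an addrspacecast if its address space
// differs) to the generic i8* parameter type, the call is emitted as
// __to_global(%p), and the returned i8* is cast back to the type of the
// builtin call expression.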
2934 
2935  // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
2936  // It contains four different overload formats specified in Table 6.13.17.1.
2937  case Builtin::BIenqueue_kernel: {
2938  StringRef Name; // Generated function call name
2939  unsigned NumArgs = E->getNumArgs();
2940 
2941  llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
2942  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2943  getContext().getTargetAddressSpace(LangAS::opencl_generic));
2944 
2945  llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
2946  llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
2947  LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
2948  llvm::Value *Range = NDRangeL.getAddress().getPointer();
2949  llvm::Type *RangeTy = NDRangeL.getAddress().getType();
2950 
2951  if (NumArgs == 4) {
2952  // The most basic form of the call with parameters:
2953  // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
2954  Name = "__enqueue_kernel_basic";
2955  llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
2956  GenericVoidPtrTy};
2957  llvm::FunctionType *FTy = llvm::FunctionType::get(
2958  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2959 
2960  auto Info =
2961  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
2962  llvm::Value *Kernel =
2963  Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
2964  llvm::Value *Block =
2965  Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
2966 
2967  AttrBuilder B;
2968  B.addAttribute(Attribute::ByVal);
2969  llvm::AttributeList ByValAttrSet =
2970  llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
2971 
2972  auto RTCall =
2973  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
2974  {Queue, Flags, Range, Kernel, Block});
2975  RTCall->setAttributes(ByValAttrSet);
2976  return RValue::get(RTCall);
2977  }
2978  assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
2979 
2980  // Create a temporary array to hold the sizes of local pointer arguments
2981  // for the block. \p First is the position of the first size argument.
2982  auto CreateArrayForSizeVar = [=](unsigned First) {
2983  auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First);
2984  auto *Arr = Builder.CreateAlloca(AT);
2985  llvm::Value *Ptr;
2986  // Each of the following arguments specifies the size of the corresponding
2987  // argument passed to the enqueued block.
2988  auto *Zero = llvm::ConstantInt::get(IntTy, 0);
2989  for (unsigned I = First; I < NumArgs; ++I) {
2990  auto *Index = llvm::ConstantInt::get(IntTy, I - First);
2991  auto *GEP = Builder.CreateGEP(Arr, {Zero, Index});
2992  if (I == First)
2993  Ptr = GEP;
2994  auto *V =
2995  Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
2996  Builder.CreateAlignedStore(
2997  V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy));
2998  }
2999  return Ptr;
3000  };
3001 
3002  // Could have events and/or vaargs.
3003  if (E->getArg(3)->getType()->isBlockPointerType()) {
3004  // No events passed, but has variadic arguments.
3005  Name = "__enqueue_kernel_vaargs";
3006  auto Info =
3007  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
3008  llvm::Value *Kernel =
3009  Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3010  auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3011  auto *PtrToSizeArray = CreateArrayForSizeVar(4);
3012 
3013  // Create a vector of the arguments, as well as a constant value to
3014  // express to the runtime the number of variadic arguments.
3015  std::vector<llvm::Value *> Args = {
3016  Queue, Flags, Range,
3017  Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4),
3018  PtrToSizeArray};
3019  std::vector<llvm::Type *> ArgTys = {
3020  QueueTy, IntTy, RangeTy,
3021  GenericVoidPtrTy, GenericVoidPtrTy, IntTy,
3022  PtrToSizeArray->getType()};
3023 
3024  llvm::FunctionType *FTy = llvm::FunctionType::get(
3025  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3026  return RValue::get(
3027  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3028  llvm::ArrayRef<llvm::Value *>(Args)));
3029  }
3030  // Any calls now have event arguments passed.
3031  if (NumArgs >= 7) {
3032  llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
3033  llvm::Type *EventPtrTy = EventTy->getPointerTo(
3034  CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
3035 
3036  llvm::Value *NumEvents =
3037  Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
3038  llvm::Value *EventList =
3039  E->getArg(4)->getType()->isArrayType()
3040  ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
3041  : EmitScalarExpr(E->getArg(4));
3042  llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
3043  // Convert to generic address space.
3044  EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
3045  ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
3046  auto Info =
3047  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
3048  llvm::Value *Kernel =
3049  Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3050  llvm::Value *Block =
3051  Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3052 
3053  std::vector<llvm::Type *> ArgTys = {
3054  QueueTy, Int32Ty, RangeTy, Int32Ty,
3055  EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
3056 
3057  std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents,
3058  EventList, ClkEvent, Kernel, Block};
3059 
3060  if (NumArgs == 7) {
3061  // Has events but no variadics.
3062  Name = "__enqueue_kernel_basic_events";
3063  llvm::FunctionType *FTy = llvm::FunctionType::get(
3064  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3065  return RValue::get(
3066  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3067  llvm::ArrayRef<llvm::Value *>(Args)));
3068  }
3069  // Has event info and variadics
3070  // Pass the number of variadics to the runtime function too.
3071  Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
3072  ArgTys.push_back(Int32Ty);
3073  Name = "__enqueue_kernel_events_vaargs";
3074 
3075  auto *PtrToSizeArray = CreateArrayForSizeVar(7);
3076  Args.push_back(PtrToSizeArray);
3077  ArgTys.push_back(PtrToSizeArray->getType());
3078 
3079  llvm::FunctionType *FTy = llvm::FunctionType::get(
3080  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3081  return RValue::get(
3082  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3083  llvm::ArrayRef<llvm::Value *>(Args)));
3084  }
3085  LLVM_FALLTHROUGH;
3086  }
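// Illustrative sketch (assumed call, not from this file): the simplest
// four-argument form
//   enqueue_kernel(q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndr, ^{ ... });
// selects "__enqueue_kernel_basic" above and passes the queue, the flags, a
// pointer to the ndrange_t, and the block's invoke function and literal both
// cast to generic i8*. Forms with event lists and/or local-size arguments
// pick the *_events / *_vaargs variants and append the extra parameters,
// including the array of local sizes built by CreateArrayForSizeVar.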
3087  // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
3088  // parameter.
3089  case Builtin::BIget_kernel_work_group_size: {
3090  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3091  getContext().getTargetAddressSpace(LangAS::opencl_generic));
3092  auto Info =
3093  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
3094  Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3095  Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3096  return RValue::get(Builder.CreateCall(
3097  CGM.CreateRuntimeFunction(
3098  llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
3099  false),
3100  "__get_kernel_work_group_size_impl"),
3101  {Kernel, Arg}));
3102  }
3103  case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
3104  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3105  getContext().getTargetAddressSpace(LangAS::opencl_generic));
3106  auto Info =
3107  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
3108  Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3109  Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3110  return RValue::get(Builder.CreateCall(
3111  CGM.CreateRuntimeFunction(
3112  llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
3113  false),
3114  "__get_kernel_preferred_work_group_multiple_impl"),
3115  {Kernel, Arg}));
3116  }
3117  case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
3118  case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
3119  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3120  getContext().getTargetAddressSpace(LangAS::opencl_generic));
3121  LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
3122  llvm::Value *NDRange = NDRangeL.getAddress().getPointer();
3123  auto Info =
3124  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
3125  Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3126  Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3127  const char *Name =
3128  BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
3129  ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
3130  : "__get_kernel_sub_group_count_for_ndrange_impl";
3131  return RValue::get(Builder.CreateCall(
3132  CGM.CreateRuntimeFunction(
3133  llvm::FunctionType::get(
3134  IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
3135  false),
3136  Name),
3137  {NDRange, Kernel, Block}));
3138  }
3139 
3140  case Builtin::BI__builtin_store_half:
3141  case Builtin::BI__builtin_store_halff: {
3142  Value *Val = EmitScalarExpr(E->getArg(0));
3143  Address Address = EmitPointerWithAlignment(E->getArg(1));
3144  Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
3145  return RValue::get(Builder.CreateStore(HalfVal, Address));
3146  }
3147  case Builtin::BI__builtin_load_half: {
3148  Address Address = EmitPointerWithAlignment(E->getArg(0));
3149  Value *HalfVal = Builder.CreateLoad(Address);
3150  return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
3151  }
3152  case Builtin::BI__builtin_load_halff: {
3153  Address Address = EmitPointerWithAlignment(E->getArg(0));
3154  Value *HalfVal = Builder.CreateLoad(Address);
3155  return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
3156  }
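// Illustrative sketch (assumed usage, not from this file):
//   __builtin_store_halff(1.0f, p);    // fptrunc float -> half, then store
//   float f = __builtin_load_halff(p); // load half, then fpext to float
// The "half" variants above take or produce double and the "halff" variants
// float; only the width of the FP truncation/extension differs.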
3157  case Builtin::BIprintf:
3158  if (getTarget().getTriple().isNVPTX())
3159  return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
3160  break;
3161  case Builtin::BI__builtin_canonicalize:
3162  case Builtin::BI__builtin_canonicalizef:
3163  case Builtin::BI__builtin_canonicalizel:
3164  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
3165 
3166  case Builtin::BI__builtin_thread_pointer: {
3167  if (!getContext().getTargetInfo().isTLSSupported())
3168  CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
3169  // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
3170  break;
3171  }
3172  case Builtin::BI__builtin_os_log_format:
3173  return emitBuiltinOSLogFormat(*E);
3174 
3175  case Builtin::BI__builtin_os_log_format_buffer_size: {
3176  analyze_os_log::OSLogBufferLayout Layout;
3177  analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
3178  return RValue::get(ConstantInt::get(ConvertType(E->getType()),
3179  Layout.size().getQuantity()));
3180  }
3181 
3182  case Builtin::BI__xray_customevent: {
3183  if (!ShouldXRayInstrumentFunction())
3184  return RValue::getIgnored();
3185  if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
3186  if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
3187  return RValue::getIgnored();
3188 
3189  Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
3190  auto FTy = F->getFunctionType();
3191  auto Arg0 = E->getArg(0);
3192  auto Arg0Val = EmitScalarExpr(Arg0);
3193  auto Arg0Ty = Arg0->getType();
3194  auto PTy0 = FTy->getParamType(0);
3195  if (PTy0 != Arg0Val->getType()) {
3196  if (Arg0Ty->isArrayType())
3197  Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
3198  else
3199  Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
3200  }
3201  auto Arg1 = EmitScalarExpr(E->getArg(1));
3202  auto PTy1 = FTy->getParamType(1);
3203  if (PTy1 != Arg1->getType())
3204  Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
3205  return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
3206  }
3207 
3208  case Builtin::BI__builtin_ms_va_start:
3209  case Builtin::BI__builtin_ms_va_end:
3210  return RValue::get(
3211  EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
3212  BuiltinID == Builtin::BI__builtin_ms_va_start));
3213 
3214  case Builtin::BI__builtin_ms_va_copy: {
3215  // Lower this manually. We can't reliably determine whether or not any
3216  // given va_copy() is for a Win64 va_list from the calling convention
3217  // alone, because it's legal to do this from a System V ABI function.
3218  // With opaque pointer types, we won't have enough information in LLVM
3219  // IR to determine this from the argument types, either. Best to do it
3220  // now, while we have enough information.
3221  Address DestAddr = EmitMSVAListRef(E->getArg(0));
3222  Address SrcAddr = EmitMSVAListRef(E->getArg(1));
3223 
3224  llvm::Type *BPP = Int8PtrPtrTy;
3225 
3226  DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
3227  DestAddr.getAlignment());
3228  SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
3229  SrcAddr.getAlignment());
3230 
3231  Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
3232  return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
3233  }
3234  }
3235 
3236  // If this is an alias for a lib function (e.g. __builtin_sin), emit
3237  // the call using the normal call path, but using the unmangled
3238  // version of the function name.
3239  if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
3240  return emitLibraryCall(*this, FD, E,
3241  CGM.getBuiltinLibFunction(FD, BuiltinID));
3242 
3243  // If this is a predefined lib function (e.g. malloc), emit the call
3244  // using exactly the normal call path.
3245  if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
3246  return emitLibraryCall(*this, FD, E,
3247  cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
3248 
3249  // Check that a call to a target specific builtin has the correct target
3250  // features.
3251  // This is down here to avoid non-target specific builtins, however, if
3252  // generic builtins start to require generic target features then we
3253  // can move this up to the beginning of the function.
3254  checkTargetFeatures(E, FD);
3255 
3256  // See if we have a target specific intrinsic.
3257  const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
3258  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
3259  StringRef Prefix =
3260  llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
3261  if (!Prefix.empty()) {
3262  IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
3263  // NOTE: we don't need to perform a compatibility flag check here since the
3264  // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters the
3265  // MS builtins via ALL_MS_LANGUAGES, so they are filtered out earlier.
3266  if (IntrinsicID == Intrinsic::not_intrinsic)
3267  IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
3268  }
3269 
3270  if (IntrinsicID != Intrinsic::not_intrinsic) {
3271  SmallVector<Value*, 16> Args;
3272
3273  // Find out if any arguments are required to be integer constant
3274  // expressions.
3275  unsigned ICEArguments = 0;
3276  ASTContext::GetBuiltinTypeError Error;
3277  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3278  assert(Error == ASTContext::GE_None && "Should not codegen an error");
3279 
3280  Function *F = CGM.getIntrinsic(IntrinsicID);
3281  llvm::FunctionType *FTy = F->getFunctionType();
3282 
3283  for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
3284  Value *ArgValue;
3285  // If this is a normal argument, just emit it as a scalar.
3286  if ((ICEArguments & (1 << i)) == 0) {
3287  ArgValue = EmitScalarExpr(E->getArg(i));
3288  } else {
3289  // If this is required to be a constant, constant fold it so that we
3290  // know that the generated intrinsic gets a ConstantInt.
3291  llvm::APSInt Result;
3292  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
3293  assert(IsConst && "Constant arg isn't actually constant?");
3294  (void)IsConst;
3295  ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
3296  }
3297 
3298  // If the intrinsic arg type is different from the builtin arg type
3299  // we need to do a bit cast.
3300  llvm::Type *PTy = FTy->getParamType(i);
3301  if (PTy != ArgValue->getType()) {
3302  assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
3303  "Must be able to losslessly bit cast to param");
3304  ArgValue = Builder.CreateBitCast(ArgValue, PTy);
3305  }
3306 
3307  Args.push_back(ArgValue);
3308  }
3309 
3310  Value *V = Builder.CreateCall(F, Args);
3311  QualType BuiltinRetType = E->getType();
3312 
3313  llvm::Type *RetTy = VoidTy;
3314  if (!BuiltinRetType->isVoidType())
3315  RetTy = ConvertType(BuiltinRetType);
3316 
3317  if (RetTy != V->getType()) {
3318  assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
3319  "Must be able to losslessly bit cast result type");
3320  V = Builder.CreateBitCast(V, RetTy);
3321  }
3322 
3323  return RValue::get(V);
3324  }
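// Illustrative walk-through (hypothetical builtin, not from this file): if a
// target builtin, say __builtin_arm_foo, is tied to an intrinsic through its
// GCCBuiltin/MSBuiltin name, the loop above emits each argument (constant
// folding those marked as ICE arguments), bitcasts any argument whose type
// differs from the intrinsic's parameter type, calls the intrinsic, and
// finally bitcasts the result back to the builtin's declared return type.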
3325 
3326  // See if we have a target specific builtin that needs to be lowered.
3327  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
3328  return RValue::get(V);
3329 
3330  ErrorUnsupported(E, "builtin function");
3331 
3332  // Unknown builtin, for now just dump it out and return undef.
3333  return GetUndefRValue(E->getType());
3334 }
3335 
3336 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
3337  unsigned BuiltinID, const CallExpr *E,
3338  llvm::Triple::ArchType Arch) {
3339  switch (Arch) {
3340  case llvm::Triple::arm:
3341  case llvm::Triple::armeb:
3342  case llvm::Triple::thumb:
3343  case llvm::Triple::thumbeb:
3344  return CGF->EmitARMBuiltinExpr(BuiltinID, E);
3345  case llvm::Triple::aarch64:
3346  case llvm::Triple::aarch64_be:
3347  return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
3348  case llvm::Triple::x86:
3349  case llvm::Triple::x86_64:
3350  return CGF->EmitX86BuiltinExpr(BuiltinID, E);
3351  case llvm::Triple::ppc:
3352  case llvm::Triple::ppc64:
3353  case llvm::Triple::ppc64le:
3354  return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
3355  case llvm::Triple::r600:
3356  case llvm::Triple::amdgcn:
3357  return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
3358  case llvm::Triple::systemz:
3359  return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
3360  case llvm::Triple::nvptx:
3361  case llvm::Triple::nvptx64:
3362  return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
3363  case llvm::Triple::wasm32:
3364  case llvm::Triple::wasm64:
3365  return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
3366  case llvm::Triple::hexagon:
3367  return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
3368  default:
3369  return nullptr;
3370  }
3371 }
3372 
3373 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
3374  const CallExpr *E) {
3375  if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
3376  assert(getContext().getAuxTargetInfo() && "Missing aux target info");
3377  return EmitTargetArchBuiltinExpr(
3378  this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
3379  getContext().getAuxTargetInfo()->getTriple().getArch());
3380  }
3381 
3382  return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
3383  getTarget().getTriple().getArch());
3384 }
3385 
3386 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
3387  NeonTypeFlags TypeFlags,
3388  bool V1Ty=false) {
3389  int IsQuad = TypeFlags.isQuad();
3390  switch (TypeFlags.getEltType()) {
3391  case NeonTypeFlags::Int8:
3392  case NeonTypeFlags::Poly8:
3393  return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
3394  case NeonTypeFlags::Int16:
3395  case NeonTypeFlags::Poly16:
3396  case NeonTypeFlags::Float16:
3397  return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
3398  case NeonTypeFlags::Int32:
3399  return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
3400  case NeonTypeFlags::Int64:
3401  case NeonTypeFlags::Poly64:
3402  return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
3403  case NeonTypeFlags::Poly128:
3404  // FIXME: i128 and f128 don't get full support in Clang and LLVM;
3405  // a lot of the i128 and f128 API is missing,
3406  // so we use v16i8 to represent poly128 and get it pattern matched.
3407  return llvm::VectorType::get(CGF->Int8Ty, 16);
3408  case NeonTypeFlags::Float32:
3409  return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
3410  case NeonTypeFlags::Float64:
3411  return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
3412  }
3413  llvm_unreachable("Unknown vector element type!");
3414 }
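// Worked example (not part of the original source): with eltType == Int32 and
// isQuad() == true, the switch above returns
//   llvm::VectorType::get(CGF->Int32Ty, 2 << 1)   // i.e. <4 x i32>
// while the non-quad form yields <2 x i32>, and V1Ty forces a single-element
// vector regardless of the quad bit.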
3415 
3416 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
3417  NeonTypeFlags IntTypeFlags) {
3418  int IsQuad = IntTypeFlags.isQuad();
3419  switch (IntTypeFlags.getEltType()) {
3420  case NeonTypeFlags::Int32:
3421  return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
3422  case NeonTypeFlags::Int64:
3423  return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
3424  default:
3425  llvm_unreachable("Type can't be converted to floating-point!");
3426  }
3427 }
3428 
3429 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
3430  unsigned nElts = V->getType()->getVectorNumElements();
3431  Value* SV = llvm::ConstantVector::getSplat(nElts, C);
3432  return Builder.CreateShuffleVector(V, V, SV, "lane");
3433 }
3434 
3435 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
3436  const char *name,
3437  unsigned shift, bool rightshift) {
3438  unsigned j = 0;
3439  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3440  ai != ae; ++ai, ++j)
3441  if (shift > 0 && shift == j)
3442  Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
3443  else
3444  Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
3445 
3446  return Builder.CreateCall(F, Ops, name);
3447 }
3448 
3449 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
3450  bool neg) {
3451  int SV = cast<ConstantInt>(V)->getSExtValue();
3452  return ConstantInt::get(Ty, neg ? -SV : SV);
3453 }
3454 
3455 // \brief Right-shift a vector by a constant.
3456 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
3457  llvm::Type *Ty, bool usgn,
3458  const char *name) {
3459  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
3460 
3461  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
3462  int EltSize = VTy->getScalarSizeInBits();
3463 
3464  Vec = Builder.CreateBitCast(Vec, Ty);
3465 
3466  // lshr/ashr are undefined when the shift amount is equal to the vector
3467  // element size.
3468  if (ShiftAmt == EltSize) {
3469  if (usgn) {
3470  // Right-shifting an unsigned value by its size yields 0.
3471  return llvm::ConstantAggregateZero::get(VTy);
3472  } else {
3473  // Right-shifting a signed value by its size is equivalent
3474  // to a shift of size-1.
3475  --ShiftAmt;
3476  Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
3477  }
3478  }
3479 
3480  Shift = EmitNeonShiftVector(Shift, Ty, false);
3481  if (usgn)
3482  return Builder.CreateLShr(Vec, Shift, name);
3483  else
3484  return Builder.CreateAShr(Vec, Shift, name);
3485 }
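// Worked example (not part of the original source): right-shifting a
// <4 x i32> vector by the constant 32 would be undefined for lshr/ashr, so
// the code above returns a zero vector in the unsigned case and rewrites the
// shift amount to 31 (an ashr by EltSize - 1) in the signed case; any smaller
// constant goes straight to CreateLShr/CreateAShr.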
3486 
3487 enum {
3488  AddRetType = (1 << 0),
3489  Add1ArgType = (1 << 1),
3490  Add2ArgTypes = (1 << 2),
3491 
3492  VectorizeRetType = (1 << 3),
3493  VectorizeArgTypes = (1 << 4),
3494 
3495  InventFloatType = (1 << 5),
3496  UnsignedAlts = (1 << 6),
3497 
3498  Use64BitVectors = (1 << 7),
3499  Use128BitVectors = (1 << 8),
3500 
3501  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
3502  VectorRet = AddRetType | VectorizeRetType,
3503  VectorRetGetArgs01 =
3504  AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
3505  FpCmpzModifiers =
3506  AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
3507 };
3508 
3509 namespace {
3510 struct NeonIntrinsicInfo {
3511  const char *NameHint;
3512  unsigned BuiltinID;
3513  unsigned LLVMIntrinsic;
3514  unsigned AltLLVMIntrinsic;
3515  unsigned TypeModifier;
3516 
3517  bool operator<(unsigned RHSBuiltinID) const {
3518  return BuiltinID < RHSBuiltinID;
3519  }
3520  bool operator<(const NeonIntrinsicInfo &TE) const {
3521  return BuiltinID < TE.BuiltinID;
3522  }
3523 };
3524 } // end anonymous namespace
3525 
3526 #define NEONMAP0(NameBase) \
3527  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
3528 
3529 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
3530  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
3531  Intrinsic::LLVMIntrinsic, 0, TypeModifier }
3532 
3533 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
3534  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
3535  Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
3536  TypeModifier }
3537 
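// Illustrative expansion (not part of the original table): under the macros
// above, the first ARM entry below,
//   NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts)
// is equivalent to writing the initializer by hand as
//   { "vabd_v", NEON::BI__builtin_neon_vabd_v,
//     Intrinsic::arm_neon_vabdu, Intrinsic::arm_neon_vabds,
//     Add1ArgType | UnsignedAlts }
// where UnsignedAlts tells the lowering to choose between the two intrinsics
// based on the signedness of the element type.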
3538 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
3539  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3540  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3541  NEONMAP1(vabs_v, arm_neon_vabs, 0),
3542  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
3543  NEONMAP0(vaddhn_v),
3544  NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
3545  NEONMAP1(vaeseq_v, arm_neon_aese, 0),
3546  NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
3547  NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
3548  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
3549  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
3550  NEONMAP1(vcage_v, arm_neon_vacge, 0),
3551  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
3552  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
3553  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
3554  NEONMAP1(vcale_v, arm_neon_vacge, 0),
3555  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
3556  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
3557  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
3558  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
3559  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
3560  NEONMAP1(vclz_v, ctlz, Add1ArgType),
3561  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3562  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3563  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3564  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
3565  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
3566  NEONMAP0(vcvt_f32_v),
3567  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3568  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3569  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3570  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3571  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3572  NEONMAP0(vcvt_s32_v),
3573  NEONMAP0(vcvt_s64_v),
3574  NEONMAP0(vcvt_u32_v),
3575  NEONMAP0(vcvt_u64_v),
3576  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
3577  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
3578  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
3579  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
3580  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
3581  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
3582  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
3583  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
3584  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
3585  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
3586  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
3587  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
3588  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
3589  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
3590  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
3591  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
3592  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
3593  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
3594  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
3595  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
3596  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
3597  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
3598  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
3599  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
3600  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
3601  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
3602  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
3603  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
3604  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
3605  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
3606  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
3607  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
3608  NEONMAP0(vcvtq_f32_v),
3609  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3610  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3611  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3612  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3613  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3614  NEONMAP0(vcvtq_s32_v),
3615  NEONMAP0(vcvtq_s64_v),
3616  NEONMAP0(vcvtq_u32_v),
3617  NEONMAP0(vcvtq_u64_v),
3618  NEONMAP0(vext_v),
3619  NEONMAP0(vextq_v),
3620  NEONMAP0(vfma_v),
3621  NEONMAP0(vfmaq_v),
3622  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3623  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3624  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3625  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3626  NEONMAP0(vld1_dup_v),
3627  NEONMAP1(vld1_v, arm_neon_vld1, 0),
3628  NEONMAP0(vld1q_dup_v),
3629  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
3630  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
3631  NEONMAP1(vld2_v, arm_neon_vld2, 0),
3632  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
3633  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
3634  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
3635  NEONMAP1(vld3_v, arm_neon_vld3, 0),
3636  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
3637  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
3638  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
3639  NEONMAP1(vld4_v, arm_neon_vld4, 0),
3640  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
3641  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
3642  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3643  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
3644  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
3645  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3646  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3647  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
3648  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
3649  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3650  NEONMAP0(vmovl_v),
3651  NEONMAP0(vmovn_v),
3652  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
3653  NEONMAP0(vmull_v),
3654  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
3655  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3656  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3657  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
3658  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3659  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3660  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
3661  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
3662  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
3663  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
3664  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
3665  NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3666  NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3667  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
3668  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
3669  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
3670  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
3671  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
3672  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
3673  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
3674  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
3675  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
3676  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
3677  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
3678  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3679  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3680  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3681  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3682  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3683  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3684  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
3685  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
3686  NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3687  NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3688  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
3689  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3690  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3691  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
3692  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
3693  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3694  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3695  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
3696  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
3697  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
3698  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
3699  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
3700  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
3701  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
3702  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
3703  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
3704  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
3705  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
3706  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
3707  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3708  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3709  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3710  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3711  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3712  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3713  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
3714  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
3715  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
3716  NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
3717  NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
3718  NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
3719  NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
3720  NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
3721  NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
3722  NEONMAP0(vshl_n_v),
3723  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3724  NEONMAP0(vshll_n_v),
3725  NEONMAP0(vshlq_n_v),
3726  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3727  NEONMAP0(vshr_n_v),
3728  NEONMAP0(vshrn_n_v),
3729  NEONMAP0(vshrq_n_v),
3730  NEONMAP1(vst1_v, arm_neon_vst1, 0),
3731  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
3732  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
3733  NEONMAP1(vst2_v, arm_neon_vst2, 0),
3734  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
3735  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
3736  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
3737  NEONMAP1(vst3_v, arm_neon_vst3, 0),
3738  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
3739  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
3740  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
3741  NEONMAP1(vst4_v, arm_neon_vst4, 0),
3742  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
3743  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
3744  NEONMAP0(vsubhn_v),
3745  NEONMAP0(vtrn_v),
3746  NEONMAP0(vtrnq_v),
3747  NEONMAP0(vtst_v),
3748  NEONMAP0(vtstq_v),
3749  NEONMAP0(vuzp_v),
3750  NEONMAP0(vuzpq_v),
3751  NEONMAP0(vzip_v),
3752  NEONMAP0(vzipq_v)
3753 };
3754 
3755 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
3756  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
3757  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
3758  NEONMAP0(vaddhn_v),
3759  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
3760  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
3761  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
3762  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
3763  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
3764  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
3765  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
3766  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
3767  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
3768  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
3769  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
3770  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
3771  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
3772  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
3773  NEONMAP1(vclz_v, ctlz, Add1ArgType),
3774  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3775  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3776  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3777  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
3778  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
3779  NEONMAP0(vcvt_f32_v),
3780  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3781  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3782  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3783  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3784  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3785  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3786  NEONMAP0(vcvtq_f32_v),
3787  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3788  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3789  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3790  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3791  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3792  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3793  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
3794  NEONMAP0(vext_v),
3795  NEONMAP0(vextq_v),
3796  NEONMAP0(vfma_v),
3797  NEONMAP0(vfmaq_v),
3798  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3799  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3800  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3801  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3802  NEONMAP0(vmovl_v),
3803  NEONMAP0(vmovn_v),
3804  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
3805  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
3806  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
3807  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3808  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3809  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
3810  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
3811  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
3812  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3813  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3814  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
3815  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
3816  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
3817  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
3818  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
3819  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
3820  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
3821  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
3822  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
3823  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
3824  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
3825  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3826  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3827  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
3828  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3829  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
3830  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3831  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
3832  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
3833  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3834  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3835  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
3836  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3837  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3838  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
3839  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
3840  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3841  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3842  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3843  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3844  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3845  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3846  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3847  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3848  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
3849  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
3850  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
3851  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
3852  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
3853  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
3854  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
3855  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
3856  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
3857  NEONMAP0(vshl_n_v),
3858  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3859  NEONMAP0(vshll_n_v),
3860  NEONMAP0(vshlq_n_v),
3861  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3862  NEONMAP0(vshr_n_v),
3863  NEONMAP0(vshrn_n_v),
3864  NEONMAP0(vshrq_n_v),
3865  NEONMAP0(vsubhn_v),
3866  NEONMAP0(vtst_v),
3867  NEONMAP0(vtstq_v),
3868 };
3869 
3870 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
3871  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
3872  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
3873  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
3874  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3875  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3876  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3877  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3878  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3879  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3880  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3881  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3882  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
3883  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3884  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
3885  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3886  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3887  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3888  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3889  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3890  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3891  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3892  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3893  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3894  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3895  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3896  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3897  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3898  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3899  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3900  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3901  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3902  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3903  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3904  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3905  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3906  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3907  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3908  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3909  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3910  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3911  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3912  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3913  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3914  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3915  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3916  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3917  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3918  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3919  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
3920  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3921  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3922  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3923  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3924  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3925  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3926  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3927  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3928  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3929  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3930  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3931  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3932  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3933  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3934  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3935  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3936  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3937  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3938  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3939  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3940  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
3941  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
3942  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
3943  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3944  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3945  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3946  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3947  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3948  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3949  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3950  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3951  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3952  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3953  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3954  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
3955  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3956  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
3957  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3958  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3959  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
3960  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
3961  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3962  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3963  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
3964  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
3965  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
3966  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
3967  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
3968  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
3969  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
3970  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
3971  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3972  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3973  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3974  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3975  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
3976  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3977  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3978  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3979  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
3980  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3981  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
3982  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
3983  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
3984  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3985  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3986  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
3987  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
3988  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3989  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3990  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
3991  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
3992  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
3993  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
3994  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3995  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3996  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3997  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3998  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
3999  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
4000  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
4001  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4002  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4003  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4004  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4005  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
4006  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
4007  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4008  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4009  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4010  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4011  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
4012  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
4013  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
4014  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
4015  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
4016  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
4017  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
4018  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
4019  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
4020  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
4021  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
4022  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
4023  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
4024  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
4025  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
4026  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
4027  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
4028  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
4029  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
4030  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
4031  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
4032  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
4033  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
4034  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
4035  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
4036  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
4037  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
4038  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
4039  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
4040  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
4041  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
4042  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
4043  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
4044  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
4045  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
4046  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
4047  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
4048  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
4049  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
4050  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
4051  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
4052  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
4053  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
4054  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
4055  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
4056  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
4057  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
4058  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
4059  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
4060  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
4061  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
4062  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
4063 };
4064 
4065 #undef NEONMAP0
4066 #undef NEONMAP1
4067 #undef NEONMAP2
4068 
4069 static bool NEONSIMDIntrinsicsProvenSorted = false;
4070 
4071 static bool AArch64SIMDIntrinsicsProvenSorted = false;
4072 static bool AArch64SISDIntrinsicsProvenSorted = false;
4073 
4074 
4075 static const NeonIntrinsicInfo *
4076 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
4077  unsigned BuiltinID, bool &MapProvenSorted) {
4078 
4079 #ifndef NDEBUG
4080  if (!MapProvenSorted) {
4081  assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
4082  MapProvenSorted = true;
4083  }
4084 #endif
4085 
4086  const NeonIntrinsicInfo *Builtin =
4087  std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
4088 
4089  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
4090  return Builtin;
4091 
4092  return nullptr;
4093 }
4094 
4095 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
4096  unsigned Modifier,
4097  llvm::Type *ArgType,
4098  const CallExpr *E) {
4099  int VectorSize = 0;
4100  if (Modifier & Use64BitVectors)
4101  VectorSize = 64;
4102  else if (Modifier & Use128BitVectors)
4103  VectorSize = 128;
4104 
4105  // Return type.
4106  SmallVector<llvm::Type *, 3> Tys;
4107  if (Modifier & AddRetType) {
4108  llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
4109  if (Modifier & VectorizeRetType)
4110  Ty = llvm::VectorType::get(
4111  Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
4112 
4113  Tys.push_back(Ty);
4114  }
4115 
4116  // Arguments.
4117  if (Modifier & VectorizeArgTypes) {
4118  int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
4119  ArgType = llvm::VectorType::get(ArgType, Elts);
4120  }
4121 
4122  if (Modifier & (Add1ArgType | Add2ArgTypes))
4123  Tys.push_back(ArgType);
4124 
4125  if (Modifier & Add2ArgTypes)
4126  Tys.push_back(ArgType);
4127 
4128  if (Modifier & InventFloatType)
4129  Tys.push_back(FloatTy);
4130 
4131  return CGM.getIntrinsic(IntrinsicID, Tys);
4132 }
4133 
4134 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
4135  const NeonIntrinsicInfo &SISDInfo,
4136  SmallVectorImpl<Value *> &Ops,
4137  const CallExpr *E) {
4138  unsigned BuiltinID = SISDInfo.BuiltinID;
4139  unsigned int Int = SISDInfo.LLVMIntrinsic;
4140  unsigned Modifier = SISDInfo.TypeModifier;
4141  const char *s = SISDInfo.NameHint;
4142 
4143  switch (BuiltinID) {
4144  case NEON::BI__builtin_neon_vcled_s64:
4145  case NEON::BI__builtin_neon_vcled_u64:
4146  case NEON::BI__builtin_neon_vcles_f32:
4147  case NEON::BI__builtin_neon_vcled_f64:
4148  case NEON::BI__builtin_neon_vcltd_s64:
4149  case NEON::BI__builtin_neon_vcltd_u64:
4150  case NEON::BI__builtin_neon_vclts_f32:
4151  case NEON::BI__builtin_neon_vcltd_f64:
4152  case NEON::BI__builtin_neon_vcales_f32:
4153  case NEON::BI__builtin_neon_vcaled_f64:
4154  case NEON::BI__builtin_neon_vcalts_f32:
4155  case NEON::BI__builtin_neon_vcaltd_f64:
4156  // Only one direction of comparisons actually exists: cmle is really a cmge
4157  // with swapped operands. The table gives us the right intrinsic, but we
4158  // still need to do the swap.
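  // For instance (illustrative): vcled_s64(a, b) asks "a <= b"; the table
  // hands back the cmge-style intrinsic, and after the swap below it is
  // emitted as cmge(b, a), which computes the same predicate.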
4159  std::swap(Ops[0], Ops[1]);
4160  break;
4161  }
4162 
4163  assert(Int && "Generic code assumes a valid intrinsic");
4164 
4165  // Determine the type(s) of this overloaded AArch64 intrinsic.
4166  const Expr *Arg = E->getArg(0);
4167  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
4168  Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
4169 
4170  int j = 0;
4171  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
4172  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
4173  ai != ae; ++ai, ++j) {
4174  llvm::Type *ArgTy = ai->getType();
4175  if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
4176  ArgTy->getPrimitiveSizeInBits())
4177  continue;
4178 
4179  assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
4180  // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
4181  // it before inserting.
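  // For example, an i32 immediate feeding a 16-bit SISD operand is truncated
  // to i16 and then inserted into lane 0 of an otherwise-undef vector.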
4182  Ops[j] =
4183  CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
4184  Ops[j] =
4185  CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
4186  }
4187 
4188  Value *Result = CGF.EmitNeonCall(F, Ops, s);
4189  llvm::Type *ResultType = CGF.ConvertType(E->getType());
4190  if (ResultType->getPrimitiveSizeInBits() <
4191  Result->getType()->getPrimitiveSizeInBits())
4192  return CGF.Builder.CreateExtractElement(Result, C0);
4193 
4194  return CGF.Builder.CreateBitCast(Result, ResultType, s);
4195 }
4196 
4197 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
4198  unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
4199  const char *NameHint, unsigned Modifier, const CallExpr *E,
4200  SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
4201  // Get the last argument, which specifies the vector type.
4202  llvm::APSInt NeonTypeConst;
4203  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
4204  if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
4205  return nullptr;
4206 
4207  // Determine the type of this overloaded NEON intrinsic.
4208  NeonTypeFlags Type(NeonTypeConst.getZExtValue());
4209  bool Usgn = Type.isUnsigned();
4210  bool Quad = Type.isQuad();
4211 
4212  llvm::VectorType *VTy = GetNeonType(this, Type);
4213  llvm::Type *Ty = VTy;
4214  if (!Ty)
4215  return nullptr;
4216 
4217  auto getAlignmentValue32 = [&](Address addr) -> Value* {
4218  return Builder.getInt32(addr.getAlignment().getQuantity());
4219  };
4220 
4221  unsigned Int = LLVMIntrinsic;
4222  if ((Modifier & UnsignedAlts) && !Usgn)
4223  Int = AltLLVMIntrinsic;
4224 
4225  switch (BuiltinID) {
4226  default: break;
4227  case NEON::BI__builtin_neon_vabs_v:
4228  case NEON::BI__builtin_neon_vabsq_v:
4229  if (VTy->getElementType()->isFloatingPointTy())
4230  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
4231  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
4232  case NEON::BI__builtin_neon_vaddhn_v: {
4233  llvm::VectorType *SrcTy =
4234  llvm::VectorType::getExtendedElementVectorType(VTy);
4235 
4236  // %sum = add <4 x i32> %lhs, %rhs
4237  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4238  Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
4239  Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
4240 
4241  // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
4242  Constant *ShiftAmt =
4243  ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
4244  Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
4245 
4246  // %res = trunc <4 x i32> %high to <4 x i16>
4247  return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
4248  }
4249  case NEON::BI__builtin_neon_vcale_v:
4250  case NEON::BI__builtin_neon_vcaleq_v:
4251  case NEON::BI__builtin_neon_vcalt_v:
4252  case NEON::BI__builtin_neon_vcaltq_v:
4253  std::swap(Ops[0], Ops[1]);
4254  LLVM_FALLTHROUGH;
4255  case NEON::BI__builtin_neon_vcage_v:
4256  case NEON::BI__builtin_neon_vcageq_v:
4257  case NEON::BI__builtin_neon_vcagt_v:
4258  case NEON::BI__builtin_neon_vcagtq_v: {
4259  llvm::Type *VecFlt = llvm::VectorType::get(
4260  VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
4261  VTy->getNumElements());
4262  llvm::Type *Tys[] = { VTy, VecFlt };
4263  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4264  return EmitNeonCall(F, Ops, NameHint);
4265  }
4266  case NEON::BI__builtin_neon_vclz_v:
4267  case NEON::BI__builtin_neon_vclzq_v:
4268  // We generate a target-independent intrinsic, which needs a second argument
4269  // specifying whether or not clz of zero is undefined; on ARM it isn't.
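  // Illustratively, vclz_s32(x) becomes llvm.ctlz.v2i32(x, i1 false) here:
  // the trailing i1 false says "clz of zero is defined" (it yields 32).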
4270  Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
4271  break;
4272  case NEON::BI__builtin_neon_vcvt_f32_v:
4273  case NEON::BI__builtin_neon_vcvtq_f32_v:
4274  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4275  Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
4276  return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
4277  : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
4278  case NEON::BI__builtin_neon_vcvt_n_f32_v:
4279  case NEON::BI__builtin_neon_vcvt_n_f64_v:
4280  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
4281  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
4282  llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
4283  Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
4284  Function *F = CGM.getIntrinsic(Int, Tys);
4285  return EmitNeonCall(F, Ops, "vcvt_n");
4286  }
4287  case NEON::BI__builtin_neon_vcvt_n_s32_v:
4288  case NEON::BI__builtin_neon_vcvt_n_u32_v:
4289  case NEON::BI__builtin_neon_vcvt_n_s64_v:
4290  case NEON::BI__builtin_neon_vcvt_n_u64_v:
4291  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
4292  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
4293  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
4294  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
4295  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
4296  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4297  return EmitNeonCall(F, Ops, "vcvt_n");
4298  }
4299  case NEON::BI__builtin_neon_vcvt_s32_v:
4300  case NEON::BI__builtin_neon_vcvt_u32_v:
4301  case NEON::BI__builtin_neon_vcvt_s64_v:
4302  case NEON::BI__builtin_neon_vcvt_u64_v:
4303  case NEON::BI__builtin_neon_vcvtq_s32_v:
4304  case NEON::BI__builtin_neon_vcvtq_u32_v:
4305  case NEON::BI__builtin_neon_vcvtq_s64_v:
4306  case NEON::BI__builtin_neon_vcvtq_u64_v: {
4307  Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
4308  return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
4309  : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
4310  }
4311  case NEON::BI__builtin_neon_vcvta_s32_v:
4312  case NEON::BI__builtin_neon_vcvta_s64_v:
4313  case NEON::BI__builtin_neon_vcvta_u32_v:
4314  case NEON::BI__builtin_neon_vcvta_u64_v:
4315  case NEON::BI__builtin_neon_vcvtaq_s32_v:
4316  case NEON::BI__builtin_neon_vcvtaq_s64_v:
4317  case NEON::BI__builtin_neon_vcvtaq_u32_v:
4318  case NEON::BI__builtin_neon_vcvtaq_u64_v:
4319  case NEON::BI__builtin_neon_vcvtn_s32_v:
4320  case NEON::BI__builtin_neon_vcvtn_s64_v:
4321  case NEON::BI__builtin_neon_vcvtn_u32_v:
4322  case NEON::BI__builtin_neon_vcvtn_u64_v:
4323  case NEON::BI__builtin_neon_vcvtnq_s32_v:
4324  case NEON::BI__builtin_neon_vcvtnq_s64_v:
4325  case NEON::BI__builtin_neon_vcvtnq_u32_v:
4326  case NEON::BI__builtin_neon_vcvtnq_u64_v:
4327  case NEON::BI__builtin_neon_vcvtp_s32_v:
4328  case NEON::BI__builtin_neon_vcvtp_s64_v:
4329  case NEON::BI__builtin_neon_vcvtp_u32_v:
4330  case NEON::BI__builtin_neon_vcvtp_u64_v:
4331  case NEON::BI__builtin_neon_vcvtpq_s32_v:
4332  case NEON::BI__builtin_neon_vcvtpq_s64_v:
4333  case NEON::BI__builtin_neon_vcvtpq_u32_v:
4334  case NEON::BI__builtin_neon_vcvtpq_u64_v:
4335  case NEON::BI__builtin_neon_vcvtm_s32_v:
4336  case NEON::BI__builtin_neon_vcvtm_s64_v:
4337  case NEON::BI__builtin_neon_vcvtm_u32_v:
4338  case NEON::BI__builtin_neon_vcvtm_u64_v:
4339  case NEON::BI__builtin_neon_vcvtmq_s32_v:
4340  case NEON::BI__builtin_neon_vcvtmq_s64_v:
4341  case NEON::BI__builtin_neon_vcvtmq_u32_v:
4342  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
4343  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
4344  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
4345  }
4346  case NEON::BI__builtin_neon_vext_v:
4347  case NEON::BI__builtin_neon_vextq_v: {
4348  int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
4349  SmallVector<uint32_t, 16> Indices;
4350  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
4351  Indices.push_back(i+CV);
4352 
4353  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4354  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4355  return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
4356  }
4357  case NEON::BI__builtin_neon_vfma_v:
4358  case NEON::BI__builtin_neon_vfmaq_v: {
4359  Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
4360  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4361  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4362  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4363 
4364  // NEON intrinsic puts accumulator first, unlike the LLVM fma.
4365  return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
4366  }
4367  case NEON::BI__builtin_neon_vld1_v:
4368  case NEON::BI__builtin_neon_vld1q_v: {
4369  llvm::Type *Tys[] = {Ty, Int8PtrTy};
4370  Ops.push_back(getAlignmentValue32(PtrOp0));
4371  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
4372  }
4373  case NEON::BI__builtin_neon_vld2_v:
4374  case NEON::BI__builtin_neon_vld2q_v:
4375  case NEON::BI__builtin_neon_vld3_v:
4376  case NEON::BI__builtin_neon_vld3q_v:
4377  case NEON::BI__builtin_neon_vld4_v:
4378  case NEON::BI__builtin_neon_vld4q_v: {
4379  llvm::Type *Tys[] = {Ty, Int8PtrTy};
4380  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4381  Value *Align = getAlignmentValue32(PtrOp1);
4382  Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
4383  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4384  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4385  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4386  }
4387  case NEON::BI__builtin_neon_vld1_dup_v:
4388  case NEON::BI__builtin_neon_vld1q_dup_v: {
4389  Value *V = UndefValue::get(Ty);
4390  Ty = llvm::PointerType::getUnqual(VTy->getElementType());
4391  PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
4392  LoadInst *Ld = Builder.CreateLoad(PtrOp0);
4393  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
4394  Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
4395  return EmitNeonSplat(Ops[0], CI);
4396  }
4397  case NEON::BI__builtin_neon_vld2_lane_v:
4398  case NEON::BI__builtin_neon_vld2q_lane_v:
4399  case NEON::BI__builtin_neon_vld3_lane_v:
4400  case NEON::BI__builtin_neon_vld3q_lane_v:
4401  case NEON::BI__builtin_neon_vld4_lane_v:
4402  case NEON::BI__builtin_neon_vld4q_lane_v: {
4403  llvm::Type *Tys[] = {Ty, Int8PtrTy};
4404  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4405  for (unsigned I = 2; I < Ops.size() - 1; ++I)
4406  Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
4407  Ops.push_back(getAlignmentValue32(PtrOp1));
4408  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
4409  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4410  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4411  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4412  }
4413  case NEON::BI__builtin_neon_vmovl_v: {
4414  llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
4415  Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
4416  if (Usgn)
4417  return Builder.CreateZExt(Ops[0], Ty, "vmovl");
4418  return Builder.CreateSExt(Ops[0], Ty, "vmovl");
4419  }
4420  case NEON::BI__builtin_neon_vmovn_v: {
4421  llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4422  Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
4423  return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
4424  }
4425  case NEON::BI__builtin_neon_vmull_v:
4426  // FIXME: the integer vmull operations could be emitted in terms of pure
4427  // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
4428  // hoisting the exts outside loops. Until GlobalISel comes along and can
4429  // see through such movement, this leads to bad CodeGen. So we need an
4430  // intrinsic for now.
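  // Roughly, the pure-IR expansion this avoids would look like:
  //   %lhs = sext <4 x i16> %a to <4 x i32>
  //   %rhs = sext <4 x i16> %b to <4 x i32>
  //   %res = mul <4 x i32> %lhs, %rhs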
4431  Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
4432  Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
4433  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
4434  case NEON::BI__builtin_neon_vpadal_v:
4435  case NEON::BI__builtin_neon_vpadalq_v: {
4436  // The source operand type has twice as many elements of half the size.
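  // For example, for vpadal_s16 the accumulator/result type Ty is <2 x i32>,
  // while NarrowTy, the pairwise-added source, is <4 x i16>.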
4437  unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4438  llvm::Type *EltTy =
4439  llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4440  llvm::Type *NarrowTy =
4441  llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4442  llvm::Type *Tys[2] = { Ty, NarrowTy };
4443  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
4444  }
4445  case NEON::BI__builtin_neon_vpaddl_v:
4446  case NEON::BI__builtin_neon_vpaddlq_v: {
4447  // The source operand type has twice as many elements of half the size.
4448  unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4449  llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4450  llvm::Type *NarrowTy =
4451  llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4452  llvm::Type *Tys[2] = { Ty, NarrowTy };
4453  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
4454  }
4455  case NEON::BI__builtin_neon_vqdmlal_v:
4456  case NEON::BI__builtin_neon_vqdmlsl_v: {
4457  SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
4458  Ops[1] =
4459  EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
4460  Ops.resize(2);
4461  return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
4462  }
4463  case NEON::BI__builtin_neon_vqshl_n_v:
4464  case NEON::BI__builtin_neon_vqshlq_n_v:
4465  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
4466  1, false);
4467  case NEON::BI__builtin_neon_vqshlu_n_v:
4468  case NEON::BI__builtin_neon_vqshluq_n_v:
4469  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
4470  1, false);
4471  case NEON::BI__builtin_neon_vrecpe_v:
4472  case NEON::BI__builtin_neon_vrecpeq_v:
4473  case NEON::BI__builtin_neon_vrsqrte_v:
4474  case NEON::BI__builtin_neon_vrsqrteq_v:
4475  Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
4476  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
4477 
4478  case NEON::BI__builtin_neon_vrshr_n_v:
4479  case NEON::BI__builtin_neon_vrshrq_n_v:
4480  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
4481  1, true);
4482  case NEON::BI__builtin_neon_vshl_n_v:
4483  case NEON::BI__builtin_neon_vshlq_n_v:
4484  Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
4485  return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
4486  "vshl_n");
4487  case NEON::BI__builtin_neon_vshll_n_v: {
4488  llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
4489  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4490  if (Usgn)
4491  Ops[0] = Builder.CreateZExt(Ops[0], VTy);
4492  else
4493  Ops[0] = Builder.CreateSExt(Ops[0], VTy);
4494  Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
4495  return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
4496  }
4497  case NEON::BI__builtin_neon_vshrn_n_v: {
4498  llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4499  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4500  Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
4501  if (Usgn)
4502  Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
4503  else
4504  Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
4505  return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
4506  }
4507  case NEON::BI__builtin_neon_vshr_n_v:
4508  case NEON::BI__builtin_neon_vshrq_n_v:
4509  return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
4510  case NEON::BI__builtin_neon_vst1_v:
4511  case NEON::BI__builtin_neon_vst1q_v:
4512  case NEON::BI__builtin_neon_vst2_v:
4513  case NEON::BI__builtin_neon_vst2q_v:
4514  case NEON::BI__builtin_neon_vst3_v:
4515  case NEON::BI__builtin_neon_vst3q_v:
4516  case NEON::BI__builtin_neon_vst4_v:
4517  case NEON::BI__builtin_neon_vst4q_v:
4518  case NEON::BI__builtin_neon_vst2_lane_v:
4519  case NEON::BI__builtin_neon_vst2q_lane_v:
4520  case NEON::BI__builtin_neon_vst3_lane_v:
4521  case NEON::BI__builtin_neon_vst3q_lane_v:
4522  case NEON::BI__builtin_neon_vst4_lane_v:
4523  case NEON::BI__builtin_neon_vst4q_lane_v: {
4524  llvm::Type *Tys[] = {Int8PtrTy, Ty};
4525  Ops.push_back(getAlignmentValue32(PtrOp0));
4526  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
4527  }
4528  case NEON::BI__builtin_neon_vsubhn_v: {
4529  llvm::VectorType *SrcTy =
4530  llvm::VectorType::getExtendedElementVectorType(VTy);
4531 
4532  // %diff = sub <4 x i32> %lhs, %rhs
4533  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4534  Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
4535  Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
4536 
4537  // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
4538  Constant *ShiftAmt =
4539  ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
4540  Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
4541 
4542  // %res = trunc <4 x i32> %high to <4 x i16>
4543  return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
4544  }
4545  case NEON::BI__builtin_neon_vtrn_v:
4546  case NEON::BI__builtin_neon_vtrnq_v: {
4547  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4548  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4549  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4550  Value *SV = nullptr;
4551 
4552  for (unsigned vi = 0; vi != 2; ++vi) {
4553  SmallVector<uint32_t, 16> Indices;
4554  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4555  Indices.push_back(i+vi);
4556  Indices.push_back(i+e+vi);
4557  }
4558  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4559  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
4560  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4561  }
4562  return SV;
4563  }
4564  case NEON::BI__builtin_neon_vtst_v:
4565  case NEON::BI__builtin_neon_vtstq_v: {
4566  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4567  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4568  Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
4569  Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
4570  ConstantAggregateZero::get(Ty));
4571  return Builder.CreateSExt(Ops[0], Ty, "vtst");
4572  }
4573  case NEON::BI__builtin_neon_vuzp_v:
4574  case NEON::BI__builtin_neon_vuzpq_v: {
4575  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4576  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4577  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4578  Value *SV = nullptr;
4579 
4580  for (unsigned vi = 0; vi != 2; ++vi) {
4581  SmallVector<uint32_t, 16> Indices;
4582  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
4583  Indices.push_back(2*i+vi);
4584 
4585  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4586  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
4587  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4588  }
4589  return SV;
4590  }
4591  case NEON::BI__builtin_neon_vzip_v:
4592  case NEON::BI__builtin_neon_vzipq_v: {
4593  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4594  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4595  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4596  Value *SV = nullptr;
4597 
4598  for (unsigned vi = 0; vi != 2; ++vi) {
4599  SmallVector<uint32_t, 16> Indices;
4600  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4601  Indices.push_back((i + vi*e) >> 1);
4602  Indices.push_back(((i + vi*e) >> 1)+e);
4603  }
4604  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4605  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
4606  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4607  }
4608  return SV;
4609  }
4610  }
4611 
4612  assert(Int && "Expected valid intrinsic number");
4613 
4614  // Determine the type(s) of this overloaded AArch64 intrinsic.
4615  Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
4616 
4617  Value *Result = EmitNeonCall(F, Ops, NameHint);
4618  llvm::Type *ResultType = ConvertType(E->getType());
4619  // Cast the one-element vector result back to the
4620  // scalar type expected by the builtin.
4621  return Builder.CreateBitCast(Result, ResultType, NameHint);
4622 }
4623 
4624 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
4625  Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
4626  const CmpInst::Predicate Ip, const Twine &Name) {
4627  llvm::Type *OTy = Op->getType();
4628 
4629  // FIXME: this is utterly horrific. We should not be looking at previous
4630  // codegen context to find out what needs doing. Unfortunately TableGen
4631  // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
4632  // (etc).
4633  if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
4634  OTy = BI->getOperand(0)->getType();
4635 
4636  Op = Builder.CreateBitCast(Op, OTy);
4637  if (OTy->getScalarType()->isFloatingPointTy()) {
4638  Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
4639  } else {
4640  Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
4641  }
4642  return Builder.CreateSExt(Op, Ty, Name);
4643 }
4644 
4645 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
4646  Value *ExtOp, Value *IndexOp,
4647  llvm::Type *ResTy, unsigned IntID,
4648  const char *Name) {
4649  SmallVector<Value *, 2> TblOps;
4650  if (ExtOp)
4651  TblOps.push_back(ExtOp);
4652 
4653  // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
4654  SmallVector<uint32_t, 16> Indices;
4655  llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
4656  for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
4657  Indices.push_back(2*i);
4658  Indices.push_back(2*i+1);
4659  }
4660 
4661  int PairPos = 0, End = Ops.size() - 1;
4662  while (PairPos < End) {
4663  TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4664  Ops[PairPos+1], Indices,
4665  Name));
4666  PairPos += 2;
4667  }
4668 
4669  // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
4670  // of the last 128-bit lookup table with zero.
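  // For example, three 64-bit tables t0, t1, t2 are packed as the pairs
  // (t0, t1) and (t2, zero), each shuffled into one 128-bit table operand.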
4671  if (PairPos == End) {
4672  Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
4673  TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4674  ZeroTbl, Indices, Name));
4675  }
4676 
4677  Function *TblF;
4678  TblOps.push_back(IndexOp);
4679  TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
4680 
4681  return CGF.EmitNeonCall(TblF, TblOps, Name);
4682 }
4683 
4684 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
4685  unsigned Value;
4686  switch (BuiltinID) {
4687  default:
4688  return nullptr;
4689  case ARM::BI__builtin_arm_nop:
4690  Value = 0;
4691  break;
4692  case ARM::BI__builtin_arm_yield:
4693  case ARM::BI__yield:
4694  Value = 1;
4695  break;
4696  case ARM::BI__builtin_arm_wfe:
4697  case ARM::BI__wfe:
4698  Value = 2;
4699  break;
4700  case ARM::BI__builtin_arm_wfi:
4701  case ARM::BI__wfi:
4702  Value = 3;
4703  break;
4704  case ARM::BI__builtin_arm_sev:
4705  case ARM::BI__sev:
4706  Value = 4;
4707  break;
4708  case ARM::BI__builtin_arm_sevl:
4709  case ARM::BI__sevl:
4710  Value = 5;
4711  break;
4712  }
4713 
4714  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
4715  llvm::ConstantInt::get(Int32Ty, Value));
4716 }
4717 
4718 // Generates the IR for the read/write special register builtin.
4719 // ValueType is the type of the value that is to be written or read;
4720 // RegisterType is the type of the register being written to or read from.
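// Illustratively, __builtin_arm_rsr64("some_reg") is lowered to a call of
// llvm.read_register.i64 whose only operand is metadata naming the register,
// i.e. !{!"some_reg"} ("some_reg" here is just a placeholder name).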
4721 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
4722  const CallExpr *E,
4723  llvm::Type *RegisterType,
4724  llvm::Type *ValueType,
4725  bool IsRead,
4726  StringRef SysReg = "") {
4727  // The read and write register intrinsics only support 32- and 64-bit operations.
4728  assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
4729  && "Unsupported size for register.");
4730 
4731  CodeGen::CGBuilderTy &Builder = CGF.Builder;
4732  CodeGen::CodeGenModule &CGM = CGF.CGM;
4733  LLVMContext &Context = CGM.getLLVMContext();
4734 
4735  if (SysReg.empty()) {
4736  const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
4737  SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
4738  }
4739 
4740  llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
4741  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4742  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4743 
4744  llvm::Type *Types[] = { RegisterType };
4745 
4746  bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
4747  assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
4748  && "Can't fit 64-bit value in 32-bit register");
4749 
4750  if (IsRead) {
4751  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
4752  llvm::Value *Call = Builder.CreateCall(F, Metadata);
4753 
4754  if (MixedTypes)
4755  // Read into a 64-bit register, then truncate the result to 32 bits.
4756  return Builder.CreateTrunc(Call, ValueType);
4757 
4758  if (ValueType->isPointerTy())
4759  // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
4760  return Builder.CreateIntToPtr(Call, ValueType);
4761 
4762  return Call;
4763  }
4764 
4765  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
4766  llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
4767  if (MixedTypes) {
4768  // Extend the 32-bit write value to 64 bits before passing it to the write intrinsic.
4769  ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
4770  return Builder.CreateCall(F, { Metadata, ArgValue });
4771  }
4772 
4773  if (ValueType->isPointerTy()) {
4774  // Have a VoidPtrTy ArgValue but the write intrinsic expects an i32/i64.
4775  ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
4776  return Builder.CreateCall(F, { Metadata, ArgValue });
4777  }
4778 
4779  return Builder.CreateCall(F, { Metadata, ArgValue });
4780 }
4781 
4782 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
4783 /// argument that specifies the vector type.
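/// For example, vld1q_v-style builtins carry a trailing constant encoding the
/// vector type and so return true, while the lane get/set, SHA1, and
/// MoveToCoprocessor builtins listed below do not.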
4784 static bool HasExtraNeonArgument(unsigned BuiltinID) {
4785  switch (BuiltinID) {
4786  default: break;
4787  case NEON::BI__builtin_neon_vget_lane_i8:
4788  case NEON::BI__builtin_neon_vget_lane_i16:
4789  case NEON::BI__builtin_neon_vget_lane_i32:
4790  case NEON::BI__builtin_neon_vget_lane_i64:
4791  case NEON::BI__builtin_neon_vget_lane_f32:
4792  case NEON::BI__builtin_neon_vgetq_lane_i8:
4793  case NEON::BI__builtin_neon_vgetq_lane_i16:
4794  case NEON::BI__builtin_neon_vgetq_lane_i32:
4795  case NEON::BI__builtin_neon_vgetq_lane_i64:
4796  case NEON::BI__builtin_neon_vgetq_lane_f32:
4797  case NEON::BI__builtin_neon_vset_lane_i8:
4798  case NEON::BI__builtin_neon_vset_lane_i16:
4799  case NEON::BI__builtin_neon_vset_lane_i32:
4800  case NEON::BI__builtin_neon_vset_lane_i64:
4801  case NEON::BI__builtin_neon_vset_lane_f32:
4802  case NEON::BI__builtin_neon_vsetq_lane_i8:
4803  case NEON::BI__builtin_neon_vsetq_lane_i16:
4804  case NEON::BI__builtin_neon_vsetq_lane_i32:
4805  case NEON::BI__builtin_neon_vsetq_lane_i64:
4806  case NEON::BI__builtin_neon_vsetq_lane_f32:
4807  case NEON::BI__builtin_neon_vsha1h_u32:
4808  case NEON::BI__builtin_neon_vsha1cq_u32:
4809  case NEON::BI__builtin_neon_vsha1pq_u32:
4810  case NEON::BI__builtin_neon_vsha1mq_u32:
4811  case clang::ARM::BI_MoveToCoprocessor:
4812  case clang::ARM::BI_MoveToCoprocessor2:
4813  return false;
4814  }
4815  return true;
4816 }
4817 
4818 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
4819  const CallExpr *E) {
4820  if (auto Hint = GetValueForARMHint(BuiltinID))
4821  return Hint;
4822 
4823  if (BuiltinID == ARM::BI__emit) {
4824  bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
4825  llvm::FunctionType *FTy =
4826  llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
4827 
4828  APSInt Value;
4829  if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
4830  llvm_unreachable("Sema will ensure that the parameter is constant");
4831 
4832  uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
4833 
4834  llvm::InlineAsm *Emit =
4835  IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
4836  /*SideEffects=*/true)
4837  : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
4838  /*SideEffects=*/true);
4839 
4840  return Builder.CreateCall(Emit);
4841  }
4842 
4843  if (BuiltinID == ARM::BI__builtin_arm_dbg) {
4844  Value *Option = EmitScalarExpr(E->getArg(0));
4845  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
4846  }
4847 
4848  if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
4849  Value *Address = EmitScalarExpr(E->getArg(0));
4850  Value *RW = EmitScalarExpr(E->getArg(1));
4851  Value *IsData = EmitScalarExpr(E->getArg(2));
4852 
4853  // Locality is not supported on the ARM target.
4854  Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
4855 
4856  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4857  return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4858  }
4859 
4860  if (BuiltinID == ARM::BI__builtin_arm_rbit) {
4861  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4862  return Builder.CreateCall(
4863  CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4864  }
4865 
4866  if (BuiltinID == ARM::BI__clear_cache) {
4867  assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4868  const FunctionDecl *FD = E->getDirectCallee();
4869  Value *Ops[2];
4870  for (unsigned i = 0; i < 2; i++)
4871  Ops[i] = EmitScalarExpr(E->getArg(i));
4872  llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4873  llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4874  StringRef Name = FD->getName();
4875  return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4876  }
4877 
4878  if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
4879  BuiltinID == ARM::BI__builtin_arm_mcrr2) {
4880  Function *F;
4881 
4882  switch (BuiltinID) {
4883  default: llvm_unreachable("unexpected builtin");
4884  case ARM::BI__builtin_arm_mcrr:
4885  F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
4886  break;
4887  case ARM::BI__builtin_arm_mcrr2:
4888  F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
4889  break;
4890  }
4891 
4892  // The MCRR{2} instruction has 5 operands, but
4893  // the intrinsic has only 4 because Rt and Rt2
4894  // are represented as a single unsigned 64-bit
4895  // integer in the intrinsic definition; internally,
4896  // however, they are handled as two separate
4897  // 32-bit integers.
4898 
4899  Value *Coproc = EmitScalarExpr(E->getArg(0));
4900  Value *Opc1 = EmitScalarExpr(E->getArg(1));
4901  Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
4902  Value *CRm = EmitScalarExpr(E->getArg(3));
4903 
4904  Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4905  Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
4906  Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
4907  Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
4908 
4909  return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
4910  }
4911 
4912  if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
4913  BuiltinID == ARM::BI__builtin_arm_mrrc2) {
4914  Function *F;
4915 
4916  switch (BuiltinID) {
4917  default: llvm_unreachable("unexpected builtin");
4918  case ARM::BI__builtin_arm_mrrc:
4919  F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
4920  break;
4921  case ARM::BI__builtin_arm_mrrc2:
4922  F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
4923  break;
4924  }
4925 
4926  Value *Coproc = EmitScalarExpr(E->getArg(0));
4927  Value *Opc1 = EmitScalarExpr(E->getArg(1));
4928  Value *CRm = EmitScalarExpr(E->getArg(2));
4929  Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
4930 
4931  // The intrinsic returns an unsigned 64-bit integer,
4932  // represented as two 32-bit integers.
4933 
4934  Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
4935  Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
4936  Rt = Builder.CreateZExt(Rt, Int64Ty);
4937  Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
4938 
4939  Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
4940  RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
4941  RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
4942 
4943  return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
4944  }
4945 
4946  if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
4947  ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
4948  BuiltinID == ARM::BI__builtin_arm_ldaex) &&
4949  getContext().getTypeSize(E->getType()) == 64) ||
4950  BuiltinID == ARM::BI__ldrexd) {
4951  Function *F;
4952 
4953  switch (BuiltinID) {
4954  default: llvm_unreachable("unexpected builtin");
4955  case ARM::BI__builtin_arm_ldaex:
4956  F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
4957  break;
4958  case ARM::BI__builtin_arm_ldrexd:
4959  case ARM::BI__builtin_arm_ldrex:
4960  case ARM::BI__ldrexd:
4961  F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
4962  break;
4963  }
4964 
4965  Value *LdPtr = EmitScalarExpr(E->getArg(0));
4966  Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4967  "ldrexd");
4968 
4969  Value *Val0 = Builder.CreateExtractValue(Val, 1);
4970  Value *Val1 = Builder.CreateExtractValue(Val, 0);
4971  Val0 = Builder.CreateZExt(Val0, Int64Ty);
4972  Val1 = Builder.CreateZExt(Val1, Int64Ty);
4973 
4974  Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
4975  Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4976  Val = Builder.CreateOr(Val, Val1);
4977  return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4978  }
4979 
4980  if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
4981  BuiltinID == ARM::BI__builtin_arm_ldaex) {
4982  Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4983 
4984  QualType Ty = E->getType();
4985  llvm::Type *RealResTy = ConvertType(Ty);
4986  llvm::Type *PtrTy = llvm::IntegerType::get(
4987  getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
4988  LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
4989 
4990  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
4991  ? Intrinsic::arm_ldaex
4992  : Intrinsic::arm_ldrex,
4993  PtrTy);
4994  Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
4995 
4996  if (RealResTy->isPointerTy())
4997  return Builder.CreateIntToPtr(Val, RealResTy);
4998  else {
4999  llvm::Type *IntResTy = llvm::IntegerType::get(
5000  getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
5001  Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
5002  return Builder.CreateBitCast(Val, RealResTy);
5003  }
5004  }
5005 
5006  if (BuiltinID == ARM::BI__builtin_arm_strexd ||
5007  ((BuiltinID == ARM::BI__builtin_arm_stlex ||
5008  BuiltinID == ARM::BI__builtin_arm_strex) &&
5009  getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
5010  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
5011  ? Intrinsic::arm_stlexd
5012  : Intrinsic::arm_strexd);
5013  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
5014 
5015  Address Tmp = CreateMemTemp(E->getArg(0)->getType());
5016  Value *Val = EmitScalarExpr(E->getArg(0));
5017  Builder.CreateStore(Val, Tmp);
5018 
5019  Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
5020  Val = Builder.CreateLoad(LdPtr);
5021 
5022  Value *Arg0 = Builder.CreateExtractValue(Val, 0);
5023  Value *Arg1 = Builder.CreateExtractValue(Val, 1);
5024  Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
5025  return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
5026  }
5027 
5028  if (BuiltinID == ARM::BI__builtin_arm_strex ||
5029  BuiltinID == ARM::BI__builtin_arm_stlex) {
5030  Value *StoreVal = EmitScalarExpr(E->getArg(0));
5031  Value *StoreAddr = EmitScalarExpr(E->getArg(1));
5032 
5033  QualType Ty = E->getArg(0)->getType();
5034  llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
5035  getContext().getTypeSize(Ty));
5036  StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
5037 
5038  if (StoreVal->getType()->isPointerTy())
5039  StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
5040  else {
5041  llvm::Type *IntTy = llvm::IntegerType::get(
5042  getLLVMContext(),
5043  CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
5044  StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
5045  StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
5046  }
5047 
5048  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
5049  ? Intrinsic::arm_stlex
5050  : Intrinsic::arm_strex,
5051  StoreAddr->getType());
5052  return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
5053  }
5054 
5055  switch (BuiltinID) {
5056  case ARM::BI__iso_volatile_load8:
5057  case ARM::BI__iso_volatile_load16:
5058  case ARM::BI__iso_volatile_load32:
5059  case ARM::BI__iso_volatile_load64: {
5060  Value *Ptr = EmitScalarExpr(E->getArg(0));
5061  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
5062  CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
5063  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
5064  LoadSize.getQuantity() * 8);
5065  Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
5066  llvm::LoadInst *Load =
5067  Builder.CreateAlignedLoad(Ptr, LoadSize);
5068  Load->setVolatile(true);
5069  return Load;
5070  }
5071  case ARM::BI__iso_volatile_store8:
5072  case ARM::BI__iso_volatile_store16:
5073  case ARM::BI__iso_volatile_store32:
5074  case ARM::BI__iso_volatile_store64: {
5075  Value *Ptr = EmitScalarExpr(E->getArg(0));
5076  Value *Value = EmitScalarExpr(E->getArg(1));
5077  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
5078  CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
5079  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
5080  StoreSize.getQuantity() * 8);
5081  Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
5082  llvm::StoreInst *Store =
5083  Builder.CreateAlignedStore(Value, Ptr,
5084  StoreSize);
5085  Store->setVolatile(true);
5086  return Store;
5087  }
5088  }
5089 
5090  if (BuiltinID == ARM::BI__builtin_arm_clrex) {
5091  Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
5092  return Builder.CreateCall(F);
5093  }
5094 
5095  // CRC32
5096  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5097  switch (BuiltinID) {
5098  case ARM::BI__builtin_arm_crc32b:
5099  CRCIntrinsicID = Intrinsic::arm_crc32b; break;
5100  case ARM::BI__builtin_arm_crc32cb:
5101  CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
5102  case ARM::BI__builtin_arm_crc32h:
5103  CRCIntrinsicID = Intrinsic::arm_crc32h; break;
5104  case ARM::BI__builtin_arm_crc32ch:
5105  CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
5106  case ARM::BI__builtin_arm_crc32w:
5107  case ARM::BI__builtin_arm_crc32d:
5108  CRCIntrinsicID = Intrinsic::arm_crc32w; break;
5109  case ARM::BI__builtin_arm_crc32cw:
5110  case ARM::BI__builtin_arm_crc32cd:
5111  CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
5112  }
5113 
5114  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5115  Value *Arg0 = EmitScalarExpr(E->getArg(0));
5116  Value *Arg1 = EmitScalarExpr(E->getArg(1));
5117 
5118  // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
5119  // intrinsics, hence we need different codegen for these cases.
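  // Roughly: crc32d(crc, x) == crc32w(crc32w(crc, (uint32_t)x),
  //                                   (uint32_t)(x >> 32)).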
5120  if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
5121  BuiltinID == ARM::BI__builtin_arm_crc32cd) {
5122  Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
5123  Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
5124  Value *Arg1b = Builder.CreateLShr(Arg1, C1);
5125  Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
5126 
5127  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5128  Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
5129  return Builder.CreateCall(F, {Res, Arg1b});
5130  } else {
5131  Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
5132 
5133  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5134  return Builder.CreateCall(F, {Arg0, Arg1});
5135  }
5136  }
5137 
5138  if (BuiltinID == ARM::BI__builtin_arm_rsr ||
5139  BuiltinID == ARM::BI__builtin_arm_rsr64 ||
5140  BuiltinID == ARM::BI__builtin_arm_rsrp ||
5141  BuiltinID == ARM::BI__builtin_arm_wsr ||
5142  BuiltinID == ARM::BI__builtin_arm_wsr64 ||
5143  BuiltinID == ARM::BI__builtin_arm_wsrp) {
5144 
5145  bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
5146  BuiltinID == ARM::BI__builtin_arm_rsr64 ||
5147  BuiltinID == ARM::BI__builtin_arm_rsrp;
5148 
5149  bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
5150  BuiltinID == ARM::BI__builtin_arm_wsrp;
5151 
5152  bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
5153  BuiltinID == ARM::BI__builtin_arm_wsr64;
5154 
5155  llvm::Type *ValueType;
5156  llvm::Type *RegisterType;
5157  if (IsPointerBuiltin) {
5158  ValueType = VoidPtrTy;
5159  RegisterType = Int32Ty;
5160  } else if (Is64Bit) {
5161  ValueType = RegisterType = Int64Ty;
5162  } else {
5163  ValueType = RegisterType = Int32Ty;
5164  }
5165 
5166  return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
5167  }
5168 
5169  // Find out if any arguments are required to be integer constant
5170  // expressions.
5171  unsigned ICEArguments = 0;
5172  ASTContext::GetBuiltinTypeError Error;
5173  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5174  assert(Error == ASTContext::GE_None && "Should not codegen an error");
5175 
5176  auto getAlignmentValue32 = [&](Address addr) -> Value* {
5177  return Builder.getInt32(addr.getAlignment().getQuantity());
5178  };
5179 
5180  Address PtrOp0 = Address::invalid();
5181  Address PtrOp1 = Address::invalid();
5182  SmallVector<Value*, 4> Ops;
5183  bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
5184  unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
5185  for (unsigned i = 0, e = NumArgs; i != e; i++) {
5186  if (i == 0) {
5187  switch (BuiltinID) {
5188  case NEON::BI__builtin_neon_vld1_v:
5189  case NEON::BI__builtin_neon_vld1q_v:
5190  case NEON::BI__builtin_neon_vld1q_lane_v:
5191  case NEON::BI__builtin_neon_vld1_lane_v:
5192  case NEON::BI__builtin_neon_vld1_dup_v:
5193  case NEON::BI__builtin_neon_vld1q_dup_v:
5194  case NEON::BI__builtin_neon_vst1_v:
5195  case NEON::BI__builtin_neon_vst1q_v:
5196  case NEON::BI__builtin_neon_vst1q_lane_v:
5197  case NEON::BI__builtin_neon_vst1_lane_v:
5198  case NEON::BI__builtin_neon_vst2_v:
5199  case NEON::BI__builtin_neon_vst2q_v:
5200  case NEON::BI__builtin_neon_vst2_lane_v:
5201  case NEON::BI__builtin_neon_vst2q_lane_v:
5202  case NEON::BI__builtin_neon_vst3_v:
5203  case NEON::BI__builtin_neon_vst3q_v:
5204  case NEON::BI__builtin_neon_vst3_lane_v:
5205  case NEON::BI__builtin_neon_vst3q_lane_v:
5206  case NEON::BI__builtin_neon_vst4_v:
5207  case NEON::BI__builtin_neon_vst4q_v:
5208  case NEON::BI__builtin_neon_vst4_lane_v:
5209  case NEON::BI__builtin_neon_vst4q_lane_v:
5210  // Get the alignment for the argument in addition to the value;
5211  // we'll use it later.
5212  PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
5213  Ops.push_back(PtrOp0.getPointer());
5214  continue;
5215  }
5216  }
5217  if (i == 1) {
5218  switch (BuiltinID) {
5219  case NEON::BI__builtin_neon_vld2_v:
5220  case NEON::BI__builtin_neon_vld2q_v:
5221  case NEON::BI__builtin_neon_vld3_v:
5222  case NEON::BI__builtin_neon_vld3q_v:
5223  case NEON::BI__builtin_neon_vld4_v:
5224  case NEON::BI__builtin_neon_vld4q_v:
5225  case NEON::BI__builtin_neon_vld2_lane_v:
5226  case NEON::BI__builtin_neon_vld2q_lane_v:
5227  case NEON::BI__builtin_neon_vld3_lane_v:
5228  case NEON::BI__builtin_neon_vld3q_lane_v:
5229  case NEON::BI__builtin_neon_vld4_lane_v:
5230  case NEON::BI__builtin_neon_vld4q_lane_v:
5231  case NEON::BI__builtin_neon_vld2_dup_v:
5232  case NEON::BI__builtin_neon_vld3_dup_v:
5233  case NEON::BI__builtin_neon_vld4_dup_v:
5234  // Get the alignment for the argument in addition to the value;
5235  // we'll use it later.
5236  PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
5237  Ops.push_back(PtrOp1.getPointer());
5238  continue;
5239  }
5240  }
5241 
5242  if ((ICEArguments & (1 << i)) == 0) {
5243  Ops.push_back(EmitScalarExpr(E->getArg(i)));
5244  } else {
5245  // If this is required to be a constant, constant fold it so that we know
5246  // that the generated intrinsic gets a ConstantInt.
5247  llvm::APSInt Result;
5248  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
5249  assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
5250  Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
5251  }
5252  }
5253 
5254  switch (BuiltinID) {
5255  default: break;
5256 
5257  case NEON::BI__builtin_neon_vget_lane_i8:
5258  case NEON::BI__builtin_neon_vget_lane_i16:
5259  case NEON::BI__builtin_neon_vget_lane_i32:
5260  case NEON::BI__builtin_neon_vget_lane_i64:
5261  case NEON::BI__builtin_neon_vget_lane_f32:
5262  case NEON::BI__builtin_neon_vgetq_lane_i8:
5263  case NEON::BI__builtin_neon_vgetq_lane_i16:
5264  case NEON::BI__builtin_neon_vgetq_lane_i32:
5265  case NEON::BI__builtin_neon_vgetq_lane_i64:
5266  case NEON::BI__builtin_neon_vgetq_lane_f32:
5267  return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5268 
5269  case NEON::BI__builtin_neon_vset_lane_i8:
5270  case NEON::BI__builtin_neon_vset_lane_i16:
5271  case NEON::BI__builtin_neon_vset_lane_i32:
5272  case NEON::BI__builtin_neon_vset_lane_i64:
5273  case NEON::BI__builtin_neon_vset_lane_f32:
5274  case NEON::BI__builtin_neon_vsetq_lane_i8:
5275  case NEON::BI__builtin_neon_vsetq_lane_i16:
5276  case NEON::BI__builtin_neon_vsetq_lane_i32:
5277  case NEON::BI__builtin_neon_vsetq_lane_i64:
5278  case NEON::BI__builtin_neon_vsetq_lane_f32:
5279  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5280 
5281  case NEON::BI__builtin_neon_vsha1h_u32:
5282  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
5283  "vsha1h");
5284  case NEON::BI__builtin_neon_vsha1cq_u32:
5285  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
5286  "vsha1c");
5287  case NEON::BI__builtin_neon_vsha1pq_u32:
5288  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
5289  "vsha1p");
5290  case NEON::BI__builtin_neon_vsha1mq_u32:
5291  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
5292  "vsha1m");
5293 
5294  // The ARM _MoveToCoprocessor builtins put the input register value as
5295  // the first argument, but the LLVM intrinsic expects it as the third one.
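  // Roughly: _MoveToCoprocessor(value, coproc, opc1, CRn, CRm, opc2) is
  // emitted as llvm.arm.mcr(coproc, opc1, value, CRn, CRm, opc2).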
5296  case ARM::BI_MoveToCoprocessor:
5297  case ARM::BI_MoveToCoprocessor2: {
5298  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
5299  Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
5300  return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
5301  Ops[3], Ops[4], Ops[5]});
5302  }
5303  case ARM::BI_BitScanForward:
5304  case ARM::BI_BitScanForward64:
5305  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
5306  case ARM::BI_BitScanReverse:
5307  case ARM::BI_BitScanReverse64:
5308  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
5309 
5310  case ARM::BI_InterlockedAnd64:
5311  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
5312  case ARM::BI_InterlockedExchange64:
5313  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
5314  case ARM::BI_InterlockedExchangeAdd64:
5315  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
5316  case ARM::BI_InterlockedExchangeSub64:
5317  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
5318  case ARM::BI_InterlockedOr64:
5319  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
5320  case ARM::BI_InterlockedXor64:
5321  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
5322  case ARM::BI_InterlockedDecrement64:
5323  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
5324  case ARM::BI_InterlockedIncrement64:
5325  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
5326  }
5327 
5328  // Get the last argument, which specifies the vector type.
5329  assert(HasExtraArg);
5330  llvm::APSInt Result;
5331  const Expr *Arg = E->getArg(E->getNumArgs()-1);
5332