CGBuiltin.cpp (clang 7.0.0svn)
1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGObjCRuntime.h"
16 #include "CGOpenCLRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "CodeGenModule.h"
19 #include "ConstantEmitter.h"
20 #include "TargetInfo.h"
21 #include "clang/AST/ASTContext.h"
22 #include "clang/AST/Decl.h"
25 #include "clang/Basic/TargetInfo.h"
27 #include "llvm/ADT/StringExtras.h"
28 #include "llvm/IR/CallSite.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/InlineAsm.h"
31 #include "llvm/IR/Intrinsics.h"
32 #include "llvm/IR/MDBuilder.h"
33 #include "llvm/Support/ConvertUTF.h"
34 #include "llvm/Support/ScopedPrinter.h"
35 #include "llvm/Support/TargetParser.h"
36 #include <sstream>
37 
38 using namespace clang;
39 using namespace CodeGen;
40 using namespace llvm;
41 
42 static
43 int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
44  return std::min(High, std::max(Low, Value));
45 }
46 
47 /// getBuiltinLibFunction - Given a builtin id for a function like
48 /// "__builtin_fabsf", return a Function* for "fabsf".
49 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
50  unsigned BuiltinID) {
51  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
52 
53  // Get the name, skip over the __builtin_ prefix (if necessary).
54  StringRef Name;
55  GlobalDecl D(FD);
56 
57  // If the builtin has been declared explicitly with an assembler label,
58  // use the mangled name. This differs from the plain label on platforms
59  // that prefix labels.
60  if (FD->hasAttr<AsmLabelAttr>())
61  Name = getMangledName(D);
62  else
63  Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
64 
65  llvm::FunctionType *Ty =
66  cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
67 
68  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
69 }
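// Illustrative note (editorial addition, not part of the original source):
// for BuiltinID == Builtin::BI__builtin_fabsf with no asm label, getName()
// yields "__builtin_fabsf"; the "+ 10" skips the "__builtin_" prefix, so the
// declaration that gets created or reused is simply:
//
//     declare float @fabsf(float)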
70 
71 /// Emit the conversions required to turn the given value into an
72 /// integer of the given size.
73 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
74  QualType T, llvm::IntegerType *IntType) {
75  V = CGF.EmitToMemory(V, T);
76 
77  if (V->getType()->isPointerTy())
78  return CGF.Builder.CreatePtrToInt(V, IntType);
79 
80  assert(V->getType() == IntType);
81  return V;
82 }
83 
84 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
85  QualType T, llvm::Type *ResultType) {
86  V = CGF.EmitFromMemory(V, T);
87 
88  if (ResultType->isPointerTy())
89  return CGF.Builder.CreateIntToPtr(V, ResultType);
90 
91  assert(V->getType() == ResultType);
92  return V;
93 }
94 
95 /// Utility to insert an atomic instruction based on Intrinsic::ID
96 /// and the expression node.
97 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
98  llvm::AtomicRMWInst::BinOp Kind,
99  const CallExpr *E) {
100  QualType T = E->getType();
101  assert(E->getArg(0)->getType()->isPointerType());
102  assert(CGF.getContext().hasSameUnqualifiedType(T,
103  E->getArg(0)->getType()->getPointeeType()));
104  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
105 
106  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
107  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
108 
109  llvm::IntegerType *IntType =
110  llvm::IntegerType::get(CGF.getLLVMContext(),
111  CGF.getContext().getTypeSize(T));
112  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
113 
114  llvm::Value *Args[2];
115  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
116  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
117  llvm::Type *ValueType = Args[1]->getType();
118  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
119 
120  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
121  Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
122  return EmitFromInt(CGF, Result, T, ValueType);
123 }
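// Illustrative example (editorial addition, not part of the original source):
// for a call such as
//
//     int old = __sync_fetch_and_add(&counter, 1);
//
// this helper bitcasts &counter to an integer pointer of the value's width
// and emits roughly
//
//     %old = atomicrmw add i32* %counter, i32 1 seq_cst
//
// before EmitFromInt converts %old back to the source-level type.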
124 
126  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
127  Value *Address = CGF.EmitScalarExpr(E->getArg(1));
128 
129  // Convert the type of the pointer to a pointer to the stored type.
130  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
131  Value *BC = CGF.Builder.CreateBitCast(
132  Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
133  LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
134  LV.setNontemporal(true);
135  CGF.EmitStoreOfScalar(Val, LV, false);
136  return nullptr;
137 }
138 
140  Value *Address = CGF.EmitScalarExpr(E->getArg(0));
141 
142  LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
143  LV.setNontemporal(true);
144  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
145 }
146 
147 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
148  llvm::AtomicRMWInst::BinOp Kind,
149  const CallExpr *E) {
150  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
151 }
152 
153 /// Utility to insert an atomic instruction based on Intrinsic::ID and
154 /// the expression node, where the return value is the result of the
155 /// operation.
156 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
157  llvm::AtomicRMWInst::BinOp Kind,
158  const CallExpr *E,
159  Instruction::BinaryOps Op,
160  bool Invert = false) {
161  QualType T = E->getType();
162  assert(E->getArg(0)->getType()->isPointerType());
163  assert(CGF.getContext().hasSameUnqualifiedType(T,
164  E->getArg(0)->getType()->getPointeeType()));
165  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
166 
167  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
168  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
169 
170  llvm::IntegerType *IntType =
171  llvm::IntegerType::get(CGF.getLLVMContext(),
172  CGF.getContext().getTypeSize(T));
173  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
174 
175  llvm::Value *Args[2];
176  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
177  llvm::Type *ValueType = Args[1]->getType();
178  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
179  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
180 
181  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
182  Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
183  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
184  if (Invert)
185  Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
186  llvm::ConstantInt::get(IntType, -1));
187  Result = EmitFromInt(CGF, Result, T, ValueType);
188  return RValue::get(Result);
189 }
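// Illustrative example (editorial addition, not part of the original source):
// __sync_add_and_fetch(&x, n) is lowered through this helper with Op == Add:
// the atomicrmw returns the *old* value, so the operand is applied once more
// to produce the new value. Invert is used for the nand flavors, where the
// final result must additionally be bitwise-negated.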
190 
191 /// @brief Utility to insert an atomic cmpxchg instruction.
192 ///
193 /// @param CGF The current codegen function.
194 /// @param E Builtin call expression to convert to cmpxchg.
195 /// arg0 - address to operate on
196 /// arg1 - value to compare with
197 /// arg2 - new value
198 /// @param ReturnBool Specifies whether to return success flag of
199 /// cmpxchg result or the old value.
200 ///
201 /// @returns result of cmpxchg, according to ReturnBool
202 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
203  bool ReturnBool) {
204  QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
205  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
206  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
207 
208  llvm::IntegerType *IntType = llvm::IntegerType::get(
209  CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
210  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
211 
212  Value *Args[3];
213  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
214  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
215  llvm::Type *ValueType = Args[1]->getType();
216  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
217  Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
218 
219  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
220  Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
221  llvm::AtomicOrdering::SequentiallyConsistent);
222  if (ReturnBool)
223  // Extract boolean success flag and zext it to int.
224  return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
225  CGF.ConvertType(E->getType()));
226  else
227  // Extract old value and emit it using the same type as compare value.
228  return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
229  ValueType);
230 }
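// Illustrative example (editorial addition, not part of the original source):
// both GCC-style compare-and-swap builtins funnel through this helper:
//
//     __sync_val_compare_and_swap(&x, old, new)   // ReturnBool == false
//     __sync_bool_compare_and_swap(&x, old, new)  // ReturnBool == true
//
// emit roughly
//
//     %pair = cmpxchg i32* %x, i32 %old, i32 %new seq_cst seq_cst
//
// returning either extractvalue 0 (the previous value) or extractvalue 1
// (the success flag, zero-extended to int).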
231 
232 // Emit a simple mangled intrinsic that has 1 argument and a return type
233 // matching the argument type.
234 static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
235  const CallExpr *E,
236  unsigned IntrinsicID) {
237  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
238 
239  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
240  return CGF.Builder.CreateCall(F, Src0);
241 }
242 
243 // Emit an intrinsic that has 2 operands of the same type as its result.
244 static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
245  const CallExpr *E,
246  unsigned IntrinsicID) {
247  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
248  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
249 
250  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
251  return CGF.Builder.CreateCall(F, { Src0, Src1 });
252 }
253 
254 // Emit an intrinsic that has 3 operands of the same type as its result.
255 static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
256  const CallExpr *E,
257  unsigned IntrinsicID) {
258  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
259  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
260  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
261 
262  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
263  return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
264 }
265 
266 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
267 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
268  const CallExpr *E,
269  unsigned IntrinsicID) {
270  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
271  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
272 
273  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
274  return CGF.Builder.CreateCall(F, {Src0, Src1});
275 }
276 
277 /// EmitFAbs - Emit a call to @llvm.fabs().
278 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
279  Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
280  llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
281  Call->setDoesNotAccessMemory();
282  return Call;
283 }
284 
285 /// Emit the computation of the sign bit for a floating point value. Returns
286 /// the i1 sign bit value.
287 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
288  LLVMContext &C = CGF.CGM.getLLVMContext();
289 
290  llvm::Type *Ty = V->getType();
291  int Width = Ty->getPrimitiveSizeInBits();
292  llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
293  V = CGF.Builder.CreateBitCast(V, IntTy);
294  if (Ty->isPPC_FP128Ty()) {
295  // We want the sign bit of the higher-order double. The bitcast we just
296  // did works as if the double-double was stored to memory and then
297  // read as an i128. The "store" will put the higher-order double in the
298  // lower address in both little- and big-Endian modes, but the "load"
299  // will treat those bits as a different part of the i128: the low bits in
300  // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
301  // we need to shift the high bits down to the low before truncating.
302  Width >>= 1;
303  if (CGF.getTarget().isBigEndian()) {
304  Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
305  V = CGF.Builder.CreateLShr(V, ShiftCst);
306  }
307  // We are truncating value in order to extract the higher-order
308  // double, which we will be using to extract the sign from.
309  IntTy = llvm::IntegerType::get(C, Width);
310  V = CGF.Builder.CreateTrunc(V, IntTy);
311  }
312  Value *Zero = llvm::Constant::getNullValue(IntTy);
313  return CGF.Builder.CreateICmpSLT(V, Zero);
314 }
315 
316 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
317  const CallExpr *E, llvm::Constant *calleeValue) {
318  CGCallee callee = CGCallee::forDirect(calleeValue, FD);
319  return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
320 }
321 
322 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
323 /// depending on IntrinsicID.
324 ///
325 /// \arg CGF The current codegen function.
326 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
327 /// \arg X The first argument to the llvm.*.with.overflow.*.
328 /// \arg Y The second argument to the llvm.*.with.overflow.*.
329 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
330 /// \returns The result (i.e. sum/product) returned by the intrinsic.
331 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
332  const llvm::Intrinsic::ID IntrinsicID,
333  llvm::Value *X, llvm::Value *Y,
334  llvm::Value *&Carry) {
335  // Make sure we have integers of the same width.
336  assert(X->getType() == Y->getType() &&
337  "Arguments must be the same type. (Did you forget to make sure both "
338  "arguments have the same integer width?)");
339 
340  llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
341  llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
342  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
343  return CGF.Builder.CreateExtractValue(Tmp, 0);
344 }
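// Illustrative example (editorial addition, not part of the original source):
// for IntrinsicID == llvm::Intrinsic::uadd_with_overflow and two i32 values,
// the emitted IR is roughly
//
//     %res = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
//     %sum = extractvalue { i32, i1 } %res, 0
//     %ovf = extractvalue { i32, i1 } %res, 1
//
// with %ovf returned through Carry and %sum as the function's result.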
345 
346 static Value *emitRangedBuiltin(CodeGenFunction &CGF,
347  unsigned IntrinsicID,
348  int low, int high) {
349  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
350  llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
351  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
352  llvm::Instruction *Call = CGF.Builder.CreateCall(F);
353  Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
354  return Call;
355 }
356 
357 namespace {
358  struct WidthAndSignedness {
359  unsigned Width;
360  bool Signed;
361  };
362 }
363 
364 static WidthAndSignedness
365 getIntegerWidthAndSignedness(const clang::ASTContext &context,
366  const clang::QualType Type) {
367  assert(Type->isIntegerType() && "Given type is not an integer.");
368  unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
369  bool Signed = Type->isSignedIntegerType();
370  return {Width, Signed};
371 }
372 
373 // Given one or more integer types, this function produces an integer type that
374 // encompasses them: any value in one of the given types could be expressed in
375 // the encompassing type.
376 static struct WidthAndSignedness
377 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
378  assert(Types.size() > 0 && "Empty list of types.");
379 
380  // If any of the given types is signed, we must return a signed type.
381  bool Signed = false;
382  for (const auto &Type : Types) {
383  Signed |= Type.Signed;
384  }
385 
386  // The encompassing type must have a width greater than or equal to the width
387  // of the specified types. Additionally, if the encompassing type is signed,
388  // its width must be strictly greater than the width of any unsigned types
389  // given.
390  unsigned Width = 0;
391  for (const auto &Type : Types) {
392  unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
393  if (Width < MinWidth) {
394  Width = MinWidth;
395  }
396  }
397 
398  return {Width, Signed};
399 }
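// Worked example (editorial addition, not part of the original source): for
// {unsigned 32-bit, signed 32-bit} the result must be signed, and the
// unsigned operand then needs one extra bit, so the encompassing type is a
// signed 33-bit integer ({Width = 33, Signed = true}).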
400 
401 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
402  llvm::Type *DestType = Int8PtrTy;
403  if (ArgValue->getType() != DestType)
404  ArgValue =
405  Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
406 
407  Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
408  return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
409 }
410 
411 /// Checks if using the result of __builtin_object_size(p, @p From) in place of
412 /// __builtin_object_size(p, @p To) is correct
413 static bool areBOSTypesCompatible(int From, int To) {
414  // Note: Our __builtin_object_size implementation currently treats Type=0 and
415  // Type=2 identically. Encoding this implementation detail here may make
416  // improving __builtin_object_size difficult in the future, so it's omitted.
417  return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
418 }
419 
420 static llvm::Value *
421 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
422  return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
423 }
424 
425 llvm::Value *
426 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
427  llvm::IntegerType *ResType,
428  llvm::Value *EmittedE) {
429  uint64_t ObjectSize;
430  if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
431  return emitBuiltinObjectSize(E, Type, ResType, EmittedE);
432  return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
433 }
434 
435 /// Returns a Value corresponding to the size of the given expression.
436 /// This Value may be either of the following:
437 /// - A llvm::Argument (if E is a param with the pass_object_size attribute on
438 /// it)
439 /// - A call to the @llvm.objectsize intrinsic
440 ///
441 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
442 /// and we wouldn't otherwise try to reference a pass_object_size parameter,
443 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
444 llvm::Value *
445 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
446  llvm::IntegerType *ResType,
447  llvm::Value *EmittedE) {
448  // We need to reference an argument if the pointer is a parameter with the
449  // pass_object_size attribute.
450  if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
451  auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
452  auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
453  if (Param != nullptr && PS != nullptr &&
454  areBOSTypesCompatible(PS->getType(), Type)) {
455  auto Iter = SizeArguments.find(Param);
456  assert(Iter != SizeArguments.end());
457 
458  const ImplicitParamDecl *D = Iter->second;
459  auto DIter = LocalDeclMap.find(D);
460  assert(DIter != LocalDeclMap.end());
461 
462  return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
463  getContext().getSizeType(), E->getLocStart());
464  }
465  }
466 
467  // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
468  // evaluate E for side-effects. In either case, we shouldn't lower to
469  // @llvm.objectsize.
470  if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
471  return getDefaultBuiltinObjectSizeResult(Type, ResType);
472 
473  Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
474  assert(Ptr->getType()->isPointerTy() &&
475  "Non-pointer passed to __builtin_object_size?");
476 
477  Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
478 
479  // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
480  Value *Min = Builder.getInt1((Type & 2) != 0);
481  // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
482  Value *NullIsUnknown = Builder.getTrue();
483  return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
484 }
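// Illustrative example (editorial addition, not part of the original source):
// __builtin_object_size(p, 2) on a plain 'char *p' has no pass_object_size
// parameter to consult, so it lowers to roughly
//
//     call i64 @llvm.objectsize.i64.p0i8(i8* %p, i1 true, i1 true)
//
// where the first i1 selects the minimum ("Min") semantics because Type & 2
// is set, and the second marks null as a pointer of unknown size.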
485 
486 // Many of the MSVC builtins are on both x64 and ARM; to avoid repeating
487 // code, we handle them here.
488 enum class CodeGenFunction::MSVCIntrin {
489  _BitScanForward,
490  _BitScanReverse,
491  _InterlockedAnd,
492  _InterlockedDecrement,
493  _InterlockedExchange,
494  _InterlockedExchangeAdd,
495  _InterlockedExchangeSub,
496  _InterlockedIncrement,
497  _InterlockedOr,
498  _InterlockedXor,
499  _interlockedbittestandset,
500  __fastfail,
501 };
502 
503 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
504  const CallExpr *E) {
505  switch (BuiltinID) {
506  case MSVCIntrin::_BitScanForward:
507  case MSVCIntrin::_BitScanReverse: {
508  Value *ArgValue = EmitScalarExpr(E->getArg(1));
509 
510  llvm::Type *ArgType = ArgValue->getType();
511  llvm::Type *IndexType =
512  EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
513  llvm::Type *ResultType = ConvertType(E->getType());
514 
515  Value *ArgZero = llvm::Constant::getNullValue(ArgType);
516  Value *ResZero = llvm::Constant::getNullValue(ResultType);
517  Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
518 
519  BasicBlock *Begin = Builder.GetInsertBlock();
520  BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
521  Builder.SetInsertPoint(End);
522  PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
523 
524  Builder.SetInsertPoint(Begin);
525  Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
526  BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
527  Builder.CreateCondBr(IsZero, End, NotZero);
528  Result->addIncoming(ResZero, Begin);
529 
530  Builder.SetInsertPoint(NotZero);
531  Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));
532 
533  if (BuiltinID == MSVCIntrin::_BitScanForward) {
534  Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
535  Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
536  ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
537  Builder.CreateStore(ZeroCount, IndexAddress, false);
538  } else {
539  unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
540  Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
541 
542  Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
543  Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
544  ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
545  Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
546  Builder.CreateStore(Index, IndexAddress, false);
547  }
548  Builder.CreateBr(End);
549  Result->addIncoming(ResOne, NotZero);
550 
551  Builder.SetInsertPoint(End);
552  return Result;
553  }
554  case MSVCIntrin::_InterlockedAnd:
555  return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
556  case MSVCIntrin::_InterlockedExchange:
557  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
558  case MSVCIntrin::_InterlockedExchangeAdd:
559  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
560  case MSVCIntrin::_InterlockedExchangeSub:
561  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
562  case MSVCIntrin::_InterlockedOr:
563  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
564  case MSVCIntrin::_InterlockedXor:
565  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
566 
567  case MSVCIntrin::_interlockedbittestandset: {
568  llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
569  llvm::Value *Bit = EmitScalarExpr(E->getArg(1));
570  AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
571  AtomicRMWInst::Or, Addr,
572  Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit),
573  llvm::AtomicOrdering::SequentiallyConsistent);
574  // Shift the relevant bit to the least significant position, truncate to
575  // the result type, and test the low bit.
576  llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit);
577  llvm::Value *Truncated =
578  Builder.CreateTrunc(Shifted, ConvertType(E->getType()));
579  return Builder.CreateAnd(Truncated,
580  ConstantInt::get(Truncated->getType(), 1));
581  }
582 
583  case MSVCIntrin::_InterlockedDecrement: {
584  llvm::Type *IntTy = ConvertType(E->getType());
585  AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
586  AtomicRMWInst::Sub,
587  EmitScalarExpr(E->getArg(0)),
588  ConstantInt::get(IntTy, 1),
589  llvm::AtomicOrdering::SequentiallyConsistent);
590  return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1));
591  }
592  case MSVCIntrin::_InterlockedIncrement: {
593  llvm::Type *IntTy = ConvertType(E->getType());
594  AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
595  AtomicRMWInst::Add,
596  EmitScalarExpr(E->getArg(0)),
597  ConstantInt::get(IntTy, 1),
598  llvm::AtomicOrdering::SequentiallyConsistent);
599  return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
600  }
601 
602  case MSVCIntrin::__fastfail: {
603  // Request immediate process termination from the kernel. The instruction
604  // sequences to do this are documented on MSDN:
605  // https://msdn.microsoft.com/en-us/library/dn774154.aspx
606  llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
607  StringRef Asm, Constraints;
608  switch (ISA) {
609  default:
610  ErrorUnsupported(E, "__fastfail call for this architecture");
611  break;
612  case llvm::Triple::x86:
613  case llvm::Triple::x86_64:
614  Asm = "int $$0x29";
615  Constraints = "{cx}";
616  break;
617  case llvm::Triple::thumb:
618  Asm = "udf #251";
619  Constraints = "{r0}";
620  break;
621  }
622  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
623  llvm::InlineAsm *IA =
624  llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
625  llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
626  getLLVMContext(), llvm::AttributeList::FunctionIndex,
627  llvm::Attribute::NoReturn);
628  CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
629  CS.setAttributes(NoReturnAttr);
630  return CS.getInstruction();
631  }
632  }
633  llvm_unreachable("Incorrect MSVC intrinsic!");
634 }
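// Illustrative example (editorial addition, not part of the original source):
// _InterlockedIncrement(&x) is handled above by emitting roughly
//
//     %old = atomicrmw add i32* %x, i32 1 seq_cst
//     %new = add i32 %old, 1
//
// because the MSVC intrinsic returns the incremented value while atomicrmw
// returns the original one.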
635 
636 namespace {
637 // ARC cleanup for __builtin_os_log_format
638 struct CallObjCArcUse final : EHScopeStack::Cleanup {
639  CallObjCArcUse(llvm::Value *object) : object(object) {}
640  llvm::Value *object;
641 
642  void Emit(CodeGenFunction &CGF, Flags flags) override {
643  CGF.EmitARCIntrinsicUse(object);
644  }
645 };
646 }
647 
648 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
649  BuiltinCheckKind Kind) {
650  assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
651  && "Unsupported builtin check kind");
652 
653  Value *ArgValue = EmitScalarExpr(E);
654  if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef())
655  return ArgValue;
656 
657  SanitizerScope SanScope(this);
658  Value *Cond = Builder.CreateICmpNE(
659  ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
660  EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
661  SanitizerHandler::InvalidBuiltin,
662  {EmitCheckSourceLocation(E->getExprLoc()),
663  llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
664  None);
665  return ArgValue;
666 }
667 
668 /// Get the argument type for arguments to os_log_helper.
669 static CanQualType getOSLogArgType(ASTContext &C, int Size) {
670  QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
671  return C.getCanonicalType(UnsignedTy);
672 }
673 
674 llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
675  const analyze_os_log::OSLogBufferLayout &Layout,
676  CharUnits BufferAlignment) {
677  ASTContext &Ctx = getContext();
678 
679  llvm::SmallString<64> Name;
680  {
681  raw_svector_ostream OS(Name);
682  OS << "__os_log_helper";
683  OS << "_" << BufferAlignment.getQuantity();
684  OS << "_" << int(Layout.getSummaryByte());
685  OS << "_" << int(Layout.getNumArgsByte());
686  for (const auto &Item : Layout.Items)
687  OS << "_" << int(Item.getSizeByte()) << "_"
688  << int(Item.getDescriptorByte());
689  }
690 
691  if (llvm::Function *F = CGM.getModule().getFunction(Name))
692  return F;
693 
694  llvm::SmallVector<ImplicitParamDecl, 4> Params;
695  Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"),
696  Ctx.VoidPtrTy, ImplicitParamDecl::Other);
697 
698  for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
699  char Size = Layout.Items[I].getSizeByte();
700  if (!Size)
701  continue;
702 
703  Params.emplace_back(
704  Ctx, nullptr, SourceLocation(),
705  &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)),
706  getOSLogArgType(Ctx, Size), ImplicitParamDecl::Other);
707  }
708 
709  FunctionArgList Args;
710  for (auto &P : Params)
711  Args.push_back(&P);
712 
713  // The helper function has linkonce_odr linkage to enable the linker to merge
714  // identical functions. To ensure the merging always happens, 'noinline' is
715  // attached to the function when compiling with -Oz.
716  const CGFunctionInfo &FI =
717  CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
718  llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
719  llvm::Function *Fn = llvm::Function::Create(
720  FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
721  Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
722  CGM.SetLLVMFunctionAttributes(nullptr, FI, Fn);
723  CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
724 
725  // Attach 'noinline' at -Oz.
726  if (CGM.getCodeGenOpts().OptimizeSize == 2)
727  Fn->addFnAttr(llvm::Attribute::NoInline);
728 
729  auto NL = ApplyDebugLocation::CreateEmpty(*this);
730  IdentifierInfo *II = &Ctx.Idents.get(Name);
731  FunctionDecl *FD = FunctionDecl::Create(
732  Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
733  Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false);
734 
735  StartFunction(FD, Ctx.VoidTy, Fn, FI, Args);
736 
737  // Create a scope with an artificial location for the body of this function.
738  auto AL = ApplyDebugLocation::CreateArtificial(*this);
739 
740  CharUnits Offset;
741  Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(&Params[0]), "buf"),
742  BufferAlignment);
743  Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
744  Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
745  Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
746  Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
747 
748  unsigned I = 1;
749  for (const auto &Item : Layout.Items) {
750  Builder.CreateStore(
751  Builder.getInt8(Item.getDescriptorByte()),
752  Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
753  Builder.CreateStore(
754  Builder.getInt8(Item.getSizeByte()),
755  Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
756 
757  CharUnits Size = Item.size();
758  if (!Size.getQuantity())
759  continue;
760 
761  Address Arg = GetAddrOfLocalVar(&Params[I]);
762  Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
763  Addr = Builder.CreateBitCast(Addr, Arg.getPointer()->getType(),
764  "argDataCast");
765  Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
766  Offset += Size;
767  ++I;
768  }
769 
770  FinishFunction();
771 
772  return Fn;
773 }
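// Illustrative note (editorial addition, not part of the original source):
// the mangling above simply concatenates the layout bytes, so a buffer with
// 4-byte alignment, summary byte 0, one argument of size 4 and descriptor 0
// would be served by a helper named "__os_log_helper_4_0_1_4_0".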
774 
775 RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
776  assert(E.getNumArgs() >= 2 &&
777  "__builtin_os_log_format takes at least 2 arguments");
778  ASTContext &Ctx = getContext();
779  analyze_os_log::OSLogBufferLayout Layout;
780  analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
781  Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
782  llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
783 
784  // Ignore argument 1, the format string. It is not currently used.
785  CallArgList Args;
786  Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy);
787 
788  for (const auto &Item : Layout.Items) {
789  int Size = Item.getSizeByte();
790  if (!Size)
791  continue;
792 
793  llvm::Value *ArgVal;
794 
795  if (const Expr *TheExpr = Item.getExpr()) {
796  ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
797 
798  // Check if this is a retainable type.
799  if (TheExpr->getType()->isObjCRetainableType()) {
800  assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
801  "Only scalar can be a ObjC retainable type");
802  // Check if the object is constant, if not, save it in
803  // RetainableOperands.
804  if (!isa<Constant>(ArgVal))
805  RetainableOperands.push_back(ArgVal);
806  }
807  } else {
808  ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
809  }
810 
811  unsigned ArgValSize =
812  CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
813  llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
814  ArgValSize);
815  ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
816  CanQualType ArgTy = getOSLogArgType(Ctx, Size);
817  // If ArgVal has type x86_fp80, zero-extend ArgVal.
818  ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
819  Args.add(RValue::get(ArgVal), ArgTy);
820  }
821 
822  const CGFunctionInfo &FI =
823  CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
824  llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
825  Layout, BufAddr.getAlignment());
826  EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
827 
828  // Push a clang.arc.use cleanup for each object in RetainableOperands. The
829  // cleanup will cause the use to appear after the final log call, keeping
830  // the object valid while it's held in the log buffer. Note that if there's
831  // a release cleanup on the object, it will already be active; since
832  // cleanups are emitted in reverse order, the use will occur before the
833  // object is released.
834  if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
835  CGM.getCodeGenOpts().OptimizationLevel != 0)
836  for (llvm::Value *Object : RetainableOperands)
837  pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), Object);
838 
839  return RValue::get(BufAddr.getPointer());
840 }
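// Illustrative example (editorial addition, not part of the original source):
// a call such as __builtin_os_log_format(buf, "x=%d", x) computes the buffer
// layout statically, passes 'buf' plus each formatted argument's value to the
// shared helper generated above, and the helper writes the summary byte, the
// argument-count byte, and a (descriptor, size, data) triple for each item
// into the buffer.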
841 
842 /// Determine if a binop is a checked mixed-sign multiply we can specialize.
843 static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
844  WidthAndSignedness Op1Info,
845  WidthAndSignedness Op2Info,
846  WidthAndSignedness ResultInfo) {
847  return BuiltinID == Builtin::BI__builtin_mul_overflow &&
848  Op1Info.Width == Op2Info.Width && Op1Info.Width >= ResultInfo.Width &&
849  Op1Info.Signed != Op2Info.Signed;
850 }
851 
852 /// Emit a checked mixed-sign multiply. This is a cheaper specialization of
853 /// the generic checked-binop irgen.
854 static RValue
855 EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
856  WidthAndSignedness Op1Info, const clang::Expr *Op2,
857  WidthAndSignedness Op2Info,
858  const clang::Expr *ResultArg, QualType ResultQTy,
859  WidthAndSignedness ResultInfo) {
860  assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
861  Op2Info, ResultInfo) &&
862  "Not a mixed-sign multipliction we can specialize");
863 
864  // Emit the signed and unsigned operands.
865  const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
866  const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
867  llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
868  llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
869 
870  llvm::Type *OpTy = Signed->getType();
871  llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
872  Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
873  llvm::Type *ResTy = ResultPtr.getElementType();
874 
875  // Take the absolute value of the signed operand.
876  llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
877  llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
878  llvm::Value *AbsSigned =
879  CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
880 
881  // Perform a checked unsigned multiplication.
882  llvm::Value *UnsignedOverflow;
883  llvm::Value *UnsignedResult =
884  EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
885  Unsigned, UnsignedOverflow);
886 
887  llvm::Value *Overflow, *Result;
888  if (ResultInfo.Signed) {
889  // Signed overflow occurs if the result is greater than INT_MAX or less
890  // than INT_MIN, i.e. when |Result| > (INT_MAX + IsNegative).
891  auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width)
892  .zextOrSelf(Op1Info.Width);
893  llvm::Value *MaxResult =
894  CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
895  CGF.Builder.CreateZExt(IsNegative, OpTy));
896  llvm::Value *SignedOverflow =
897  CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
898  Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
899 
900  // Prepare the signed result (possibly by negating it).
901  llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
902  llvm::Value *SignedResult =
903  CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
904  Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
905  } else {
906  // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
907  llvm::Value *Underflow = CGF.Builder.CreateAnd(
908  IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
909  Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
910  if (ResultInfo.Width < Op1Info.Width) {
911  auto IntMax =
912  llvm::APInt::getMaxValue(ResultInfo.Width).zext(Op1Info.Width);
913  llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
914  UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
915  Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
916  }
917 
918  // Negate the product if it would be negative in infinite precision.
919  Result = CGF.Builder.CreateSelect(
920  IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
921 
922  Result = CGF.Builder.CreateTrunc(Result, ResTy);
923  }
924  assert(Overflow && Result && "Missing overflow or result");
925 
926  bool isVolatile =
927  ResultArg->getType()->getPointeeType().isVolatileQualified();
928  CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
929  isVolatile);
930  return RValue::get(Overflow);
931 }
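// Illustrative example (editorial addition, not part of the original source):
// __builtin_mul_overflow(a, b, &r) with 'int a', 'unsigned b', 'int r' takes
// this path: |a| is formed with a select, multiplied by b via
// @llvm.umul.with.overflow.i32, and the product is re-negated when a was
// negative; overflow is reported if the unsigned multiply overflowed or the
// magnitude cannot be represented in the signed result type.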
932 
933 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
934  unsigned BuiltinID, const CallExpr *E,
935  ReturnValueSlot ReturnValue) {
936  // See if we can constant fold this builtin. If so, don't emit it at all.
937  Expr::EvalResult Result;
938  if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
939  !Result.hasSideEffects()) {
940  if (Result.Val.isInt())
941  return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
942  Result.Val.getInt()));
943  if (Result.Val.isFloat())
944  return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
945  Result.Val.getFloat()));
946  }
947 
948  // There are LLVM math intrinsics/instructions corresponding to math library
949  // functions except the LLVM op will never set errno while the math library
950  // might. Also, math builtins have the same semantics as their math library
951  // twins. Thus, we can transform math library and builtin calls to their
952  // LLVM counterparts if the call is marked 'const' (known to never set errno).
953  if (FD->hasAttr<ConstAttr>()) {
954  switch (BuiltinID) {
955  case Builtin::BIceil:
956  case Builtin::BIceilf:
957  case Builtin::BIceill:
958  case Builtin::BI__builtin_ceil:
959  case Builtin::BI__builtin_ceilf:
960  case Builtin::BI__builtin_ceill:
961  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
962 
963  case Builtin::BIcopysign:
964  case Builtin::BIcopysignf:
965  case Builtin::BIcopysignl:
966  case Builtin::BI__builtin_copysign:
967  case Builtin::BI__builtin_copysignf:
968  case Builtin::BI__builtin_copysignl:
969  case Builtin::BI__builtin_copysignf128:
970  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
971 
972  case Builtin::BIcos:
973  case Builtin::BIcosf:
974  case Builtin::BIcosl:
975  case Builtin::BI__builtin_cos:
976  case Builtin::BI__builtin_cosf:
977  case Builtin::BI__builtin_cosl:
978  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::cos));
979 
980  case Builtin::BIexp:
981  case Builtin::BIexpf:
982  case Builtin::BIexpl:
983  case Builtin::BI__builtin_exp:
984  case Builtin::BI__builtin_expf:
985  case Builtin::BI__builtin_expl:
986  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp));
987 
988  case Builtin::BIexp2:
989  case Builtin::BIexp2f:
990  case Builtin::BIexp2l:
991  case Builtin::BI__builtin_exp2:
992  case Builtin::BI__builtin_exp2f:
993  case Builtin::BI__builtin_exp2l:
994  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp2));
995 
996  case Builtin::BIfabs:
997  case Builtin::BIfabsf:
998  case Builtin::BIfabsl:
999  case Builtin::BI__builtin_fabs:
1000  case Builtin::BI__builtin_fabsf:
1001  case Builtin::BI__builtin_fabsl:
1002  case Builtin::BI__builtin_fabsf128:
1003  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
1004 
1005  case Builtin::BIfloor:
1006  case Builtin::BIfloorf:
1007  case Builtin::BIfloorl:
1008  case Builtin::BI__builtin_floor:
1009  case Builtin::BI__builtin_floorf:
1010  case Builtin::BI__builtin_floorl:
1011  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
1012 
1013  case Builtin::BIfma:
1014  case Builtin::BIfmaf:
1015  case Builtin::BIfmal:
1016  case Builtin::BI__builtin_fma:
1017  case Builtin::BI__builtin_fmaf:
1018  case Builtin::BI__builtin_fmal:
1019  return RValue::get(emitTernaryBuiltin(*this, E, Intrinsic::fma));
1020 
1021  case Builtin::BIfmax:
1022  case Builtin::BIfmaxf:
1023  case Builtin::BIfmaxl:
1024  case Builtin::BI__builtin_fmax:
1025  case Builtin::BI__builtin_fmaxf:
1026  case Builtin::BI__builtin_fmaxl:
1027  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
1028 
1029  case Builtin::BIfmin:
1030  case Builtin::BIfminf:
1031  case Builtin::BIfminl:
1032  case Builtin::BI__builtin_fmin:
1033  case Builtin::BI__builtin_fminf:
1034  case Builtin::BI__builtin_fminl:
1035  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
1036 
1037  // fmod() is a special-case. It maps to the frem instruction rather than an
1038  // LLVM intrinsic.
1039  case Builtin::BIfmod:
1040  case Builtin::BIfmodf:
1041  case Builtin::BIfmodl:
1042  case Builtin::BI__builtin_fmod:
1043  case Builtin::BI__builtin_fmodf:
1044  case Builtin::BI__builtin_fmodl: {
1045  Value *Arg1 = EmitScalarExpr(E->getArg(0));
1046  Value *Arg2 = EmitScalarExpr(E->getArg(1));
1047  return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
1048  }
1049 
1050  case Builtin::BIlog:
1051  case Builtin::BIlogf:
1052  case Builtin::BIlogl:
1053  case Builtin::BI__builtin_log:
1054  case Builtin::BI__builtin_logf:
1055  case Builtin::BI__builtin_logl:
1056  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log));
1057 
1058  case Builtin::BIlog10:
1059  case Builtin::BIlog10f:
1060  case Builtin::BIlog10l:
1061  case Builtin::BI__builtin_log10:
1062  case Builtin::BI__builtin_log10f:
1063  case Builtin::BI__builtin_log10l:
1064  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log10));
1065 
1066  case Builtin::BIlog2:
1067  case Builtin::BIlog2f:
1068  case Builtin::BIlog2l:
1069  case Builtin::BI__builtin_log2:
1070  case Builtin::BI__builtin_log2f:
1071  case Builtin::BI__builtin_log2l:
1072  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log2));
1073 
1074  case Builtin::BInearbyint:
1075  case Builtin::BInearbyintf:
1076  case Builtin::BInearbyintl:
1077  case Builtin::BI__builtin_nearbyint:
1078  case Builtin::BI__builtin_nearbyintf:
1079  case Builtin::BI__builtin_nearbyintl:
1080  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
1081 
1082  case Builtin::BIpow:
1083  case Builtin::BIpowf:
1084  case Builtin::BIpowl:
1085  case Builtin::BI__builtin_pow:
1086  case Builtin::BI__builtin_powf:
1087  case Builtin::BI__builtin_powl:
1088  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::pow));
1089 
1090  case Builtin::BIrint:
1091  case Builtin::BIrintf:
1092  case Builtin::BIrintl:
1093  case Builtin::BI__builtin_rint:
1094  case Builtin::BI__builtin_rintf:
1095  case Builtin::BI__builtin_rintl:
1096  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
1097 
1098  case Builtin::BIround:
1099  case Builtin::BIroundf:
1100  case Builtin::BIroundl:
1101  case Builtin::BI__builtin_round:
1102  case Builtin::BI__builtin_roundf:
1103  case Builtin::BI__builtin_roundl:
1104  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
1105 
1106  case Builtin::BIsin:
1107  case Builtin::BIsinf:
1108  case Builtin::BIsinl:
1109  case Builtin::BI__builtin_sin:
1110  case Builtin::BI__builtin_sinf:
1111  case Builtin::BI__builtin_sinl:
1112  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sin));
1113 
1114  case Builtin::BIsqrt:
1115  case Builtin::BIsqrtf:
1116  case Builtin::BIsqrtl:
1117  case Builtin::BI__builtin_sqrt:
1118  case Builtin::BI__builtin_sqrtf:
1119  case Builtin::BI__builtin_sqrtl:
1120  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sqrt));
1121 
1122  case Builtin::BItrunc:
1123  case Builtin::BItruncf:
1124  case Builtin::BItruncl:
1125  case Builtin::BI__builtin_trunc:
1126  case Builtin::BI__builtin_truncf:
1127  case Builtin::BI__builtin_truncl:
1128  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
1129 
1130  default:
1131  break;
1132  }
1133  }
1134 
1135  switch (BuiltinID) {
1136  default: break;
1137  case Builtin::BI__builtin___CFStringMakeConstantString:
1138  case Builtin::BI__builtin___NSStringMakeConstantString:
1139  return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
1140  case Builtin::BI__builtin_stdarg_start:
1141  case Builtin::BI__builtin_va_start:
1142  case Builtin::BI__va_start:
1143  case Builtin::BI__builtin_va_end:
1144  return RValue::get(
1145  EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
1146  ? EmitScalarExpr(E->getArg(0))
1147  : EmitVAListRef(E->getArg(0)).getPointer(),
1148  BuiltinID != Builtin::BI__builtin_va_end));
1149  case Builtin::BI__builtin_va_copy: {
1150  Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
1151  Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
1152 
1153  llvm::Type *Type = Int8PtrTy;
1154 
1155  DstPtr = Builder.CreateBitCast(DstPtr, Type);
1156  SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
1157  return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
1158  {DstPtr, SrcPtr}));
1159  }
1160  case Builtin::BI__builtin_abs:
1161  case Builtin::BI__builtin_labs:
1162  case Builtin::BI__builtin_llabs: {
1163  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1164 
1165  Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
1166  Value *CmpResult =
1167  Builder.CreateICmpSGE(ArgValue,
1168  llvm::Constant::getNullValue(ArgValue->getType()),
1169  "abscond");
1170  Value *Result =
1171  Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
1172 
1173  return RValue::get(Result);
1174  }
1175  case Builtin::BI__builtin_conj:
1176  case Builtin::BI__builtin_conjf:
1177  case Builtin::BI__builtin_conjl: {
1178  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1179  Value *Real = ComplexVal.first;
1180  Value *Imag = ComplexVal.second;
1181  Value *Zero =
1182  Imag->getType()->isFPOrFPVectorTy()
1183  ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
1184  : llvm::Constant::getNullValue(Imag->getType());
1185 
1186  Imag = Builder.CreateFSub(Zero, Imag, "sub");
1187  return RValue::getComplex(std::make_pair(Real, Imag));
1188  }
1189  case Builtin::BI__builtin_creal:
1190  case Builtin::BI__builtin_crealf:
1191  case Builtin::BI__builtin_creall:
1192  case Builtin::BIcreal:
1193  case Builtin::BIcrealf:
1194  case Builtin::BIcreall: {
1195  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1196  return RValue::get(ComplexVal.first);
1197  }
1198 
1199  case Builtin::BI__builtin_cimag:
1200  case Builtin::BI__builtin_cimagf:
1201  case Builtin::BI__builtin_cimagl:
1202  case Builtin::BIcimag:
1203  case Builtin::BIcimagf:
1204  case Builtin::BIcimagl: {
1205  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1206  return RValue::get(ComplexVal.second);
1207  }
1208 
1209  case Builtin::BI__builtin_ctzs:
1210  case Builtin::BI__builtin_ctz:
1211  case Builtin::BI__builtin_ctzl:
1212  case Builtin::BI__builtin_ctzll: {
1213  Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
1214 
1215  llvm::Type *ArgType = ArgValue->getType();
1216  Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1217 
1218  llvm::Type *ResultType = ConvertType(E->getType());
1219  Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
1220  Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
1221  if (Result->getType() != ResultType)
1222  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1223  "cast");
1224  return RValue::get(Result);
1225  }
1226  case Builtin::BI__builtin_clzs:
1227  case Builtin::BI__builtin_clz:
1228  case Builtin::BI__builtin_clzl:
1229  case Builtin::BI__builtin_clzll: {
1230  Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
1231 
1232  llvm::Type *ArgType = ArgValue->getType();
1233  Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1234 
1235  llvm::Type *ResultType = ConvertType(E->getType());
1236  Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
1237  Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
1238  if (Result->getType() != ResultType)
1239  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1240  "cast");
1241  return RValue::get(Result);
1242  }
1243  case Builtin::BI__builtin_ffs:
1244  case Builtin::BI__builtin_ffsl:
1245  case Builtin::BI__builtin_ffsll: {
1246  // ffs(x) -> x ? cttz(x) + 1 : 0
1247  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1248 
1249  llvm::Type *ArgType = ArgValue->getType();
1250  Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1251 
1252  llvm::Type *ResultType = ConvertType(E->getType());
1253  Value *Tmp =
1254  Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
1255  llvm::ConstantInt::get(ArgType, 1));
1256  Value *Zero = llvm::Constant::getNullValue(ArgType);
1257  Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
1258  Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
1259  if (Result->getType() != ResultType)
1260  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1261  "cast");
1262  return RValue::get(Result);
1263  }
1264  case Builtin::BI__builtin_parity:
1265  case Builtin::BI__builtin_parityl:
1266  case Builtin::BI__builtin_parityll: {
1267  // parity(x) -> ctpop(x) & 1
1268  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1269 
1270  llvm::Type *ArgType = ArgValue->getType();
1271  Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
1272 
1273  llvm::Type *ResultType = ConvertType(E->getType());
1274  Value *Tmp = Builder.CreateCall(F, ArgValue);
1275  Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
1276  if (Result->getType() != ResultType)
1277  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1278  "cast");
1279  return RValue::get(Result);
1280  }
1281  case Builtin::BI__popcnt16:
1282  case Builtin::BI__popcnt:
1283  case Builtin::BI__popcnt64:
1284  case Builtin::BI__builtin_popcount:
1285  case Builtin::BI__builtin_popcountl:
1286  case Builtin::BI__builtin_popcountll: {
1287  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1288 
1289  llvm::Type *ArgType = ArgValue->getType();
1290  Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
1291 
1292  llvm::Type *ResultType = ConvertType(E->getType());
1293  Value *Result = Builder.CreateCall(F, ArgValue);
1294  if (Result->getType() != ResultType)
1295  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1296  "cast");
1297  return RValue::get(Result);
1298  }
1299  case Builtin::BI_rotr8:
1300  case Builtin::BI_rotr16:
1301  case Builtin::BI_rotr:
1302  case Builtin::BI_lrotr:
1303  case Builtin::BI_rotr64: {
1304  Value *Val = EmitScalarExpr(E->getArg(0));
1305  Value *Shift = EmitScalarExpr(E->getArg(1));
1306 
1307  llvm::Type *ArgType = Val->getType();
1308  Shift = Builder.CreateIntCast(Shift, ArgType, false);
1309  unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
1310  Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
1311  Value *ArgZero = llvm::Constant::getNullValue(ArgType);
1312 
1313  Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
1314  Shift = Builder.CreateAnd(Shift, Mask);
1315  Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift);
1316 
1317  Value *RightShifted = Builder.CreateLShr(Val, Shift);
1318  Value *LeftShifted = Builder.CreateShl(Val, LeftShift);
1319  Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
1320 
1321  Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
1322  Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
1323  return RValue::get(Result);
1324  }
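// Illustrative note (editorial addition, not part of the original source):
// the expansion above computes (Val >> (Shift & (W-1))) | (Val << (W - Shift))
// and then selects the original value when the masked shift is zero, since a
// shift by the full bit width would yield an undefined result in LLVM IR.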
1325  case Builtin::BI_rotl8:
1326  case Builtin::BI_rotl16:
1327  case Builtin::BI_rotl:
1328  case Builtin::BI_lrotl:
1329  case Builtin::BI_rotl64: {
1330  Value *Val = EmitScalarExpr(E->getArg(0));
1331  Value *Shift = EmitScalarExpr(E->getArg(1));
1332 
1333  llvm::Type *ArgType = Val->getType();
1334  Shift = Builder.CreateIntCast(Shift, ArgType, false);
1335  unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
1336  Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
1337  Value *ArgZero = llvm::Constant::getNullValue(ArgType);
1338 
1339  Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
1340  Shift = Builder.CreateAnd(Shift, Mask);
1341  Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift);
1342 
1343  Value *LeftShifted = Builder.CreateShl(Val, Shift);
1344  Value *RightShifted = Builder.CreateLShr(Val, RightShift);
1345  Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
1346 
1347  Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
1348  Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
1349  return RValue::get(Result);
1350  }
1351  case Builtin::BI__builtin_unpredictable: {
1352  // Always return the argument of __builtin_unpredictable. LLVM does not
1353  // handle this builtin. Metadata for this builtin should be added directly
1354  // to instructions such as branches or switches that use it.
1355  return RValue::get(EmitScalarExpr(E->getArg(0)));
1356  }
1357  case Builtin::BI__builtin_expect: {
1358  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1359  llvm::Type *ArgType = ArgValue->getType();
1360 
1361  Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
1362  // Don't generate llvm.expect on -O0 as the backend won't use it for
1363  // anything.
1364  // Note, we still IRGen ExpectedValue because it could have side-effects.
1365  if (CGM.getCodeGenOpts().OptimizationLevel == 0)
1366  return RValue::get(ArgValue);
1367 
1368  Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
1369  Value *Result =
1370  Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
1371  return RValue::get(Result);
1372  }
1373  case Builtin::BI__builtin_assume_aligned: {
1374  Value *PtrValue = EmitScalarExpr(E->getArg(0));
1375  Value *OffsetValue =
1376  (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
1377 
1378  Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
1379  ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
1380  unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
1381 
1382  EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
1383  return RValue::get(PtrValue);
1384  }
1385  case Builtin::BI__assume:
1386  case Builtin::BI__builtin_assume: {
1387  if (E->getArg(0)->HasSideEffects(getContext()))
1388  return RValue::get(nullptr);
1389 
1390  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1391  Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
1392  return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
1393  }
1394  case Builtin::BI__builtin_bswap16:
1395  case Builtin::BI__builtin_bswap32:
1396  case Builtin::BI__builtin_bswap64: {
1397  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
1398  }
1399  case Builtin::BI__builtin_bitreverse8:
1400  case Builtin::BI__builtin_bitreverse16:
1401  case Builtin::BI__builtin_bitreverse32:
1402  case Builtin::BI__builtin_bitreverse64: {
1403  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
1404  }
1405  case Builtin::BI__builtin_object_size: {
1406  unsigned Type =
1407  E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
1408  auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
1409 
1410  // We pass this builtin onto the optimizer so that it can figure out the
1411  // object size in more complex cases.
1412  return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
1413  /*EmittedE=*/nullptr));
1414  }
1415  case Builtin::BI__builtin_prefetch: {
1416  Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
1417  // FIXME: Technically these constants should be of type 'int', yes?
1418  RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
1419  llvm::ConstantInt::get(Int32Ty, 0);
1420  Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
1421  llvm::ConstantInt::get(Int32Ty, 3);
1422  Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
1423  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
1424  return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
1425  }
1426  case Builtin::BI__builtin_readcyclecounter: {
1427  Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
1428  return RValue::get(Builder.CreateCall(F));
1429  }
1430  case Builtin::BI__builtin___clear_cache: {
1431  Value *Begin = EmitScalarExpr(E->getArg(0));
1432  Value *End = EmitScalarExpr(E->getArg(1));
1433  Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
1434  return RValue::get(Builder.CreateCall(F, {Begin, End}));
1435  }
1436  case Builtin::BI__builtin_trap:
1437  return RValue::get(EmitTrapCall(Intrinsic::trap));
1438  case Builtin::BI__debugbreak:
1439  return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
1440  case Builtin::BI__builtin_unreachable: {
1441  EmitUnreachable(E->getExprLoc());
1442 
1443  // We do need to preserve an insertion point.
1444  EmitBlock(createBasicBlock("unreachable.cont"));
1445 
1446  return RValue::get(nullptr);
1447  }
1448 
1449  case Builtin::BI__builtin_powi:
1450  case Builtin::BI__builtin_powif:
1451  case Builtin::BI__builtin_powil: {
1452  Value *Base = EmitScalarExpr(E->getArg(0));
1453  Value *Exponent = EmitScalarExpr(E->getArg(1));
1454  llvm::Type *ArgType = Base->getType();
1455  Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
1456  return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1457  }
1458 
1459  case Builtin::BI__builtin_isgreater:
1460  case Builtin::BI__builtin_isgreaterequal:
1461  case Builtin::BI__builtin_isless:
1462  case Builtin::BI__builtin_islessequal:
1463  case Builtin::BI__builtin_islessgreater:
1464  case Builtin::BI__builtin_isunordered: {
1465  // Ordered comparisons: we know the arguments to these are matching scalar
1466  // floating point values.
1467  Value *LHS = EmitScalarExpr(E->getArg(0));
1468  Value *RHS = EmitScalarExpr(E->getArg(1));
1469 
1470  switch (BuiltinID) {
1471  default: llvm_unreachable("Unknown ordered comparison");
1472  case Builtin::BI__builtin_isgreater:
1473  LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
1474  break;
1475  case Builtin::BI__builtin_isgreaterequal:
1476  LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
1477  break;
1478  case Builtin::BI__builtin_isless:
1479  LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
1480  break;
1481  case Builtin::BI__builtin_islessequal:
1482  LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
1483  break;
1484  case Builtin::BI__builtin_islessgreater:
1485  LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
1486  break;
1487  case Builtin::BI__builtin_isunordered:
1488  LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
1489  break;
1490  }
1491  // ZExt bool to int type.
1492  return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
1493  }
1494  case Builtin::BI__builtin_isnan: {
1495  Value *V = EmitScalarExpr(E->getArg(0));
1496  V = Builder.CreateFCmpUNO(V, V, "cmp");
1497  return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1498  }
1499 
1500  case Builtin::BIfinite:
1501  case Builtin::BI__finite:
1502  case Builtin::BIfinitef:
1503  case Builtin::BI__finitef:
1504  case Builtin::BIfinitel:
1505  case Builtin::BI__finitel:
1506  case Builtin::BI__builtin_isinf:
1507  case Builtin::BI__builtin_isfinite: {
1508  // isinf(x) --> fabs(x) == infinity
1509  // isfinite(x) --> fabs(x) != infinity
1510  // x != NaN via the ordered compare in either case.
1511  Value *V = EmitScalarExpr(E->getArg(0));
1512  Value *Fabs = EmitFAbs(*this, V);
1513  Constant *Infinity = ConstantFP::getInfinity(V->getType());
1514  CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
1515  ? CmpInst::FCMP_OEQ
1516  : CmpInst::FCMP_ONE;
1517  Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
1518  return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
1519  }
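// Sketch of the IR this produces for __builtin_isinf on a double (value
// names are illustrative):
//   %fabs   = call double @llvm.fabs.f64(double %x)
//   %cmpinf = fcmp oeq double %fabs, 0x7FF0000000000000   ; +infinity
//   %res    = zext i1 %cmpinf to i32
// __builtin_isfinite and the finite* library entry points take the same
// shape with 'fcmp one' instead of 'fcmp oeq'.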
1520 
1521  case Builtin::BI__builtin_isinf_sign: {
1522  // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
1523  Value *Arg = EmitScalarExpr(E->getArg(0));
1524  Value *AbsArg = EmitFAbs(*this, Arg);
1525  Value *IsInf = Builder.CreateFCmpOEQ(
1526  AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
1527  Value *IsNeg = EmitSignBit(*this, Arg);
1528 
1529  llvm::Type *IntTy = ConvertType(E->getType());
1530  Value *Zero = Constant::getNullValue(IntTy);
1531  Value *One = ConstantInt::get(IntTy, 1);
1532  Value *NegativeOne = ConstantInt::get(IntTy, -1);
1533  Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
1534  Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
1535  return RValue::get(Result);
1536  }
1537 
1538  case Builtin::BI__builtin_isnormal: {
1539  // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
1540  Value *V = EmitScalarExpr(E->getArg(0));
1541  Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
1542 
1543  Value *Abs = EmitFAbs(*this, V);
1544  Value *IsLessThanInf =
1545  Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()), "isinf");
1546  APFloat Smallest = APFloat::getSmallestNormalized(
1547  getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
1548  Value *IsNormal =
1549  Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
1550  "isnormal");
1551  V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
1552  V = Builder.CreateAnd(V, IsNormal, "and");
1553  return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1554  }
1555 
1556  case Builtin::BI__builtin_fpclassify: {
1557  Value *V = EmitScalarExpr(E->getArg(5));
1558  llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
1559 
1560  // Create Result
1561  BasicBlock *Begin = Builder.GetInsertBlock();
1562  BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
1563  Builder.SetInsertPoint(End);
1564  PHINode *Result =
1565  Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
1566  "fpclassify_result");
1567 
1568  // if (V==0) return FP_ZERO
1569  Builder.SetInsertPoint(Begin);
1570  Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
1571  "iszero");
1572  Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
1573  BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
1574  Builder.CreateCondBr(IsZero, End, NotZero);
1575  Result->addIncoming(ZeroLiteral, Begin);
1576 
1577  // if (V != V) return FP_NAN
1578  Builder.SetInsertPoint(NotZero);
1579  Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
1580  Value *NanLiteral = EmitScalarExpr(E->getArg(0));
1581  BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
1582  Builder.CreateCondBr(IsNan, End, NotNan);
1583  Result->addIncoming(NanLiteral, NotZero);
1584 
1585  // if (fabs(V) == infinity) return FP_INFINITY
1586  Builder.SetInsertPoint(NotNan);
1587  Value *VAbs = EmitFAbs(*this, V);
1588  Value *IsInf =
1589  Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
1590  "isinf");
1591  Value *InfLiteral = EmitScalarExpr(E->getArg(1));
1592  BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
1593  Builder.CreateCondBr(IsInf, End, NotInf);
1594  Result->addIncoming(InfLiteral, NotNan);
1595 
1596  // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
1597  Builder.SetInsertPoint(NotInf);
1598  APFloat Smallest = APFloat::getSmallestNormalized(
1599  getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
1600  Value *IsNormal =
1601  Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
1602  "isnormal");
1603  Value *NormalResult =
1604  Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
1605  EmitScalarExpr(E->getArg(3)));
1606  Builder.CreateBr(End);
1607  Result->addIncoming(NormalResult, NotInf);
1608 
1609  // return Result
1610  Builder.SetInsertPoint(End);
1611  return RValue::get(Result);
1612  }
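// For reference, the argument order handled above matches the GCC builtin:
//   __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
//                        FP_ZERO, x)
// Arg 5 is the value being classified and args 0-4 are the class results,
// which is why the PHI collects NanLiteral, InfLiteral, the normal/subnormal
// select, and ZeroLiteral from the blocks above.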
1613 
1614  case Builtin::BIalloca:
1615  case Builtin::BI_alloca:
1616  case Builtin::BI__builtin_alloca: {
1617  Value *Size = EmitScalarExpr(E->getArg(0));
1618  const TargetInfo &TI = getContext().getTargetInfo();
1619  // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
1620  unsigned SuitableAlignmentInBytes =
1621  CGM.getContext()
1622  .toCharUnitsFromBits(TI.getSuitableAlign())
1623  .getQuantity();
1624  AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1625  AI->setAlignment(SuitableAlignmentInBytes);
1626  return RValue::get(AI);
1627  }
1628 
1629  case Builtin::BI__builtin_alloca_with_align: {
1630  Value *Size = EmitScalarExpr(E->getArg(0));
1631  Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
1632  auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
1633  unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
1634  unsigned AlignmentInBytes =
1635  CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
1636  AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1637  AI->setAlignment(AlignmentInBytes);
1638  return RValue::get(AI);
1639  }
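// Minimal usage sketch: the second argument is a constant alignment given in
// bits, which is converted to bytes above, so (names illustrative)
//   void *p = __builtin_alloca_with_align(n, 256);
// produces an n-byte i8 alloca aligned to 32 bytes.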
1640 
1641  case Builtin::BIbzero:
1642  case Builtin::BI__builtin_bzero: {
1643  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1644  Value *SizeVal = EmitScalarExpr(E->getArg(1));
1645  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1646  E->getArg(0)->getExprLoc(), FD, 0);
1647  Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
1648  return RValue::get(nullptr);
1649  }
1650  case Builtin::BImemcpy:
1651  case Builtin::BI__builtin_memcpy: {
1652  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1653  Address Src = EmitPointerWithAlignment(E->getArg(1));
1654  Value *SizeVal = EmitScalarExpr(E->getArg(2));
1655  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1656  E->getArg(0)->getExprLoc(), FD, 0);
1657  EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1658  E->getArg(1)->getExprLoc(), FD, 1);
1659  Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1660  return RValue::get(Dest.getPointer());
1661  }
1662 
1663  case Builtin::BI__builtin_char_memchr:
1664  BuiltinID = Builtin::BI__builtin_memchr;
1665  break;
1666 
1667  case Builtin::BI__builtin___memcpy_chk: {
1668  // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
1669  llvm::APSInt Size, DstSize;
1670  if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1671  !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1672  break;
1673  if (Size.ugt(DstSize))
1674  break;
1675  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1676  Address Src = EmitPointerWithAlignment(E->getArg(1));
1677  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1678  Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1679  return RValue::get(Dest.getPointer());
1680  }
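// Sketch of the folding condition with constant sizes (values illustrative):
//   __builtin___memcpy_chk(dst, src, 16, 32);  // 16 <= 32: folds to a plain
//                                              // memcpy intrinsic here
//   __builtin___memcpy_chk(dst, src, 64, 32);  // 64 >  32: breaks out and is
//                                              // left to the normal library
//                                              // call handling
// __builtin___memmove_chk and __builtin___memset_chk below use the same test.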
1681 
1682  case Builtin::BI__builtin_objc_memmove_collectable: {
1683  Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
1684  Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
1685  Value *SizeVal = EmitScalarExpr(E->getArg(2));
1686  CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
1687  DestAddr, SrcAddr, SizeVal);
1688  return RValue::get(DestAddr.getPointer());
1689  }
1690 
1691  case Builtin::BI__builtin___memmove_chk: {
1692  // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
1693  llvm::APSInt Size, DstSize;
1694  if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1695  !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1696  break;
1697  if (Size.ugt(DstSize))
1698  break;
1699  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1700  Address Src = EmitPointerWithAlignment(E->getArg(1));
1701  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1702  Builder.CreateMemMove(Dest, Src, SizeVal, false);
1703  return RValue::get(Dest.getPointer());
1704  }
1705 
1706  case Builtin::BImemmove:
1707  case Builtin::BI__builtin_memmove: {
1708  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1709  Address Src = EmitPointerWithAlignment(E->getArg(1));
1710  Value *SizeVal = EmitScalarExpr(E->getArg(2));
1711  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1712  E->getArg(0)->getExprLoc(), FD, 0);
1713  EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1714  E->getArg(1)->getExprLoc(), FD, 1);
1715  Builder.CreateMemMove(Dest, Src, SizeVal, false);
1716  return RValue::get(Dest.getPointer());
1717  }
1718  case Builtin::BImemset:
1719  case Builtin::BI__builtin_memset: {
1720  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1721  Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1722  Builder.getInt8Ty());
1723  Value *SizeVal = EmitScalarExpr(E->getArg(2));
1724  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1725  E->getArg(0)->getExprLoc(), FD, 0);
1726  Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1727  return RValue::get(Dest.getPointer());
1728  }
1729  case Builtin::BI__builtin___memset_chk: {
1730  // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
1731  llvm::APSInt Size, DstSize;
1732  if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1733  !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1734  break;
1735  if (Size.ugt(DstSize))
1736  break;
1737  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1738  Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1739  Builder.getInt8Ty());
1740  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1741  Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1742  return RValue::get(Dest.getPointer());
1743  }
1744  case Builtin::BI__builtin_wmemcmp: {
1745  // The MSVC runtime library does not provide a definition of wmemcmp, so we
1746  // need an inline implementation.
1747  if (!getTarget().getTriple().isOSMSVCRT())
1748  break;
1749 
1750  llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
1751 
1752  Value *Dst = EmitScalarExpr(E->getArg(0));
1753  Value *Src = EmitScalarExpr(E->getArg(1));
1754  Value *Size = EmitScalarExpr(E->getArg(2));
1755 
1756  BasicBlock *Entry = Builder.GetInsertBlock();
1757  BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
1758  BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
1759  BasicBlock *Next = createBasicBlock("wmemcmp.next");
1760  BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
1761  Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
1762  Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
1763 
1764  EmitBlock(CmpGT);
1765  PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
1766  DstPhi->addIncoming(Dst, Entry);
1767  PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
1768  SrcPhi->addIncoming(Src, Entry);
1769  PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
1770  SizePhi->addIncoming(Size, Entry);
1771  CharUnits WCharAlign =
1772  getContext().getTypeAlignInChars(getContext().WCharTy);
1773  Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
1774  Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
1775  Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
1776  Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
1777 
1778  EmitBlock(CmpLT);
1779  Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
1780  Builder.CreateCondBr(DstLtSrc, Exit, Next);
1781 
1782  EmitBlock(Next);
1783  Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
1784  Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
1785  Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
1786  Value *NextSizeEq0 =
1787  Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
1788  Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
1789  DstPhi->addIncoming(NextDst, Next);
1790  SrcPhi->addIncoming(NextSrc, Next);
1791  SizePhi->addIncoming(NextSize, Next);
1792 
1793  EmitBlock(Exit);
1794  PHINode *Ret = Builder.CreatePHI(IntTy, 4);
1795  Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
1796  Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
1797  Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
1798  Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
1799  return RValue::get(Ret);
1800  }
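// The block structure above corresponds roughly to this C loop (a sketch
// only; the actual comparison is done on unsigned wchar_t units):
//   int wmemcmp(const wchar_t *d, const wchar_t *s, size_t n) {
//     for (; n; ++d, ++s, --n) {
//       if (*d > *s) return 1;
//       if (*d < *s) return -1;
//     }
//     return 0;
//   }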
1801  case Builtin::BI__builtin_dwarf_cfa: {
1802  // The offset in bytes from the first argument to the CFA.
1803  //
1804  // Why on earth is this in the frontend? Is there any reason at
1805  // all that the backend can't reasonably determine this while
1806  // lowering llvm.eh.dwarf.cfa()?
1807  //
1808  // TODO: If there's a satisfactory reason, add a target hook for
1809  // this instead of hard-coding 0, which is correct for most targets.
1810  int32_t Offset = 0;
1811 
1812  Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
1813  return RValue::get(Builder.CreateCall(F,
1814  llvm::ConstantInt::get(Int32Ty, Offset)));
1815  }
1816  case Builtin::BI__builtin_return_address: {
1817  Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
1818  getContext().UnsignedIntTy);
1819  Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1820  return RValue::get(Builder.CreateCall(F, Depth));
1821  }
1822  case Builtin::BI_ReturnAddress: {
1823  Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1824  return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
1825  }
1826  case Builtin::BI__builtin_frame_address: {
1827  Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
1828  getContext().UnsignedIntTy);
1829  Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
1830  return RValue::get(Builder.CreateCall(F, Depth));
1831  }
1832  case Builtin::BI__builtin_extract_return_addr: {
1833  Value *Address = EmitScalarExpr(E->getArg(0));
1834  Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1835  return RValue::get(Result);
1836  }
1837  case Builtin::BI__builtin_frob_return_addr: {
1838  Value *Address = EmitScalarExpr(E->getArg(0));
1839  Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1840  return RValue::get(Result);
1841  }
1842  case Builtin::BI__builtin_dwarf_sp_column: {
1843  llvm::IntegerType *Ty
1844  = cast<llvm::IntegerType>(ConvertType(E->getType()));
1845  int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
1846  if (Column == -1) {
1847  CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1848  return RValue::get(llvm::UndefValue::get(Ty));
1849  }
1850  return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1851  }
1852  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
1853  Value *Address = EmitScalarExpr(E->getArg(0));
1854  if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1855  CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1856  return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1857  }
1858  case Builtin::BI__builtin_eh_return: {
1859  Value *Int = EmitScalarExpr(E->getArg(0));
1860  Value *Ptr = EmitScalarExpr(E->getArg(1));
1861 
1862  llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1863  assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1864  "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
1865  Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1866  ? Intrinsic::eh_return_i32
1867  : Intrinsic::eh_return_i64);
1868  Builder.CreateCall(F, {Int, Ptr});
1869  Builder.CreateUnreachable();
1870 
1871  // We do need to preserve an insertion point.
1872  EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1873 
1874  return RValue::get(nullptr);
1875  }
1876  case Builtin::BI__builtin_unwind_init: {
1877  Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1878  return RValue::get(Builder.CreateCall(F));
1879  }
1880  case Builtin::BI__builtin_extend_pointer: {
1881  // Extends a pointer to the size of an _Unwind_Word, which is
1882  // uint64_t on all platforms. Generally this gets poked into a
1883  // register and eventually used as an address, so if the
1884  // addressing registers are wider than pointers and the platform
1885  // doesn't implicitly ignore high-order bits when doing
1886  // addressing, we need to make sure we zext / sext based on
1887  // the platform's expectations.
1888  //
1889  // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
1890 
1891  // Cast the pointer to intptr_t.
1892  Value *Ptr = EmitScalarExpr(E->getArg(0));
1893  Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1894 
1895  // If that's 64 bits, we're done.
1896  if (IntPtrTy->getBitWidth() == 64)
1897  return RValue::get(Result);
1898 
1899  // Otherwise, ask the codegen data what to do.
1900  if (getTargetHooks().extendPointerWithSExt())
1901  return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1902  else
1903  return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1904  }
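// Sketch for a 32-bit target (value names illustrative): the pointer becomes
// ptrtoint followed by the extension the target asks for, e.g.
//   %cast = ptrtoint i8* %p to i32
//   %ext  = zext i32 %cast to i64        ; or sext, per extendPointerWithSExt
// On 64-bit targets the ptrtoint result is returned as-is.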
1905  case Builtin::BI__builtin_setjmp: {
1906  // Buffer is a void**.
1907  Address Buf = EmitPointerWithAlignment(E->getArg(0));
1908 
1909  // Store the frame pointer to the setjmp buffer.
1910  Value *FrameAddr =
1911  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1912  ConstantInt::get(Int32Ty, 0));
1913  Builder.CreateStore(FrameAddr, Buf);
1914 
1915  // Store the stack pointer to the setjmp buffer.
1916  Value *StackAddr =
1917  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
1918  Address StackSaveSlot =
1919  Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
1920  Builder.CreateStore(StackAddr, StackSaveSlot);
1921 
1922  // Call LLVM's EH setjmp, which is lightweight.
1923  Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
1924  Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1925  return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
1926  }
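// Buffer layout written here (each slot pointer-sized): slot 0 holds the
// frame address and slot 2 the saved stack pointer; the llvm.eh.sjlj.setjmp
// lowering typically fills in the resume address itself. Minimal usage
// sketch (the buffer needs five pointer-sized slots):
//   void *buf[5];
//   if (__builtin_setjmp(buf) == 0) { /* ... __builtin_longjmp(buf, 1); */ }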
1927  case Builtin::BI__builtin_longjmp: {
1928  Value *Buf = EmitScalarExpr(E->getArg(0));
1929  Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1930 
1931  // Call LLVM's EH longjmp, which is lightweight.
1932  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1933 
1934  // longjmp doesn't return; mark this as unreachable.
1935  Builder.CreateUnreachable();
1936 
1937  // We do need to preserve an insertion point.
1938  EmitBlock(createBasicBlock("longjmp.cont"));
1939 
1940  return RValue::get(nullptr);
1941  }
1942  case Builtin::BI__sync_fetch_and_add:
1943  case Builtin::BI__sync_fetch_and_sub:
1944  case Builtin::BI__sync_fetch_and_or:
1945  case Builtin::BI__sync_fetch_and_and:
1946  case Builtin::BI__sync_fetch_and_xor:
1947  case Builtin::BI__sync_fetch_and_nand:
1948  case Builtin::BI__sync_add_and_fetch:
1949  case Builtin::BI__sync_sub_and_fetch:
1950  case Builtin::BI__sync_and_and_fetch:
1951  case Builtin::BI__sync_or_and_fetch:
1952  case Builtin::BI__sync_xor_and_fetch:
1953  case Builtin::BI__sync_nand_and_fetch:
1954  case Builtin::BI__sync_val_compare_and_swap:
1955  case Builtin::BI__sync_bool_compare_and_swap:
1956  case Builtin::BI__sync_lock_test_and_set:
1957  case Builtin::BI__sync_lock_release:
1958  case Builtin::BI__sync_swap:
1959  llvm_unreachable("Shouldn't make it through sema");
1960  case Builtin::BI__sync_fetch_and_add_1:
1961  case Builtin::BI__sync_fetch_and_add_2:
1962  case Builtin::BI__sync_fetch_and_add_4:
1963  case Builtin::BI__sync_fetch_and_add_8:
1964  case Builtin::BI__sync_fetch_and_add_16:
1965  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
1966  case Builtin::BI__sync_fetch_and_sub_1:
1967  case Builtin::BI__sync_fetch_and_sub_2:
1968  case Builtin::BI__sync_fetch_and_sub_4:
1969  case Builtin::BI__sync_fetch_and_sub_8:
1970  case Builtin::BI__sync_fetch_and_sub_16:
1971  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
1972  case Builtin::BI__sync_fetch_and_or_1:
1973  case Builtin::BI__sync_fetch_and_or_2:
1974  case Builtin::BI__sync_fetch_and_or_4:
1975  case Builtin::BI__sync_fetch_and_or_8:
1976  case Builtin::BI__sync_fetch_and_or_16:
1977  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
1978  case Builtin::BI__sync_fetch_and_and_1:
1979  case Builtin::BI__sync_fetch_and_and_2:
1980  case Builtin::BI__sync_fetch_and_and_4:
1981  case Builtin::BI__sync_fetch_and_and_8:
1982  case Builtin::BI__sync_fetch_and_and_16:
1983  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
1984  case Builtin::BI__sync_fetch_and_xor_1:
1985  case Builtin::BI__sync_fetch_and_xor_2:
1986  case Builtin::BI__sync_fetch_and_xor_4:
1987  case Builtin::BI__sync_fetch_and_xor_8:
1988  case Builtin::BI__sync_fetch_and_xor_16:
1989  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
1990  case Builtin::BI__sync_fetch_and_nand_1:
1991  case Builtin::BI__sync_fetch_and_nand_2:
1992  case Builtin::BI__sync_fetch_and_nand_4:
1993  case Builtin::BI__sync_fetch_and_nand_8:
1994  case Builtin::BI__sync_fetch_and_nand_16:
1995  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
1996 
1997  // Clang extensions: not overloaded yet.
1998  case Builtin::BI__sync_fetch_and_min:
1999  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
2000  case Builtin::BI__sync_fetch_and_max:
2001  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
2002  case Builtin::BI__sync_fetch_and_umin:
2003  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
2004  case Builtin::BI__sync_fetch_and_umax:
2005  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
2006 
2007  case Builtin::BI__sync_add_and_fetch_1:
2008  case Builtin::BI__sync_add_and_fetch_2:
2009  case Builtin::BI__sync_add_and_fetch_4:
2010  case Builtin::BI__sync_add_and_fetch_8:
2011  case Builtin::BI__sync_add_and_fetch_16:
2012  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
2013  llvm::Instruction::Add);
2014  case Builtin::BI__sync_sub_and_fetch_1:
2015  case Builtin::BI__sync_sub_and_fetch_2:
2016  case Builtin::BI__sync_sub_and_fetch_4:
2017  case Builtin::BI__sync_sub_and_fetch_8:
2018  case Builtin::BI__sync_sub_and_fetch_16:
2019  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
2020  llvm::Instruction::Sub);
2021  case Builtin::BI__sync_and_and_fetch_1:
2022  case Builtin::BI__sync_and_and_fetch_2:
2023  case Builtin::BI__sync_and_and_fetch_4:
2024  case Builtin::BI__sync_and_and_fetch_8:
2025  case Builtin::BI__sync_and_and_fetch_16:
2026  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
2027  llvm::Instruction::And);
2028  case Builtin::BI__sync_or_and_fetch_1:
2029  case Builtin::BI__sync_or_and_fetch_2:
2030  case Builtin::BI__sync_or_and_fetch_4:
2031  case Builtin::BI__sync_or_and_fetch_8:
2032  case Builtin::BI__sync_or_and_fetch_16:
2033  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
2034  llvm::Instruction::Or);
2035  case Builtin::BI__sync_xor_and_fetch_1:
2036  case Builtin::BI__sync_xor_and_fetch_2:
2037  case Builtin::BI__sync_xor_and_fetch_4:
2038  case Builtin::BI__sync_xor_and_fetch_8:
2039  case Builtin::BI__sync_xor_and_fetch_16:
2040  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
2041  llvm::Instruction::Xor);
2042  case Builtin::BI__sync_nand_and_fetch_1:
2043  case Builtin::BI__sync_nand_and_fetch_2:
2044  case Builtin::BI__sync_nand_and_fetch_4:
2045  case Builtin::BI__sync_nand_and_fetch_8:
2046  case Builtin::BI__sync_nand_and_fetch_16:
2047  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
2048  llvm::Instruction::And, true);
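// Note on the Nand case: GCC-compatible __sync_nand_and_fetch computes
// ~(old & val), which is why the post-operation above is an And with the
// extra invert flag set. Sketch: after
//   int r = __sync_nand_and_fetch(&x, m);
// r holds ~(old_x & m).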
2049 
2050  case Builtin::BI__sync_val_compare_and_swap_1:
2051  case Builtin::BI__sync_val_compare_and_swap_2:
2052  case Builtin::BI__sync_val_compare_and_swap_4:
2053  case Builtin::BI__sync_val_compare_and_swap_8:
2054  case Builtin::BI__sync_val_compare_and_swap_16:
2055  return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
2056 
2057  case Builtin::BI__sync_bool_compare_and_swap_1:
2058  case Builtin::BI__sync_bool_compare_and_swap_2:
2059  case Builtin::BI__sync_bool_compare_and_swap_4:
2060  case Builtin::BI__sync_bool_compare_and_swap_8:
2061  case Builtin::BI__sync_bool_compare_and_swap_16:
2062  return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
2063 
2064  case Builtin::BI__sync_swap_1:
2065  case Builtin::BI__sync_swap_2:
2066  case Builtin::BI__sync_swap_4:
2067  case Builtin::BI__sync_swap_8:
2068  case Builtin::BI__sync_swap_16:
2069  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
2070 
2071  case Builtin::BI__sync_lock_test_and_set_1:
2072  case Builtin::BI__sync_lock_test_and_set_2:
2073  case Builtin::BI__sync_lock_test_and_set_4:
2074  case Builtin::BI__sync_lock_test_and_set_8:
2075  case Builtin::BI__sync_lock_test_and_set_16:
2076  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
2077 
2078  case Builtin::BI__sync_lock_release_1:
2079  case Builtin::BI__sync_lock_release_2:
2080  case Builtin::BI__sync_lock_release_4:
2081  case Builtin::BI__sync_lock_release_8:
2082  case Builtin::BI__sync_lock_release_16: {
2083  Value *Ptr = EmitScalarExpr(E->getArg(0));
2084  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
2085  CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
2086  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
2087  StoreSize.getQuantity() * 8);
2088  Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
2089  llvm::StoreInst *Store =
2090  Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
2091  StoreSize);
2092  Store->setAtomic(llvm::AtomicOrdering::Release);
2093  return RValue::get(nullptr);
2094  }
2095 
2096  case Builtin::BI__sync_synchronize: {
2097  // We assume this is supposed to correspond to a C++0x-style
2098  // sequentially-consistent fence (i.e. this is only usable for
2099  // synchronization, not device I/O or anything like that). This intrinsic
2100  // is really badly designed in the sense that in theory, there isn't
2101  // any way to safely use it... but in practice, it mostly works
2102  // to use it with non-atomic loads and stores to get acquire/release
2103  // semantics.
2104  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
2105  return RValue::get(nullptr);
2106  }
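// Sketch: the builtin lowers to a single full barrier,
//   __sync_synchronize();
//   ==> fence seq_cst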
2107 
2108  case Builtin::BI__builtin_nontemporal_load:
2109  return RValue::get(EmitNontemporalLoad(*this, E));
2110  case Builtin::BI__builtin_nontemporal_store:
2111  return RValue::get(EmitNontemporalStore(*this, E));
2112  case Builtin::BI__c11_atomic_is_lock_free:
2113  case Builtin::BI__atomic_is_lock_free: {
2114  // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
2115  // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
2116  // _Atomic(T) is always properly-aligned.
2117  const char *LibCallName = "__atomic_is_lock_free";
2118  CallArgList Args;
2119  Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
2120  getContext().getSizeType());
2121  if (BuiltinID == Builtin::BI__atomic_is_lock_free)
2122  Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
2123  getContext().VoidPtrTy);
2124  else
2125  Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
2126  getContext().VoidPtrTy);
2127  const CGFunctionInfo &FuncInfo =
2128  CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
2129  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
2130  llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
2131  return EmitCall(FuncInfo, CGCallee::forDirect(Func),
2132  ReturnValueSlot(), Args);
2133  }
2134 
2135  case Builtin::BI__atomic_test_and_set: {
2136  // Look at the argument type to determine whether this is a volatile
2137  // operation. The parameter type is always volatile.
2138  QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
2139  bool Volatile =
2140  PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
2141 
2142  Value *Ptr = EmitScalarExpr(E->getArg(0));
2143  unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
2144  Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
2145  Value *NewVal = Builder.getInt8(1);
2146  Value *Order = EmitScalarExpr(E->getArg(1));
2147  if (isa<llvm::ConstantInt>(Order)) {
2148  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
2149  AtomicRMWInst *Result = nullptr;
2150  switch (ord) {
2151  case 0: // memory_order_relaxed
2152  default: // invalid order
2153  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2154  llvm::AtomicOrdering::Monotonic);
2155  break;
2156  case 1: // memory_order_consume
2157  case 2: // memory_order_acquire
2158  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2159  llvm::AtomicOrdering::Acquire);
2160  break;
2161  case 3: // memory_order_release
2162  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2163  llvm::AtomicOrdering::Release);
2164  break;
2165  case 4: // memory_order_acq_rel
2166 
2167  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2168  llvm::AtomicOrdering::AcquireRelease);
2169  break;
2170  case 5: // memory_order_seq_cst
2171  Result = Builder.CreateAtomicRMW(
2172  llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2173  llvm::AtomicOrdering::SequentiallyConsistent);
2174  break;
2175  }
2176  Result->setVolatile(Volatile);
2177  return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
2178  }
2179 
2180  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
2181 
2182  llvm::BasicBlock *BBs[5] = {
2183  createBasicBlock("monotonic", CurFn),
2184  createBasicBlock("acquire", CurFn),
2185  createBasicBlock("release", CurFn),
2186  createBasicBlock("acqrel", CurFn),
2187  createBasicBlock("seqcst", CurFn)
2188  };
2189  llvm::AtomicOrdering Orders[5] = {
2190  llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
2191  llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
2192  llvm::AtomicOrdering::SequentiallyConsistent};
2193 
2194  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
2195  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
2196 
2197  Builder.SetInsertPoint(ContBB);
2198  PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
2199 
2200  for (unsigned i = 0; i < 5; ++i) {
2201  Builder.SetInsertPoint(BBs[i]);
2202  AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
2203  Ptr, NewVal, Orders[i]);
2204  RMW->setVolatile(Volatile);
2205  Result->addIncoming(RMW, BBs[i]);
2206  Builder.CreateBr(ContBB);
2207  }
2208 
2209  SI->addCase(Builder.getInt32(0), BBs[0]);
2210  SI->addCase(Builder.getInt32(1), BBs[1]);
2211  SI->addCase(Builder.getInt32(2), BBs[1]);
2212  SI->addCase(Builder.getInt32(3), BBs[2]);
2213  SI->addCase(Builder.getInt32(4), BBs[3]);
2214  SI->addCase(Builder.getInt32(5), BBs[4]);
2215 
2216  Builder.SetInsertPoint(ContBB);
2217  return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
2218  }
2219 
2220  case Builtin::BI__atomic_clear: {
2221  QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
2222  bool Volatile =
2223  PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
2224 
2225  Address Ptr = EmitPointerWithAlignment(E->getArg(0));
2226  unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
2227  Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
2228  Value *NewVal = Builder.getInt8(0);
2229  Value *Order = EmitScalarExpr(E->getArg(1));
2230  if (isa<llvm::ConstantInt>(Order)) {
2231  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
2232  StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
2233  switch (ord) {
2234  case 0: // memory_order_relaxed
2235  default: // invalid order
2236  Store->setOrdering(llvm::AtomicOrdering::Monotonic);
2237  break;
2238  case 3: // memory_order_release
2239  Store->setOrdering(llvm::AtomicOrdering::Release);
2240  break;
2241  case 5: // memory_order_seq_cst
2242  Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
2243  break;
2244  }
2245  return RValue::get(nullptr);
2246  }
2247 
2248  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
2249 
2250  llvm::BasicBlock *BBs[3] = {
2251  createBasicBlock("monotonic", CurFn),
2252  createBasicBlock("release", CurFn),
2253  createBasicBlock("seqcst", CurFn)
2254  };
2255  llvm::AtomicOrdering Orders[3] = {
2256  llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
2257  llvm::AtomicOrdering::SequentiallyConsistent};
2258 
2259  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
2260  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
2261 
2262  for (unsigned i = 0; i < 3; ++i) {
2263  Builder.SetInsertPoint(BBs[i]);
2264  StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
2265  Store->setOrdering(Orders[i]);
2266  Builder.CreateBr(ContBB);
2267  }
2268 
2269  SI->addCase(Builder.getInt32(0), BBs[0]);
2270  SI->addCase(Builder.getInt32(3), BBs[1]);
2271  SI->addCase(Builder.getInt32(5), BBs[2]);
2272 
2273  Builder.SetInsertPoint(ContBB);
2274  return RValue::get(nullptr);
2275  }
2276 
2277  case Builtin::BI__atomic_thread_fence:
2278  case Builtin::BI__atomic_signal_fence:
2279  case Builtin::BI__c11_atomic_thread_fence:
2280  case Builtin::BI__c11_atomic_signal_fence: {
2281  llvm::SyncScope::ID SSID;
2282  if (BuiltinID == Builtin::BI__atomic_signal_fence ||
2283  BuiltinID == Builtin::BI__c11_atomic_signal_fence)
2284  SSID = llvm::SyncScope::SingleThread;
2285  else
2286  SSID = llvm::SyncScope::System;
2287  Value *Order = EmitScalarExpr(E->getArg(0));
2288  if (isa<llvm::ConstantInt>(Order)) {
2289  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
2290  switch (ord) {
2291  case 0: // memory_order_relaxed
2292  default: // invalid order
2293  break;
2294  case 1: // memory_order_consume
2295  case 2: // memory_order_acquire
2296  Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
2297  break;
2298  case 3: // memory_order_release
2299  Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
2300  break;
2301  case 4: // memory_order_acq_rel
2302  Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
2303  break;
2304  case 5: // memory_order_seq_cst
2305  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
2306  break;
2307  }
2308  return RValue::get(nullptr);
2309  }
2310 
2311  llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
2312  AcquireBB = createBasicBlock("acquire", CurFn);
2313  ReleaseBB = createBasicBlock("release", CurFn);
2314  AcqRelBB = createBasicBlock("acqrel", CurFn);
2315  SeqCstBB = createBasicBlock("seqcst", CurFn);
2316  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
2317 
2318  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
2319  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
2320 
2321  Builder.SetInsertPoint(AcquireBB);
2322  Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
2323  Builder.CreateBr(ContBB);
2324  SI->addCase(Builder.getInt32(1), AcquireBB);
2325  SI->addCase(Builder.getInt32(2), AcquireBB);
2326 
2327  Builder.SetInsertPoint(ReleaseBB);
2328  Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
2329  Builder.CreateBr(ContBB);
2330  SI->addCase(Builder.getInt32(3), ReleaseBB);
2331 
2332  Builder.SetInsertPoint(AcqRelBB);
2333  Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
2334  Builder.CreateBr(ContBB);
2335  SI->addCase(Builder.getInt32(4), AcqRelBB);
2336 
2337  Builder.SetInsertPoint(SeqCstBB);
2338  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
2339  Builder.CreateBr(ContBB);
2340  SI->addCase(Builder.getInt32(5), SeqCstBB);
2341 
2342  Builder.SetInsertPoint(ContBB);
2343  return RValue::get(nullptr);
2344  }
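// Summary of the order-to-fence mapping implemented above (both for the
// constant-order fast path and the runtime switch):
//   memory_order_relaxed (0)              -> no fence
//   memory_order_consume/acquire (1, 2)   -> fence acquire
//   memory_order_release (3)              -> fence release
//   memory_order_acq_rel (4)              -> fence acq_rel
//   memory_order_seq_cst (5)              -> fence seq_cst
// with the signal-fence builtins restricted to the single-thread sync scope.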
2345 
2346  case Builtin::BI__builtin_signbit:
2347  case Builtin::BI__builtin_signbitf:
2348  case Builtin::BI__builtin_signbitl: {
2349  return RValue::get(
2350  Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
2351  ConvertType(E->getType())));
2352  }
2353  case Builtin::BI__annotation: {
2354  // Re-encode each wide string to UTF8 and make an MDString.
2355  SmallVector<Metadata *, 1> Strings;
2356  for (const Expr *Arg : E->arguments()) {
2357  const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
2358  assert(Str->getCharByteWidth() == 2);
2359  StringRef WideBytes = Str->getBytes();
2360  std::string StrUtf8;
2361  if (!convertUTF16ToUTF8String(
2362  makeArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
2363  CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
2364  continue;
2365  }
2366  Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
2367  }
2368 
2369  // Build an MDTuple of MDStrings and emit the intrinsic call.
2370  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
2371  MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
2372  Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
2373  return RValue::getIgnored();
2374  }
2375  case Builtin::BI__builtin_annotation: {
2376  llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
2377  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
2378  AnnVal->getType());
2379 
2380  // Get the annotation string, go through casts. Sema requires this to be a
2381  // non-wide string literal, potentially cast, so the cast<> is safe.
2382  const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
2383  StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
2384  return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
2385  }
2386  case Builtin::BI__builtin_addcb:
2387  case Builtin::BI__builtin_addcs:
2388  case Builtin::BI__builtin_addc:
2389  case Builtin::BI__builtin_addcl:
2390  case Builtin::BI__builtin_addcll:
2391  case Builtin::BI__builtin_subcb:
2392  case Builtin::BI__builtin_subcs:
2393  case Builtin::BI__builtin_subc:
2394  case Builtin::BI__builtin_subcl:
2395  case Builtin::BI__builtin_subcll: {
2396 
2397  // We translate all of these builtins from expressions of the form:
2398  // int x = ..., y = ..., carryin = ..., carryout, result;
2399  // result = __builtin_addc(x, y, carryin, &carryout);
2400  //
2401  // to LLVM IR of the form:
2402  //
2403  // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
2404  // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
2405  // %carry1 = extractvalue {i32, i1} %tmp1, 1
2406  // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
2407  // i32 %carryin)
2408  // %result = extractvalue {i32, i1} %tmp2, 0
2409  // %carry2 = extractvalue {i32, i1} %tmp2, 1
2410  // %tmp3 = or i1 %carry1, %carry2
2411  // %tmp4 = zext i1 %tmp3 to i32
2412  // store i32 %tmp4, i32* %carryout
2413 
2414  // Scalarize our inputs.
2415  llvm::Value *X = EmitScalarExpr(E->getArg(0));
2416  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2417  llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
2418  Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
2419 
2420  // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
2421  llvm::Intrinsic::ID IntrinsicId;
2422  switch (BuiltinID) {
2423  default: llvm_unreachable("Unknown multiprecision builtin id.");
2424  case Builtin::BI__builtin_addcb:
2425  case Builtin::BI__builtin_addcs:
2426  case Builtin::BI__builtin_addc:
2427  case Builtin::BI__builtin_addcl:
2428  case Builtin::BI__builtin_addcll:
2429  IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2430  break;
2431  case Builtin::BI__builtin_subcb:
2432  case Builtin::BI__builtin_subcs:
2433  case Builtin::BI__builtin_subc:
2434  case Builtin::BI__builtin_subcl:
2435  case Builtin::BI__builtin_subcll:
2436  IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2437  break;
2438  }
2439 
2440  // Construct our resulting LLVM IR expression.
2441  llvm::Value *Carry1;
2442  llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
2443  X, Y, Carry1);
2444  llvm::Value *Carry2;
2445  llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
2446  Sum1, Carryin, Carry2);
2447  llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
2448  X->getType());
2449  Builder.CreateStore(CarryOut, CarryOutPtr);
2450  return RValue::get(Sum2);
2451  }
2452 
2453  case Builtin::BI__builtin_add_overflow:
2454  case Builtin::BI__builtin_sub_overflow:
2455  case Builtin::BI__builtin_mul_overflow: {
2456  const clang::Expr *LeftArg = E->getArg(0);
2457  const clang::Expr *RightArg = E->getArg(1);
2458  const clang::Expr *ResultArg = E->getArg(2);
2459 
2460  clang::QualType ResultQTy =
2461  ResultArg->getType()->castAs<PointerType>()->getPointeeType();
2462 
2463  WidthAndSignedness LeftInfo =
2464  getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
2465  WidthAndSignedness RightInfo =
2466  getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
2467  WidthAndSignedness ResultInfo =
2468  getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
2469 
2470  // Handle mixed-sign multiplication as a special case, because adding
2471  // runtime or backend support for our generic irgen would be too expensive.
2472  if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
2473  return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
2474  RightInfo, ResultArg, ResultQTy,
2475  ResultInfo);
2476 
2477  WidthAndSignedness EncompassingInfo =
2478  EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
2479 
2480  llvm::Type *EncompassingLLVMTy =
2481  llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
2482 
2483  llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
2484 
2485  llvm::Intrinsic::ID IntrinsicId;
2486  switch (BuiltinID) {
2487  default:
2488  llvm_unreachable("Unknown overflow builtin id.");
2489  case Builtin::BI__builtin_add_overflow:
2490  IntrinsicId = EncompassingInfo.Signed
2491  ? llvm::Intrinsic::sadd_with_overflow
2492  : llvm::Intrinsic::uadd_with_overflow;
2493  break;
2494  case Builtin::BI__builtin_sub_overflow:
2495  IntrinsicId = EncompassingInfo.Signed
2496  ? llvm::Intrinsic::ssub_with_overflow
2497  : llvm::Intrinsic::usub_with_overflow;
2498  break;
2499  case Builtin::BI__builtin_mul_overflow:
2500  IntrinsicId = EncompassingInfo.Signed
2501  ? llvm::Intrinsic::smul_with_overflow
2502  : llvm::Intrinsic::umul_with_overflow;
2503  break;
2504  }
2505 
2506  llvm::Value *Left = EmitScalarExpr(LeftArg);
2507  llvm::Value *Right = EmitScalarExpr(RightArg);
2508  Address ResultPtr = EmitPointerWithAlignment(ResultArg);
2509 
2510  // Extend each operand to the encompassing type.
2511  Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
2512  Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
2513 
2514  // Perform the operation on the extended values.
2515  llvm::Value *Overflow, *Result;
2516  Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
2517 
2518  if (EncompassingInfo.Width > ResultInfo.Width) {
2519  // The encompassing type is wider than the result type, so we need to
2520  // truncate it.
2521  llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
2522 
2523  // To see if the truncation caused an overflow, we will extend
2524  // the result and then compare it to the original result.
2525  llvm::Value *ResultTruncExt = Builder.CreateIntCast(
2526  ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
2527  llvm::Value *TruncationOverflow =
2528  Builder.CreateICmpNE(Result, ResultTruncExt);
2529 
2530  Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
2531  Result = ResultTrunc;
2532  }
2533 
2534  // Finally, store the result using the pointer.
2535  bool isVolatile =
2536  ResultArg->getType()->getPointeeType().isVolatileQualified();
2537  Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
2538 
2539  return RValue::get(Overflow);
2540  }
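// Worked sketch (types and IR names illustrative): for
//   int r; bool o = __builtin_add_overflow(1u, -1, &r);
// the operands are 'unsigned int' and 'int', so the encompassing type is a
// signed integer wide enough for both (an i33 here). Both inputs are
// extended to i33, llvm.sadd.with.overflow.i33 is emitted, the sum is
// truncated back to i32 for the store, and the truncation check above folds
// any lost bits into the returned overflow flag.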
2541 
2542  case Builtin::BI__builtin_uadd_overflow:
2543  case Builtin::BI__builtin_uaddl_overflow:
2544  case Builtin::BI__builtin_uaddll_overflow:
2545  case Builtin::BI__builtin_usub_overflow:
2546  case Builtin::BI__builtin_usubl_overflow:
2547  case Builtin::BI__builtin_usubll_overflow:
2548  case Builtin::BI__builtin_umul_overflow:
2549  case Builtin::BI__builtin_umull_overflow:
2550  case Builtin::BI__builtin_umulll_overflow:
2551  case Builtin::BI__builtin_sadd_overflow:
2552  case Builtin::BI__builtin_saddl_overflow:
2553  case Builtin::BI__builtin_saddll_overflow:
2554  case Builtin::BI__builtin_ssub_overflow:
2555  case Builtin::BI__builtin_ssubl_overflow:
2556  case Builtin::BI__builtin_ssubll_overflow:
2557  case Builtin::BI__builtin_smul_overflow:
2558  case Builtin::BI__builtin_smull_overflow:
2559  case Builtin::BI__builtin_smulll_overflow: {
2560 
2561  // We translate all of these builtins directly to the relevant LLVM IR node.
2562 
2563  // Scalarize our inputs.
2564  llvm::Value *X = EmitScalarExpr(E->getArg(0));
2565  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2566  Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
2567 
2568  // Decide which of the overflow intrinsics we are lowering to:
2569  llvm::Intrinsic::ID IntrinsicId;
2570  switch (BuiltinID) {
2571  default: llvm_unreachable("Unknown overflow builtin id.");
2572  case Builtin::BI__builtin_uadd_overflow:
2573  case Builtin::BI__builtin_uaddl_overflow:
2574  case Builtin::BI__builtin_uaddll_overflow:
2575  IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2576  break;
2577  case Builtin::BI__builtin_usub_overflow:
2578  case Builtin::BI__builtin_usubl_overflow:
2579  case Builtin::BI__builtin_usubll_overflow:
2580  IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2581  break;
2582  case Builtin::BI__builtin_umul_overflow:
2583  case Builtin::BI__builtin_umull_overflow:
2584  case Builtin::BI__builtin_umulll_overflow:
2585  IntrinsicId = llvm::Intrinsic::umul_with_overflow;
2586  break;
2587  case Builtin::BI__builtin_sadd_overflow:
2588  case Builtin::BI__builtin_saddl_overflow:
2589  case Builtin::BI__builtin_saddll_overflow:
2590  IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
2591  break;
2592  case Builtin::BI__builtin_ssub_overflow:
2593  case Builtin::BI__builtin_ssubl_overflow:
2594  case Builtin::BI__builtin_ssubll_overflow:
2595  IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
2596  break;
2597  case Builtin::BI__builtin_smul_overflow:
2598  case Builtin::BI__builtin_smull_overflow:
2599  case Builtin::BI__builtin_smulll_overflow:
2600  IntrinsicId = llvm::Intrinsic::smul_with_overflow;
2601  break;
2602  }
2603 
2604 
2605  llvm::Value *Carry;
2606  llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
2607  Builder.CreateStore(Sum, SumOutPtr);
2608 
2609  return RValue::get(Carry);
2610  }
2611  case Builtin::BI__builtin_addressof:
2612  return RValue::get(EmitLValue(E->getArg(0)).getPointer());
2613  case Builtin::BI__builtin_operator_new:
2614  return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2615  E->getArg(0), false);
2616  case Builtin::BI__builtin_operator_delete:
2617  return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2618  E->getArg(0), true);
2619  case Builtin::BI__noop:
2620  // __noop always evaluates to an integer literal zero.
2621  return RValue::get(ConstantInt::get(IntTy, 0));
2622  case Builtin::BI__builtin_call_with_static_chain: {
2623  const CallExpr *Call = cast<CallExpr>(E->getArg(0));
2624  const Expr *Chain = E->getArg(1);
2625  return EmitCall(Call->getCallee()->getType(),
2626  EmitCallee(Call->getCallee()), Call, ReturnValue,
2627  EmitScalarExpr(Chain));
2628  }
2629  case Builtin::BI_InterlockedExchange8:
2630  case Builtin::BI_InterlockedExchange16:
2631  case Builtin::BI_InterlockedExchange:
2632  case Builtin::BI_InterlockedExchangePointer:
2633  return RValue::get(
2634  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
2635  case Builtin::BI_InterlockedCompareExchangePointer: {
2636  llvm::Type *RTy;
2637  llvm::IntegerType *IntType =
2638  IntegerType::get(getLLVMContext(),
2639  getContext().getTypeSize(E->getType()));
2640  llvm::Type *IntPtrType = IntType->getPointerTo();
2641 
2642  llvm::Value *Destination =
2643  Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
2644 
2645  llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
2646  RTy = Exchange->getType();
2647  Exchange = Builder.CreatePtrToInt(Exchange, IntType);
2648 
2649  llvm::Value *Comparand =
2650  Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
2651 
2652  auto Result =
2653  Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
2654  AtomicOrdering::SequentiallyConsistent,
2655  AtomicOrdering::SequentiallyConsistent);
2656  Result->setVolatile(true);
2657 
2658  return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
2659  0),
2660  RTy));
2661  }
2662  case Builtin::BI_InterlockedCompareExchange8:
2663  case Builtin::BI_InterlockedCompareExchange16:
2664  case Builtin::BI_InterlockedCompareExchange:
2665  case Builtin::BI_InterlockedCompareExchange64: {
2666  AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
2667  EmitScalarExpr(E->getArg(0)),
2668  EmitScalarExpr(E->getArg(2)),
2669  EmitScalarExpr(E->getArg(1)),
2670  AtomicOrdering::SequentiallyConsistent,
2671  AtomicOrdering::SequentiallyConsistent);
2672  CXI->setVolatile(true);
2673  return RValue::get(Builder.CreateExtractValue(CXI, 0));
2674  }
2675  case Builtin::BI_InterlockedIncrement16:
2676  case Builtin::BI_InterlockedIncrement:
2677  return RValue::get(
2678  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
2679  case Builtin::BI_InterlockedDecrement16:
2680  case Builtin::BI_InterlockedDecrement:
2681  return RValue::get(
2682  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
2683  case Builtin::BI_InterlockedAnd8:
2684  case Builtin::BI_InterlockedAnd16:
2685  case Builtin::BI_InterlockedAnd:
2686  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
2687  case Builtin::BI_InterlockedExchangeAdd8:
2688  case Builtin::BI_InterlockedExchangeAdd16:
2689  case Builtin::BI_InterlockedExchangeAdd:
2690  return RValue::get(
2691  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
2692  case Builtin::BI_InterlockedExchangeSub8:
2693  case Builtin::BI_InterlockedExchangeSub16:
2694  case Builtin::BI_InterlockedExchangeSub:
2695  return RValue::get(
2696  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
2697  case Builtin::BI_InterlockedOr8:
2698  case Builtin::BI_InterlockedOr16:
2699  case Builtin::BI_InterlockedOr:
2700  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
2701  case Builtin::BI_InterlockedXor8:
2702  case Builtin::BI_InterlockedXor16:
2703  case Builtin::BI_InterlockedXor:
2704  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
2705  case Builtin::BI_interlockedbittestandset:
2706  return RValue::get(
2707  EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E));
2708 
2709  case Builtin::BI__exception_code:
2710  case Builtin::BI_exception_code:
2711  return RValue::get(EmitSEHExceptionCode());
2712  case Builtin::BI__exception_info:
2713  case Builtin::BI_exception_info:
2714  return RValue::get(EmitSEHExceptionInfo());
2715  case Builtin::BI__abnormal_termination:
2716  case Builtin::BI_abnormal_termination:
2717  return RValue::get(EmitSEHAbnormalTermination());
2718  case Builtin::BI_setjmpex: {
2719  if (getTarget().getTriple().isOSMSVCRT()) {
2720  llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2721  llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2722  getLLVMContext(), llvm::AttributeList::FunctionIndex,
2723  llvm::Attribute::ReturnsTwice);
2724  llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
2725  llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2726  "_setjmpex", ReturnsTwiceAttr, /*Local=*/true);
2727  llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2728  EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2729  llvm::Value *FrameAddr =
2730  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2731  ConstantInt::get(Int32Ty, 0));
2732  llvm::Value *Args[] = {Buf, FrameAddr};
2733  llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
2734  CS.setAttributes(ReturnsTwiceAttr);
2735  return RValue::get(CS.getInstruction());
2736  }
2737  break;
2738  }
2739  case Builtin::BI_setjmp: {
2740  if (getTarget().getTriple().isOSMSVCRT()) {
2741  llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2742  getLLVMContext(), llvm::AttributeList::FunctionIndex,
2743  llvm::Attribute::ReturnsTwice);
2744  llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2745  EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2746  llvm::CallSite CS;
2747  if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2748  llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
2749  llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
2750  llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
2751  "_setjmp3", ReturnsTwiceAttr, /*Local=*/true);
2752  llvm::Value *Count = ConstantInt::get(IntTy, 0);
2753  llvm::Value *Args[] = {Buf, Count};
2754  CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
2755  } else {
2756  llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2757  llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
2758  llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2759  "_setjmp", ReturnsTwiceAttr, /*Local=*/true);
2760  llvm::Value *FrameAddr =
2761  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2762  ConstantInt::get(Int32Ty, 0));
2763  llvm::Value *Args[] = {Buf, FrameAddr};
2764  CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
2765  }
2766  CS.setAttributes(ReturnsTwiceAttr);
2767  return RValue::get(CS.getInstruction());
2768  }
2769  break;
2770  }
2771 
2772  case Builtin::BI__GetExceptionInfo: {
2773  if (llvm::GlobalVariable *GV =
2774  CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
2775  return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
2776  break;
2777  }
2778 
2779  case Builtin::BI__fastfail:
2780  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
2781 
2782  case Builtin::BI__builtin_coro_size: {
2783  auto &Context = getContext();
2784  auto SizeTy = Context.getSizeType();
2785  auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
2786  Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
2787  return RValue::get(Builder.CreateCall(F));
2788  }
2789 
2790  case Builtin::BI__builtin_coro_id:
2791  return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
2792  case Builtin::BI__builtin_coro_promise:
2793  return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
2794  case Builtin::BI__builtin_coro_resume:
2795  return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
2796  case Builtin::BI__builtin_coro_frame:
2797  return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
2798  case Builtin::BI__builtin_coro_free:
2799  return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
2800  case Builtin::BI__builtin_coro_destroy:
2801  return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
2802  case Builtin::BI__builtin_coro_done:
2803  return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
2804  case Builtin::BI__builtin_coro_alloc:
2805  return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
2806  case Builtin::BI__builtin_coro_begin:
2807  return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
2808  case Builtin::BI__builtin_coro_end:
2809  return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
2810  case Builtin::BI__builtin_coro_suspend:
2811  return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
2812  case Builtin::BI__builtin_coro_param:
2813  return EmitCoroutineIntrinsic(E, Intrinsic::coro_param);
2814 
2815  // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
2816  case Builtin::BIread_pipe:
2817  case Builtin::BIwrite_pipe: {
2818  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2819  *Arg1 = EmitScalarExpr(E->getArg(1));
2820  CGOpenCLRuntime OpenCLRT(CGM);
2821  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2822  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2823 
2824  // Type of the generic packet parameter.
2825  unsigned GenericAS =
2826  getContext().getTargetAddressSpace(LangAS::opencl_generic);
2827  llvm::Type *I8PTy = llvm::PointerType::get(
2828  llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
2829 
 2830  // Determine which overloaded version we should generate the call for.
2831  if (2U == E->getNumArgs()) {
2832  const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
2833  : "__write_pipe_2";
2834  // Creating a generic function type to be able to call with any builtin or
2835  // user defined type.
2836  llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
2837  llvm::FunctionType *FTy = llvm::FunctionType::get(
2838  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2839  Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
2840  return RValue::get(
2841  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2842  {Arg0, BCast, PacketSize, PacketAlign}));
2843  } else {
2844  assert(4 == E->getNumArgs() &&
2845  "Illegal number of parameters to pipe function");
2846  const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
2847  : "__write_pipe_4";
2848 
2849  llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
2850  Int32Ty, Int32Ty};
2851  Value *Arg2 = EmitScalarExpr(E->getArg(2)),
2852  *Arg3 = EmitScalarExpr(E->getArg(3));
2853  llvm::FunctionType *FTy = llvm::FunctionType::get(
2854  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2855  Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
2856  // We know the third argument is an integer type, but we may need to cast
2857  // it to i32.
2858  if (Arg2->getType() != Int32Ty)
2859  Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
2860  return RValue::get(Builder.CreateCall(
2861  CGM.CreateRuntimeFunction(FTy, Name),
2862  {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
2863  }
2864  }
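// [Editor's sketch, not part of CGBuiltin.cpp] The pipe cases above all follow
// the same pattern: build a "generic" FunctionType whose pointer parameter is
// i8* in the OpenCL generic address space, declare the runtime helper with
// CGM.CreateRuntimeFunction, and call it with the packet size and alignment
// appended. A self-contained IRBuilder sketch of that pattern; the helper name
// "__read_pipe_2" comes from the 2-argument overload above, but the constant
// size/alignment of 4 and the use of address space 0 are simplifications made
// up for this demo (the real code uses getPipeElemSize/getPipeElemAlign and
// the target's generic address space):
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

int demoReadPipe2() {
  llvm::LLVMContext Ctx;
  llvm::Module M("pipe-demo", Ctx);
  llvm::IRBuilder<> B(Ctx);

  llvm::Type *I8Ptr = B.getInt8PtrTy();
  llvm::Type *I32 = B.getInt32Ty();

  // Generic prototype: i32 __read_pipe_2(i8* pipe, i8* packet, i32 size, i32 align)
  llvm::FunctionType *HelperTy =
      llvm::FunctionType::get(I32, {I8Ptr, I8Ptr, I32, I32}, /*isVarArg=*/false);
  llvm::Constant *Helper = M.getOrInsertFunction("__read_pipe_2", HelperTy);

  // A caller that forwards its two pointers plus the packet size/alignment,
  // mirroring the {Arg0, BCast, PacketSize, PacketAlign} argument list above.
  llvm::FunctionType *CallerTy =
      llvm::FunctionType::get(I32, {I8Ptr, I8Ptr}, /*isVarArg=*/false);
  llvm::Function *Caller = llvm::Function::Create(
      CallerTy, llvm::Function::ExternalLinkage, "read_one_packet", &M);
  B.SetInsertPoint(llvm::BasicBlock::Create(Ctx, "entry", Caller));

  auto AI = Caller->arg_begin();
  llvm::Value *Pipe = &*AI++;
  llvm::Value *Packet = &*AI;
  llvm::Value *Ret =
      B.CreateCall(Helper, {Pipe, Packet, B.getInt32(4), B.getInt32(4)});
  B.CreateRet(Ret);

  M.print(llvm::outs(), nullptr); // dump the generated IR
  return 0;
}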
 2865  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
2866  // functions
2867  case Builtin::BIreserve_read_pipe:
2868  case Builtin::BIreserve_write_pipe:
2869  case Builtin::BIwork_group_reserve_read_pipe:
2870  case Builtin::BIwork_group_reserve_write_pipe:
2871  case Builtin::BIsub_group_reserve_read_pipe:
2872  case Builtin::BIsub_group_reserve_write_pipe: {
2873  // Composing the mangled name for the function.
2874  const char *Name;
2875  if (BuiltinID == Builtin::BIreserve_read_pipe)
2876  Name = "__reserve_read_pipe";
2877  else if (BuiltinID == Builtin::BIreserve_write_pipe)
2878  Name = "__reserve_write_pipe";
2879  else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
2880  Name = "__work_group_reserve_read_pipe";
2881  else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
2882  Name = "__work_group_reserve_write_pipe";
2883  else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
2884  Name = "__sub_group_reserve_read_pipe";
2885  else
2886  Name = "__sub_group_reserve_write_pipe";
2887 
2888  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2889  *Arg1 = EmitScalarExpr(E->getArg(1));
2890  llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
2891  CGOpenCLRuntime OpenCLRT(CGM);
2892  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2893  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2894 
2895  // Building the generic function prototype.
2896  llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
2897  llvm::FunctionType *FTy = llvm::FunctionType::get(
2898  ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2899  // We know the second argument is an integer type, but we may need to cast
2900  // it to i32.
2901  if (Arg1->getType() != Int32Ty)
2902  Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
2903  return RValue::get(
2904  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2905  {Arg0, Arg1, PacketSize, PacketAlign}));
2906  }
2907  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
2908  // functions
2909  case Builtin::BIcommit_read_pipe:
2910  case Builtin::BIcommit_write_pipe:
2911  case Builtin::BIwork_group_commit_read_pipe:
2912  case Builtin::BIwork_group_commit_write_pipe:
2913  case Builtin::BIsub_group_commit_read_pipe:
2914  case Builtin::BIsub_group_commit_write_pipe: {
2915  const char *Name;
2916  if (BuiltinID == Builtin::BIcommit_read_pipe)
2917  Name = "__commit_read_pipe";
2918  else if (BuiltinID == Builtin::BIcommit_write_pipe)
2919  Name = "__commit_write_pipe";
2920  else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
2921  Name = "__work_group_commit_read_pipe";
2922  else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
2923  Name = "__work_group_commit_write_pipe";
2924  else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
2925  Name = "__sub_group_commit_read_pipe";
2926  else
2927  Name = "__sub_group_commit_write_pipe";
2928 
2929  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2930  *Arg1 = EmitScalarExpr(E->getArg(1));
2931  CGOpenCLRuntime OpenCLRT(CGM);
2932  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2933  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2934 
2935  // Building the generic function prototype.
2936  llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
2937  llvm::FunctionType *FTy =
2938  llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
2939  llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2940 
2941  return RValue::get(
2942  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2943  {Arg0, Arg1, PacketSize, PacketAlign}));
2944  }
2945  // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
2946  case Builtin::BIget_pipe_num_packets:
2947  case Builtin::BIget_pipe_max_packets: {
2948  const char *Name;
2949  if (BuiltinID == Builtin::BIget_pipe_num_packets)
2950  Name = "__get_pipe_num_packets";
2951  else
2952  Name = "__get_pipe_max_packets";
2953 
2954  // Building the generic function prototype.
2955  Value *Arg0 = EmitScalarExpr(E->getArg(0));
2956  CGOpenCLRuntime OpenCLRT(CGM);
2957  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2958  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2959  llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
2960  llvm::FunctionType *FTy = llvm::FunctionType::get(
2961  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2962 
2963  return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2964  {Arg0, PacketSize, PacketAlign}));
2965  }
2966 
2967  // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
2968  case Builtin::BIto_global:
2969  case Builtin::BIto_local:
2970  case Builtin::BIto_private: {
2971  auto Arg0 = EmitScalarExpr(E->getArg(0));
2972  auto NewArgT = llvm::PointerType::get(Int8Ty,
2973  CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2974  auto NewRetT = llvm::PointerType::get(Int8Ty,
 2975  CGM.getContext().getTargetAddressSpace(
 2976  E->getType()->getPointeeType().getAddressSpace()));
2977  auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
2978  llvm::Value *NewArg;
2979  if (Arg0->getType()->getPointerAddressSpace() !=
2980  NewArgT->getPointerAddressSpace())
2981  NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
2982  else
2983  NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
2984  auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
2985  auto NewCall =
2986  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
2987  return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
2988  ConvertType(E->getType())));
2989  }
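// [Editor's sketch, not part of CGBuiltin.cpp] The address-space case above
// derives the runtime helper name by prefixing the called builtin with "__"
// (to_global -> __to_global, and so on) and only emits an addrspacecast when
// the argument is not already a generic pointer. A tiny standalone model of
// the name derivation (the function name here is invented):
#include <string>

std::string addrSpaceHelperName(const std::string &CalleeName) {
  return "__" + CalleeName; // "to_global" -> "__to_global", etc.
}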
2990 
2991  // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
2992  // It contains four different overload formats specified in Table 6.13.17.1.
2993  case Builtin::BIenqueue_kernel: {
2994  StringRef Name; // Generated function call name
2995  unsigned NumArgs = E->getNumArgs();
2996 
2997  llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
2998  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2999  getContext().getTargetAddressSpace(LangAS::opencl_generic));
3000 
3001  llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
3002  llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
3003  LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
3004  llvm::Value *Range = NDRangeL.getAddress().getPointer();
3005  llvm::Type *RangeTy = NDRangeL.getAddress().getType();
3006 
3007  if (NumArgs == 4) {
3008  // The most basic form of the call with parameters:
3009  // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
3010  Name = "__enqueue_kernel_basic";
3011  llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
3012  GenericVoidPtrTy};
3013  llvm::FunctionType *FTy = llvm::FunctionType::get(
3014  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3015 
3016  auto Info =
3017  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
3018  llvm::Value *Kernel =
3019  Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3020  llvm::Value *Block =
3021  Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3022 
3023  AttrBuilder B;
3024  B.addAttribute(Attribute::ByVal);
3025  llvm::AttributeList ByValAttrSet =
3026  llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
3027 
3028  auto RTCall =
3029  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
3030  {Queue, Flags, Range, Kernel, Block});
3031  RTCall->setAttributes(ByValAttrSet);
3032  return RValue::get(RTCall);
3033  }
3034  assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
3035 
3036  // Create a temporary array to hold the sizes of local pointer arguments
3037  // for the block. \p First is the position of the first size argument.
3038  auto CreateArrayForSizeVar = [=](unsigned First) {
3039  auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First);
3040  auto *Arr = Builder.CreateAlloca(AT);
3041  llvm::Value *Ptr;
3042  // Each of the following arguments specifies the size of the corresponding
3043  // argument passed to the enqueued block.
3044  auto *Zero = llvm::ConstantInt::get(IntTy, 0);
3045  for (unsigned I = First; I < NumArgs; ++I) {
3046  auto *Index = llvm::ConstantInt::get(IntTy, I - First);
3047  auto *GEP = Builder.CreateGEP(Arr, {Zero, Index});
3048  if (I == First)
3049  Ptr = GEP;
3050  auto *V =
3051  Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
3052  Builder.CreateAlignedStore(
3053  V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy));
3054  }
3055  return Ptr;
3056  };
3057 
3058  // Could have events and/or vaargs.
3059  if (E->getArg(3)->getType()->isBlockPointerType()) {
3060  // No events passed, but has variadic arguments.
3061  Name = "__enqueue_kernel_vaargs";
3062  auto Info =
3063  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
3064  llvm::Value *Kernel =
3065  Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3066  auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3067  auto *PtrToSizeArray = CreateArrayForSizeVar(4);
3068 
3069  // Create a vector of the arguments, as well as a constant value to
3070  // express to the runtime the number of variadic arguments.
3071  std::vector<llvm::Value *> Args = {
3072  Queue, Flags, Range,
3073  Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4),
3074  PtrToSizeArray};
3075  std::vector<llvm::Type *> ArgTys = {
3076  QueueTy, IntTy, RangeTy,
3077  GenericVoidPtrTy, GenericVoidPtrTy, IntTy,
3078  PtrToSizeArray->getType()};
3079 
3080  llvm::FunctionType *FTy = llvm::FunctionType::get(
3081  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3082  return RValue::get(
3083  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
 3084  llvm::ArrayRef<llvm::Value *>(Args)));
 3085  }
3086  // Any calls now have event arguments passed.
3087  if (NumArgs >= 7) {
3088  llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
3089  llvm::Type *EventPtrTy = EventTy->getPointerTo(
3090  CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
3091 
3092  llvm::Value *NumEvents =
3093  Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
3094  llvm::Value *EventList =
3095  E->getArg(4)->getType()->isArrayType()
3096  ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
3097  : EmitScalarExpr(E->getArg(4));
3098  llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
3099  // Convert to generic address space.
3100  EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
3101  ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
3102  auto Info =
3103  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
3104  llvm::Value *Kernel =
3105  Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3106  llvm::Value *Block =
3107  Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3108 
3109  std::vector<llvm::Type *> ArgTys = {
3110  QueueTy, Int32Ty, RangeTy, Int32Ty,
3111  EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
3112 
3113  std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents,
3114  EventList, ClkEvent, Kernel, Block};
3115 
3116  if (NumArgs == 7) {
3117  // Has events but no variadics.
3118  Name = "__enqueue_kernel_basic_events";
3119  llvm::FunctionType *FTy = llvm::FunctionType::get(
3120  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3121  return RValue::get(
3122  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
 3123  llvm::ArrayRef<llvm::Value *>(Args)));
 3124  }
3125  // Has event info and variadics
3126  // Pass the number of variadics to the runtime function too.
3127  Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
3128  ArgTys.push_back(Int32Ty);
3129  Name = "__enqueue_kernel_events_vaargs";
3130 
3131  auto *PtrToSizeArray = CreateArrayForSizeVar(7);
3132  Args.push_back(PtrToSizeArray);
3133  ArgTys.push_back(PtrToSizeArray->getType());
3134 
3135  llvm::FunctionType *FTy = llvm::FunctionType::get(
3136  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3137  return RValue::get(
3138  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
 3139  llvm::ArrayRef<llvm::Value *>(Args)));
 3140  }
3141  LLVM_FALLTHROUGH;
3142  }
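// [Editor's sketch, not part of CGBuiltin.cpp] For the variadic enqueue_kernel
// overloads above, each trailing argument is the byte size of one __local
// pointer parameter of the enqueued block; CreateArrayForSizeVar packs those
// sizes into a stack array whose address is handed to __enqueue_kernel_vaargs
// or __enqueue_kernel_events_vaargs together with their count. A plain C++
// model of that packing (names below are invented for illustration):
#include <cstdint>
#include <vector>

std::vector<uint64_t> packLocalSizes(const std::vector<uint64_t> &CallArgs,
                                     unsigned First) {
  std::vector<uint64_t> Sizes;
  // Arguments First..N-1 of the source-level call are the local buffer sizes.
  for (unsigned I = First; I < CallArgs.size(); ++I)
    Sizes.push_back(CallArgs[I]); // each size is zext/trunc'd to size_t
  return Sizes;
}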
3143  // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
3144  // parameter.
3145  case Builtin::BIget_kernel_work_group_size: {
3146  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3147  getContext().getTargetAddressSpace(LangAS::opencl_generic));
3148  auto Info =
3149  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
3150  Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3151  Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3152  return RValue::get(Builder.CreateCall(
3153  CGM.CreateRuntimeFunction(
3154  llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
3155  false),
3156  "__get_kernel_work_group_size_impl"),
3157  {Kernel, Arg}));
3158  }
3159  case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
3160  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3161  getContext().getTargetAddressSpace(LangAS::opencl_generic));
3162  auto Info =
3163  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
3164  Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3165  Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3166  return RValue::get(Builder.CreateCall(
3167  CGM.CreateRuntimeFunction(
3168  llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
3169  false),
3170  "__get_kernel_preferred_work_group_multiple_impl"),
3171  {Kernel, Arg}));
3172  }
3173  case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
3174  case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
3175  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3176  getContext().getTargetAddressSpace(LangAS::opencl_generic));
3177  LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
3178  llvm::Value *NDRange = NDRangeL.getAddress().getPointer();
3179  auto Info =
3180  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
3181  Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3182  Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3183  const char *Name =
3184  BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
3185  ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
3186  : "__get_kernel_sub_group_count_for_ndrange_impl";
3187  return RValue::get(Builder.CreateCall(
3188  CGM.CreateRuntimeFunction(
3189  llvm::FunctionType::get(
3190  IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
3191  false),
3192  Name),
3193  {NDRange, Kernel, Block}));
3194  }
3195 
3196  case Builtin::BI__builtin_store_half:
3197  case Builtin::BI__builtin_store_halff: {
3198  Value *Val = EmitScalarExpr(E->getArg(0));
3199  Address Address = EmitPointerWithAlignment(E->getArg(1));
3200  Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
3201  return RValue::get(Builder.CreateStore(HalfVal, Address));
3202  }
3203  case Builtin::BI__builtin_load_half: {
3204  Address Address = EmitPointerWithAlignment(E->getArg(0));
3205  Value *HalfVal = Builder.CreateLoad(Address);
3206  return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
3207  }
3208  case Builtin::BI__builtin_load_halff: {
3209  Address Address = EmitPointerWithAlignment(E->getArg(0));
3210  Value *HalfVal = Builder.CreateLoad(Address);
3211  return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
3212  }
3213  case Builtin::BIprintf:
3214  if (getTarget().getTriple().isNVPTX())
3215  return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
3216  break;
3217  case Builtin::BI__builtin_canonicalize:
3218  case Builtin::BI__builtin_canonicalizef:
3219  case Builtin::BI__builtin_canonicalizel:
3220  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
3221 
3222  case Builtin::BI__builtin_thread_pointer: {
3223  if (!getContext().getTargetInfo().isTLSSupported())
3224  CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
3225  // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
3226  break;
3227  }
3228  case Builtin::BI__builtin_os_log_format:
3229  return emitBuiltinOSLogFormat(*E);
3230 
3231  case Builtin::BI__builtin_os_log_format_buffer_size: {
 3232  analyze_os_log::OSLogBufferLayout Layout;
 3233  analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
3234  return RValue::get(ConstantInt::get(ConvertType(E->getType()),
3235  Layout.size().getQuantity()));
3236  }
3237 
3238  case Builtin::BI__xray_customevent: {
3239  if (!ShouldXRayInstrumentFunction())
3240  return RValue::getIgnored();
3241  if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
3242  if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
3243  return RValue::getIgnored();
3244 
3245  Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
3246  auto FTy = F->getFunctionType();
3247  auto Arg0 = E->getArg(0);
3248  auto Arg0Val = EmitScalarExpr(Arg0);
3249  auto Arg0Ty = Arg0->getType();
3250  auto PTy0 = FTy->getParamType(0);
3251  if (PTy0 != Arg0Val->getType()) {
3252  if (Arg0Ty->isArrayType())
3253  Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
3254  else
3255  Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
3256  }
3257  auto Arg1 = EmitScalarExpr(E->getArg(1));
3258  auto PTy1 = FTy->getParamType(1);
3259  if (PTy1 != Arg1->getType())
3260  Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
3261  return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
3262  }
3263 
3264  case Builtin::BI__builtin_ms_va_start:
3265  case Builtin::BI__builtin_ms_va_end:
3266  return RValue::get(
3267  EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
3268  BuiltinID == Builtin::BI__builtin_ms_va_start));
3269 
3270  case Builtin::BI__builtin_ms_va_copy: {
3271  // Lower this manually. We can't reliably determine whether or not any
3272  // given va_copy() is for a Win64 va_list from the calling convention
3273  // alone, because it's legal to do this from a System V ABI function.
3274  // With opaque pointer types, we won't have enough information in LLVM
3275  // IR to determine this from the argument types, either. Best to do it
3276  // now, while we have enough information.
3277  Address DestAddr = EmitMSVAListRef(E->getArg(0));
3278  Address SrcAddr = EmitMSVAListRef(E->getArg(1));
3279 
3280  llvm::Type *BPP = Int8PtrPtrTy;
3281 
3282  DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
3283  DestAddr.getAlignment());
3284  SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
3285  SrcAddr.getAlignment());
3286 
3287  Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
3288  return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
3289  }
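// [Editor's sketch, not part of CGBuiltin.cpp] On Win64 a va_list is a single
// character-pointer cursor, so the lowering above amounts to loading that
// pointer from the source list and storing it into the destination. In plain
// C++ terms:
void msVaCopySketch(char **Dest, char **Src) {
  *Dest = *Src; // __builtin_ms_va_copy(dest, src)
}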
3290  }
3291 
3292  // If this is an alias for a lib function (e.g. __builtin_sin), emit
3293  // the call using the normal call path, but using the unmangled
3294  // version of the function name.
3295  if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
3296  return emitLibraryCall(*this, FD, E,
3297  CGM.getBuiltinLibFunction(FD, BuiltinID));
3298 
3299  // If this is a predefined lib function (e.g. malloc), emit the call
3300  // using exactly the normal call path.
3301  if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
3302  return emitLibraryCall(*this, FD, E,
3303  cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
3304 
3305  // Check that a call to a target specific builtin has the correct target
3306  // features.
3307  // This is down here to avoid non-target specific builtins, however, if
3308  // generic builtins start to require generic target features then we
3309  // can move this up to the beginning of the function.
3310  checkTargetFeatures(E, FD);
3311 
3312  // See if we have a target specific intrinsic.
3313  const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
3314  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
3315  StringRef Prefix =
3316  llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
3317  if (!Prefix.empty()) {
3318  IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
 3319  // NOTE: we don't need to perform a compatibility flag check here since the
 3320  // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters the
 3321  // MS builtins via ALL_MS_LANGUAGES, so they are filtered earlier.
3322  if (IntrinsicID == Intrinsic::not_intrinsic)
3323  IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
3324  }
3325 
3326  if (IntrinsicID != Intrinsic::not_intrinsic) {
 3327  SmallVector<Value *, 16> Args;
 3328 
3329  // Find out if any arguments are required to be integer constant
3330  // expressions.
3331  unsigned ICEArguments = 0;
 3332  ASTContext::GetBuiltinTypeError Error;
 3333  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3334  assert(Error == ASTContext::GE_None && "Should not codegen an error");
3335 
3336  Function *F = CGM.getIntrinsic(IntrinsicID);
3337  llvm::FunctionType *FTy = F->getFunctionType();
3338 
3339  for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
3340  Value *ArgValue;
3341  // If this is a normal argument, just emit it as a scalar.
3342  if ((ICEArguments & (1 << i)) == 0) {
3343  ArgValue = EmitScalarExpr(E->getArg(i));
3344  } else {
3345  // If this is required to be a constant, constant fold it so that we
3346  // know that the generated intrinsic gets a ConstantInt.
3347  llvm::APSInt Result;
3348  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
3349  assert(IsConst && "Constant arg isn't actually constant?");
3350  (void)IsConst;
3351  ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
3352  }
3353 
3354  // If the intrinsic arg type is different from the builtin arg type
3355  // we need to do a bit cast.
3356  llvm::Type *PTy = FTy->getParamType(i);
3357  if (PTy != ArgValue->getType()) {
 3358  assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
3359  "Must be able to losslessly bit cast to param");
3360  ArgValue = Builder.CreateBitCast(ArgValue, PTy);
3361  }
3362 
3363  Args.push_back(ArgValue);
3364  }
3365 
3366  Value *V = Builder.CreateCall(F, Args);
3367  QualType BuiltinRetType = E->getType();
3368 
3369  llvm::Type *RetTy = VoidTy;
3370  if (!BuiltinRetType->isVoidType())
3371  RetTy = ConvertType(BuiltinRetType);
3372 
3373  if (RetTy != V->getType()) {
3374  assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
3375  "Must be able to losslessly bit cast result type");
3376  V = Builder.CreateBitCast(V, RetTy);
3377  }
3378 
3379  return RValue::get(V);
3380  }
3381 
3382  // See if we have a target specific builtin that needs to be lowered.
3383  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
3384  return RValue::get(V);
3385 
3386  ErrorUnsupported(E, "builtin function");
3387 
3388  // Unknown builtin, for now just dump it out and return undef.
3389  return GetUndefRValue(E->getType());
3390 }
3391 
 3392 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
 3393  unsigned BuiltinID, const CallExpr *E,
3394  llvm::Triple::ArchType Arch) {
3395  switch (Arch) {
3396  case llvm::Triple::arm:
3397  case llvm::Triple::armeb:
3398  case llvm::Triple::thumb:
3399  case llvm::Triple::thumbeb:
3400  return CGF->EmitARMBuiltinExpr(BuiltinID, E, Arch);
3401  case llvm::Triple::aarch64:
3402  case llvm::Triple::aarch64_be:
3403  return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
3404  case llvm::Triple::x86:
3405  case llvm::Triple::x86_64:
3406  return CGF->EmitX86BuiltinExpr(BuiltinID, E);
3407  case llvm::Triple::ppc:
3408  case llvm::Triple::ppc64:
3409  case llvm::Triple::ppc64le:
3410  return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
3411  case llvm::Triple::r600:
3412  case llvm::Triple::amdgcn:
3413  return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
3414  case llvm::Triple::systemz:
3415  return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
3416  case llvm::Triple::nvptx:
3417  case llvm::Triple::nvptx64:
3418  return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
3419  case llvm::Triple::wasm32:
3420  case llvm::Triple::wasm64:
3421  return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
3422  case llvm::Triple::hexagon:
3423  return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
3424  default:
3425  return nullptr;
3426  }
3427 }
3428 
 3429 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
 3430  const CallExpr *E) {
3431  if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
3432  assert(getContext().getAuxTargetInfo() && "Missing aux target info");
 3433  return EmitTargetArchBuiltinExpr(
 3434  this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
3435  getContext().getAuxTargetInfo()->getTriple().getArch());
3436  }
3437 
3438  return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
3439  getTarget().getTriple().getArch());
3440 }
3441 
3442 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
3443  NeonTypeFlags TypeFlags,
3444  llvm::Triple::ArchType Arch,
3445  bool V1Ty=false) {
3446  int IsQuad = TypeFlags.isQuad();
3447  switch (TypeFlags.getEltType()) {
3448  case NeonTypeFlags::Int8:
3449  case NeonTypeFlags::Poly8:
3450  return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
3451  case NeonTypeFlags::Int16:
3452  case NeonTypeFlags::Poly16:
3453  return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
 3454  case NeonTypeFlags::Float16:
 3455  // FIXME: Only AArch64 backend can so far properly handle half types.
3456  // Remove else part once ARM backend support for half is complete.
3457  if (Arch == llvm::Triple::aarch64)
3458  return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
3459  else
3460  return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
3461  case NeonTypeFlags::Int32:
3462  return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
3463  case NeonTypeFlags::Int64:
3464  case NeonTypeFlags::Poly64:
3465  return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
 3466  case NeonTypeFlags::Poly128:
 3467  // FIXME: i128 and f128 don't get full support in Clang and LLVM.
 3468  // A lot of the i128 and f128 API is missing,
 3469  // so we use v16i8 to represent poly128 and get it pattern matched.
3470  return llvm::VectorType::get(CGF->Int8Ty, 16);
 3471  case NeonTypeFlags::Float32:
 3472  return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
 3473  case NeonTypeFlags::Float64:
 3474  return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
3475  }
3476  llvm_unreachable("Unknown vector element type!");
3477 }
3478 
3479 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
3480  NeonTypeFlags IntTypeFlags) {
3481  int IsQuad = IntTypeFlags.isQuad();
3482  switch (IntTypeFlags.getEltType()) {
3483  case NeonTypeFlags::Int16:
3484  return llvm::VectorType::get(CGF->HalfTy, (4 << IsQuad));
3485  case NeonTypeFlags::Int32:
3486  return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
3487  case NeonTypeFlags::Int64:
3488  return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
3489  default:
3490  llvm_unreachable("Type can't be converted to floating-point!");
3491  }
3492 }
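// [Editor's sketch, not part of CGBuiltin.cpp] The "(N << IsQuad)" expressions
// in GetNeonType/GetFloatNeonType encode how many lanes of a given element
// width fit in a 64-bit D register, doubled for the 128-bit Q ("quad") form.
// Equivalently:
unsigned neonLaneCount(unsigned EltBits, bool IsQuad) {
  return (64 / EltBits) << (IsQuad ? 1 : 0);
}
// e.g. neonLaneCount(8, false) == 8   matches (8 << IsQuad) for Int8/Poly8
//      neonLaneCount(32, true) == 4   matches (2 << IsQuad) for Int32/Float32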
3493 
 3494 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
 3495  unsigned nElts = V->getType()->getVectorNumElements();
3496  Value* SV = llvm::ConstantVector::getSplat(nElts, C);
3497  return Builder.CreateShuffleVector(V, V, SV, "lane");
3498 }
3499 
 3500 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
 3501  const char *name,
3502  unsigned shift, bool rightshift) {
3503  unsigned j = 0;
3504  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3505  ai != ae; ++ai, ++j)
3506  if (shift > 0 && shift == j)
3507  Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
3508  else
3509  Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
3510 
3511  return Builder.CreateCall(F, Ops, name);
3512 }
3513 
 3514 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
 3515  bool neg) {
3516  int SV = cast<ConstantInt>(V)->getSExtValue();
3517  return ConstantInt::get(Ty, neg ? -SV : SV);
3518 }
3519 
3520 // \brief Right-shift a vector by a constant.
 3521 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
 3522  llvm::Type *Ty, bool usgn,
3523  const char *name) {
3524  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
3525 
3526  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
3527  int EltSize = VTy->getScalarSizeInBits();
3528 
3529  Vec = Builder.CreateBitCast(Vec, Ty);
3530 
3531  // lshr/ashr are undefined when the shift amount is equal to the vector
3532  // element size.
3533  if (ShiftAmt == EltSize) {
3534  if (usgn) {
3535  // Right-shifting an unsigned value by its size yields 0.
3536  return llvm::ConstantAggregateZero::get(VTy);
3537  } else {
3538  // Right-shifting a signed value by its size is equivalent
3539  // to a shift of size-1.
3540  --ShiftAmt;
3541  Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
3542  }
3543  }
3544 
3545  Shift = EmitNeonShiftVector(Shift, Ty, false);
3546  if (usgn)
3547  return Builder.CreateLShr(Vec, Shift, name);
3548  else
3549  return Builder.CreateAShr(Vec, Shift, name);
3550 }
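// [Editor's sketch, not part of CGBuiltin.cpp] Scalar model of the special
// case handled above: LLVM's lshr/ashr are undefined when the shift amount
// equals the element width, so the unsigned form is folded to zero and the
// signed form is clamped to width-1, which preserves the sign-fill semantics
// of the NEON instruction. Modelled here for a single element of EltBits bits
// held in a 64-bit value:
#include <cstdint>

uint64_t ushrModel(uint64_t V, unsigned Amt, unsigned EltBits) {
  return Amt >= EltBits ? 0 : V >> Amt;   // unsigned: full-width shift yields 0
}
int64_t sshrModel(int64_t V, unsigned Amt, unsigned EltBits) {
  if (Amt >= EltBits)
    Amt = EltBits - 1;                    // signed: clamp to width-1
  return V >> Amt;                        // arithmetic shift on the sign bit
}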
3551 
3552 enum {
3553  AddRetType = (1 << 0),
3554  Add1ArgType = (1 << 1),
3555  Add2ArgTypes = (1 << 2),
3556 
3557  VectorizeRetType = (1 << 3),
3558  VectorizeArgTypes = (1 << 4),
3559 
3560  InventFloatType = (1 << 5),
3561  UnsignedAlts = (1 << 6),
3562 
3563  Use64BitVectors = (1 << 7),
3564  Use128BitVectors = (1 << 8),
3565 
 3565 
 3566  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
 3567  VectorRet = AddRetType | VectorizeRetType,
 3568  VectorRetGetArgs01 =
 3569  AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
 3570  FpCmpzModifiers =
 3571  AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
 3572 };
3573 
3574 namespace {
3575 struct NeonIntrinsicInfo {
3576  const char *NameHint;
3577  unsigned BuiltinID;
3578  unsigned LLVMIntrinsic;
3579  unsigned AltLLVMIntrinsic;
3580  unsigned TypeModifier;
3581 
3582  bool operator<(unsigned RHSBuiltinID) const {
3583  return BuiltinID < RHSBuiltinID;
3584  }
3585  bool operator<(const NeonIntrinsicInfo &TE) const {
3586  return BuiltinID < TE.BuiltinID;
3587  }
3588 };
3589 } // end anonymous namespace
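// [Editor's sketch, not part of CGBuiltin.cpp] The operator< overloads above
// exist so that the intrinsic tables below, which are kept sorted by
// BuiltinID, can be searched with std::lower_bound (the real lookup helper
// appears later in this file and is not shown in this excerpt). A minimal
// version of that lookup:
#include <algorithm>
#include "llvm/ADT/ArrayRef.h"

static const NeonIntrinsicInfo *
lookupNeonInfo(llvm::ArrayRef<NeonIntrinsicInfo> Map, unsigned BuiltinID) {
  auto It = std::lower_bound(Map.begin(), Map.end(), BuiltinID);
  if (It != Map.end() && It->BuiltinID == BuiltinID)
    return &*It;
  return nullptr; // builtin not handled by this table
}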
3590 
3591 #define NEONMAP0(NameBase) \
3592  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
3593 
3594 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
3595  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
3596  Intrinsic::LLVMIntrinsic, 0, TypeModifier }
3597 
3598 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
3599  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
3600  Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
3601  TypeModifier }
3602 
3603 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
3604  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3605  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3606  NEONMAP1(vabs_v, arm_neon_vabs, 0),
3607  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
3608  NEONMAP0(vaddhn_v),
3609  NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
3610  NEONMAP1(vaeseq_v, arm_neon_aese, 0),
3611  NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
3612  NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
3613  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
3614  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
3615  NEONMAP1(vcage_v, arm_neon_vacge, 0),
3616  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
3617  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
3618  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
3619  NEONMAP1(vcale_v, arm_neon_vacge, 0),
3620  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
3621  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
3622  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
3623  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
3624  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
3625  NEONMAP1(vclz_v, ctlz, Add1ArgType),
3626  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3627  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3628  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3629  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
3630  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
3631  NEONMAP0(vcvt_f32_v),
3632  NEONMAP2(vcvt_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3633  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3634  NEONMAP1(vcvt_n_s16_v, arm_neon_vcvtfp2fxs, 0),
3635  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3636  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3637  NEONMAP1(vcvt_n_u16_v, arm_neon_vcvtfp2fxu, 0),
3638  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3639  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3640  NEONMAP0(vcvt_s16_v),
3641  NEONMAP0(vcvt_s32_v),
3642  NEONMAP0(vcvt_s64_v),
3643  NEONMAP0(vcvt_u16_v),
3644  NEONMAP0(vcvt_u32_v),
3645  NEONMAP0(vcvt_u64_v),
3646  NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0),
3647  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
3648  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
3649  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
3650  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
3651  NEONMAP1(vcvtaq_s16_v, arm_neon_vcvtas, 0),
3652  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
3653  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
3654  NEONMAP1(vcvtaq_u16_v, arm_neon_vcvtau, 0),
3655  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
3656  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
3657  NEONMAP1(vcvtm_s16_v, arm_neon_vcvtms, 0),
3658  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
3659  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
3660  NEONMAP1(vcvtm_u16_v, arm_neon_vcvtmu, 0),
3661  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
3662  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
3663  NEONMAP1(vcvtmq_s16_v, arm_neon_vcvtms, 0),
3664  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
3665  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
3666  NEONMAP1(vcvtmq_u16_v, arm_neon_vcvtmu, 0),
3667  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
3668  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
3669  NEONMAP1(vcvtn_s16_v, arm_neon_vcvtns, 0),
3670  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
3671  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
3672  NEONMAP1(vcvtn_u16_v, arm_neon_vcvtnu, 0),
3673  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
3674  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
3675  NEONMAP1(vcvtnq_s16_v, arm_neon_vcvtns, 0),
3676  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
3677  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
3678  NEONMAP1(vcvtnq_u16_v, arm_neon_vcvtnu, 0),
3679  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
3680  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
3681  NEONMAP1(vcvtp_s16_v, arm_neon_vcvtps, 0),
3682  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
3683  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
3684  NEONMAP1(vcvtp_u16_v, arm_neon_vcvtpu, 0),
3685  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
3686  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
3687  NEONMAP1(vcvtpq_s16_v, arm_neon_vcvtps, 0),
3688  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
3689  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
3690  NEONMAP1(vcvtpq_u16_v, arm_neon_vcvtpu, 0),
3691  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
3692  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
3693  NEONMAP0(vcvtq_f32_v),
3694  NEONMAP2(vcvtq_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3695  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3696  NEONMAP1(vcvtq_n_s16_v, arm_neon_vcvtfp2fxs, 0),
3697  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3698  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3699  NEONMAP1(vcvtq_n_u16_v, arm_neon_vcvtfp2fxu, 0),
3700  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3701  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3702  NEONMAP0(vcvtq_s16_v),
3703  NEONMAP0(vcvtq_s32_v),
3704  NEONMAP0(vcvtq_s64_v),
3705  NEONMAP0(vcvtq_u16_v),
3706  NEONMAP0(vcvtq_u32_v),
3707  NEONMAP0(vcvtq_u64_v),
3708  NEONMAP0(vext_v),
3709  NEONMAP0(vextq_v),
3710  NEONMAP0(vfma_v),
3711  NEONMAP0(vfmaq_v),
3712  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3713  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3714  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3715  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3716  NEONMAP0(vld1_dup_v),
3717  NEONMAP1(vld1_v, arm_neon_vld1, 0),
3718  NEONMAP0(vld1q_dup_v),
3719  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
3720  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
3721  NEONMAP1(vld2_v, arm_neon_vld2, 0),
3722  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
3723  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
3724  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
3725  NEONMAP1(vld3_v, arm_neon_vld3, 0),
3726  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
3727  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
3728  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
3729  NEONMAP1(vld4_v, arm_neon_vld4, 0),
3730  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
3731  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
3732  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3733  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
3734  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
3735  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3736  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3737  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
3738  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
3739  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3740  NEONMAP0(vmovl_v),
3741  NEONMAP0(vmovn_v),
3742  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
3743  NEONMAP0(vmull_v),
3744  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
3745  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3746  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3747  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
3748  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3749  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3750  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
3751  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
3752  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
3753  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
3754  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
3755  NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3756  NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3757  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
3758  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
3759  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
3760  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
3761  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
3762  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
3763  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
3764  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
3765  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
3766  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
3767  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
3768  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3769  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3770  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3771  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3772  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3773  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3774  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
3775  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
3776  NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3777  NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3778  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
3779  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3780  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3781  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
3782  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
3783  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3784  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3785  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
3786  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
3787  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
3788  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
3789  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
3790  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
3791  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
3792  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
3793  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
3794  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
3795  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
3796  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
3797  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3798  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3799  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3800  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3801  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3802  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3803  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
3804  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
3805  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
3806  NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
3807  NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
3808  NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
3809  NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
3810  NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
3811  NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
3812  NEONMAP0(vshl_n_v),
3813  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3814  NEONMAP0(vshll_n_v),
3815  NEONMAP0(vshlq_n_v),
3816  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3817  NEONMAP0(vshr_n_v),
3818  NEONMAP0(vshrn_n_v),
3819  NEONMAP0(vshrq_n_v),
3820  NEONMAP1(vst1_v, arm_neon_vst1, 0),
3821  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
3822  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
3823  NEONMAP1(vst2_v, arm_neon_vst2, 0),
3824  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
3825  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
3826  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
3827  NEONMAP1(vst3_v, arm_neon_vst3, 0),
3828  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
3829  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
3830  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
3831  NEONMAP1(vst4_v, arm_neon_vst4, 0),
3832  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
3833  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
3834  NEONMAP0(vsubhn_v),
3835  NEONMAP0(vtrn_v),
3836  NEONMAP0(vtrnq_v),
3837  NEONMAP0(vtst_v),
3838  NEONMAP0(vtstq_v),
3839  NEONMAP0(vuzp_v),
3840  NEONMAP0(vuzpq_v),
3841  NEONMAP0(vzip_v),
3842  NEONMAP0(vzipq_v)
3843 };
3844 
3845 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
3846  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
3847  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
3848  NEONMAP0(vaddhn_v),
3849  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
3850  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
3851  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
3852  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
3853  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
3854  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
3855  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
3856  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
3857  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
3858  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
3859  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
3860  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
3861  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
3862  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
3863  NEONMAP1(vclz_v, ctlz, Add1ArgType),
3864  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3865  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3866  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3867  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
3868  NEONMAP0(vcvt_f16_v),
3869  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
3870  NEONMAP0(vcvt_f32_v),
3871  NEONMAP2(vcvt_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3872  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3873  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3874  NEONMAP1(vcvt_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
3875  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3876  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3877  NEONMAP1(vcvt_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
3878  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3879  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3880  NEONMAP0(vcvtq_f16_v),
3881  NEONMAP0(vcvtq_f32_v),
3882  NEONMAP2(vcvtq_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3883  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3884  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3885  NEONMAP1(vcvtq_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
3886  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3887  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3888  NEONMAP1(vcvtq_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
3889  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3890  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3891  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
3892  NEONMAP0(vext_v),
3893  NEONMAP0(vextq_v),
3894  NEONMAP0(vfma_v),
3895  NEONMAP0(vfmaq_v),
3896  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3897  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3898  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3899  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3900  NEONMAP0(vmovl_v),
3901  NEONMAP0(vmovn_v),
3902  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
3903  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
3904  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
3905  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3906  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3907  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
3908  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
3909  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
3910  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3911  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3912  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
3913  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
3914  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
3915  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
3916  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
3917  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
3918  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
3919  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
3920  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
3921  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
3922  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
3923  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3924  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3925  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
3926  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3927  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
3928  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3929  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
3930  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
3931  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3932  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3933  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
3934  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3935  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3936  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
3937  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
3938  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3939  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3940  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3941  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3942  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3943  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3944  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3945  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3946  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
3947  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
3948  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
3949  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
3950  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
3951  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
3952  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
3953  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
3954  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
3955  NEONMAP0(vshl_n_v),
3956  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3957  NEONMAP0(vshll_n_v),
3958  NEONMAP0(vshlq_n_v),
3959  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3960  NEONMAP0(vshr_n_v),
3961  NEONMAP0(vshrn_n_v),
3962  NEONMAP0(vshrq_n_v),
3963  NEONMAP0(vsubhn_v),
3964  NEONMAP0(vtst_v),
3965  NEONMAP0(vtstq_v),
3966 };
3967 
3968 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
3969  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
3970  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
3971  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
3972  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3973  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3974  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3975  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3976  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3977  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3978  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3979  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3980  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
3981  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3982  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
3983  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3984  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3985  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3986  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3987  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3988  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3989  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3990  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3991  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3992  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3993  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3994  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3995  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3996  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3997  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3998  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3999  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
4000  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
4001  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
4002  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
4003  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
4004  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
4005  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
4006  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
4007  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
4008  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
4009  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
4010  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
4011  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
4012  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
4013  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
4014  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
4015  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
4016  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
4017  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
4018  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
4019  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
4020  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
4021  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
4022  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
4023  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
4024  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
4025  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
4026  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
4027  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
4028  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
4029  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
4030  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
4031  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
4032  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
4033  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
4034  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
4035  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
4036  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
4037  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
4038  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
4039  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
4040  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
4041  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
4042  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
4043  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
4044  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
4045  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
4046  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
4047  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
4048  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
4049  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
4050  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
4051  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
4052  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
4053  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
4054  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
4055  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
4056  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
4057  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
4058  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
4059  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
4060  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
4061  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
4062  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
4063  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
4064  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
4065  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
4066  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
4067  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
4068  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
4069  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
4070  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
4071  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
4072  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
4073  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
4074  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
4075  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
4076  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
4077  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
4078  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
4079  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
4080  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
4081  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
4082  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
4083  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
4084  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
4085  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
4086  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
4087  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
4088  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
4089  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
4090  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
4091  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
4092  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
4093  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
4094  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
4095  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
4096  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
4097  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
4098  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
4099  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4100  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4101  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4102  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4103  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
4104  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
4105  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4106  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4107  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4108  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4109  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
4110  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
4111  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
4112  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
4113  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
4114  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
4115  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
4116  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
4117  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
4118  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
4119  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
4120  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
4121  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
4122  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
4123  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
4124  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
4125  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
4126  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
4127  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
4128  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
4129  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
4130  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
4131  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
4132  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
4133  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
4134  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
4135  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
4136  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
4137  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
4138  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
4139  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
4140  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
4141  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
4142  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
4143  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
4144  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
4145  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
4146  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
4147  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
4148  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
4149  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
4150  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
4151  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
4152  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
4153  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
4154  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
4155  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
4156  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
4157  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
4158  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
4159  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
4160  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
4161  // FP16 scalar intrinsics go here.
4162  NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
4163  NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
4164  NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
4165  NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
4166  NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
4167  NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
4168  NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
4169  NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
4170  NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
4171  NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
4172  NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
4173  NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
4174  NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
4175  NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
4176  NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
4177  NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
4178  NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
4179  NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
4180  NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
4181  NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
4182  NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
4183  NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
4184  NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
4185  NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
4186  NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
4187  NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
4188  NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
4189  NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
4190  NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
4191  NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
4192 };
4193 
4194 #undef NEONMAP0
4195 #undef NEONMAP1
4196 #undef NEONMAP2
4197 
4199 
4202 
4203 
4204 static const NeonIntrinsicInfo *
4205 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
4206  unsigned BuiltinID, bool &MapProvenSorted) {
4207 
4208 #ifndef NDEBUG
4209  if (!MapProvenSorted) {
4210  assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
4211  MapProvenSorted = true;
4212  }
4213 #endif
4214 
4215  const NeonIntrinsicInfo *Builtin =
4216  std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
4217 
4218  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
4219  return Builtin;
4220 
4221  return nullptr;
4222 }
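// Illustrative usage sketch (not part of the original source; the map and
// flag names below follow the pattern used elsewhere in this file and are
// assumptions here):
//   if (const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
//           AArch64SISDIntrinsicMap, BuiltinID,
//           AArch64SISDIntrinsicsProvenSorted))
//     return EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);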
4223 
4224 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
4225  unsigned Modifier,
4226  llvm::Type *ArgType,
4227  const CallExpr *E) {
4228  int VectorSize = 0;
4229  if (Modifier & Use64BitVectors)
4230  VectorSize = 64;
4231  else if (Modifier & Use128BitVectors)
4232  VectorSize = 128;
4233 
4234  // Return type.
4235  SmallVector<llvm::Type *, 3> Tys;
4236  if (Modifier & AddRetType) {
4237  llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
4238  if (Modifier & VectorizeRetType)
4239  Ty = llvm::VectorType::get(
4240  Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
4241 
4242  Tys.push_back(Ty);
4243  }
4244 
4245  // Arguments.
4246  if (Modifier & VectorizeArgTypes) {
4247  int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
4248  ArgType = llvm::VectorType::get(ArgType, Elts);
4249  }
4250 
4251  if (Modifier & (Add1ArgType | Add2ArgTypes))
4252  Tys.push_back(ArgType);
4253 
4254  if (Modifier & Add2ArgTypes)
4255  Tys.push_back(ArgType);
4256 
4257  if (Modifier & InventFloatType)
4258  Tys.push_back(FloatTy);
4259 
4260  return CGM.getIntrinsic(IntrinsicID, Tys);
4261 }
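// Illustrative example (an assumption derived from the tables above, not
// original text): vqmovnh_s16 is mapped with VectorRet | Use64BitVectors, so
// its scalar i8 return type is re-vectorized to <8 x i8> here and the
// overload resolved is roughly llvm.aarch64.neon.sqxtn.v8i8,
// i.e. <8 x i8> (<8 x i16>).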
4262 
4263 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
4264  const NeonIntrinsicInfo &SISDInfo,
4265  SmallVectorImpl<Value *> &Ops,
4266  const CallExpr *E) {
4267  unsigned BuiltinID = SISDInfo.BuiltinID;
4268  unsigned int Int = SISDInfo.LLVMIntrinsic;
4269  unsigned Modifier = SISDInfo.TypeModifier;
4270  const char *s = SISDInfo.NameHint;
4271 
4272  switch (BuiltinID) {
4273  case NEON::BI__builtin_neon_vcled_s64:
4274  case NEON::BI__builtin_neon_vcled_u64:
4275  case NEON::BI__builtin_neon_vcles_f32:
4276  case NEON::BI__builtin_neon_vcled_f64:
4277  case NEON::BI__builtin_neon_vcltd_s64:
4278  case NEON::BI__builtin_neon_vcltd_u64:
4279  case NEON::BI__builtin_neon_vclts_f32:
4280  case NEON::BI__builtin_neon_vcltd_f64:
4281  case NEON::BI__builtin_neon_vcales_f32:
4282  case NEON::BI__builtin_neon_vcaled_f64:
4283  case NEON::BI__builtin_neon_vcalts_f32:
4284  case NEON::BI__builtin_neon_vcaltd_f64:
4285  // Only one direction of comparisons actually exists; cmle is really a cmge
4286  // with swapped operands. The table gives us the right intrinsic, but we
4287  // still need to do the swap.
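 // For example (illustrative, not original text): vcaled_f64(a, b), which
 // computes |a| <= |b|, uses the facge entry from the table and, after the
 // swap, effectively evaluates |b| >= |a|.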
4288  std::swap(Ops[0], Ops[1]);
4289  break;
4290  }
4291 
4292  assert(Int && "Generic code assumes a valid intrinsic");
4293 
4294  // Determine the type(s) of this overloaded AArch64 intrinsic.
4295  const Expr *Arg = E->getArg(0);
4296  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
4297  Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
4298 
4299  int j = 0;
4300  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
4301  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
4302  ai != ae; ++ai, ++j) {
4303  llvm::Type *ArgTy = ai->getType();
4304  if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
4305  ArgTy->getPrimitiveSizeInBits())
4306  continue;
4307 
4308  assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
4309  // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
4310  // it before inserting.
4311  Ops[j] =
4312  CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
4313  Ops[j] =
4314  CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
4315  }
4316 
4317  Value *Result = CGF.EmitNeonCall(F, Ops, s);
4318  llvm::Type *ResultType = CGF.ConvertType(E->getType());
4319  if (ResultType->getPrimitiveSizeInBits() <
4320  Result->getType()->getPrimitiveSizeInBits())
4321  return CGF.Builder.CreateExtractElement(Result, C0);
4322 
4323  return CGF.Builder.CreateBitCast(Result, ResultType, s);
4324 }
4325 
4326 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
4327  unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
4328  const char *NameHint, unsigned Modifier, const CallExpr *E,
4329  SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
4330  llvm::Triple::ArchType Arch) {
4331  // Get the last argument, which specifies the vector type.
4332  llvm::APSInt NeonTypeConst;
4333  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
4334  if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
4335  return nullptr;
4336 
4337  // Determine the type of this overloaded NEON intrinsic.
4338  NeonTypeFlags Type(NeonTypeConst.getZExtValue());
4339  bool Usgn = Type.isUnsigned();
4340  bool Quad = Type.isQuad();
4341 
4342  llvm::VectorType *VTy = GetNeonType(this, Type, Arch);
4343  llvm::Type *Ty = VTy;
4344  if (!Ty)
4345  return nullptr;
4346 
4347  auto getAlignmentValue32 = [&](Address addr) -> Value* {
4348  return Builder.getInt32(addr.getAlignment().getQuantity());
4349  };
4350 
4351  unsigned Int = LLVMIntrinsic;
4352  if ((Modifier & UnsignedAlts) && !Usgn)
4353  Int = AltLLVMIntrinsic;
4354 
4355  switch (BuiltinID) {
4356  default: break;
4357  case NEON::BI__builtin_neon_vabs_v:
4358  case NEON::BI__builtin_neon_vabsq_v:
4359  if (VTy->getElementType()->isFloatingPointTy())
4360  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
4361  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
4362  case NEON::BI__builtin_neon_vaddhn_v: {
4363  llvm::VectorType *SrcTy =
4364  llvm::VectorType::getExtendedElementVectorType(VTy);
4365 
4366  // %sum = add <4 x i32> %lhs, %rhs
4367  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4368  Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
4369  Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
4370 
4371  // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
4372  Constant *ShiftAmt =
4373  ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
4374  Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
4375 
4376  // %res = trunc <4 x i32> %high to <4 x i16>
4377  return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
4378  }
4379  case NEON::BI__builtin_neon_vcale_v:
4380  case NEON::BI__builtin_neon_vcaleq_v:
4381  case NEON::BI__builtin_neon_vcalt_v:
4382  case NEON::BI__builtin_neon_vcaltq_v:
4383  std::swap(Ops[0], Ops[1]);
4384  LLVM_FALLTHROUGH;
4385  case NEON::BI__builtin_neon_vcage_v:
4386  case NEON::BI__builtin_neon_vcageq_v:
4387  case NEON::BI__builtin_neon_vcagt_v:
4388  case NEON::BI__builtin_neon_vcagtq_v: {
4389  llvm::Type *Ty;
4390  switch (VTy->getScalarSizeInBits()) {
4391  default: llvm_unreachable("unexpected type");
4392  case 32:
4393  Ty = FloatTy;
4394  break;
4395  case 64:
4396  Ty = DoubleTy;
4397  break;
4398  case 16:
4399  Ty = HalfTy;
4400  break;
4401  }
4402  llvm::Type *VecFlt = llvm::VectorType::get(Ty, VTy->getNumElements());
4403  llvm::Type *Tys[] = { VTy, VecFlt };
4404  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4405  return EmitNeonCall(F, Ops, NameHint);
4406  }
4407  case NEON::BI__builtin_neon_vclz_v:
4408  case NEON::BI__builtin_neon_vclzq_v:
4409  // We generate a target-independent intrinsic, which needs a second argument
4410  // for whether or not clz of zero is undefined; on ARM it isn't.
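 // Illustrative result (assumption): on ARM, vclz_s32(x) ends up roughly as
 //   %r = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false)
 // because CLZ of zero is well defined there (it returns the bit width).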
4411  Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
4412  break;
4413  case NEON::BI__builtin_neon_vcvt_f32_v:
4414  case NEON::BI__builtin_neon_vcvtq_f32_v:
4415  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4416  Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad), Arch);
4417  return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
4418  : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
4419  case NEON::BI__builtin_neon_vcvt_f16_v:
4420  case NEON::BI__builtin_neon_vcvtq_f16_v:
4421  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4422  Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad), Arch);
4423  return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
4424  : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
4425  case NEON::BI__builtin_neon_vcvt_n_f16_v:
4426  case NEON::BI__builtin_neon_vcvt_n_f32_v:
4427  case NEON::BI__builtin_neon_vcvt_n_f64_v:
4428  case NEON::BI__builtin_neon_vcvtq_n_f16_v:
4429  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
4430  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
4431  llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
4432  Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
4433  Function *F = CGM.getIntrinsic(Int, Tys);
4434  return EmitNeonCall(F, Ops, "vcvt_n");
4435  }
4436  case NEON::BI__builtin_neon_vcvt_n_s16_v:
4437  case NEON::BI__builtin_neon_vcvt_n_s32_v:
4438  case NEON::BI__builtin_neon_vcvt_n_u16_v:
4439  case NEON::BI__builtin_neon_vcvt_n_u32_v:
4440  case NEON::BI__builtin_neon_vcvt_n_s64_v:
4441  case NEON::BI__builtin_neon_vcvt_n_u64_v:
4442  case NEON::BI__builtin_neon_vcvtq_n_s16_v:
4443  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
4444  case NEON::BI__builtin_neon_vcvtq_n_u16_v:
4445  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
4446  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
4447  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
4448  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
4449  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4450  return EmitNeonCall(F, Ops, "vcvt_n");
4451  }
4452  case NEON::BI__builtin_neon_vcvt_s32_v:
4453  case NEON::BI__builtin_neon_vcvt_u32_v:
4454  case NEON::BI__builtin_neon_vcvt_s64_v:
4455  case NEON::BI__builtin_neon_vcvt_u64_v:
4456  case NEON::BI__builtin_neon_vcvt_s16_v:
4457  case NEON::BI__builtin_neon_vcvt_u16_v:
4458  case NEON::BI__builtin_neon_vcvtq_s32_v:
4459  case NEON::BI__builtin_neon_vcvtq_u32_v:
4460  case NEON::BI__builtin_neon_vcvtq_s64_v:
4461  case NEON::BI__builtin_neon_vcvtq_u64_v:
4462  case NEON::BI__builtin_neon_vcvtq_s16_v:
4463  case NEON::BI__builtin_neon_vcvtq_u16_v: {
4464  Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
4465  return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
4466  : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
4467  }
4468  case NEON::BI__builtin_neon_vcvta_s16_v:
4469  case NEON::BI__builtin_neon_vcvta_s32_v:
4470  case NEON::BI__builtin_neon_vcvta_s64_v:
4471  case NEON::BI__builtin_neon_vcvta_u32_v:
4472  case NEON::BI__builtin_neon_vcvta_u64_v:
4473  case NEON::BI__builtin_neon_vcvtaq_s16_v:
4474  case NEON::BI__builtin_neon_vcvtaq_s32_v:
4475  case NEON::BI__builtin_neon_vcvtaq_s64_v:
4476  case NEON::BI__builtin_neon_vcvtaq_u16_v:
4477  case NEON::BI__builtin_neon_vcvtaq_u32_v:
4478  case NEON::BI__builtin_neon_vcvtaq_u64_v:
4479  case NEON::BI__builtin_neon_vcvtn_s16_v:
4480  case NEON::BI__builtin_neon_vcvtn_s32_v:
4481  case NEON::BI__builtin_neon_vcvtn_s64_v:
4482  case NEON::BI__builtin_neon_vcvtn_u16_v:
4483  case NEON::BI__builtin_neon_vcvtn_u32_v:
4484  case NEON::BI__builtin_neon_vcvtn_u64_v:
4485  case NEON::BI__builtin_neon_vcvtnq_s16_v:
4486  case NEON::BI__builtin_neon_vcvtnq_s32_v:
4487  case NEON::BI__builtin_neon_vcvtnq_s64_v:
4488  case NEON::BI__builtin_neon_vcvtnq_u16_v:
4489  case NEON::BI__builtin_neon_vcvtnq_u32_v:
4490  case NEON::BI__builtin_neon_vcvtnq_u64_v:
4491  case NEON::BI__builtin_neon_vcvtp_s16_v:
4492  case NEON::BI__builtin_neon_vcvtp_s32_v:
4493  case NEON::BI__builtin_neon_vcvtp_s64_v:
4494  case NEON::BI__builtin_neon_vcvtp_u16_v:
4495  case NEON::BI__builtin_neon_vcvtp_u32_v:
4496  case NEON::BI__builtin_neon_vcvtp_u64_v:
4497  case NEON::BI__builtin_neon_vcvtpq_s16_v:
4498  case NEON::BI__builtin_neon_vcvtpq_s32_v:
4499  case NEON::BI__builtin_neon_vcvtpq_s64_v:
4500  case NEON::BI__builtin_neon_vcvtpq_u16_v:
4501  case NEON::BI__builtin_neon_vcvtpq_u32_v:
4502  case NEON::BI__builtin_neon_vcvtpq_u64_v:
4503  case NEON::BI__builtin_neon_vcvtm_s16_v:
4504  case NEON::BI__builtin_neon_vcvtm_s32_v:
4505  case NEON::BI__builtin_neon_vcvtm_s64_v:
4506  case NEON::BI__builtin_neon_vcvtm_u16_v:
4507  case NEON::BI__builtin_neon_vcvtm_u32_v:
4508  case NEON::BI__builtin_neon_vcvtm_u64_v:
4509  case NEON::BI__builtin_neon_vcvtmq_s16_v:
4510  case NEON::BI__builtin_neon_vcvtmq_s32_v:
4511  case NEON::BI__builtin_neon_vcvtmq_s64_v:
4512  case NEON::BI__builtin_neon_vcvtmq_u16_v:
4513  case NEON::BI__builtin_neon_vcvtmq_u32_v:
4514  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
4515  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
4516  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
4517  }
4518  case NEON::BI__builtin_neon_vext_v:
4519  case NEON::BI__builtin_neon_vextq_v: {
4520  int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
4521  SmallVector<uint32_t, 16> Indices;
4522  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
4523  Indices.push_back(i+CV);
4524 
4525  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4526  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4527  return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
4528  }
4529  case NEON::BI__builtin_neon_vfma_v:
4530  case NEON::BI__builtin_neon_vfmaq_v: {
4531  Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
4532  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4533  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4534  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4535 
4536  // NEON intrinsic puts accumulator first, unlike the LLVM fma.
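 // Illustrative mapping (not original text): vfma_f32(a, b, c) computes
 // a + b * c, so it is emitted here roughly as
 //   call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %c, <2 x float> %a)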
4537  return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
4538  }
4539  case NEON::BI__builtin_neon_vld1_v:
4540  case NEON::BI__builtin_neon_vld1q_v: {
4541  llvm::Type *Tys[] = {Ty, Int8PtrTy};
4542  Ops.push_back(getAlignmentValue32(PtrOp0));
4543  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
4544  }
4545  case NEON::BI__builtin_neon_vld2_v:
4546  case NEON::BI__builtin_neon_vld2q_v:
4547  case NEON::BI__builtin_neon_vld3_v:
4548  case NEON::BI__builtin_neon_vld3q_v:
4549  case NEON::BI__builtin_neon_vld4_v:
4550  case NEON::BI__builtin_neon_vld4q_v: {
4551  llvm::Type *Tys[] = {Ty, Int8PtrTy};
4552  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4553  Value *Align = getAlignmentValue32(PtrOp1);
4554  Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
4555  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4556  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4557  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4558  }
4559  case NEON::BI__builtin_neon_vld1_dup_v:
4560  case NEON::BI__builtin_neon_vld1q_dup_v: {
4561  Value *V = UndefValue::get(Ty);
4562  Ty = llvm::PointerType::getUnqual(VTy->getElementType());
4563  PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
4564  LoadInst *Ld = Builder.CreateLoad(PtrOp0);
4565  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
4566  Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
4567  return EmitNeonSplat(Ops[0], CI);
4568  }
4569  case NEON::BI__builtin_neon_vld2_lane_v:
4570  case NEON::BI__builtin_neon_vld2q_lane_v:
4571  case NEON::BI__builtin_neon_vld3_lane_v:
4572  case NEON::BI__builtin_neon_vld3q_lane_v:
4573  case NEON::BI__builtin_neon_vld4_lane_v:
4574  case NEON::BI__builtin_neon_vld4q_lane_v: {
4575  llvm::Type *Tys[] = {Ty, Int8PtrTy};
4576  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4577  for (unsigned I = 2; I < Ops.size() - 1; ++I)
4578  Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
4579  Ops.push_back(getAlignmentValue32(PtrOp1));
4580  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
4581  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4582  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4583  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4584  }
4585  case NEON::BI__builtin_neon_vmovl_v: {
4586  llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
4587  Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
4588  if (Usgn)
4589  return Builder.CreateZExt(Ops[0], Ty, "vmovl");
4590  return Builder.CreateSExt(Ops[0], Ty, "vmovl");
4591  }
4592  case NEON::BI__builtin_neon_vmovn_v: {
4593  llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4594  Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
4595  return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
4596  }
4597  case NEON::BI__builtin_neon_vmull_v:
4598  // FIXME: the integer vmull operations could be emitted in terms of pure
4599  // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
4600  // hoisting the exts outside loops. Until global ISel comes along that can
4601  // see through such movement this leads to bad CodeGen. So we need an
4602  // intrinsic for now.
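 // A pure-IR lowering would look roughly like this (illustrative sketch only,
 // using vmull_s16 as the example):
 //   %lhs.ext = sext <4 x i16> %lhs to <4 x i32>
 //   %rhs.ext = sext <4 x i16> %rhs to <4 x i32>
 //   %prod    = mul <4 x i32> %lhs.ext, %rhs.ext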
4603  Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
4604  Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
4605  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
4606  case NEON::BI__builtin_neon_vpadal_v:
4607  case NEON::BI__builtin_neon_vpadalq_v: {
4608  // The source operand type has twice as many elements of half the size.
4609  unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4610  llvm::Type *EltTy =
4611  llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4612  llvm::Type *NarrowTy =
4613  llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4614  llvm::Type *Tys[2] = { Ty, NarrowTy };
4615  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
4616  }
4617  case NEON::BI__builtin_neon_vpaddl_v:
4618  case NEON::BI__builtin_neon_vpaddlq_v: {
4619  // The source operand type has twice as many elements of half the size.
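 // Example (illustrative, not original text): for vpaddl_s8, Ty is <4 x i16>
 // and NarrowTy is <8 x i8>, so the intrinsic is overloaded as
 // <4 x i16> (<8 x i8>): adjacent byte pairs are widened and summed.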
4620  unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4621  llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4622  llvm::Type *NarrowTy =
4623  llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4624  llvm::Type *Tys[2] = { Ty, NarrowTy };
4625  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
4626  }
4627  case NEON::BI__builtin_neon_vqdmlal_v:
4628  case NEON::BI__builtin_neon_vqdmlsl_v: {
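 // In effect (illustrative, not original text): vqdmlal(a, b, c) is emitted
 // as sqadd(a, sqdmull(b, c)), and vqdmlsl uses the saturating subtract
 // (AltLLVMIntrinsic) instead.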
4629  SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
4630  Ops[1] =
4631  EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
4632  Ops.resize(2);
4633  return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
4634  }
4635  case NEON::BI__builtin_neon_vqshl_n_v:
4636  case NEON::BI__builtin_neon_vqshlq_n_v:
4637  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
4638  1, false);
4639  case NEON::BI__builtin_neon_vqshlu_n_v:
4640  case NEON::BI__builtin_neon_vqshluq_n_v:
4641  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
4642  1, false);
4643  case NEON::BI__builtin_neon_vrecpe_v:
4644  case NEON::BI__builtin_neon_vrecpeq_v:
4645  case NEON::BI__builtin_neon_vrsqrte_v:
4646  case NEON::BI__builtin_neon_vrsqrteq_v:
4647  Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
4648  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
4649 
4650  case NEON::BI__builtin_neon_vrshr_n_v:
4651  case NEON::BI__builtin_neon_vrshrq_n_v:
4652  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
4653  1, true);
4654  case NEON::BI__builtin_neon_vshl_n_v:
4655  case NEON::BI__builtin_neon_vshlq_n_v:
4656  Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
4657  return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
4658  "vshl_n");
4659  case NEON::BI__builtin_neon_vshll_n_v: {
4660  llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
4661  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4662  if (Usgn)
4663  Ops[0] = Builder.CreateZExt(Ops[0], VTy);
4664  else
4665  Ops[0] = Builder.CreateSExt(Ops[0], VTy);
4666  Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
4667  return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
4668  }
4669  case NEON::BI__builtin_neon_vshrn_n_v: {
4670  llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4671  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4672  Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
4673  if (Usgn)
4674  Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
4675  else
4676  Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
4677  return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
4678  }
4679  case NEON::BI__builtin_neon_vshr_n_v:
4680  case NEON::BI__builtin_neon_vshrq_n_v:
4681  return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
4682  case NEON::BI__builtin_neon_vst1_v:
4683  case NEON::BI__builtin_neon_vst1q_v:
4684  case NEON::BI__builtin_neon_vst2_v:
4685  case NEON::BI__builtin_neon_vst2q_v:
4686  case NEON::BI__builtin_neon_vst3_v:
4687  case NEON::BI__builtin_neon_vst3q_v:
4688  case NEON::BI__builtin_neon_vst4_v:
4689  case NEON::BI__builtin_neon_vst4q_v:
4690  case NEON::BI__builtin_neon_vst2_lane_v:
4691  case NEON::BI__builtin_neon_vst2q_lane_v:
4692  case NEON::BI__builtin_neon_vst3_lane_v:
4693  case NEON::BI__builtin_neon_vst3q_lane_v:
4694  case NEON::BI__builtin_neon_vst4_lane_v:
4695  case NEON::BI__builtin_neon_vst4q_lane_v: {
4696  llvm::Type *Tys[] = {Int8PtrTy, Ty};
4697  Ops.push_back(getAlignmentValue32(PtrOp0));
4698  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
4699  }
4700  case NEON::BI__builtin_neon_vsubhn_v: {
4701  llvm::VectorType *SrcTy =
4702  llvm::VectorType::getExtendedElementVectorType(VTy);
4703 
4704  // %sum = add <4 x i32> %lhs, %rhs
4705  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4706  Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
4707  Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
4708 
4709  // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
4710  Constant *ShiftAmt =
4711  ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
4712  Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
4713 
4714  // %res = trunc <4 x i32> %high to <4 x i16>
4715  return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
4716  }
4717  case NEON::BI__builtin_neon_vtrn_v:
4718  case NEON::BI__builtin_neon_vtrnq_v: {
4719  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4720  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4721  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4722  Value *SV = nullptr;
4723 
4724  for (unsigned vi = 0; vi != 2; ++vi) {
4725  SmallVector<uint32_t, 16> Indices;
4726  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4727  Indices.push_back(i+vi);
4728  Indices.push_back(i+e+vi);
4729  }
4730  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4731  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
4732  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4733  }
4734  return SV;
4735  }
4736  case NEON::BI__builtin_neon_vtst_v:
4737  case NEON::BI__builtin_neon_vtstq_v: {
4738  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4739  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4740  Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
4741  Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
4742  ConstantAggregateZero::get(Ty));
4743  return Builder.CreateSExt(Ops[0], Ty, "vtst");
4744  }
4745  case NEON::BI__builtin_neon_vuzp_v:
4746  case NEON::BI__builtin_neon_vuzpq_v: {
4747  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4748  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4749  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4750  Value *SV = nullptr;
4751 
4752  for (unsigned vi = 0; vi != 2; ++vi) {
4753  SmallVector<uint32_t, 16> Indices;
4754  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
4755  Indices.push_back(2*i+vi);
4756 
4757  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4758  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
4759  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4760  }
4761  return SV;
4762  }
4763  case NEON::BI__builtin_neon_vzip_v:
4764  case NEON::BI__builtin_neon_vzipq_v: {
4765  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4766  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4767  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4768  Value *SV = nullptr;
4769 
4770  for (unsigned vi = 0; vi != 2; ++vi) {
4771  SmallVector<uint32_t, 16> Indices;
4772  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4773  Indices.push_back((i + vi*e) >> 1);
4774  Indices.push_back(((i + vi*e) >> 1)+e);
4775  }
4776  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4777  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
4778  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4779  }
4780  return SV;
4781  }
4782  }
4783 
4784  assert(Int && "Expected valid intrinsic number");
4785 
4786  // Determine the type(s) of this overloaded AArch64 intrinsic.
4787  Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
4788 
4789  Value *Result = EmitNeonCall(F, Ops, NameHint);
4790  llvm::Type *ResultType = ConvertType(E->getType());
4791  // Cast the one-element vector result of an AArch64 scalar intrinsic back to
4792  // the scalar type expected by the builtin.
4793  return Builder.CreateBitCast(Result, ResultType, NameHint);
4794 }
4795 
4796 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
4797  Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
4798  const CmpInst::Predicate Ip, const Twine &Name) {
4799  llvm::Type *OTy = Op->getType();
4800 
4801  // FIXME: this is utterly horrific. We should not be looking at previous
4802  // codegen context to find out what needs doing. Unfortunately TableGen
4803  // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
4804  // (etc).
4805  if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
4806  OTy = BI->getOperand(0)->getType();
4807 
4808  Op = Builder.CreateBitCast(Op, OTy);
4809  if (OTy->getScalarType()->isFloatingPointTy()) {
4810  Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
4811  } else {
4812  Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
4813  }
4814  return Builder.CreateSExt(Op, Ty, Name);
4815 }
4816 
4817 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
4818  Value *ExtOp, Value *IndexOp,
4819  llvm::Type *ResTy, unsigned IntID,
4820  const char *Name) {
4821  SmallVector<Value *, 2> TblOps;
4822  if (ExtOp)
4823  TblOps.push_back(ExtOp);
4824 
4825  // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
4826  SmallVector<uint32_t, 16> Indices;
4827  llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
4828  for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
4829  Indices.push_back(2*i);
4830  Indices.push_back(2*i+1);
4831  }
4832 
4833  int PairPos = 0, End = Ops.size() - 1;
4834  while (PairPos < End) {
4835  TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4836  Ops[PairPos+1], Indices,
4837  Name));
4838  PairPos += 2;
4839  }
4840 
4841  // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
4842  // of the 128-bit lookup table with zero.
4843  if (PairPos == End) {
4844  Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
4845  TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4846  ZeroTbl, Indices, Name));
4847  }
4848 
4849  Function *TblF;
4850  TblOps.push_back(IndexOp);
4851  TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
4852 
4853  return CGF.EmitNeonCall(TblF, TblOps, Name);
4854 }
4855 
4856 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
4857  unsigned Value;
4858  switch (BuiltinID) {
4859  default:
4860  return nullptr;
4861  case ARM::BI__builtin_arm_nop:
4862  Value = 0;
4863  break;
4864  case ARM::BI__builtin_arm_yield:
4865  case ARM::BI__yield:
4866  Value = 1;
4867  break;
4868  case ARM::BI__builtin_arm_wfe:
4869  case ARM::BI__wfe:
4870  Value = 2;
4871  break;
4872  case ARM::BI__builtin_arm_wfi:
4873  case ARM::BI__wfi:
4874  Value = 3;
4875  break;
4876  case ARM::BI__builtin_arm_sev:
4877  case ARM::BI__sev:
4878  Value = 4;
4879  break;
4880  case ARM::BI__builtin_arm_sevl:
4881  case ARM::BI__sevl:
4882  Value = 5;
4883  break;
4884  }
4885 
4886  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
4887  llvm::ConstantInt::get(Int32Ty, Value));
4888 }
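// Illustrative output (assumption): __builtin_arm_wfi() is emitted roughly as
//   call void @llvm.arm.hint(i32 3)
// with the other hint builtins differing only in the immediate value.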
4889 
4890 // Generates the IR for the read/write special register builtin.
4891 // ValueType is the type of the value that is to be written or read,
4892 // RegisterType is the type of the register being written to or read from.
4893 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
4894  const CallExpr *E,
4895  llvm::Type *RegisterType,
4896  llvm::Type *ValueType,
4897  bool IsRead,
4898  StringRef SysReg = "") {
4899  // The read/write register intrinsics only support 32- and 64-bit operations.
4900  assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
4901  && "Unsupported size for register.");
4902 
4903  CodeGen::CGBuilderTy &Builder = CGF.Builder;
4904  CodeGen::CodeGenModule &CGM = CGF.CGM;
4905  LLVMContext &Context = CGM.getLLVMContext();
4906 
4907  if (SysReg.empty()) {
4908  const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
4909  SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
4910  }
4911 
4912  llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
4913  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4914  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4915 
4916  llvm::Type *Types[] = { RegisterType };
4917 
4918  bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
4919  assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
4920  && "Can't fit 64-bit value in 32-bit register");
4921 
4922  if (IsRead) {
4923  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
4924  llvm::Value *Call = Builder.CreateCall(F, Metadata);
4925 
4926  if (MixedTypes)
4927  // Read into 64 bit register and then truncate result to 32 bit.
4928  return Builder.CreateTrunc(Call, ValueType);
4929 
4930  if (ValueType->isPointerTy())
4931  // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
4932  return Builder.CreateIntToPtr(Call, ValueType);
4933 
4934  return Call;
4935  }
4936 
4937  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
4938  llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
4939  if (MixedTypes) {
4940  // Extend 32 bit write value to 64 bit to pass to write.
4941  ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
4942  return Builder.CreateCall(F, { Metadata, ArgValue });
4943  }
4944 
4945  if (ValueType->isPointerTy()) {
4946  // Have VoidPtrTy ArgValue but want to return an i32/i64.
4947  ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
4948  return Builder.CreateCall(F, { Metadata, ArgValue });
4949  }
4950 
4951  return Builder.CreateCall(F, { Metadata, ArgValue });
4952 }
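// Illustrative result (assumption; the register name below is a placeholder):
// a 32-bit read such as __builtin_arm_rsr("some_register") becomes roughly
//   %v = call i32 @llvm.read_register.i32(metadata !{!"some_register"})
// while writes go through llvm.write_register with the value as a second
// operand.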
4953 
4954 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
4955 /// argument that specifies the vector type.
4956 static bool HasExtraNeonArgument(unsigned BuiltinID) {
4957  switch (BuiltinID) {
4958  default: break;
4959  case NEON::BI__builtin_neon_vget_lane_i8:
4960  case NEON::BI__builtin_neon_vget_lane_i16:
4961  case NEON::BI__builtin_neon_vget_lane_i32:
4962  case NEON::BI__builtin_neon_vget_lane_i64:
4963  case NEON::BI__builtin_neon_vget_lane_f32:
4964  case NEON::BI__builtin_neon_vgetq_lane_i8:
4965  case NEON::BI__builtin_neon_vgetq_lane_i16:
4966  case NEON::BI__builtin_neon_vgetq_lane_i32:
4967  case NEON::BI__builtin_neon_vgetq_lane_i64:
4968  case NEON::BI__builtin_neon_vgetq_lane_f32:
4969  case NEON::BI__builtin_neon_vset_lane_i8:
4970  case NEON::BI__builtin_neon_vset_lane_i16:
4971  case NEON::BI__builtin_neon_vset_lane_i32:
4972  case NEON::BI__builtin_neon_vset_lane_i64:
4973  case NEON::BI__builtin_neon_vset_lane_f32:
4974  case NEON::BI__builtin_neon_vsetq_lane_i8:
4975  case NEON::BI__builtin_neon_vsetq_lane_i16:
4976  case NEON::BI__builtin_neon_vsetq_lane_i32:
4977  case NEON::BI__builtin_neon_vsetq_lane_i64:
4978  case NEON::BI__builtin_neon_vsetq_lane_f32:
4979  case NEON::BI__builtin_neon_vsha1h_u32:
4980  case NEON::BI__builtin_neon_vsha1cq_u32:
4981  case NEON::BI__builtin_neon_vsha1pq_u32:
4982  case NEON::BI__builtin_neon_vsha1mq_u32:
4983  case clang::ARM::BI_MoveToCoprocessor:
4984  case clang::ARM::BI_MoveToCoprocessor2:
4985  return false;
4986  }
4987  return true;
4988 }
4989 
4990 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
4991  const CallExpr *E,
4992  llvm::Triple::ArchType Arch) {
4993  if (auto Hint = GetValueForARMHint(BuiltinID))
4994  return Hint;
4995 
4996  if (BuiltinID == ARM::BI__emit) {
4997  bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
4998  llvm::FunctionType *FTy =
4999  llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
5000 
5001  APSInt Value;
5002  if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
5003  llvm_unreachable("Sema will ensure that the parameter is constant");
5004 
5005  uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
5006 
5007  llvm::InlineAsm *Emit =
5008  IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
5009  /*SideEffects=*/true)
5010  : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
5011  /*SideEffects=*/true);
5012 
5013  return Builder.CreateCall(Emit);
5014  }
5015 
5016  if (BuiltinID == ARM::BI__builtin_arm_dbg) {
5017  Value *Option = EmitScalarExpr(E->getArg(0));
5018  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
5019  }
5020 
5021  if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
5022  Value *Address = EmitScalarExpr(E->getArg(0));
5023  Value *RW = EmitScalarExpr(E->getArg(1));
5024  Value *IsData = EmitScalarExpr(E->getArg(2));
5025 
5026  // Locality is not supported on the ARM target.
5027  Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
5028 
5029  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
5030  return Builder.CreateCall(F, {Address, RW, Locality, IsData});
5031  }
5032 
5033  if (BuiltinID == ARM::BI__builtin_arm_rbit) {
5034  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5035  return Builder.CreateCall(
5036  CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5037  }
5038 
5039  if (BuiltinID == ARM::BI__clear_cache) {
5040  assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5041  const FunctionDecl *FD = E->getDirectCallee();
5042  Value *Ops[2];
5043  for (unsigned i = 0; i < 2; i++)
5044  Ops[i] = EmitScalarExpr(E->getArg(i));
5045  llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5046  llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5047  StringRef Name = FD->getName();
5048  return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5049  }
5050 
5051  if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
5052  BuiltinID == ARM::BI__builtin_arm_mcrr2) {
5053  Function *F;
5054 
5055  switch (BuiltinID) {
5056  default: llvm_unreachable("unexpected builtin");
5057  case ARM::BI__builtin_arm_mcrr:
5058  F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
5059  break;
5060  case ARM::BI__builtin_arm_mcrr2:
5061  F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
5062  break;
5063  }
5064 
5065  // The MCRR{2} instruction has 5 operands, but
5066  // the builtin takes only 4, because Rt and Rt2
5067  // are represented as a single unsigned 64-bit
5068  // integer in the builtin's definition, while
5069  // internally they are split back into two
5070  // 32-bit integers.
5071 
5072  Value *Coproc = EmitScalarExpr(E->getArg(0));
5073  Value *Opc1 = EmitScalarExpr(E->getArg(1));
5074  Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
5075  Value *CRm = EmitScalarExpr(E->getArg(3));
5076 
5077  Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
5078  Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
5079  Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
5080  Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
5081 
5082  return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
5083  }
5084 
5085  if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
5086  BuiltinID == ARM::BI__builtin_arm_mrrc2) {
5087  Function *F;
5088 
5089  switch (BuiltinID) {
5090  default: llvm_unreachable("unexpected builtin");
5091  case ARM::BI__builtin_arm_mrrc:
5092  F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
5093  break;
5094  case ARM::BI__builtin_arm_mrrc2:
5095  F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
5096  break;
5097  }
5098 
5099  Value *Coproc = EmitScalarExpr(E->getArg(0));
5100  Value *Opc1 = EmitScalarExpr(E->getArg(1));
5101  Value *CRm = EmitScalarExpr(E->getArg(2));
5102  Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
5103 
5104  // MRRC{2} returns an unsigned 64-bit integer, represented
5105  // as two 32-bit integers.
5106 
5107  Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
5108  Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
5109  Rt = Builder.CreateZExt(Rt, Int64Ty);
5110  Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
5111 
5112  Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
5113  RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
5114  RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
5115 
5116  return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
5117  }
5118 
5119  if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
5120  ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
5121  BuiltinID == ARM::BI__builtin_arm_ldaex) &&
5122  getContext().getTypeSize(E->getType()) == 64) ||
5123  BuiltinID == ARM::BI__ldrexd) {
5124  Function *F;
5125 
5126  switch (BuiltinID) {
5127  default: llvm_unreachable("unexpected builtin");
5128  case ARM::BI__builtin_arm_ldaex:
5129  F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
5130  break;
5131  case ARM::BI__builtin_arm_ldrexd:
5132  case ARM::BI__builtin_arm_ldrex:
5133  case ARM::BI__ldrexd:
5134  F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
5135  break;
5136  }
5137 
5138  Value *LdPtr = EmitScalarExpr(E->getArg(0));
5139  Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
5140  "ldrexd");
5141 
5142  Value *Val0 = Builder.CreateExtractValue(Val, 1);
5143  Value *Val1 = Builder.CreateExtractValue(Val, 0);
5144  Val0 = Builder.CreateZExt(Val0, Int64Ty);
5145  Val1 = Builder.CreateZExt(Val1, Int64Ty);
5146 
5147  Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
5148  Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5149  Val = Builder.CreateOr(Val, Val1);
5150  return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5151  }
5152 
5153  if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
5154  BuiltinID == ARM::BI__builtin_arm_ldaex) {
5155  Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5156 
5157  QualType Ty = E->getType();
5158  llvm::Type *RealResTy = ConvertType(Ty);
5159  llvm::Type *PtrTy = llvm::IntegerType::get(
5160  getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
5161  LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
5162 
5163  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
5164  ? Intrinsic::arm_ldaex
5165  : Intrinsic::arm_ldrex,
5166  PtrTy);
5167  Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
5168 
5169  if (RealResTy->isPointerTy())
5170  return Builder.CreateIntToPtr(Val, RealResTy);
5171  else {
5172  llvm::Type *IntResTy = llvm::IntegerType::get(
5173  getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
5174  Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
5175  return Builder.CreateBitCast(Val, RealResTy);
5176  }
5177  }
5178 
5179  if (BuiltinID == ARM::BI__builtin_arm_strexd ||
5180  ((BuiltinID == ARM::BI__builtin_arm_stlex ||
5181  BuiltinID == ARM::BI__builtin_arm_strex) &&
5182  getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
5183  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
5184  ? Intrinsic::arm_stlexd
5185  : Intrinsic::arm_strexd);
5186  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
5187 
5188  Address Tmp = CreateMemTemp(E->getArg(0)->getType());
5189  Value *Val = EmitScalarExpr(E->getArg(0));
5190  Builder.CreateStore(Val, Tmp);
5191 
5192  Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
5193  Val = Builder.CreateLoad(LdPtr);
5194 
5195  Value *Arg0 = Builder.CreateExtractValue(Val, 0);
5196  Value *Arg1 = Builder.CreateExtractValue(Val, 1);
5197  Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
5198  return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
5199  }
5200 
5201  if (BuiltinID == ARM::BI__builtin_arm_strex ||
5202  BuiltinID == ARM::BI__builtin_arm_stlex) {
5203  Value *StoreVal = EmitScalarExpr(E->getArg(0));
5204  Value *StoreAddr = EmitScalarExpr(E->getArg(1));
5205 
5206  QualType Ty = E->getArg(0)->getType();
5207  llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
5208  getContext().getTypeSize(Ty));
5209  StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
5210 
5211  if (StoreVal->getType()->isPointerTy())
5212  StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
5213  else {
5214  llvm::Type *IntTy = llvm::IntegerType::get(
5215  getLLVMContext(),
5216  CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
5217  StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
5218  StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
5219  }
5220 
5221  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
5222  ? Intrinsic::arm_stlex
5223  : Intrinsic::arm_strex,
5224  StoreAddr->getType());
5225  return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
5226  }
5227 
5228  switch (BuiltinID) {
5229  case ARM::BI__iso_volatile_load8:
5230  case ARM::BI__iso_volatile_load16:
5231  case ARM::BI__iso_volatile_load32:
5232  case ARM::BI__iso_volatile_load64: {
5233  Value *Ptr = EmitScalarExpr(E->getArg(0));
5234  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
5235  CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
5236  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
5237  LoadSize.getQuantity() * 8);
5238  Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
5239  llvm::LoadInst *Load =
5240  Builder.CreateAlignedLoad(Ptr, LoadSize);
5241  Load->setVolatile(true);
5242  return Load;
5243  }
5244  case ARM::BI__iso_volatile_store8:
5245  case ARM::BI__iso_volatile_store16:
5246  case ARM::BI__iso_volatile_store32:
5247  case ARM::BI__iso_volatile_store64: {
5248  Value *Ptr = EmitScalarExpr(E->getArg(0));
5249  Value *Value = EmitScalarExpr(E->getArg(1));
5250  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
5251  CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
5252  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
5253  StoreSize.getQuantity() * 8);
5254  Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
5255  llvm::StoreInst *Store =
5256  Builder.CreateAlignedStore(Value, Ptr,
5257  StoreSize);
5258  Store->setVolatile(true);
5259