1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CodeGenFunction.h"
15 #include "CGCXXABI.h"
16 #include "CGObjCRuntime.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/ASTContext.h"
20 #include "clang/AST/Decl.h"
22 #include "clang/Basic/TargetInfo.h"
24 #include "llvm/ADT/StringExtras.h"
25 #include "llvm/IR/CallSite.h"
26 #include "llvm/IR/DataLayout.h"
27 #include "llvm/IR/InlineAsm.h"
28 #include "llvm/IR/Intrinsics.h"
29 #include "llvm/IR/MDBuilder.h"
30 #include <sstream>
31 
32 using namespace clang;
33 using namespace CodeGen;
34 using namespace llvm;
35 
36 /// getBuiltinLibFunction - Given a builtin id for a function like
37 /// "__builtin_fabsf", return a Function* for "fabsf".
38 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
39                                                      unsigned BuiltinID) {
40  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
41 
42  // Get the name, skip over the __builtin_ prefix (if necessary).
43  StringRef Name;
44  GlobalDecl D(FD);
45 
46  // If the builtin has been declared explicitly with an assembler label,
47  // use the mangled name. This differs from the plain label on platforms
48  // that prefix labels.
49  if (FD->hasAttr<AsmLabelAttr>())
50  Name = getMangledName(D);
51  else
52  Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
53 
54  llvm::FunctionType *Ty =
55  cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
56 
57  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
58 }
59 
60 /// Emit the conversions required to turn the given value into an
61 /// integer of the given size.
62 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
63                         QualType T, llvm::IntegerType *IntType) {
64  V = CGF.EmitToMemory(V, T);
65 
66  if (V->getType()->isPointerTy())
67  return CGF.Builder.CreatePtrToInt(V, IntType);
68 
69  assert(V->getType() == IntType);
70  return V;
71 }
72 
73 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
74                           QualType T, llvm::Type *ResultType) {
75  V = CGF.EmitFromMemory(V, T);
76 
77  if (ResultType->isPointerTy())
78  return CGF.Builder.CreateIntToPtr(V, ResultType);
79 
80  assert(V->getType() == ResultType);
81  return V;
82 }
83 
84 /// Utility to insert an atomic instruction based on Intrinsic::ID
85 /// and the expression node.
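// Illustrative sketch (not part of the original source): a call such as
//   int old = __sync_fetch_and_add(&i, 1);   // int i;
// is lowered here to roughly
//   %old = atomicrmw add i32* %i, i32 1 seq_cst
// with the old value converted back to the source type and returned.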
86 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
87                                     llvm::AtomicRMWInst::BinOp Kind,
88  const CallExpr *E) {
89  QualType T = E->getType();
90  assert(E->getArg(0)->getType()->isPointerType());
91  assert(CGF.getContext().hasSameUnqualifiedType(T,
92  E->getArg(0)->getType()->getPointeeType()));
93  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
94 
95  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
96  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
97 
98  llvm::IntegerType *IntType =
99  llvm::IntegerType::get(CGF.getLLVMContext(),
100  CGF.getContext().getTypeSize(T));
101  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
102 
103  llvm::Value *Args[2];
104  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
105  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
106  llvm::Type *ValueType = Args[1]->getType();
107  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
108 
109  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
110  Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
111  return EmitFromInt(CGF, Result, T, ValueType);
112 }
113 
114 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
115  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
116  Value *Address = CGF.EmitScalarExpr(E->getArg(1));
117 
118  // Convert the type of the pointer to a pointer to the stored type.
119  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
120  Value *BC = CGF.Builder.CreateBitCast(
121  Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
122  LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
123  LV.setNontemporal(true);
124  CGF.EmitStoreOfScalar(Val, LV, false);
125  return nullptr;
126 }
127 
128 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
129  Value *Address = CGF.EmitScalarExpr(E->getArg(0));
130 
131  LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
132  LV.setNontemporal(true);
133  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
134 }
135 
136 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
137                                llvm::AtomicRMWInst::BinOp Kind,
138  const CallExpr *E) {
139  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
140 }
141 
142 /// Utility to insert an atomic instruction based on Intrinsic::ID and
143 /// the expression node, where the return value is the result of the
144 /// operation.
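// Illustrative sketch (not part of the original source): for
//   int nv = __sync_add_and_fetch(&i, 1);
// this emits the same atomicrmw as above and then re-applies the operation
// (here an 'add' of the operand) to produce the *new* value; the Invert flag
// is used by the nand flavor to xor the result with -1 afterwards.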
145 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
146                                    llvm::AtomicRMWInst::BinOp Kind,
147  const CallExpr *E,
148  Instruction::BinaryOps Op,
149  bool Invert = false) {
150  QualType T = E->getType();
151  assert(E->getArg(0)->getType()->isPointerType());
152  assert(CGF.getContext().hasSameUnqualifiedType(T,
153  E->getArg(0)->getType()->getPointeeType()));
154  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
155 
156  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
157  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
158 
159  llvm::IntegerType *IntType =
160  llvm::IntegerType::get(CGF.getLLVMContext(),
161  CGF.getContext().getTypeSize(T));
162  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
163 
164  llvm::Value *Args[2];
165  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
166  llvm::Type *ValueType = Args[1]->getType();
167  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
168  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
169 
170  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
171  Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
172  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
173  if (Invert)
174  Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
175  llvm::ConstantInt::get(IntType, -1));
176  Result = EmitFromInt(CGF, Result, T, ValueType);
177  return RValue::get(Result);
178 }
179 
180 /// @brief Utility to insert an atomic cmpxchg instruction.
181 ///
182 /// @param CGF The current codegen function.
183 /// @param E Builtin call expression to convert to cmpxchg.
184 /// arg0 - address to operate on
185 /// arg1 - value to compare with
186 /// arg2 - new value
187 /// @param ReturnBool Specifies whether to return success flag of
188 /// cmpxchg result or the old value.
189 ///
190 /// @returns result of cmpxchg, according to ReturnBool
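// Illustrative sketch (not part of the original source): for
//   bool ok = __sync_bool_compare_and_swap(&i, oldv, newv);
// this emits roughly
//   %pair = cmpxchg i32* %i, i32 %oldv, i32 %newv seq_cst seq_cst
// and returns the i1 success flag (element 1) zero-extended to int; the
// _val_ form instead returns element 0, the previously stored value.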
191 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
192                                      bool ReturnBool) {
193  QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
194  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
195  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
196 
197  llvm::IntegerType *IntType = llvm::IntegerType::get(
198  CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
199  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
200 
201  Value *Args[3];
202  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
203  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
204  llvm::Type *ValueType = Args[1]->getType();
205  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
206  Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
207 
208  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
209  Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
210  llvm::AtomicOrdering::SequentiallyConsistent);
211  if (ReturnBool)
212  // Extract boolean success flag and zext it to int.
213  return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
214  CGF.ConvertType(E->getType()));
215  else
216  // Extract old value and emit it using the same type as compare value.
217  return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
218  ValueType);
219 }
220 
221 // Emit a simple mangled intrinsic that has 1 argument and a return type
222 // matching the argument type.
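// Illustrative example (not part of the original source): __builtin_ceil(x)
// with 'double x' becomes a call to the overloaded intrinsic mangled on the
// operand type, i.e. 'call double @llvm.ceil.f64(double %x)'.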
223 static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
224                                const CallExpr *E,
225  unsigned IntrinsicID) {
226  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
227 
228  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
229  return CGF.Builder.CreateCall(F, Src0);
230 }
231 
232 // Emit an intrinsic that has 2 operands of the same type as its result.
233 static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
234                                 const CallExpr *E,
235  unsigned IntrinsicID) {
236  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
237  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
238 
239  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
240  return CGF.Builder.CreateCall(F, { Src0, Src1 });
241 }
242 
243 // Emit an intrinsic that has 3 operands of the same type as its result.
244 static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
245                                  const CallExpr *E,
246  unsigned IntrinsicID) {
247  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
248  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
249  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
250 
251  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
252  return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
253 }
254 
255 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
256 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
257                                const CallExpr *E,
258  unsigned IntrinsicID) {
259  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
260  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
261 
262  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
263  return CGF.Builder.CreateCall(F, {Src0, Src1});
264 }
265 
266 /// EmitFAbs - Emit a call to @llvm.fabs().
267 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
268  Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
269  llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
270  Call->setDoesNotAccessMemory();
271  return Call;
272 }
273 
274 /// Emit the computation of the sign bit for a floating point value. Returns
275 /// the i1 sign bit value.
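// Illustrative sketch (not part of the original source): for an ordinary
// 'double' the value is bitcast to i64 and the sign bit is read with
//   icmp slt i64 %bits, 0
// PPC double-double (ppc_fp128) needs the extra handling below to pick the
// sign of the higher-order double.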
276 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
277  LLVMContext &C = CGF.CGM.getLLVMContext();
278 
279  llvm::Type *Ty = V->getType();
280  int Width = Ty->getPrimitiveSizeInBits();
281  llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
282  V = CGF.Builder.CreateBitCast(V, IntTy);
283  if (Ty->isPPC_FP128Ty()) {
284  // We want the sign bit of the higher-order double. The bitcast we just
285  // did works as if the double-double was stored to memory and then
286  // read as an i128. The "store" will put the higher-order double in the
287  // lower address in both little- and big-Endian modes, but the "load"
288  // will treat those bits as a different part of the i128: the low bits in
289  // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
290  // we need to shift the high bits down to the low before truncating.
291  Width >>= 1;
292  if (CGF.getTarget().isBigEndian()) {
293  Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
294  V = CGF.Builder.CreateLShr(V, ShiftCst);
295  }
296  // We are truncating value in order to extract the higher-order
297  // double, which we will be using to extract the sign from.
298  IntTy = llvm::IntegerType::get(C, Width);
299  V = CGF.Builder.CreateTrunc(V, IntTy);
300  }
301  Value *Zero = llvm::Constant::getNullValue(IntTy);
302  return CGF.Builder.CreateICmpSLT(V, Zero);
303 }
304 
305 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
306                               const CallExpr *E, llvm::Value *calleeValue) {
307  return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E,
308  ReturnValueSlot(), Fn);
309 }
310 
311 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
312 /// depending on IntrinsicID.
313 ///
314 /// \arg CGF The current codegen function.
315 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
316 /// \arg X The first argument to the llvm.*.with.overflow.*.
317 /// \arg Y The second argument to the llvm.*.with.overflow.*.
318 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
319 /// \returns The result (i.e. sum/product) returned by the intrinsic.
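// Illustrative example (not part of the original source): with IntrinsicID =
// llvm.sadd.with.overflow on two i32 operands, the call returns an aggregate
// { i32, i1 }; element 0 is the wrapped sum (the value returned here) and
// element 1 is the overflow flag stored into Carry.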
320 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
321                                           const llvm::Intrinsic::ID IntrinsicID,
322                                           llvm::Value *X, llvm::Value *Y,
323                                           llvm::Value *&Carry) {
324  // Make sure we have integers of the same width.
325  assert(X->getType() == Y->getType() &&
326  "Arguments must be the same type. (Did you forget to make sure both "
327  "arguments have the same integer width?)");
328 
329  llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
330  llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
331  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
332  return CGF.Builder.CreateExtractValue(Tmp, 0);
333 }
334 
335 static Value *emitRangedBuiltin(CodeGenFunction &CGF,
336                                 unsigned IntrinsicID,
337  int low, int high) {
338  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
339  llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
340  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
341  llvm::Instruction *Call = CGF.Builder.CreateCall(F);
342  Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
343  return Call;
344 }
345 
346 namespace {
347  struct WidthAndSignedness {
348  unsigned Width;
349  bool Signed;
350  };
351 }
352 
353 static WidthAndSignedness
354 getIntegerWidthAndSignedness(const clang::ASTContext &context,
355                              const clang::QualType Type) {
356  assert(Type->isIntegerType() && "Given type is not an integer.");
357  unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
358  bool Signed = Type->isSignedIntegerType();
359  return {Width, Signed};
360 }
361 
362 // Given one or more integer types, this function produces an integer type that
363 // encompasses them: any value in one of the given types could be expressed in
364 // the encompassing type.
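// Illustrative example (not part of the original source): encompassing an
// unsigned 32-bit type and a signed 32-bit type yields {Width = 33, Signed =
// true}, since a signed result must be strictly wider than any unsigned input
// in order to represent all of its values.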
365 static struct WidthAndSignedness
366 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
367  assert(Types.size() > 0 && "Empty list of types.");
368 
369  // If any of the given types is signed, we must return a signed type.
370  bool Signed = false;
371  for (const auto &Type : Types) {
372  Signed |= Type.Signed;
373  }
374 
375  // The encompassing type must have a width greater than or equal to the width
376 // of the specified types. Additionally, if the encompassing type is signed,
377  // its width must be strictly greater than the width of any unsigned types
378  // given.
379  unsigned Width = 0;
380  for (const auto &Type : Types) {
381  unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
382  if (Width < MinWidth) {
383  Width = MinWidth;
384  }
385  }
386 
387  return {Width, Signed};
388 }
389 
390 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
391  llvm::Type *DestType = Int8PtrTy;
392  if (ArgValue->getType() != DestType)
393  ArgValue =
394  Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
395 
396  Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
397  return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
398 }
399 
400 /// Checks if using the result of __builtin_object_size(p, @p From) in place of
401 /// __builtin_object_size(p, @p To) is correct
402 static bool areBOSTypesCompatible(int From, int To) {
403  // Note: Our __builtin_object_size implementation currently treats Type=0 and
404  // Type=2 identically. Encoding this implementation detail here may make
405  // improving __builtin_object_size difficult in the future, so it's omitted.
406  return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
407 }
408 
409 static llvm::Value *
410 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
411  return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
412 }
413 
414 llvm::Value *
415 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
416  llvm::IntegerType *ResType) {
417  uint64_t ObjectSize;
418  if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
419  return emitBuiltinObjectSize(E, Type, ResType);
420  return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
421 }
422 
423 /// Returns a Value corresponding to the size of the given expression.
424 /// This Value may be either of the following:
425 /// - A llvm::Argument (if E is a param with the pass_object_size attribute on
426 /// it)
427 /// - A call to the @llvm.objectsize intrinsic
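// Illustrative example (not part of the original source): given a parameter
// declared as 'void f(void *p __attribute__((pass_object_size(0))))', a call
// to __builtin_object_size(p, 0) inside f loads the implicit size argument
// instead of emitting @llvm.objectsize.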
428 llvm::Value *
429 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
430  llvm::IntegerType *ResType) {
431  // We need to reference an argument if the pointer is a parameter with the
432  // pass_object_size attribute.
433  if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
434  auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
435  auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
436  if (Param != nullptr && PS != nullptr &&
437  areBOSTypesCompatible(PS->getType(), Type)) {
438  auto Iter = SizeArguments.find(Param);
439  assert(Iter != SizeArguments.end());
440 
441  const ImplicitParamDecl *D = Iter->second;
442  auto DIter = LocalDeclMap.find(D);
443  assert(DIter != LocalDeclMap.end());
444 
445  return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
446  getContext().getSizeType(), E->getLocStart());
447  }
448  }
449 
450  // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
451  // evaluate E for side-effects. In either case, we shouldn't lower to
452  // @llvm.objectsize.
453  if (Type == 3 || E->HasSideEffects(getContext()))
454  return getDefaultBuiltinObjectSizeResult(Type, ResType);
455 
456  // LLVM only supports 0 and 2, make sure that we pass along that
457  // as a boolean.
458  auto *CI = ConstantInt::get(Builder.getInt1Ty(), (Type & 2) >> 1);
459  // FIXME: Get right address space.
460  llvm::Type *Tys[] = {ResType, Builder.getInt8PtrTy(0)};
461  Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys);
462  return Builder.CreateCall(F, {EmitScalarExpr(E), CI});
463 }
464 
465 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
466                                         unsigned BuiltinID, const CallExpr *E,
467  ReturnValueSlot ReturnValue) {
468  // See if we can constant fold this builtin. If so, don't emit it at all.
469  Expr::EvalResult Result;
470  if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
471  !Result.hasSideEffects()) {
472  if (Result.Val.isInt())
473  return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
474  Result.Val.getInt()));
475  if (Result.Val.isFloat())
476  return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
477  Result.Val.getFloat()));
478  }
479 
480  switch (BuiltinID) {
481  default: break; // Handle intrinsics and libm functions below.
482  case Builtin::BI__builtin___CFStringMakeConstantString:
483  case Builtin::BI__builtin___NSStringMakeConstantString:
484  return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr));
485  case Builtin::BI__builtin_stdarg_start:
486  case Builtin::BI__builtin_va_start:
487  case Builtin::BI__va_start:
488  case Builtin::BI__builtin_va_end:
489  return RValue::get(
490  EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
491  ? EmitScalarExpr(E->getArg(0))
492  : EmitVAListRef(E->getArg(0)).getPointer(),
493  BuiltinID != Builtin::BI__builtin_va_end));
494  case Builtin::BI__builtin_va_copy: {
495  Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
496  Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
497 
498  llvm::Type *Type = Int8PtrTy;
499 
500  DstPtr = Builder.CreateBitCast(DstPtr, Type);
501  SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
502  return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
503  {DstPtr, SrcPtr}));
504  }
505  case Builtin::BI__builtin_abs:
506  case Builtin::BI__builtin_labs:
507  case Builtin::BI__builtin_llabs: {
508  Value *ArgValue = EmitScalarExpr(E->getArg(0));
509 
510  Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
511  Value *CmpResult =
512  Builder.CreateICmpSGE(ArgValue,
513  llvm::Constant::getNullValue(ArgValue->getType()),
514  "abscond");
515  Value *Result =
516  Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
517 
518  return RValue::get(Result);
519  }
520  case Builtin::BI__builtin_fabs:
521  case Builtin::BI__builtin_fabsf:
522  case Builtin::BI__builtin_fabsl: {
523  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
524  }
525  case Builtin::BI__builtin_fmod:
526  case Builtin::BI__builtin_fmodf:
527  case Builtin::BI__builtin_fmodl: {
528  Value *Arg1 = EmitScalarExpr(E->getArg(0));
529  Value *Arg2 = EmitScalarExpr(E->getArg(1));
530  Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
531  return RValue::get(Result);
532  }
533  case Builtin::BI__builtin_copysign:
534  case Builtin::BI__builtin_copysignf:
535  case Builtin::BI__builtin_copysignl: {
536  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
537  }
538  case Builtin::BI__builtin_ceil:
539  case Builtin::BI__builtin_ceilf:
540  case Builtin::BI__builtin_ceill: {
541  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
542  }
543  case Builtin::BI__builtin_floor:
544  case Builtin::BI__builtin_floorf:
545  case Builtin::BI__builtin_floorl: {
546  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
547  }
548  case Builtin::BI__builtin_trunc:
549  case Builtin::BI__builtin_truncf:
550  case Builtin::BI__builtin_truncl: {
551  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
552  }
553  case Builtin::BI__builtin_rint:
554  case Builtin::BI__builtin_rintf:
555  case Builtin::BI__builtin_rintl: {
556  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
557  }
558  case Builtin::BI__builtin_nearbyint:
559  case Builtin::BI__builtin_nearbyintf:
560  case Builtin::BI__builtin_nearbyintl: {
561  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
562  }
563  case Builtin::BI__builtin_round:
564  case Builtin::BI__builtin_roundf:
565  case Builtin::BI__builtin_roundl: {
566  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
567  }
568  case Builtin::BI__builtin_fmin:
569  case Builtin::BI__builtin_fminf:
570  case Builtin::BI__builtin_fminl: {
571  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
572  }
573  case Builtin::BI__builtin_fmax:
574  case Builtin::BI__builtin_fmaxf:
575  case Builtin::BI__builtin_fmaxl: {
576  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
577  }
578  case Builtin::BI__builtin_conj:
579  case Builtin::BI__builtin_conjf:
580  case Builtin::BI__builtin_conjl: {
581  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
582  Value *Real = ComplexVal.first;
583  Value *Imag = ComplexVal.second;
584  Value *Zero =
585  Imag->getType()->isFPOrFPVectorTy()
586  ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
587  : llvm::Constant::getNullValue(Imag->getType());
588 
589  Imag = Builder.CreateFSub(Zero, Imag, "sub");
590  return RValue::getComplex(std::make_pair(Real, Imag));
591  }
592  case Builtin::BI__builtin_creal:
593  case Builtin::BI__builtin_crealf:
594  case Builtin::BI__builtin_creall:
595  case Builtin::BIcreal:
596  case Builtin::BIcrealf:
597  case Builtin::BIcreall: {
598  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
599  return RValue::get(ComplexVal.first);
600  }
601 
602  case Builtin::BI__builtin_cimag:
603  case Builtin::BI__builtin_cimagf:
604  case Builtin::BI__builtin_cimagl:
605  case Builtin::BIcimag:
606  case Builtin::BIcimagf:
607  case Builtin::BIcimagl: {
608  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
609  return RValue::get(ComplexVal.second);
610  }
611 
612  case Builtin::BI__builtin_ctzs:
613  case Builtin::BI__builtin_ctz:
614  case Builtin::BI__builtin_ctzl:
615  case Builtin::BI__builtin_ctzll: {
616  Value *ArgValue = EmitScalarExpr(E->getArg(0));
617 
618  llvm::Type *ArgType = ArgValue->getType();
619  Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
620 
621  llvm::Type *ResultType = ConvertType(E->getType());
622  Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
623  Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
624  if (Result->getType() != ResultType)
625  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
626  "cast");
627  return RValue::get(Result);
628  }
629  case Builtin::BI__builtin_clzs:
630  case Builtin::BI__builtin_clz:
631  case Builtin::BI__builtin_clzl:
632  case Builtin::BI__builtin_clzll: {
633  Value *ArgValue = EmitScalarExpr(E->getArg(0));
634 
635  llvm::Type *ArgType = ArgValue->getType();
636  Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
637 
638  llvm::Type *ResultType = ConvertType(E->getType());
639  Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
640  Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
641  if (Result->getType() != ResultType)
642  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
643  "cast");
644  return RValue::get(Result);
645  }
646  case Builtin::BI__builtin_ffs:
647  case Builtin::BI__builtin_ffsl:
648  case Builtin::BI__builtin_ffsll: {
649  // ffs(x) -> x ? cttz(x) + 1 : 0
650  Value *ArgValue = EmitScalarExpr(E->getArg(0));
651 
652  llvm::Type *ArgType = ArgValue->getType();
653  Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
654 
655  llvm::Type *ResultType = ConvertType(E->getType());
656  Value *Tmp =
657  Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
658  llvm::ConstantInt::get(ArgType, 1));
659  Value *Zero = llvm::Constant::getNullValue(ArgType);
660  Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
661  Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
662  if (Result->getType() != ResultType)
663  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
664  "cast");
665  return RValue::get(Result);
666  }
667  case Builtin::BI__builtin_parity:
668  case Builtin::BI__builtin_parityl:
669  case Builtin::BI__builtin_parityll: {
670  // parity(x) -> ctpop(x) & 1
671  Value *ArgValue = EmitScalarExpr(E->getArg(0));
672 
673  llvm::Type *ArgType = ArgValue->getType();
674  Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
675 
676  llvm::Type *ResultType = ConvertType(E->getType());
677  Value *Tmp = Builder.CreateCall(F, ArgValue);
678  Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
679  if (Result->getType() != ResultType)
680  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
681  "cast");
682  return RValue::get(Result);
683  }
684  case Builtin::BI__builtin_popcount:
685  case Builtin::BI__builtin_popcountl:
686  case Builtin::BI__builtin_popcountll: {
687  Value *ArgValue = EmitScalarExpr(E->getArg(0));
688 
689  llvm::Type *ArgType = ArgValue->getType();
690  Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
691 
692  llvm::Type *ResultType = ConvertType(E->getType());
693  Value *Result = Builder.CreateCall(F, ArgValue);
694  if (Result->getType() != ResultType)
695  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
696  "cast");
697  return RValue::get(Result);
698  }
699  case Builtin::BI__builtin_unpredictable: {
700  // Always return the argument of __builtin_unpredictable. LLVM does not
701  // handle this builtin. Metadata for this builtin should be added directly
702  // to instructions such as branches or switches that use it.
703  return RValue::get(EmitScalarExpr(E->getArg(0)));
704  }
705  case Builtin::BI__builtin_expect: {
706  Value *ArgValue = EmitScalarExpr(E->getArg(0));
707  llvm::Type *ArgType = ArgValue->getType();
708 
709  Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
710  // Don't generate llvm.expect on -O0 as the backend won't use it for
711  // anything.
712  // Note, we still IRGen ExpectedValue because it could have side-effects.
713  if (CGM.getCodeGenOpts().OptimizationLevel == 0)
714  return RValue::get(ArgValue);
715 
716  Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
717  Value *Result =
718  Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
719  return RValue::get(Result);
720  }
721  case Builtin::BI__builtin_assume_aligned: {
722  Value *PtrValue = EmitScalarExpr(E->getArg(0));
723  Value *OffsetValue =
724  (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
725 
726  Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
727  ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
728  unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
729 
730  EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
731  return RValue::get(PtrValue);
732  }
733  case Builtin::BI__assume:
734  case Builtin::BI__builtin_assume: {
735  if (E->getArg(0)->HasSideEffects(getContext()))
736  return RValue::get(nullptr);
737 
738  Value *ArgValue = EmitScalarExpr(E->getArg(0));
739  Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
740  return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
741  }
742  case Builtin::BI__builtin_bswap16:
743  case Builtin::BI__builtin_bswap32:
744  case Builtin::BI__builtin_bswap64: {
745  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
746  }
747  case Builtin::BI__builtin_bitreverse8:
748  case Builtin::BI__builtin_bitreverse16:
749  case Builtin::BI__builtin_bitreverse32:
750  case Builtin::BI__builtin_bitreverse64: {
751  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
752  }
753  case Builtin::BI__builtin_object_size: {
754  unsigned Type =
755  E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
756  auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
757 
758  // We pass this builtin onto the optimizer so that it can figure out the
759  // object size in more complex cases.
760  return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType));
761  }
762  case Builtin::BI__builtin_prefetch: {
763  Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
764  // FIXME: Technically these constants should be of type 'int', yes?
765  RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
766  llvm::ConstantInt::get(Int32Ty, 0);
767  Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
768  llvm::ConstantInt::get(Int32Ty, 3);
769  Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
770  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
771  return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
772  }
773  case Builtin::BI__builtin_readcyclecounter: {
774  Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
775  return RValue::get(Builder.CreateCall(F));
776  }
777  case Builtin::BI__builtin___clear_cache: {
778  Value *Begin = EmitScalarExpr(E->getArg(0));
779  Value *End = EmitScalarExpr(E->getArg(1));
780  Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
781  return RValue::get(Builder.CreateCall(F, {Begin, End}));
782  }
783  case Builtin::BI__builtin_trap:
784  return RValue::get(EmitTrapCall(Intrinsic::trap));
785  case Builtin::BI__debugbreak:
786  return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
787  case Builtin::BI__builtin_unreachable: {
788  if (SanOpts.has(SanitizerKind::Unreachable)) {
789  SanitizerScope SanScope(this);
790  EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
791  SanitizerKind::Unreachable),
792  "builtin_unreachable", EmitCheckSourceLocation(E->getExprLoc()),
793  None);
794  } else
795  Builder.CreateUnreachable();
796 
797  // We do need to preserve an insertion point.
798  EmitBlock(createBasicBlock("unreachable.cont"));
799 
800  return RValue::get(nullptr);
801  }
802 
803  case Builtin::BI__builtin_powi:
804  case Builtin::BI__builtin_powif:
805  case Builtin::BI__builtin_powil: {
806  Value *Base = EmitScalarExpr(E->getArg(0));
807  Value *Exponent = EmitScalarExpr(E->getArg(1));
808  llvm::Type *ArgType = Base->getType();
809  Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
810  return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
811  }
812 
813  case Builtin::BI__builtin_isgreater:
814  case Builtin::BI__builtin_isgreaterequal:
815  case Builtin::BI__builtin_isless:
816  case Builtin::BI__builtin_islessequal:
817  case Builtin::BI__builtin_islessgreater:
818  case Builtin::BI__builtin_isunordered: {
819  // Ordered comparisons: we know the arguments to these are matching scalar
820  // floating point values.
821  Value *LHS = EmitScalarExpr(E->getArg(0));
822  Value *RHS = EmitScalarExpr(E->getArg(1));
823 
824  switch (BuiltinID) {
825  default: llvm_unreachable("Unknown ordered comparison");
826  case Builtin::BI__builtin_isgreater:
827  LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
828  break;
829  case Builtin::BI__builtin_isgreaterequal:
830  LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
831  break;
832  case Builtin::BI__builtin_isless:
833  LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
834  break;
835  case Builtin::BI__builtin_islessequal:
836  LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
837  break;
838  case Builtin::BI__builtin_islessgreater:
839  LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
840  break;
841  case Builtin::BI__builtin_isunordered:
842  LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
843  break;
844  }
845  // ZExt bool to int type.
846  return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
847  }
848  case Builtin::BI__builtin_isnan: {
849  Value *V = EmitScalarExpr(E->getArg(0));
850  V = Builder.CreateFCmpUNO(V, V, "cmp");
851  return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
852  }
853 
854  case Builtin::BI__builtin_isinf:
855  case Builtin::BI__builtin_isfinite: {
856  // isinf(x) --> fabs(x) == infinity
857  // isfinite(x) --> fabs(x) != infinity
858  // x != NaN via the ordered compare in either case.
859  Value *V = EmitScalarExpr(E->getArg(0));
860  Value *Fabs = EmitFAbs(*this, V);
861  Constant *Infinity = ConstantFP::getInfinity(V->getType());
862  CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
863  ? CmpInst::FCMP_OEQ
864  : CmpInst::FCMP_ONE;
865  Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
866  return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
867  }
868 
869  case Builtin::BI__builtin_isinf_sign: {
870  // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
871  Value *Arg = EmitScalarExpr(E->getArg(0));
872  Value *AbsArg = EmitFAbs(*this, Arg);
873  Value *IsInf = Builder.CreateFCmpOEQ(
874  AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
875  Value *IsNeg = EmitSignBit(*this, Arg);
876 
877  llvm::Type *IntTy = ConvertType(E->getType());
878  Value *Zero = Constant::getNullValue(IntTy);
879  Value *One = ConstantInt::get(IntTy, 1);
880  Value *NegativeOne = ConstantInt::get(IntTy, -1);
881  Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
882  Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
883  return RValue::get(Result);
884  }
885 
886  case Builtin::BI__builtin_isnormal: {
887  // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
888  Value *V = EmitScalarExpr(E->getArg(0));
889  Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
890 
891  Value *Abs = EmitFAbs(*this, V);
892  Value *IsLessThanInf =
893  Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
894  APFloat Smallest = APFloat::getSmallestNormalized(
895  getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
896  Value *IsNormal =
897  Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
898  "isnormal");
899  V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
900  V = Builder.CreateAnd(V, IsNormal, "and");
901  return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
902  }
903 
904  case Builtin::BI__builtin_fpclassify: {
905  Value *V = EmitScalarExpr(E->getArg(5));
906  llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
907 
908  // Create Result
909  BasicBlock *Begin = Builder.GetInsertBlock();
910  BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
911  Builder.SetInsertPoint(End);
912  PHINode *Result =
913  Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
914  "fpclassify_result");
915 
916  // if (V==0) return FP_ZERO
917  Builder.SetInsertPoint(Begin);
918  Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
919  "iszero");
920  Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
921  BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
922  Builder.CreateCondBr(IsZero, End, NotZero);
923  Result->addIncoming(ZeroLiteral, Begin);
924 
925  // if (V != V) return FP_NAN
926  Builder.SetInsertPoint(NotZero);
927  Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
928  Value *NanLiteral = EmitScalarExpr(E->getArg(0));
929  BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
930  Builder.CreateCondBr(IsNan, End, NotNan);
931  Result->addIncoming(NanLiteral, NotZero);
932 
933  // if (fabs(V) == infinity) return FP_INFINITY
934  Builder.SetInsertPoint(NotNan);
935  Value *VAbs = EmitFAbs(*this, V);
936  Value *IsInf =
937  Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
938  "isinf");
939  Value *InfLiteral = EmitScalarExpr(E->getArg(1));
940  BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
941  Builder.CreateCondBr(IsInf, End, NotInf);
942  Result->addIncoming(InfLiteral, NotNan);
943 
944  // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
945  Builder.SetInsertPoint(NotInf);
946  APFloat Smallest = APFloat::getSmallestNormalized(
947  getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
948  Value *IsNormal =
949  Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
950  "isnormal");
951  Value *NormalResult =
952  Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
953  EmitScalarExpr(E->getArg(3)));
954  Builder.CreateBr(End);
955  Result->addIncoming(NormalResult, NotInf);
956 
957  // return Result
958  Builder.SetInsertPoint(End);
959  return RValue::get(Result);
960  }
961 
962  case Builtin::BIalloca:
963  case Builtin::BI_alloca:
964  case Builtin::BI__builtin_alloca: {
965  Value *Size = EmitScalarExpr(E->getArg(0));
966  return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size));
967  }
968  case Builtin::BIbzero:
969  case Builtin::BI__builtin_bzero: {
970  Address Dest = EmitPointerWithAlignment(E->getArg(0));
971  Value *SizeVal = EmitScalarExpr(E->getArg(1));
972  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
973  E->getArg(0)->getExprLoc(), FD, 0);
974  Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
975  return RValue::get(Dest.getPointer());
976  }
977  case Builtin::BImemcpy:
978  case Builtin::BI__builtin_memcpy: {
979  Address Dest = EmitPointerWithAlignment(E->getArg(0));
980  Address Src = EmitPointerWithAlignment(E->getArg(1));
981  Value *SizeVal = EmitScalarExpr(E->getArg(2));
982  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
983  E->getArg(0)->getExprLoc(), FD, 0);
984  EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
985  E->getArg(1)->getExprLoc(), FD, 1);
986  Builder.CreateMemCpy(Dest, Src, SizeVal, false);
987  return RValue::get(Dest.getPointer());
988  }
989 
990  case Builtin::BI__builtin___memcpy_chk: {
991  // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
992  llvm::APSInt Size, DstSize;
993  if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
994  !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
995  break;
996  if (Size.ugt(DstSize))
997  break;
998  Address Dest = EmitPointerWithAlignment(E->getArg(0));
999  Address Src = EmitPointerWithAlignment(E->getArg(1));
1000  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1001  Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1002  return RValue::get(Dest.getPointer());
1003  }
1004 
1005  case Builtin::BI__builtin_objc_memmove_collectable: {
1006  Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
1007  Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
1008  Value *SizeVal = EmitScalarExpr(E->getArg(2));
1009  CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
1010  DestAddr, SrcAddr, SizeVal);
1011  return RValue::get(DestAddr.getPointer());
1012  }
1013 
1014  case Builtin::BI__builtin___memmove_chk: {
1015  // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
1016  llvm::APSInt Size, DstSize;
1017  if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1018  !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1019  break;
1020  if (Size.ugt(DstSize))
1021  break;
1022  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1023  Address Src = EmitPointerWithAlignment(E->getArg(1));
1024  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1025  Builder.CreateMemMove(Dest, Src, SizeVal, false);
1026  return RValue::get(Dest.getPointer());
1027  }
1028 
1029  case Builtin::BImemmove:
1030  case Builtin::BI__builtin_memmove: {
1031  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1032  Address Src = EmitPointerWithAlignment(E->getArg(1));
1033  Value *SizeVal = EmitScalarExpr(E->getArg(2));
1034  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1035  E->getArg(0)->getExprLoc(), FD, 0);
1036  EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1037  E->getArg(1)->getExprLoc(), FD, 1);
1038  Builder.CreateMemMove(Dest, Src, SizeVal, false);
1039  return RValue::get(Dest.getPointer());
1040  }
1041  case Builtin::BImemset:
1042  case Builtin::BI__builtin_memset: {
1043  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1044  Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1045  Builder.getInt8Ty());
1046  Value *SizeVal = EmitScalarExpr(E->getArg(2));
1047  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1048  E->getArg(0)->getExprLoc(), FD, 0);
1049  Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1050  return RValue::get(Dest.getPointer());
1051  }
1052  case Builtin::BI__builtin___memset_chk: {
1053  // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
1054  llvm::APSInt Size, DstSize;
1055  if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1056  !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1057  break;
1058  if (Size.ugt(DstSize))
1059  break;
1060  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1061  Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1062  Builder.getInt8Ty());
1063  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1064  Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1065  return RValue::get(Dest.getPointer());
1066  }
1067  case Builtin::BI__builtin_dwarf_cfa: {
1068  // The offset in bytes from the first argument to the CFA.
1069  //
1070  // Why on earth is this in the frontend? Is there any reason at
1071  // all that the backend can't reasonably determine this while
1072  // lowering llvm.eh.dwarf.cfa()?
1073  //
1074  // TODO: If there's a satisfactory reason, add a target hook for
1075  // this instead of hard-coding 0, which is correct for most targets.
1076  int32_t Offset = 0;
1077 
1078  Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
1079  return RValue::get(Builder.CreateCall(F,
1080  llvm::ConstantInt::get(Int32Ty, Offset)));
1081  }
1082  case Builtin::BI__builtin_return_address: {
1083  Value *Depth =
1084  CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1085  Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1086  return RValue::get(Builder.CreateCall(F, Depth));
1087  }
1088  case Builtin::BI__builtin_frame_address: {
1089  Value *Depth =
1090  CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1091  Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
1092  return RValue::get(Builder.CreateCall(F, Depth));
1093  }
1094  case Builtin::BI__builtin_extract_return_addr: {
1095  Value *Address = EmitScalarExpr(E->getArg(0));
1096  Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1097  return RValue::get(Result);
1098  }
1099  case Builtin::BI__builtin_frob_return_addr: {
1100  Value *Address = EmitScalarExpr(E->getArg(0));
1101  Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1102  return RValue::get(Result);
1103  }
1104  case Builtin::BI__builtin_dwarf_sp_column: {
1105  llvm::IntegerType *Ty
1106  = cast<llvm::IntegerType>(ConvertType(E->getType()));
1107  int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
1108  if (Column == -1) {
1109  CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1110  return RValue::get(llvm::UndefValue::get(Ty));
1111  }
1112  return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1113  }
1114  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
1115  Value *Address = EmitScalarExpr(E->getArg(0));
1116  if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1117  CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1118  return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1119  }
1120  case Builtin::BI__builtin_eh_return: {
1121  Value *Int = EmitScalarExpr(E->getArg(0));
1122  Value *Ptr = EmitScalarExpr(E->getArg(1));
1123 
1124  llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1125  assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1126  "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
1127  Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1128  ? Intrinsic::eh_return_i32
1129  : Intrinsic::eh_return_i64);
1130  Builder.CreateCall(F, {Int, Ptr});
1131  Builder.CreateUnreachable();
1132 
1133  // We do need to preserve an insertion point.
1134  EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1135 
1136  return RValue::get(nullptr);
1137  }
1138  case Builtin::BI__builtin_unwind_init: {
1139  Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1140  return RValue::get(Builder.CreateCall(F));
1141  }
1142  case Builtin::BI__builtin_extend_pointer: {
1143  // Extends a pointer to the size of an _Unwind_Word, which is
1144  // uint64_t on all platforms. Generally this gets poked into a
1145  // register and eventually used as an address, so if the
1146  // addressing registers are wider than pointers and the platform
1147  // doesn't implicitly ignore high-order bits when doing
1148  // addressing, we need to make sure we zext / sext based on
1149  // the platform's expectations.
1150  //
1151  // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
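 // Illustrative sketch (not part of the original source): on a 32-bit target
 // the pointer is first converted with ptrtoint to i32 and then widened to
 // i64 with zext (or sext, if the target hook below asks for sign extension);
 // on 64-bit targets the ptrtoint result is already 64 bits and is returned
 // as-is.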
1152 
1153  // Cast the pointer to intptr_t.
1154  Value *Ptr = EmitScalarExpr(E->getArg(0));
1155  Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1156 
1157  // If that's 64 bits, we're done.
1158  if (IntPtrTy->getBitWidth() == 64)
1159  return RValue::get(Result);
1160 
1161  // Otherwise, ask the codegen data what to do.
1162  if (getTargetHooks().extendPointerWithSExt())
1163  return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1164  else
1165  return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1166  }
1167  case Builtin::BI__builtin_setjmp: {
1168  // Buffer is a void**.
1169  Address Buf = EmitPointerWithAlignment(E->getArg(0));
1170 
1171  // Store the frame pointer to the setjmp buffer.
1172  Value *FrameAddr =
1173  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1174  ConstantInt::get(Int32Ty, 0));
1175  Builder.CreateStore(FrameAddr, Buf);
1176 
1177  // Store the stack pointer to the setjmp buffer.
1178  Value *StackAddr =
1179  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
1180  Address StackSaveSlot =
1181  Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
1182  Builder.CreateStore(StackAddr, StackSaveSlot);
1183 
1184  // Call LLVM's EH setjmp, which is lightweight.
1185  Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
1186  Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1187  return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
1188  }
1189  case Builtin::BI__builtin_longjmp: {
1190  Value *Buf = EmitScalarExpr(E->getArg(0));
1191  Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1192 
1193  // Call LLVM's EH longjmp, which is lightweight.
1194  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1195 
1196  // longjmp doesn't return; mark this as unreachable.
1197  Builder.CreateUnreachable();
1198 
1199  // We do need to preserve an insertion point.
1200  EmitBlock(createBasicBlock("longjmp.cont"));
1201 
1202  return RValue::get(nullptr);
1203  }
1204  case Builtin::BI__sync_fetch_and_add:
1205  case Builtin::BI__sync_fetch_and_sub:
1206  case Builtin::BI__sync_fetch_and_or:
1207  case Builtin::BI__sync_fetch_and_and:
1208  case Builtin::BI__sync_fetch_and_xor:
1209  case Builtin::BI__sync_fetch_and_nand:
1210  case Builtin::BI__sync_add_and_fetch:
1211  case Builtin::BI__sync_sub_and_fetch:
1212  case Builtin::BI__sync_and_and_fetch:
1213  case Builtin::BI__sync_or_and_fetch:
1214  case Builtin::BI__sync_xor_and_fetch:
1215  case Builtin::BI__sync_nand_and_fetch:
1216  case Builtin::BI__sync_val_compare_and_swap:
1217  case Builtin::BI__sync_bool_compare_and_swap:
1218  case Builtin::BI__sync_lock_test_and_set:
1219  case Builtin::BI__sync_lock_release:
1220  case Builtin::BI__sync_swap:
1221  llvm_unreachable("Shouldn't make it through sema");
1222  case Builtin::BI__sync_fetch_and_add_1:
1223  case Builtin::BI__sync_fetch_and_add_2:
1224  case Builtin::BI__sync_fetch_and_add_4:
1225  case Builtin::BI__sync_fetch_and_add_8:
1226  case Builtin::BI__sync_fetch_and_add_16:
1227  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
1228  case Builtin::BI__sync_fetch_and_sub_1:
1229  case Builtin::BI__sync_fetch_and_sub_2:
1230  case Builtin::BI__sync_fetch_and_sub_4:
1231  case Builtin::BI__sync_fetch_and_sub_8:
1232  case Builtin::BI__sync_fetch_and_sub_16:
1233  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
1234  case Builtin::BI__sync_fetch_and_or_1:
1235  case Builtin::BI__sync_fetch_and_or_2:
1236  case Builtin::BI__sync_fetch_and_or_4:
1237  case Builtin::BI__sync_fetch_and_or_8:
1238  case Builtin::BI__sync_fetch_and_or_16:
1239  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
1240  case Builtin::BI__sync_fetch_and_and_1:
1241  case Builtin::BI__sync_fetch_and_and_2:
1242  case Builtin::BI__sync_fetch_and_and_4:
1243  case Builtin::BI__sync_fetch_and_and_8:
1244  case Builtin::BI__sync_fetch_and_and_16:
1245  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
1246  case Builtin::BI__sync_fetch_and_xor_1:
1247  case Builtin::BI__sync_fetch_and_xor_2:
1248  case Builtin::BI__sync_fetch_and_xor_4:
1249  case Builtin::BI__sync_fetch_and_xor_8:
1250  case Builtin::BI__sync_fetch_and_xor_16:
1251  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
1252  case Builtin::BI__sync_fetch_and_nand_1:
1253  case Builtin::BI__sync_fetch_and_nand_2:
1254  case Builtin::BI__sync_fetch_and_nand_4:
1255  case Builtin::BI__sync_fetch_and_nand_8:
1256  case Builtin::BI__sync_fetch_and_nand_16:
1257  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
1258 
1259  // Clang extensions: not overloaded yet.
1260  case Builtin::BI__sync_fetch_and_min:
1261  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
1262  case Builtin::BI__sync_fetch_and_max:
1263  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
1264  case Builtin::BI__sync_fetch_and_umin:
1265  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
1266  case Builtin::BI__sync_fetch_and_umax:
1267  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1268 
1269  case Builtin::BI__sync_add_and_fetch_1:
1270  case Builtin::BI__sync_add_and_fetch_2:
1271  case Builtin::BI__sync_add_and_fetch_4:
1272  case Builtin::BI__sync_add_and_fetch_8:
1273  case Builtin::BI__sync_add_and_fetch_16:
1274  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
1275  llvm::Instruction::Add);
1276  case Builtin::BI__sync_sub_and_fetch_1:
1277  case Builtin::BI__sync_sub_and_fetch_2:
1278  case Builtin::BI__sync_sub_and_fetch_4:
1279  case Builtin::BI__sync_sub_and_fetch_8:
1280  case Builtin::BI__sync_sub_and_fetch_16:
1281  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
1282  llvm::Instruction::Sub);
1283  case Builtin::BI__sync_and_and_fetch_1:
1284  case Builtin::BI__sync_and_and_fetch_2:
1285  case Builtin::BI__sync_and_and_fetch_4:
1286  case Builtin::BI__sync_and_and_fetch_8:
1287  case Builtin::BI__sync_and_and_fetch_16:
1288  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
1289  llvm::Instruction::And);
1290  case Builtin::BI__sync_or_and_fetch_1:
1291  case Builtin::BI__sync_or_and_fetch_2:
1292  case Builtin::BI__sync_or_and_fetch_4:
1293  case Builtin::BI__sync_or_and_fetch_8:
1294  case Builtin::BI__sync_or_and_fetch_16:
1295  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
1296  llvm::Instruction::Or);
1297  case Builtin::BI__sync_xor_and_fetch_1:
1298  case Builtin::BI__sync_xor_and_fetch_2:
1299  case Builtin::BI__sync_xor_and_fetch_4:
1300  case Builtin::BI__sync_xor_and_fetch_8:
1301  case Builtin::BI__sync_xor_and_fetch_16:
1302  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
1303  llvm::Instruction::Xor);
1304  case Builtin::BI__sync_nand_and_fetch_1:
1305  case Builtin::BI__sync_nand_and_fetch_2:
1306  case Builtin::BI__sync_nand_and_fetch_4:
1307  case Builtin::BI__sync_nand_and_fetch_8:
1308  case Builtin::BI__sync_nand_and_fetch_16:
1309  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
1310  llvm::Instruction::And, true);
1311 
1312  case Builtin::BI__sync_val_compare_and_swap_1:
1313  case Builtin::BI__sync_val_compare_and_swap_2:
1314  case Builtin::BI__sync_val_compare_and_swap_4:
1315  case Builtin::BI__sync_val_compare_and_swap_8:
1316  case Builtin::BI__sync_val_compare_and_swap_16:
1317  return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
1318 
1319  case Builtin::BI__sync_bool_compare_and_swap_1:
1320  case Builtin::BI__sync_bool_compare_and_swap_2:
1321  case Builtin::BI__sync_bool_compare_and_swap_4:
1322  case Builtin::BI__sync_bool_compare_and_swap_8:
1323  case Builtin::BI__sync_bool_compare_and_swap_16:
1324  return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
1325 
1326  case Builtin::BI__sync_swap_1:
1327  case Builtin::BI__sync_swap_2:
1328  case Builtin::BI__sync_swap_4:
1329  case Builtin::BI__sync_swap_8:
1330  case Builtin::BI__sync_swap_16:
1331  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1332 
1333  case Builtin::BI__sync_lock_test_and_set_1:
1334  case Builtin::BI__sync_lock_test_and_set_2:
1335  case Builtin::BI__sync_lock_test_and_set_4:
1336  case Builtin::BI__sync_lock_test_and_set_8:
1337  case Builtin::BI__sync_lock_test_and_set_16:
1338  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1339 
1340  case Builtin::BI__sync_lock_release_1:
1341  case Builtin::BI__sync_lock_release_2:
1342  case Builtin::BI__sync_lock_release_4:
1343  case Builtin::BI__sync_lock_release_8:
1344  case Builtin::BI__sync_lock_release_16: {
1345  Value *Ptr = EmitScalarExpr(E->getArg(0));
1346  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1347  CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1348  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1349  StoreSize.getQuantity() * 8);
1350  Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1351  llvm::StoreInst *Store =
1352  Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
1353  StoreSize);
1354  Store->setAtomic(llvm::AtomicOrdering::Release);
1355  return RValue::get(nullptr);
1356  }
1357 
1358  case Builtin::BI__sync_synchronize: {
1359  // We assume this is supposed to correspond to a C++0x-style
1360  // sequentially-consistent fence (i.e. this is only usable for
1361  // synchronization, not device I/O or anything like that). This intrinsic
1362  // is really badly designed in the sense that in theory, there isn't
1363  // any way to safely use it... but in practice, it mostly works
1364  // to use it with non-atomic loads and stores to get acquire/release
1365  // semantics.
1366  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
1367  return RValue::get(nullptr);
1368  }
1369 
1370  case Builtin::BI__builtin_nontemporal_load:
1371  return RValue::get(EmitNontemporalLoad(*this, E));
1372  case Builtin::BI__builtin_nontemporal_store:
1373  return RValue::get(EmitNontemporalStore(*this, E));
1374  case Builtin::BI__c11_atomic_is_lock_free:
1375  case Builtin::BI__atomic_is_lock_free: {
1376  // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1377  // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1378  // _Atomic(T) is always properly-aligned.
1379  const char *LibCallName = "__atomic_is_lock_free";
1380  CallArgList Args;
1381  Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1382  getContext().getSizeType());
1383  if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1384  Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1385  getContext().VoidPtrTy);
1386  else
1387  Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1388  getContext().VoidPtrTy);
1389  const CGFunctionInfo &FuncInfo =
1390  CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
1391  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1392  llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1393  return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
1394  }
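// Illustrative example (not from this file): a call such as
//   _Bool b = __atomic_is_lock_free(sizeof(long long), &obj);
// is lowered to the runtime call __atomic_is_lock_free(8, &obj), whereas the
// __c11_atomic_is_lock_free form passes a null pointer for the second argument
// because _Atomic(T) objects are assumed to be properly aligned.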
1395 
1396  case Builtin::BI__atomic_test_and_set: {
1397  // Look at the argument type to determine whether this is a volatile
1398  // operation. The parameter type is always volatile.
1399  QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1400  bool Volatile =
1401  PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1402 
1403  Value *Ptr = EmitScalarExpr(E->getArg(0));
1404  unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1405  Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1406  Value *NewVal = Builder.getInt8(1);
1407  Value *Order = EmitScalarExpr(E->getArg(1));
1408  if (isa<llvm::ConstantInt>(Order)) {
1409  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1410  AtomicRMWInst *Result = nullptr;
1411  switch (ord) {
1412  case 0: // memory_order_relaxed
1413  default: // invalid order
1414  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1415  llvm::AtomicOrdering::Monotonic);
1416  break;
1417  case 1: // memory_order_consume
1418  case 2: // memory_order_acquire
1419  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1420  llvm::AtomicOrdering::Acquire);
1421  break;
1422  case 3: // memory_order_release
1423  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1424  llvm::AtomicOrdering::Release);
1425  break;
1426  case 4: // memory_order_acq_rel
1427 
1428  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1429  llvm::AtomicOrdering::AcquireRelease);
1430  break;
1431  case 5: // memory_order_seq_cst
1432  Result = Builder.CreateAtomicRMW(
1433  llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1434  llvm::AtomicOrdering::SequentiallyConsistent);
1435  break;
1436  }
1437  Result->setVolatile(Volatile);
1438  return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1439  }
1440 
1441  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1442 
1443  llvm::BasicBlock *BBs[5] = {
1444  createBasicBlock("monotonic", CurFn),
1445  createBasicBlock("acquire", CurFn),
1446  createBasicBlock("release", CurFn),
1447  createBasicBlock("acqrel", CurFn),
1448  createBasicBlock("seqcst", CurFn)
1449  };
1450  llvm::AtomicOrdering Orders[5] = {
1451  llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
1452  llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
1453  llvm::AtomicOrdering::SequentiallyConsistent};
1454 
1455  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1456  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1457 
1458  Builder.SetInsertPoint(ContBB);
1459  PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1460 
1461  for (unsigned i = 0; i < 5; ++i) {
1462  Builder.SetInsertPoint(BBs[i]);
1463  AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1464  Ptr, NewVal, Orders[i]);
1465  RMW->setVolatile(Volatile);
1466  Result->addIncoming(RMW, BBs[i]);
1467  Builder.CreateBr(ContBB);
1468  }
1469 
1470  SI->addCase(Builder.getInt32(0), BBs[0]);
1471  SI->addCase(Builder.getInt32(1), BBs[1]);
1472  SI->addCase(Builder.getInt32(2), BBs[1]);
1473  SI->addCase(Builder.getInt32(3), BBs[2]);
1474  SI->addCase(Builder.getInt32(4), BBs[3]);
1475  SI->addCase(Builder.getInt32(5), BBs[4]);
1476 
1477  Builder.SetInsertPoint(ContBB);
1478  return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1479  }
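// Illustrative example (not from this file): with a constant memory order, e.g.
//   bool was_set = __atomic_test_and_set(&flag, __ATOMIC_ACQUIRE);
// the code above emits a single
//   atomicrmw xchg i8* %flag, i8 1 acquire
// and returns whether the previous byte was non-zero. If the order is only
// known at run time, the switch over the five supported orderings built above
// selects the matching 'atomicrmw' instead.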
1480 
1481  case Builtin::BI__atomic_clear: {
1482  QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1483  bool Volatile =
1484  PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1485 
1486  Address Ptr = EmitPointerWithAlignment(E->getArg(0));
1487  unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
1488  Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1489  Value *NewVal = Builder.getInt8(0);
1490  Value *Order = EmitScalarExpr(E->getArg(1));
1491  if (isa<llvm::ConstantInt>(Order)) {
1492  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1493  StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1494  switch (ord) {
1495  case 0: // memory_order_relaxed
1496  default: // invalid order
1497  Store->setOrdering(llvm::AtomicOrdering::Monotonic);
1498  break;
1499  case 3: // memory_order_release
1500  Store->setOrdering(llvm::AtomicOrdering::Release);
1501  break;
1502  case 5: // memory_order_seq_cst
1503  Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
1504  break;
1505  }
1506  return RValue::get(nullptr);
1507  }
1508 
1509  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1510 
1511  llvm::BasicBlock *BBs[3] = {
1512  createBasicBlock("monotonic", CurFn),
1513  createBasicBlock("release", CurFn),
1514  createBasicBlock("seqcst", CurFn)
1515  };
1516  llvm::AtomicOrdering Orders[3] = {
1517  llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
1518  llvm::AtomicOrdering::SequentiallyConsistent};
1519 
1520  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1521  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1522 
1523  for (unsigned i = 0; i < 3; ++i) {
1524  Builder.SetInsertPoint(BBs[i]);
1525  StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1526  Store->setOrdering(Orders[i]);
1527  Builder.CreateBr(ContBB);
1528  }
1529 
1530  SI->addCase(Builder.getInt32(0), BBs[0]);
1531  SI->addCase(Builder.getInt32(3), BBs[1]);
1532  SI->addCase(Builder.getInt32(5), BBs[2]);
1533 
1534  Builder.SetInsertPoint(ContBB);
1535  return RValue::get(nullptr);
1536  }
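// Illustrative example (not from this file): for a constant order such as
//   __atomic_clear(&flag, __ATOMIC_RELEASE);
// the code above emits an atomic store of the i8 value 0 with release
// ordering; a run-time order instead goes through the monotonic/release/
// seq_cst switch built above (acquire orderings are not valid for a clear).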
1537 
1538  case Builtin::BI__atomic_thread_fence:
1539  case Builtin::BI__atomic_signal_fence:
1540  case Builtin::BI__c11_atomic_thread_fence:
1541  case Builtin::BI__c11_atomic_signal_fence: {
1542  llvm::SynchronizationScope Scope;
1543  if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1544  BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1545  Scope = llvm::SingleThread;
1546  else
1547  Scope = llvm::CrossThread;
1548  Value *Order = EmitScalarExpr(E->getArg(0));
1549  if (isa<llvm::ConstantInt>(Order)) {
1550  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1551  switch (ord) {
1552  case 0: // memory_order_relaxed
1553  default: // invalid order
1554  break;
1555  case 1: // memory_order_consume
1556  case 2: // memory_order_acquire
1557  Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1558  break;
1559  case 3: // memory_order_release
1560  Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1561  break;
1562  case 4: // memory_order_acq_rel
1563  Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1564  break;
1565  case 5: // memory_order_seq_cst
1566  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
1567  Scope);
1568  break;
1569  }
1570  return RValue::get(nullptr);
1571  }
1572 
1573  llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1574  AcquireBB = createBasicBlock("acquire", CurFn);
1575  ReleaseBB = createBasicBlock("release", CurFn);
1576  AcqRelBB = createBasicBlock("acqrel", CurFn);
1577  SeqCstBB = createBasicBlock("seqcst", CurFn);
1578  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1579 
1580  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1581  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1582 
1583  Builder.SetInsertPoint(AcquireBB);
1584  Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1585  Builder.CreateBr(ContBB);
1586  SI->addCase(Builder.getInt32(1), AcquireBB);
1587  SI->addCase(Builder.getInt32(2), AcquireBB);
1588 
1589  Builder.SetInsertPoint(ReleaseBB);
1590  Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1591  Builder.CreateBr(ContBB);
1592  SI->addCase(Builder.getInt32(3), ReleaseBB);
1593 
1594  Builder.SetInsertPoint(AcqRelBB);
1595  Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1596  Builder.CreateBr(ContBB);
1597  SI->addCase(Builder.getInt32(4), AcqRelBB);
1598 
1599  Builder.SetInsertPoint(SeqCstBB);
1600  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope);
1601  Builder.CreateBr(ContBB);
1602  SI->addCase(Builder.getInt32(5), SeqCstBB);
1603 
1604  Builder.SetInsertPoint(ContBB);
1605  return RValue::get(nullptr);
1606  }
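// Illustrative example (not from this file): a constant-order fence such as
//   __atomic_thread_fence(__ATOMIC_ACQUIRE);
// becomes 'fence acquire', while
//   __atomic_signal_fence(__ATOMIC_SEQ_CST);
// becomes 'fence singlethread seq_cst' (compiler-only, single-thread scope).
// A relaxed order emits no fence at all, and a non-constant order produces the
// switch over the remaining orderings built above.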
1607 
1608  // Library functions with special handling.
1609  case Builtin::BIsqrt:
1610  case Builtin::BIsqrtf:
1611  case Builtin::BIsqrtl: {
1612  // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
1613  // in finite- or unsafe-math mode (the intrinsic has different semantics
1614  // for handling negative numbers compared to the library function, so
1615  // -fmath-errno=0 is not enough).
1616  if (!FD->hasAttr<ConstAttr>())
1617  break;
1618  if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
1619  CGM.getCodeGenOpts().NoNaNsFPMath))
1620  break;
1621  Value *Arg0 = EmitScalarExpr(E->getArg(0));
1622  llvm::Type *ArgType = Arg0->getType();
1623  Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
1624  return RValue::get(Builder.CreateCall(F, Arg0));
1625  }
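// Illustrative example (not from this file), assuming -ffast-math (which sets
// both the unsafe-math and no-NaNs flags checked above) and a sqrt declaration
// carrying the 'const' attribute:
//   double r = sqrt(x);
// is emitted as
//   %r = call double @llvm.sqrt.f64(double %x)
// instead of a libcall; otherwise the case breaks out and the normal library
// call path below is used.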
1626 
1627  case Builtin::BI__builtin_pow:
1628  case Builtin::BI__builtin_powf:
1629  case Builtin::BI__builtin_powl:
1630  case Builtin::BIpow:
1631  case Builtin::BIpowf:
1632  case Builtin::BIpowl: {
1633  // Transform a call to pow* into a @llvm.pow.* intrinsic call.
1634  if (!FD->hasAttr<ConstAttr>())
1635  break;
1636  Value *Base = EmitScalarExpr(E->getArg(0));
1637  Value *Exponent = EmitScalarExpr(E->getArg(1));
1638  llvm::Type *ArgType = Base->getType();
1639  Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1640  return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1641  }
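// Illustrative example (not from this file): assuming pow is declared 'const'
// (e.g. under -fno-math-errno), a call
//   double r = pow(x, y);
// is emitted as
//   %r = call double @llvm.pow.f64(double %x, double %y)
// and likewise powf/powl map to @llvm.pow on the corresponding float type.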
1642 
1643  case Builtin::BIfma:
1644  case Builtin::BIfmaf:
1645  case Builtin::BIfmal:
1646  case Builtin::BI__builtin_fma:
1647  case Builtin::BI__builtin_fmaf:
1648  case Builtin::BI__builtin_fmal: {
1649  // Rewrite fma to intrinsic.
1650  Value *FirstArg = EmitScalarExpr(E->getArg(0));
1651  llvm::Type *ArgType = FirstArg->getType();
1652  Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1653  return RValue::get(
1654  Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
1655  EmitScalarExpr(E->getArg(2))}));
1656  }
1657 
1658  case Builtin::BI__builtin_signbit:
1659  case Builtin::BI__builtin_signbitf:
1660  case Builtin::BI__builtin_signbitl: {
1661  return RValue::get(
1662  Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
1663  ConvertType(E->getType())));
1664  }
1665  case Builtin::BI__builtin_annotation: {
1666  llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1667  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1668  AnnVal->getType());
1669 
1670  // Get the annotation string, go through casts. Sema requires this to be a
1671  // non-wide string literal, potentially cast, so the cast<> is safe.
1672  const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1673  StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1674  return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1675  }
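// Illustrative example (not from this file): a call such as
//   int tagged = __builtin_annotation(value, "my.annotation");
// becomes a call to the @llvm.annotation intrinsic on the type of 'value',
// with the string attached through EmitAnnotationCall; the second argument
// must be a plain string literal, which Sema has already checked.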
1676  case Builtin::BI__builtin_addcb:
1677  case Builtin::BI__builtin_addcs:
1678  case Builtin::BI__builtin_addc:
1679  case Builtin::BI__builtin_addcl:
1680  case Builtin::BI__builtin_addcll:
1681  case Builtin::BI__builtin_subcb:
1682  case Builtin::BI__builtin_subcs:
1683  case Builtin::BI__builtin_subc:
1684  case Builtin::BI__builtin_subcl:
1685  case Builtin::BI__builtin_subcll: {
1686 
1687  // We translate all of these builtins from expressions of the form:
1688  // int x = ..., y = ..., carryin = ..., carryout, result;
1689  // result = __builtin_addc(x, y, carryin, &carryout);
1690  //
1691  // to LLVM IR of the form:
1692  //
1693  // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
1694  // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
1695  // %carry1 = extractvalue {i32, i1} %tmp1, 1
1696  // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
1697  // i32 %carryin)
1698  // %result = extractvalue {i32, i1} %tmp2, 0
1699  // %carry2 = extractvalue {i32, i1} %tmp2, 1
1700  // %tmp3 = or i1 %carry1, %carry2
1701  // %tmp4 = zext i1 %tmp3 to i32
1702  // store i32 %tmp4, i32* %carryout
1703 
1704  // Scalarize our inputs.
1705  llvm::Value *X = EmitScalarExpr(E->getArg(0));
1706  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1707  llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
1708  Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
1709 
1710  // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
1711  llvm::Intrinsic::ID IntrinsicId;
1712  switch (BuiltinID) {
1713  default: llvm_unreachable("Unknown multiprecision builtin id.");
1714  case Builtin::BI__builtin_addcb:
1715  case Builtin::BI__builtin_addcs:
1716  case Builtin::BI__builtin_addc:
1717  case Builtin::BI__builtin_addcl:
1718  case Builtin::BI__builtin_addcll:
1719  IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1720  break;
1721  case Builtin::BI__builtin_subcb:
1722  case Builtin::BI__builtin_subcs:
1723  case Builtin::BI__builtin_subc:
1724  case Builtin::BI__builtin_subcl:
1725  case Builtin::BI__builtin_subcll:
1726  IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1727  break;
1728  }
1729 
1730  // Construct our resulting LLVM IR expression.
1731  llvm::Value *Carry1;
1732  llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
1733  X, Y, Carry1);
1734  llvm::Value *Carry2;
1735  llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
1736  Sum1, Carryin, Carry2);
1737  llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
1738  X->getType());
1739  Builder.CreateStore(CarryOut, CarryOutPtr);
1740  return RValue::get(Sum2);
1741  }
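// Illustrative usage (not from this file): these builtins are typically chained
// to implement multi-word arithmetic, e.g.
//   unsigned carry = 0, lo, hi;
//   lo = __builtin_addc(a0, b0, carry, &carry);
//   hi = __builtin_addc(a1, b1, carry, &carry);
// Each call expands to the pair of @llvm.uadd.with.overflow calls shown in the
// comment above, with the two carry bits OR'ed and stored through the pointer.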
1742 
1743  case Builtin::BI__builtin_add_overflow:
1744  case Builtin::BI__builtin_sub_overflow:
1745  case Builtin::BI__builtin_mul_overflow: {
1746  const clang::Expr *LeftArg = E->getArg(0);
1747  const clang::Expr *RightArg = E->getArg(1);
1748  const clang::Expr *ResultArg = E->getArg(2);
1749 
1750  clang::QualType ResultQTy =
1751  ResultArg->getType()->castAs<PointerType>()->getPointeeType();
1752 
1753  WidthAndSignedness LeftInfo =
1754  getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
1755  WidthAndSignedness RightInfo =
1756  getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
1757  WidthAndSignedness ResultInfo =
1758  getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
1759  WidthAndSignedness EncompassingInfo =
1760  EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
1761 
1762  llvm::Type *EncompassingLLVMTy =
1763  llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
1764 
1765  llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
1766 
1767  llvm::Intrinsic::ID IntrinsicId;
1768  switch (BuiltinID) {
1769  default:
1770  llvm_unreachable("Unknown overflow builtin id.");
1771  case Builtin::BI__builtin_add_overflow:
1772  IntrinsicId = EncompassingInfo.Signed
1773  ? llvm::Intrinsic::sadd_with_overflow
1774  : llvm::Intrinsic::uadd_with_overflow;
1775  break;
1776  case Builtin::BI__builtin_sub_overflow:
1777  IntrinsicId = EncompassingInfo.Signed
1778  ? llvm::Intrinsic::ssub_with_overflow
1779  : llvm::Intrinsic::usub_with_overflow;
1780  break;
1781  case Builtin::BI__builtin_mul_overflow:
1782  IntrinsicId = EncompassingInfo.Signed
1783  ? llvm::Intrinsic::smul_with_overflow
1784  : llvm::Intrinsic::umul_with_overflow;
1785  break;
1786  }
1787 
1788  llvm::Value *Left = EmitScalarExpr(LeftArg);
1789  llvm::Value *Right = EmitScalarExpr(RightArg);
1790  Address ResultPtr = EmitPointerWithAlignment(ResultArg);
1791 
1792  // Extend each operand to the encompassing type.
1793  Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
1794  Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
1795 
1796  // Perform the operation on the extended values.
1797  llvm::Value *Overflow, *Result;
1798  Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
1799 
1800  if (EncompassingInfo.Width > ResultInfo.Width) {
1801  // The encompassing type is wider than the result type, so we need to
1802  // truncate it.
1803  llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
1804 
1805  // To see if the truncation caused an overflow, we will extend
1806  // the result and then compare it to the original result.
1807  llvm::Value *ResultTruncExt = Builder.CreateIntCast(
1808  ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
1809  llvm::Value *TruncationOverflow =
1810  Builder.CreateICmpNE(Result, ResultTruncExt);
1811 
1812  Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
1813  Result = ResultTrunc;
1814  }
1815 
1816  // Finally, store the result using the pointer.
1817  bool isVolatile =
1818  ResultArg->getType()->getPointeeType().isVolatileQualified();
1819  Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
1820 
1821  return RValue::get(Overflow);
1822  }
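// Illustrative example (not from this file): the three operands may have
// different integer types, e.g.
//   unsigned a = ...; int b = ...; short r;
//   bool ovf = __builtin_add_overflow(a, b, &r);
// The addition is performed in a type wide enough to encompass all three,
// using the matching @llvm.*.with.overflow intrinsic, and if the result type
// is narrower the truncated value is re-extended and compared so that lossy
// truncation is also reported as overflow.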
1823 
1824  case Builtin::BI__builtin_uadd_overflow:
1825  case Builtin::BI__builtin_uaddl_overflow:
1826  case Builtin::BI__builtin_uaddll_overflow:
1827  case Builtin::BI__builtin_usub_overflow:
1828  case Builtin::BI__builtin_usubl_overflow:
1829  case Builtin::BI__builtin_usubll_overflow:
1830  case Builtin::BI__builtin_umul_overflow:
1831  case Builtin::BI__builtin_umull_overflow:
1832  case Builtin::BI__builtin_umulll_overflow:
1833  case Builtin::BI__builtin_sadd_overflow:
1834  case Builtin::BI__builtin_saddl_overflow:
1835  case Builtin::BI__builtin_saddll_overflow:
1836  case Builtin::BI__builtin_ssub_overflow:
1837  case Builtin::BI__builtin_ssubl_overflow:
1838  case Builtin::BI__builtin_ssubll_overflow:
1839  case Builtin::BI__builtin_smul_overflow:
1840  case Builtin::BI__builtin_smull_overflow:
1841  case Builtin::BI__builtin_smulll_overflow: {
1842 
1843  // We translate all of these builtins directly to the relevant llvm IR node.
1844 
1845  // Scalarize our inputs.
1846  llvm::Value *X = EmitScalarExpr(E->getArg(0));
1847  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1848  Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
1849 
1850  // Decide which of the overflow intrinsics we are lowering to:
1851  llvm::Intrinsic::ID IntrinsicId;
1852  switch (BuiltinID) {
1853  default: llvm_unreachable("Unknown overflow builtin id.");
1854  case Builtin::BI__builtin_uadd_overflow:
1855  case Builtin::BI__builtin_uaddl_overflow:
1856  case Builtin::BI__builtin_uaddll_overflow:
1857  IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1858  break;
1859  case Builtin::BI__builtin_usub_overflow:
1860  case Builtin::BI__builtin_usubl_overflow:
1861  case Builtin::BI__builtin_usubll_overflow:
1862  IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1863  break;
1864  case Builtin::BI__builtin_umul_overflow:
1865  case Builtin::BI__builtin_umull_overflow:
1866  case Builtin::BI__builtin_umulll_overflow:
1867  IntrinsicId = llvm::Intrinsic::umul_with_overflow;
1868  break;
1869  case Builtin::BI__builtin_sadd_overflow:
1870  case Builtin::BI__builtin_saddl_overflow:
1871  case Builtin::BI__builtin_saddll_overflow:
1872  IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
1873  break;
1874  case Builtin::BI__builtin_ssub_overflow:
1875  case Builtin::BI__builtin_ssubl_overflow:
1876  case Builtin::BI__builtin_ssubll_overflow:
1877  IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
1878  break;
1879  case Builtin::BI__builtin_smul_overflow:
1880  case Builtin::BI__builtin_smull_overflow:
1881  case Builtin::BI__builtin_smulll_overflow:
1882  IntrinsicId = llvm::Intrinsic::smul_with_overflow;
1883  break;
1884  }
1885 
1886 
1887  llvm::Value *Carry;
1888  llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
1889  Builder.CreateStore(Sum, SumOutPtr);
1890 
1891  return RValue::get(Carry);
1892  }
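// Illustrative example (not from this file): the fixed-type variants map
// one-to-one onto the overflow intrinsics, e.g.
//   unsigned sum;
//   bool carry = __builtin_uadd_overflow(x, y, &sum);
// becomes a call to @llvm.uadd.with.overflow.i32 whose first result is stored
// to 'sum' and whose overflow bit is returned.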
1893  case Builtin::BI__builtin_addressof:
1894  return RValue::get(EmitLValue(E->getArg(0)).getPointer());
1895  case Builtin::BI__builtin_operator_new:
1896  return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
1897  E->getArg(0), false);
1898  case Builtin::BI__builtin_operator_delete:
1899  return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
1900  E->getArg(0), true);
1901  case Builtin::BI__noop:
1902  // __noop always evaluates to an integer literal zero.
1903  return RValue::get(ConstantInt::get(IntTy, 0));
1904  case Builtin::BI__builtin_call_with_static_chain: {
1905  const CallExpr *Call = cast<CallExpr>(E->getArg(0));
1906  const Expr *Chain = E->getArg(1);
1907  return EmitCall(Call->getCallee()->getType(),
1908  EmitScalarExpr(Call->getCallee()), Call, ReturnValue,
1909  Call->getCalleeDecl(), EmitScalarExpr(Chain));
1910  }
1911  case Builtin::BI_InterlockedExchange:
1912  case Builtin::BI_InterlockedExchangePointer:
1913  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1914  case Builtin::BI_InterlockedCompareExchangePointer: {
1915  llvm::Type *RTy;
1916  llvm::IntegerType *IntType =
1917  IntegerType::get(getLLVMContext(),
1918  getContext().getTypeSize(E->getType()));
1919  llvm::Type *IntPtrType = IntType->getPointerTo();
1920 
1921  llvm::Value *Destination =
1922  Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
1923 
1924  llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
1925  RTy = Exchange->getType();
1926  Exchange = Builder.CreatePtrToInt(Exchange, IntType);
1927 
1928  llvm::Value *Comparand =
1929  Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
1930 
1931  auto Result =
1932  Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
1933  AtomicOrdering::SequentiallyConsistent,
1934  AtomicOrdering::SequentiallyConsistent);
1935  Result->setVolatile(true);
1936 
1937  return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
1938  0),
1939  RTy));
1940  }
1941  case Builtin::BI_InterlockedCompareExchange: {
1942  AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
1943  EmitScalarExpr(E->getArg(0)),
1944  EmitScalarExpr(E->getArg(2)),
1945  EmitScalarExpr(E->getArg(1)),
1946  AtomicOrdering::SequentiallyConsistent,
1947  AtomicOrdering::SequentiallyConsistent);
1948  CXI->setVolatile(true);
1949  return RValue::get(Builder.CreateExtractValue(CXI, 0));
1950  }
1951  case Builtin::BI_InterlockedIncrement: {
1952  llvm::Type *IntTy = ConvertType(E->getType());
1953  AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1954  AtomicRMWInst::Add,
1955  EmitScalarExpr(E->getArg(0)),
1956  ConstantInt::get(IntTy, 1),
1957  llvm::AtomicOrdering::SequentiallyConsistent);
1958  RMWI->setVolatile(true);
1959  return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1)));
1960  }
1961  case Builtin::BI_InterlockedDecrement: {
1962  llvm::Type *IntTy = ConvertType(E->getType());
1963  AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1964  AtomicRMWInst::Sub,
1965  EmitScalarExpr(E->getArg(0)),
1966  ConstantInt::get(IntTy, 1),
1967  llvm::AtomicOrdering::SequentiallyConsistent);
1968  RMWI->setVolatile(true);
1969  return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1)));
1970  }
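// Note (illustrative, not from this file): 'atomicrmw' yields the value the
// memory held *before* the operation, but MSVC's _InterlockedIncrement and
// _InterlockedDecrement are defined to return the *new* value, e.g.
//   long v = 0;
//   long r = _InterlockedIncrement(&v);  // r == 1 and v == 1
// hence the extra CreateAdd/CreateSub of 1 applied to the RMW result above.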
1971  case Builtin::BI_InterlockedExchangeAdd: {
1972  AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1973  AtomicRMWInst::Add,
1974  EmitScalarExpr(E->getArg(0)),
1975  EmitScalarExpr(E->getArg(1)),
1976  llvm::AtomicOrdering::SequentiallyConsistent);
1977  RMWI->setVolatile(true);
1978  return RValue::get(RMWI);
1979  }
1980  case Builtin::BI__readfsdword: {
1981  llvm::Type *IntTy = ConvertType(E->getType());
1982  Value *IntToPtr =
1983  Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
1984  llvm::PointerType::get(IntTy, 257));
1985  LoadInst *Load =
1986  Builder.CreateDefaultAlignedLoad(IntToPtr, /*isVolatile=*/true);
1987  return RValue::get(Load);
1988  }
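// Illustrative example (not from this file; address space 257 is assumed to be
// LLVM's x86 FS-segment address space): a call such as
//   unsigned long v = __readfsdword(0x18);
// is emitted as a volatile 32-bit load through an i32 addrspace(257)* pointer,
// i.e. a read of fs:[0x18].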
1989 
1990  case Builtin::BI__exception_code:
1991  case Builtin::BI_exception_code:
1992  return RValue::get(EmitSEHExceptionCode());
1993  case Builtin::BI__exception_info:
1994  case Builtin::BI_exception_info:
1995  return RValue::get(EmitSEHExceptionInfo());
1996  case Builtin::BI__abnormal_termination:
1997  case Builtin::BI_abnormal_termination:
1998  return RValue::get(EmitSEHAbnormalTermination());
1999  case Builtin::BI_setjmpex: {
2000  if (getTarget().getTriple().isOSMSVCRT()) {
2001  llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2002  llvm::AttributeSet ReturnsTwiceAttr =
2003  AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
2004  llvm::Attribute::ReturnsTwice);
2005  llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
2006  llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2007  "_setjmpex", ReturnsTwiceAttr);
2008  llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2009  EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2010  llvm::Value *FrameAddr =
2011  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2012  ConstantInt::get(Int32Ty, 0));
2013  llvm::Value *Args[] = {Buf, FrameAddr};
2014  llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
2015  CS.setAttributes(ReturnsTwiceAttr);
2016  return RValue::get(CS.getInstruction());
2017  }
2018  break;
2019  }
2020  case Builtin::BI_setjmp: {
2021  if (getTarget().getTriple().isOSMSVCRT()) {
2022  llvm::AttributeSet ReturnsTwiceAttr =
2023  AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
2024  llvm::Attribute::ReturnsTwice);
2025  llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2026  EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2027  llvm::CallSite CS;
2028  if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2029  llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
2030  llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
2031  llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
2032  "_setjmp3", ReturnsTwiceAttr);
2033  llvm::Value *Count = ConstantInt::get(IntTy, 0);
2034  llvm::Value *Args[] = {Buf, Count};
2035  CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
2036  } else {
2037  llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2038  llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
2039  llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2040  "_setjmp", ReturnsTwiceAttr);
2041  llvm::Value *FrameAddr =
2042  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2043  ConstantInt::get(Int32Ty, 0));
2044  llvm::Value *Args[] = {Buf, FrameAddr};
2045  CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
2046  }
2047  CS.setAttributes(ReturnsTwiceAttr);
2048  return RValue::get(CS.getInstruction());
2049  }
2050  break;
2051  }
2052 
2053  case Builtin::BI__GetExceptionInfo: {
2054  if (llvm::GlobalVariable *GV =
2055  CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
2056  return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
2057  break;
2058  }
2059 
2060  // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
2061  case Builtin::BIread_pipe:
2062  case Builtin::BIwrite_pipe: {
2063  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2064  *Arg1 = EmitScalarExpr(E->getArg(1));
2065 
2066  // Type of the generic packet parameter.
2067  unsigned GenericAS =
2068  getContext().getTargetAddressSpace(LangAS::opencl_generic);
2069  llvm::Type *I8PTy = llvm::PointerType::get(
2070  llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
2071 
2072  // Testing which overloaded version we should generate the call for.
2073  if (2U == E->getNumArgs()) {
2074  const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
2075  : "__write_pipe_2";
2076  // Creating a generic function type to be able to call with any builtin or
2077  // user defined type.
2078  llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy};
2079  llvm::FunctionType *FTy = llvm::FunctionType::get(
2080  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2081  Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
2082  return RValue::get(Builder.CreateCall(
2083  CGM.CreateRuntimeFunction(FTy, Name), {Arg0, BCast}));
2084  } else {
2085  assert(4 == E->getNumArgs() &&
2086  "Illegal number of parameters to pipe function");
2087  const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
2088  : "__write_pipe_4";
2089 
2090  llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy};
2091  Value *Arg2 = EmitScalarExpr(E->getArg(2)),
2092  *Arg3 = EmitScalarExpr(E->getArg(3));
2093  llvm::FunctionType *FTy = llvm::FunctionType::get(
2094  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2095  Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
2096  // We know the third argument is an integer type, but we may need to cast
2097  // it to i32.
2098  if (Arg2->getType() != Int32Ty)
2099  Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
2100  return RValue::get(Builder.CreateCall(
2101  CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1, Arg2, BCast}));
2102  }
2103  }
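// Illustrative example (not from this file), written in OpenCL C: the two-
// argument form
//   read_pipe(p, &val);
// is lowered to the runtime call __read_pipe_2(p, (generic void *)&val), while
// the four-argument reservation form
//   read_pipe(p, rid, index, &val);
// maps to __read_pipe_4 with the index zero-extended or truncated to i32; the
// write_pipe overloads follow the same pattern with __write_pipe_2/_4.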
2104  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
2105  // functions
2106  case Builtin::BIreserve_read_pipe:
2107  case Builtin::BIreserve_write_pipe:
2108  case Builtin::BIwork_group_reserve_read_pipe:
2109  case Builtin::BIwork_group_reserve_write_pipe:
2110  case Builtin::BIsub_group_reserve_read_pipe:
2111  case Builtin::BIsub_group_reserve_write_pipe: {
2112  // Composing the mangled name for the function.
2113  const char *Name;
2114  if (BuiltinID == Builtin::BIreserve_read_pipe)
2115  Name = "__reserve_read_pipe";
2116  else if (BuiltinID == Builtin::BIreserve_write_pipe)
2117  Name = "__reserve_write_pipe";
2118  else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
2119  Name = "__work_group_reserve_read_pipe";
2120  else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
2121  Name = "__work_group_reserve_write_pipe";
2122  else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
2123  Name = "__sub_group_reserve_read_pipe";
2124  else
2125  Name = "__sub_group_reserve_write_pipe";
2126 
2127  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2128  *Arg1 = EmitScalarExpr(E->getArg(1));
2129  llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
2130 
2131  // Building the generic function prototype.
2132  llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty};
2133  llvm::FunctionType *FTy = llvm::FunctionType::get(
2134  ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2135  // We know the second argument is an integer type, but we may need to cast
2136  // it to i32.
2137  if (Arg1->getType() != Int32Ty)
2138  Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
2139  return RValue::get(
2140  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1}));
2141  }
2142  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
2143  // functions
2144  case Builtin::BIcommit_read_pipe:
2145  case Builtin::BIcommit_write_pipe:
2146  case Builtin::BIwork_group_commit_read_pipe:
2147  case Builtin::BIwork_group_commit_write_pipe:
2148  case Builtin::BIsub_group_commit_read_pipe:
2149  case Builtin::BIsub_group_commit_write_pipe: {
2150  const char *Name;
2151  if (BuiltinID == Builtin::BIcommit_read_pipe)
2152  Name = "__commit_read_pipe";
2153  else if (BuiltinID == Builtin::BIcommit_write_pipe)
2154  Name = "__commit_write_pipe";
2155  else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
2156  Name = "__work_group_commit_read_pipe";
2157  else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
2158  Name = "__work_group_commit_write_pipe";
2159  else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
2160  Name = "__sub_group_commit_read_pipe";
2161  else
2162  Name = "__sub_group_commit_write_pipe";
2163 
2164  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2165  *Arg1 = EmitScalarExpr(E->getArg(1));
2166 
2167  // Building the generic function prototype.
2168  llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType()};
2169  llvm::FunctionType *FTy =
2170  llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
2171  llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2172 
2173  return RValue::get(
2174  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1}));
2175  }
2176  // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
2177  case Builtin::BIget_pipe_num_packets:
2178  case Builtin::BIget_pipe_max_packets: {
2179  const char *Name;
2180  if (BuiltinID == Builtin::BIget_pipe_num_packets)
2181  Name = "__get_pipe_num_packets";
2182  else
2183  Name = "__get_pipe_max_packets";
2184 
2185  // Building the generic function prototype.
2186  Value *Arg0 = EmitScalarExpr(E->getArg(0));
2187  llvm::Type *ArgTys[] = {Arg0->getType()};
2188  llvm::FunctionType *FTy = llvm::FunctionType::get(
2189  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2190 
2191  return RValue::get(
2192  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0}));
2193  }
2194 
2195  // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
2196  case Builtin::BIto_global:
2197  case Builtin::BIto_local:
2198  case Builtin::BIto_private: {
2199  auto Arg0 = EmitScalarExpr(E->getArg(0));
2200  auto NewArgT = llvm::PointerType::get(Int8Ty,
2201  CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2202  auto NewRetT = llvm::PointerType::get(Int8Ty,
2203  CGM.getContext().getTargetAddressSpace(
2205  auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
2206  llvm::Value *NewArg;
2207  if (Arg0->getType()->getPointerAddressSpace() !=
2208  NewArgT->getPointerAddressSpace())
2209  NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
2210  else
2211  NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
2212  auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
2213  auto NewCall =
2214  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
2215  return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
2216  ConvertType(E->getType())));
2217  }
2218 
2219  // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
2220  // It contains four different overload formats specified in Table 6.13.17.1.
2221  case Builtin::BIenqueue_kernel: {
2222  StringRef Name; // Generated function call name
2223  unsigned NumArgs = E->getNumArgs();
2224 
2225  llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
2226  llvm::Type *RangeTy = ConvertType(getContext().OCLNDRangeTy);
2227 
2228  llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
2229  llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
2230  llvm::Value *Range = EmitScalarExpr(E->getArg(2));
2231 
2232  if (NumArgs == 4) {
2233  // The most basic form of the call with parameters:
2234  // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
2235  Name = "__enqueue_kernel_basic";
2236  llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, Int8PtrTy};
2237  llvm::FunctionType *FTy = llvm::FunctionType::get(
2238  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);
2239 
2240  llvm::Value *Block =
2241  Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
2242 
2243  return RValue::get(Builder.CreateCall(
2244  CGM.CreateRuntimeFunction(FTy, Name), {Queue, Flags, Range, Block}));
2245  }
2246  assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
2247 
2248  // Could have events and/or vaargs.
2249  if (E->getArg(3)->getType()->isBlockPointerType()) {
2250  // No events passed, but has variadic arguments.
2251  Name = "__enqueue_kernel_vaargs";
2252  llvm::Value *Block =
2253  Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
2254  // Create a vector of the arguments, as well as a constant value to
2255  // express to the runtime the number of variadic arguments.
2256  std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block,
2257  ConstantInt::get(IntTy, NumArgs - 4)};
2258  std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, Int8PtrTy,
2259  IntTy};
2260 
2261  // Add the variadics.
2262  for (unsigned I = 4; I < NumArgs; ++I) {
2263  llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I));
2264  unsigned TypeSizeInBytes =
2265  getContext()
2266  .getTypeSizeInChars(E->getArg(I)->getType())
2267  .getQuantity();
2268  Args.push_back(TypeSizeInBytes < 4
2269  ? Builder.CreateZExt(ArgSize, Int32Ty)
2270  : ArgSize);
2271  }
2272 
2273  llvm::FunctionType *FTy = llvm::FunctionType::get(
2274  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2275  return RValue::get(
2276  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2277  llvm::ArrayRef<llvm::Value *>(Args)));
2278  }
2279  // Any calls now have event arguments passed.
2280  if (NumArgs >= 7) {
2281  llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
2282  unsigned AS4 =
2283  E->getArg(4)->getType()->isArrayType()
2284  ? E->getArg(4)->getType().getAddressSpace()
2285  : E->getArg(4)->getType()->getPointeeType().getAddressSpace();
2286  llvm::Type *EventPtrAS4Ty =
2287  EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS4));
2288  unsigned AS5 =
2289  E->getArg(5)->getType()->getPointeeType().getAddressSpace();
2290  llvm::Type *EventPtrAS5Ty =
2291  EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS5));
2292 
2293  llvm::Value *NumEvents = EmitScalarExpr(E->getArg(3));
2294  llvm::Value *EventList =
2295  E->getArg(4)->getType()->isArrayType()
2296  ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
2297  : EmitScalarExpr(E->getArg(4));
2298  llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
2299  llvm::Value *Block =
2300  Builder.CreateBitCast(EmitScalarExpr(E->getArg(6)), Int8PtrTy);
2301 
2302  std::vector<llvm::Type *> ArgTys = {
2303  QueueTy, Int32Ty, RangeTy, Int32Ty,
2304  EventPtrAS4Ty, EventPtrAS5Ty, Int8PtrTy};
2305  std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents,
2306  EventList, ClkEvent, Block};
2307 
2308  if (NumArgs == 7) {
2309  // Has events but no variadics.
2310  Name = "__enqueue_kernel_basic_events";
2311  llvm::FunctionType *FTy = llvm::FunctionType::get(
2312  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2313  return RValue::get(
2314  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2315  llvm::ArrayRef<llvm::Value *>(Args)));
2316  }
2317  // Has event info and variadics
2318  // Pass the number of variadics to the runtime function too.
2319  Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
2320  ArgTys.push_back(Int32Ty);
2321  Name = "__enqueue_kernel_events_vaargs";
2322 
2323  // Add the variadics.
2324  for (unsigned I = 7; I < NumArgs; ++I) {
2325  llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I));
2326  unsigned TypeSizeInBytes =
2327  getContext()
2328  .getTypeSizeInChars(E->getArg(I)->getType())
2329  .getQuantity();
2330  Args.push_back(TypeSizeInBytes < 4
2331  ? Builder.CreateZExt(ArgSize, Int32Ty)
2332  : ArgSize);
2333  }
2334  llvm::FunctionType *FTy = llvm::FunctionType::get(
2335  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2336  return RValue::get(
2337  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2338  llvm::ArrayRef<llvm::Value *>(Args)));
2339  }
2340  }
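// Illustrative example (not from this file), written in OpenCL C: the simplest
// four-argument form
//   enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL,
//                  ndrange, ^{ ... });
// maps to __enqueue_kernel_basic(queue, flags, ndrange, block). Forms that
// pass wait/return events use __enqueue_kernel_basic_events, and forms with
// trailing local-size arguments use the *_vaargs variants, which also receive
// the count of variadic arguments as shown above.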
2341  // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
2342  // parameter.
2343  case Builtin::BIget_kernel_work_group_size: {
2344  Value *Arg = EmitScalarExpr(E->getArg(0));
2345  Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
2346  return RValue::get(
2347  Builder.CreateCall(CGM.CreateRuntimeFunction(
2348  llvm::FunctionType::get(IntTy, Int8PtrTy, false),
2349  "__get_kernel_work_group_size_impl"),
2350  Arg));
2351  }
2352  case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
2353  Value *Arg = EmitScalarExpr(E->getArg(0));
2354  Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
2355  return RValue::get(Builder.CreateCall(
2356  CGM.CreateRuntimeFunction(
2357  llvm::FunctionType::get(IntTy, Int8PtrTy, false),
2358  "__get_kernel_preferred_work_group_multiple_impl"),
2359  Arg));
2360  }
2361  case Builtin::BIprintf:
2362  if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice)
2363  return EmitCUDADevicePrintfCallExpr(E, ReturnValue);
2364  break;
2365  case Builtin::BI__builtin_canonicalize:
2366  case Builtin::BI__builtin_canonicalizef:
2367  case Builtin::BI__builtin_canonicalizel:
2368  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
2369 
2370  case Builtin::BI__builtin_thread_pointer: {
2371  if (!getContext().getTargetInfo().isTLSSupported())
2372  CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
2373  // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
2374  break;
2375  }
2376  }
2377 
2378  // If this is an alias for a lib function (e.g. __builtin_sin), emit
2379  // the call using the normal call path, but using the unmangled
2380  // version of the function name.
2381  if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
2382  return emitLibraryCall(*this, FD, E,
2383  CGM.getBuiltinLibFunction(FD, BuiltinID));
2384 
2385  // If this is a predefined lib function (e.g. malloc), emit the call
2386  // using exactly the normal call path.
2387  if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
2388  return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee()));
2389 
2390  // Check that a call to a target specific builtin has the correct target
2391  // features.
2392  // This is done down here to avoid checking non-target-specific builtins;
2393  // however, if generic builtins start to require generic target features,
2394  // then we can move this up to the beginning of the function.
2395  checkTargetFeatures(E, FD);
2396 
2397  // See if we have a target specific intrinsic.
2398  const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
2399  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
2400  if (const char *Prefix =
2401  llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch())) {
2402  IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);
2403  // NOTE: we don't need to perform a compatibility flag check here since the
2404  // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters
2405  // the MS builtins via ALL_MS_LANGUAGES, so they are already filtered earlier.
2406  if (IntrinsicID == Intrinsic::not_intrinsic)
2407  IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix, Name);
2408  }
2409 
2410  if (IntrinsicID != Intrinsic::not_intrinsic) {
2411  SmallVector<Value*, 16> Args;
2412 
2413  // Find out if any arguments are required to be integer constant
2414  // expressions.
2415  unsigned ICEArguments = 0;
2416  ASTContext::GetBuiltinTypeError Error;
2417  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2418  assert(Error == ASTContext::GE_None && "Should not codegen an error");
2419 
2420  Function *F = CGM.getIntrinsic(IntrinsicID);
2421  llvm::FunctionType *FTy = F->getFunctionType();
2422 
2423  for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
2424  Value *ArgValue;
2425  // If this is a normal argument, just emit it as a scalar.
2426  if ((ICEArguments & (1 << i)) == 0) {
2427  ArgValue = EmitScalarExpr(E->getArg(i));
2428  } else {
2429  // If this is required to be a constant, constant fold it so that we
2430  // know that the generated intrinsic gets a ConstantInt.
2431  llvm::APSInt Result;
2432  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
2433  assert(IsConst && "Constant arg isn't actually constant?");
2434  (void)IsConst;
2435  ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
2436  }
2437 
2438  // If the intrinsic arg type is different from the builtin arg type
2439  // we need to do a bit cast.
2440  llvm::Type *PTy = FTy->getParamType(i);
2441  if (PTy != ArgValue->getType()) {
2442  assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
2443  "Must be able to losslessly bit cast to param");
2444  ArgValue = Builder.CreateBitCast(ArgValue, PTy);
2445  }
2446 
2447  Args.push_back(ArgValue);
2448  }
2449 
2450  Value *V = Builder.CreateCall(F, Args);
2451  QualType BuiltinRetType = E->getType();
2452 
2453  llvm::Type *RetTy = VoidTy;
2454  if (!BuiltinRetType->isVoidType())
2455  RetTy = ConvertType(BuiltinRetType);
2456 
2457  if (RetTy != V->getType()) {
2458  assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
2459  "Must be able to losslessly bit cast result type");
2460  V = Builder.CreateBitCast(V, RetTy);
2461  }
2462 
2463  return RValue::get(V);
2464  }
2465 
2466  // See if we have a target specific builtin that needs to be lowered.
2467  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
2468  return RValue::get(V);
2469 
2470  ErrorUnsupported(E, "builtin function");
2471 
2472  // Unknown builtin, for now just dump it out and return undef.
2473  return GetUndefRValue(E->getType());
2474 }
2475 
2476 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
2477  unsigned BuiltinID, const CallExpr *E,
2478  llvm::Triple::ArchType Arch) {
2479  switch (Arch) {
2480  case llvm::Triple::arm:
2481  case llvm::Triple::armeb:
2482  case llvm::Triple::thumb:
2483  case llvm::Triple::thumbeb:
2484  return CGF->EmitARMBuiltinExpr(BuiltinID, E);
2485  case llvm::Triple::aarch64:
2486  case llvm::Triple::aarch64_be:
2487  return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
2488  case llvm::Triple::x86:
2489  case llvm::Triple::x86_64:
2490  return CGF->EmitX86BuiltinExpr(BuiltinID, E);
2491  case llvm::Triple::ppc:
2492  case llvm::Triple::ppc64:
2493  case llvm::Triple::ppc64le:
2494  return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
2495  case llvm::Triple::r600:
2496  case llvm::Triple::amdgcn:
2497  return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
2498  case llvm::Triple::systemz:
2499  return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
2500  case llvm::Triple::nvptx:
2501  case llvm::Triple::nvptx64:
2502  return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
2503  case llvm::Triple::wasm32:
2504  case llvm::Triple::wasm64:
2505  return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
2506  default:
2507  return nullptr;
2508  }
2509 }
2510 
2511 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
2512  const CallExpr *E) {
2513  if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
2514  assert(getContext().getAuxTargetInfo() && "Missing aux target info");
2515  return EmitTargetArchBuiltinExpr(
2516  this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
2517  getContext().getAuxTargetInfo()->getTriple().getArch());
2518  }
2519 
2520  return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
2521  getTarget().getTriple().getArch());
2522 }
2523 
2524 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
2525  NeonTypeFlags TypeFlags,
2526  bool V1Ty=false) {
2527  int IsQuad = TypeFlags.isQuad();
2528  switch (TypeFlags.getEltType()) {
2529  case NeonTypeFlags::Int8:
2530  case NeonTypeFlags::Poly8:
2531  return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
2532  case NeonTypeFlags::Int16:
2533  case NeonTypeFlags::Poly16:
2534  case NeonTypeFlags::Float16:
2535  return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
2536  case NeonTypeFlags::Int32:
2537  return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
2538  case NeonTypeFlags::Int64:
2539  case NeonTypeFlags::Poly64:
2540  return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
2541  case NeonTypeFlags::Poly128:
2542  // FIXME: i128 and f128 don't get full support in Clang and LLVM.
2543  // A lot of the i128 and f128 API is missing,
2544  // so we use v16i8 to represent poly128 and rely on pattern matching.
2545  return llvm::VectorType::get(CGF->Int8Ty, 16);
2546  case NeonTypeFlags::Float32:
2547  return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
2548  case NeonTypeFlags::Float64:
2549  return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
2550  }
2551  llvm_unreachable("Unknown vector element type!");
2552 }
2553 
2554 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
2555  NeonTypeFlags IntTypeFlags) {
2556  int IsQuad = IntTypeFlags.isQuad();
2557  switch (IntTypeFlags.getEltType()) {
2558  case NeonTypeFlags::Int32:
2559  return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
2560  case NeonTypeFlags::Int64:
2561  return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
2562  default:
2563  llvm_unreachable("Type can't be converted to floating-point!");
2564  }
2565 }
2566 
2567 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
2568  unsigned nElts = V->getType()->getVectorNumElements();
2569  Value* SV = llvm::ConstantVector::getSplat(nElts, C);
2570  return Builder.CreateShuffleVector(V, V, SV, "lane");
2571 }
2572 
2573 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
2574  const char *name,
2575  unsigned shift, bool rightshift) {
2576  unsigned j = 0;
2577  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
2578  ai != ae; ++ai, ++j)
2579  if (shift > 0 && shift == j)
2580  Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
2581  else
2582  Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
2583 
2584  return Builder.CreateCall(F, Ops, name);
2585 }
2586 
2587 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
2588  bool neg) {
2589  int SV = cast<ConstantInt>(V)->getSExtValue();
2590  return ConstantInt::get(Ty, neg ? -SV : SV);
2591 }
2592 
2593 // \brief Right-shift a vector by a constant.
2594 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
2595  llvm::Type *Ty, bool usgn,
2596  const char *name) {
2597  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
2598 
2599  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
2600  int EltSize = VTy->getScalarSizeInBits();
2601 
2602  Vec = Builder.CreateBitCast(Vec, Ty);
2603 
2604  // lshr/ashr are undefined when the shift amount is equal to the vector
2605  // element size.
2606  if (ShiftAmt == EltSize) {
2607  if (usgn) {
2608  // Right-shifting an unsigned value by its size yields 0.
2609  return llvm::ConstantAggregateZero::get(VTy);
2610  } else {
2611  // Right-shifting a signed value by its size is equivalent
2612  // to a shift of size-1.
2613  --ShiftAmt;
2614  Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
2615  }
2616  }
2617 
2618  Shift = EmitNeonShiftVector(Shift, Ty, false);
2619  if (usgn)
2620  return Builder.CreateLShr(Vec, Shift, name);
2621  else
2622  return Builder.CreateAShr(Vec, Shift, name);
2623 }
2624 
2625 enum {
2626  AddRetType = (1 << 0),
2627  Add1ArgType = (1 << 1),
2628  Add2ArgTypes = (1 << 2),
2629 
2630  VectorizeRetType = (1 << 3),
2631  VectorizeArgTypes = (1 << 4),
2632 
2633  InventFloatType = (1 << 5),
2634  UnsignedAlts = (1 << 6),
2635 
2636  Use64BitVectors = (1 << 7),
2637  Use128BitVectors = (1 << 8),
2638 
2645 };
2646 
2647 namespace {
2648 struct NeonIntrinsicInfo {
2649  const char *NameHint;
2650  unsigned BuiltinID;
2651  unsigned LLVMIntrinsic;
2652  unsigned AltLLVMIntrinsic;
2653  unsigned TypeModifier;
2654 
2655  bool operator<(unsigned RHSBuiltinID) const {
2656  return BuiltinID < RHSBuiltinID;
2657  }
2658  bool operator<(const NeonIntrinsicInfo &TE) const {
2659  return BuiltinID < TE.BuiltinID;
2660  }
2661 };
2662 } // end anonymous namespace
2663 
2664 #define NEONMAP0(NameBase) \
2665  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
2666 
2667 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
2668  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
2669  Intrinsic::LLVMIntrinsic, 0, TypeModifier }
2670 
2671 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
2672  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
2673  Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
2674  TypeModifier }
2675 
2676 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
2677  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
2678  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
2679  NEONMAP1(vabs_v, arm_neon_vabs, 0),
2680  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
2681  NEONMAP0(vaddhn_v),
2682  NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
2683  NEONMAP1(vaeseq_v, arm_neon_aese, 0),
2684  NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
2685  NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
2686  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
2687  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
2688  NEONMAP1(vcage_v, arm_neon_vacge, 0),
2689  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
2690  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
2691  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
2692  NEONMAP1(vcale_v, arm_neon_vacge, 0),
2693  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
2694  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
2695  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
2696  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
2697  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
2698  NEONMAP1(vclz_v, ctlz, Add1ArgType),
2699  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
2700  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
2701  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
2702  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
2703  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
2704  NEONMAP0(vcvt_f32_v),
2705  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
2706  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
2707  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
2708  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
2709  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
2710  NEONMAP0(vcvt_s32_v),
2711  NEONMAP0(vcvt_s64_v),
2712  NEONMAP0(vcvt_u32_v),
2713  NEONMAP0(vcvt_u64_v),
2714  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
2715  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
2716  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
2717  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
2718  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
2719  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
2720  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
2721  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
2722  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
2723  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
2724  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
2725  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
2726  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
2727  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
2728  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
2729  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
2730  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
2731  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
2732  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
2733  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
2734  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
2735  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
2736  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
2737  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
2738  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
2739  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
2740  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
2741  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
2742  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
2743  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
2744  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
2745  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
2746  NEONMAP0(vcvtq_f32_v),
2747  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
2748  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
2749  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
2750  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
2751  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
2752  NEONMAP0(vcvtq_s32_v),
2753  NEONMAP0(vcvtq_s64_v),
2754  NEONMAP0(vcvtq_u32_v),
2755  NEONMAP0(vcvtq_u64_v),
2756  NEONMAP0(vext_v),
2757  NEONMAP0(vextq_v),
2758  NEONMAP0(vfma_v),
2759  NEONMAP0(vfmaq_v),
2760  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
2761  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
2762  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
2763  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
2764  NEONMAP0(vld1_dup_v),
2765  NEONMAP1(vld1_v, arm_neon_vld1, 0),
2766  NEONMAP0(vld1q_dup_v),
2767  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
2768  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
2769  NEONMAP1(vld2_v, arm_neon_vld2, 0),
2770  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
2771  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
2772  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
2773  NEONMAP1(vld3_v, arm_neon_vld3, 0),
2774  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
2775  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
2776  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
2777  NEONMAP1(vld4_v, arm_neon_vld4, 0),
2778  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
2779  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
2780  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
2781  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
2782  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
2783  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
2784  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
2785  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
2786  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
2787  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
2788  NEONMAP0(vmovl_v),
2789  NEONMAP0(vmovn_v),
2790  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
2791  NEONMAP0(vmull_v),
2792  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
2793  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
2794  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
2795  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
2796  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
2797  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
2798  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
2799  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
2800  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
2801  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
2802  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
2803  NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
2804  NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
2805  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
2806  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
2807  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
2808  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
2809  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
2810  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
2811  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
2812  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
2813  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
2814  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
2815  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
2816  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
2817  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
2818  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
2819  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
2820  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
2821  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
2822  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
2823  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
2824  NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
2825  NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
2826  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
2827  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
2828  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
2829  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
2830  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
2831  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
2832  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
2833  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
2834  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
2835  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
2836  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
2837  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
2838  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
2839  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
2840  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
2841  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
2842  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
2843  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
2844  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
2845  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
2846  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
2847  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
2848  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
2849  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
2850  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
2851  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
2852  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
2853  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
2854  NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
2855  NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
2856  NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
2857  NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
2858  NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
2859  NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
2860  NEONMAP0(vshl_n_v),
2861  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
2862  NEONMAP0(vshll_n_v),
2863  NEONMAP0(vshlq_n_v),
2864  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
2865  NEONMAP0(vshr_n_v),
2866  NEONMAP0(vshrn_n_v),
2867  NEONMAP0(vshrq_n_v),
2868  NEONMAP1(vst1_v, arm_neon_vst1, 0),
2869  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
2870  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
2871  NEONMAP1(vst2_v, arm_neon_vst2, 0),
2872  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
2873  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
2874  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
2875  NEONMAP1(vst3_v, arm_neon_vst3, 0),
2876  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
2877  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
2878  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
2879  NEONMAP1(vst4_v, arm_neon_vst4, 0),
2880  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
2881  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
2882  NEONMAP0(vsubhn_v),
2883  NEONMAP0(vtrn_v),
2884  NEONMAP0(vtrnq_v),
2885  NEONMAP0(vtst_v),
2886  NEONMAP0(vtstq_v),
2887  NEONMAP0(vuzp_v),
2888  NEONMAP0(vuzpq_v),
2889  NEONMAP0(vzip_v),
2890  NEONMAP0(vzipq_v)
2891 };
2892 
2893 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
2894  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
2895  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
2896  NEONMAP0(vaddhn_v),
2897  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
2898  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
2899  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
2900  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
2901  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
2902  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
2903  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
2904  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
2905  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
2906  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
2907  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
2908  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
2909  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
2910  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
2911  NEONMAP1(vclz_v, ctlz, Add1ArgType),
2912  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
2913  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
2914  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
2915  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
2916  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
2917  NEONMAP0(vcvt_f32_v),
2918  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2919  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2920  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
2921  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
2922  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
2923  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
2924  NEONMAP0(vcvtq_f32_v),
2925  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2926  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2927  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
2928  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
2929  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
2930  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
2931  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
2932  NEONMAP0(vext_v),
2933  NEONMAP0(vextq_v),
2934  NEONMAP0(vfma_v),
2935  NEONMAP0(vfmaq_v),
2936  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
2937  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
2938  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
2939  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
2940  NEONMAP0(vmovl_v),
2941  NEONMAP0(vmovn_v),
2942  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
2943  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
2944  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
2945  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
2946  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
2947  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
2948  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
2949  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
2950  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
2951  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
2952  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
2953  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
2954  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
2955  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
2956  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
2957  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
2958  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
2959  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
2960  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
2961  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
2962  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
2963  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
2964  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
2965  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
2966  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
2967  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
2968  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
2969  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
2970  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
2971  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
2972  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
2973  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
2974  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
2975  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
2976  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
2977  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
2978  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
2979  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
2980  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
2981  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
2982  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
2983  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
2984  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
2985  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
2986  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
2987  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
2988  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
2989  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
2990  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
2991  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
2992  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
2993  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
2994  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
2995  NEONMAP0(vshl_n_v),
2996  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
2997  NEONMAP0(vshll_n_v),
2998  NEONMAP0(vshlq_n_v),
2999  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3000  NEONMAP0(vshr_n_v),
3001  NEONMAP0(vshrn_n_v),
3002  NEONMAP0(vshrq_n_v),
3003  NEONMAP0(vsubhn_v),
3004  NEONMAP0(vtst_v),
3005  NEONMAP0(vtstq_v),
3006 };
3007 
3008 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
3009  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
3010  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
3011  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
3012  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3013  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3014  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3015  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3016  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3017  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3018  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3019  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3020  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
3021  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3022  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
3023  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3024  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3025  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3026  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3027  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3028  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3029  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3030  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3031  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3032  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3033  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3034  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3035  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3036  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3037  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3038  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3039  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3040  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3041  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3042  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3043  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3044  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3045  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3046  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3047  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3048  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3049  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3050  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3051  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3052  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3053  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3054  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3055  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3056  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3057  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
3058  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3059  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3060  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3061  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3062  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3063  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3064  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3065  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3066  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3067  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3068  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3069  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3070  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3071  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3072  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3073  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3074  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3075  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3076  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3077  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3078  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
3079  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
3080  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
3081  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3082  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3083  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3084  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3085  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3086  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3087  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3088  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3089  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3090  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3091  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3092  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
3093  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3094  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
3095  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3096  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3097  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
3098  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
3099  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3100  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3101  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
3102  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
3103  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
3104  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
3105  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
3106  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
3107  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
3108  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
3109  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3110  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3111  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3112  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3113  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
3114  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3115  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3116  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3117  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
3118  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3119  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
3120  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
3121  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
3122  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3123  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3124  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
3125  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
3126  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3127  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3128  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
3129  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
3130  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
3131  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
3132  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3133  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3134  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3135  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3136  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
3137  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3138  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3139  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3140  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3141  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3142  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3143  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
3144  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
3145  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3146  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3147  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3148  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3149  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
3150  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
3151  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
3152  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
3153  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3154  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3155  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
3156  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
3157  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
3158  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3159  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3160  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3161  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3162  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
3163  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3164  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3165  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3166  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3167  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
3168  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
3169  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3170  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3171  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
3172  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
3173  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
3174  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
3175  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
3176  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
3177  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
3178  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
3179  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
3180  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
3181  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
3182  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
3183  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
3184  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
3185  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
3186  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
3187  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
3188  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
3189  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
3190  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
3191  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3192  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
3193  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3194  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
3195  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
3196  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
3197  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3198  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
3199  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3200  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
3201 };
3202 
3203 #undef NEONMAP0
3204 #undef NEONMAP1
3205 #undef NEONMAP2
3206 
3207 static bool NEONSIMDIntrinsicsProvenSorted = false;
3208 
3209 static bool AArch64SIMDIntrinsicsProvenSorted = false;
3210 static bool AArch64SISDIntrinsicsProvenSorted = false;
3211 
3212 
3213 static const NeonIntrinsicInfo *
3214 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
3215  unsigned BuiltinID, bool &MapProvenSorted) {
3216 
3217 #ifndef NDEBUG
3218  if (!MapProvenSorted) {
3219  assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
3220  MapProvenSorted = true;
3221  }
3222 #endif
3223 
3224  const NeonIntrinsicInfo *Builtin =
3225  std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
3226 
3227  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
3228  return Builtin;
3229 
3230  return nullptr;
3231 }
3232 
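// A sketch of how the lookup above is meant to be used (the table and flag
// names are the ones declared in this file): callers probe one of the sorted
// tables with the frontend builtin ID and fall back to bespoke handling when
// nullptr comes back, roughly
//   if (const NeonIntrinsicInfo *Info = findNeonIntrinsicInMap(
//           AArch64SIMDIntrinsicMap, BuiltinID,
//           AArch64SIMDIntrinsicsProvenSorted))
//     // ... use Info->LLVMIntrinsic, Info->TypeModifier, Info->NameHint ...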
3233 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
3234  unsigned Modifier,
3235  llvm::Type *ArgType,
3236  const CallExpr *E) {
3237  int VectorSize = 0;
3238  if (Modifier & Use64BitVectors)
3239  VectorSize = 64;
3240  else if (Modifier & Use128BitVectors)
3241  VectorSize = 128;
3242 
3243  // Return type.
3244  SmallVector<llvm::Type *, 3> Tys;
3245  if (Modifier & AddRetType) {
3246  llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
3247  if (Modifier & VectorizeRetType)
3248  Ty = llvm::VectorType::get(
3249  Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
3250 
3251  Tys.push_back(Ty);
3252  }
3253 
3254  // Arguments.
3255  if (Modifier & VectorizeArgTypes) {
3256  int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
3257  ArgType = llvm::VectorType::get(ArgType, Elts);
3258  }
3259 
3260  if (Modifier & (Add1ArgType | Add2ArgTypes))
3261  Tys.push_back(ArgType);
3262 
3263  if (Modifier & Add2ArgTypes)
3264  Tys.push_back(ArgType);
3265 
3266  if (Modifier & InventFloatType)
3267  Tys.push_back(FloatTy);
3268 
3269  return CGM.getIntrinsic(IntrinsicID, Tys);
3270 }
3271 
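// As an illustration of the modifier handling above: for a table entry such as
// vaddlvq_u32 (aarch64_neon_uaddlv, AddRetType | Add1ArgType), the overload
// list ends up as roughly { i64, <4 x i32> }, so the intrinsic selected would
// be something like
//   declare i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32>)
// (the exact mangling depends on the argument and return types involved).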
3272 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
3273  const NeonIntrinsicInfo &SISDInfo,
3274  SmallVectorImpl<Value *> &Ops,
3275  const CallExpr *E) {
3276  unsigned BuiltinID = SISDInfo.BuiltinID;
3277  unsigned int Int = SISDInfo.LLVMIntrinsic;
3278  unsigned Modifier = SISDInfo.TypeModifier;
3279  const char *s = SISDInfo.NameHint;
3280 
3281  switch (BuiltinID) {
3282  case NEON::BI__builtin_neon_vcled_s64:
3283  case NEON::BI__builtin_neon_vcled_u64:
3284  case NEON::BI__builtin_neon_vcles_f32:
3285  case NEON::BI__builtin_neon_vcled_f64:
3286  case NEON::BI__builtin_neon_vcltd_s64:
3287  case NEON::BI__builtin_neon_vcltd_u64:
3288  case NEON::BI__builtin_neon_vclts_f32:
3289  case NEON::BI__builtin_neon_vcltd_f64:
3290  case NEON::BI__builtin_neon_vcales_f32:
3291  case NEON::BI__builtin_neon_vcaled_f64:
3292  case NEON::BI__builtin_neon_vcalts_f32:
3293  case NEON::BI__builtin_neon_vcaltd_f64:
3294  // Only one direction of comparisons actually exists: cmle is actually a cmge
3295  // with swapped operands. The table gives us the right intrinsic, but we
3296  // still need to do the swap.
3297  std::swap(Ops[0], Ops[1]);
3298  break;
3299  }
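  // For example, vcaltd_f64(a, b) maps to aarch64_neon_facgt via the table
  // above, and after the swap it is emitted as facgt(b, a).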
3300 
3301  assert(Int && "Generic code assumes a valid intrinsic");
3302 
3303  // Determine the type(s) of this overloaded AArch64 intrinsic.
3304  const Expr *Arg = E->getArg(0);
3305  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
3306  Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
3307 
3308  int j = 0;
3309  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
3310  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3311  ai != ae; ++ai, ++j) {
3312  llvm::Type *ArgTy = ai->getType();
3313  if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
3314  ArgTy->getPrimitiveSizeInBits())
3315  continue;
3316 
3317  assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
3318  // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
3319  // it before inserting.
3320  Ops[j] =
3321  CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
3322  Ops[j] =
3323  CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
3324  }
3325 
3326  Value *Result = CGF.EmitNeonCall(F, Ops, s);
3327  llvm::Type *ResultType = CGF.ConvertType(E->getType());
3328  if (ResultType->getPrimitiveSizeInBits() <
3329  Result->getType()->getPrimitiveSizeInBits())
3330  return CGF.Builder.CreateExtractElement(Result, C0);
3331 
3332  return CGF.Builder.CreateBitCast(Result, ResultType, s);
3333 }
3334 
3335 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
3336  unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
3337  const char *NameHint, unsigned Modifier, const CallExpr *E,
3338  SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
3339  // Get the last argument, which specifies the vector type.
3340  llvm::APSInt NeonTypeConst;
3341  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3342  if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
3343  return nullptr;
3344 
3345  // Determine the type of this overloaded NEON intrinsic.
3346  NeonTypeFlags Type(NeonTypeConst.getZExtValue());
3347  bool Usgn = Type.isUnsigned();
3348  bool Quad = Type.isQuad();
3349 
3350  llvm::VectorType *VTy = GetNeonType(this, Type);
3351  llvm::Type *Ty = VTy;
3352  if (!Ty)
3353  return nullptr;
3354 
3355  auto getAlignmentValue32 = [&](Address addr) -> Value* {
3356  return Builder.getInt32(addr.getAlignment().getQuantity());
3357  };
3358 
3359  unsigned Int = LLVMIntrinsic;
3360  if ((Modifier & UnsignedAlts) && !Usgn)
3361  Int = AltLLVMIntrinsic;
3362 
3363  switch (BuiltinID) {
3364  default: break;
3365  case NEON::BI__builtin_neon_vabs_v:
3366  case NEON::BI__builtin_neon_vabsq_v:
3367  if (VTy->getElementType()->isFloatingPointTy())
3368  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
3369  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
3370  case NEON::BI__builtin_neon_vaddhn_v: {
3371  llvm::VectorType *SrcTy =
3372  llvm::VectorType::getExtendedElementVectorType(VTy);
3373 
3374  // %sum = add <4 x i32> %lhs, %rhs
3375  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3376  Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3377  Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
3378 
3379  // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
3380  Constant *ShiftAmt =
3381  ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
3382  Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
3383 
3384  // %res = trunc <4 x i32> %high to <4 x i16>
3385  return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
3386  }
3387  case NEON::BI__builtin_neon_vcale_v:
3388  case NEON::BI__builtin_neon_vcaleq_v:
3389  case NEON::BI__builtin_neon_vcalt_v:
3390  case NEON::BI__builtin_neon_vcaltq_v:
3391  std::swap(Ops[0], Ops[1]);
3392  case NEON::BI__builtin_neon_vcage_v:
3393  case NEON::BI__builtin_neon_vcageq_v:
3394  case NEON::BI__builtin_neon_vcagt_v:
3395  case NEON::BI__builtin_neon_vcagtq_v: {
3396  llvm::Type *VecFlt = llvm::VectorType::get(
3397  VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
3398  VTy->getNumElements());
3399  llvm::Type *Tys[] = { VTy, VecFlt };
3400  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3401  return EmitNeonCall(F, Ops, NameHint);
3402  }
3403  case NEON::BI__builtin_neon_vclz_v:
3404  case NEON::BI__builtin_neon_vclzq_v:
3405  // We generate a target-independent intrinsic, which needs a second argument
3406  // for whether or not clz of zero is undefined; on ARM it isn't.
3407  Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
3408  break;
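  // For example, on ARM vclz_s32 would be emitted roughly as
  //   %vclz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
  // since CLZ of zero is well-defined there.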
3409  case NEON::BI__builtin_neon_vcvt_f32_v:
3410  case NEON::BI__builtin_neon_vcvtq_f32_v:
3411  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3412  Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
3413  return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
3414  : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
3415  case NEON::BI__builtin_neon_vcvt_n_f32_v:
3416  case NEON::BI__builtin_neon_vcvt_n_f64_v:
3417  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
3418  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
3419  llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
3420  Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
3421  Function *F = CGM.getIntrinsic(Int, Tys);
3422  return EmitNeonCall(F, Ops, "vcvt_n");
3423  }
3424  case NEON::BI__builtin_neon_vcvt_n_s32_v:
3425  case NEON::BI__builtin_neon_vcvt_n_u32_v:
3426  case NEON::BI__builtin_neon_vcvt_n_s64_v:
3427  case NEON::BI__builtin_neon_vcvt_n_u64_v:
3428  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
3429  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
3430  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
3431  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
3432  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3433  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3434  return EmitNeonCall(F, Ops, "vcvt_n");
3435  }
3436  case NEON::BI__builtin_neon_vcvt_s32_v:
3437  case NEON::BI__builtin_neon_vcvt_u32_v:
3438  case NEON::BI__builtin_neon_vcvt_s64_v:
3439  case NEON::BI__builtin_neon_vcvt_u64_v:
3440  case NEON::BI__builtin_neon_vcvtq_s32_v:
3441  case NEON::BI__builtin_neon_vcvtq_u32_v:
3442  case NEON::BI__builtin_neon_vcvtq_s64_v:
3443  case NEON::BI__builtin_neon_vcvtq_u64_v: {
3444  Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
3445  return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
3446  : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
3447  }
3448  case NEON::BI__builtin_neon_vcvta_s32_v:
3449  case NEON::BI__builtin_neon_vcvta_s64_v:
3450  case NEON::BI__builtin_neon_vcvta_u32_v:
3451  case NEON::BI__builtin_neon_vcvta_u64_v:
3452  case NEON::BI__builtin_neon_vcvtaq_s32_v:
3453  case NEON::BI__builtin_neon_vcvtaq_s64_v:
3454  case NEON::BI__builtin_neon_vcvtaq_u32_v:
3455  case NEON::BI__builtin_neon_vcvtaq_u64_v:
3456  case NEON::BI__builtin_neon_vcvtn_s32_v:
3457  case NEON::BI__builtin_neon_vcvtn_s64_v:
3458  case NEON::BI__builtin_neon_vcvtn_u32_v:
3459  case NEON::BI__builtin_neon_vcvtn_u64_v:
3460  case NEON::BI__builtin_neon_vcvtnq_s32_v:
3461  case NEON::BI__builtin_neon_vcvtnq_s64_v:
3462  case NEON::BI__builtin_neon_vcvtnq_u32_v:
3463  case NEON::BI__builtin_neon_vcvtnq_u64_v:
3464  case NEON::BI__builtin_neon_vcvtp_s32_v:
3465  case NEON::BI__builtin_neon_vcvtp_s64_v:
3466  case NEON::BI__builtin_neon_vcvtp_u32_v:
3467  case NEON::BI__builtin_neon_vcvtp_u64_v:
3468  case NEON::BI__builtin_neon_vcvtpq_s32_v:
3469  case NEON::BI__builtin_neon_vcvtpq_s64_v:
3470  case NEON::BI__builtin_neon_vcvtpq_u32_v:
3471  case NEON::BI__builtin_neon_vcvtpq_u64_v:
3472  case NEON::BI__builtin_neon_vcvtm_s32_v:
3473  case NEON::BI__builtin_neon_vcvtm_s64_v:
3474  case NEON::BI__builtin_neon_vcvtm_u32_v:
3475  case NEON::BI__builtin_neon_vcvtm_u64_v:
3476  case NEON::BI__builtin_neon_vcvtmq_s32_v:
3477  case NEON::BI__builtin_neon_vcvtmq_s64_v:
3478  case NEON::BI__builtin_neon_vcvtmq_u32_v:
3479  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
3480  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3481  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
3482  }
3483  case NEON::BI__builtin_neon_vext_v:
3484  case NEON::BI__builtin_neon_vextq_v: {
3485  int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
3486  SmallVector<uint32_t, 16> Indices;
3487  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
3488  Indices.push_back(i+CV);
3489 
3490  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3491  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3492  return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
3493  }
3494  case NEON::BI__builtin_neon_vfma_v:
3495  case NEON::BI__builtin_neon_vfmaq_v: {
3496  Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
3497  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3498  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3499  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3500 
3501  // NEON intrinsic puts accumulator first, unlike the LLVM fma.
3502  return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
3503  }
3504  case NEON::BI__builtin_neon_vld1_v:
3505  case NEON::BI__builtin_neon_vld1q_v: {
3506  llvm::Type *Tys[] = {Ty, Int8PtrTy};
3507  Ops.push_back(getAlignmentValue32(PtrOp0));
3508  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
3509  }
3510  case NEON::BI__builtin_neon_vld2_v:
3511  case NEON::BI__builtin_neon_vld2q_v:
3512  case NEON::BI__builtin_neon_vld3_v:
3513  case NEON::BI__builtin_neon_vld3q_v:
3514  case NEON::BI__builtin_neon_vld4_v:
3515  case NEON::BI__builtin_neon_vld4q_v: {
3516  llvm::Type *Tys[] = {Ty, Int8PtrTy};
3517  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3518  Value *Align = getAlignmentValue32(PtrOp1);
3519  Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
3520  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3521  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3522  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3523  }
3524  case NEON::BI__builtin_neon_vld1_dup_v:
3525  case NEON::BI__builtin_neon_vld1q_dup_v: {
3526  Value *V = UndefValue::get(Ty);
3527  Ty = llvm::PointerType::getUnqual(VTy->getElementType());
3528  PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
3529  LoadInst *Ld = Builder.CreateLoad(PtrOp0);
3530  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3531  Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
3532  return EmitNeonSplat(Ops[0], CI);
3533  }
3534  case NEON::BI__builtin_neon_vld2_lane_v:
3535  case NEON::BI__builtin_neon_vld2q_lane_v:
3536  case NEON::BI__builtin_neon_vld3_lane_v:
3537  case NEON::BI__builtin_neon_vld3q_lane_v:
3538  case NEON::BI__builtin_neon_vld4_lane_v:
3539  case NEON::BI__builtin_neon_vld4q_lane_v: {
3540  llvm::Type *Tys[] = {Ty, Int8PtrTy};
3541  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3542  for (unsigned I = 2; I < Ops.size() - 1; ++I)
3543  Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
3544  Ops.push_back(getAlignmentValue32(PtrOp1));
3545  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
3546  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3547  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3548  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3549  }
3550  case NEON::BI__builtin_neon_vmovl_v: {
3551  llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
3552  Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
3553  if (Usgn)
3554  return Builder.CreateZExt(Ops[0], Ty, "vmovl");
3555  return Builder.CreateSExt(Ops[0], Ty, "vmovl");
3556  }
3557  case NEON::BI__builtin_neon_vmovn_v: {
3558  llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
3559  Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
3560  return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
3561  }
3562  case NEON::BI__builtin_neon_vmull_v:
3563  // FIXME: the integer vmull operations could be emitted in terms of pure
3564  // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
3565  // hoisting the exts outside loops. Until global ISel comes along that can
3566  // see through such movement, this leads to bad CodeGen. So we need an
3567  // intrinsic for now.
3568  Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
3569  Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
3570  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
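  // The pure-IR form alluded to above would look roughly like (for vmull_s8):
  //   %lhs.ext = sext <8 x i8> %lhs to <8 x i16>
  //   %rhs.ext = sext <8 x i8> %rhs to <8 x i16>
  //   %prod    = mul <8 x i16> %lhs.ext, %rhs.ext
  // but the target intrinsic is kept for the CodeGen reasons given above.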
3571  case NEON::BI__builtin_neon_vpadal_v:
3572  case NEON::BI__builtin_neon_vpadalq_v: {
3573  // The source operand type has twice as many elements of half the size.
3574  unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
3575  llvm::Type *EltTy =
3576  llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
3577  llvm::Type *NarrowTy =
3578  llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
3579  llvm::Type *Tys[2] = { Ty, NarrowTy };
3580  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
3581  }
3582  case NEON::BI__builtin_neon_vpaddl_v:
3583  case NEON::BI__builtin_neon_vpaddlq_v: {
3584  // The source operand type has twice as many elements of half the size.
3585  unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
3586  llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
3587  llvm::Type *NarrowTy =
3588  llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
3589  llvm::Type *Tys[2] = { Ty, NarrowTy };
3590  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
3591  }
3592  case NEON::BI__builtin_neon_vqdmlal_v:
3593  case NEON::BI__builtin_neon_vqdmlsl_v: {
3594  SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
3595  Ops[1] =
3596  EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
3597  Ops.resize(2);
3598  return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
3599  }
3600  case NEON::BI__builtin_neon_vqshl_n_v:
3601  case NEON::BI__builtin_neon_vqshlq_n_v:
3602  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
3603  1, false);
3604  case NEON::BI__builtin_neon_vqshlu_n_v:
3605  case NEON::BI__builtin_neon_vqshluq_n_v:
3606  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
3607  1, false);
3608  case NEON::BI__builtin_neon_vrecpe_v:
3609  case NEON::BI__builtin_neon_vrecpeq_v:
3610  case NEON::BI__builtin_neon_vrsqrte_v:
3611  case NEON::BI__builtin_neon_vrsqrteq_v:
3612  Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
3613  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
3614 
3615  case NEON::BI__builtin_neon_vrshr_n_v:
3616  case NEON::BI__builtin_neon_vrshrq_n_v:
3617  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
3618  1, true);
3619  case NEON::BI__builtin_neon_vshl_n_v:
3620  case NEON::BI__builtin_neon_vshlq_n_v:
3621  Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
3622  return Builder.CreateShl(Builder.CreateBitCast(Ops[0], Ty), Ops[1],
3623  "vshl_n");
3624  case NEON::BI__builtin_neon_vshll_n_v: {
3625  llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
3626  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3627  if (Usgn)
3628  Ops[0] = Builder.CreateZExt(Ops[0], VTy);
3629  else
3630  Ops[0] = Builder.CreateSExt(Ops[0], VTy);
3631  Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
3632  return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
3633  }
3634  case NEON::BI__builtin_neon_vshrn_n_v: {
3635  llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
3636  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3637  Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
3638  if (Usgn)
3639  Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
3640  else
3641  Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
3642  return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
3643  }
3644  case NEON::BI__builtin_neon_vshr_n_v:
3645  case NEON::BI__builtin_neon_vshrq_n_v:
3646  return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
3647  case NEON::BI__builtin_neon_vst1_v:
3648  case NEON::BI__builtin_neon_vst1q_v:
3649  case NEON::BI__builtin_neon_vst2_v:
3650  case NEON::BI__builtin_neon_vst2q_v:
3651  case NEON::BI__builtin_neon_vst3_v:
3652  case NEON::BI__builtin_neon_vst3q_v:
3653  case NEON::BI__builtin_neon_vst4_v:
3654  case NEON::BI__builtin_neon_vst4q_v:
3655  case NEON::BI__builtin_neon_vst2_lane_v:
3656  case NEON::BI__builtin_neon_vst2q_lane_v:
3657  case NEON::BI__builtin_neon_vst3_lane_v:
3658  case NEON::BI__builtin_neon_vst3q_lane_v:
3659  case NEON::BI__builtin_neon_vst4_lane_v:
3660  case NEON::BI__builtin_neon_vst4q_lane_v: {
3661  llvm::Type *Tys[] = {Int8PtrTy, Ty};
3662  Ops.push_back(getAlignmentValue32(PtrOp0));
3663  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
3664  }
3665  case NEON::BI__builtin_neon_vsubhn_v: {
3666  llvm::VectorType *SrcTy =
3667  llvm::VectorType::getExtendedElementVectorType(VTy);
3668 
3669  // %diff = sub <4 x i32> %lhs, %rhs
3670  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3671  Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3672  Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
3673 
3674  // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
3675  Constant *ShiftAmt =
3676  ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
3677  Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
3678 
3679  // %res = trunc <4 x i32> %high to <4 x i16>
3680  return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
3681  }
3682  case NEON::BI__builtin_neon_vtrn_v:
3683  case NEON::BI__builtin_neon_vtrnq_v: {
3684  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
3685  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3686  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3687  Value *SV = nullptr;
3688 
3689  for (unsigned vi = 0; vi != 2; ++vi) {
3690  SmallVector<uint32_t, 16> Indices;
3691  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
3692  Indices.push_back(i+vi);
3693  Indices.push_back(i+e+vi);
3694  }
3695  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
3696  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
3697  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
3698  }
3699  return SV;
3700  }
3701  case NEON::BI__builtin_neon_vtst_v:
3702  case NEON::BI__builtin_neon_vtstq_v: {
3703  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3704  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3705  Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
3706  Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
3707  ConstantAggregateZero::get(Ty));
3708  return Builder.CreateSExt(Ops[0], Ty, "vtst");
3709  }
3710  case NEON::BI__builtin_neon_vuzp_v:
3711  case NEON::BI__builtin_neon_vuzpq_v: {
3712  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
3713  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3714  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3715  Value *SV = nullptr;
3716 
3717  for (unsigned vi = 0; vi != 2; ++vi) {
3718  SmallVector<uint32_t, 16> Indices;
3719  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
3720  Indices.push_back(2*i+vi);
3721 
3722  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
3723  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
3724  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
3725  }
3726  return SV;
3727  }
3728  case NEON::BI__builtin_neon_vzip_v:
3729  case NEON::BI__builtin_neon_vzipq_v: {
3730  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
3731  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3732  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3733  Value *SV = nullptr;
3734 
3735  for (unsigned vi = 0; vi != 2; ++vi) {
3736  SmallVector<uint32_t, 16> Indices;
3737  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
3738  Indices.push_back((i + vi*e) >> 1);
3739  Indices.push_back(((i + vi*e) >> 1)+e);
3740  }
3741  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
3742  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
3743  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
3744  }
3745  return SV;
3746  }
3747  }
3748 
3749  assert(Int && "Expected valid intrinsic number");
3750 
3751  // Determine the type(s) of this overloaded AArch64 intrinsic.
3752  Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
3753 
3754  Value *Result = EmitNeonCall(F, Ops, NameHint);
3755  llvm::Type *ResultType = ConvertType(E->getType());
3756  // Cast the AArch64 intrinsic's one-element vector result back to the
3757  // scalar type expected by the builtin.
3758  return Builder.CreateBitCast(Result, ResultType, NameHint);
3759 }
3760 
3761 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
3762  Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
3763  const CmpInst::Predicate Ip, const Twine &Name) {
3764  llvm::Type *OTy = Op->getType();
3765 
3766  // FIXME: this is utterly horrific. We should not be looking at previous
3767  // codegen context to find out what needs doing. Unfortunately TableGen
3768  // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
3769  // (etc).
3770  if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
3771  OTy = BI->getOperand(0)->getType();
3772 
3773  Op = Builder.CreateBitCast(Op, OTy);
3774  if (OTy->getScalarType()->isFloatingPointTy()) {
3775  Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
3776  } else {
3777  Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
3778  }
3779  return Builder.CreateSExt(Op, Ty, Name);
3780 }
3781 
3782 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
3783  Value *ExtOp, Value *IndexOp,
3784  llvm::Type *ResTy, unsigned IntID,
3785  const char *Name) {
3786  SmallVector<Value *, 2> TblOps;
3787  if (ExtOp)
3788  TblOps.push_back(ExtOp);
3789 
3790  // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
3791  SmallVector<uint32_t, 16> Indices;
3792  llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
3793  for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
3794  Indices.push_back(2*i);
3795  Indices.push_back(2*i+1);
3796  }
3797 
3798  int PairPos = 0, End = Ops.size() - 1;
3799  while (PairPos < End) {
3800  TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
3801  Ops[PairPos+1], Indices,
3802  Name));
3803  PairPos += 2;
3804  }
3805 
3806  // If there's an odd number of 64-bit lookup-table vectors, fill the high
3807  // 64 bits of the final 128-bit lookup table with zero.
3808  if (PairPos == End) {
3809  Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
3810  TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
3811  ZeroTbl, Indices, Name));
3812  }
3813 
3814  Function *TblF;
3815  TblOps.push_back(IndexOp);
3816  TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
3817 
3818  return CGF.EmitNeonCall(TblF, TblOps, Name);
3819 }
3820 
3821 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
3822  unsigned Value;
3823  switch (BuiltinID) {
3824  default:
3825  return nullptr;
3826  case ARM::BI__builtin_arm_nop:
3827  Value = 0;
3828  break;
3829  case ARM::BI__builtin_arm_yield:
3830  case ARM::BI__yield:
3831  Value = 1;
3832  break;
3833  case ARM::BI__builtin_arm_wfe:
3834  case ARM::BI__wfe:
3835  Value = 2;
3836  break;
3837  case ARM::BI__builtin_arm_wfi:
3838  case ARM::BI__wfi:
3839  Value = 3;
3840  break;
3841  case ARM::BI__builtin_arm_sev:
3842  case ARM::BI__sev:
3843  Value = 4;
3844  break;
3845  case ARM::BI__builtin_arm_sevl:
3846  case ARM::BI__sevl:
3847  Value = 5;
3848  break;
3849  }
3850 
3851  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
3852  llvm::ConstantInt::get(Int32Ty, Value));
3853 }
3854 
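// For example, __builtin_arm_wfe() (hint value 2 above) is emitted roughly as
//   call void @llvm.arm.hint(i32 2)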
3855 // Generates the IR for the read/write special register builtin.
3856 // ValueType is the type of the value that is to be written or read;
3857 // RegisterType is the type of the register being written to or read from.
3858 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
3859  const CallExpr *E,
3860  llvm::Type *RegisterType,
3861  llvm::Type *ValueType,
3862  bool IsRead,
3863  StringRef SysReg = "") {
3864  // The read and write register intrinsics only support 32- and 64-bit operations.
3865  assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
3866  && "Unsupported size for register.");
3867 
3868  CodeGen::CGBuilderTy &Builder = CGF.Builder;
3869  CodeGen::CodeGenModule &CGM = CGF.CGM;
3870  LLVMContext &Context = CGM.getLLVMContext();
3871 
3872  if (SysReg.empty()) {
3873  const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
3874  SysReg = cast<StringLiteral>(SysRegStrExpr)->getString();
3875  }
3876 
3877  llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
3878  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
3879  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
3880 
3881  llvm::Type *Types[] = { RegisterType };
3882 
3883  bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
3884  assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
3885  && "Can't fit 64-bit value in 32-bit register");
3886 
3887  if (IsRead) {
3888  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
3889  llvm::Value *Call = Builder.CreateCall(F, Metadata);
3890 
3891  if (MixedTypes)
3892  // Read into a 64-bit register and then truncate the result to 32 bits.
3893  return Builder.CreateTrunc(Call, ValueType);
3894 
3895  if (ValueType->isPointerTy())
3896  // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
3897  return Builder.CreateIntToPtr(Call, ValueType);
3898 
3899  return Call;
3900  }
3901 
3902  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
3903  llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
3904  if (MixedTypes) {
3905  // Extend the 32-bit write value to 64 bits to pass to the write intrinsic.
3906  ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
3907  return Builder.CreateCall(F, { Metadata, ArgValue });
3908  }
3909 
3910  if (ValueType->isPointerTy()) {
3911  // Have VoidPtrTy ArgValue but want to return an i32/i64.
3912  ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
3913  return Builder.CreateCall(F, { Metadata, ArgValue });
3914  }
3915 
3916  return Builder.CreateCall(F, { Metadata, ArgValue });
3917 }
3918 
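// As an illustration of the read path above (the register name here is only a
// placeholder): reading a 32-bit value out of a 64-bit system register takes
// the MixedTypes route, i.e. roughly
//   %reg = call i64 @llvm.read_register.i64(metadata !0)   ; !0 = !{!"<sysreg>"}
//   %val = trunc i64 %reg to i32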
3919 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
3920 /// argument that specifies the vector type.
3921 static bool HasExtraNeonArgument(unsigned BuiltinID) {
3922  switch (BuiltinID) {
3923  default: break;
3924  case NEON::BI__builtin_neon_vget_lane_i8:
3925  case NEON::BI__builtin_neon_vget_lane_i16:
3926  case NEON::BI__builtin_neon_vget_lane_i32:
3927  case NEON::BI__builtin_neon_vget_lane_i64:
3928  case NEON::BI__builtin_neon_vget_lane_f32:
3929  case NEON::BI__builtin_neon_vgetq_lane_i8:
3930  case NEON::BI__builtin_neon_vgetq_lane_i16:
3931  case NEON::BI__builtin_neon_vgetq_lane_i32:
3932  case NEON::BI__builtin_neon_vgetq_lane_i64:
3933  case NEON::BI__builtin_neon_vgetq_lane_f32:
3934  case NEON::BI__builtin_neon_vset_lane_i8:
3935  case NEON::BI__builtin_neon_vset_lane_i16:
3936  case NEON::BI__builtin_neon_vset_lane_i32:
3937  case NEON::BI__builtin_neon_vset_lane_i64:
3938  case NEON::BI__builtin_neon_vset_lane_f32:
3939  case NEON::BI__builtin_neon_vsetq_lane_i8:
3940  case NEON::BI__builtin_neon_vsetq_lane_i16:
3941  case NEON::BI__builtin_neon_vsetq_lane_i32:
3942  case NEON::BI__builtin_neon_vsetq_lane_i64:
3943  case NEON::BI__builtin_neon_vsetq_lane_f32:
3944  case NEON::BI__builtin_neon_vsha1h_u32:
3945  case NEON::BI__builtin_neon_vsha1cq_u32:
3946  case NEON::BI__builtin_neon_vsha1pq_u32:
3947  case NEON::BI__builtin_neon_vsha1mq_u32:
3948  case ARM::BI_MoveToCoprocessor:
3949  case ARM::BI_MoveToCoprocessor2:
3950  return false;
3951  }
3952  return true;
3953 }
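// Note (added): the builtins listed above fully encode their operand types in
// their names (e.g. vget_lane_i32), so unlike the generic "_v" overloads they
// are not followed by a trailing NeonTypeFlags constant argument.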
3954 
3955 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
3956  const CallExpr *E) {
3957  if (auto Hint = GetValueForARMHint(BuiltinID))
3958  return Hint;
3959 
3960  if (BuiltinID == ARM::BI__emit) {
3961  bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
3962  llvm::FunctionType *FTy =
3963  llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
3964 
3965  APSInt Value;
3966  if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
3967  llvm_unreachable("Sema will ensure that the parameter is constant");
3968 
3969  uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
3970 
3971  llvm::InlineAsm *Emit =
3972  IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
3973  /*SideEffects=*/true)
3974  : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
3975  /*SideEffects=*/true);
3976 
3977  return Builder.CreateCall(Emit);
3978  }
3979 
3980  if (BuiltinID == ARM::BI__builtin_arm_dbg) {
3981  Value *Option = EmitScalarExpr(E->getArg(0));
3982  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
3983  }
3984 
3985  if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
3986  Value *Address = EmitScalarExpr(E->getArg(0));
3987  Value *RW = EmitScalarExpr(E->getArg(1));
3988  Value *IsData = EmitScalarExpr(E->getArg(2));
3989 
3990  // Locality is not supported on the ARM target; use the maximum value.
3991  Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
3992 
3993  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
3994  return Builder.CreateCall(F, {Address, RW, Locality, IsData});
3995  }
3996 
3997  if (BuiltinID == ARM::BI__builtin_arm_rbit) {
3998  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_rbit),
3999  EmitScalarExpr(E->getArg(0)),
4000  "rbit");
4001  }
4002 
4003  if (BuiltinID == ARM::BI__clear_cache) {
4004  assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4005  const FunctionDecl *FD = E->getDirectCallee();
4006  Value *Ops[2];
4007  for (unsigned i = 0; i < 2; i++)
4008  Ops[i] = EmitScalarExpr(E->getArg(i));
4009  llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4010  llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4011  StringRef Name = FD->getName();
4012  return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4013  }
4014 
4015  if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
4016  BuiltinID == ARM::BI__builtin_arm_mcrr2) {
4017  Function *F;
4018 
4019  switch (BuiltinID) {
4020  default: llvm_unreachable("unexpected builtin");
4021  case ARM::BI__builtin_arm_mcrr:
4022  F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
4023  break;
4024  case ARM::BI__builtin_arm_mcrr2:
4025  F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
4026  break;
4027  }
4028 
4029  // The MCRR{2} instruction has 5 operands, but
4030  // the intrinsic has only 4, because Rt and Rt2
4031  // are represented as a single unsigned 64-bit
4032  // integer in the intrinsic definition, whereas
4033  // the instruction itself encodes them as two
4034  // 32-bit registers, so the value is split below.
4035 
4036  Value *Coproc = EmitScalarExpr(E->getArg(0));
4037  Value *Opc1 = EmitScalarExpr(E->getArg(1));
4038  Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
4039  Value *CRm = EmitScalarExpr(E->getArg(3));
4040 
4041  Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4042  Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
4043  Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
4044  Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
4045 
4046  return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
4047  }
4048 
4049  if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
4050  BuiltinID == ARM::BI__builtin_arm_mrrc2) {
4051  Function *F;
4052 
4053  switch (BuiltinID) {
4054  default: llvm_unreachable("unexpected builtin");
4055  case ARM::BI__builtin_arm_mrrc:
4056  F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
4057  break;
4058  case ARM::BI__builtin_arm_mrrc2:
4059  F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
4060  break;
4061  }
4062 
4063  Value *Coproc = EmitScalarExpr(E->getArg(0));
4064  Value *Opc1 = EmitScalarExpr(E->getArg(1));
4065  Value *CRm = EmitScalarExpr(E->getArg(2));
4066  Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
4067 
4068  // Returns an unsigned 64 bit integer, represented
4069  // as two 32 bit integers.
4070 
4071  Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
4072  Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
4073  Rt = Builder.CreateZExt(Rt, Int64Ty);
4074  Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
4075 
4076  Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
4077  RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
4078  RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
4079 
4080  return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
4081  }
4082 
4083  if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
4084  ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
4085  BuiltinID == ARM::BI__builtin_arm_ldaex) &&
4086  getContext().getTypeSize(E->getType()) == 64) ||
4087  BuiltinID == ARM::BI__ldrexd) {
4088  Function *F;
4089 
4090  switch (BuiltinID) {
4091  default: llvm_unreachable("unexpected builtin");
4092  case ARM::BI__builtin_arm_ldaex:
4093  F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
4094  break;
4095  case ARM::BI__builtin_arm_ldrexd:
4096  case ARM::BI__builtin_arm_ldrex:
4097  case ARM::BI__ldrexd:
4098  F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
4099  break;
4100  }
4101 
4102  Value *LdPtr = EmitScalarExpr(E->getArg(0));
4103  Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4104  "ldrexd");
4105 
4106  Value *Val0 = Builder.CreateExtractValue(Val, 1);
4107  Value *Val1 = Builder.CreateExtractValue(Val, 0);
4108  Val0 = Builder.CreateZExt(Val0, Int64Ty);
4109  Val1 = Builder.CreateZExt(Val1, Int64Ty);
4110 
4111  Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
4112  Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4113  Val = Builder.CreateOr(Val, Val1);
4114  return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4115  }
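// Illustrative sketch (added): the {i32, i32} result of the ldrexd/ldaexd
// intrinsic is recombined into the 64-bit builtin result roughly as
//
//   %hi64 = zext i32 %hi to i64
//   %lo64 = zext i32 %lo to i64
//   %res  = or i64 (shl nuw i64 %hi64, 32), %lo64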
4116 
4117  if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
4118  BuiltinID == ARM::BI__builtin_arm_ldaex) {
4119  Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4120 
4121  QualType Ty = E->getType();
4122  llvm::Type *RealResTy = ConvertType(Ty);
4123  llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
4124  getContext().getTypeSize(Ty));
4125  LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
4126 
4127  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
4128  ? Intrinsic::arm_ldaex
4129  : Intrinsic::arm_ldrex,
4130  LoadAddr->getType());
4131  Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
4132 
4133  if (RealResTy->isPointerTy())
4134  return Builder.CreateIntToPtr(Val, RealResTy);
4135  else {
4136  Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4137  return Builder.CreateBitCast(Val, RealResTy);
4138  }
4139  }
4140 
4141  if (BuiltinID == ARM::BI__builtin_arm_strexd ||
4142  ((BuiltinID == ARM::BI__builtin_arm_stlex ||
4143  BuiltinID == ARM::BI__builtin_arm_strex) &&
4144  getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
4145  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4146  ? Intrinsic::arm_stlexd
4147  : Intrinsic::arm_strexd);
4148  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr);
4149 
4150  Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4151  Value *Val = EmitScalarExpr(E->getArg(0));
4152  Builder.CreateStore(Val, Tmp);
4153 
4154  Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
4155  Val = Builder.CreateLoad(LdPtr);
4156 
4157  Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4158  Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4159  Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
4160  return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
4161  }
4162 
4163  if (BuiltinID == ARM::BI__builtin_arm_strex ||
4164  BuiltinID == ARM::BI__builtin_arm_stlex) {
4165  Value *StoreVal = EmitScalarExpr(E->getArg(0));
4166  Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4167 
4168  QualType Ty = E->getArg(0)->getType();
4169  llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4170  getContext().getTypeSize(Ty));
4171  StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4172 
4173  if (StoreVal->getType()->isPointerTy())
4174  StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
4175  else {
4176  StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
4177  StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
4178  }
4179 
4180  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4181  ? Intrinsic::arm_stlex
4182  : Intrinsic::arm_strex,
4183  StoreAddr->getType());
4184  return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
4185  }
4186 
4187  if (BuiltinID == ARM::BI__builtin_arm_clrex) {
4188  Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
4189  return Builder.CreateCall(F);
4190  }
4191 
4192  // CRC32
4193  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4194  switch (BuiltinID) {
4195  case ARM::BI__builtin_arm_crc32b:
4196  CRCIntrinsicID = Intrinsic::arm_crc32b; break;
4197  case ARM::BI__builtin_arm_crc32cb:
4198  CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
4199  case ARM::BI__builtin_arm_crc32h:
4200  CRCIntrinsicID = Intrinsic::arm_crc32h; break;
4201  case ARM::BI__builtin_arm_crc32ch:
4202  CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
4203  case ARM::BI__builtin_arm_crc32w:
4204  case ARM::BI__builtin_arm_crc32d:
4205  CRCIntrinsicID = Intrinsic::arm_crc32w; break;
4206  case ARM::BI__builtin_arm_crc32cw:
4207  case ARM::BI__builtin_arm_crc32cd:
4208  CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
4209  }
4210 
4211  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4212  Value *Arg0 = EmitScalarExpr(E->getArg(0));
4213  Value *Arg1 = EmitScalarExpr(E->getArg(1));
4214 
4215  // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
4216  // intrinsics, hence we need different codegen for these cases.
4217  if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
4218  BuiltinID == ARM::BI__builtin_arm_crc32cd) {
4219  Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4220  Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
4221  Value *Arg1b = Builder.CreateLShr(Arg1, C1);
4222  Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
4223 
4224  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4225  Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
4226  return Builder.CreateCall(F, {Res, Arg1b});
4227  } else {
4228  Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
4229 
4230  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4231  return Builder.CreateCall(F, {Arg0, Arg1});
4232  }
4233  }
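// Illustrative sketch (added): a 64-bit input to __builtin_arm_crc32d is fed
// through the 32-bit intrinsic twice, roughly as
//
//   lo  = trunc(x)            ; low 32 bits
//   hi  = trunc(x >> 32)      ; high 32 bits
//   tmp = llvm.arm.crc32w(crc, lo)
//   res = llvm.arm.crc32w(tmp, hi)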
4234 
4235  if (BuiltinID == ARM::BI__builtin_arm_rsr ||
4236  BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4237  BuiltinID == ARM::BI__builtin_arm_rsrp ||
4238  BuiltinID == ARM::BI__builtin_arm_wsr ||
4239  BuiltinID == ARM::BI__builtin_arm_wsr64 ||
4240  BuiltinID == ARM::BI__builtin_arm_wsrp) {
4241 
4242  bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
4243  BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4244  BuiltinID == ARM::BI__builtin_arm_rsrp;
4245 
4246  bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
4247  BuiltinID == ARM::BI__builtin_arm_wsrp;
4248 
4249  bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4250  BuiltinID == ARM::BI__builtin_arm_wsr64;
4251 
4252  llvm::Type *ValueType;
4253  llvm::Type *RegisterType;
4254  if (IsPointerBuiltin) {
4255  ValueType = VoidPtrTy;
4256  RegisterType = Int32Ty;
4257  } else if (Is64Bit) {
4258  ValueType = RegisterType = Int64Ty;
4259  } else {
4260  ValueType = RegisterType = Int32Ty;
4261  }
4262 
4263  return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
4264  }
4265 
4266  // Find out if any arguments are required to be integer constant
4267  // expressions.
4268  unsigned ICEArguments = 0;
4269  ASTContext::GetBuiltinTypeError Error;
4270  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4271  assert(Error == ASTContext::GE_None && "Should not codegen an error");
4272 
4273  auto getAlignmentValue32 = [&](Address addr) -> Value* {
4274  return Builder.getInt32(addr.getAlignment().getQuantity());
4275  };
4276 
4277  Address PtrOp0 = Address::invalid();
4278  Address PtrOp1 = Address::invalid();
4279  SmallVector<Value*, 4> Ops;
4280  bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
4281  unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
4282  for (unsigned i = 0, e = NumArgs; i != e; i++) {
4283  if (i == 0) {
4284  switch (BuiltinID) {
4285  case NEON::BI__builtin_neon_vld1_v:
4286  case NEON::BI__builtin_neon_vld1q_v:
4287  case NEON::BI__builtin_neon_vld1q_lane_v:
4288  case NEON::BI__builtin_neon_vld1_lane_v:
4289  case NEON::BI__builtin_neon_vld1_dup_v:
4290  case NEON::BI__builtin_neon_vld1q_dup_v:
4291  case NEON::BI__builtin_neon_vst1_v:
4292  case NEON::BI__builtin_neon_vst1q_v:
4293  case NEON::BI__builtin_neon_vst1q_lane_v:
4294  case NEON::BI__builtin_neon_vst1_lane_v:
4295  case NEON::BI__builtin_neon_vst2_v:
4296  case NEON::BI__builtin_neon_vst2q_v:
4297  case NEON::BI__builtin_neon_vst2_lane_v:
4298  case NEON::BI__builtin_neon_vst2q_lane_v:
4299  case NEON::BI__builtin_neon_vst3_v:
4300  case NEON::BI__builtin_neon_vst3q_v:
4301  case NEON::BI__builtin_neon_vst3_lane_v:
4302  case NEON::BI__builtin_neon_vst3q_lane_v:
4303  case NEON::BI__builtin_neon_vst4_v:
4304  case NEON::BI__builtin_neon_vst4q_v:
4305  case NEON::BI__builtin_neon_vst4_lane_v:
4306  case NEON::BI__builtin_neon_vst4q_lane_v:
4307  // Get the alignment for the argument in addition to the value;
4308  // we'll use it later.
4309  PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
4310  Ops.push_back(PtrOp0.getPointer());
4311  continue;
4312  }
4313  }
4314  if (i == 1) {
4315  switch (BuiltinID) {
4316  case NEON::BI__builtin_neon_vld2_v:
4317  case NEON::BI__builtin_neon_vld2q_v:
4318  case NEON::BI__builtin_neon_vld3_v:
4319  case NEON::BI__builtin_neon_vld3q_v:
4320  case NEON::BI__builtin_neon_vld4_v:
4321  case NEON::BI__builtin_neon_vld4q_v:
4322  case NEON::BI__builtin_neon_vld2_lane_v:
4323  case NEON::BI__builtin_neon_vld2q_lane_v:
4324  case NEON::BI__builtin_neon_vld3_lane_v:
4325  case NEON::BI__builtin_neon_vld3q_lane_v:
4326  case NEON::BI__builtin_neon_vld4_lane_v:
4327  case NEON::BI__builtin_neon_vld4q_lane_v:
4328  case NEON::BI__builtin_neon_vld2_dup_v:
4329  case NEON::BI__builtin_neon_vld3_dup_v:
4330  case NEON::BI__builtin_neon_vld4_dup_v:
4331  // Get the alignment for the argument in addition to the value;
4332  // we'll use it later.
4333  PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
4334  Ops.push_back(PtrOp1.getPointer());
4335  continue;
4336  }
4337  }
4338 
4339  if ((ICEArguments & (1 << i)) == 0) {
4340  Ops.push_back(EmitScalarExpr(E->getArg(i)));
4341  } else {
4342  // If this is required to be a constant, constant fold it so that we know
4343  // that the generated intrinsic gets a ConstantInt.
4344  llvm::APSInt Result;
4345  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
4346  assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
4347  Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
4348  }
4349  }
4350 
4351  switch (BuiltinID) {
4352  default: break;
4353 
4354  case NEON::BI__builtin_neon_vget_lane_i8:
4355  case NEON::BI__builtin_neon_vget_lane_i16:
4356  case NEON::BI__builtin_neon_vget_lane_i32:
4357  case NEON::BI__builtin_neon_vget_lane_i64:
4358  case NEON::BI__builtin_neon_vget_lane_f32:
4359  case NEON::BI__builtin_neon_vgetq_lane_i8:
4360  case NEON::BI__builtin_neon_vgetq_lane_i16:
4361  case NEON::BI__builtin_neon_vgetq_lane_i32:
4362  case NEON::BI__builtin_neon_vgetq_lane_i64:
4363  case NEON::BI__builtin_neon_vgetq_lane_f32:
4364  return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
4365 
4366  case NEON::BI__builtin_neon_vset_lane_i8:
4367  case NEON::BI__builtin_neon_vset_lane_i16:
4368  case NEON::BI__builtin_neon_vset_lane_i32:
4369  case NEON::BI__builtin_neon_vset_lane_i64:
4370  case NEON::BI__builtin_neon_vset_lane_f32:
4371  case NEON::BI__builtin_neon_vsetq_lane_i8:
4372  case NEON::BI__builtin_neon_vsetq_lane_i16:
4373  case NEON::BI__builtin_neon_vsetq_lane_i32:
4374  case NEON::BI__builtin_neon_vsetq_lane_i64:
4375  case NEON::BI__builtin_neon_vsetq_lane_f32:
4376  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4377 
4378  case NEON::BI__builtin_neon_vsha1h_u32:
4379  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
4380  "vsha1h");
4381  case NEON::BI__builtin_neon_vsha1cq_u32:
4382  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
4383  "vsha1h");
4384  case NEON::BI__builtin_neon_vsha1pq_u32:
4385  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
4386  "vsha1h");
4387  case NEON::BI__builtin_neon_vsha1mq_u32:
4388  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
4389  "vsha1h");
4390 
4391  // The ARM _MoveToCoprocessor builtins put the input register value as
4392  // the first argument, but the LLVM intrinsic expects it as the third one.
4393  case ARM::BI_MoveToCoprocessor:
4394  case ARM::BI_MoveToCoprocessor2: {
4395  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
4396  Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
4397  return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
4398  Ops[3], Ops[4], Ops[5]});
4399  }
4400  }
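// Illustrative sketch (added): the lane builtins handled above map directly
// onto vector element instructions, e.g. vgetq_lane_i32(v, 1) becomes
//   extractelement <4 x i32> %v, i32 1
// and vset_lane_i32(x, v, 1) becomes
//   insertelement <2 x i32> %v, i32 %x, i32 1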
4401 
4402  // Get the last argument, which specifies the vector type.
4403  assert(HasExtraArg);
4404  llvm::APSInt Result;
4405  const Expr *Arg = E->getArg(E->getNumArgs()-1);
4406  if (!Arg->isIntegerConstantExpr(Result, getContext()))
4407  return nullptr;
4408 
4409  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
4410  BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
4411  // Determine the overloaded type of this builtin.
4412  llvm::Type *Ty;
4413  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
4414  Ty = FloatTy;
4415  else
4416  Ty = DoubleTy;
4417 
4418  // Determine whether this is an unsigned conversion or not.
4419  bool usgn = Result.getZExtValue() == 1;
4420  unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
4421 
4422  // Call the appropriate intrinsic.
4423  Function *F = CGM.getIntrinsic(Int, Ty);
4424  return Builder.CreateCall(F, Ops, "vcvtr");
4425  }
4426 
4427  // Determine the type of this overloaded NEON intrinsic.
4428  NeonTypeFlags Type(Result.getZExtValue());
4429  bool usgn = Type.isUnsigned();
4430  bool rightShift = false;
4431 
4432  llvm::VectorType *VTy = GetNeonType(this, Type);
4433  llvm::Type *Ty = VTy;
4434  if (!Ty)
4435  return nullptr;
4436 
4437  // Many NEON builtins have identical semantics and uses in ARM and
4438  // AArch64. Emit these in a single function.
4439  auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
4440  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
4441  IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
4442  if (Builtin)
4443  return EmitCommonNeonBuiltinExpr(
4444  Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
4445  Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
4446 
4447  unsigned Int;
4448  switch (BuiltinID) {
4449  default: return nullptr;
4450  case NEON::BI__builtin_neon_vld1q_lane_v:
4451  // Handle 64-bit integer elements as a special case. Use shuffles of
4452  // one-element vectors to avoid poor code for i64 in the backend.
4453  if (VTy->getElementType()->isIntegerTy(64)) {
4454  // Extract the other lane.
4455  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4456  uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
4457  Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
4458  Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
4459  // Load the value as a one-element vector.
4460  Ty = llvm::VectorType::get(VTy->getElementType(), 1);
4461  llvm::Type *Tys[] = {Ty, Int8PtrTy};
4462  Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
4463  Value *Align = getAlignmentValue32(PtrOp0);
4464  Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
4465  // Combine them.
4466  uint32_t Indices[] = {1 - Lane, Lane};
4467  SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
4468  return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
4469  }
4470  // fall through
4471  case NEON::BI__builtin_neon_vld1_lane_v: {
4472  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4473  PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
4474  Value *Ld = Builder.CreateLoad(PtrOp0);
4475  return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
4476  }
4477  case NEON::BI__builtin_neon_vld2_dup_v:
4478  case NEON::BI__builtin_neon_vld3_dup_v:
4479  case NEON::BI__builtin_neon_vld4_dup_v: {
4480  // Handle 64-bit elements as a special-case. There is no "dup" needed.
4481  if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
4482  switch (BuiltinID) {
4483  case NEON::BI__builtin_neon_vld2_dup_v:
4484  Int = Intrinsic::arm_neon_vld2;
4485  break;
4486  case NEON::BI__builtin_neon_vld3_dup_v:
4487  Int = Intrinsic::arm_neon_vld3;
4488  break;
4489  case NEON::BI__builtin_neon_vld4_dup_v:
4490  Int = Intrinsic::arm_neon_vld4;
4491  break;
4492  default: llvm_unreachable("unknown vld_dup intrinsic?");
4493  }
4494  llvm::Type *Tys[] = {Ty, Int8PtrTy};
4495  Function *F = CGM.getIntrinsic(Int, Tys);
4496  llvm::Value *Align = getAlignmentValue32(PtrOp1);
4497  Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
4498  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4499  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4500  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4501  }
4502  switch (BuiltinID) {
4503  case NEON::BI__builtin_neon_vld2_dup_v:
4504  Int = Intrinsic::arm_neon_vld2lane;
4505  break;
4506  case NEON::BI__builtin_neon_vld3_dup_v:
4507  Int = Intrinsic::arm_neon_vld3lane;
4508  break;
4509  case NEON::BI__builtin_neon_vld4_dup_v:
4510  Int = Intrinsic::arm_neon_vld4lane;
4511  break;
4512  default: llvm_unreachable("unknown vld_dup intrinsic?");
4513  }
4514  llvm::Type *Tys[] = {Ty, Int8PtrTy};
4515  Function *F = CGM.getIntrinsic(Int, Tys);
4516  llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
4517 
4518  SmallVector<Value*, 6> Args;
4519  Args.push_back(Ops[1]);
4520  Args.append(STy->getNumElements(), UndefValue::get(Ty));
4521 
4522  llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
4523  Args.push_back(CI);
4524  Args.push_back(getAlignmentValue32(PtrOp1));
4525 
4526  Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
4527  // Splat lane 0 to all elements in each vector of the result.
4528  for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
4529  Value *Val = Builder.CreateExtractValue(Ops[1], i);
4530  Value *Elt = Builder.CreateBitCast(Val, Ty);
4531  Elt = EmitNeonSplat(Elt, CI);
4532  Elt = Builder.CreateBitCast(Elt, Val->getType());
4533  Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
4534  }
4535  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4536  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4537  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4538  }
4539  case NEON::BI__builtin_neon_vqrshrn_n_v:
4540  Int =
4541  usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
4542  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
4543  1, true);
4544  case NEON::BI__builtin_neon_vqrshrun_n_v:
4545  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
4546  Ops, "vqrshrun_n", 1, true);
4547  case NEON::BI__builtin_neon_vqshrn_n_v:
4548  Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
4549  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
4550  1, true);
4551  case NEON::BI__builtin_neon_vqshrun_n_v:
4552  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
4553  Ops, "vqshrun_n", 1, true);
4554  case NEON::BI__builtin_neon_vrecpe_v:
4555  case NEON::BI__builtin_neon_vrecpeq_v:
4556  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
4557  Ops, "vrecpe");
4558  case NEON::BI__builtin_neon_vrshrn_n_v:
4559  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
4560  Ops, "vrshrn_n", 1, true);
4561  case NEON::BI__builtin_neon_vrsra_n_v:
4562  case NEON::BI__builtin_neon_vrsraq_n_v:
4563  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4564  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4565  Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
4566  Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
4567  Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
4568  return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
4569  case NEON::BI__builtin_neon_vsri_n_v:
4570  case NEON::BI__builtin_neon_vsriq_n_v:
4571  rightShift = true;
4572  case NEON::BI__builtin_neon_vsli_n_v:
4573  case NEON::BI__builtin_neon_vsliq_n_v:
4574  Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
4575  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
4576  Ops, "vsli_n");
4577  case NEON::BI__builtin_neon_vsra_n_v:
4578  case NEON::BI__builtin_neon_vsraq_n_v:
4579  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4580  Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
4581  return Builder.CreateAdd(Ops[0], Ops[1]);
4582  case NEON::BI__builtin_neon_vst1q_lane_v:
4583  // Handle 64-bit integer elements as a special case. Use a shuffle to get
4584  // a one-element vector and avoid poor code for i64 in the backend.
4585  if (VTy->getElementType()->isIntegerTy(64)) {
4586  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4587  Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
4588  Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
4589  Ops[2] = getAlignmentValue32(PtrOp0);
4590  llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
4591  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
4592  Tys), Ops);
4593  }
4594  // fall through
4595  case NEON::BI__builtin_neon_vst1_lane_v: {
4596  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4597  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
4598  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4599  auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
4600  return St;
4601  }
4602  case NEON::BI__builtin_neon_vtbl1_v:
4603  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
4604  Ops, "vtbl1");
4605  case NEON::BI__builtin_neon_vtbl2_v:
4606  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
4607  Ops, "vtbl2");
4608  case NEON::BI__builtin_neon_vtbl3_v:
4609  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
4610  Ops, "vtbl3");
4611  case NEON::BI__builtin_neon_vtbl4_v:
4612  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
4613  Ops, "vtbl4");
4614  case NEON::BI__builtin_neon_vtbx1_v:
4615  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
4616  Ops, "vtbx1");
4617  case NEON::BI__builtin_neon_vtbx2_v:
4618  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
4619  Ops, "vtbx2");
4620  case NEON::BI__builtin_neon_vtbx3_v:
4621  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
4622  Ops, "vtbx3");
4623  case NEON::BI__builtin_neon_vtbx4_v:
4624  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
4625  Ops, "vtbx4");
4626  }
4627 }
4628 
4629 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
4630  const CallExpr *E,
4631  SmallVectorImpl<Value *> &Ops) {
4632  unsigned int Int = 0;
4633  const char *s = nullptr;
4634 
4635  switch (BuiltinID) {
4636  default:
4637  return nullptr;
4638  case NEON::BI__builtin_neon_vtbl1_v:
4639  case NEON::BI__builtin_neon_vqtbl1_v:
4640  case NEON::BI__builtin_neon_vqtbl1q_v:
4641  case NEON::BI__builtin_neon_vtbl2_v:
4642  case NEON::BI__builtin_neon_vqtbl2_v:
4643  case NEON::BI__builtin_neon_vqtbl2q_v:
4644  case NEON::BI__builtin_neon_vtbl3_v:
4645  case NEON::BI__builtin_neon_vqtbl3_v:
4646  case NEON::BI__builtin_neon_vqtbl3q_v:
4647  case NEON::BI__builtin_neon_vtbl4_v:
4648  case NEON::BI__builtin_neon_vqtbl4_v:
4649  case NEON::BI__builtin_neon_vqtbl4q_v:
4650  break;
4651  case NEON::BI__builtin_neon_vtbx1_v:
4652  case NEON::BI__builtin_neon_vqtbx1_v:
4653  case NEON::BI__builtin_neon_vqtbx1q_v:
4654  case NEON::BI__builtin_neon_vtbx2_v:
4655  case NEON::BI__builtin_neon_vqtbx2_v:
4656  case NEON::BI__builtin_neon_vqtbx2q_v:
4657  case NEON::BI__builtin_neon_vtbx3_v:
4658  case NEON::BI__builtin_neon_vqtbx3_v:
4659  case NEON::BI__builtin_neon_vqtbx3q_v:
4660  case NEON::BI__builtin_neon_vtbx4_v:
4661  case NEON::BI__builtin_neon_vqtbx4_v:
4662  case NEON::BI__builtin_neon_vqtbx4q_v:
4663  break;
4664  }
4665 
4666  assert(E->getNumArgs() >= 3);
4667 
4668  // Get the last argument, which specifies the vector type.
4669  llvm::APSInt Result;
4670  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
4671  if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
4672  return nullptr;
4673 
4674  // Determine the type of this overloaded NEON intrinsic.
4675  NeonTypeFlags Type(Result.getZExtValue());
4676  llvm::VectorType *Ty = GetNeonType(&CGF, Type);
4677  if (!Ty)
4678  return nullptr;
4679 
4680  CodeGen::CGBuilderTy &Builder = CGF.Builder;
4681 
4682  // AArch64 scalar builtins are not overloaded; they do not have an extra
4683  // argument that specifies the vector type, so each case is handled separately.
4684  switch (BuiltinID) {
4685  case NEON::BI__builtin_neon_vtbl1_v: {
4686  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
4687  Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
4688  "vtbl1");
4689  }
4690  case NEON::BI__builtin_neon_vtbl2_v: {
4691  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
4692  Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
4693  "vtbl1");
4694  }
4695  case NEON::BI__builtin_neon_vtbl3_v: {
4696  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
4697  Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
4698  "vtbl2");
4699  }
4700  case NEON::BI__builtin_neon_vtbl4_v: {
4701  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
4702  Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
4703  "vtbl2");
4704  }
4705  case NEON::BI__builtin_neon_vtbx1_v: {
4706  Value *TblRes =
4707  packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
4708  Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
4709 
4710  llvm::Constant *EightV = ConstantInt::get(Ty, 8);
4711  Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
4712  CmpRes = Builder.CreateSExt(CmpRes, Ty);
4713 
4714  Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
4715  Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
4716  return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
4717  }
4718  case NEON::BI__builtin_neon_vtbx2_v: {
4719  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
4720  Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
4721  "vtbx1");
4722  }
4723  case NEON::BI__builtin_neon_vtbx3_v: {
4724  Value *TblRes =
4725  packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
4726  Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
4727 
4728  llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
4729  Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
4730  TwentyFourV);
4731  CmpRes = Builder.CreateSExt(CmpRes, Ty);
4732 
4733  Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
4734  Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
4735  return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
4736  }
4737  case NEON::BI__builtin_neon_vtbx4_v: {
4738  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
4739  Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
4740  "vtbx2");
4741  }
4742  case NEON::BI__builtin_neon_vqtbl1_v:
4743  case NEON::BI__builtin_neon_vqtbl1q_v:
4744  Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
4745  case NEON::BI__builtin_neon_vqtbl2_v:
4746  case NEON::BI__builtin_neon_vqtbl2q_v: {
4747  Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
4748  case NEON::BI__builtin_neon_vqtbl3_v:
4749  case NEON::BI__builtin_neon_vqtbl3q_v:
4750  Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
4751  case NEON::BI__builtin_neon_vqtbl4_v:
4752  case NEON::BI__builtin_neon_vqtbl4q_v:
4753  Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
4754  case NEON::BI__builtin_neon_vqtbx1_v:
4755  case NEON::BI__builtin_neon_vqtbx1q_v:
4756  Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
4757  case NEON::BI__builtin_neon_vqtbx2_v:
4758  case NEON::BI__builtin_neon_vqtbx2q_v:
4759  Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
4760  case NEON::BI__builtin_neon_vqtbx3_v:
4761  case NEON::BI__builtin_neon_vqtbx3q_v:
4762  Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
4763  case NEON::BI__builtin_neon_vqtbx4_v:
4764  case NEON::BI__builtin_neon_vqtbx4q_v:
4765  Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
4766  }
4767  }
4768 
4769  if (!Int)
4770  return nullptr;
4771 
4772  Function *F = CGF.CGM.getIntrinsic(Int, Ty);
4773  return CGF.EmitNeonCall(F, Ops, s);
4774 }
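// Illustrative note (added): the vtbx1/vtbx3 cases above have no single
// AArch64 instruction, so they are emulated by combining a tbl1/tbl2 lookup
// with a select on the index value; e.g. for vtbx1 each lane i of the result
// is roughly
//   idx[i] >= 8 ? a[i] : tbl1(b, idx)[i]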
4775 
4776 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
4777  llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
4778  Op = Builder.CreateBitCast(Op, Int16Ty);
4779  Value *V = UndefValue::get(VTy);
4780  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
4781  Op = Builder.CreateInsertElement(V, Op, CI);
4782  return Op;
4783 }
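// Illustrative note (added): vectorWrapScalar16 widens a scalar i16 into
// lane 0 of an undef <4 x i16> vector, presumably so that scalar operations
// that only exist in vector form can be emitted and the result read back
// from lane 0.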
4784 
4785 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
4786  const CallExpr *E) {
4787  unsigned HintID = static_cast<unsigned>(-1);
4788  switch (BuiltinID) {
4789  default: break;
4790  case AArch64::BI__builtin_arm_nop:
4791  HintID = 0;
4792  break;
4793  case AArch64::BI__builtin_arm_yield:
4794  HintID = 1;
4795  break;
4796  case AArch64::BI__builtin_arm_wfe:
4797  HintID = 2;
4798  break;
4799  case AArch64::BI__builtin_arm_wfi:
4800  HintID = 3;
4801  break;
4802  case AArch64::BI__builtin_arm_sev:
4803  HintID = 4;
4804  break;
4805  case AArch64::BI__builtin_arm_sevl:
4806  HintID = 5;
4807  break;
4808  }
4809 
4810  if (HintID != static_cast<unsigned>(-1)) {
4811  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
4812  return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
4813  }
4814 
4815  if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
4816  Value *Address = EmitScalarExpr(E->getArg(0));
4817  Value *RW = EmitScalarExpr(E->getArg(1));
4818  Value *CacheLevel = EmitScalarExpr(E->getArg(2));
4819  Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
4820  Value *IsData = EmitScalarExpr(E->getArg(4));
4821 
4822  Value *Locality = nullptr;
4823  if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
4824  // Temporal fetch: convert the cache level to an llvm.prefetch locality value.
4825  Locality = llvm::ConstantInt::get(Int32Ty,
4826  -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
4827  } else {
4828  // Streaming fetch.
4829  Locality = llvm::ConstantInt::get(Int32Ty, 0);
4830  }
4831 
4832  // FIXME: We need an AArch64-specific LLVM intrinsic if we want to specify
4833  // PLDL3STRM or PLDL2STRM.
4834  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4835  return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4836  }
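// Illustrative note (added): for a temporal prefetch the requested cache
// level N is mapped to llvm.prefetch locality 3 - N, while a streaming
// (non-temporal) prefetch always uses locality 0.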
4837 
4838  if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
4839  assert((getContext().getTypeSize(E->getType()) == 32) &&
4840  "rbit of unusual size!");
4841  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4842  return Builder.CreateCall(
4843  CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
4844  }
4845  if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
4846  assert((getContext().getTypeSize(E->getType()) == 64) &&
4847  "rbit of unusual size!");
4848  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4849  return Builder.CreateCall(
4850  CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
4851  }
4852 
4853  if (BuiltinID == AArch64::BI__clear_cache) {
4854  assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4855  const FunctionDecl *FD = E->getDirectCallee();
4856  Value *Ops[2];
4857  for (unsigned i = 0; i < 2; i++)
4858  Ops[i] = EmitScalarExpr(E->getArg(i));
4859  llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4860  llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4861  StringRef Name = FD->getName();
4862  return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4863  }
4864 
4865  if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
4866  BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
4867  getContext().getTypeSize(E->getType()) == 128) {
4868  Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
4869  ? Intrinsic::aarch64_ldaxp
4870  : Intrinsic::aarch64_ldxp);
4871 
4872  Value *LdPtr = EmitScalarExpr(E->getArg(0));
4873  Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4874  "ldxp");
4875 
4876  Value *Val0 = Builder.CreateExtractValue(Val, 1);
4877  Value *Val1 = Builder.CreateExtractValue(Val, 0);
4878  llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
4879  Val0 = Builder.CreateZExt(Val0, Int128Ty);
4880  Val1 = Builder.CreateZExt(Val1, Int128Ty);
4881 
4882  Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
4883  Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4884  Val = Builder.CreateOr(Val, Val1);
4885  return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4886  } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
4887  BuiltinID == AArch64::BI__builtin_arm_ldaex) {
4888  Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4889 
4890  QualType Ty = E->getType();
4891  llvm::Type *RealResTy = ConvertType(Ty);
4892  llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
4893  getContext().getTypeSize(Ty));
4894  LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
4895 
4896  Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
4897  ? Intrinsic::aarch64_ldaxr
4898  : Intrinsic::aarch64_ldxr,
4899  LoadAddr->getType());
4900  Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
4901 
4902  if (RealResTy->isPointerTy())
4903  return Builder.CreateIntToPtr(Val, RealResTy);
4904 
4905  Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4906  return Builder.CreateBitCast(Val, RealResTy);
4907  }
4908 
4909  if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
4910  BuiltinID == AArch64::BI__builtin_arm_stlex) &&
4911  getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
4912  Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
4913  ? Intrinsic::aarch64_stlxp
4914  : Intrinsic::aarch64_stxp);
4915  llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr);
4916 
4917  Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4918  EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
4919 
4920  Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
4921  llvm::Value *Val = Builder.CreateLoad(Tmp);
4922 
4923  Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4924  Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4925  Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
4926  Int8PtrTy);
4927  return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
4928  }
4929 
4930  if (BuiltinID == AArch64::BI__builtin_arm_strex ||
4931  BuiltinID == AArch64::BI__builtin_arm_stlex) {
4932  Value *StoreVal = EmitScalarExpr(E->getArg(0));
4933  Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4934 
4935  QualType Ty = E->getArg(0)->getType();
4936  llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4937  getContext().getTypeSize(Ty));
4938  StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4939 
4940  if (StoreVal->getType()->isPointerTy())
4941  StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
4942  else {
4943  StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
4944  StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
4945  }
4946 
4947  Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
4948  ? Intrinsic::aarch64_stlxr
4949  : Intrinsic::aarch64_stxr,
4950  StoreAddr->getType());
4951  return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
4952  }
4953 
4954  if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
4955  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
4956  return Builder.CreateCall(F);
4957  }
4958 
4959  // CRC32
4960  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4961  switch (BuiltinID) {
4962  case AArch64::BI__builtin_arm_crc32b:
4963  CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
4964  case AArch64::BI__builtin_arm_crc32cb:
4965  CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
4966  case AArch64::BI__builtin_arm_crc32h:
4967  CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
4968  case AArch64::BI__builtin_arm_crc32ch:
4969  CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
4970  case AArch64::BI__builtin_arm_crc32w:
4971  CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
4972  case AArch64::BI__builtin_arm_crc32cw:
4973  CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
4974  case AArch64::BI__builtin_arm_crc32d:
4975  CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
4976  case AArch64::BI__builtin_arm_crc32cd:
4977  CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
4978  }
4979 
4980  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4981  Value *Arg0 = EmitScalarExpr(E->getArg(0));
4982  Value *Arg1 = EmitScalarExpr(E->getArg(1));
4983  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4984 
4985  llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
4986  Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
4987 
4988  return Builder.CreateCall(F, {Arg0, Arg1});
4989  }
4990 
4991  if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
4992  BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
4993  BuiltinID == AArch64::BI__builtin_arm_rsrp ||
4994  BuiltinID == AArch64::BI__builtin_arm_wsr ||
4995  BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
4996  BuiltinID == AArch64::BI__builtin_arm_wsrp) {
4997 
4998  bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
4999  BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5000  BuiltinID == AArch64::BI__builtin_arm_rsrp;
5001 
5002  bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5003  BuiltinID == AArch64::BI__builtin_arm_wsrp;
5004 
5005  bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
5006  BuiltinID != AArch64::BI__builtin_arm_wsr;
5007 
5008  llvm::Type *ValueType;
5009  llvm::Type *RegisterType = Int64Ty;
5010  if (IsPointerBuiltin) {
5011  ValueType = VoidPtrTy;
5012  } else if (Is64Bit) {
5013  ValueType = Int64Ty;
5014  } else {
5015  ValueType = Int32Ty;
5016  }
5017 
5018  return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
5019  }
5020 
5021  // Find out if any arguments are required to be integer constant
5022  // expressions.
5023  unsigned ICEArguments = 0;
5024  ASTContext::GetBuiltinTypeError Error;
5025  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5026  assert(Error == ASTContext::GE_None && "Should not codegen an error");
5027 
5028  llvm::SmallVector<Value*, 4> Ops;
5029  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
5030  if ((ICEArguments & (1 << i)) == 0) {
5031  Ops.push_back(EmitScalarExpr(E->getArg(i)));
5032  } else {
5033  // If this is required to be a constant, constant fold it so that we know
5034  // that the generated intrinsic gets a ConstantInt.
5035  llvm::APSInt Result;
5036  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
5037  assert(IsConst && "Constant arg isn't actually constant?");
5038  (void)IsConst;
5039  Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
5040  }
5041  }
5042 
5043  auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
5044  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
5045  SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5046 
5047  if (Builtin) {
5048  Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
5049  Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
5050  assert(Result && "SISD intrinsic should have been handled");
5051  return Result;
5052  }
5053 
5054  llvm::APSInt Result;
5055  const Expr *Arg = E->getArg(E->getNumArgs()-1);
5056  NeonTypeFlags Type(0);
5057  if (Arg->isIntegerConstantExpr(Result, getContext()))
5058  // Determine the type of this overloaded NEON intrinsic.
5059  Type = NeonTypeFlags(Result.getZExtValue());
5060 
5061  bool usgn = Type.isUnsigned();
5062  bool quad = Type.isQuad();
5063 
5064  // Handle non-overloaded intrinsics first.
5065  switch (BuiltinID) {
5066  default: break;
5067  case NEON::BI__builtin_neon_vldrq_p128: {
5068  llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
5069  Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
5070  return Builder.CreateDefaultAlignedLoad(Ptr);
5071  }
5072  case NEON::BI__builtin_neon_vstrq_p128: {
5073  llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
5074  Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
5075  return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
5076  }
5077  case NEON::BI__builtin_neon_vcvts_u32_f32:
5078  case NEON::BI__builtin_neon_vcvtd_u64_f64:
5079  usgn = true;
5080  // FALL THROUGH
5081  case NEON::BI__builtin_neon_vcvts_s32_f32:
5082  case NEON::BI__builtin_neon_vcvtd_s64_f64: {
5083  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5084  bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5085  llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5086  llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5087  Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
5088  if (usgn)
5089  return Builder.CreateFPToUI(Ops[0], InTy);
5090  return Builder.CreateFPToSI(Ops[0], InTy);
5091  }
5092  case NEON::BI__builtin_neon_vcvts_f32_u32:
5093  case NEON::BI__builtin_neon_vcvtd_f64_u64:
5094  usgn = true;
5095  // FALL THROUGH
5096  case NEON::BI__builtin_neon_vcvts_f32_s32:
5097  case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5098  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5099  bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5100  llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5101  llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5102  Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5103  if (usgn)
5104  return Builder.CreateUIToFP(Ops[0], FTy);
5105  return Builder.CreateSIToFP(Ops[0], FTy);
5106  }
5107  case NEON::BI__builtin_neon_vpaddd_s64: {
5108  llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
5109  Value *Vec = EmitScalarExpr(E->getArg(0));
5110  // The vector is v2i64, so make sure it's bitcast to that.
5111  Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
5112  llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5113  llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5114  Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5115  Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5116  // Pairwise addition of a v2i64 into a scalar i64.
5117  return Builder.CreateAdd(Op0, Op1, "vpaddd");
5118  }
5119  case NEON::BI__builtin_neon_vpaddd_f64: {
5120  llvm::Type *Ty =
5121  llvm::VectorType::get(DoubleTy, 2);
5122  Value *Vec = EmitScalarExpr(E->getArg(0));
5123  // The vector is v2f64, so make sure it's bitcast to that.
5124  Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
5125  llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5126  llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5127  Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5128  Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5129  // Pairwise addition of a v2f64 into a scalar f64.
5130  return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5131  }
5132  case NEON::BI__builtin_neon_vpadds_f32: {
5133  llvm::Type *Ty =
5134  llvm::VectorType::get(FloatTy, 2);
5135  Value *Vec = EmitScalarExpr(E->getArg(0));
5136  // The vector is v2f32, so make sure it's bitcast to that.
5137  Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
5138  llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5139  llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5140  Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5141  Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5142  // Pairwise addition of a v2f32 into a scalar f32.
5143  return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5144  }
5145  case NEON::BI__builtin_neon_vceqzd_s64:
5146  case NEON::BI__builtin_neon_vceqzd_f64:
5147  case NEON::BI__builtin_neon_vceqzs_f32:
5148  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5149  return EmitAArch64CompareBuiltinExpr(
5150  Ops[0], ConvertType(E->getCallReturnType(getContext())),
5151  ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
5152  case NEON::BI__builtin_neon_vcgezd_s64:
5153  case NEON::BI__builtin_neon_vcgezd_f64:
5154  case NEON::BI__builtin_neon_vcgezs_f32:
5155  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5156  return EmitAArch64CompareBuiltinExpr(
5157  Ops[0], ConvertType(E->getCallReturnType(getContext())),
5158  ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
5159  case NEON::BI__builtin_neon_vclezd_s64:
5160  case NEON::BI__builtin_neon_vclezd_f64:
5161  case NEON::BI__builtin_neon_vclezs_f32:
5162  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5163  return EmitAArch64CompareBuiltinExpr(
5164  Ops[0], ConvertType(E->getCallReturnType(getContext())),
5165  ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
5166  case NEON::BI__builtin_neon_vcgtzd_s64:
5167  case NEON::BI__builtin_neon_vcgtzd_f64:
5168  case NEON::BI__builtin_neon_vcgtzs_f32:
5169  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5170  return EmitAArch64CompareBuiltinExpr(
5171  Ops[0], ConvertType(E->getCallReturnType(getContext())),
5172  ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
5173  case NEON::BI__builtin_neon_vcltzd_s64:
5174  case NEON::BI__builtin_neon_vcltzd_f64:
5175  case NEON::BI__builtin_neon_vcltzs_f32:
5176  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5177  return EmitAArch64CompareBuiltinExpr(
5178  Ops[0], ConvertType(E->getCallReturnType(getContext())),
5179  ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
5180 
5181  case NEON::BI__builtin_neon_vceqzd_u64: {
5182  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5183  Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5184  Ops[0] =
5185  Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
5186  return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
5187  }
5188  case NEON::BI__builtin_neon_vceqd_f64:
5189  case NEON::BI__builtin_neon_vcled_f64:
5190  case NEON::BI__builtin_neon_vcltd_f64:
5191  case NEON::BI__builtin_neon_vcged_f64:
5192  case NEON::BI__builtin_neon_vcgtd_f64: {
5193  llvm::CmpInst::Predicate P;
5194  switch (BuiltinID) {
5195  default: llvm_unreachable("missing builtin ID in switch!");
5196  case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
5197  case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
5198  case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
5199  case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
5200  case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
5201  }
5202  Ops.push_back(EmitScalarExpr(E->getArg(1)));
5203  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5204  Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5205  Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5206  return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
5207  }
5208  case NEON::BI__builtin_neon_vceqs_f32:
5209  case NEON::BI__builtin_neon_vcles_f32:
5210  case NEON::BI__builtin_neon_vclts_f32:
5211  case NEON::BI__builtin_neon_vcges_f32:
5212  case NEON::BI__builtin_neon_vcgts_f32: {
5213  llvm::CmpInst::Predicate P;
5214  switch (BuiltinID) {
5215  default: llvm_unreachable("missing builtin ID in switch!");
5216  case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
5217  case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
5218  case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
5219  case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
5220  case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
5221  }
5222  Ops.push_back(EmitScalarExpr(E->getArg(1)));
5223  Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
5224  Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
5225  Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5226  return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
5227  }
5228  case NEON::BI__builtin_neon_vceqd_s64:
5229  case NEON::BI__builtin_neon_vceqd_u64:
5230  case NEON::BI__builtin_neon_vcgtd_s64:
5231  case NEON::BI__builtin_neon_vcgtd_u64:
5232  case NEON::BI__builtin_neon_vcltd_s64:
5233  case NEON::BI__builtin_neon_vcltd_u64:
5234  case NEON::BI__builtin_neon_vcged_u64:
5235  case NEON::BI__builtin_neon_vcged_s64:
5236  case NEON::BI__builtin_neon_vcled_u64:
5237  case NEON::BI__builtin_neon_vcled_s64: {
5238  llvm::CmpInst::Predicate P;
5239  switch (BuiltinID) {
5240  default: llvm_unreachable("missing builtin ID in switch!");
5241  case NEON::BI__builtin_neon_vceqd_s64:
5242  case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
5243  case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
5244  case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
5245  case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
5246  case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
5247  case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
5248  case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
5249  case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
5250  case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
5251  }
5252  Ops.push_back(EmitScalarExpr(E->getArg(1)));
5253  Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5254  Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5255  Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
5256  return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
5257  }
5258  case NEON::BI__builtin_neon_vtstd_s64:
5259  case NEON::BI__builtin_neon_vtstd_u64: {
5260  Ops.push_back(EmitScalarExpr(E->getArg(1)));
5261  Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5262  Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5263  Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
5264  Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5265  llvm::Constant::getNullValue(Int64Ty));
5266  return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
5267  }
5268  case NEON::BI__builtin_neon_vset_lane_i8:
5269  case NEON::BI__builtin_neon_vset_lane_i16:
5270  case NEON::BI__builtin_neon_vset_lane_i32:
5271  case NEON::BI__builtin_neon_vset_lane_i64:
5272  case NEON::BI__builtin_neon_vset_lane_f32:
5273  case NEON::BI__builtin_neon_vsetq_lane_i8:
5274  case NEON::BI__builtin_neon_vsetq_lane_i16:
5275  case NEON::BI__builtin_neon_vsetq_lane_i32:
5276  case NEON::BI__builtin_neon_vsetq_lane_i64:
5277  case NEON::BI__builtin_neon_vsetq_lane_f32:
5278  Ops.push_back(EmitScalarExpr(E->getArg(2)));
5279  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5280  case NEON::BI__builtin_neon_vset_lane_f64:
5281  // The vector type needs a cast for the v1f64 variant.
5282  Ops[1] = Builder.CreateBitCast(Ops[1],
5283  llvm::VectorType::get(DoubleTy, 1));
5284  Ops.push_back(EmitScalarExpr(E->getArg(2)));
5285  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5286  case NEON::BI__builtin_neon_vsetq_lane_f64:
5287  // The vector type needs a cast for the v2f64 variant.
5288  Ops[1] = Builder.CreateBitCast(Ops[1],
5289  llvm::VectorType::get(DoubleTy, 2));
5290  Ops.push_back(EmitScalarExpr(E->getArg(2)));
5291  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5292 
5293  case NEON::BI__builtin_neon_vget_lane_i8:
5294  case NEON::BI__builtin_neon_vdupb_lane_i8:
5295  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
5296  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5297  "vget_lane");
5298  case NEON::BI__builtin_neon_vgetq_lane_i8:
5299  case NEON::BI__builtin_neon_vdupb_laneq_i8:
5300  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
5301  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5302  "vgetq_lane");
5303  case NEON::BI__builtin_neon_vget_lane_i16:
5304  case NEON::BI__builtin_neon_vduph_lane_i16:
5305  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
5306  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5307  "vget_lane");
5308  case NEON::BI__builtin_neon_vgetq_lane_i16:
5309  case NEON::BI__builtin_neon_vduph_laneq_i16:
5310  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
5311  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5312  "vgetq_lane");
5313  case NEON::BI__builtin_neon_vget_lane_i32:
5314  case NEON::BI__builtin_neon_vdups_lane_i32:
5315  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
5316  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5317  "vget_lane");
5318  case NEON::BI__builtin_neon_vdups_lane_f32:
5319  Ops[0] = Builder.CreateBitCast(Ops[0],
5320  llvm::VectorType::get(FloatTy, 2));
5321  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5322  "vdups_lane");
5323  case NEON::BI__builtin_neon_vgetq_lane_i32:
5324  case NEON::BI__builtin_neon_vdups_laneq_i32:
5325  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
5326  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5327  "vgetq_lane");
5328  case NEON::BI__builtin_neon_vget_lane_i64:
5329  case NEON::BI__builtin_neon_vdupd_lane_i64:
5330  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
5331  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5332  "vget_lane");
5333  case NEON::BI__builtin_neon_vdupd_lane_f64:
5334  Ops[0] = Builder.CreateBitCast(Ops[0],
5335  llvm::VectorType::get(DoubleTy, 1));
5336  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5337  "vdupd_lane");
5338  case NEON::BI__builtin_neon_vgetq_lane_i64:
5339  case NEON::BI__builtin_neon_vdupd_laneq_i64:
5340  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
5341  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5342  "vgetq_lane");
5343  case NEON::BI__builtin_neon_vget_lane_f32:
5344  Ops[0] = Builder.CreateBitCast(Ops[0],
5345  llvm::VectorType::get(FloatTy, 2));
5346  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5347  "vget_lane");
5348  case NEON::BI__builtin_neon_vget_lane_f64:
5349  Ops[0] = Builder.CreateBitCast(Ops[0],
5350  llvm::VectorType::get(DoubleTy, 1));
5351  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5352  "vget_lane");
5353  case NEON::BI__builtin_neon_vgetq_lane_f32:
5354  case NEON::BI__builtin_neon_vdups_laneq_f32:
5355  Ops[0] = Builder.CreateBitCast(Ops[0],
5356  llvm::VectorType::get(FloatTy, 4));
5357  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5358  "vgetq_lane");
5359  case NEON::BI__builtin_neon_vgetq_lane_f64:
5360  case NEON::BI__builtin_neon_vdupd_laneq_f64:
5361  Ops[0] = Builder.CreateBitCast(Ops[0],
5362  llvm::VectorType::get(DoubleTy, 2));
5363  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5364  "vgetq_lane");
5365  case NEON::BI__builtin_neon_vaddd_s64:
5366  case NEON::BI__builtin_neon_vaddd_u64:
5367  return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
5368  case NEON::BI__builtin_neon_vsubd_s64:
5369  case NEON::BI__builtin_neon_vsubd_u64:
5370  return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
5371  case NEON::BI__builtin_neon_vqdmlalh_s16:
5372  case NEON::BI__builtin_neon_vqdmlslh_s16: {
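  // The sqdmull intrinsic used here is only available on vectors, so wrap
  // the i16 operands in <4 x i16> vectors and keep only lane 0 of the
  // <4 x i32> product before the saturating accumulate.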
5373  SmallVector<Value *, 2> ProductOps;
5374  ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5375  ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
5376  llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5377  Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5378  ProductOps, "vqdmlXl");
5379  Constant *CI = ConstantInt::get(SizeTy, 0);
5380  Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5381 
5382  unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
5383  ? Intrinsic::aarch64_neon_sqadd
5384  : Intrinsic::aarch64_neon_sqsub;
5385  return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
5386  }
5387  case NEON::BI__builtin_neon_vqshlud_n_s64: {
5388  Ops.push_back(EmitScalarExpr(E->getArg(1)));
5389  Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5390  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
5391  Ops, "vqshlu_n");
5392  }
5393  case NEON::BI__builtin_neon_vqshld_n_u64:
5394  case NEON::BI__builtin_neon_vqshld_n_s64: {
5395  unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
5396  ? Intrinsic::aarch64_neon_uqshl
5397  : Intrinsic::aarch64_neon_sqshl;
5398  Ops.push_back(EmitScalarExpr(E->getArg(1)));
5399  Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5400  return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
5401  }
5402  case NEON::BI__builtin_neon_vrshrd_n_u64:
5403  case NEON::BI__builtin_neon_vrshrd_n_s64: {
5404  unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
5405  ? Intrinsic::aarch64_neon_urshl
5406  : Intrinsic::aarch64_neon_srshl;
5407  Ops.push_back(EmitScalarExpr(E->getArg(1)));
5408  int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
5409  Ops[1] = ConstantInt::get(Int64Ty, -SV);
5410  return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
5411  }
5412  case NEON::BI__builtin_neon_vrsrad_n_u64:
5413  case NEON::BI__builtin_neon_vrsrad_n_s64: {
5414  unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
5415  ? Intrinsic::aarch64_neon_urshl
5416  : Intrinsic::aarch64_neon_srshl;
5417  Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5418  Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
5419  Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
5420  {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
5421  return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
5422  }
5423  case NEON::BI__builtin_neon_vshld_n_s64:
5424  case NEON::BI__builtin_neon_vshld_n_u64: {
5425  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5426  return Builder.CreateShl(
5427  Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
5428  }
5429  case NEON::BI__builtin_neon_vshrd_n_s64: {
5430  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5431  return Builder.CreateAShr(
5432  Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5433  Amt->getZExtValue())),
5434  "shrd_n");
5435  }
5436  case NEON::BI__builtin_neon_vshrd_n_u64: {
5437  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5438  uint64_t ShiftAmt = Amt->getZExtValue();
5439  // Right-shifting an unsigned value by its size yields 0.
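  // (An IR lshr by the full bit width is undefined, so emit the known
  // result directly instead of the shift.)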
5440  if (ShiftAmt == 64)
5441  return ConstantInt::get(Int64Ty, 0);
5442  return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
5443  "shrd_n");
5444  }
5445  case NEON::BI__builtin_neon_vsrad_n_s64: {
5446  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5447  Ops[1] = Builder.CreateAShr(
5448  Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5449  Amt->getZExtValue())),
5450  "shrd_n");
5451  return Builder.CreateAdd(Ops[0], Ops[1]);
5452  }
5453  case NEON::BI__builtin_neon_vsrad_n_u64: {
5454  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5455  uint64_t ShiftAmt = Amt->getZExtValue();
5456  // Right-shifting an unsigned value by its size yields 0.
5457  // As Op + 0 = Op, return Ops[0] directly.
5458  if (ShiftAmt == 64)
5459  return Ops[0];
5460  Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
5461  "shrd_n");
5462  return Builder.CreateAdd(Ops[0], Ops[1]);
5463  }
5464  case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
5465  case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
5466  case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
5467  case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
5468  Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5469  "lane");
5470  SmallVector<Value *, 2> ProductOps;
5471  ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5472  ProductOps.push_back(vectorWrapScalar16(Ops[2]));
5473  llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5474  Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5475  ProductOps, "vqdmlXl");
5476  Constant *CI = ConstantInt::get(SizeTy, 0);
5477  Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5478  Ops.pop_back();
5479 
5480  unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
5481  BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
5482  ? Intrinsic::aarch64_neon_sqadd
5483  : Intrinsic::aarch64_neon_sqsub;
5484  return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
5485  }
5486  case NEON::BI__builtin_neon_vqdmlals_s32:
5487  case NEON::BI__builtin_neon_vqdmlsls_s32: {
5488  SmallVector<Value *, 2> ProductOps;
5489  ProductOps.push_back(Ops[1]);
5490  ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
5491  Ops[1] =
5492  EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5493  ProductOps, "vqdmlXl");
5494 
5495  unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
5496  ? Intrinsic::aarch64_neon_sqadd
5497  : Intrinsic::aarch64_neon_sqsub;
5498  return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
5499  }
5500  case NEON::BI__builtin_neon_vqdmlals_lane_s32:
5501  case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
5502  case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
5503  case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
5504  Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5505  "lane");
5506  SmallVector<Value *, 2> ProductOps;
5507  ProductOps.push_back(Ops[1]);
5508  ProductOps.push_back(Ops[2]);
5509  Ops[1] =
5510  EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5511  ProductOps, "vqdmlXl");
5512  Ops.pop_back();
5513 
5514  unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
5515  BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
5516  ? Intrinsic::aarch64_neon_sqadd
5517  : Intrinsic::aarch64_neon_sqsub;
5518  return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
5519  }
5520  }
5521 
5522  llvm::VectorType *VTy = GetNeonType(this, Type);
5523  llvm::Type *Ty = VTy;
5524  if (!Ty)
5525  return nullptr;
5526 
5527  // Not all intrinsics handled by the common case work for AArch64 yet, so only
5528  // defer to common code if it's been added to our special map.
5529  Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
5530  AArch64SIMDIntrinsicsProvenSorted);
5531 
5532  if (Builtin)
5533  return EmitCommonNeonBuiltinExpr(
5534  Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
5535  Builtin->NameHint, Builtin->TypeModifier, E, Ops,
5536  /*never use addresses*/ Address::invalid(), Address::invalid());
5537 
5538  if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
5539  return V;
5540 
5541  unsigned Int;
5542  switch (BuiltinID) {
5543  default: return nullptr;
5544  case NEON::BI__builtin_neon_vbsl_v:
5545  case NEON::BI__builtin_neon_vbslq_v: {
5546  llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
5547  Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
5548  Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
5549  Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
5550 
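  // vbsl(mask, a, b) == (mask & a) | (~mask & b), computed on the integer
  // form of the vectors.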
5551  Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
5552  Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
5553  Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
5554  return Builder.CreateBitCast(Ops[0], Ty);
5555  }
5556  case NEON::BI__builtin_neon_vfma_lane_v:
5557  case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
5558  // The ARM builtins (and instructions) have the addend as the first
5559  // operand, but the 'fma' intrinsics have it last. Swap it around here.
5560  Value *Addend = Ops[0];
5561  Value *Multiplicand = Ops[1];
5562  Value *LaneSource = Ops[2];
5563  Ops[0] = Multiplicand;
5564  Ops[1] = LaneSource;
5565  Ops[2] = Addend;
5566 
5567  // Now adjust things to handle the lane access.
5568  llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
5569  llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
5570  VTy;
5571  llvm::Constant *cst = cast<Constant>(Ops[3]);
5572  Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
5573  Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
5574  Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
5575 
5576  Ops.pop_back();
5577  Int = Intrinsic::fma;
5578  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
5579  }
5580  case NEON::BI__builtin_neon_vfma_laneq_v: {
5581  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
5582  // v1f64 fma should be mapped to Neon scalar f64 fma
5583  if (VTy && VTy->getElementType() == DoubleTy) {
5584  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5585  Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5586  llvm::Type *VTy = GetNeonType(this,
5587  NeonTypeFlags(NeonTypeFlags::Float64, false, true));
5588  Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
5589  Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
5590  Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
5591  Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
5592  return Builder.CreateBitCast(Result, Ty);
5593  }
5594  Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5595  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5596  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5597 
5598  llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
5599  VTy->getNumElements() * 2);
5600  Ops[2] = Builder.CreateBitCast(Ops[2], STy);
5601  Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
5602  cast<ConstantInt>(Ops[3]));
5603  Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
5604 
5605  return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
5606  }
5607  case NEON::BI__builtin_neon_vfmaq_laneq_v: {
5608  Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5609  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5610  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5611 
5612  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5613  Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
5614  return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
5615  }
5616  case NEON::BI__builtin_neon_vfmas_lane_f32:
5617  case NEON::BI__builtin_neon_vfmas_laneq_f32:
5618  case NEON::BI__builtin_neon_vfmad_lane_f64:
5619  case NEON::BI__builtin_neon_vfmad_laneq_f64: {
5620  Ops.push_back(EmitScalarExpr(E->getArg(3)));
5621  llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
5622  Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5623  Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
5624  return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
5625  }
5626  case NEON::BI__builtin_neon_vmull_v:
5627  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5628  Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
5629  if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
5630  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
5631  case NEON::BI__builtin_neon_vmax_v:
5632  case NEON::BI__builtin_neon_vmaxq_v:
5633  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5634  Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
5635  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
5636  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
5637  case NEON::BI__builtin_neon_vmin_v:
5638  case NEON::BI__builtin_neon_vminq_v:
5639  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5640  Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
5641  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
5642  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
5643  case NEON::BI__builtin_neon_vabd_v:
5644  case NEON::BI__builtin_neon_vabdq_v:
5645  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5646  Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
5647  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
5648  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
5649  case NEON::BI__builtin_neon_vpadal_v:
5650  case NEON::BI__builtin_neon_vpadalq_v: {
5651  unsigned ArgElts = VTy->getNumElements();
5652  llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
5653  unsigned BitWidth = EltTy->getBitWidth();
5654  llvm::Type *ArgTy = llvm::VectorType::get(
5655  llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
5656  llvm::Type* Tys[2] = { VTy, ArgTy };
5657  Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
5658  SmallVector<llvm::Value*, 1> TmpOps;
5659  TmpOps.push_back(Ops[1]);
5660  Function *F = CGM.getIntrinsic(Int, Tys);
5661  llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
5662  llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
5663  return Builder.CreateAdd(tmp, addend);
5664  }
5665  case NEON::BI__builtin_neon_vpmin_v:
5666  case NEON::BI__builtin_neon_vpminq_v:
5667  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5668  Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
5669  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
5670  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
5671  case NEON::BI__builtin_neon_vpmax_v:
5672  case NEON::BI__builtin_neon_vpmaxq_v:
5673  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5674  Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
5675  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
5676  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
5677  case NEON::BI__builtin_neon_vminnm_v:
5678  case NEON::BI__builtin_neon_vminnmq_v:
5679  Int = Intrinsic::aarch64_neon_fminnm;
5680  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
5681  case NEON::BI__builtin_neon_vmaxnm_v:
5682  case NEON::BI__builtin_neon_vmaxnmq_v:
5683  Int = Intrinsic::aarch64_neon_fmaxnm;
5684  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
5685  case NEON::BI__builtin_neon_vrecpss_f32: {
5686  Ops.push_back(EmitScalarExpr(E->getArg(1)));
5687  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
5688  Ops, "vrecps");
5689  }
5690  case NEON::BI__builtin_neon_vrecpsd_f64: {
5691  Ops.push_back(EmitScalarExpr(E->getArg(1)));
5692  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
5693  Ops, "vrecps");
5694  }
5695  case NEON::BI__builtin_neon_vqshrun_n_v:
5696  Int = Intrinsic::aarch64_neon_sqshrun;
5697  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
5698  case NEON::BI__builtin_neon_vqrshrun_n_v:
5699  Int = Intrinsic::aarch64_neon_sqrshrun;
5700  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
5701  case NEON::BI__builtin_neon_vqshrn_n_v:
5702  Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
5703  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
5704  case NEON::BI__builtin_neon_vrshrn_n_v:
5705  Int = Intrinsic::aarch64_neon_rshrn;
5706  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
5707  case NEON::BI__builtin_neon_vqrshrn_n_v:
5708  Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
5709  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
5710  case NEON::BI__builtin_neon_vrnda_v:
5711  case NEON::BI__builtin_neon_vrndaq_v: {
5712  Int = Intrinsic::round;
5713  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
5714  }
5715  case NEON::BI__builtin_neon_vrndi_v:
5716  case NEON::BI__builtin_neon_vrndiq_v: {
5717  Int = Intrinsic::nearbyint;
5718  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
5719  }
5720  case NEON::BI__builtin_neon_vrndm_v:
5721  case NEON::BI__builtin_neon_vrndmq_v: {
5722  Int = Intrinsic::floor;
5723  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
5724  }
5725  case NEON::BI__builtin_neon_vrndn_v:
5726  case NEON::BI__builtin_neon_vrndnq_v: {
5727  Int = Intrinsic::aarch64_neon_frintn;
5728  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
5729  }
5730  case NEON::BI__builtin_neon_vrndp_v:
5731  case NEON::BI__builtin_neon_vrndpq_v: {
5732  Int = Intrinsic::ceil;
5733  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
5734  }
5735  case NEON::BI__builtin_neon_vrndx_v:
5736  case NEON::BI__builtin_neon_vrndxq_v: {
5737  Int = Intrinsic::rint;
5738  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
5739  }
5740  case NEON::BI__builtin_neon_vrnd_v:
5741  case NEON::BI__builtin_neon_vrndq_v: {
5742  Int = Intrinsic::trunc;
5743  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
5744  }
5745  case NEON::BI__builtin_neon_vceqz_v:
5746  case NEON::BI__builtin_neon_vceqzq_v:
5747  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
5748  ICmpInst::ICMP_EQ, "vceqz");
5749  case NEON::BI__builtin_neon_vcgez_v:
5750  case NEON::BI__builtin_neon_vcgezq_v:
5751  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
5752  ICmpInst::ICMP_SGE, "vcgez");
5753  case NEON::BI__builtin_neon_vclez_v:
5754  case NEON::BI__builtin_neon_vclezq_v:
5755  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
5756  ICmpInst::ICMP_SLE, "vclez");
5757  case NEON::BI__builtin_neon_vcgtz_v:
5758  case NEON::BI__builtin_neon_vcgtzq_v:
5759  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
5760  ICmpInst::ICMP_SGT, "vcgtz");
5761  case NEON::BI__builtin_neon_vcltz_v:
5762  case NEON::BI__builtin_neon_vcltzq_v:
5763  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
5764  ICmpInst::ICMP_SLT, "vcltz");
5765  case NEON::BI__builtin_neon_vcvt_f64_v:
5766  case NEON::BI__builtin_neon_vcvtq_f64_v:
5767  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5768  Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
5769  return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
5770  : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
5771  case NEON::BI__builtin_neon_vcvt_f64_f32: {
5772  assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
5773  "unexpected vcvt_f64_f32 builtin");
5774  NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
5775  Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
5776 
5777  return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
5778  }
5779  case NEON::BI__builtin_neon_vcvt_f32_f64: {
5780  assert(Type.getEltType() == NeonTypeFlags::Float32 &&
5781  "unexpected vcvt_f32_f64 builtin");
5782  NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
5783  Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
5784 
5785  return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
5786  }
5787  case NEON::BI__builtin_neon_vcvt_s32_v:
5788  case NEON::BI__builtin_neon_vcvt_u32_v:
5789  case NEON::BI__builtin_neon_vcvt_s64_v:
5790  case NEON::BI__builtin_neon_vcvt_u64_v:
5791  case NEON::BI__builtin_neon_vcvtq_s32_v:
5792  case NEON::BI__builtin_neon_vcvtq_u32_v:
5793  case NEON::BI__builtin_neon_vcvtq_s64_v:
5794  case NEON::BI__builtin_neon_vcvtq_u64_v: {
5795  Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
5796  if (usgn)
5797  return Builder.CreateFPToUI(Ops[0], Ty);
5798  return Builder.CreateFPToSI(Ops[0], Ty);
5799  }
5800  case NEON::BI__builtin_neon_vcvta_s32_v:
5801  case NEON::BI__builtin_neon_vcvtaq_s32_v:
5802  case NEON::BI__builtin_neon_vcvta_u32_v:
5803  case NEON::BI__builtin_neon_vcvtaq_u32_v:
5804  case NEON::BI__builtin_neon_vcvta_s64_v:
5805  case NEON::BI__builtin_neon_vcvtaq_s64_v:
5806  case NEON::BI__builtin_neon_vcvta_u64_v:
5807  case NEON::BI__builtin_neon_vcvtaq_u64_v: {
5808  Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
5809  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5810  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
5811  }
5812  case NEON::BI__builtin_neon_vcvtm_s32_v:
5813  case NEON::BI__builtin_neon_vcvtmq_s32_v:
5814  case NEON::BI__builtin_neon_vcvtm_u32_v:
5815  case NEON::BI__builtin_neon_vcvtmq_u32_v:
5816  case NEON::BI__builtin_neon_vcvtm_s64_v:
5817  case NEON::BI__builtin_neon_vcvtmq_s64_v:
5818  case NEON::BI__builtin_neon_vcvtm_u64_v:
5819  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
5820  Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
5821  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5822  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
5823  }
5824  case NEON::BI__builtin_neon_vcvtn_s32_v:
5825  case NEON::BI__builtin_neon_vcvtnq_s32_v:
5826  case NEON::BI__builtin_neon_vcvtn_u32_v:
5827  case NEON::BI__builtin_neon_vcvtnq_u32_v:
5828  case NEON::BI__builtin_neon_vcvtn_s64_v:
5829  case NEON::BI__builtin_neon_vcvtnq_s64_v:
5830  case NEON::BI__builtin_neon_vcvtn_u64_v:
5831  case NEON::BI__builtin_neon_vcvtnq_u64_v: {
5832  Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
5833  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5834  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
5835  }
5836  case NEON::BI__builtin_neon_vcvtp_s32_v:
5837  case NEON::BI__builtin_neon_vcvtpq_s32_v:
5838  case NEON::BI__builtin_neon_vcvtp_u32_v:
5839  case NEON::BI__builtin_neon_vcvtpq_u32_v:
5840  case NEON::BI__builtin_neon_vcvtp_s64_v:
5841  case NEON::BI__builtin_neon_vcvtpq_s64_v:
5842  case NEON::BI__builtin_neon_vcvtp_u64_v:
5843  case NEON::BI__builtin_neon_vcvtpq_u64_v: {
5844  Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
5845  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5846  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
5847  }
5848  case NEON::BI__builtin_neon_vmulx_v:
5849  case NEON::BI__builtin_neon_vmulxq_v: {
5850  Int = Intrinsic::aarch64_neon_fmulx;
5851  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
5852  }
5853  case NEON::BI__builtin_neon_vmul_lane_v:
5854  case NEON::BI__builtin_neon_vmul_laneq_v: {
5855  // v1f64 vmul_lane should be mapped to Neon scalar mul lane
5856  bool Quad = false;
5857  if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
5858  Quad = true;
5859  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5860  llvm::Type *VTy = GetNeonType(this,
5861  NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
5862  Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
5863  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
5864  Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
5865  return Builder.CreateBitCast(Result, Ty);
5866  }
5867  case NEON::BI__builtin_neon_vnegd_s64:
5868  return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
5869  case NEON::BI__builtin_neon_vpmaxnm_v:
5870  case NEON::BI__builtin_neon_vpmaxnmq_v: {
5871  Int = Intrinsic::aarch64_neon_fmaxnmp;
5872  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
5873  }
5874  case NEON::BI__builtin_neon_vpminnm_v:
5875  case NEON::BI__builtin_neon_vpminnmq_v: {
5876  Int = Intrinsic::aarch64_neon_fminnmp;
5877  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
5878  }
5879  case NEON::BI__builtin_neon_vsqrt_v:
5880  case NEON::BI__builtin_neon_vsqrtq_v: {
5881  Int = Intrinsic::sqrt;
5882  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5883  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
5884  }
5885  case NEON::BI__builtin_neon_vrbit_v:
5886  case NEON::BI__builtin_neon_vrbitq_v: {
5887  Int = Intrinsic::aarch64_neon_rbit;
5888  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
5889  }
5890  case NEON::BI__builtin_neon_vaddv_u8:
5891  // FIXME: These are handled by the AArch64 scalar code.
5892  usgn = true;
5893  // FALLTHROUGH
5894  case NEON::BI__builtin_neon_vaddv_s8: {
5895  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5896  Ty = Int32Ty;
5897  VTy = llvm::VectorType::get(Int8Ty, 8);
5898  llvm::Type *Tys[2] = { Ty, VTy };
5899  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5900  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5901  return Builder.CreateTrunc(Ops[0], Int8Ty);
5902  }
5903  case NEON::BI__builtin_neon_vaddv_u16:
5904  usgn = true;
5905  // FALLTHROUGH
5906  case NEON::BI__builtin_neon_vaddv_s16: {
5907  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5908  Ty = Int32Ty;
5909  VTy = llvm::VectorType::get(Int16Ty, 4);
5910  llvm::Type *Tys[2] = { Ty, VTy };
5911  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5912  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5913  return Builder.CreateTrunc(Ops[0], Int16Ty);
5914  }
5915  case NEON::BI__builtin_neon_vaddvq_u8:
5916  usgn = true;
5917  // FALLTHROUGH
5918  case NEON::BI__builtin_neon_vaddvq_s8: {
5919  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5920  Ty = Int32Ty;
5921  VTy = llvm::VectorType::get(Int8Ty, 16);
5922  llvm::Type *Tys[2] = { Ty, VTy };
5923  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5924  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5925  return Builder.CreateTrunc(Ops[0], Int8Ty);
5926  }
5927  case NEON::BI__builtin_neon_vaddvq_u16:
5928  usgn = true;
5929  // FALLTHROUGH
5930  case NEON::BI__builtin_neon_vaddvq_s16: {
5931  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5932  Ty = Int32Ty;
5933  VTy = llvm::VectorType::get(Int16Ty, 8);
5934  llvm::Type *Tys[2] = { Ty, VTy };
5935  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5936  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5937  return Builder.CreateTrunc(Ops[0], Int16Ty);
5938  }
5939  case NEON::BI__builtin_neon_vmaxv_u8: {
5940  Int = Intrinsic::aarch64_neon_umaxv;
5941  Ty = Int32Ty;
5942  VTy = llvm::VectorType::get(Int8Ty, 8);
5943  llvm::Type *Tys[2] = { Ty, VTy };
5944  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5945  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5946  return Builder.CreateTrunc(Ops[0], Int8Ty);
5947  }
5948  case NEON::BI__builtin_neon_vmaxv_u16: {
5949  Int = Intrinsic::aarch64_neon_umaxv;
5950  Ty = Int32Ty;
5951  VTy = llvm::VectorType::get(Int16Ty, 4);
5952  llvm::Type *Tys[2] = { Ty, VTy };
5953  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5954  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5955  return Builder.CreateTrunc(Ops[0], Int16Ty);
5956  }
5957  case NEON::BI__builtin_neon_vmaxvq_u8: {
5958  Int = Intrinsic::aarch64_neon_umaxv;
5959  Ty = Int32Ty;
5960  VTy = llvm::VectorType::get(Int8Ty, 16);
5961  llvm::Type *Tys[2] = { Ty, VTy };
5962  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5963  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5964  return Builder.CreateTrunc(Ops[0], Int8Ty);
5965  }
5966  case NEON::BI__builtin_neon_vmaxvq_u16: {
5967  Int = Intrinsic::aarch64_neon_umaxv;
5968  Ty = Int32Ty;
5969  VTy = llvm::VectorType::get(Int16Ty, 8);
5970  llvm::Type *Tys[2] = { Ty, VTy };
5971  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5972  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5973  return Builder.CreateTrunc(Ops[0], Int16Ty);
5974  }
5975  case NEON::BI__builtin_neon_vmaxv_s8: {
5976  Int = Intrinsic::aarch64_neon_smaxv;
5977  Ty = Int32Ty;
5978  VTy = llvm::VectorType::get(Int8Ty, 8);
5979  llvm::Type *Tys[2] = { Ty, VTy };
5980  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5981  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5982  return Builder.CreateTrunc(Ops[0], Int8Ty);
5983  }
5984  case NEON::BI__builtin_neon_vmaxv_s16: {
5985  Int = Intrinsic::aarch64_neon_smaxv;
5986  Ty = Int32Ty;
5987  VTy = llvm::VectorType::get(Int16Ty, 4);
5988  llvm::Type *Tys[2] = { Ty, VTy };
5989  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5990  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5991  return Builder.CreateTrunc(Ops[0], Int16Ty);
5992  }
5993  case NEON::BI__builtin_neon_vmaxvq_s8: {
5994  Int = Intrinsic::aarch64_neon_smaxv;
5995  Ty = Int32Ty;
5996  VTy = llvm::VectorType::get(Int8Ty, 16);
5997  llvm::Type *Tys[2] = { Ty, VTy };
5998  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5999  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6000  return Builder.CreateTrunc(Ops[0], Int8Ty);
6001  }
6002  case NEON::BI__builtin_neon_vmaxvq_s16: {
6003  Int = Intrinsic::aarch64_neon_smaxv;
6004  Ty = Int32Ty;
6005  VTy = llvm::VectorType::get(Int16Ty, 8);
6006  llvm::Type *Tys[2] = { Ty, VTy };
6007  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6008  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6009  return Builder.CreateTrunc(Ops[0], Int16Ty);
6010  }
6011  case NEON::BI__builtin_neon_vminv_u8: {
6012  Int = Intrinsic::aarch64_neon_uminv;
6013  Ty = Int32Ty;
6014  VTy = llvm::VectorType::get(Int8Ty, 8);
6015  llvm::Type *Tys[2] = { Ty, VTy };
6016  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6017  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6018  return Builder.CreateTrunc(Ops[0], Int8Ty);
6019  }
6020  case NEON::BI__builtin_neon_vminv_u16: {
6021  Int = Intrinsic::aarch64_neon_uminv;
6022  Ty = Int32Ty;
6023  VTy = llvm::VectorType::get(Int16Ty, 4);
6024  llvm::Type *Tys[2] = { Ty, VTy };
6025  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6026  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6027  return Builder.CreateTrunc(Ops[0], Int16Ty);
6028  }
6029  case NEON::BI__builtin_neon_vminvq_u8: {
6030  Int = Intrinsic::aarch64_neon_uminv;
6031  Ty = Int32Ty;
6032  VTy = llvm::VectorType::get(Int8Ty, 16);
6033  llvm::Type *Tys[2] = { Ty, VTy };
6034  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6035  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6036  return Builder.CreateTrunc(Ops[0], Int8Ty);
6037  }
6038  case NEON::BI__builtin_neon_vminvq_u16: {
6039  Int = Intrinsic::aarch64_neon_uminv;
6040  Ty = Int32Ty;
6041  VTy = llvm::VectorType::get(Int16Ty, 8);
6042  llvm::Type *Tys[2] = { Ty, VTy };
6043  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6044  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6045  return Builder.CreateTrunc(Ops[0], Int16Ty);
6046  }
6047  case NEON::BI__builtin_neon_vminv_s8: {
6048  Int = Intrinsic::aarch64_neon_sminv;
6049  Ty = Int32Ty;
6050  VTy = llvm::VectorType::get(Int8Ty, 8);
6051  llvm::Type *Tys[2] = { Ty, VTy };
6052  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6053  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6054  return Builder.CreateTrunc(Ops[0], Int8Ty);
6055  }
6056  case NEON::BI__builtin_neon_vminv_s16: {
6057  Int = Intrinsic::aarch64_neon_sminv;
6058  Ty = Int32Ty;
6059  VTy = llvm::VectorType::get(Int16Ty, 4);
6060  llvm::Type *Tys[2] = { Ty, VTy };
6061  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6062  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6063  return Builder.CreateTrunc(Ops[0], Int16Ty);
6064  }
6065  case NEON::BI__builtin_neon_vminvq_s8: {
6066  Int = Intrinsic::aarch64_neon_sminv;
6067  Ty = Int32Ty;
6068  VTy = llvm::VectorType::get(Int8Ty, 16);
6069  llvm::Type *Tys[2] = { Ty, VTy };
6070  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6071  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6072  return Builder.CreateTrunc(Ops[0], Int8Ty);
6073  }
6074  case NEON::BI__builtin_neon_vminvq_s16: {
6075  Int = Intrinsic::aarch64_neon_sminv;
6076  Ty = Int32Ty;
6077  VTy = llvm::VectorType::get(Int16Ty, 8);
6078  llvm::Type *Tys[2] = { Ty, VTy };
6079  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6080  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6081  return Builder.CreateTrunc(Ops[0], Int16Ty);
6082  }
6083  case NEON::BI__builtin_neon_vmul_n_f64: {
6084  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6085  Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
6086  return Builder.CreateFMul(Ops[0], RHS);
6087  }
6088  case NEON::BI__builtin_neon_vaddlv_u8: {
6089  Int = Intrinsic::aarch64_neon_uaddlv;
6090  Ty = Int32Ty;
6091  VTy = llvm::VectorType::get(Int8Ty, 8);
6092  llvm::Type *Tys[2] = { Ty, VTy };
6093  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6094  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6095  return Builder.CreateTrunc(Ops[0], Int16Ty);
6096  }
6097  case NEON::BI__builtin_neon_vaddlv_u16: {
6098  Int = Intrinsic::aarch64_neon_uaddlv;
6099  Ty = Int32Ty;
6100  VTy = llvm::VectorType::get(Int16Ty, 4);
6101  llvm::Type *Tys[2] = { Ty, VTy };
6102  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6103  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6104  }
6105  case NEON::BI__builtin_neon_vaddlvq_u8: {
6106  Int = Intrinsic::aarch64_neon_uaddlv;
6107  Ty = Int32Ty;
6108  VTy = llvm::VectorType::get(Int8Ty, 16);
6109  llvm::Type *Tys[2] = { Ty, VTy };
6110  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6111  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6112  return Builder.CreateTrunc(Ops[0], Int16Ty);
6113  }
6114  case NEON::BI__builtin_neon_vaddlvq_u16: {
6115  Int = Intrinsic::aarch64_neon_uaddlv;
6116  Ty = Int32Ty;
6117  VTy = llvm::VectorType::get(Int16Ty, 8);
6118  llvm::Type *Tys[2] = { Ty, VTy };
6119  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6120  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6121  }
6122  case NEON::BI__builtin_neon_vaddlv_s8: {
6123  Int = Intrinsic::aarch64_neon_saddlv;
6124  Ty = Int32Ty;
6125  VTy = llvm::VectorType::get(Int8Ty, 8);
6126  llvm::Type *Tys[2] = { Ty, VTy };
6127  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6128  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6129  return Builder.CreateTrunc(Ops[0], Int16Ty);
6130  }
6131  case NEON::BI__builtin_neon_vaddlv_s16: {
6132  Int = Intrinsic::aarch64_neon_saddlv;
6133  Ty = Int32Ty;
6134  VTy = llvm::VectorType::get(Int16Ty, 4);
6135  llvm::Type *Tys[2] = { Ty, VTy };
6136  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6137  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6138  }
6139  case NEON::BI__builtin_neon_vaddlvq_s8: {
6140  Int = Intrinsic::aarch64_neon_saddlv;
6141  Ty = Int32Ty;
6142  VTy = llvm::VectorType::get(Int8Ty, 16);
6143  llvm::Type *Tys[2] = { Ty, VTy };
6144  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6145  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6146  return Builder.CreateTrunc(Ops[0], Int16Ty);
6147  }
6148  case NEON::BI__builtin_neon_vaddlvq_s16: {
6149  Int = Intrinsic::aarch64_neon_saddlv;
6150  Ty = Int32Ty;
6151  VTy = llvm::VectorType::get(Int16Ty, 8);
6152  llvm::Type *Tys[2] = { Ty, VTy };
6153  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6154  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6155  }
6156  case NEON::BI__builtin_neon_vsri_n_v:
6157  case NEON::BI__builtin_neon_vsriq_n_v: {
6158  Int = Intrinsic::aarch64_neon_vsri;
6159  llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6160  return EmitNeonCall(Intrin, Ops, "vsri_n");
6161  }
6162  case NEON::BI__builtin_neon_vsli_n_v:
6163  case NEON::BI__builtin_neon_vsliq_n_v: {
6164  Int = Intrinsic::aarch64_neon_vsli;
6165  llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6166  return EmitNeonCall(Intrin, Ops, "vsli_n");
6167  }
6168  case NEON::BI__builtin_neon_vsra_n_v:
6169  case NEON::BI__builtin_neon_vsraq_n_v:
6170  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6171  Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6172  return Builder.CreateAdd(Ops[0], Ops[1]);
6173  case NEON::BI__builtin_neon_vrsra_n_v:
6174  case NEON::BI__builtin_neon_vrsraq_n_v: {
6175  Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6176  SmallVector<llvm::Value*,2> TmpOps;
6177  TmpOps.push_back(Ops[1]);
6178  TmpOps.push_back(Ops[2]);
6179  Function* F = CGM.getIntrinsic(Int, Ty);
6180  llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
6181  Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
6182  return Builder.CreateAdd(Ops[0], tmp);
6183  }
6184  // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
6185  // of an Align parameter here.
6186  case NEON::BI__builtin_neon_vld1_x2_v:
6187  case NEON::BI__builtin_neon_vld1q_x2_v:
6188  case NEON::BI__builtin_neon_vld1_x3_v:
6189  case NEON::BI__builtin_neon_vld1q_x3_v:
6190  case NEON::BI__builtin_neon_vld1_x4_v:
6191  case NEON::BI__builtin_neon_vld1q_x4_v: {
6192  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6193  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6194  llvm::Type *Tys[2] = { VTy, PTy };
6195  unsigned Int;
6196  switch (BuiltinID) {
6197  case NEON::BI__builtin_neon_vld1_x2_v:
6198  case NEON::BI__builtin_neon_vld1q_x2_v:
6199  Int = Intrinsic::aarch64_neon_ld1x2;
6200  break;
6201  case NEON::BI__builtin_neon_vld1_x3_v:
6202  case NEON::BI__builtin_neon_vld1q_x3_v:
6203  Int = Intrinsic::aarch64_neon_ld1x3;
6204  break;
6205  case NEON::BI__builtin_neon_vld1_x4_v:
6206  case NEON::BI__builtin_neon_vld1q_x4_v:
6207  Int = Intrinsic::aarch64_neon_ld1x4;
6208  break;
6209  }
6210  Function *F = CGM.getIntrinsic(Int, Tys);
6211  Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
6212  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6213  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6214  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6215  }
6216  case NEON::BI__builtin_neon_vst1_x2_v:
6217  case NEON::BI__builtin_neon_vst1q_x2_v:
6218  case NEON::BI__builtin_neon_vst1_x3_v:
6219  case NEON::BI__builtin_neon_vst1q_x3_v:
6220  case NEON::BI__builtin_neon_vst1_x4_v:
6221  case NEON::BI__builtin_neon_vst1q_x4_v: {
6222  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6223  llvm::Type *Tys[2] = { VTy, PTy };
6224  unsigned Int;
6225  switch (BuiltinID) {
6226  case NEON::BI__builtin_neon_vst1_x2_v:
6227  case NEON::BI__builtin_neon_vst1q_x2_v:
6228  Int = Intrinsic::aarch64_neon_st1x2;
6229  break;
6230  case NEON::BI__builtin_neon_vst1_x3_v:
6231  case NEON::BI__builtin_neon_vst1q_x3_v:
6232  Int = Intrinsic::aarch64_neon_st1x3;
6233  break;
6234  case NEON::BI__builtin_neon_vst1_x4_v:
6235  case NEON::BI__builtin_neon_vst1q_x4_v:
6236  Int = Intrinsic::aarch64_neon_st1x4;
6237  break;
6238  }
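  // The st1xN intrinsics take the data vectors first and the pointer last,
  // so rotate the pointer from the front of Ops to the back.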
6239  std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6240  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
6241  }
6242  case NEON::BI__builtin_neon_vld1_v:
6243  case NEON::BI__builtin_neon_vld1q_v:
6244  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6245  return Builder.CreateDefaultAlignedLoad(Ops[0]);
6246  case NEON::BI__builtin_neon_vst1_v:
6247  case NEON::BI__builtin_neon_vst1q_v:
6248  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6249  Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6250  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6251  case NEON::BI__builtin_neon_vld1_lane_v:
6252  case NEON::BI__builtin_neon_vld1q_lane_v:
6253  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6254  Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6255  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6256  Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]);
6257  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
6258  case NEON::BI__builtin_neon_vld1_dup_v:
6259  case NEON::BI__builtin_neon_vld1q_dup_v: {
6260  Value *V = UndefValue::get(Ty);
6261  Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6262  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6263  Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]);
6264  llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
6265  Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
6266  return EmitNeonSplat(Ops[0], CI);
6267  }
6268  case NEON::BI__builtin_neon_vst1_lane_v:
6269  case NEON::BI__builtin_neon_vst1q_lane_v:
6270  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6271  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6272  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6273  return Builder.CreateDefaultAlignedStore(Ops[1],
6274  Builder.CreateBitCast(Ops[0], Ty));
6275  case NEON::BI__builtin_neon_vld2_v:
6276  case NEON::BI__builtin_neon_vld2q_v: {
6277  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6278  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6279  llvm::Type *Tys[2] = { VTy, PTy };
6280  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
6281  Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6282  Ops[0] = Builder.CreateBitCast(Ops[0],
6283  llvm::PointerType::getUnqual(Ops[1]->getType()));
6284  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6285  }
6286  case NEON::BI__builtin_neon_vld3_v:
6287  case NEON::BI__builtin_neon_vld3q_v: {
6288  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6289  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6290  llvm::Type *Tys[2] = { VTy, PTy };
6291  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
6292  Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6293  Ops[0] = Builder.CreateBitCast(Ops[0],
6294  llvm::PointerType::getUnqual(Ops[1]->getType()));
6295  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6296  }
6297  case NEON::BI__builtin_neon_vld4_v:
6298  case NEON::BI__builtin_neon_vld4q_v: {
6299  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6300  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6301  llvm::Type *Tys[2] = { VTy, PTy };
6302  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
6303  Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6304  Ops[0] = Builder.CreateBitCast(Ops[0],
6305  llvm::PointerType::getUnqual(Ops[1]->getType()));
6306  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6307  }
6308  case NEON::BI__builtin_neon_vld2_dup_v:
6309  case NEON::BI__builtin_neon_vld2q_dup_v: {
6310  llvm::Type *PTy =
6311  llvm::PointerType::getUnqual(VTy->getElementType());
6312  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6313  llvm::Type *Tys[2] = { VTy, PTy };
6314  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
6315  Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6316  Ops[0] = Builder.CreateBitCast(Ops[0],
6317  llvm::PointerType::getUnqual(Ops[1]->getType()));
6318  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6319  }
6320  case NEON::BI__builtin_neon_vld3_dup_v:
6321  case NEON::BI__builtin_neon_vld3q_dup_v: {
6322  llvm::Type *PTy =
6323  llvm::PointerType::getUnqual(VTy->getElementType());
6324  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6325  llvm::Type *Tys[2] = { VTy, PTy };
6326  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
6327  Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6328  Ops[0] = Builder.CreateBitCast(Ops[0],
6329  llvm::PointerType::getUnqual(Ops[1]->getType()));
6330  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6331  }
6332  case NEON::BI__builtin_neon_vld4_dup_v:
6333  case NEON::BI__builtin_neon_vld4q_dup_v: {
6334  llvm::Type *PTy =
6335  llvm::PointerType::getUnqual(VTy->getElementType());
6336  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6337  llvm::Type *Tys[2] = { VTy, PTy };
6338  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
6339  Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6340  Ops[0] = Builder.CreateBitCast(Ops[0],
6341  llvm::PointerType::getUnqual(Ops[1]->getType()));
6342  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6343  }
6344  case NEON::BI__builtin_neon_vld2_lane_v:
6345  case NEON::BI__builtin_neon_vld2q_lane_v: {
6346  llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6347  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
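  // Move the source pointer (currently Ops[1]) to the end; ld2lane expects
  // (vec, vec, lane, ptr), and Ops[0] remains the return slot.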
6348  Ops.push_back(Ops[1]);
6349  Ops.erase(Ops.begin()+1);
6350  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6351  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6352  Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6353  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
6354  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6355  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6356  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6357  }
6358  case NEON::BI__builtin_neon_vld3_lane_v:
6359  case NEON::BI__builtin_neon_vld3q_lane_v: {
6360  llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6361  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
6362  Ops.push_back(Ops[1]);
6363  Ops.erase(Ops.begin()+1);
6364  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6365  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6366  Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6367  Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6368  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
6369  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6370  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6371  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6372  }
6373  case NEON::BI__builtin_neon_vld4_lane_v:
6374  case NEON::BI__builtin_neon_vld4q_lane_v: {
6375  llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6376  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
6377  Ops.push_back(Ops[1]);
6378  Ops.erase(Ops.begin()+1);
6379  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6380  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6381  Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6382  Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
6383  Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
6384  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
6385  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6386  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6387  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6388  }
6389  case NEON::BI__builtin_neon_vst2_v:
6390  case NEON::BI__builtin_neon_vst2q_v: {
6391  Ops.push_back(Ops[0]);
6392  Ops.erase(Ops.begin());
6393  llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
6394  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
6395  Ops, "");
6396  }
6397  case NEON::BI__builtin_neon_vst2_lane_v:
6398  case NEON::BI__builtin_neon_vst2q_lane_v: {
6399  Ops.push_back(Ops[0]);
6400  Ops.erase(Ops.begin());
6401  Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
6402  llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6403  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
6404  Ops, "");
6405  }
6406  case NEON::BI__builtin_neon_vst3_v:
6407  case NEON::BI__builtin_neon_vst3q_v: {
6408  Ops.push_back(Ops[0]);
6409  Ops.erase(Ops.begin());
6410  llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6411  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
6412  Ops, "");
6413  }
6414  case NEON::BI__builtin_neon_vst3_lane_v:
6415  case NEON::BI__builtin_neon_vst3q_lane_v: {
6416  Ops.push_back(Ops[0]);
6417  Ops.erase(Ops.begin());
6418  Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6419  llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6420  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
6421  Ops, "");
6422  }
6423  case NEON::BI__builtin_neon_vst4_v:
6424  case NEON::BI__builtin_neon_vst4q_v: {
6425  Ops.push_back(Ops[0]);
6426  Ops.erase(Ops.begin());
6427  llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6428  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
6429  Ops, "");
6430  }
6431  case NEON::BI__builtin_neon_vst4_lane_v:
6432  case NEON::BI__builtin_neon_vst4q_lane_v: {
6433  Ops.push_back(Ops[0]);
6434  Ops.erase(Ops.begin());
6435  Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6436  llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
6437  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
6438  Ops, "");
6439  }
6440  case NEON::BI__builtin_neon_vtrn_v:
6441  case NEON::BI__builtin_neon_vtrnq_v: {
6442  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6443  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6444  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6445  Value *SV = nullptr;
6446 
6447  for (unsigned vi = 0; vi != 2; ++vi) {
6448  SmallVector<uint32_t, 16> Indices;
6449  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6450  Indices.push_back(i+vi);
6451  Indices.push_back(i+e+vi);
6452  }
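  // e.g. for a 4-element vector this selects <0,4,2,6> when vi==0 and
  // <1,5,3,7> when vi==1.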
6453  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6454  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
6455  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6456  }
6457  return SV;
6458  }
6459  case NEON::BI__builtin_neon_vuzp_v:
6460  case NEON::BI__builtin_neon_vuzpq_v: {
6461  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6462  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6463  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6464  Value *SV = nullptr;
6465 
6466  for (unsigned vi = 0; vi != 2; ++vi) {
6467  SmallVector<uint32_t, 16> Indices;
6468  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
6469  Indices.push_back(2*i+vi);
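  // e.g. for a 4-element vector this selects <0,2,4,6> when vi==0 and
  // <1,3,5,7> when vi==1.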
6470 
6471  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6472  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
6473  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6474  }
6475  return SV;
6476  }
6477  case NEON::BI__builtin_neon_vzip_v:
6478  case NEON::BI__builtin_neon_vzipq_v: {
6479  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6480  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6481  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6482  Value *SV = nullptr;
6483 
6484  for (unsigned vi = 0; vi != 2; ++vi) {
6485  SmallVector<uint32_t, 16> Indices;
6486  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6487  Indices.push_back((i + vi*e) >> 1);
6488  Indices.push_back(((i + vi*e) >> 1)+e);
6489  }
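  // e.g. for a 4-element vector this selects <0,4,1,5> when vi==0 and
  // <2,6,3,7> when vi==1.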
6490  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6491  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
6492  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6493  }
6494  return SV;
6495  }
6496  case NEON::BI__builtin_neon_vqtbl1q_v: {
6497  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
6498  Ops, "vtbl1");
6499  }
6500  case NEON::BI__builtin_neon_vqtbl2q_v: {
6501  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
6502  Ops, "vtbl2");
6503  }
6504  case NEON::BI__builtin_neon_vqtbl3q_v: {
6505  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
6506  Ops, "vtbl3");
6507  }
6508  case NEON::BI__builtin_neon_vqtbl4q_v: {
6509  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
6510  Ops, "vtbl4");
6511  }
6512  case NEON::BI__builtin_neon_vqtbx1q_v: {
6513  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
6514  Ops, "vtbx1");
6515  }
6516  case NEON::BI__builtin_neon_vqtbx2q_v: {
6517  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
6518  Ops, "vtbx2");
6519  }
6520  case NEON::BI__builtin_neon_vqtbx3q_v: {
6521  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
6522  Ops, "vtbx3");
6523  }
6524  case NEON::BI__builtin_neon_vqtbx4q_v: {
6525  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
6526  Ops, "vtbx4");
6527  }
6528  case NEON::BI__builtin_neon_vsqadd_v:
6529  case NEON::BI__builtin_neon_vsqaddq_v: {
6530  Int = Intrinsic::aarch64_neon_usqadd;
6531  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
6532  }
6533  case NEON::BI__builtin_neon_vuqadd_v:
6534  case NEON::BI__builtin_neon_vuqaddq_v: {
6535  Int = Intrinsic::aarch64_neon_suqadd;
6536  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
6537  }
6538  }
6539 }
6540 
6541 Value *CodeGenFunction::
6542 BuildVector(ArrayRef<llvm::Value*> Ops) {
6543  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
6544  "Not a power-of-two sized vector!");
6545  bool AllConstants = true;
6546  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
6547  AllConstants &= isa<Constant>(Ops[i]);
6548 
6549  // If this is a constant vector, create a ConstantVector.
6550  if (AllConstants) {
6551  SmallVector<llvm::Constant*, 16> CstOps;
6552  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
6553  CstOps.push_back(cast<Constant>(Ops[i]));
6554  return llvm::ConstantVector::get(CstOps);
6555  }
6556 
6557  // Otherwise, insertelement the values to build the vector.
6558  Value *Result =
6559  llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
6560 
6561  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
6562  Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
6563 
6564  return Result;
6565 }
6566 
6567 // Convert the mask from an integer type to a vector of i1.
6568 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
6569  unsigned NumElts) {
6570 
6571  llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
6572  cast<IntegerType>(Mask->getType())->getBitWidth());
6573  Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
6574 
6575  // If we have less than 8 elements, then the starting mask was an i8 and
6576  // we need to extract down to the right number of elements.
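  // For example, with NumElts == 4 the incoming i8 mask is handled roughly as:
  //   %bc  = bitcast i8 %mask to <8 x i1>
  //   %lo4 = shufflevector <8 x i1> %bc, <8 x i1> %bc,
  //                        <4 x i32> <i32 0, i32 1, i32 2, i32 3>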
6577  if (NumElts < 8) {
6578  uint32_t Indices[4];
6579  for (unsigned i = 0; i != NumElts; ++i)
6580  Indices[i] = i;
6581  MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
6582  makeArrayRef(Indices, NumElts),
6583  "extract");
6584  }
6585  return MaskVec;
6586 }
6587 
6588 static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
6589  SmallVectorImpl<Value *> &Ops,
6590  unsigned Align) {
6591  // Cast the pointer to right type.
6592  Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
6593  llvm::PointerType::getUnqual(Ops[1]->getType()));
6594 
6595  // If the mask is all ones just emit a regular store.
6596  if (const auto *C = dyn_cast<Constant>(Ops[2]))
6597  if (C->isAllOnesValue())
6598  return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);
6599 
6600  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
6601  Ops[1]->getType()->getVectorNumElements());
6602 
6603  return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
6604 }
6605 
6606 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
6607  SmallVectorImpl<Value *> &Ops, unsigned Align) {
6608  // Cast the pointer to right type.
6609  Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
6610  llvm::PointerType::getUnqual(Ops[1]->getType()));
6611 
6612  // If the mask is all ones just emit a regular (unmasked) load.
6613  if (const auto *C = dyn_cast<Constant>(Ops[2]))
6614  if (C->isAllOnesValue())
6615  return CGF.Builder.CreateAlignedLoad(Ops[0], Align);
6616 
6617  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
6618  Ops[1]->getType()->getVectorNumElements());
6619 
6620  return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
6621 }
6622 
6623 static Value *EmitX86Select(CodeGenFunction &CGF,
6624  Value *Mask, Value *Op0, Value *Op1) {
6625 
6626  // If the mask is all ones just return first argument.
6627  if (const auto *C = dyn_cast<Constant>(Mask))
6628  if (C->isAllOnesValue())
6629  return Op0;
6630 
6631  Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
6632 
6633  return CGF.Builder.CreateSelect(Mask, Op0, Op1);
6634 }
6635 
6636 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
6637  bool Signed, SmallVectorImpl<Value *> &Ops) {
6638  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
6639  Value *Cmp;
6640 
6641  if (CC == 3) {
6642  Cmp = Constant::getNullValue(
6643  llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
6644  } else if (CC == 7) {
6645  Cmp = Constant::getAllOnesValue(
6646  llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
6647  } else {
6648  ICmpInst::Predicate Pred;
6649  switch (CC) {
6650  default: llvm_unreachable("Unknown condition code");
6651  case 0: Pred = ICmpInst::ICMP_EQ; break;
6652  case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
6653  case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
6654  case 4: Pred = ICmpInst::ICMP_NE; break;
6655  case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
6656  case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
6657  }
6658  Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
6659  }
6660 
6661  const auto *C = dyn_cast<Constant>(Ops.back());
6662  if (!C || !C->isAllOnesValue())
6663  Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));
6664 
6665  if (NumElts < 8) {
6666  uint32_t Indices[8];
6667  for (unsigned i = 0; i != NumElts; ++i)
6668  Indices[i] = i;
6669  for (unsigned i = NumElts; i != 8; ++i)
6670  Indices[i] = i % NumElts + NumElts;
6671  Cmp = CGF.Builder.CreateShuffleVector(
6672  Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
6673  }
6674  return CGF.Builder.CreateBitCast(Cmp,
6675  IntegerType::get(CGF.getLLVMContext(),
6676  std::max(NumElts, 8U)));
6677 }
6678 
6679 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
6680  const CallExpr *E) {
6681  if (BuiltinID == X86::BI__builtin_ms_va_start ||
6682  BuiltinID == X86::BI__builtin_ms_va_end)
6683  return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
6684  BuiltinID == X86::BI__builtin_ms_va_start);
6685  if (BuiltinID == X86::BI__builtin_ms_va_copy) {
6686  // Lower this manually. We can't reliably determine whether or not any
6687  // given va_copy() is for a Win64 va_list from the calling convention
6688  // alone, because it's legal to do this from a System V ABI function.
6689  // With opaque pointer types, we won't have enough information in LLVM
6690  // IR to determine this from the argument types, either. Best to do it
6691  // now, while we have enough information.
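  // In effect, for __builtin_ms_va_copy(dst, src) this emits a single pointer
  // copy, roughly equivalent to *(char **)&dst = *(char **)&src;.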
6692  Address DestAddr = EmitMSVAListRef(E->getArg(0));
6693  Address SrcAddr = EmitMSVAListRef(E->getArg(1));
6694 
6695  llvm::Type *BPP = Int8PtrPtrTy;
6696 
6697  DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
6698  DestAddr.getAlignment());
6699  SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
6700  SrcAddr.getAlignment());
6701 
6702  Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
6703  return Builder.CreateStore(ArgPtr, DestAddr);
6704  }
6705 
6706  SmallVector<Value*, 4> Ops;
6707 
6708  // Find out if any arguments are required to be integer constant expressions.
6709  unsigned ICEArguments = 0;
6710  ASTContext::GetBuiltinTypeError Error;
6711  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6712  assert(Error == ASTContext::GE_None && "Should not codegen an error");
6713 
6714  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
6715  // If this is a normal argument, just emit it as a scalar.
6716  if ((ICEArguments & (1 << i)) == 0) {
6717  Ops.push_back(EmitScalarExpr(E->getArg(i)));
6718  continue;
6719  }
6720 
6721  // If this is required to be a constant, constant fold it so that we know
6722  // that the generated intrinsic gets a ConstantInt.
6723  llvm::APSInt Result;
6724  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
6725  assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
6726  Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
6727  }
6728 
6729  // These exist so that the builtin that takes an immediate can be bounds
6730  // checked by clang to avoid passing bad immediates to the backend. Since
6731  // AVX has a larger immediate than SSE we would need separate builtins to
6732  // do the different bounds checking. Rather than create a clang-specific
6733  // SSE-only builtin, this implements eight separate builtins to match the
6734  // gcc implementation.
6735  auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
6736  Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
6737  llvm::Function *F = CGM.getIntrinsic(ID);
6738  return Builder.CreateCall(F, Ops);
6739  };
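  // For example, __builtin_ia32_cmpltss(a, b) reaches here with Ops = {a, b};
  // getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1) appends the immediate
  // and emits roughly:
  //   call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %b, i8 1)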
6740 
6741  // For the vector forms of FP comparisons, translate the builtins directly to
6742  // IR.
6743  // TODO: The builtins could be removed if the SSE header files used vector
6744  // extension comparisons directly (vector ordered/unordered may need
6745  // additional support via __builtin_isnan()).
6746  auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
6747  Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
6748  llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
6749  llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
6750  Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
6751  return Builder.CreateBitCast(Sext, FPVecTy);
6752  };
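  // For example, getVectorFCmpIR(CmpInst::FCMP_OEQ) on two <4 x float>
  // operands emits roughly:
  //   %cmp  = fcmp oeq <4 x float> %a, %b
  //   %sext = sext <4 x i1> %cmp to <4 x i32>
  //   %res  = bitcast <4 x i32> %sext to <4 x float>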
6753 
6754  switch (BuiltinID) {
6755  default: return nullptr;
6756  case X86::BI__builtin_cpu_supports: {
6757  const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
6758  StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
6759 
6760  // TODO: When/if this becomes more than x86 specific then use a TargetInfo
6761  // based mapping.
6762  // Processor features and mapping to processor feature value.
6763  enum X86Features {
6764  CMOV = 0,
6765  MMX,
6766  POPCNT,
6767  SSE,
6768  SSE2,
6769  SSE3,
6770  SSSE3,
6771  SSE4_1,
6772  SSE4_2,
6773  AVX,
6774  AVX2,
6775  SSE4_A,
6776  FMA4,
6777  XOP,
6778  FMA,
6779  AVX512F,
6780  BMI,
6781  BMI2,
6782  AES,
6783  PCLMUL,
6784  AVX512VL,
6785  AVX512BW,
6786  AVX512DQ,
6787  AVX512CD,
6788  AVX512ER,
6789  AVX512PF,
6790  AVX512VBMI,
6791  AVX512IFMA,
6792  MAX
6793  };
6794 
6795  X86Features Feature = StringSwitch<X86Features>(FeatureStr)
6796  .Case("cmov", X86Features::CMOV)
6797  .Case("mmx", X86Features::MMX)
6798  .Case("popcnt", X86Features::POPCNT)
6799  .Case("sse", X86Features::SSE)
6800  .Case("sse2", X86Features::SSE2)
6801  .Case("sse3", X86Features::SSE3)
6802  .Case("ssse3", X86Features::SSSE3)
6803  .Case("sse4.1", X86Features::SSE4_1)
6804  .Case("sse4.2", X86Features::SSE4_2)
6805  .Case("avx", X86Features::AVX)
6806  .Case("avx2", X86Features::AVX2)
6807  .Case("sse4a", X86Features::SSE4_A)
6808  .Case("fma4", X86Features::FMA4)
6809  .Case("xop", X86Features::XOP)
6810  .Case("fma", X86Features::FMA)
6811  .Case("avx512f", X86Features::AVX512F)
6812  .Case("bmi", X86Features::BMI)
6813  .Case("bmi2", X86Features::BMI2)
6814  .Case("aes", X86Features::AES)
6815  .Case("pclmul", X86Features::PCLMUL)
6816  .Case("avx512vl", X86Features::AVX512VL)
6817  .Case("avx512bw", X86Features::AVX512BW)
6818  .Case("avx512dq", X86Features::AVX512DQ)
6819  .Case("avx512cd", X86Features::AVX512CD)
6820  .Case("avx512er", X86Features::AVX512ER)
6821  .Case("avx512pf", X86Features::AVX512PF)
6822  .Case("avx512vbmi", X86Features::AVX512VBMI)
6823  .Case("avx512ifma", X86Features::AVX512IFMA)
6824  .Default(X86Features::MAX);
6825  assert(Feature != X86Features::MAX && "Invalid feature!");
6826 
6827  // Matching the struct layout from the compiler-rt/libgcc structure that is
6828  // filled in:
6829  // unsigned int __cpu_vendor;
6830  // unsigned int __cpu_type;
6831  // unsigned int __cpu_subtype;
6832  // unsigned int __cpu_features[1];
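  // The GEP below indexes field 3 (__cpu_features), element 0, so the whole
  // sequence is roughly equivalent to:
  //   (__cpu_model.__cpu_features[0] & (1U << Feature)) != 0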
6833  llvm::Type *STy = llvm::StructType::get(
6834  Int32Ty, Int32Ty, Int32Ty, llvm::ArrayType::get(Int32Ty, 1), nullptr);
6835 
6836  // Grab the global __cpu_model.
6837  llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
6838 
6839  // Grab the first (0th) element from the field __cpu_features off of the
6840  // global in the struct STy.
6841  Value *Idxs[] = {
6842  ConstantInt::get(Int32Ty, 0),
6843  ConstantInt::get(Int32Ty, 3),
6844  ConstantInt::get(Int32Ty, 0)
6845  };
6846  Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
6847  Value *Features = Builder.CreateAlignedLoad(CpuFeatures,
6848  CharUnits::fromQuantity(4));
6849 
6850  // Check the value of the bit corresponding to the feature requested.
6851  Value *Bitset = Builder.CreateAnd(
6852  Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature));
6853  return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
6854  }
6855  case X86::BI_mm_prefetch: {
6856  Value *Address = Ops[0];
6857  Value *RW = ConstantInt::get(Int32Ty, 0);
6858  Value *Locality = Ops[1];
6859  Value *Data = ConstantInt::get(Int32Ty, 1);
6860  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
6861  return Builder.CreateCall(F, {Address, RW, Locality, Data});
6862  }
6863  case X86::BI__builtin_ia32_undef128:
6864  case X86::BI__builtin_ia32_undef256:
6865  case X86::BI__builtin_ia32_undef512:
6866  return UndefValue::get(ConvertType(E->getType()));
6867  case X86::BI__builtin_ia32_vec_init_v8qi:
6868  case X86::BI__builtin_ia32_vec_init_v4hi:
6869  case X86::BI__builtin_ia32_vec_init_v2si:
6870  return Builder.CreateBitCast(BuildVector(Ops),
6871  llvm::Type::getX86_MMXTy(getLLVMContext()));
6872  case X86::BI__builtin_ia32_vec_ext_v2si:
6873  return Builder.CreateExtractElement(Ops[0],
6874  llvm::ConstantInt::get(Ops[1]->getType(), 0));
6875  case X86::BI__builtin_ia32_ldmxcsr: {
6876  Address Tmp = CreateMemTemp(E->getArg(0)->getType());
6877  Builder.CreateStore(Ops[0], Tmp);
6878  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
6879  Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
6880  }
6881  case X86::BI__builtin_ia32_stmxcsr: {
6882  Address Tmp = CreateMemTemp(E->getType());
6883  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
6884  Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
6885  return Builder.CreateLoad(Tmp, "stmxcsr");
6886  }
6887  case X86::BI__builtin_ia32_xsave:
6888  case X86::BI__builtin_ia32_xsave64:
6889  case X86::BI__builtin_ia32_xrstor:
6890  case X86::BI__builtin_ia32_xrstor64:
6891  case X86::BI__builtin_ia32_xsaveopt:
6892  case X86::BI__builtin_ia32_xsaveopt64:
6893  case X86::BI__builtin_ia32_xrstors:
6894  case X86::BI__builtin_ia32_xrstors64:
6895  case X86::BI__builtin_ia32_xsavec:
6896  case X86::BI__builtin_ia32_xsavec64:
6897  case X86::BI__builtin_ia32_xsaves:
6898  case X86::BI__builtin_ia32_xsaves64: {
6899  Intrinsic::ID ID;
6900 #define INTRINSIC_X86_XSAVE_ID(NAME) \
6901  case X86::BI__builtin_ia32_##NAME: \
6902  ID = Intrinsic::x86_##NAME; \
6903  break
6904  switch (BuiltinID) {
6905  default: llvm_unreachable("Unsupported intrinsic!");
6906  INTRINSIC_X86_XSAVE_ID(xsave);
6907  INTRINSIC_X86_XSAVE_ID(xsave64);
6908  INTRINSIC_X86_XSAVE_ID(xrstor);
6909  INTRINSIC_X86_XSAVE_ID(xrstor64);
6910  INTRINSIC_X86_XSAVE_ID(xsaveopt);
6911  INTRINSIC_X86_XSAVE_ID(xsaveopt64);
6912  INTRINSIC_X86_XSAVE_ID(xrstors);
6913  INTRINSIC_X86_XSAVE_ID(xrstors64);
6914  INTRINSIC_X86_XSAVE_ID(xsavec);
6915  INTRINSIC_X86_XSAVE_ID(xsavec64);
6916  INTRINSIC_X86_XSAVE_ID(xsaves);
6917  INTRINSIC_X86_XSAVE_ID(xsaves64);
6918  }
6919 #undef INTRINSIC_X86_XSAVE_ID
6920  Value *Mhi = Builder.CreateTrunc(
6921  Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
6922  Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
6923  Ops[1] = Mhi;
6924  Ops.push_back(Mlo);
6925  return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
6926  }
6927  case X86::BI__builtin_ia32_storedqudi128_mask:
6928  case X86::BI__builtin_ia32_storedqusi128_mask:
6929  case X86::BI__builtin_ia32_storedquhi128_mask:
6930  case X86::BI__builtin_ia32_storedquqi128_mask:
6931  case X86::BI__builtin_ia32_storeupd128_mask:
6932  case X86::BI__builtin_ia32_storeups128_mask:
6933  case X86::BI__builtin_ia32_storedqudi256_mask:
6934  case X86::BI__builtin_ia32_storedqusi256_mask:
6935  case X86::BI__builtin_ia32_storedquhi256_mask:
6936  case X86::BI__builtin_ia32_storedquqi256_mask:
6937  case X86::BI__builtin_ia32_storeupd256_mask:
6938  case X86::BI__builtin_ia32_storeups256_mask:
6939  case X86::BI__builtin_ia32_storedqudi512_mask:
6940  case X86::BI__builtin_ia32_storedqusi512_mask:
6941  case X86::BI__builtin_ia32_storedquhi512_mask:
6942  case X86::BI__builtin_ia32_storedquqi512_mask:
6943  case X86::BI__builtin_ia32_storeupd512_mask:
6944  case X86::BI__builtin_ia32_storeups512_mask:
6945  return EmitX86MaskedStore(*this, Ops, 1);
6946 
6947  case X86::BI__builtin_ia32_movdqa32store128_mask:
6948  case X86::BI__builtin_ia32_movdqa64store128_mask:
6949  case X86::BI__builtin_ia32_storeaps128_mask:
6950  case X86::BI__builtin_ia32_storeapd128_mask:
6951  case X86::BI__builtin_ia32_movdqa32store256_mask:
6952  case X86::BI__builtin_ia32_movdqa64store256_mask:
6953  case X86::BI__builtin_ia32_storeaps256_mask:
6954  case X86::BI__builtin_ia32_storeapd256_mask:
6955  case X86::BI__builtin_ia32_movdqa32store512_mask:
6956  case X86::BI__builtin_ia32_movdqa64store512_mask:
6957  case X86::BI__builtin_ia32_storeaps512_mask:
6958  case X86::BI__builtin_ia32_storeapd512_mask: {
6959  unsigned Align =
6960  getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
6961  return EmitX86MaskedStore(*this, Ops, Align);
6962  }
6963  case X86::BI__builtin_ia32_loadups128_mask:
6964  case X86::BI__builtin_ia32_loadups256_mask:
6965  case X86::BI__builtin_ia32_loadups512_mask:
6966  case X86::BI__builtin_ia32_loadupd128_mask:
6967  case X86::BI__builtin_ia32_loadupd256_mask:
6968  case X86::BI__builtin_ia32_loadupd512_mask:
6969  case X86::BI__builtin_ia32_loaddquqi128_mask:
6970  case X86::BI__builtin_ia32_loaddquqi256_mask:
6971  case X86::BI__builtin_ia32_loaddquqi512_mask:
6972  case X86::BI__builtin_ia32_loaddquhi128_mask:
6973  case X86::BI__builtin_ia32_loaddquhi256_mask:
6974  case X86::BI__builtin_ia32_loaddquhi512_mask:
6975  case X86::BI__builtin_ia32_loaddqusi128_mask:
6976  case X86::BI__builtin_ia32_loaddqusi256_mask:
6977  case X86::BI__builtin_ia32_loaddqusi512_mask:
6978  case X86::BI__builtin_ia32_loaddqudi128_mask:
6979  case X86::BI__builtin_ia32_loaddqudi256_mask:
6980  case X86::BI__builtin_ia32_loaddqudi512_mask:
6981  return EmitX86MaskedLoad(*this, Ops, 1);
6982 
6983  case X86::BI__builtin_ia32_loadaps128_mask:
6984  case X86::BI__builtin_ia32_loadaps256_mask:
6985  case X86::BI__builtin_ia32_loadaps512_mask:
6986  case X86::BI__builtin_ia32_loadapd128_mask:
6987  case X86::BI__builtin_ia32_loadapd256_mask:
6988  case X86::BI__builtin_ia32_loadapd512_mask:
6989  case X86::BI__builtin_ia32_movdqa32load128_mask:
6990  case X86::BI__builtin_ia32_movdqa32load256_mask:
6991  case X86::BI__builtin_ia32_movdqa32load512_mask:
6992  case X86::BI__builtin_ia32_movdqa64load128_mask:
6993  case X86::BI__builtin_ia32_movdqa64load256_mask:
6994  case X86::BI__builtin_ia32_movdqa64load512_mask: {
6995  unsigned Align =
6996  getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
6997  return EmitX86MaskedLoad(*this, Ops, Align);
6998  }
6999  case X86::BI__builtin_ia32_storehps:
7000  case X86::BI__builtin_ia32_storelps: {
7001  llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
7002  llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
7003 
7004  // cast val to v2i64
7005  Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
7006 
7007  // extract (0, 1)
7008  unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
7009  llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
7010  Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
7011 
7012  // cast pointer to i64 & store
7013  Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
7014  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7015  }
7016  case X86::BI__builtin_ia32_palignr128:
7017  case X86::BI__builtin_ia32_palignr256:
7018  case X86::BI__builtin_ia32_palignr128_mask:
7019  case X86::BI__builtin_ia32_palignr256_mask:
7020  case X86::BI__builtin_ia32_palignr512_mask: {
7021  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7022 
7023  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7024  assert(NumElts % 16 == 0);
7025 
7026  // If palignr is shifting the pair of vectors more than the size of two
7027  // lanes, emit zero.
7028  if (ShiftVal >= 32)
7029  return llvm::Constant::getNullValue(ConvertType(E->getType()));
7030 
7031  // If palignr is shifting the pair of input vectors more than one lane,
7032  // but less than two lanes, convert to shifting in zeroes.
7033  if (ShiftVal > 16) {
7034  ShiftVal -= 16;
7035  Ops[1] = Ops[0];
7036  Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
7037  }
7038 
7039  uint32_t Indices[64];
7040  // 256-bit palignr operates on 128-bit lanes so we need to handle that
7041  for (unsigned l = 0; l != NumElts; l += 16) {
7042  for (unsigned i = 0; i != 16; ++i) {
7043  unsigned Idx = ShiftVal + i;
7044  if (Idx >= 16)
7045  Idx += NumElts - 16; // End of lane, switch operand.
7046  Indices[l + i] = Idx + l;
7047  }
7048  }
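  // For example, a 128-bit palignr with ShiftVal == 4 yields Indices 4..19
  // into the concatenation Ops[1]:Ops[0], i.e. the last twelve bytes of
  // Ops[1] followed by the first four bytes of Ops[0].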
7049 
7050  Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
7051  makeArrayRef(Indices, NumElts),
7052  "palignr");
7053 
7054  // If this isn't a masked builtin, just return the align operation.
7055  if (Ops.size() == 3)
7056  return Align;
7057 
7058  return EmitX86Select(*this, Ops[4], Align, Ops[3]);
7059  }
7060 
7061  case X86::BI__builtin_ia32_movnti:
7062  case X86::BI__builtin_ia32_movnti64: {
7063  llvm::MDNode *Node = llvm::MDNode::get(
7064  getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
7065 
7066  // Convert the type of the pointer to a pointer to the stored type.
7067  Value *BC = Builder.CreateBitCast(Ops[0],
7068  llvm::PointerType::getUnqual(Ops[1]->getType()),
7069  "cast");
7070  StoreInst *SI = Builder.CreateDefaultAlignedStore(Ops[1], BC);
7071  SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
7072 
7073  // No alignment for scalar intrinsic store.
7074  SI->setAlignment(1);
7075  return SI;
7076  }
7077  case X86::BI__builtin_ia32_movntsd:
7078  case X86::BI__builtin_ia32_movntss: {
7079  llvm::MDNode *Node = llvm::MDNode::get(
7080  getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
7081 
7082  // Extract the 0'th element of the source vector.
7083  Value *Scl = Builder.CreateExtractElement(Ops[1], (uint64_t)0, "extract");
7084 
7085  // Convert the type of the pointer to a pointer to the stored type.
7086  Value *BC = Builder.CreateBitCast(Ops[0],
7087  llvm::PointerType::getUnqual(Scl->getType()),
7088  "cast");
7089 
7090  // Unaligned nontemporal store of the scalar value.
7091  StoreInst *SI = Builder.CreateDefaultAlignedStore(Scl, BC);
7092  SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
7093  SI->setAlignment(1);
7094  return SI;
7095  }
7096 
7097  case X86::BI__builtin_ia32_selectb_128:
7098  case X86::BI__builtin_ia32_selectb_256:
7099  case X86::BI__builtin_ia32_selectb_512:
7100  case X86::BI__builtin_ia32_selectw_128:
7101  case X86::BI__builtin_ia32_selectw_256:
7102  case X86::BI__builtin_ia32_selectw_512:
7103  case X86::BI__builtin_ia32_selectd_128:
7104  case X86::BI__builtin_ia32_selectd_256:
7105  case X86::BI__builtin_ia32_selectd_512:
7106  case X86::BI__builtin_ia32_selectq_128:
7107  case X86::BI__builtin_ia32_selectq_256:
7108  case X86::BI__builtin_ia32_selectq_512:
7109  case X86::BI__builtin_ia32_selectps_128:
7110  case X86::BI__builtin_ia32_selectps_256:
7111  case X86::BI__builtin_ia32_selectps_512:
7112  case X86::BI__builtin_ia32_selectpd_128:
7113  case X86::BI__builtin_ia32_selectpd_256:
7114  case X86::BI__builtin_ia32_selectpd_512:
7115  return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
7116  case X86::BI__builtin_ia32_pcmpeqb128_mask:
7117  case X86::BI__builtin_ia32_pcmpeqb256_mask:
7118  case X86::BI__builtin_ia32_pcmpeqb512_mask:
7119  case X86::BI__builtin_ia32_pcmpeqw128_mask:
7120  case X86::BI__builtin_ia32_pcmpeqw256_mask:
7121  case X86::BI__builtin_ia32_pcmpeqw512_mask:
7122  case X86::BI__builtin_ia32_pcmpeqd128_mask:
7123  case X86::BI__builtin_ia32_pcmpeqd256_mask:
7124  case X86::BI__builtin_ia32_pcmpeqd512_mask:
7125  case X86::BI__builtin_ia32_pcmpeqq128_mask:
7126  case X86::BI__builtin_ia32_pcmpeqq256_mask:
7127  case X86::BI__builtin_ia32_pcmpeqq512_mask:
7128  return EmitX86MaskedCompare(*this, 0, false, Ops);
7129  case X86::BI__builtin_ia32_pcmpgtb128_mask:
7130  case X86::BI__builtin_ia32_pcmpgtb256_mask:
7131  case X86::BI__builtin_ia32_pcmpgtb512_mask:
7132  case X86::BI__builtin_ia32_pcmpgtw128_mask:
7133  case X86::BI__builtin_ia32_pcmpgtw256_mask:
7134  case X86::BI__builtin_ia32_pcmpgtw512_mask:
7135  case X86::BI__builtin_ia32_pcmpgtd128_mask:
7136  case X86::BI__builtin_ia32_pcmpgtd256_mask:
7137  case X86::BI__builtin_ia32_pcmpgtd512_mask:
7138  case X86::BI__builtin_ia32_pcmpgtq128_mask:
7139  case X86::BI__builtin_ia32_pcmpgtq256_mask:
7140  case X86::BI__builtin_ia32_pcmpgtq512_mask:
7141  return EmitX86MaskedCompare(*this, 6, true, Ops);
7142  case X86::BI__builtin_ia32_cmpb128_mask:
7143  case X86::BI__builtin_ia32_cmpb256_mask:
7144  case X86::BI__builtin_ia32_cmpb512_mask:
7145  case X86::BI__builtin_ia32_cmpw128_mask:
7146  case X86::BI__builtin_ia32_cmpw256_mask:
7147  case X86::BI__builtin_ia32_cmpw512_mask:
7148  case X86::BI__builtin_ia32_cmpd128_mask:
7149  case X86::BI__builtin_ia32_cmpd256_mask:
7150  case X86::BI__builtin_ia32_cmpd512_mask:
7151  case X86::BI__builtin_ia32_cmpq128_mask:
7152  case X86::BI__builtin_ia32_cmpq256_mask:
7153  case X86::BI__builtin_ia32_cmpq512_mask: {
7154  unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7155  return EmitX86MaskedCompare(*this, CC, true, Ops);
7156  }
7157  case X86::BI__builtin_ia32_ucmpb128_mask:
7158  case X86::BI__builtin_ia32_ucmpb256_mask:
7159  case X86::BI__builtin_ia32_ucmpb512_mask:
7160  case X86::BI__builtin_ia32_ucmpw128_mask:
7161  case X86::BI__builtin_ia32_ucmpw256_mask:
7162  case X86::BI__builtin_ia32_ucmpw512_mask:
7163  case X86::BI__builtin_ia32_ucmpd128_mask:
7164  case X86::BI__builtin_ia32_ucmpd256_mask:
7165  case X86::BI__builtin_ia32_ucmpd512_mask:
7166  case X86::BI__builtin_ia32_ucmpq128_mask:
7167  case X86::BI__builtin_ia32_ucmpq256_mask:
7168  case X86::BI__builtin_ia32_ucmpq512_mask: {
7169  unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7170  return EmitX86MaskedCompare(*this, CC, false, Ops);
7171  }
7172 
7173  case X86::BI__builtin_ia32_vplzcntd_128_mask:
7174  case X86::BI__builtin_ia32_vplzcntd_256_mask:
7175  case X86::BI__builtin_ia32_vplzcntd_512_mask:
7176  case X86::BI__builtin_ia32_vplzcntq_128_mask:
7177  case X86::BI__builtin_ia32_vplzcntq_256_mask:
7178  case X86::BI__builtin_ia32_vplzcntq_512_mask: {
7179  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
7180  return EmitX86Select(*this, Ops[2],
7181  Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
7182  Ops[1]);
7183  }
7184 
7185  // TODO: Handle 64/512-bit vector widths of min/max.
7186  case X86::BI__builtin_ia32_pmaxsb128:
7187  case X86::BI__builtin_ia32_pmaxsw128:
7188  case X86::BI__builtin_ia32_pmaxsd128:
7189  case X86::BI__builtin_ia32_pmaxsb256:
7190  case X86::BI__builtin_ia32_pmaxsw256:
7191  case X86::BI__builtin_ia32_pmaxsd256: {
7192  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Ops[1]);
7193  return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7194  }
7195  case X86::BI__builtin_ia32_pmaxub128:
7196  case X86::BI__builtin_ia32_pmaxuw128:
7197  case X86::BI__builtin_ia32_pmaxud128:
7198  case X86::BI__builtin_ia32_pmaxub256:
7199  case X86::BI__builtin_ia32_pmaxuw256:
7200  case X86::BI__builtin_ia32_pmaxud256: {
7201  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0], Ops[1]);
7202  return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7203  }
7204  case X86::BI__builtin_ia32_pminsb128:
7205  case X86::BI__builtin_ia32_pminsw128:
7206  case X86::BI__builtin_ia32_pminsd128:
7207  case X86::BI__builtin_ia32_pminsb256:
7208  case X86::BI__builtin_ia32_pminsw256:
7209  case X86::BI__builtin_ia32_pminsd256: {
7210  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SLT, Ops[0], Ops[1]);
7211  return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7212  }
7213  case X86::BI__builtin_ia32_pminub128:
7214  case X86::BI__builtin_ia32_pminuw128:
7215  case X86::BI__builtin_ia32_pminud128:
7216  case X86::BI__builtin_ia32_pminub256:
7217  case X86::BI__builtin_ia32_pminuw256:
7218  case X86::BI__builtin_ia32_pminud256: {
7219  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, Ops[0], Ops[1]);
7220  return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7221  }
7222 
7223  // 3DNow!
7224  case X86::BI__builtin_ia32_pswapdsf:
7225  case X86::BI__builtin_ia32_pswapdsi: {
7226  llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
7227  Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
7228  llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
7229  return Builder.CreateCall(F, Ops, "pswapd");
7230  }
7231  case X86::BI__builtin_ia32_rdrand16_step:
7232  case X86::BI__builtin_ia32_rdrand32_step:
7233  case X86::BI__builtin_ia32_rdrand64_step:
7234  case X86::BI__builtin_ia32_rdseed16_step:
7235  case X86::BI__builtin_ia32_rdseed32_step:
7236  case X86::BI__builtin_ia32_rdseed64_step: {
7237  Intrinsic::ID ID;
7238  switch (BuiltinID) {
7239  default: llvm_unreachable("Unsupported intrinsic!");
7240  case X86::BI__builtin_ia32_rdrand16_step:
7241  ID = Intrinsic::x86_rdrand_16;
7242  break;
7243  case X86::BI__builtin_ia32_rdrand32_step:
7244  ID = Intrinsic::x86_rdrand_32;
7245  break;
7246  case X86::BI__builtin_ia32_rdrand64_step:
7247  ID = Intrinsic::x86_rdrand_64;
7248  break;
7249  case X86::BI__builtin_ia32_rdseed16_step:
7250  ID = Intrinsic::x86_rdseed_16;
7251  break;
7252  case X86::BI__builtin_ia32_rdseed32_step:
7253  ID = Intrinsic::x86_rdseed_32;
7254  break;
7255  case X86::BI__builtin_ia32_rdseed64_step:
7256  ID = Intrinsic::x86_rdseed_64;
7257  break;
7258  }
7259 
7260  Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
7261  Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
7262  Ops[0]);
7263  return Builder.CreateExtractValue(Call, 1);
7264  }
7265 
7266  // SSE packed comparison intrinsics
7267  case X86::BI__builtin_ia32_cmpeqps:
7268  case X86::BI__builtin_ia32_cmpeqpd:
7269  return getVectorFCmpIR(CmpInst::FCMP_OEQ);
7270  case X86::BI__builtin_ia32_cmpltps:
7271  case X86::BI__builtin_ia32_cmpltpd:
7272  return getVectorFCmpIR(CmpInst::FCMP_OLT);
7273  case X86::BI__builtin_ia32_cmpleps:
7274  case X86::BI__builtin_ia32_cmplepd:
7275  return getVectorFCmpIR(CmpInst::FCMP_OLE);
7276  case X86::BI__builtin_ia32_cmpunordps:
7277  case X86::BI__builtin_ia32_cmpunordpd:
7278  return getVectorFCmpIR(CmpInst::FCMP_UNO);
7279  case X86::BI__builtin_ia32_cmpneqps:
7280  case X86::BI__builtin_ia32_cmpneqpd:
7281  return getVectorFCmpIR(CmpInst::FCMP_UNE);
7282  case X86::BI__builtin_ia32_cmpnltps:
7283  case X86::BI__builtin_ia32_cmpnltpd:
7284  return getVectorFCmpIR(CmpInst::FCMP_UGE);
7285  case X86::BI__builtin_ia32_cmpnleps:
7286  case X86::BI__builtin_ia32_cmpnlepd:
7287  return getVectorFCmpIR(CmpInst::FCMP_UGT);
7288  case X86::BI__builtin_ia32_cmpordps:
7289  case X86::BI__builtin_ia32_cmpordpd:
7290  return getVectorFCmpIR(CmpInst::FCMP_ORD);
7291  case X86::BI__builtin_ia32_cmpps:
7292  case X86::BI__builtin_ia32_cmpps256:
7293  case X86::BI__builtin_ia32_cmppd:
7294  case X86::BI__builtin_ia32_cmppd256: {
7295  unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7296  // If this is one of the SSE immediates, we can use native IR.
7297  if (CC < 8) {
7298  FCmpInst::Predicate Pred;
7299  switch (CC) {
7300  case 0: Pred = FCmpInst::FCMP_OEQ; break;
7301  case 1: Pred = FCmpInst::FCMP_OLT; break;
7302  case 2: Pred = FCmpInst::FCMP_OLE; break;
7303  case 3: Pred = FCmpInst::FCMP_UNO; break;
7304  case 4: Pred = FCmpInst::FCMP_UNE; break;
7305  case 5: Pred = FCmpInst::FCMP_UGE; break;
7306  case 6: Pred = FCmpInst::FCMP_UGT; break;
7307  case 7: Pred = FCmpInst::FCMP_ORD; break;
7308  }
7309  return getVectorFCmpIR(Pred);
7310  }
7311 
7312  // We can't handle 8-31 immediates with native IR, use the intrinsic.
7313  Intrinsic::ID ID;
7314  switch (BuiltinID) {
7315  default: llvm_unreachable("Unsupported intrinsic!");
7316  case X86::BI__builtin_ia32_cmpps:
7317  ID = Intrinsic::x86_sse_cmp_ps;
7318  break;
7319  case X86::BI__builtin_ia32_cmpps256:
7320  ID = Intrinsic::x86_avx_cmp_ps_256;
7321  break;
7322  case X86::BI__builtin_ia32_cmppd:
7323  ID = Intrinsic::x86_sse2_cmp_pd;
7324  break;
7325  case X86::BI__builtin_ia32_cmppd256:
7326  ID = Intrinsic::x86_avx_cmp_pd_256;
7327  break;
7328  }
7329 
7330  return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7331  }
7332 
7333  // SSE scalar comparison intrinsics
7334  case X86::BI__builtin_ia32_cmpeqss:
7335  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
7336  case X86::BI__builtin_ia32_cmpltss:
7337  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
7338  case X86::BI__builtin_ia32_cmpless:
7339  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
7340  case X86::BI__builtin_ia32_cmpunordss:
7341  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
7342  case X86::BI__builtin_ia32_cmpneqss:
7343  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
7344  case X86::BI__builtin_ia32_cmpnltss:
7345  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
7346  case X86::BI__builtin_ia32_cmpnless:
7347  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
7348  case X86::BI__builtin_ia32_cmpordss:
7349  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
7350  case X86::BI__builtin_ia32_cmpeqsd:
7351  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
7352  case X86::BI__builtin_ia32_cmpltsd:
7353  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
7354  case X86::BI__builtin_ia32_cmplesd:
7355  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
7356  case X86::BI__builtin_ia32_cmpunordsd:
7357  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
7358  case X86::BI__builtin_ia32_cmpneqsd:
7359  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
7360  case X86::BI__builtin_ia32_cmpnltsd:
7361  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
7362  case X86::BI__builtin_ia32_cmpnlesd:
7363  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
7364  case X86::BI__builtin_ia32_cmpordsd:
7365  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
7366  }
7367 }
7368 
7369 
7370 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
7371  const CallExpr *E) {
7372  SmallVector<Value*, 4> Ops;
7373 
7374  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
7375  Ops.push_back(EmitScalarExpr(E->getArg(i)));
7376 
7377  Intrinsic::ID ID = Intrinsic::not_intrinsic;
7378 
7379  switch (BuiltinID) {
7380  default: return nullptr;
7381 
7382  // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
7383  // call __builtin_readcyclecounter.
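  // i.e. this case simply emits: call i64 @llvm.readcyclecounter()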
7384  case PPC::BI__builtin_ppc_get_timebase:
7385  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
7386 
7387  // vec_ld, vec_lvsl, vec_lvsr
7388  case PPC::BI__builtin_altivec_lvx:
7389  case PPC::BI__builtin_altivec_lvxl:
7390  case PPC::BI__builtin_altivec_lvebx:
7391  case PPC::BI__builtin_altivec_lvehx:
7392  case PPC::BI__builtin_altivec_lvewx:
7393  case PPC::BI__builtin_altivec_lvsl:
7394  case PPC::BI__builtin_altivec_lvsr:
7395  case PPC::BI__builtin_vsx_lxvd2x:
7396  case PPC::BI__builtin_vsx_lxvw4x:
7397  {
7398  Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
7399 
7400  Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
7401  Ops.pop_back();
7402 
7403  switch (BuiltinID) {
7404  default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
7405  case PPC::BI__builtin_altivec_lvx:
7406  ID = Intrinsic::ppc_altivec_lvx;
7407  break;
7408  case PPC::BI__builtin_altivec_lvxl:
7409  ID = Intrinsic::ppc_altivec_lvxl;
7410  break;
7411  case PPC::BI__builtin_altivec_lvebx:
7412  ID = Intrinsic::ppc_altivec_lvebx;
7413  break;
7414  case PPC::BI__builtin_altivec_lvehx:
7415  ID = Intrinsic::ppc_altivec_lvehx;
7416  break;
7417  case PPC::BI__builtin_altivec_lvewx:
7418  ID = Intrinsic::ppc_altivec_lvewx;
7419  break;
7420  case PPC::BI__builtin_altivec_lvsl:
7421  ID = Intrinsic::ppc_altivec_lvsl;
7422  break;
7423  case PPC::BI__builtin_altivec_lvsr:
7424  ID = Intrinsic::ppc_altivec_lvsr;
7425  break;
7426  case PPC::BI__builtin_vsx_lxvd2x:
7427  ID = Intrinsic::ppc_vsx_lxvd2x;
7428  break;
7429  case PPC::BI__builtin_vsx_lxvw4x:
7430  ID = Intrinsic::ppc_vsx_lxvw4x;
7431  break;
7432  }
7433  llvm::Function *F = CGM.getIntrinsic(ID);
7434  return Builder.CreateCall(F, Ops, "");
7435  }
7436 
7437  // vec_st
7438  case PPC::BI__builtin_altivec_stvx:
7439  case PPC::BI__builtin_altivec_stvxl:
7440  case PPC::BI__builtin_altivec_stvebx:
7441  case PPC::BI__builtin_altivec_stvehx:
7442  case PPC::BI__builtin_altivec_stvewx:
7443  case PPC::BI__builtin_vsx_stxvd2x:
7444  case PPC::BI__builtin_vsx_stxvw4x:
7445  {
7446  Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
7447  Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
7448  Ops.pop_back();
7449 
7450  switch (BuiltinID) {
7451  default: llvm_unreachable("Unsupported st intrinsic!");
7452  case PPC::BI__builtin_altivec_stvx:
7453  ID = Intrinsic::ppc_altivec_stvx;
7454  break;
7455  case PPC::BI__builtin_altivec_stvxl:
7456  ID = Intrinsic::ppc_altivec_stvxl;
7457  break;
7458  case PPC::BI__builtin_altivec_stvebx:
7459  ID = Intrinsic::ppc_altivec_stvebx;
7460  break;
7461  case PPC::BI__builtin_altivec_stvehx:
7462  ID = Intrinsic::ppc_altivec_stvehx;
7463  break;
7464  case PPC::BI__builtin_altivec_stvewx:
7465  ID = Intrinsic::ppc_altivec_stvewx;
7466  break;
7467  case PPC::BI__builtin_vsx_stxvd2x:
7468  ID = Intrinsic::ppc_vsx_stxvd2x;
7469  break;
7470  case PPC::BI__builtin_vsx_stxvw4x:
7471  ID = Intrinsic::ppc_vsx_stxvw4x;
7472  break;
7473  }
7474  llvm::Function *F = CGM.getIntrinsic(ID);
7475  return Builder.CreateCall(F, Ops, "");
7476  }
7477  // Square root
7478  case PPC::BI__builtin_vsx_xvsqrtsp:
7479  case PPC::BI__builtin_vsx_xvsqrtdp: {
7480  llvm::Type *ResultType = ConvertType(E->getType());
7481  Value *X = EmitScalarExpr(E->getArg(0));
7482  ID = Intrinsic::sqrt;
7483  llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
7484  return Builder.CreateCall(F, X);
7485  }
7486  // Count leading zeros
7487  case PPC::BI__builtin_altivec_vclzb:
7488  case PPC::BI__builtin_altivec_vclzh:
7489  case PPC::BI__builtin_altivec_vclzw:
7490  case PPC::BI__builtin_altivec_vclzd: {
7491  llvm::Type *ResultType = ConvertType(E->getType());
7492  Value *X = EmitScalarExpr(E->getArg(0));
7493  Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
7494  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
7495  return Builder.CreateCall(F, {X, Undef});
7496  }
7497  // Copy sign
7498  case PPC::BI__builtin_vsx_xvcpsgnsp:
7499  case PPC::BI__builtin_vsx_xvcpsgndp: {
7500  llvm::Type *ResultType = ConvertType(E->getType());
7501  Value *X = EmitScalarExpr(E->getArg(0));
7502  Value *Y = EmitScalarExpr(E->getArg(1));
7503  ID = Intrinsic::copysign;
7504  llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
7505  return Builder.CreateCall(F, {X, Y});
7506  }
7507  // Rounding/truncation
7508  case PPC::BI__builtin_vsx_xvrspip:
7509  case PPC::BI__builtin_vsx_xvrdpip:
7510  case PPC::BI__builtin_vsx_xvrdpim:
7511  case PPC::BI__builtin_vsx_xvrspim:
7512  case PPC::BI__builtin_vsx_xvrdpi:
7513  case PPC::BI__builtin_vsx_xvrspi:
7514  case PPC::BI__builtin_vsx_xvrdpic:
7515  case PPC::BI__builtin_vsx_xvrspic:
7516  case PPC::BI__builtin_vsx_xvrdpiz:
7517  case PPC::BI__builtin_vsx_xvrspiz: {
7518  llvm::Type *ResultType = ConvertType(E->getType());
7519  Value *X = EmitScalarExpr(E->getArg(0));
7520  if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
7521  BuiltinID == PPC::BI__builtin_vsx_xvrspim)
7522  ID = Intrinsic::floor;
7523  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
7524  BuiltinID == PPC::BI__builtin_vsx_xvrspi)
7525  ID = Intrinsic::round;
7526  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
7527  BuiltinID == PPC::BI__builtin_vsx_xvrspic)
7528  ID = Intrinsic::nearbyint;
7529  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
7530  BuiltinID == PPC::BI__builtin_vsx_xvrspip)
7531  ID = Intrinsic::ceil;
7532  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
7533  BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
7534  ID = Intrinsic::trunc;
7535  llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
7536  return Builder.CreateCall(F, X);
7537  }
7538 
7539  // Absolute value
7540  case PPC::BI__builtin_vsx_xvabsdp:
7541  case PPC::BI__builtin_vsx_xvabssp: {
7542  llvm::Type *ResultType = ConvertType(E->getType());
7543  Value *X = EmitScalarExpr(E->getArg(0));
7544  llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
7545  return Builder.CreateCall(F, X);
7546  }
7547 
7548  // FMA variations
7549  case PPC::BI__builtin_vsx_xvmaddadp:
7550  case PPC::BI__builtin_vsx_xvmaddasp:
7551  case PPC::BI__builtin_vsx_xvnmaddadp:
7552  case PPC::BI__builtin_vsx_xvnmaddasp:
7553  case PPC::BI__builtin_vsx_xvmsubadp:
7554  case PPC::BI__builtin_vsx_xvmsubasp:
7555  case PPC::BI__builtin_vsx_xvnmsubadp:
7556  case PPC::BI__builtin_vsx_xvnmsubasp: {
7557  llvm::Type *ResultType = ConvertType(E->getType());
7558  Value *X = EmitScalarExpr(E->getArg(0));
7559  Value *Y = EmitScalarExpr(E->getArg(1));
7560  Value *Z = EmitScalarExpr(E->getArg(2));
7561  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
7562  llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
7563  switch (BuiltinID) {
7564  case PPC::BI__builtin_vsx_xvmaddadp:
7565  case PPC::BI__builtin_vsx_xvmaddasp:
7566  return Builder.CreateCall(F, {X, Y, Z});
7567  case PPC::BI__builtin_vsx_xvnmaddadp:
7568  case PPC::BI__builtin_vsx_xvnmaddasp:
7569  return Builder.CreateFSub(Zero,
7570  Builder.CreateCall(F, {X, Y, Z}), "sub");
7571  case PPC::BI__builtin_vsx_xvmsubadp:
7572  case PPC::BI__builtin_vsx_xvmsubasp:
7573  return Builder.CreateCall(F,
7574  {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
7575  case PPC::BI__builtin_vsx_xvnmsubadp:
7576  case PPC::BI__builtin_vsx_xvnmsubasp:
7577  Value *FsubRes =
7578  Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
7579  return Builder.CreateFSub(Zero, FsubRes, "sub");
7580  }
7581  llvm_unreachable("Unknown FMA operation");
7582  return nullptr; // Suppress no-return warning
7583  }
7584  }
7585 }
7586 
7587 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
7588  const CallExpr *E) {
7589  switch (BuiltinID) {
7590  case AMDGPU::BI__builtin_amdgcn_div_scale:
7591  case AMDGPU::BI__builtin_amdgcn_div_scalef: {
7592  // Translate from the intrinsic's struct return to the builtin's out
7593  // argument.
7594 
7595  Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
7596 
7597  llvm::Value *X = EmitScalarExpr(E->getArg(0));
7598  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
7599  llvm::Value *Z = EmitScalarExpr(E->getArg(2));
7600 
7601  llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
7602  X->getType());
7603 
7604  llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
7605 
7606  llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
7607  llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
7608 
7609  llvm::Type *RealFlagType
7610  = FlagOutPtr.getPointer()->getType()->getPointerElementType();
7611 
7612  llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
7613  Builder.CreateStore(FlagExt, FlagOutPtr);
7614  return Result;
7615  }
7616  case AMDGPU::BI__builtin_amdgcn_div_fmas:
7617  case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
7618  llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
7619  llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
7620  llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
7621  llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
7622 
7623  llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
7624  Src0->getType());
7625  llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
7626  return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
7627  }
7628  case AMDGPU::BI__builtin_amdgcn_div_fixup:
7629  case AMDGPU::BI__builtin_amdgcn_div_fixupf:
7630  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
7631  case AMDGPU::BI__builtin_amdgcn_trig_preop:
7632  case AMDGPU::BI__builtin_amdgcn_trig_preopf:
7633  return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
7634  case AMDGPU::BI__builtin_amdgcn_rcp:
7635  case AMDGPU::BI__builtin_amdgcn_rcpf:
7636  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
7637  case AMDGPU::BI__builtin_amdgcn_rsq:
7638  case AMDGPU::BI__builtin_amdgcn_rsqf:
7639  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
7640  case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
7641  case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
7642  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
7643  case AMDGPU::BI__builtin_amdgcn_sinf:
7644  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
7645  case AMDGPU::BI__builtin_amdgcn_cosf:
7646  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
7647  case AMDGPU::BI__builtin_amdgcn_log_clampf:
7648  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
7649  case AMDGPU::BI__builtin_amdgcn_ldexp:
7650  case AMDGPU::BI__builtin_amdgcn_ldexpf:
7651  return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
7652  case AMDGPU::BI__builtin_amdgcn_frexp_mant:
7653  case AMDGPU::BI__builtin_amdgcn_frexp_mantf: {
7654  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
7655  }
7656  case AMDGPU::BI__builtin_amdgcn_frexp_exp:
7657  case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
7658  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_exp);
7659  }
7660  case AMDGPU::BI__builtin_amdgcn_fract:
7661  case AMDGPU::BI__builtin_amdgcn_fractf:
7662  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
7663  case AMDGPU::BI__builtin_amdgcn_lerp:
7664  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
7665  case AMDGPU::BI__builtin_amdgcn_class:
7666  case AMDGPU::BI__builtin_amdgcn_classf:
7667  return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
7668 
7669  case AMDGPU::BI__builtin_amdgcn_read_exec: {
7670  CallInst *CI = cast<CallInst>(
7671  EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
7672  CI->setConvergent();
7673  return CI;
7674  }
7675 
7676  // amdgcn workitem
7677  case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
7678  return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
7679  case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
7680  return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
7681  case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
7682  return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
7683 
7684  // r600 intrinsics
7685  case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
7686  case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
7687  return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
7688  case AMDGPU::BI__builtin_r600_read_tidig_x:
7689  return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
7690  case AMDGPU::BI__builtin_r600_read_tidig_y:
7691  return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
7692  case AMDGPU::BI__builtin_r600_read_tidig_z:
7693  return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
7694  default:
7695  return nullptr;
7696  }
7697 }
7698 
7699 /// Handle a SystemZ function in which the final argument is a pointer
7700 /// to an int that receives the post-instruction CC value. At the LLVM level
7701 /// this is represented as a function that returns a {result, cc} pair.
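/// For example, __builtin_s390_vceqbs(a, b, &cc) is emitted roughly as:
///   %pair = call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %a, <16 x i8> %b)
///   %cc   = extractvalue { <16 x i8>, i32 } %pair, 1   ; stored through the pointer
///   %res  = extractvalue { <16 x i8>, i32 } %pair, 0   ; returned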
7702 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
7703  unsigned IntrinsicID,
7704  const CallExpr *E) {
7705  unsigned NumArgs = E->getNumArgs() - 1;
7706  SmallVector<Value *, 8> Args(NumArgs);
7707  for (unsigned I = 0; I < NumArgs; ++I)
7708  Args[I] = CGF.EmitScalarExpr(E->getArg(I));
7709  Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
7710  Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
7711  Value *Call = CGF.Builder.CreateCall(F, Args);
7712  Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
7713  CGF.Builder.CreateStore(CC, CCPtr);
7714  return CGF.Builder.CreateExtractValue(Call, 0);
7715 }
7716 
7717 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
7718  const CallExpr *E) {
7719  switch (BuiltinID) {
7720  case SystemZ::BI__builtin_tbegin: {
7721  Value *TDB = EmitScalarExpr(E->getArg(0));
7722  Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
7723  Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
7724  return Builder.CreateCall(F, {TDB, Control});
7725  }
7726  case SystemZ::BI__builtin_tbegin_nofloat: {
7727  Value *TDB = EmitScalarExpr(E->getArg(0));
7728  Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
7729  Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
7730  return Builder.CreateCall(F, {TDB, Control});
7731  }
7732  case SystemZ::BI__builtin_tbeginc: {
7733  Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
7734  Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
7735  Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
7736  return Builder.CreateCall(F, {TDB, Control});
7737  }
7738  case SystemZ::BI__builtin_tabort: {
7739  Value *Data = EmitScalarExpr(E->getArg(0));
7740  Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
7741  return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
7742  }
7743  case SystemZ::BI__builtin_non_tx_store: {
7744  Value *Address = EmitScalarExpr(E->getArg(0));
7745  Value *Data = EmitScalarExpr(E->getArg(1));
7746  Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
7747  return Builder.CreateCall(F, {Data, Address});
7748  }
7749 
7750  // Vector builtins. Note that most vector builtins are mapped automatically
7751  // to target-specific LLVM intrinsics. The ones handled specially here can
7752  // be represented via standard LLVM IR, which is preferable to enable common
7753  // LLVM optimizations.
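  // For example, __builtin_s390_vpopctb(x) below becomes roughly:
  //   call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %x)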
7754 
7755  case SystemZ::BI__builtin_s390_vpopctb:
7756  case SystemZ::BI__builtin_s390_vpopcth:
7757  case SystemZ::BI__builtin_s390_vpopctf:
7758  case SystemZ::BI__builtin_s390_vpopctg: {
7759  llvm::Type *ResultType = ConvertType(E->getType());
7760  Value *X = EmitScalarExpr(E->getArg(0));
7761  Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
7762  return Builder.CreateCall(F, X);
7763  }
7764 
7765  case SystemZ::BI__builtin_s390_vclzb:
7766  case SystemZ::BI__builtin_s390_vclzh:
7767  case SystemZ::BI__builtin_s390_vclzf:
7768  case SystemZ::BI__builtin_s390_vclzg: {
7769  llvm::Type *ResultType = ConvertType(E->getType());
7770  Value *X = EmitScalarExpr(E->getArg(0));
7771  Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
7772  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
7773  return Builder.CreateCall(F, {X, Undef});
7774  }
7775 
7776  case SystemZ::BI__builtin_s390_vctzb:
7777  case SystemZ::BI__builtin_s390_vctzh:
7778  case SystemZ::BI__builtin_s390_vctzf:
7779  case SystemZ::BI__builtin_s390_vctzg: {
7780  llvm::Type *ResultType = ConvertType(E->getType());
7781  Value *X = EmitScalarExpr(E->getArg(0));
7782  Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
7783  Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
7784  return Builder.CreateCall(F, {X, Undef});
7785  }
7786 
7787  case SystemZ::BI__builtin_s390_vfsqdb: {
7788  llvm::Type *ResultType = ConvertType(E->getType());
7789  Value *X = EmitScalarExpr(E->getArg(0));
7790  Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
7791  return Builder.CreateCall(F, X);
7792  }
7793  case SystemZ::BI__builtin_s390_vfmadb: {
7794  llvm::Type *ResultType = ConvertType(E->getType());
7795  Value *X = EmitScalarExpr(E->getArg(0));
7796  Value *Y = EmitScalarExpr(E->getArg(1));
7797  Value *Z = EmitScalarExpr(E->getArg(2));
7798  Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
7799  return Builder.CreateCall(F, {X, Y, Z});
7800  }
7801  case SystemZ::BI__builtin_s390_vfmsdb: {
7802  llvm::Type *ResultType = ConvertType(E->getType());
7803  Value *X = EmitScalarExpr(E->getArg(0));
7804  Value *Y = EmitScalarExpr(E->getArg(1));
7805  Value *Z = EmitScalarExpr(E->getArg(2));
7806  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
7807  Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
7808  return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
7809  }
7810  case SystemZ::BI__builtin_s390_vflpdb: {
7811  llvm::Type *ResultType = ConvertType(E->getType());
7812  Value *X = EmitScalarExpr(E->getArg(0));
7813  Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
7814  return Builder.CreateCall(F, X);
7815  }
7816  case SystemZ::BI__builtin_s390_vflndb: {
7817  llvm::Type *ResultType = ConvertType(E->getType());
7818  Value *X = EmitScalarExpr(E->getArg(0));
7819  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
7820  Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
7821  return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
7822  }
7823  case SystemZ::BI__builtin_s390_vfidb: {
7824  llvm::Type *ResultType = ConvertType(E->getType());
7825  Value *X = EmitScalarExpr(E->getArg(0));
7826  // Constant-fold the M4 and M5 mask arguments.
7827  llvm::APSInt M4, M5;
7828  bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
7829  bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
7830  assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
7831  (void)IsConstM4; (void)IsConstM5;
7832  // Check whether this instance of vfidb can be represented via a LLVM
7833  // standard intrinsic. We only support some combinations of M4 and M5.
7834  Intrinsic::ID ID = Intrinsic::not_intrinsic;
7835  switch (M4.getZExtValue()) {
7836  default: break;
7837  case 0: // IEEE-inexact exception allowed
7838  switch (M5.getZExtValue()) {
7839  default: break;
7840  case 0: ID = Intrinsic::rint; break;
7841  }
7842  break;
7843  case 4: // IEEE-inexact exception suppressed
7844  switch (M5.getZExtValue()) {
7845  default: break;
7846  case 0: ID = Intrinsic::nearbyint; break;
7847  case 1: ID = Intrinsic::round; break;
7848  case 5: ID = Intrinsic::trunc; break;
7849  case 6: ID = Intrinsic::ceil; break;
7850  case 7: ID = Intrinsic::floor; break;
7851  }
7852  break;
7853  }
7854  if (ID != Intrinsic::not_intrinsic) {
7855  Function *F = CGM.getIntrinsic(ID, ResultType);
7856  return Builder.CreateCall(F, X);
7857  }
7858  Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb);
7859  Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
7860  Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
7861  return Builder.CreateCall(F, {X, M4Value, M5Value});
7862  }
7863 
7864  // Vector intrinsics that output the post-instruction CC value.
7865 
7866 #define INTRINSIC_WITH_CC(NAME) \
7867  case SystemZ::BI__builtin_##NAME: \
7868  return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
7869 
7870  INTRINSIC_WITH_CC(s390_vpkshs);
7871  INTRINSIC_WITH_CC(s390_vpksfs);
7872  INTRINSIC_WITH_CC(s390_vpksgs);
7873 
7874  INTRINSIC_WITH_CC(s390_vpklshs);
7875  INTRINSIC_WITH_CC(s390_vpklsfs);
7876  INTRINSIC_WITH_CC(s390_vpklsgs);
7877 
7878  INTRINSIC_WITH_CC(s390_vceqbs);
7879  INTRINSIC_WITH_CC(s390_vceqhs);
7880  INTRINSIC_WITH_CC(s390_vceqfs);
7881  INTRINSIC_WITH_CC(s390_vceqgs);
7882 
7883  INTRINSIC_WITH_CC(s390_vchbs);
7884  INTRINSIC_WITH_CC(s390_vchhs);
7885  INTRINSIC_WITH_CC(s390_vchfs);
7886  INTRINSIC_WITH_CC(s390_vchgs);
7887 
7888  INTRINSIC_WITH_CC(s390_vchlbs);
7889  INTRINSIC_WITH_CC(s390_vchlhs);
7890  INTRINSIC_WITH_CC(s390_vchlfs);
7891  INTRINSIC_WITH_CC(s390_vchlgs);
7892 
7893  INTRINSIC_WITH_CC(s390_vfaebs);
7894  INTRINSIC_WITH_CC(s390_vfaehs);
7895  INTRINSIC_WITH_CC(s390_vfaefs);
7896 
7897  INTRINSIC_WITH_CC(s390_vfaezbs);
7898  INTRINSIC_WITH_CC(s390_vfaezhs);
7899  INTRINSIC_WITH_CC(s390_vfaezfs);
7900 
7901  INTRINSIC_WITH_CC(s390_vfeebs);
7902  INTRINSIC_WITH_CC(s390_vfeehs);
7903  INTRINSIC_WITH_CC(s390_vfeefs);
7904 
7905  INTRINSIC_WITH_CC(s390_vfeezbs);
7906  INTRINSIC_WITH_CC(s390_vfeezhs);
7907  INTRINSIC_WITH_CC(s390_vfeezfs);
7908 
7909  INTRINSIC_WITH_CC(s390_vfenebs);
7910  INTRINSIC_WITH_CC(s390_vfenehs);
7911  INTRINSIC_WITH_CC(s390_vfenefs);
7912 
7913  INTRINSIC_WITH_CC(s390_vfenezbs);
7914  INTRINSIC_WITH_CC(s390_vfenezhs);
7915  INTRINSIC_WITH_CC(s390_vfenezfs);
7916 
7917  INTRINSIC_WITH_CC(s390_vistrbs);
7918  INTRINSIC_WITH_CC(s390_vistrhs);
7919  INTRINSIC_WITH_CC(s390_vistrfs);
7920 
7921  INTRINSIC_WITH_CC(s390_vstrcbs);
7922  INTRINSIC_WITH_CC(s390_vstrchs);
7923  INTRINSIC_WITH_CC(s390_vstrcfs);
7924 
7925  INTRINSIC_WITH_CC(s390_vstrczbs);
7926  INTRINSIC_WITH_CC(s390_vstrczhs);
7927  INTRINSIC_WITH_CC(s390_vstrczfs);
7928 
7929  INTRINSIC_WITH_CC(s390_vfcedbs);
7930  INTRINSIC_WITH_CC(s390_vfchdbs);
7931  INTRINSIC_WITH_CC(s390_vfchedbs);
7932 
7933  INTRINSIC_WITH_CC(s390_vftcidb);
7934 
7935 #undef INTRINSIC_WITH_CC
7936 
7937  default:
7938  return nullptr;
7939  }
7940 }
7941 
7942 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
7943  const CallExpr *E) {
7944  auto MakeLdg = [&](unsigned IntrinsicID) {
7945  Value *Ptr = EmitScalarExpr(E->getArg(0));
7946  AlignmentSource AlignSource;
7947  clang::CharUnits Align =
7948  getNaturalPointeeTypeAlignment(E->getArg(0)->getType(), &AlignSource);
7949  return Builder.CreateCall(
7950  CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
7951  Ptr->getType()}),
7952  {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
7953  };
7954 
7955  switch (BuiltinID) {
7956  case NVPTX::BI__nvvm_atom_add_gen_i:
7957  case NVPTX::BI__nvvm_atom_add_gen_l:
7958  case NVPTX::BI__nvvm_atom_add_gen_ll:
7959  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
7960 
7961  case NVPTX::BI__nvvm_atom_sub_gen_i:
7962  case NVPTX::BI__nvvm_atom_sub_gen_l:
7963  case NVPTX::BI__nvvm_atom_sub_gen_ll:
7964  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
7965 
7966  case NVPTX::BI__nvvm_atom_and_gen_i:
7967  case NVPTX::BI__nvvm_atom_and_gen_l:
7968  case NVPTX::BI__nvvm_atom_and_gen_ll:
7969  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
7970 
7971  case NVPTX::BI__nvvm_atom_or_gen_i:
7972  case NVPTX::BI__nvvm_atom_or_gen_l:
7973  case NVPTX::BI__nvvm_atom_or_gen_ll:
7974  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
7975 
7976  case NVPTX::BI__nvvm_atom_xor_gen_i:
7977  case NVPTX::BI__nvvm_atom_xor_gen_l:
7978  case NVPTX::BI__nvvm_atom_xor_gen_ll:
7979  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
7980 
7981  case NVPTX::BI__nvvm_atom_xchg_gen_i:
7982  case NVPTX::BI__nvvm_atom_xchg_gen_l:
7983  case NVPTX::BI__nvvm_atom_xchg_gen_ll:
7984  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
7985 
7986  case NVPTX::BI__nvvm_atom_max_gen_i:
7987  case NVPTX::BI__nvvm_atom_max_gen_l:
7988  case NVPTX::BI__nvvm_atom_max_gen_ll:
7989  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
7990 
7991  case NVPTX::BI__nvvm_atom_max_gen_ui:
7992  case NVPTX::BI__nvvm_atom_max_gen_ul:
7993  case NVPTX::BI__nvvm_atom_max_gen_ull:
7994  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
7995 
7996  case NVPTX::BI__nvvm_atom_min_gen_i:
7997  case NVPTX::BI__nvvm_atom_min_gen_l:
7998  case NVPTX::BI__nvvm_atom_min_gen_ll:
7999  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
8000 
8001  case NVPTX::BI__nvvm_atom_min_gen_ui:
8002  case NVPTX::BI__nvvm_atom_min_gen_ul:
8003  case NVPTX::BI__nvvm_atom_min_gen_ull:
8004  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
8005 
8006  case NVPTX::BI__nvvm_atom_cas_gen_i:
8007  case NVPTX::BI__nvvm_atom_cas_gen_l:
8008  case NVPTX::BI__nvvm_atom_cas_gen_ll:
8009  // __nvvm_atom_cas_gen_* should return the old value rather than the
8010  // success flag.
8011  return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
8012 
8013  case NVPTX::BI__nvvm_atom_add_gen_f: {
8014  Value *Ptr = EmitScalarExpr(E->getArg(0));
8015  Value *Val = EmitScalarExpr(E->getArg(1));
8016  // atomicrmw only deals with integer arguments so we need to use
8017  // LLVM's nvvm_atomic_load_add_f32 intrinsic for that.
8018  Value *FnALAF32 =
8019  CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
8020  return Builder.CreateCall(FnALAF32, {Ptr, Val});
8021  }
8022 
8023  case NVPTX::BI__nvvm_atom_inc_gen_ui: {
8024  Value *Ptr = EmitScalarExpr(E->getArg(0));
8025  Value *Val = EmitScalarExpr(E->getArg(1));
8026  Value *FnALI32 =
8027  CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
8028  return Builder.CreateCall(FnALI32, {Ptr, Val});
8029  }
8030 
8031  case NVPTX::BI__nvvm_atom_dec_gen_ui: {
8032  Value *Ptr = EmitScalarExpr(E->getArg(0));
8033  Value *Val = EmitScalarExpr(E->getArg(1));
8034  Value *FnALD32 =
8035  CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
8036  return Builder.CreateCall(FnALD32, {Ptr, Val});
8037  }
8038 
8039  case NVPTX::BI__nvvm_ldg_c:
8040  case NVPTX::BI__nvvm_ldg_c2:
8041  case NVPTX::BI__nvvm_ldg_c4:
8042  case NVPTX::BI__nvvm_ldg_s:
8043  case NVPTX::BI__nvvm_ldg_s2:
8044  case NVPTX::BI__nvvm_ldg_s4:
8045  case NVPTX::BI__nvvm_ldg_i:
8046  case NVPTX::BI__nvvm_ldg_i2:
8047  case NVPTX::BI__nvvm_ldg_i4:
8048  case NVPTX::BI__nvvm_ldg_l:
8049  case NVPTX::BI__nvvm_ldg_ll:
8050  case NVPTX::BI__nvvm_ldg_ll2:
8051  case NVPTX::BI__nvvm_ldg_uc:
8052  case NVPTX::BI__nvvm_ldg_uc2:
8053  case NVPTX::BI__nvvm_ldg_uc4:
8054  case NVPTX::BI__nvvm_ldg_us:
8055  case NVPTX::BI__nvvm_ldg_us2:
8056  case NVPTX::BI__nvvm_ldg_us4:
8057  case NVPTX::BI__nvvm_ldg_ui:
8058  case NVPTX::BI__nvvm_ldg_ui2:
8059  case NVPTX::BI__nvvm_ldg_ui4:
8060  case NVPTX::BI__nvvm_ldg_ul:
8061  case NVPTX::BI__nvvm_ldg_ull:
8062  case NVPTX::BI__nvvm_ldg_ull2:
8063  // PTX Interoperability section 2.2: "For a vector with an even number of
8064  // elements, its alignment is set to number of elements times the alignment
8065  // of its member: n*alignof(t)."
8066  return MakeLdg(Intrinsic::nvvm_ldg_global_i);
8067  case NVPTX::BI__nvvm_ldg_f:
8068  case NVPTX::BI__nvvm_ldg_f2:
8069  case NVPTX::BI__nvvm_ldg_f4:
8070  case NVPTX::BI__nvvm_ldg_d:
8071  case NVPTX::BI__nvvm_ldg_d2:
8072  return MakeLdg(Intrinsic::nvvm_ldg_global_f);
8073  default:
8074  return nullptr;
8075  }
8076 }
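The comment above the __nvvm_atom_cas_gen_* cases notes that these builtins return the old value at the location rather than a success flag, which is why MakeAtomicCmpXchgValue is called with ReturnBool=false. A standalone sketch of that return-value convention, using std::atomic purely as a stand-in for the generated cmpxchg (this is an illustration, not the code Clang emits):

#include <atomic>
#include <cassert>

// Old-value-returning CAS, mirroring the convention described above.
static int cas_returning_old(std::atomic<int> &Loc, int Expected, int Desired) {
  int Old = Expected;
  Loc.compare_exchange_strong(Old, Desired); // on failure, Old is updated to the current value
  return Old;                                // the prior contents are returned either way
}

int main() {
  std::atomic<int> X{42};
  int Old = cas_returning_old(X, /*Expected=*/42, /*Desired=*/7); // succeeds: returns 42, X becomes 7
  assert(Old == 42 && X.load() == 7);
  Old = cas_returning_old(X, /*Expected=*/42, /*Desired=*/9);     // fails: returns 7, X unchanged
  assert(Old == 7 && X.load() == 7);
  return 0;
}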
8077 
8078 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
8079  const CallExpr *E) {
8080  switch (BuiltinID) {
8081  case WebAssembly::BI__builtin_wasm_current_memory: {
8082  llvm::Type *ResultType = ConvertType(E->getType());
8083  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
8084  return Builder.CreateCall(Callee);
8085  }
8086  case WebAssembly::BI__builtin_wasm_grow_memory: {
8087  Value *X = EmitScalarExpr(E->getArg(0));
8088  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
8089  return Builder.CreateCall(Callee, X);
8090  }
8091 
8092  default:
8093  return nullptr;
8094  }
8095 }
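For context on the two WebAssembly cases above: __builtin_wasm_current_memory and __builtin_wasm_grow_memory are lowered to the llvm.wasm.current.memory and llvm.wasm.grow.memory intrinsics. A caller-side usage sketch follows; it is hypothetical, compiles only with a wasm-targeting clang, and the size_t prototypes are approximate (the exact signatures live in the builtin definition files).

#include <cstddef>

// Hypothetical caller, not part of CGBuiltin.cpp.
size_t report_and_grow() {
  size_t pages = __builtin_wasm_current_memory(); // becomes a call to llvm.wasm.current.memory
  __builtin_wasm_grow_memory(1);                  // becomes a call to llvm.wasm.grow.memory
  return pages;
}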