//===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements routines for translating from LLVM IR into SelectionDAG IR.
//
//===----------------------------------------------------------------------===//

#include "SelectionDAGBuilder.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <limits>
#include <numeric>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "isel"
/// LimitFloatPrecision - Generate low-precision inline sequences for
/// some float libcalls (6, 8 or 12 bits).
static unsigned LimitFloatPrecision;

static cl::opt<unsigned, true>
    LimitFPPrecision("limit-float-precision",
                     cl::desc("Generate low-precision inline sequences "
                              "for some float libcalls"),
                     cl::location(LimitFloatPrecision), cl::Hidden,
                     cl::init(0));

static cl::opt<unsigned> SwitchPeelThreshold(
    "switch-peel-threshold", cl::Hidden, cl::init(66),
    cl::desc("Set the case probability threshold for peeling the case from a "
             "switch statement. A value greater than 100 will void this "
             "optimization"));
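
// For illustration (added commentary, not in the original source): with the
// default threshold of 66, a switch whose hottest case carries, say, 70% of
// the incoming probability is "peeled" into an explicit compare-and-branch
// ahead of the remaining switch lowering:
//
//   switch (x) {             =>    if (x == 42) goto hot;  // ~70% taken
//   case 42: ...  ; 70%            switch (x) { ... }      // remaining cases
//   ...
//   }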

// Limit the width of DAG chains. This is important in general to prevent
// DAG-based analysis from blowing up. For example, alias analysis and
// load clustering may not complete in reasonable time. It is difficult to
// recognize and avoid this situation within each individual analysis, and
// future analyses are likely to have the same behavior. Limiting DAG width is
// the safe approach and will be especially important with global DAGs.
//
// MaxParallelChains default is arbitrarily high to avoid affecting
// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
// sequence over this should have been converted to llvm.memcpy by the
// frontend. It is easy to induce this behavior with .ll code such as:
//   %buffer = alloca [4096 x i8]
//   %data = load [4096 x i8]* %argPtr
//   store [4096 x i8] %data, [4096 x i8]* %buffer
static const unsigned MaxParallelChains = 64;
// True if the Value passed requires ABI mangling as it is a parameter to a
// function or a return value from a function which is not an intrinsic.
static bool isABIRegCopy(const Value *V) {
  const bool IsRetInst = V && isa<ReturnInst>(V);
  const bool IsCallInst = V && isa<CallInst>(V);
  const bool IsInLineAsm =
      IsCallInst && static_cast<const CallInst *>(V)->isInlineAsm();
  const bool IsIndirectFunctionCall =
      IsCallInst && !IsInLineAsm &&
      !static_cast<const CallInst *>(V)->getCalledFunction();
  // It is possible that the call instruction is an inline asm statement or an
  // indirect function call in which case the return value of
  // getCalledFunction() would be nullptr.
  const bool IsIntrinsicCall =
      IsCallInst && !IsInLineAsm && !IsIndirectFunctionCall &&
      static_cast<const CallInst *>(V)->getCalledFunction()->getIntrinsicID() !=
          Intrinsic::not_intrinsic;

  return IsRetInst || (IsCallInst && (!IsInLineAsm && !IsIntrinsicCall));
}
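
// Illustrative examples (added commentary, not in the original source):
//   %r = call i32 @foo(i32 %a)          ; true  - direct non-intrinsic call
//   %r = call i32 %fptr(i32 %a)         ; true  - indirect call
//   ret i32 %r                          ; true  - return instruction
//   %r = call i32 @llvm.ctpop.i32(...)  ; false - intrinsic call
//   call void asm "nop", ""()           ; false - inline asm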
179 
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                      const SDValue *Parts, unsigned NumParts,
                                      MVT PartVT, EVT ValueVT, const Value *V,
                                      bool IsABIRegCopy);

/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
/// larger than ValueVT then AssertOp can be used to specify whether the extra
/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
/// (ISD::AssertSext).
static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
                                const SDValue *Parts, unsigned NumParts,
                                MVT PartVT, EVT ValueVT, const Value *V,
                                Optional<ISD::NodeType> AssertOp = None,
                                bool IsABIRegCopy = false) {
  if (ValueVT.isVector())
    return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
                                  PartVT, ValueVT, V, IsABIRegCopy);

  assert(NumParts > 0 && "No parts to assemble!");
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Val = Parts[0];

  if (NumParts > 1) {
    // Assemble the value from multiple parts.
    if (ValueVT.isInteger()) {
      unsigned PartBits = PartVT.getSizeInBits();
      unsigned ValueBits = ValueVT.getSizeInBits();

      // Assemble the power of 2 part.
      unsigned RoundParts = NumParts & (NumParts - 1) ?
        1 << Log2_32(NumParts) : NumParts;
      unsigned RoundBits = PartBits * RoundParts;
      EVT RoundVT = RoundBits == ValueBits ?
        ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
      SDValue Lo, Hi;

      EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);

      if (RoundParts > 2) {
        Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
                              PartVT, HalfVT, V);
        Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
                              RoundParts / 2, PartVT, HalfVT, V);
      } else {
        Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
        Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
      }

      if (DAG.getDataLayout().isBigEndian())
        std::swap(Lo, Hi);

      Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);

      if (RoundParts < NumParts) {
        // Assemble the trailing non-power-of-2 part.
        unsigned OddParts = NumParts - RoundParts;
        EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
        Hi = getCopyFromParts(DAG, DL,
                              Parts + RoundParts, OddParts, PartVT, OddVT, V);

        // Combine the round and odd parts.
        Lo = Val;
        if (DAG.getDataLayout().isBigEndian())
          std::swap(Lo, Hi);
        EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
        Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
        Hi =
            DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
                        DAG.getConstant(Lo.getValueSizeInBits(), DL,
                                        TLI.getPointerTy(DAG.getDataLayout())));
        Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
        Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
      }
    } else if (PartVT.isFloatingPoint()) {
      // FP split into multiple FP parts (for ppcf128)
      assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
             "Unexpected split");
      SDValue Lo, Hi;
      Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
      Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
      if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
        std::swap(Lo, Hi);
      Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
    } else {
      // FP split into integer parts (soft fp)
      assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
             !PartVT.isVector() && "Unexpected split");
      EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
      Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V);
    }
  }

  // There is now one part, held in Val. Correct it to match ValueVT.
  // PartEVT is the type of the register class that holds the value.
  // ValueVT is the type of the inline asm operation.
  EVT PartEVT = Val.getValueType();

  if (PartEVT == ValueVT)
    return Val;

  if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
      ValueVT.bitsLT(PartEVT)) {
    // For an FP value in an integer part, we need to truncate to the right
    // width first.
    PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
    Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
  }

  // Handle types that have the same size.
  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

  // Handle types with different sizes.
  if (PartEVT.isInteger() && ValueVT.isInteger()) {
    if (ValueVT.bitsLT(PartEVT)) {
      // For a truncate, see if we have any information to
      // indicate whether the truncated bits will always be
      // zero or sign-extension.
      if (AssertOp.hasValue())
        Val = DAG.getNode(*AssertOp, DL, PartEVT, Val,
                          DAG.getValueType(ValueVT));
      return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
    }
    return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
  }

  if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
    // FP_ROUND's are always exact here.
    if (ValueVT.bitsLT(Val.getValueType()))
      return DAG.getNode(
          ISD::FP_ROUND, DL, ValueVT, Val,
          DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())));

    return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
  }

  llvm_unreachable("Unknown mismatch!");
}
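
// Worked example (added commentary, not in the original source): assembling
// an i64 from NumParts = 2 legal i32 parts yields
//   Val = BUILD_PAIR i64, Parts[0] /*Lo*/, Parts[1] /*Hi*/   ; little-endian
// For NumParts = 3 (an i96 from i32 parts), RoundParts = 2 so the first two
// parts form the power-of-2 half, and the odd third part is merged as
//   Val = (zext Round to i96) | ((anyext Odd to i96) << 64)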

static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
                                              const Twine &ErrMsg) {
  const Instruction *I = dyn_cast_or_null<Instruction>(V);
  if (!V)
    return Ctx.emitError(ErrMsg);

  const char *AsmError = ", possible invalid constraint for vector type";
  if (const CallInst *CI = dyn_cast<CallInst>(I))
    if (isa<InlineAsm>(CI->getCalledValue()))
      return Ctx.emitError(I, ErrMsg + AsmError);

  return Ctx.emitError(I, ErrMsg);
}

/// getCopyFromPartsVector - Create a value that contains the specified legal
/// parts combined into the value they represent. If the parts combine to a
/// type larger than ValueVT then AssertOp can be used to specify whether the
/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
/// ValueVT (ISD::AssertSext).
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                      const SDValue *Parts, unsigned NumParts,
                                      MVT PartVT, EVT ValueVT, const Value *V,
                                      bool IsABIRegCopy) {
  assert(ValueVT.isVector() && "Not a vector value");
  assert(NumParts > 0 && "No parts to assemble!");
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Val = Parts[0];

  // Handle a multi-element vector.
  if (NumParts > 1) {
    EVT IntermediateVT;
    MVT RegisterVT;
    unsigned NumIntermediates;
    unsigned NumRegs;

    if (IsABIRegCopy) {
      NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
          *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
          RegisterVT);
    } else {
      NumRegs =
          TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
                                     NumIntermediates, RegisterVT);
    }

    assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
    NumParts = NumRegs; // Silence a compiler warning.
    assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
    assert(RegisterVT.getSizeInBits() ==
           Parts[0].getSimpleValueType().getSizeInBits() &&
           "Part type sizes don't match!");

    // Assemble the parts into intermediate operands.
    SmallVector<SDValue, 8> Ops(NumIntermediates);
    if (NumIntermediates == NumParts) {
      // If the register was not expanded, truncate or copy the value,
      // as appropriate.
      for (unsigned i = 0; i != NumParts; ++i)
        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
                                  PartVT, IntermediateVT, V);
    } else if (NumParts > 0) {
      // If the intermediate type was expanded, build the intermediate
      // operands from the parts.
      assert(NumParts % NumIntermediates == 0 &&
             "Must expand into a divisible number of parts!");
      unsigned Factor = NumParts / NumIntermediates;
      for (unsigned i = 0; i != NumIntermediates; ++i)
        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
                                  PartVT, IntermediateVT, V);
    }

    // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
    // intermediate operands.
    EVT BuiltVectorTy =
        EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(),
                         (IntermediateVT.isVector()
                              ? IntermediateVT.getVectorNumElements() * NumParts
                              : NumIntermediates));
    Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
                                                : ISD::BUILD_VECTOR,
                      DL, BuiltVectorTy, Ops);
  }

  // There is now one part, held in Val. Correct it to match ValueVT.
  EVT PartEVT = Val.getValueType();

  if (PartEVT == ValueVT)
    return Val;

  if (PartEVT.isVector()) {
    // If the element type of the source/dest vectors are the same, but the
    // parts vector has more elements than the value vector, then we have a
    // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
    // elements we want.
    if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
      assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
             "Cannot narrow, it would be a lossy transformation");
      return DAG.getNode(
          ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
          DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
    }

    // Vector/Vector bitcast.
    if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

    assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
           "Cannot handle this kind of promotion");
    // Promoted vector extract
    return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
  }

  // Trivial bitcast if the types are the same size and the destination
  // vector type is legal.
  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
      TLI.isTypeLegal(ValueVT))
    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

  if (ValueVT.getVectorNumElements() != 1) {
    // Certain ABIs require that vectors are passed as integers. If vectors
    // are the same size, this is an obvious bitcast.
    if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
    } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) {
      // Bitcast Val back the original type and extract the corresponding
      // vector we want.
      unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits();
      EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(),
                                          ValueVT.getVectorElementType(), Elts);
      Val = DAG.getBitcast(WiderVecType, Val);
      return DAG.getNode(
          ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
          DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
    }

    diagnosePossiblyInvalidConstraint(
        *DAG.getContext(), V, "non-trivial scalar-to-vector conversion");
    return DAG.getUNDEF(ValueVT);
  }

  // Handle cases such as i8 -> <1 x i1>
  EVT ValueSVT = ValueVT.getVectorElementType();
  if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT)
    Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
                                    : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);

  return DAG.getBuildVector(ValueVT, DL, Val);
}
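
// Illustrative example (added commentary, not in the original source): a
// <2 x float> value returned in a widened <4 x float> register part is
// recovered by the vector-widening path above as
//   EXTRACT_SUBVECTOR <2 x float>, %part, 0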

static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
                                 SDValue Val, SDValue *Parts, unsigned NumParts,
                                 MVT PartVT, const Value *V, bool IsABIRegCopy);

/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
                           SDValue *Parts, unsigned NumParts, MVT PartVT,
                           const Value *V,
                           ISD::NodeType ExtendKind = ISD::ANY_EXTEND,
                           bool IsABIRegCopy = false) {
  EVT ValueVT = Val.getValueType();

  // Handle the vector case separately.
  if (ValueVT.isVector())
    return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
                                IsABIRegCopy);

  unsigned PartBits = PartVT.getSizeInBits();
  unsigned OrigNumParts = NumParts;
  assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
         "Copying to an illegal type!");

  if (NumParts == 0)
    return;

  assert(!ValueVT.isVector() && "Vector case handled elsewhere");
  EVT PartEVT = PartVT;
  if (PartEVT == ValueVT) {
    assert(NumParts == 1 && "No-op copy with multiple parts!");
    Parts[0] = Val;
    return;
  }

  if (NumParts * PartBits > ValueVT.getSizeInBits()) {
    // If the parts cover more bits than the value has, promote the value.
    if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
      assert(NumParts == 1 && "Do not know what to promote to!");
      Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
    } else {
      if (ValueVT.isFloatingPoint()) {
        // FP values need to be bitcast, then extended if they are being put
        // into a larger container.
        ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
        Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
      }
      assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
             ValueVT.isInteger() &&
             "Unknown mismatch!");
      ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
      Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
      if (PartVT == MVT::x86mmx)
        Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    }
  } else if (PartBits == ValueVT.getSizeInBits()) {
    // Different types of the same size.
    assert(NumParts == 1 && PartEVT != ValueVT);
    Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
  } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
    // If the parts cover fewer bits than the value has, truncate the value.
    assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
           ValueVT.isInteger() &&
           "Unknown mismatch!");
    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
    if (PartVT == MVT::x86mmx)
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
  }

  // The value may have changed - recompute ValueVT.
  ValueVT = Val.getValueType();
  assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
         "Failed to tile the value with PartVT!");
  if (NumParts == 1) {
    if (PartEVT != ValueVT) {
      diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
                                        "scalar-to-vector conversion failed");
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    }

    Parts[0] = Val;
    return;
  }

  // Expand the value into multiple parts.
  if (NumParts & (NumParts - 1)) {
    // The number of parts is not a power of 2. Split off and copy the tail.
    assert(PartVT.isInteger() && ValueVT.isInteger() &&
           "Do not know what to expand to!");
    unsigned RoundParts = 1 << Log2_32(NumParts);
    unsigned RoundBits = RoundParts * PartBits;
    unsigned OddParts = NumParts - RoundParts;
    SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
                                 DAG.getIntPtrConstant(RoundBits, DL));
    getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V);

    if (DAG.getDataLayout().isBigEndian())
      // The odd parts were reversed by getCopyToParts - unreverse them.
      std::reverse(Parts + RoundParts, Parts + NumParts);

    NumParts = RoundParts;
    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
  }

  // The number of parts is a power of 2. Repeatedly bisect the value using
  // EXTRACT_ELEMENT.
  Parts[0] = DAG.getNode(ISD::BITCAST, DL,
                         EVT::getIntegerVT(*DAG.getContext(),
                                           ValueVT.getSizeInBits()),
                         Val);
  for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
    for (unsigned i = 0; i < NumParts; i += StepSize) {
      unsigned ThisBits = StepSize * PartBits / 2;
      EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
      SDValue &Part0 = Parts[i];
      SDValue &Part1 = Parts[i+StepSize/2];

      Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
                          ThisVT, Part0, DAG.getIntPtrConstant(1, DL));
      Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
                          ThisVT, Part0, DAG.getIntPtrConstant(0, DL));

      if (ThisBits == PartBits && ThisVT != PartVT) {
        Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
        Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
      }
    }
  }

  if (DAG.getDataLayout().isBigEndian())
    std::reverse(Parts, Parts + OrigNumParts);
}
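
// Worked example (added commentary, not in the original source): splitting an
// i64 value into NumParts = 2 i32 parts bisects once:
//   Parts[1] = EXTRACT_ELEMENT i32, %val, 1   // high half
//   Parts[0] = EXTRACT_ELEMENT i32, %val, 0   // low half
// For NumParts = 3 (i96 into i32 parts), the non-power-of-2 tail is peeled
// first: the value is shifted right by 64 to form Parts[2], then the
// remaining i64 is bisected as above.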

/// getCopyToPartsVector - Create a series of nodes that contain the specified
/// value split into legal parts.
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                 SDValue Val, SDValue *Parts, unsigned NumParts,
                                 MVT PartVT, const Value *V,
                                 bool IsABIRegCopy) {
  EVT ValueVT = Val.getValueType();
  assert(ValueVT.isVector() && "Not a vector");
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  if (NumParts == 1) {
    EVT PartEVT = PartVT;
    if (PartEVT == ValueVT) {
      // Nothing to do.
    } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
      // Bitconvert vector->vector case.
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    } else if (PartVT.isVector() &&
               PartEVT.getVectorElementType() == ValueVT.getVectorElementType() &&
               PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
      EVT ElementVT = PartVT.getVectorElementType();
      // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
      // undef elements.
      SmallVector<SDValue, 16> Ops;
      for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
        Ops.push_back(DAG.getNode(
            ISD::EXTRACT_VECTOR_ELT, DL, ElementVT, Val,
            DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))));

      for (unsigned i = ValueVT.getVectorNumElements(),
                    e = PartVT.getVectorNumElements(); i != e; ++i)
        Ops.push_back(DAG.getUNDEF(ElementVT));

      Val = DAG.getBuildVector(PartVT, DL, Ops);

      // FIXME: Use CONCAT for 2x -> 4x.

      //SDValue UndefElts = DAG.getUNDEF(VectorTy);
      //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
    } else if (PartVT.isVector() &&
               PartEVT.getVectorElementType().bitsGE(
                   ValueVT.getVectorElementType()) &&
               PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {

      // Promoted vector extract
      Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
    } else {
      if (ValueVT.getVectorNumElements() == 1) {
        Val = DAG.getNode(
            ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
            DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
      } else {
        assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() &&
               "lossy conversion of vector to scalar type");
        EVT IntermediateType =
            EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
        Val = DAG.getBitcast(IntermediateType, Val);
        Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
      }
    }

    assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
    Parts[0] = Val;
    return;
  }

  // Handle a multi-element vector.
  EVT IntermediateVT;
  MVT RegisterVT;
  unsigned NumIntermediates;
  unsigned NumRegs;
  if (IsABIRegCopy) {
    NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
        *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
        RegisterVT);
  } else {
    NumRegs =
        TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
                                   NumIntermediates, RegisterVT);
  }
  unsigned NumElements = ValueVT.getVectorNumElements();

  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
  NumParts = NumRegs; // Silence a compiler warning.
  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");

  // Convert the vector to the appropriate type if necessary.
  unsigned DestVectorNoElts =
      NumIntermediates *
      (IntermediateVT.isVector() ? IntermediateVT.getVectorNumElements() : 1);
  EVT BuiltVectorTy = EVT::getVectorVT(
      *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
  if (Val.getValueType() != BuiltVectorTy)
    Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);

  // Split the vector into intermediate operands.
  SmallVector<SDValue, 8> Ops(NumIntermediates);
  for (unsigned i = 0; i != NumIntermediates; ++i) {
    if (IntermediateVT.isVector())
      Ops[i] =
          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
                      DAG.getConstant(i * (NumElements / NumIntermediates), DL,
                                      TLI.getVectorIdxTy(DAG.getDataLayout())));
    else
      Ops[i] = DAG.getNode(
          ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
          DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
  }

  // Split the intermediate operands into legal parts.
  if (NumParts == NumIntermediates) {
    // If the register was not expanded, promote or copy the value,
    // as appropriate.
    for (unsigned i = 0; i != NumParts; ++i)
      getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V);
  } else if (NumParts > 0) {
    // If the intermediate type was expanded, split the value into
    // legal parts.
    assert(NumIntermediates != 0 && "division by zero");
    assert(NumParts % NumIntermediates == 0 &&
           "Must expand into a divisible number of parts!");
    unsigned Factor = NumParts / NumIntermediates;
    for (unsigned i = 0; i != NumIntermediates; ++i)
      getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V);
  }
}
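
// Illustrative example (added commentary, not in the original source): a
// <4 x i32> value lowered through two <2 x i32> intermediates is split as
//   Ops[0] = EXTRACT_SUBVECTOR <2 x i32>, %val, 0
//   Ops[1] = EXTRACT_SUBVECTOR <2 x i32>, %val, 2
// and each intermediate is then copied into its legal register parts.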

RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
                           EVT valuevt, bool IsABIMangledValue)
    : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
      RegCount(1, regs.size()), IsABIMangled(IsABIMangledValue) {}

RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
                           const DataLayout &DL, unsigned Reg, Type *Ty,
                           bool IsABIMangledValue) {
  ComputeValueVTs(TLI, DL, Ty, ValueVTs);

  IsABIMangled = IsABIMangledValue;

  for (EVT ValueVT : ValueVTs) {
    unsigned NumRegs = IsABIMangledValue
                           ? TLI.getNumRegistersForCallingConv(Context, ValueVT)
                           : TLI.getNumRegisters(Context, ValueVT);
    MVT RegisterVT = IsABIMangledValue
                         ? TLI.getRegisterTypeForCallingConv(Context, ValueVT)
                         : TLI.getRegisterType(Context, ValueVT);
    for (unsigned i = 0; i != NumRegs; ++i)
      Regs.push_back(Reg + i);
    RegVTs.push_back(RegisterVT);
    RegCount.push_back(NumRegs);
    Reg += NumRegs;
  }
}

SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
                                      FunctionLoweringInfo &FuncInfo,
                                      const SDLoc &dl, SDValue &Chain,
                                      SDValue *Flag, const Value *V) const {
  // A Value with type {} or [0 x %t] needs no registers.
  if (ValueVTs.empty())
    return SDValue();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Assemble the legal parts into the final values.
  SmallVector<SDValue, 4> Values(ValueVTs.size());
  SmallVector<SDValue, 8> Parts;
  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
    // Copy the legal parts from the registers.
    EVT ValueVT = ValueVTs[Value];
    unsigned NumRegs = RegCount[Value];
    MVT RegisterVT = IsABIMangled
                         ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
                         : RegVTs[Value];

    Parts.resize(NumRegs);
    for (unsigned i = 0; i != NumRegs; ++i) {
      SDValue P;
      if (!Flag) {
        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
      } else {
        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
        *Flag = P.getValue(2);
      }

      Chain = P.getValue(1);
      Parts[i] = P;

      // If the source register was virtual and if we know something about it,
      // add an assert node.
      if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
          !RegisterVT.isInteger() || RegisterVT.isVector())
        continue;

      const FunctionLoweringInfo::LiveOutInfo *LOI =
        FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
      if (!LOI)
        continue;

      unsigned RegSize = RegisterVT.getSizeInBits();
      unsigned NumSignBits = LOI->NumSignBits;
      unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();

      if (NumZeroBits == RegSize) {
        // The current value is a zero.
        // Explicitly express that as it would be easier for
        // optimizations to kick in.
        Parts[i] = DAG.getConstant(0, dl, RegisterVT);
        continue;
      }

      // FIXME: We capture more information than the dag can represent. For
      // now, just use the tightest assertzext/assertsext possible.
      bool isSExt = true;
      EVT FromVT(MVT::Other);
      if (NumSignBits == RegSize) {
        isSExt = true;   // ASSERT SEXT 1
        FromVT = MVT::i1;
      } else if (NumZeroBits >= RegSize - 1) {
        isSExt = false;  // ASSERT ZEXT 1
        FromVT = MVT::i1;
      } else if (NumSignBits > RegSize - 8) {
        isSExt = true;   // ASSERT SEXT 8
        FromVT = MVT::i8;
      } else if (NumZeroBits >= RegSize - 8) {
        isSExt = false;  // ASSERT ZEXT 8
        FromVT = MVT::i8;
      } else if (NumSignBits > RegSize - 16) {
        isSExt = true;   // ASSERT SEXT 16
        FromVT = MVT::i16;
      } else if (NumZeroBits >= RegSize - 16) {
        isSExt = false;  // ASSERT ZEXT 16
        FromVT = MVT::i16;
      } else if (NumSignBits > RegSize - 32) {
        isSExt = true;   // ASSERT SEXT 32
        FromVT = MVT::i32;
      } else if (NumZeroBits >= RegSize - 32) {
        isSExt = false;  // ASSERT ZEXT 32
        FromVT = MVT::i32;
      } else {
        continue;
      }
      // Add an assertion node.
      assert(FromVT != MVT::Other);
      Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
                             RegisterVT, P, DAG.getValueType(FromVT));
    }

    Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
                                     NumRegs, RegisterVT, ValueVT, V);
    Part += NumRegs;
    Parts.clear();
  }

  return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
}
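
// Illustrative example (added commentary, not in the original source): if
// live-out analysis proves an i32 vreg has 24 leading zero bits
// (NumZeroBits >= RegSize - 8), the copy is wrapped as
//   AssertZext i32 %copy, ValueType:i8
// so later truncates and zero-extends of the value can be folded away.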

void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
                                 const SDLoc &dl, SDValue &Chain, SDValue *Flag,
                                 const Value *V,
                                 ISD::NodeType PreferredExtendType) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  ISD::NodeType ExtendKind = PreferredExtendType;

  // Get the list of the value's legal parts.
  unsigned NumRegs = Regs.size();
  SmallVector<SDValue, 8> Parts(NumRegs);
  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
    unsigned NumParts = RegCount[Value];

    MVT RegisterVT = IsABIMangled
                         ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
                         : RegVTs[Value];

    if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
      ExtendKind = ISD::ZERO_EXTEND;

    getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
                   &Parts[Part], NumParts, RegisterVT, V, ExtendKind);
    Part += NumParts;
  }

  // Copy the parts into the registers.
  SmallVector<SDValue, 8> Chains(NumRegs);
  for (unsigned i = 0; i != NumRegs; ++i) {
    SDValue Part;
    if (!Flag) {
      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
    } else {
      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
      *Flag = Part.getValue(1);
    }

    Chains[i] = Part.getValue(0);
  }

  if (NumRegs == 1 || Flag)
    // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
    // flagged to it. That is the CopyToReg nodes and the user are considered
    // a single scheduling unit. If we create a TokenFactor and return it as
    // chain, then the TokenFactor is both a predecessor (operand) of the
    // user as well as a successor (the TF operands are flagged to the user).
    // c1, f1 = CopyToReg
    // c2, f2 = CopyToReg
    // c3 = TokenFactor c1, c2
    // ...
    //        = op c3, ..., f2
    Chain = Chains[NumRegs-1];
  else
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}

void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
                                        unsigned MatchingIdx, const SDLoc &dl,
                                        SelectionDAG &DAG,
                                        std::vector<SDValue> &Ops) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
  if (HasMatching)
    Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
  else if (!Regs.empty() &&
           TargetRegisterInfo::isVirtualRegister(Regs.front())) {
    // Put the register class of the virtual registers in the flag word. That
    // way, later passes can recompute register class constraints for inline
    // assembly as well as normal instructions.
    // Don't do this for tied operands that can use the regclass information
    // from the def.
    const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
    const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
    Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
  }

  SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
  Ops.push_back(Res);

  if (Code == InlineAsm::Kind_Clobber) {
    // Clobbers should always have a 1:1 mapping with registers, and may
    // reference registers that have illegal (e.g. vector) types. Hence, we
    // shouldn't try to apply any sort of splitting logic to them.
    assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() &&
           "No 1:1 mapping from clobbers to regs?");
    unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
    (void)SP;
    for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) {
      Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I]));
      assert(
          (Regs[I] != SP ||
           DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) &&
          "If we clobbered the stack pointer, MFI should know about it.");
    }
    return;
  }

  for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
    unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
    MVT RegisterVT = RegVTs[Value];
    for (unsigned i = 0; i != NumRegs; ++i) {
      assert(Reg < Regs.size() && "Mismatch in # registers expected");
      unsigned TheReg = Regs[Reg++];
      Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
    }
  }
}

void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
                               const TargetLibraryInfo *li) {
  AA = aa;
  GFI = gfi;
  LibInfo = li;
  DL = &DAG.getDataLayout();
  Context = DAG.getContext();
  LPadToCallSiteMap.clear();
}

void SelectionDAGBuilder::clear() {
  NodeMap.clear();
  UnusedArgNodeMap.clear();
  PendingLoads.clear();
  PendingExports.clear();
  CurInst = nullptr;
  HasTailCall = false;
  SDNodeOrder = LowestSDNodeOrder;
  StatepointLowering.clear();
}

void SelectionDAGBuilder::clearDanglingDebugInfo() {
  DanglingDebugInfoMap.clear();
}

SDValue SelectionDAGBuilder::getRoot() {
  if (PendingLoads.empty())
    return DAG.getRoot();

  if (PendingLoads.size() == 1) {
    SDValue Root = PendingLoads[0];
    DAG.setRoot(Root);
    PendingLoads.clear();
    return Root;
  }

  // Otherwise, we have to make a token factor node.
  SDValue Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
                             PendingLoads);
  PendingLoads.clear();
  DAG.setRoot(Root);
  return Root;
}
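
// Illustrative note (added commentary, not in the original source): with
// several pending loads the new root becomes
//   t = TokenFactor load1.chain, load2.chain, ...
// which orders the loads before any user of the root while leaving them
// unordered relative to each other.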

SDValue SelectionDAGBuilder::getControlRoot() {
  SDValue Root = DAG.getRoot();

  if (PendingExports.empty())
    return Root;

  // Turn all of the CopyToReg chains into one factored node.
  if (Root.getOpcode() != ISD::EntryToken) {
    unsigned i = 0, e = PendingExports.size();
    for (; i != e; ++i) {
      assert(PendingExports[i].getNode()->getNumOperands() > 1);
      if (PendingExports[i].getNode()->getOperand(0) == Root)
        break;  // Don't add the root if we already indirectly depend on it.
    }

    if (i == e)
      PendingExports.push_back(Root);
  }

  Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
                     PendingExports);
  PendingExports.clear();
  DAG.setRoot(Root);
  return Root;
}

void SelectionDAGBuilder::visit(const Instruction &I) {
  // Set up outgoing PHI node register values before emitting the terminator.
  if (isa<TerminatorInst>(&I)) {
    HandlePHINodesInSuccessorBlocks(I.getParent());
  }

  // Increase the SDNodeOrder if dealing with a non-debug instruction.
  if (!isa<DbgInfoIntrinsic>(I))
    ++SDNodeOrder;

  CurInst = &I;

  visit(I.getOpcode(), I);

  if (!isa<TerminatorInst>(&I) && !HasTailCall &&
      !isStatepoint(&I)) // statepoints handle their exports internally
    CopyToExportRegsIfNeeded(&I);

  CurInst = nullptr;
}

void SelectionDAGBuilder::visitPHI(const PHINode &) {
  llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
}

void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
  // Note: this doesn't use InstVisitor, because it has to work with
  // ConstantExpr's in addition to instructions.
  switch (Opcode) {
  default: llvm_unreachable("Unknown instruction type encountered!");
    // Build the switch statement using the Instruction.def file.
#define HANDLE_INST(NUM, OPCODE, CLASS) \
    case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
#include "llvm/IR/Instruction.def"
  }
}
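
// For illustration (added commentary, not in the original source): for the
// Add opcode the HANDLE_INST macro above expands to
//   case Instruction::Add: visitAdd((const BinaryOperator&)I); break;
// giving one dispatch case per opcode listed in Instruction.def.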

// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
// generate the debug data structures now that we've seen its definition.
void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
                                                   SDValue Val) {
  DanglingDebugInfo &DDI = DanglingDebugInfoMap[V];
  if (DDI.getDI()) {
    const DbgValueInst *DI = DDI.getDI();
    DebugLoc dl = DDI.getdl();
    unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
    DILocalVariable *Variable = DI->getVariable();
    DIExpression *Expr = DI->getExpression();
    assert(Variable->isValidLocationForIntrinsic(dl) &&
           "Expected inlined-at fields to agree");
    SDDbgValue *SDV;
    if (Val.getNode()) {
      if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
        SDV = getDbgValue(Val, Variable, Expr, dl, DbgSDNodeOrder);
        DAG.AddDbgValue(SDV, Val.getNode(), false);
      }
    } else
      DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
    DanglingDebugInfoMap[V] = DanglingDebugInfo();
  }
}

/// getCopyFromRegs - If there was a virtual register allocated for the value V,
/// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
  DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
  SDValue Result;

  if (It != FuncInfo.ValueMap.end()) {
    unsigned InReg = It->second;

    RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
                     DAG.getDataLayout(), InReg, Ty, isABIRegCopy(V));
    SDValue Chain = DAG.getEntryNode();
    Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
                                 V);
    resolveDanglingDebugInfo(V, Result);
  }

  return Result;
}

/// getValue - Return an SDValue for the given Value.
SDValue SelectionDAGBuilder::getValue(const Value *V) {
  // If we already have an SDValue for this value, use it. It's important
  // to do this first, so that we don't create a CopyFromReg if we already
  // have a regular SDValue.
  SDValue &N = NodeMap[V];
  if (N.getNode()) return N;

  // If there's a virtual register allocated and initialized for this
  // value, use it.
  if (SDValue copyFromReg = getCopyFromRegs(V, V->getType()))
    return copyFromReg;

  // Otherwise create a new SDValue and remember it.
  SDValue Val = getValueImpl(V);
  NodeMap[V] = Val;
  resolveDanglingDebugInfo(V, Val);
  return Val;
}

// Return true if an SDValue exists for the given Value.
bool SelectionDAGBuilder::findValue(const Value *V) const {
  return (NodeMap.find(V) != NodeMap.end()) ||
         (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end());
}

/// getNonRegisterValue - Return an SDValue for the given Value, but
/// don't look in FuncInfo.ValueMap for a virtual register.
SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
  // If we already have an SDValue for this value, use it.
  SDValue &N = NodeMap[V];
  if (N.getNode()) {
    if (isa<ConstantSDNode>(N) || isa<ConstantFPSDNode>(N)) {
      // Remove the debug location from the node as the node is about to be
      // used in a location which may differ from the original debug location.
      // This is relevant to Constant and ConstantFP nodes because they can
      // appear as constant expressions inside PHI nodes.
      N->setDebugLoc(DebugLoc());
    }
    return N;
  }

  // Otherwise create a new SDValue and remember it.
  SDValue Val = getValueImpl(V);
  NodeMap[V] = Val;
  resolveDanglingDebugInfo(V, Val);
  return Val;
}

/// getValueImpl - Helper function for getValue and getNonRegisterValue.
/// Create an SDValue for the given value.
SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  if (const Constant *C = dyn_cast<Constant>(V)) {
    EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
      return DAG.getConstant(*CI, getCurSDLoc(), VT);

    if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
      return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);

    if (isa<ConstantPointerNull>(C)) {
      unsigned AS = V->getType()->getPointerAddressSpace();
      return DAG.getConstant(0, getCurSDLoc(),
                             TLI.getPointerTy(DAG.getDataLayout(), AS));
    }

    if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
      return DAG.getConstantFP(*CFP, getCurSDLoc(), VT);

    if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
      return DAG.getUNDEF(VT);

    if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
      visit(CE->getOpcode(), *CE);
      SDValue N1 = NodeMap[V];
      assert(N1.getNode() && "visit didn't populate the NodeMap!");
      return N1;
    }

    if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
      SmallVector<SDValue, 4> Constants;
      for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
           OI != OE; ++OI) {
        SDNode *Val = getValue(*OI).getNode();
        // If the operand is an empty aggregate, there are no values.
        if (!Val) continue;
        // Add each leaf value from the operand to the Constants list
        // to form a flattened list of all the values.
        for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
          Constants.push_back(SDValue(Val, i));
      }

      return DAG.getMergeValues(Constants, getCurSDLoc());
    }

    if (const ConstantDataSequential *CDS =
            dyn_cast<ConstantDataSequential>(C)) {
      SmallVector<SDValue, 4> Ops;
      for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
        SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
        // Add each leaf value from the operand to the Constants list
        // to form a flattened list of all the values.
        for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
          Ops.push_back(SDValue(Val, i));
      }

      if (isa<ArrayType>(CDS->getType()))
        return DAG.getMergeValues(Ops, getCurSDLoc());
      return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
    }

    if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
      assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
             "Unknown struct or array constant!");

      SmallVector<EVT, 4> ValueVTs;
      ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs);
      unsigned NumElts = ValueVTs.size();
      if (NumElts == 0)
        return SDValue(); // empty struct
      SmallVector<SDValue, 4> Constants(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        EVT EltVT = ValueVTs[i];
        if (isa<UndefValue>(C))
          Constants[i] = DAG.getUNDEF(EltVT);
        else if (EltVT.isFloatingPoint())
          Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
        else
          Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT);
      }

      return DAG.getMergeValues(Constants, getCurSDLoc());
    }

    if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
      return DAG.getBlockAddress(BA, VT);

    VectorType *VecTy = cast<VectorType>(V->getType());
    unsigned NumElements = VecTy->getNumElements();

    // Now that we know the number and type of the elements, get that number of
    // elements into the Ops array based on what kind of constant it is.
    SmallVector<SDValue, 16> Ops;
    if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
      for (unsigned i = 0; i != NumElements; ++i)
        Ops.push_back(getValue(CV->getOperand(i)));
    } else {
      assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
      EVT EltVT =
          TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType());

      SDValue Op;
      if (EltVT.isFloatingPoint())
        Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
      else
        Op = DAG.getConstant(0, getCurSDLoc(), EltVT);
      Ops.assign(NumElements, Op);
    }

    // Create a BUILD_VECTOR node.
    return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
  }

  // If this is a static alloca, generate it as the frameindex instead of
  // computation.
  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
    DenseMap<const AllocaInst*, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end())
      return DAG.getFrameIndex(SI->second,
                               TLI.getFrameIndexTy(DAG.getDataLayout()));
  }

  // If this is an instruction which fast-isel has deferred, select it now.
  if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
    unsigned InReg = FuncInfo.InitializeRegForValue(Inst);

    RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
                     Inst->getType(), isABIRegCopy(V));
    SDValue Chain = DAG.getEntryNode();
    return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
  }

  llvm_unreachable("Can't get register for value!");
}

void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
  auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
  bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
  bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
  MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
  // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
  if (IsMSVCCXX || IsCoreCLR)
    CatchPadMBB->setIsEHFuncletEntry();

  DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other,
                          getControlRoot()));
}

void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
  // Update machine-CFG edge.
  MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
  FuncInfo.MBB->addSuccessor(TargetMBB);

  auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
  bool IsSEH = isAsynchronousEHPersonality(Pers);
  if (IsSEH) {
    // If this is not a fall-through branch or optimizations are switched off,
    // emit the branch.
    if (TargetMBB != NextBlock(FuncInfo.MBB) ||
        TM.getOptLevel() == CodeGenOpt::None)
      DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
                              getControlRoot(), DAG.getBasicBlock(TargetMBB)));
    return;
  }

  // Figure out the funclet membership for the catchret's successor.
  // This will be used by the FuncletLayout pass to determine how to order the
  // BB's.
  // A 'catchret' returns to the outer scope's color.
  Value *ParentPad = I.getCatchSwitchParentPad();
  const BasicBlock *SuccessorColor;
  if (isa<ConstantTokenNone>(ParentPad))
    SuccessorColor = &FuncInfo.Fn->getEntryBlock();
  else
    SuccessorColor = cast<Instruction>(ParentPad)->getParent();
  assert(SuccessorColor && "No parent funclet for catchret!");
  MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
  assert(SuccessorColorMBB && "No MBB for SuccessorColor!");

  // Create the terminator node.
  SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
                            getControlRoot(), DAG.getBasicBlock(TargetMBB),
                            DAG.getBasicBlock(SuccessorColorMBB));
  DAG.setRoot(Ret);
}

void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
  // Don't emit any special code for the cleanuppad instruction. It just marks
  // the start of a funclet.
  FuncInfo.MBB->setIsEHFuncletEntry();
  FuncInfo.MBB->setIsCleanupFuncletEntry();
}

/// When an invoke or a cleanupret unwinds to the next EH pad, there are
/// many places it could ultimately go. In the IR, we have a single unwind
/// destination, but in the machine CFG, we enumerate all the possible blocks.
/// This function skips over imaginary basic blocks that hold catchswitch
/// instructions, and finds all the "real" machine
/// basic block destinations. As those destinations may not be successors of
/// EHPadBB, here we also calculate the edge probability to those destinations.
/// The passed-in Prob is the edge probability to EHPadBB.
static void findUnwindDestinations(
    FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
    BranchProbability Prob,
    SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
        &UnwindDests) {
  EHPersonality Personality =
      classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
  bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
  bool IsCoreCLR = Personality == EHPersonality::CoreCLR;

  while (EHPadBB) {
    const Instruction *Pad = EHPadBB->getFirstNonPHI();
    BasicBlock *NewEHPadBB = nullptr;
    if (isa<LandingPadInst>(Pad)) {
      // Stop on landingpads. They are not funclets.
      UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
      break;
    } else if (isa<CleanupPadInst>(Pad)) {
      // Stop on cleanup pads. Cleanups are always funclet entries for all known
      // personalities.
      UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
      UnwindDests.back().first->setIsEHFuncletEntry();
      break;
    } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
      // Add the catchpad handlers to the possible destinations.
      for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
        UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
        // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
        if (IsMSVCCXX || IsCoreCLR)
          UnwindDests.back().first->setIsEHFuncletEntry();
      }
      NewEHPadBB = CatchSwitch->getUnwindDest();
    } else {
      continue;
    }

    BranchProbabilityInfo *BPI = FuncInfo.BPI;
    if (BPI && NewEHPadBB)
      Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
    EHPadBB = NewEHPadBB;
  }
}
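
// Illustrative example (added commentary, not in the original source): for an
// invoke unwinding to a catchswitch with two handlers that itself unwinds to
// a cleanup pad, both handlers are recorded with the incoming probability
// Prob, and the cleanup pad is recorded with Prob scaled by the
// catchswitch-to-cleanup edge probability.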

void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
  // Update successor info.
  SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
  auto UnwindDest = I.getUnwindDest();
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
  BranchProbability UnwindDestProb =
      (BPI && UnwindDest)
          ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest)
          : BranchProbability::getZero();
  findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests);
  for (auto &UnwindDest : UnwindDests) {
    UnwindDest.first->setIsEHPad();
    addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second);
  }
  FuncInfo.MBB->normalizeSuccProbs();

  // Create the terminator node.
  SDValue Ret =
      DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
  DAG.setRoot(Ret);
}

void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) {
  report_fatal_error("visitCatchSwitch not yet implemented!");
}

void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  auto &DL = DAG.getDataLayout();
  SDValue Chain = getControlRoot();
  SmallVector<ISD::OutputArg, 8> Outs;
  SmallVector<SDValue, 8> OutVals;

  // Calls to @llvm.experimental.deoptimize don't generate a return value, so
  // lower
  //
  //   %val = call <ty> @llvm.experimental.deoptimize()
  //   ret <ty> %val
  //
  // differently.
  if (I.getParent()->getTerminatingDeoptimizeCall()) {
    LowerDeoptimizingReturn();
    return;
  }

  if (!FuncInfo.CanLowerReturn) {
    unsigned DemoteReg = FuncInfo.DemoteRegister;
    const Function *F = I.getParent()->getParent();

    // Emit a store of the return value through the virtual register.
    // Leave Outs empty so that LowerReturn won't try to load return
    // registers the usual way.
    SmallVector<EVT, 1> PtrValueVTs;
    ComputeValueVTs(TLI, DL,
                    F->getReturnType()->getPointerTo(
                        DAG.getDataLayout().getAllocaAddrSpace()),
                    PtrValueVTs);

    SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
                                        DemoteReg, PtrValueVTs[0]);
    SDValue RetOp = getValue(I.getOperand(0));

    SmallVector<EVT, 4> ValueVTs;
    SmallVector<uint64_t, 4> Offsets;
    ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets);
    unsigned NumValues = ValueVTs.size();

    SmallVector<SDValue, 4> Chains(NumValues);
    for (unsigned i = 0; i != NumValues; ++i) {
      // An aggregate return value cannot wrap around the address space, so
      // offsets to its parts don't wrap either.
      SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]);
      Chains[i] = DAG.getStore(
          Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i),
          // FIXME: better loc info would be nice.
          Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
    }

    Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
                        MVT::Other, Chains);
  } else if (I.getNumOperands() != 0) {
    SmallVector<EVT, 4> ValueVTs;
    ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs);
    unsigned NumValues = ValueVTs.size();
    if (NumValues) {
      SDValue RetOp = getValue(I.getOperand(0));

      const Function *F = I.getParent()->getParent();

      ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
      if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                          Attribute::SExt))
        ExtendKind = ISD::SIGN_EXTEND;
      else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                               Attribute::ZExt))
        ExtendKind = ISD::ZERO_EXTEND;

      LLVMContext &Context = F->getContext();
      bool RetInReg = F->getAttributes().hasAttribute(
          AttributeList::ReturnIndex, Attribute::InReg);

      for (unsigned j = 0; j != NumValues; ++j) {
        EVT VT = ValueVTs[j];

        if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
          VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);

        unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, VT);
        MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, VT);
        SmallVector<SDValue, 4> Parts(NumParts);
        getCopyToParts(DAG, getCurSDLoc(),
                       SDValue(RetOp.getNode(), RetOp.getResNo() + j),
                       &Parts[0], NumParts, PartVT, &I, ExtendKind, true);

        // 'inreg' on function refers to return value
        ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
        if (RetInReg)
          Flags.setInReg();

        // Propagate extension type if any
        if (ExtendKind == ISD::SIGN_EXTEND)
          Flags.setSExt();
        else if (ExtendKind == ISD::ZERO_EXTEND)
          Flags.setZExt();

        for (unsigned i = 0; i < NumParts; ++i) {
          Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
                                        VT, /*isfixed=*/true, 0, 0));
          OutVals.push_back(Parts[i]);
        }
      }
    }
  }
1555 
1556  // Push in swifterror virtual register as the last element of Outs. This makes
1557  // sure swifterror virtual register will be returned in the swifterror
1558  // physical register.
1559  const Function *F = I.getParent()->getParent();
1560  if (TLI.supportSwiftError() &&
1561  F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
1562  assert(FuncInfo.SwiftErrorArg && "Need a swift error argument");
1563  ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
1564  Flags.setSwiftError();
1565  Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
1566  EVT(TLI.getPointerTy(DL)) /*argvt*/,
1567  true /*isfixed*/, 1 /*origidx*/,
1568  0 /*partOffs*/));
1569  // Create SDNode for the swifterror virtual register.
1570  OutVals.push_back(
1571  DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVRegUseAt(
1572  &I, FuncInfo.MBB, FuncInfo.SwiftErrorArg).first,
1573  EVT(TLI.getPointerTy(DL))));
1574  }
1575 
1576  bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg();
1577  CallingConv::ID CallConv =
1578  DAG.getMachineFunction().getFunction().getCallingConv();
1579  Chain = DAG.getTargetLoweringInfo().LowerReturn(
1580  Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);
1581 
1582  // Verify that the target's LowerReturn behaved as expected.
1583  assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
1584  "LowerReturn didn't return a valid chain!");
1585 
1586  // Update the DAG with the new chain value resulting from return lowering.
1587  DAG.setRoot(Chain);
1588 }
1589 
1590 /// CopyToExportRegsIfNeeded - If the given value has virtual registers
1591 /// created for it, emit nodes to copy the value into the virtual
1592 /// registers.
1593 void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
1594  // Skip empty types
1595  if (V->getType()->isEmptyTy())
1596  return;
1597 
1598  DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
1599  if (VMI != FuncInfo.ValueMap.end()) {
1600  assert(!V->use_empty() && "Unused value assigned virtual registers!");
1601  CopyValueToVirtualRegister(V, VMI->second);
1602  }
1603 }
1604 
1605 /// ExportFromCurrentBlock - If this condition isn't known to be exported from
1606 /// the current basic block, add it to ValueMap now so that we'll get a
1607 /// CopyTo/FromReg.
1608 void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
1609  // No need to export constants.
1610  if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
1611 
1612  // Already exported?
1613  if (FuncInfo.isExportedInst(V)) return;
1614 
1615  unsigned Reg = FuncInfo.InitializeRegForValue(V);
1616  CopyValueToVirtualRegister(V, Reg);
1617 }
1618 
1619 bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
1620  const BasicBlock *FromBB) {
1621  // The operands of the setcc have to be in this block. We don't know
1622  // how to export them from some other block.
1623  if (const Instruction *VI = dyn_cast<Instruction>(V)) {
1624  // Can export from current BB.
1625  if (VI->getParent() == FromBB)
1626  return true;
1627 
1628  // Is already exported, noop.
1629  return FuncInfo.isExportedInst(V);
1630  }
1631 
1632  // If this is an argument, we can export it if the BB is the entry block or
1633  // if it is already exported.
1634  if (isa<Argument>(V)) {
1635  if (FromBB == &FromBB->getParent()->getEntryBlock())
1636  return true;
1637 
1638  // Otherwise, can only export this if it is already exported.
1639  return FuncInfo.isExportedInst(V);
1640  }
1641 
1642  // Otherwise, constants can always be exported.
1643  return true;
1644 }
1645 
1646 /// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
1647 BranchProbability
1648 SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,
1649  const MachineBasicBlock *Dst) const {
1650  BranchProbabilityInfo *BPI = FuncInfo.BPI;
1651  const BasicBlock *SrcBB = Src->getBasicBlock();
1652  const BasicBlock *DstBB = Dst->getBasicBlock();
1653  if (!BPI) {
1654  // If BPI is not available, set the default probability as 1 / N, where N is
1655  // the number of successors.
1656  auto SuccSize = std::max<uint32_t>(
1657  std::distance(succ_begin(SrcBB), succ_end(SrcBB)), 1);
1658  return BranchProbability(1, SuccSize);
1659  }
1660  return BPI->getEdgeProbability(SrcBB, DstBB);
1661 }
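// ---- Editorial note (not part of the original source) -------------------
// Without BPI, every edge out of SrcBB gets the uniform fallback 1/N; e.g. a
// two-way conditional branch yields BranchProbability(1, 2) per successor.
// The std::max against 1 guards the degenerate no-successor case so the
// denominator can never be zero.
// --------------------------------------------------------------------------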
1662 
1663 void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src,
1664  MachineBasicBlock *Dst,
1665  BranchProbability Prob) {
1666  if (!FuncInfo.BPI)
1667  Src->addSuccessorWithoutProb(Dst);
1668  else {
1669  if (Prob.isUnknown())
1670  Prob = getEdgeProbability(Src, Dst);
1671  Src->addSuccessor(Dst, Prob);
1672  }
1673 }
1674 
1675 static bool InBlock(const Value *V, const BasicBlock *BB) {
1676  if (const Instruction *I = dyn_cast<Instruction>(V))
1677  return I->getParent() == BB;
1678  return true;
1679 }
1680 
1681 /// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
1682 /// This function emits a branch and is used at the leaves of an OR or an
1683 /// AND operator tree.
1684 void
1685 SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
1686  MachineBasicBlock *TBB,
1687  MachineBasicBlock *FBB,
1688  MachineBasicBlock *CurBB,
1689  MachineBasicBlock *SwitchBB,
1690  BranchProbability TProb,
1691  BranchProbability FProb,
1692  bool InvertCond) {
1693  const BasicBlock *BB = CurBB->getBasicBlock();
1694 
1695  // If the leaf of the tree is a comparison, merge the condition into
1696  // the caseblock.
1697  if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
1698  // The operands of the cmp have to be in this block. We don't know
1699  // how to export them from some other block. If this is the first block
1700  // of the sequence, no exporting is needed.
1701  if (CurBB == SwitchBB ||
1702  (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
1703  isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
1704  ISD::CondCode Condition;
1705  if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
1706  ICmpInst::Predicate Pred =
1707  InvertCond ? IC->getInversePredicate() : IC->getPredicate();
1708  Condition = getICmpCondCode(Pred);
1709  } else {
1710  const FCmpInst *FC = cast<FCmpInst>(Cond);
1711  FCmpInst::Predicate Pred =
1712  InvertCond ? FC->getInversePredicate() : FC->getPredicate();
1713  Condition = getFCmpCondCode(Pred);
1714  if (TM.Options.NoNaNsFPMath)
1715  Condition = getFCmpCodeWithoutNaN(Condition);
1716  }
1717 
1718  CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
1719  TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
1720  SwitchCases.push_back(CB);
1721  return;
1722  }
1723  }
1724 
1725  // Create a CaseBlock record representing this branch.
1726  ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
1727  CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
1728  nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
1729  SwitchCases.push_back(CB);
1730 }
1731 
1732 /// FindMergedConditions - If Cond is an expression like (X && Y) or (X || Y), emit a tree of conditional branches at its leaves instead of materializing the boolean value.
1733 void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
1734  MachineBasicBlock *TBB,
1735  MachineBasicBlock *FBB,
1736  MachineBasicBlock *CurBB,
1737  MachineBasicBlock *SwitchBB,
1738  Instruction::BinaryOps Opc,
1739  BranchProbability TProb,
1740  BranchProbability FProb,
1741  bool InvertCond) {
1742  // Skip over a NOT node, and remember to invert the op and operands at the
1743  // next level.
1744  if (BinaryOperator::isNot(Cond) && Cond->hasOneUse()) {
1745  const Value *CondOp = BinaryOperator::getNotArgument(Cond);
1746  if (InBlock(CondOp, CurBB->getBasicBlock())) {
1747  FindMergedConditions(CondOp, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
1748  !InvertCond);
1749  return;
1750  }
1751  }
1752 
1753  const Instruction *BOp = dyn_cast<Instruction>(Cond);
1754  // Compute the effective opcode for Cond, taking into account whether it needs
1755  // to be inverted, e.g.
1756  // and (not (or A, B)), C
1757  // gets lowered as
1758  // and (and (not A, not B), C)
1759  unsigned BOpc = 0;
1760  if (BOp) {
1761  BOpc = BOp->getOpcode();
1762  if (InvertCond) {
1763  if (BOpc == Instruction::And)
1764  BOpc = Instruction::Or;
1765  else if (BOpc == Instruction::Or)
1766  BOpc = Instruction::And;
1767  }
1768  }
1769 
1770  // If this node is not part of the or/and tree, emit it as a branch.
1771  if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
1772  BOpc != unsigned(Opc) || !BOp->hasOneUse() ||
1773  BOp->getParent() != CurBB->getBasicBlock() ||
1774  !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
1775  !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
1776  EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
1777  TProb, FProb, InvertCond);
1778  return;
1779  }
1780 
1781  // Create TmpBB after CurBB.
1782  MachineFunction::iterator BBI(CurBB);
1783  MachineFunction &MF = DAG.getMachineFunction();
1784  MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
1785  CurBB->getParent()->insert(++BBI, TmpBB);
1786 
1787  if (Opc == Instruction::Or) {
1788  // Codegen X | Y as:
1789  // BB1:
1790  // jmp_if_X TBB
1791  // jmp TmpBB
1792  // TmpBB:
1793  // jmp_if_Y TBB
1794  // jmp FBB
1795  //
1796 
1797  // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
1798  // The requirement is that
1799  // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
1800  // = TrueProb for original BB.
1801  // Assuming the original probabilities are A and B, one choice is to set
1802  // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
1803  // A/(1+B) and 2B/(1+B). This choice assumes that
1804  // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
1805  // Another choice is to assume TrueProb for BB1 equals to TrueProb for
1806  // TmpBB, but the math is more complicated.
1807 
1808  auto NewTrueProb = TProb / 2;
1809  auto NewFalseProb = TProb / 2 + FProb;
1810  // Emit the LHS condition.
1811  FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
1812  NewTrueProb, NewFalseProb, InvertCond);
1813 
1814  // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
1815  SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
1816  BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
1817  // Emit the RHS condition into TmpBB.
1818  FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
1819  Probs[0], Probs[1], InvertCond);
1820  } else {
1821  assert(Opc == Instruction::And && "Unknown merge op!");
1822  // Codegen X & Y as:
1823  // BB1:
1824  // jmp_if_X TmpBB
1825  // jmp FBB
1826  // TmpBB:
1827  // jmp_if_Y TBB
1828  // jmp FBB
1829  //
1830  // This requires creation of TmpBB after CurBB.
1831 
1832  // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
1833  // The requirement is that
1834  // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
1835  // = FalseProb for original BB.
1836  // Assuming the original probabilities are A and B, one choice is to set
1837  // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
1838  // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
1839  // TrueProb for BB1 * FalseProb for TmpBB.
1840 
1841  auto NewTrueProb = TProb + FProb / 2;
1842  auto NewFalseProb = FProb / 2;
1843  // Emit the LHS condition.
1844  FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
1845  NewTrueProb, NewFalseProb, InvertCond);
1846 
1847  // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
1848  SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
1849  BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
1850  // Emit the RHS condition into TmpBB.
1851  FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
1852  Probs[0], Probs[1], InvertCond);
1853  }
1854 }
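// ---- Editorial sketch (not part of the original source) -----------------
// A scalar check of the probability algebra in the OR case above, assuming
// TProb = A and FProb = B with A + B == 1. Normalizing {A/2, B} yields the
// TmpBB probabilities A/(1+B) and 2B/(1+B) quoted in the comment, and the
// stated requirement holds exactly. All names here are illustrative.
static bool orCaseProbabilityIdentityHolds(double A, double B) {
  double Sum = A / 2 + B;          // normalization factor for TmpBB
  double TmpTrue = (A / 2) / Sum;  // == A / (1 + B), since A == 1 - B
  // The requirement stated above:
  //   TrueProb(BB1) + FalseProb(BB1) * TrueProb(TmpBB) == TrueProb(original).
  double Recovered = A / 2 + (A / 2 + B) * TmpTrue;
  return Recovered > A - 1e-9 && Recovered < A + 1e-9;
}
// --------------------------------------------------------------------------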
1855 
1856 /// If the set of cases should be emitted as a series of branches, return true.
1857 /// If we should emit this as a bunch of and/or'd together conditions, return
1858 /// false.
1859 bool
1860 SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) {
1861  if (Cases.size() != 2) return true;
1862 
1863  // If this is two comparisons of the same values or'd or and'd together, they
1864  // will get folded into a single comparison, so don't emit two blocks.
1865  if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
1866  Cases[0].CmpRHS == Cases[1].CmpRHS) ||
1867  (Cases[0].CmpRHS == Cases[1].CmpLHS &&
1868  Cases[0].CmpLHS == Cases[1].CmpRHS)) {
1869  return false;
1870  }
1871 
1872  // Handle: (X != null) | (Y != null) --> (X|Y) != 0
1873  // Handle: (X == null) & (Y == null) --> (X|Y) == 0
1874  if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
1875  Cases[0].CC == Cases[1].CC &&
1876  isa<Constant>(Cases[0].CmpRHS) &&
1877  cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
1878  if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
1879  return false;
1880  if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
1881  return false;
1882  }
1883 
1884  return true;
1885 }
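// ---- Editorial sketch (not part of the original source) -----------------
// The scalar folding the null-compare special case above preserves: two
// equality tests against null combine into one compare of the OR'd values.
static bool bothNull(unsigned long X, unsigned long Y) {
  return (X | Y) == 0;  // (X == 0) & (Y == 0)  ==>  (X | Y) == 0
}
// --------------------------------------------------------------------------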
1886 
1887 void SelectionDAGBuilder::visitBr(const BranchInst &I) {
1888  MachineBasicBlock *BrMBB = FuncInfo.MBB;
1889 
1890  // Update machine-CFG edges.
1891  MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
1892 
1893  if (I.isUnconditional()) {
1894  // Update machine-CFG edges.
1895  BrMBB->addSuccessor(Succ0MBB);
1896 
1897  // If this is not a fall-through branch or optimizations are switched off,
1898  // emit the branch.
1899  if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None)
1900  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
1901  MVT::Other, getControlRoot(),
1902  DAG.getBasicBlock(Succ0MBB)));
1903 
1904  return;
1905  }
1906 
1907  // If this condition is one of the special cases we handle, do special stuff
1908  // now.
1909  const Value *CondVal = I.getCondition();
1910  MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
1911 
1912  // If this is a series of conditions that are or'd or and'd together, emit
1913  // this as a sequence of branches instead of setcc's with and/or operations.
1914  // As long as jumps are not expensive, this should improve performance.
1915  // For example, instead of something like:
1916  // cmp A, B
1917  // C = seteq
1918  // cmp D, E
1919  // F = setle
1920  // or C, F
1921  // jnz foo
1922  // Emit:
1923  // cmp A, B
1924  // je foo
1925  // cmp D, E
1926  // jle foo
1927  if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
1928  Instruction::BinaryOps Opcode = BOp->getOpcode();
1929  if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
1930  !I.getMetadata(LLVMContext::MD_unpredictable) &&
1931  (Opcode == Instruction::And || Opcode == Instruction::Or)) {
1932  FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
1933  Opcode,
1934  getEdgeProbability(BrMBB, Succ0MBB),
1935  getEdgeProbability(BrMBB, Succ1MBB),
1936  /*InvertCond=*/false);
1937  // If the compares in later blocks need to use values not currently
1938  // exported from this block, export them now. This block should always
1939  // be the first entry.
1940  assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
1941 
1942  // Allow some cases to be rejected.
1943  if (ShouldEmitAsBranches(SwitchCases)) {
1944  for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
1945  ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
1946  ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
1947  }
1948 
1949  // Emit the branch for this block.
1950  visitSwitchCase(SwitchCases[0], BrMBB);
1951  SwitchCases.erase(SwitchCases.begin());
1952  return;
1953  }
1954 
1955  // Okay, we decided not to do this, remove any inserted MBB's and clear
1956  // SwitchCases.
1957  for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
1958  FuncInfo.MF->erase(SwitchCases[i].ThisBB);
1959 
1960  SwitchCases.clear();
1961  }
1962  }
1963 
1964  // Create a CaseBlock record representing this branch.
1965  CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
1966  nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc());
1967 
1968  // Use visitSwitchCase to actually insert the fast branch sequence for this
1969  // cond branch.
1970  visitSwitchCase(CB, BrMBB);
1971 }
1972 
1973 /// visitSwitchCase - Emits the necessary code to represent a single node in
1974 /// the binary search tree resulting from lowering a switch instruction.
1975 void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
1976  MachineBasicBlock *SwitchBB) {
1977  SDValue Cond;
1978  SDValue CondLHS = getValue(CB.CmpLHS);
1979  SDLoc dl = CB.DL;
1980 
1981  // Build the setcc now.
1982  if (!CB.CmpMHS) {
1983  // Fold "(X == true)" to X and "(X == false)" to !X to
1984  // handle common cases produced by branch lowering.
1985  if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
1986  CB.CC == ISD::SETEQ)
1987  Cond = CondLHS;
1988  else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
1989  CB.CC == ISD::SETEQ) {
1990  SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
1991  Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
1992  } else
1993  Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
1994  } else {
1995  assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
1996 
1997  const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
1998  const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
1999 
2000  SDValue CmpOp = getValue(CB.CmpMHS);
2001  EVT VT = CmpOp.getValueType();
2002 
2003  if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
2004  Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT),
2005  ISD::SETLE);
2006  } else {
2007  SDValue SUB = DAG.getNode(ISD::SUB, dl,
2008  VT, CmpOp, DAG.getConstant(Low, dl, VT));
2009  Cond = DAG.getSetCC(dl, MVT::i1, SUB,
2010  DAG.getConstant(High-Low, dl, VT), ISD::SETULE);
2011  }
2012  }
2013 
2014  // Update successor info
2015  addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
2016  // TrueBB and FalseBB are always different unless the incoming IR is
2017  // degenerate. This only happens when running llc on weird IR.
2018  if (CB.TrueBB != CB.FalseBB)
2019  addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb);
2020  SwitchBB->normalizeSuccProbs();
2021 
2022  // If the lhs block is the next block, invert the condition so that we can
2023  // fall through to the lhs instead of the rhs block.
2024  if (CB.TrueBB == NextBlock(SwitchBB)) {
2025  std::swap(CB.TrueBB, CB.FalseBB);
2026  SDValue True = DAG.getConstant(1, dl, Cond.getValueType());
2027  Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
2028  }
2029 
2030  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2031  MVT::Other, getControlRoot(), Cond,
2032  DAG.getBasicBlock(CB.TrueBB));
2033 
2034  // Insert the false branch. Do this even if it's a fall through branch,
2035  // this makes it easier to do DAG optimizations which require inverting
2036  // the branch condition.
2037  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
2038  DAG.getBasicBlock(CB.FalseBB));
2039 
2040  DAG.setRoot(BrCond);
2041 }
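// ---- Editorial sketch (not part of the original source) -----------------
// The SUB + SETULE pair emitted for range cases above is the standard
// unsigned range-check trick, shown here in scalar form: if X < Low, the
// subtraction wraps to a huge unsigned value, so one unsigned compare tests
// both bounds at once.
static bool inCaseRange(unsigned long long X, unsigned long long Low,
                        unsigned long long High) {
  return X - Low <= High - Low;  // equivalent to Low <= X && X <= High
}
// --------------------------------------------------------------------------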
2042 
2043 /// visitJumpTable - Emit JumpTable node in the current MBB
2044 void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
2045  // Emit the code for the jump table
2046  assert(JT.Reg != -1U && "Should lower JT Header first!");
2047  EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2048  SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
2049  JT.Reg, PTy);
2050  SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
2051  SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(),
2052  MVT::Other, Index.getValue(1),
2053  Table, Index);
2054  DAG.setRoot(BrJumpTable);
2055 }
2056 
2057 /// visitJumpTableHeader - This function emits necessary code to produce index
2058 /// in the JumpTable from switch case.
2059 void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
2060  JumpTableHeader &JTH,
2061  MachineBasicBlock *SwitchBB) {
2062  SDLoc dl = getCurSDLoc();
2063 
2064  // Subtract the lowest switch case value from the value being switched on and
2065  // conditional branch to default mbb if the result is greater than the
2066  // difference between smallest and largest cases.
2067  SDValue SwitchOp = getValue(JTH.SValue);
2068  EVT VT = SwitchOp.getValueType();
2069  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
2070  DAG.getConstant(JTH.First, dl, VT));
2071 
2072  // The SDNode we just created, which holds the value being switched on minus
2073  // the smallest case value, needs to be copied to a virtual register so it
2074  // can be used as an index into the jump table in a subsequent basic block.
2075  // This value may be smaller or larger than the target's pointer type, and
2076  // therefore may require extension or truncation.
2077  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2078  SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout()));
2079 
2080  unsigned JumpTableReg =
2081  FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout()));
2082  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl,
2083  JumpTableReg, SwitchOp);
2084  JT.Reg = JumpTableReg;
2085 
2086  // Emit the range check for the jump table, and branch to the default block
2087  // for the switch statement if the value being switched on exceeds the largest
2088  // case in the switch.
2089  SDValue CMP = DAG.getSetCC(
2090  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
2091  Sub.getValueType()),
2092  Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
2093 
2094  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2095  MVT::Other, CopyTo, CMP,
2096  DAG.getBasicBlock(JT.Default));
2097 
2098  // Avoid emitting unnecessary branches to the next block.
2099  if (JT.MBB != NextBlock(SwitchBB))
2100  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
2101  DAG.getBasicBlock(JT.MBB));
2102 
2103  DAG.setRoot(BrCond);
2104 }
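// ---- Editorial sketch (not part of the original source) -----------------
// What the jump table header plus visitJumpTable amount to at runtime,
// written as plain C++; all names here are illustrative only.
static int jumpTableDispatch(unsigned X, unsigned First, unsigned Last,
                             const int *Table, int DefaultVal) {
  unsigned Index = X - First;  // subtract the smallest case value
  if (Index > Last - First)    // the single unsigned range check (SETUGT)
    return DefaultVal;         // branch to the default block
  return Table[Index];         // BR_JT: indexed branch through the table
}
// --------------------------------------------------------------------------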
2105 
2106 /// Create a LOAD_STACK_GUARD node, and let it carry the target specific global
2107 /// variable if there exists one.
2108 static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
2109  SDValue &Chain) {
2110  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2111  EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
2112  MachineFunction &MF = DAG.getMachineFunction();
2113  Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent());
2114  MachineSDNode *Node =
2115  DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
2116  if (Global) {
2117  MachinePointerInfo MPInfo(Global);
2118  MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1);
2119  auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
2120  MachineMemOperand::MODereferenceable;
2121  *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, PtrTy.getSizeInBits() / 8,
2122  DAG.getEVTAlignment(PtrTy));
2123  Node->setMemRefs(MemRefs, MemRefs + 1);
2124  }
2125  return SDValue(Node, 0);
2126 }
2127 
2128 /// Codegen a new tail for a stack protector check ParentMBB which has had its
2129 /// tail spliced into a stack protector check success bb.
2130 ///
2131 /// For a high level explanation of how this fits into the stack protector
2132 /// generation see the comment on the declaration of class
2133 /// StackProtectorDescriptor.
2134 void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
2135  MachineBasicBlock *ParentBB) {
2136 
2137  // First create the loads to the guard/stack slot for the comparison.
2138  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2139  EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
2140 
2141  MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
2142  int FI = MFI.getStackProtectorIndex();
2143 
2144  SDValue Guard;
2145  SDLoc dl = getCurSDLoc();
2146  SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
2147  const Module &M = *ParentBB->getParent()->getFunction().getParent();
2148  unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext()));
2149 
2150  // Generate code to load the content of the guard slot.
2151  SDValue GuardVal = DAG.getLoad(
2152  PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
2153  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
2154  MachineMemOperand::MOVolatile);
2155 
2156  if (TLI.useStackGuardXorFP())
2157  GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl);
2158 
2159  // Retrieve guard check function, nullptr if instrumentation is inlined.
2160  if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) {
2161  // The target provides a guard check function to validate the guard value.
2162  // Generate a call to that function with the content of the guard slot as
2163  // argument.
2164  auto *Fn = cast<Function>(GuardCheck);
2165  FunctionType *FnTy = Fn->getFunctionType();
2166  assert(FnTy->getNumParams() == 1 && "Invalid function signature");
2167 
2167 
2168  TargetLowering::ArgListTy Args;
2169  TargetLowering::ArgListEntry Entry;
2170  Entry.Node = GuardVal;
2171  Entry.Ty = FnTy->getParamType(0);
2172  if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
2173  Entry.IsInReg = true;
2174  Args.push_back(Entry);
2175 
2176  TargetLowering::CallLoweringInfo CLI(DAG);
2177  CLI.setDebugLoc(getCurSDLoc())
2178  .setChain(DAG.getEntryNode())
2179  .setCallee(Fn->getCallingConv(), FnTy->getReturnType(),
2180  getValue(GuardCheck), std::move(Args));
2181 
2182  std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
2183  DAG.setRoot(Result.second);
2184  return;
2185  }
2186 
2187  // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
2188  // Otherwise, emit a volatile load to retrieve the stack guard value.
2189  SDValue Chain = DAG.getEntryNode();
2190  if (TLI.useLoadStackGuardNode()) {
2191  Guard = getLoadStackGuard(DAG, dl, Chain);
2192  } else {
2193  const Value *IRGuard = TLI.getSDagStackGuard(M);
2194  SDValue GuardPtr = getValue(IRGuard);
2195 
2196  Guard =
2197  DAG.getLoad(PtrTy, dl, Chain, GuardPtr, MachinePointerInfo(IRGuard, 0),
2198  Align, MachineMemOperand::MOVolatile);
2199  }
2200 
2201  // Perform the comparison via a subtract/getsetcc.
2202  EVT VT = Guard.getValueType();
2203  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, GuardVal);
2204 
2205  SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
2206  *DAG.getContext(),
2207  Sub.getValueType()),
2208  Sub, DAG.getConstant(0, dl, VT), ISD::SETNE);
2209 
2210  // If the sub is not 0, then we know the guard/stackslot do not equal, so
2211  // branch to failure MBB.
2212  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2213  MVT::Other, GuardVal.getOperand(0),
2214  Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
2215  // Otherwise branch to success MBB.
2216  SDValue Br = DAG.getNode(ISD::BR, dl,
2217  MVT::Other, BrCond,
2218  DAG.getBasicBlock(SPD.getSuccessMBB()));
2219 
2220  DAG.setRoot(Br);
2221 }
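// ---- Editorial sketch (not part of the original source) -----------------
// The inline check built above, in scalar form; stackChkFail stands in for
// the failure block's call and is illustrative only.
extern void stackChkFail();
static void checkStackGuard(unsigned long Guard, unsigned long GuardVal) {
  if (Guard - GuardVal != 0)  // the SUB + SETNE comparison above
    stackChkFail();           // branch to SPD.getFailureMBB()
  // otherwise fall through to SPD.getSuccessMBB()
}
// --------------------------------------------------------------------------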
2222 
2223 /// Codegen the failure basic block for a stack protector check.
2224 ///
2225 /// A failure stack protector machine basic block consists simply of a call to
2226 /// __stack_chk_fail().
2227 ///
2228 /// For a high level explanation of how this fits into the stack protector
2229 /// generation see the comment on the declaration of class
2230 /// StackProtectorDescriptor.
2231 void
2232 SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
2233  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2234  SDValue Chain =
2235  TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
2236  None, false, getCurSDLoc(), false, false).second;
2237  DAG.setRoot(Chain);
2238 }
2239 
2240 /// visitBitTestHeader - This function emits necessary code to produce value
2241 /// suitable for "bit tests"
2242 void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
2243  MachineBasicBlock *SwitchBB) {
2244  SDLoc dl = getCurSDLoc();
2245 
2246  // Subtract the minimum value
2247  SDValue SwitchOp = getValue(B.SValue);
2248  EVT VT = SwitchOp.getValueType();
2249  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
2250  DAG.getConstant(B.First, dl, VT));
2251 
2252  // Check range
2253  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2254  SDValue RangeCmp = DAG.getSetCC(
2255  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
2256  Sub.getValueType()),
2257  Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT);
2258 
2259  // Determine the type of the test operands.
2260  bool UsePtrType = false;
2261  if (!TLI.isTypeLegal(VT))
2262  UsePtrType = true;
2263  else {
2264  for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
2265  if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
2266  // Switch table case ranges are encoded into a series of masks.
2267  // Just use the pointer type; it's guaranteed to fit.
2268  UsePtrType = true;
2269  break;
2270  }
2271  }
2272  if (UsePtrType) {
2273  VT = TLI.getPointerTy(DAG.getDataLayout());
2274  Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
2275  }
2276 
2277  B.RegVT = VT.getSimpleVT();
2278  B.Reg = FuncInfo.CreateReg(B.RegVT);
2279  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub);
2280 
2281  MachineBasicBlock* MBB = B.Cases[0].ThisBB;
2282 
2283  addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
2284  addSuccessorWithProb(SwitchBB, MBB, B.Prob);
2285  SwitchBB->normalizeSuccProbs();
2286 
2287  SDValue BrRange = DAG.getNode(ISD::BRCOND, dl,
2288  MVT::Other, CopyTo, RangeCmp,
2289  DAG.getBasicBlock(B.Default));
2290 
2291  // Avoid emitting unnecessary branches to the next block.
2292  if (MBB != NextBlock(SwitchBB))
2293  BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange,
2294  DAG.getBasicBlock(MBB));
2295 
2296  DAG.setRoot(BrRange);
2297 }
2298 
2299 /// visitBitTestCase - this function produces one "bit test"
2300 void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
2301  MachineBasicBlock* NextMBB,
2302  BranchProbability BranchProbToNext,
2303  unsigned Reg,
2304  BitTestCase &B,
2305  MachineBasicBlock *SwitchBB) {
2306  SDLoc dl = getCurSDLoc();
2307  MVT VT = BB.RegVT;
2308  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT);
2309  SDValue Cmp;
2310  unsigned PopCount = countPopulation(B.Mask);
2311  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2312  if (PopCount == 1) {
2313  // Testing for a single bit; just compare the shift count with what it
2314  // would need to be to shift a 1 bit in that position.
2315  Cmp = DAG.getSetCC(
2316  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2317  ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT),
2318  ISD::SETEQ);
2319  } else if (PopCount == BB.Range) {
2320  // There is only one zero bit in the range, test for it directly.
2321  Cmp = DAG.getSetCC(
2322  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2323  ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT),
2324  ISD::SETNE);
2325  } else {
2326  // Make desired shift
2327  SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT,
2328  DAG.getConstant(1, dl, VT), ShiftOp);
2329 
2330  // Emit bit tests and jumps
2331  SDValue AndOp = DAG.getNode(ISD::AND, dl,
2332  VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT));
2333  Cmp = DAG.getSetCC(
2334  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2335  AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE);
2336  }
2337 
2338  // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
2339  addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
2340  // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
2341  addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
2342  // The sum of B.ExtraProb and BranchProbToNext is not guaranteed to be one,
2343  // as they are relative probabilities (and thus work more like weights),
2344  // so we normalize them so that together they sum to one.
2345  SwitchBB->normalizeSuccProbs();
2346 
2347  SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
2348  MVT::Other, getControlRoot(),
2349  Cmp, DAG.getBasicBlock(B.TargetBB));
2350 
2351  // Avoid emitting unnecessary branches to the next block.
2352  if (NextMBB != NextBlock(SwitchBB))
2353  BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd,
2354  DAG.getBasicBlock(NextMBB));
2355 
2356  DAG.setRoot(BrAnd);
2357 }
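// ---- Editorial sketch (not part of the original source) -----------------
// The general bit-test above in scalar form: a cluster of cases is encoded
// as a mask, and membership costs one shift plus one AND (the header's range
// check has already bounded ShiftAmt). When the mask has a single set bit,
// the code instead compares the shift amount against the bit's position,
// saving the shift.
static bool bitTestHit(unsigned ShiftAmt, unsigned long long Mask) {
  return ((1ULL << ShiftAmt) & Mask) != 0;
}
// --------------------------------------------------------------------------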
2358 
2359 void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
2360  MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
2361 
2362  // Retrieve successors. Look through artificial IR level blocks like
2363  // catchswitch for successors.
2364  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
2365  const BasicBlock *EHPadBB = I.getSuccessor(1);
2366 
2367  // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
2368  // have to do anything here to lower funclet bundles.
2369  assert(!I.hasOperandBundlesOtherThan(
2370  {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
2371  "Cannot lower invokes with arbitrary operand bundles yet!");
2372 
2373  const Value *Callee(I.getCalledValue());
2374  const Function *Fn = dyn_cast<Function>(Callee);
2375  if (isa<InlineAsm>(Callee))
2376  visitInlineAsm(&I);
2377  else if (Fn && Fn->isIntrinsic()) {
2378  switch (Fn->getIntrinsicID()) {
2379  default:
2380  llvm_unreachable("Cannot invoke this intrinsic");
2381  case Intrinsic::donothing:
2382  // Ignore invokes to @llvm.donothing: jump directly to the next BB.
2383  break;
2384  case Intrinsic::experimental_patchpoint_void:
2385  case Intrinsic::experimental_patchpoint_i64:
2386  visitPatchpoint(&I, EHPadBB);
2387  break;
2388  case Intrinsic::experimental_gc_statepoint:
2389  LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
2390  break;
2391  }
2392  } else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) {
2393  // Currently we do not lower any intrinsic calls with deopt operand bundles.
2394  // Eventually we will support lowering the @llvm.experimental.deoptimize
2395  // intrinsic, and right now there are no plans to support other intrinsics
2396  // with deopt state.
2397  LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
2398  } else {
2399  LowerCallTo(&I, getValue(Callee), false, EHPadBB);
2400  }
2401 
2402  // If the value of the invoke is used outside of its defining block, make it
2403  // available as a virtual register.
2404  // We already took care of the exported value for the statepoint instruction
2405  // during call to the LowerStatepoint.
2406  if (!isStatepoint(I)) {
2407  CopyToExportRegsIfNeeded(&I);
2408  }
2409 
2410  SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
2411  BranchProbabilityInfo *BPI = FuncInfo.BPI;
2412  BranchProbability EHPadBBProb =
2413  BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
2414  : BranchProbability::getZero();
2415  findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests);
2416 
2417  // Update successor info.
2418  addSuccessorWithProb(InvokeMBB, Return);
2419  for (auto &UnwindDest : UnwindDests) {
2420  UnwindDest.first->setIsEHPad();
2421  addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
2422  }
2423  InvokeMBB->normalizeSuccProbs();
2424 
2425  // Drop into normal successor.
2426  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
2427  MVT::Other, getControlRoot(),
2428  DAG.getBasicBlock(Return)));
2429 }
2430 
2431 void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
2432  llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
2433 }
2434 
2435 void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
2436  assert(FuncInfo.MBB->isEHPad() &&
2437  "Call to landingpad not in landing pad!");
2438 
2439  MachineBasicBlock *MBB = FuncInfo.MBB;
2440  addLandingPadInfo(LP, *MBB);
2441 
2442  // If there aren't registers to copy the values into (e.g., during SjLj
2443  // exceptions), then don't bother to create these DAG nodes.
2444  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2445  const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn();
2446  if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
2447  TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
2448  return;
2449 
2450  // If landingpad's return type is token type, we don't create DAG nodes
2451  // for its exception pointer and selector value. The extraction of exception
2452  // pointer or selector value from token type landingpads is not currently
2453  // supported.
2454  if (LP.getType()->isTokenTy())
2455  return;
2456 
2457  SmallVector<EVT, 2> ValueVTs;
2458  SDLoc dl = getCurSDLoc();
2459  ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs);
2460  assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");
2461 
2462  // Get the two live-in registers as SDValues. The physregs have already been
2463  // copied into virtual registers.
2464  SDValue Ops[2];
2465  if (FuncInfo.ExceptionPointerVirtReg) {
2466  Ops[0] = DAG.getZExtOrTrunc(
2467  DAG.getCopyFromReg(DAG.getEntryNode(), dl,
2468  FuncInfo.ExceptionPointerVirtReg,
2469  TLI.getPointerTy(DAG.getDataLayout())),
2470  dl, ValueVTs[0]);
2471  } else {
2472  Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()));
2473  }
2474  Ops[1] = DAG.getZExtOrTrunc(
2475  DAG.getCopyFromReg(DAG.getEntryNode(), dl,
2476  FuncInfo.ExceptionSelectorVirtReg,
2477  TLI.getPointerTy(DAG.getDataLayout())),
2478  dl, ValueVTs[1]);
2479 
2480  // Merge into one.
2481  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
2482  DAG.getVTList(ValueVTs), Ops);
2483  setValue(&LP, Res);
2484 }
2485 
2486 void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
2487 #ifndef NDEBUG
2488  for (const CaseCluster &CC : Clusters)
2489  assert(CC.Low == CC.High && "Input clusters must be single-case");
2490 #endif
2491 
2492  std::sort(Clusters.begin(), Clusters.end(),
2493  [](const CaseCluster &a, const CaseCluster &b) {
2494  return a.Low->getValue().slt(b.Low->getValue());
2495  });
2496 
2497  // Merge adjacent clusters with the same destination.
2498  const unsigned N = Clusters.size();
2499  unsigned DstIndex = 0;
2500  for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) {
2501  CaseCluster &CC = Clusters[SrcIndex];
2502  const ConstantInt *CaseVal = CC.Low;
2503  MachineBasicBlock *Succ = CC.MBB;
2504 
2505  if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ &&
2506  (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) {
2507  // If this case has the same successor and is a neighbour, merge it into
2508  // the previous cluster.
2509  Clusters[DstIndex - 1].High = CaseVal;
2510  Clusters[DstIndex - 1].Prob += CC.Prob;
2511  } else {
2512  std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex],
2513  sizeof(Clusters[SrcIndex]));
2514  }
2515  }
2516  Clusters.resize(DstIndex);
2517 }
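// ---- Editorial sketch (not part of the original source) -----------------
// The sort-then-merge pass above on plain data: adjacent clusters with the
// same destination, e.g. [1,1]->A and [2,2]->A, fold into one range [1,2]->A
// whose probability is the sum of its parts. Types are illustrative only.
struct SketchCluster { long Low, High; int Dest; double Prob; };
static unsigned mergeAdjacent(SketchCluster *Cs, unsigned N) {
  unsigned Dst = 0;
  for (unsigned Src = 0; Src != N; ++Src) {
    if (Dst != 0 && Cs[Dst - 1].Dest == Cs[Src].Dest &&
        Cs[Src].Low - Cs[Dst - 1].High == 1) {
      Cs[Dst - 1].High = Cs[Src].High;   // extend the previous range
      Cs[Dst - 1].Prob += Cs[Src].Prob;  // and accumulate its probability
    } else {
      Cs[Dst++] = Cs[Src];               // keep as a new cluster
    }
  }
  return Dst;  // number of clusters after merging
}
// --------------------------------------------------------------------------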
2518 
2519 void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
2520  MachineBasicBlock *Last) {
2521  // Update JTCases.
2522  for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
2523  if (JTCases[i].first.HeaderBB == First)
2524  JTCases[i].first.HeaderBB = Last;
2525 
2526  // Update BitTestCases.
2527  for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
2528  if (BitTestCases[i].Parent == First)
2529  BitTestCases[i].Parent = Last;
2530 }
2531 
2532 void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
2533  MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
2534 
2535  // Update machine-CFG edges with unique successors.
2536  SmallSet<BasicBlock*, 32> Done;
2537  for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
2538  BasicBlock *BB = I.getSuccessor(i);
2539  bool Inserted = Done.insert(BB).second;
2540  if (!Inserted)
2541  continue;
2542 
2543  MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
2544  addSuccessorWithProb(IndirectBrMBB, Succ);
2545  }
2546  IndirectBrMBB->normalizeSuccProbs();
2547 
2548  DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(),
2549  MVT::Other, getControlRoot(),
2550  getValue(I.getAddress())));
2551 }
2552 
2553 void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
2554  if (DAG.getTarget().Options.TrapUnreachable)
2555  DAG.setRoot(
2556  DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
2557 }
2558 
2559 void SelectionDAGBuilder::visitFSub(const User &I) {
2560  // -0.0 - X --> fneg
2561  Type *Ty = I.getType();
2562  if (isa<Constant>(I.getOperand(0)) &&
2563  I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
2564  SDValue Op2 = getValue(I.getOperand(1));
2565  setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(),
2566  Op2.getValueType(), Op2));
2567  return;
2568  }
2569 
2570  visitBinary(I, ISD::FSUB);
2571 }
2572 
2573 /// Checks if the given instruction performs a vector reduction, in which case
2574 /// we have the freedom to alter the elements in the result as long as the
2575 /// reduction of them stays unchanged.
2576 static bool isVectorReductionOp(const User *I) {
2577  const Instruction *Inst = dyn_cast<Instruction>(I);
2578  if (!Inst || !Inst->getType()->isVectorTy())
2579  return false;
2580 
2581  auto OpCode = Inst->getOpcode();
2582  switch (OpCode) {
2583  case Instruction::Add:
2584  case Instruction::Mul:
2585  case Instruction::And:
2586  case Instruction::Or:
2587  case Instruction::Xor:
2588  break;
2589  case Instruction::FAdd:
2590  case Instruction::FMul:
2591  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
2592  if (FPOp->getFastMathFlags().isFast())
2593  break;
2594  LLVM_FALLTHROUGH;
2595  default:
2596  return false;
2597  }
2598 
2599  unsigned ElemNum = Inst->getType()->getVectorNumElements();
2600  unsigned ElemNumToReduce = ElemNum;
2601 
2602  // Do DFS search on the def-use chain from the given instruction. We only
2603  // allow four kinds of operations during the search until we reach the
2604  // instruction that extracts the first element from the vector:
2605  //
2606  // 1. The reduction operation of the same opcode as the given instruction.
2607  //
2608  // 2. PHI node.
2609  //
2610  // 3. ShuffleVector instruction together with a reduction operation that
2611  // does a partial reduction.
2612  //
2613  // 4. ExtractElement that extracts the first element from the vector, and we
2614  // stop searching the def-use chain here.
2615  //
2616  // 3 & 4 above perform a reduction on all elements of the vector. We push defs
2617  // from 1-3 to the stack to continue the DFS. The given instruction is not
2618  // a reduction operation if we meet any instructions other than those
2619  // listed above.
2620 
2621  SmallVector<const User *, 16> UsersToVisit{Inst};
2622  SmallPtrSet<const User *, 16> Visited;
2623  bool ReduxExtracted = false;
2624 
2625  while (!UsersToVisit.empty()) {
2626  auto User = UsersToVisit.back();
2627  UsersToVisit.pop_back();
2628  if (!Visited.insert(User).second)
2629  continue;
2630 
2631  for (const auto &U : User->users()) {
2632  auto Inst = dyn_cast<Instruction>(U);
2633  if (!Inst)
2634  return false;
2635 
2636  if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) {
2637  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
2638  if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().isFast())
2639  return false;
2640  UsersToVisit.push_back(U);
2641  } else if (const ShuffleVectorInst *ShufInst =
2642  dyn_cast<ShuffleVectorInst>(U)) {
2643  // Detect the following pattern: a ShuffleVector instruction together
2644  // with a reduction that does a partial reduction on the first and second
2645  // ElemNumToReduce / 2 elements, and stores the result in the first
2646  // ElemNumToReduce / 2 elements of another vector.
2647 
2648  unsigned ResultElements = ShufInst->getType()->getVectorNumElements();
2649  if (ResultElements < ElemNum)
2650  return false;
2651 
2652  if (ElemNumToReduce == 1)
2653  return false;
2654  if (!isa<UndefValue>(U->getOperand(1)))
2655  return false;
2656  for (unsigned i = 0; i < ElemNumToReduce / 2; ++i)
2657  if (ShufInst->getMaskValue(i) != int(i + ElemNumToReduce / 2))
2658  return false;
2659  for (unsigned i = ElemNumToReduce / 2; i < ElemNum; ++i)
2660  if (ShufInst->getMaskValue(i) != -1)
2661  return false;
2662 
2663  // There is only one user of this ShuffleVector instruction, which
2664  // must be a reduction operation.
2665  if (!U->hasOneUse())
2666  return false;
2667 
2668  auto U2 = dyn_cast<Instruction>(*U->user_begin());
2669  if (!U2 || U2->getOpcode() != OpCode)
2670  return false;
2671 
2672  // Check operands of the reduction operation.
2673  if ((U2->getOperand(0) == U->getOperand(0) && U2->getOperand(1) == U) ||
2674  (U2->getOperand(1) == U->getOperand(0) && U2->getOperand(0) == U)) {
2675  UsersToVisit.push_back(U2);
2676  ElemNumToReduce /= 2;
2677  } else
2678  return false;
2679  } else if (isa<ExtractElementInst>(U)) {
2680  // At this moment we should have reduced all elements in the vector.
2681  if (ElemNumToReduce != 1)
2682  return false;
2683 
2684  const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1));
2685  if (!Val || Val->getZExtValue() != 0)
2686  return false;
2687 
2688  ReduxExtracted = true;
2689  } else
2690  return false;
2691  }
2692  }
2693  return ReduxExtracted;
2694 }
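// ---- Editorial sketch (not part of the original source) -----------------
// The shape this predicate recognizes, for a <4 x i32> add reduction;
// hand-written, illustrative IR:
//   %r1  = shufflevector <4 x i32> %v,  <4 x i32> undef,
//                        <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
//   %s1  = add <4 x i32> %v, %r1    ; partial reduction: 4 -> 2 live lanes
//   %r2  = shufflevector <4 x i32> %s1, <4 x i32> undef,
//                        <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
//   %s2  = add <4 x i32> %s1, %r2   ; partial reduction: 2 -> 1 live lane
//   %rdx = extractelement <4 x i32> %s2, i32 0
// --------------------------------------------------------------------------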
2695 
2696 void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
2697  SDValue Op1 = getValue(I.getOperand(0));
2698  SDValue Op2 = getValue(I.getOperand(1));
2699 
2700  bool nuw = false;
2701  bool nsw = false;
2702  bool exact = false;
2703  bool vec_redux = false;
2704  FastMathFlags FMF;
2705 
2706  if (const OverflowingBinaryOperator *OFBinOp =
2707  dyn_cast<const OverflowingBinaryOperator>(&I)) {
2708  nuw = OFBinOp->hasNoUnsignedWrap();
2709  nsw = OFBinOp->hasNoSignedWrap();
2710  }
2711  if (const PossiblyExactOperator *ExactOp =
2712  dyn_cast<const PossiblyExactOperator>(&I))
2713  exact = ExactOp->isExact();
2714  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(&I))
2715  FMF = FPOp->getFastMathFlags();
2716 
2717  if (isVectorReductionOp(&I)) {
2718  vec_redux = true;
2719  DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
2720  }
2721 
2722  SDNodeFlags Flags;
2723  Flags.setExact(exact);
2724  Flags.setNoSignedWrap(nsw);
2725  Flags.setNoUnsignedWrap(nuw);
2726  Flags.setVectorReduction(vec_redux);
2727  Flags.setAllowReciprocal(FMF.allowReciprocal());
2728  Flags.setAllowContract(FMF.allowContract());
2729  Flags.setNoInfs(FMF.noInfs());
2730  Flags.setNoNaNs(FMF.noNaNs());
2731  Flags.setNoSignedZeros(FMF.noSignedZeros());
2732  Flags.setUnsafeAlgebra(FMF.isFast());
2733 
2734  SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(),
2735  Op1, Op2, Flags);
2736  setValue(&I, BinNodeValue);
2737 }
2738 
2739 void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
2740  SDValue Op1 = getValue(I.getOperand(0));
2741  SDValue Op2 = getValue(I.getOperand(1));
2742 
2743  EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
2744  Op2.getValueType(), DAG.getDataLayout());
2745 
2746  // Coerce the shift amount to the right type if we can.
2747  if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
2748  unsigned ShiftSize = ShiftTy.getSizeInBits();
2749  unsigned Op2Size = Op2.getValueSizeInBits();
2750  SDLoc DL = getCurSDLoc();
2751 
2752  // If the operand is smaller than the shift count type, promote it.
2753  if (ShiftSize > Op2Size)
2754  Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
2755 
2756  // If the operand is larger than the shift count type but the shift
2757  // count type has enough bits to represent any shift value, truncate
2758  // it now. This is a common case and it exposes the truncate to
2759  // optimization early.
2760  else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits()))
2761  Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
2762  // Otherwise we'll need to temporarily settle for some other convenient
2763  // type. Type legalization will make adjustments once the shiftee is split.
2764  else
2765  Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
2766  }
2767 
2768  bool nuw = false;
2769  bool nsw = false;
2770  bool exact = false;
2771 
2772  if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {
2773 
2774  if (const OverflowingBinaryOperator *OFBinOp =
2775  dyn_cast<const OverflowingBinaryOperator>(&I)) {
2776  nuw = OFBinOp->hasNoUnsignedWrap();
2777  nsw = OFBinOp->hasNoSignedWrap();
2778  }
2779  if (const PossiblyExactOperator *ExactOp =
2780  dyn_cast<const PossiblyExactOperator>(&I))
2781  exact = ExactOp->isExact();
2782  }
2783  SDNodeFlags Flags;
2784  Flags.setExact(exact);
2785  Flags.setNoSignedWrap(nsw);
2786  Flags.setNoUnsignedWrap(nuw);
2787  SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
2788  Flags);
2789  setValue(&I, Res);
2790 }
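// ---- Editorial note (not part of the original source) -------------------
// The coercion policy above, restated: zero-extend a too-narrow shift
// amount; truncate early when the shift type can still count every valid
// amount; otherwise settle for i32 until type legalization splits the
// shiftee. The early truncate is safe because an IR shift amount that is not
// less than the bit width of the shifted value is already undefined, so the
// dropped high bits of Op2 carry no meaning.
// --------------------------------------------------------------------------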
2791 
2792 void SelectionDAGBuilder::visitSDiv(const User &I) {
2793  SDValue Op1 = getValue(I.getOperand(0));
2794  SDValue Op2 = getValue(I.getOperand(1));
2795 
2796  SDNodeFlags Flags;
2797  Flags.setExact(isa<PossiblyExactOperator>(&I) &&
2798  cast<PossiblyExactOperator>(&I)->isExact());
2799  setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
2800  Op2, Flags));
2801 }
2802 
2803 void SelectionDAGBuilder::visitICmp(const User &I) {
2804  ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
2805  if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
2806  predicate = IC->getPredicate();
2807  else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
2808  predicate = ICmpInst::Predicate(IC->getPredicate());
2809  SDValue Op1 = getValue(I.getOperand(0));
2810  SDValue Op2 = getValue(I.getOperand(1));
2811  ISD::CondCode Opcode = getICmpCondCode(predicate);
2812 
2813  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2814  I.getType());
2815  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
2816 }
2817 
2818 void SelectionDAGBuilder::visitFCmp(const User &I) {
2819  FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
2820  if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
2821  predicate = FC->getPredicate();
2822  else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
2823  predicate = FCmpInst::Predicate(FC->getPredicate());
2824  SDValue Op1 = getValue(I.getOperand(0));
2825  SDValue Op2 = getValue(I.getOperand(1));
2826  ISD::CondCode Condition = getFCmpCondCode(predicate);
2827 
2828  // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them.
2829  // FIXME: We should propagate the fast-math-flags to the DAG node itself for
2830  // further optimization, but currently FMF is only applicable to binary nodes.
2831  if (TM.Options.NoNaNsFPMath)
2832  Condition = getFCmpCodeWithoutNaN(Condition);
2833  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2834  I.getType());
2835  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
2836 }
2837 
2838 // Check if the condition of the select is used only by selects, e.g. it has
2839 // one use, or its users are all selects sharing the same condition.
2840 static bool hasOnlySelectUsers(const Value *Cond) {
2841  return llvm::all_of(Cond->users(), [](const Value *V) {
2842  return isa<SelectInst>(V);
2843  });
2844 }
2845 
2846 void SelectionDAGBuilder::visitSelect(const User &I) {
2847  SmallVector<EVT, 4> ValueVTs;
2848  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
2849  ValueVTs);
2850  unsigned NumValues = ValueVTs.size();
2851  if (NumValues == 0) return;
2852 
2853  SmallVector<SDValue, 4> Values(NumValues);
2854  SDValue Cond = getValue(I.getOperand(0));
2855  SDValue LHSVal = getValue(I.getOperand(1));
2856  SDValue RHSVal = getValue(I.getOperand(2));
2857  auto BaseOps = {Cond};
2858  ISD::NodeType OpCode = Cond.getValueType().isVector() ?
2859  ISD::VSELECT : ISD::SELECT;
2860 
2861  // Min/max matching is only viable if all output VTs are the same.
2862  if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) {
2863  EVT VT = ValueVTs[0];
2864  LLVMContext &Ctx = *DAG.getContext();
2865  auto &TLI = DAG.getTargetLoweringInfo();
2866 
2867  // We care about the legality of the operation after it has been type
2868  // legalized.
2869  while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal &&
2870  VT != TLI.getTypeToTransformTo(Ctx, VT))
2871  VT = TLI.getTypeToTransformTo(Ctx, VT);
2872 
2873  // If the vselect is legal, assume we want to leave this as a vector setcc +
2874  // vselect. Otherwise, if this is going to be scalarized, we want to see if
2875  // min/max is legal on the scalar type.
2876  bool UseScalarMinMax = VT.isVector() &&
2877  !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);
2878 
2879  Value *LHS, *RHS;
2880  auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
2881  ISD::NodeType Opc = ISD::DELETED_NODE;
2882  switch (SPR.Flavor) {
2883  case SPF_UMAX: Opc = ISD::UMAX; break;
2884  case SPF_UMIN: Opc = ISD::UMIN; break;
2885  case SPF_SMAX: Opc = ISD::SMAX; break;
2886  case SPF_SMIN: Opc = ISD::SMIN; break;
2887  case SPF_FMINNUM:
2888  switch (SPR.NaNBehavior) {
2889  case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
2890  case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break;
2891  case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
2892  case SPNB_RETURNS_ANY: {
2893  if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
2894  Opc = ISD::FMINNUM;
2895  else if (TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT))
2896  Opc = ISD::FMINNAN;
2897  else if (UseScalarMinMax)
2898  Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
2899  ISD::FMINNUM : ISD::FMINNAN;
2900  break;
2901  }
2902  }
2903  break;
2904  case SPF_FMAXNUM:
2905  switch (SPR.NaNBehavior) {
2906  case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
2907  case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break;
2908  case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
2909  case SPNB_RETURNS_ANY:
2910 
2911  if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
2912  Opc = ISD::FMAXNUM;
2913  else if (TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT))
2914  Opc = ISD::FMAXNAN;
2915  else if (UseScalarMinMax)
2916  Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
2917  ISD::FMAXNUM : ISD::FMAXNAN;
2918  break;
2919  }
2920  break;
2921  default: break;
2922  }
2923 
2924  if (Opc != ISD::DELETED_NODE &&
2925  (TLI.isOperationLegalOrCustom(Opc, VT) ||
2926  (UseScalarMinMax &&
2927  TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
2928  // If the underlying comparison instruction is used by any other
2929  // instruction, the consumed instructions won't be destroyed, so it is
2930  // not profitable to convert to a min/max.
2931  hasOnlySelectUsers(cast<SelectInst>(I).getCondition())) {
2932  OpCode = Opc;
2933  LHSVal = getValue(LHS);
2934  RHSVal = getValue(RHS);
2935  BaseOps = {};
2936  }
2937  }
2938 
2939  for (unsigned i = 0; i != NumValues; ++i) {
2940  SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
2941  Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
2942  Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
2943  Values[i] = DAG.getNode(OpCode, getCurSDLoc(),
2944  LHSVal.getNode()->getValueType(LHSVal.getResNo()+i),
2945  Ops);
2946  }
2947 
2948  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
2949  DAG.getVTList(ValueVTs), Values));
2950 }
2951 
2952 void SelectionDAGBuilder::visitTrunc(const User &I) {
2953  // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
2954  SDValue N = getValue(I.getOperand(0));
2955  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2956  I.getType());
2957  setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
2958 }
2959 
2960 void SelectionDAGBuilder::visitZExt(const User &I) {
2961  // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
2962  // ZExt also can't be a cast to bool for the same reason. So, nothing much to do.
2963  SDValue N = getValue(I.getOperand(0));
2964  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2965  I.getType());
2966  setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
2967 }
2968 
2969 void SelectionDAGBuilder::visitSExt(const User &I) {
2970  // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
2971  // SExt also can't be a cast to bool for the same reason. So, nothing much to do.
2972  SDValue N = getValue(I.getOperand(0));
2973  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2974  I.getType());
2975  setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
2976 }
2977 
2978 void SelectionDAGBuilder::visitFPTrunc(const User &I) {
2979  // FPTrunc is never a no-op cast, no need to check
2980  SDValue N = getValue(I.getOperand(0));
2981  SDLoc dl = getCurSDLoc();
2982  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2983  EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
2984  setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
2985  DAG.getTargetConstant(
2986  0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
2987 }
2988 
2989 void SelectionDAGBuilder::visitFPExt(const User &I) {
2990  // FPExt is never a no-op cast, no need to check
2991  SDValue N = getValue(I.getOperand(0));
2992  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2993  I.getType());
2994  setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N));
2995 }
2996 
2997 void SelectionDAGBuilder::visitFPToUI(const User &I) {
2998  // FPToUI is never a no-op cast, no need to check
2999  SDValue N = getValue(I.getOperand(0));
3000  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3001  I.getType());
3002  setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N));
3003 }
3004 
3005 void SelectionDAGBuilder::visitFPToSI(const User &I) {
3006  // FPToSI is never a no-op cast, no need to check
3007  SDValue N = getValue(I.getOperand(0));
3008  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3009  I.getType());
3010  setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N));
3011 }
3012 
3013 void SelectionDAGBuilder::visitUIToFP(const User &I) {
3014  // UIToFP is never a no-op cast, no need to check
3015  SDValue N = getValue(I.getOperand(0));
3016  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3017  I.getType());
3018  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N));
3019 }
3020 
3021 void SelectionDAGBuilder::visitSIToFP(const User &I) {
3022  // SIToFP is never a no-op cast, no need to check
3023  SDValue N = getValue(I.getOperand(0));
3024  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3025  I.getType());
3026  setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
3027 }
3028 
3029 void SelectionDAGBuilder::visitPtrToInt(const User &I) {
3030  // What to do depends on the size of the integer and the size of the pointer.
3031  // We can either truncate, zero extend, or no-op, accordingly.
3032  SDValue N = getValue(I.getOperand(0));
3033  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3034  I.getType());
3035  setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
3036 }
3037 
3038 void SelectionDAGBuilder::visitIntToPtr(const User &I) {
3039  // What to do depends on the size of the integer and the size of the pointer.
3040  // We can either truncate, zero extend, or no-op, accordingly.
3041  SDValue N = getValue(I.getOperand(0));
3042  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3043  I.getType());
3044  setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
3045 }
3046 
3047 void SelectionDAGBuilder::visitBitCast(const User &I) {
3048  SDValue N = getValue(I.getOperand(0));
3049  SDLoc dl = getCurSDLoc();
3050  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3051  I.getType());
3052 
3053  // BitCast assures us that source and destination are the same size so this is
3054  // either a BITCAST or a no-op.
3055  if (DestVT != N.getValueType())
3056  setValue(&I, DAG.getNode(ISD::BITCAST, dl,
3057  DestVT, N)); // convert types.
3058  // Check if the original LLVM IR Operand was a ConstantInt, because getValue()
3059  // might fold any kind of constant expression to an integer constant and that
3060  // is not what we are looking for. Only recognize a bitcast of a genuine
3061  // constant integer as an opaque constant.
3062  else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
3063  setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false,
3064  /*isOpaque*/true));
3065  else
3066  setValue(&I, N); // noop cast.
3067 }
3068 
3069 void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
3070  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3071  const Value *SV = I.getOperand(0);
3072  SDValue N = getValue(SV);
3073  EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3074 
3075  unsigned SrcAS = SV->getType()->getPointerAddressSpace();
3076  unsigned DestAS = I.getType()->getPointerAddressSpace();
3077 
3078  if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
3079  N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
3080 
3081  setValue(&I, N);
3082 }
3083 
3084 void SelectionDAGBuilder::visitInsertElement(const User &I) {
3085  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3086  SDValue InVec = getValue(I.getOperand(0));
3087  SDValue InVal = getValue(I.getOperand(1));
3088  SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
3089  TLI.getVectorIdxTy(DAG.getDataLayout()));
3090  setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
3091  TLI.getValueType(DAG.getDataLayout(), I.getType()),
3092  InVec, InVal, InIdx));
3093 }
3094 
3095 void SelectionDAGBuilder::visitExtractElement(const User &I) {
3096  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3097  SDValue InVec = getValue(I.getOperand(0));
3098  SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
3099  TLI.getVectorIdxTy(DAG.getDataLayout()));
3100  setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
3101  TLI.getValueType(DAG.getDataLayout(), I.getType()),
3102  InVec, InIdx));
3103 }
3104 
3105 void SelectionDAGBuilder::visitShuffleVector(const User &I) {
3106  SDValue Src1 = getValue(I.getOperand(0));
3107  SDValue Src2 = getValue(I.getOperand(1));
3108  SDLoc DL = getCurSDLoc();
3109 
3110  SmallVector<int, 8> Mask;
3111  ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
3112  unsigned MaskNumElts = Mask.size();
3113 
3114  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3115  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3116  EVT SrcVT = Src1.getValueType();
3117  unsigned SrcNumElts = SrcVT.getVectorNumElements();
3118 
3119  if (SrcNumElts == MaskNumElts) {
3120  setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask));
3121  return;
3122  }
3123 
3124  // Normalize the shuffle vector since mask and vector length don't match.
3125  if (SrcNumElts < MaskNumElts) {
3126  // Mask is longer than the source vectors. We can use concatenate vector to
3127  // make the mask and vectors lengths match.
3128 
3129  if (MaskNumElts % SrcNumElts == 0) {
3130  // Mask length is a multiple of the source vector length.
3131  // Check if the shuffle is some kind of concatenation of the input
3132  // vectors.
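  // For example, with SrcNumElts = 4 and Mask = <0,1,2,3,4,5,6,7>, each
  // 4-element piece reads sequentially from a single source, so ConcatSrcs
  // becomes {0, 1} and the shuffle is just CONCAT_VECTORS(Src1, Src2).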
3133  unsigned NumConcat = MaskNumElts / SrcNumElts;
3134  bool IsConcat = true;
3135  SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
3136  for (unsigned i = 0; i != MaskNumElts; ++i) {
3137  int Idx = Mask[i];
3138  if (Idx < 0)
3139  continue;
3140  // Ensure the indices in each SrcVT sized piece are sequential and that
3141  // the same source is used for the whole piece.
3142  if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
3143  (ConcatSrcs[i / SrcNumElts] >= 0 &&
3144  ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
3145  IsConcat = false;
3146  break;
3147  }
3148  // Remember which source this index came from.
3149  ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
3150  }
3151 
3152  // The shuffle is concatenating multiple vectors together. Just emit
3153  // a CONCAT_VECTORS operation.
3154  if (IsConcat) {
3155  SmallVector<SDValue, 8> ConcatOps;
3156  for (auto Src : ConcatSrcs) {
3157  if (Src < 0)
3158  ConcatOps.push_back(DAG.getUNDEF(SrcVT));
3159  else if (Src == 0)
3160  ConcatOps.push_back(Src1);
3161  else
3162  ConcatOps.push_back(Src2);
3163  }
3164  setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps));
3165  return;
3166  }
3167  }
3168 
3169  unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
3170  unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
3171  EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
3172  PaddedMaskNumElts);
3173 
3174  // Pad both vectors with undefs to make them the same length as the mask.
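  // For example, with SrcNumElts = 2 and MaskNumElts = 3, PaddedMaskNumElts is
  // alignTo(3, 2) = 4: each source is widened from 2 to 4 elements here, and
  // the padded shuffle result is trimmed back to 3 elements further below.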
3175  SDValue UndefVal = DAG.getUNDEF(SrcVT);
3176 
3177  SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
3178  SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
3179  MOps1[0] = Src1;
3180  MOps2[0] = Src2;
3181 
3182  Src1 = Src1.isUndef()
3183  ? DAG.getUNDEF(PaddedVT)
3184  : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
3185  Src2 = Src2.isUndef()
3186  ? DAG.getUNDEF(PaddedVT)
3187  : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
3188 
3189  // Readjust mask for new input vector length.
3190  SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
3191  for (unsigned i = 0; i != MaskNumElts; ++i) {
3192  int Idx = Mask[i];
3193  if (Idx >= (int)SrcNumElts)
3194  Idx -= SrcNumElts - PaddedMaskNumElts;
3195  MappedOps[i] = Idx;
3196  }
3197 
3198  SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps);
3199 
3200  // If the concatenated vector was padded, extract a subvector with the
3201  // correct number of elements.
3202  if (MaskNumElts != PaddedMaskNumElts)
3203  Result = DAG.getNode(
3204  ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
3205  DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
3206 
3207  setValue(&I, Result);
3208  return;
3209  }
3210 
3211  if (SrcNumElts > MaskNumElts) {
3212  // Analyze the access pattern of the vector to see if we can extract
3213  // two subvectors and do the shuffle.
3214  int StartIdx[2] = { -1, -1 }; // StartIdx to extract from
3215  bool CanExtract = true;
3216  for (int Idx : Mask) {
3217  unsigned Input = 0;
3218  if (Idx < 0)
3219  continue;
3220 
3221  if (Idx >= (int)SrcNumElts) {
3222  Input = 1;
3223  Idx -= SrcNumElts;
3224  }
3225 
3226  // If all the indices come from the same MaskNumElts sized portion of
3227  // the sources we can use extract. Also make sure the extract wouldn't
3228  // extract past the end of the source.
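  // For example, with SrcNumElts = 8 and Mask = <12,13,14,15>, every index
  // falls in the 4-element piece of Src2 starting at 12 - 8 = 4, so
  // StartIdx[1] = 4 and a single EXTRACT_SUBVECTOR feeds the new shuffle.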
3229  int NewStartIdx = alignDown(Idx, MaskNumElts);
3230  if (NewStartIdx + MaskNumElts > SrcNumElts ||
3231  (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
3232  CanExtract = false;
3233  // Make sure we always update StartIdx as we use it to track if all
3234  // elements are undef.
3235  StartIdx[Input] = NewStartIdx;
3236  }
3237 
3238  if (StartIdx[0] < 0 && StartIdx[1] < 0) {
3239  setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
3240  return;
3241  }
3242  if (CanExtract) {
3243  // Extract appropriate subvector and generate a vector shuffle
3244  for (unsigned Input = 0; Input < 2; ++Input) {
3245  SDValue &Src = Input == 0 ? Src1 : Src2;
3246  if (StartIdx[Input] < 0)
3247  Src = DAG.getUNDEF(VT);
3248  else {
3249  Src = DAG.getNode(
3250  ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
3251  DAG.getConstant(StartIdx[Input], DL,
3252  TLI.getVectorIdxTy(DAG.getDataLayout())));
3253  }
3254  }
3255 
3256  // Calculate new mask.
3257  SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end());
3258  for (int &Idx : MappedOps) {
3259  if (Idx >= (int)SrcNumElts)
3260  Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
3261  else if (Idx >= 0)
3262  Idx -= StartIdx[0];
3263  }
3264 
3265  setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
3266  return;
3267  }
3268  }
3269 
3270  // We can't use either concat vectors or extract subvectors so fall back to
3271  // replacing the shuffle with an extract and build vector.
3273  EVT EltVT = VT.getVectorElementType();
3274  EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
3275  SmallVector<SDValue,8> Ops;
3276  for (int Idx : Mask) {
3277  SDValue Res;
3278 
3279  if (Idx < 0) {
3280  Res = DAG.getUNDEF(EltVT);
3281  } else {
3282  SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
3283  if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
3284 
3285  Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
3286  EltVT, Src, DAG.getConstant(Idx, DL, IdxVT));
3287  }
3288 
3289  Ops.push_back(Res);
3290  }
3291 
3292  setValue(&I, DAG.getBuildVector(VT, DL, Ops));
3293 }
3294 
3295 void SelectionDAGBuilder::visitInsertValue(const User &I) {
3296  ArrayRef<unsigned> Indices;
3297  if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I))
3298  Indices = IV->getIndices();
3299  else
3300  Indices = cast<ConstantExpr>(&I)->getIndices();
3301 
3302  const Value *Op0 = I.getOperand(0);
3303  const Value *Op1 = I.getOperand(1);
3304  Type *AggTy = I.getType();
3305  Type *ValTy = Op1->getType();
3306  bool IntoUndef = isa<UndefValue>(Op0);
3307  bool FromUndef = isa<UndefValue>(Op1);
3308 
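  // ComputeLinearIndex flattens the aggregate into scalars: for the type
  // {i32, {float, i64}} with Indices = {1, 1}, the i64 member is the third
  // scalar, so LinearIndex = 2.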
3309  unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
3310 
3311  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3312  SmallVector<EVT, 4> AggValueVTs;
3313  ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs);
3314  SmallVector<EVT, 4> ValValueVTs;
3315  ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
3316 
3317  unsigned NumAggValues = AggValueVTs.size();
3318  unsigned NumValValues = ValValueVTs.size();
3319  SmallVector<SDValue, 4> Values(NumAggValues);
3320 
3321  // Ignore an insertvalue that produces an empty object
3322  if (!NumAggValues) {
3323  setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
3324  return;
3325  }
3326 
3327  SDValue Agg = getValue(Op0);
3328  unsigned i = 0;
3329  // Copy the beginning value(s) from the original aggregate.
3330  for (; i != LinearIndex; ++i)
3331  Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3332  SDValue(Agg.getNode(), Agg.getResNo() + i);
3333  // Copy values from the inserted value(s).
3334  if (NumValValues) {
3335  SDValue Val = getValue(Op1);
3336  for (; i != LinearIndex + NumValValues; ++i)
3337  Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3338  SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
3339  }
3340  // Copy remaining value(s) from the original aggregate.
3341  for (; i != NumAggValues; ++i)
3342  Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3343  SDValue(Agg.getNode(), Agg.getResNo() + i);
3344 
3345  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
3346  DAG.getVTList(AggValueVTs), Values));
3347 }
3348 
3349 void SelectionDAGBuilder::visitExtractValue(const User &I) {
3350  ArrayRef<unsigned> Indices;
3351  if (const ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&I))
3352  Indices = EV->getIndices();
3353  else
3354  Indices = cast<ConstantExpr>(&I)->getIndices();
3355 
3356  const Value *Op0 = I.getOperand(0);
3357  Type *AggTy = Op0->getType();
3358  Type *ValTy = I.getType();
3359  bool OutOfUndef = isa<UndefValue>(Op0);
3360 
3361  unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
3362 
3363  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3364  SmallVector<EVT, 4> ValValueVTs;
3365  ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
3366 
3367  unsigned NumValValues = ValValueVTs.size();
3368 
3369  // Ignore an extractvalue that produces an empty object
3370  if (!NumValValues) {
3371  setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
3372  return;
3373  }
3374 
3375  SmallVector<SDValue, 4> Values(NumValValues);
3376 
3377  SDValue Agg = getValue(Op0);
3378  // Copy out the selected value(s).
3379  for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
3380  Values[i - LinearIndex] =
3381  OutOfUndef ?
3382  DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
3383  SDValue(Agg.getNode(), Agg.getResNo() + i);
3384 
3385  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
3386  DAG.getVTList(ValValueVTs), Values));
3387 }
3388 
3389 void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
3390  Value *Op0 = I.getOperand(0);
3391  // Note that the pointer operand may be a vector of pointers. Take the scalar
3392  // element which holds a pointer.
3393  unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace();
3394  SDValue N = getValue(Op0);
3395  SDLoc dl = getCurSDLoc();
3396 
3397  // Normalize Vector GEP - all scalar operands should be converted to the
3398  // splat vector.
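  // For example, for
  //   %gep = getelementptr i32, i32* %ptr, <2 x i64> %ind
  // the scalar base %ptr is splatted to a two-element vector of pointers so
  // that the address arithmetic below is uniformly vector-valued.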
3399  unsigned VectorWidth = I.getType()->isVectorTy() ?
3400  cast<VectorType>(I.getType())->getVectorNumElements() : 0;
3401 
3402  if (VectorWidth && !N.getValueType().isVector()) {
3403  LLVMContext &Context = *DAG.getContext();
3404  EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth);
3405  N = DAG.getSplatBuildVector(VT, dl, N);
3406  }
3407 
3408  for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
3409  GTI != E; ++GTI) {
3410  const Value *Idx = GTI.getOperand();
3411  if (StructType *StTy = GTI.getStructTypeOrNull()) {
3412  unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
3413  if (Field) {
3414  // N = N + Offset
3415  uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
3416 
3417  // In an inbounds GEP with an offset that is nonnegative even when
3418  // interpreted as signed, assume there is no unsigned overflow.
3419  SDNodeFlags Flags;
3420  if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
3421  Flags.setNoUnsignedWrap(true);
3422 
3423  N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
3424  DAG.getConstant(Offset, dl, N.getValueType()), Flags);
3425  }
3426  } else {
3427  MVT PtrTy =
3428  DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout(), AS);
3429  unsigned PtrSize = PtrTy.getSizeInBits();
3430  APInt ElementSize(PtrSize, DL->getTypeAllocSize(GTI.getIndexedType()));
3431 
3432  // If this is a scalar constant or a splat vector of constants,
3433  // handle it quickly.
3434  const auto *CI = dyn_cast<ConstantInt>(Idx);
3435  if (!CI && isa<ConstantDataVector>(Idx) &&
3436  cast<ConstantDataVector>(Idx)->getSplatValue())
3437  CI = cast<ConstantInt>(cast<ConstantDataVector>(Idx)->getSplatValue());
3438 
3439  if (CI) {
3440  if (CI->isZero())
3441  continue;
3442  APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize);
3443  LLVMContext &Context = *DAG.getContext();
3444  SDValue OffsVal = VectorWidth ?
3445  DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, PtrTy, VectorWidth)) :
3446  DAG.getConstant(Offs, dl, PtrTy);
3447 
3448  // In an inbouds GEP with an offset that is nonnegative even when
3449  // interpreted as signed, assume there is no unsigned overflow.
3450  SDNodeFlags Flags;
3451  if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
3452  Flags.setNoUnsignedWrap(true);
3453 
3454  N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags);
3455  continue;
3456  }
3457 
3458  // N = N + Idx * ElementSize;
3459  SDValue IdxN = getValue(Idx);
3460 
3461  if (!IdxN.getValueType().isVector() && VectorWidth) {
3462  EVT VT = EVT::getVectorVT(*DAG.getContext(), IdxN.getValueType(), VectorWidth);
3463  IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
3464  }
3465 
3466  // If the index is smaller or larger than intptr_t, truncate or extend
3467  // it.
3468  IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
3469 
3470  // If this is a multiply by a power of two, turn it into a shl
3471  // immediately. This is a very common case.
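        // For example, a GEP over i32 elements has ElementSize = 4, so the
        // index is lowered as (IdxN << 2) instead of a multiply.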
3472  if (ElementSize != 1) {
3473  if (ElementSize.isPowerOf2()) {
3474  unsigned Amt = ElementSize.logBase2();
3475  IdxN = DAG.getNode(ISD::SHL, dl,
3476  N.getValueType(), IdxN,
3477  DAG.getConstant(Amt, dl, IdxN.getValueType()));
3478  } else {
3479  SDValue Scale = DAG.getConstant(ElementSize, dl, IdxN.getValueType());
3480  IdxN = DAG.getNode(ISD::MUL, dl,
3481  N.getValueType(), IdxN, Scale);
3482  }
3483  }
3484 
3485  N = DAG.getNode(ISD::ADD, dl,
3486  N.getValueType(), N, IdxN);
3487  }
3488  }
3489 
3490  setValue(&I, N);
3491 }
3492 
3493 void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
3494  // If this is a fixed sized alloca in the entry block of the function,
3495  // allocate it statically on the stack.
3496  if (FuncInfo.StaticAllocaMap.count(&I))
3497  return; // getValue will auto-populate this.
3498 
3499  SDLoc dl = getCurSDLoc();
3500  Type *Ty = I.getAllocatedType();
3501  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3502  auto &DL = DAG.getDataLayout();
3503  uint64_t TySize = DL.getTypeAllocSize(Ty);
3504  unsigned Align =
3505  std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment());
3506 
3507  SDValue AllocSize = getValue(I.getArraySize());
3508 
3509  EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace());
3510  if (AllocSize.getValueType() != IntPtr)
3511  AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
3512 
3513  AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr,
3514  AllocSize,
3515  DAG.getConstant(TySize, dl, IntPtr));
3516 
3517  // Handle alignment. If the requested alignment is less than or equal to
3518  // the stack alignment, ignore it. If the size is greater than or equal to
3519  // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
3520  unsigned StackAlign =
3521  DAG.getSubtarget().getFrameLowering()->getStackAlignment();
3522  if (Align <= StackAlign)
3523  Align = 0;
3524 
3525  // Round the size of the allocation up to the stack alignment size
3526  // by adding SA-1 to the size. This doesn't overflow because we're computing
3527  // an address inside an alloca.
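  // For example, with StackAlign = 16 a 20-byte request becomes
  // (20 + 15) & ~15 = 32, the next multiple of the stack alignment.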
3528  SDNodeFlags Flags;
3529  Flags.setNoUnsignedWrap(true);
3530  AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
3531  DAG.getConstant(StackAlign - 1, dl, IntPtr), Flags);
3532 
3533  // Mask out the low bits for alignment purposes.
3534  AllocSize =
3535  DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
3536  DAG.getConstant(~(uint64_t)(StackAlign - 1), dl, IntPtr));
3537 
3538  SDValue Ops[] = {getRoot(), AllocSize, DAG.getConstant(Align, dl, IntPtr)};
3539  SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
3540  SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
3541  setValue(&I, DSA);
3542  DAG.setRoot(DSA.getValue(1));
3543 
3544  assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects());
3545 }
3546 
3547 void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
3548  if (I.isAtomic())
3549  return visitAtomicLoad(I);
3550 
3551  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3552  const Value *SV = I.getOperand(0);
3553  if (TLI.supportSwiftError()) {
3554  // Swifterror values can come from either a function parameter with
3555  // swifterror attribute or an alloca with swifterror attribute.
3556  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
3557  if (Arg->hasSwiftErrorAttr())
3558  return visitLoadFromSwiftError(I);
3559  }
3560 
3561  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
3562  if (Alloca->isSwiftError())
3563  return visitLoadFromSwiftError(I);
3564  }
3565  }
3566 
3567  SDValue Ptr = getValue(SV);
3568 
3569  Type *Ty = I.getType();
3570 
3571  bool isVolatile = I.isVolatile();
3572  bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
3573  bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr;
3574  bool isDereferenceable = isDereferenceablePointer(SV, DAG.getDataLayout());
3575  unsigned Alignment = I.getAlignment();
3576 
3577  AAMDNodes AAInfo;
3578  I.getAAMetadata(AAInfo);
3579  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
3580 
3581  SmallVector<EVT, 4> ValueVTs;
3582  SmallVector<uint64_t, 4> Offsets;
3583  ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets);
3584  unsigned NumValues = ValueVTs.size();
3585  if (NumValues == 0)
3586  return;
3587 
3588  SDValue Root;
3589  bool ConstantMemory = false;
3590  if (isVolatile || NumValues > MaxParallelChains)
3591  // Serialize volatile loads with other side effects.
3592  Root = getRoot();
3593  else if (AA && AA->pointsToConstantMemory(MemoryLocation(
3594  SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) {
3595  // Do not serialize (non-volatile) loads of constant memory with anything.
3596  Root = DAG.getEntryNode();
3597  ConstantMemory = true;
3598  } else {
3599  // Do not serialize non-volatile loads against each other.
3600  Root = DAG.getRoot();
3601  }
3602 
3603  SDLoc dl = getCurSDLoc();
3604 
3605  if (isVolatile)
3606  Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
3607 
3608  // An aggregate load cannot wrap around the address space, so offsets to its
3609  // parts don't wrap either.
3610  SDNodeFlags Flags;
3611  Flags.setNoUnsignedWrap(true);
3612 
3613  SmallVector<SDValue, 4> Values(NumValues);
3614  SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
3615  EVT PtrVT = Ptr.getValueType();
3616  unsigned ChainI = 0;
3617  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
3618  // Serializing loads here may result in excessive register pressure, and
3619  // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
3620  // could recover a bit by hoisting nodes upward in the chain by recognizing
3621  // they are side-effect free or do not alias. The optimizer should really
3622  // avoid this case by converting large object/array copies to llvm.memcpy
3623  // (MaxParallelChains should always remain as failsafe).
3624  if (ChainI == MaxParallelChains) {
3625  assert(PendingLoads.empty() && "PendingLoads must be serialized first");
3626  SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3627  makeArrayRef(Chains.data(), ChainI));
3628  Root = Chain;
3629  ChainI = 0;
3630  }
3631  SDValue A = DAG.getNode(ISD::ADD, dl,
3632  PtrVT, Ptr,
3633  DAG.getConstant(Offsets[i], dl, PtrVT),
3634  Flags);
3635  auto MMOFlags = MachineMemOperand::MONone;
3636  if (isVolatile)
3637  MMOFlags |= MachineMemOperand::MOVolatile;
3638  if (isNonTemporal)
3639  MMOFlags |= MachineMemOperand::MONonTemporal;
3640  if (isInvariant)
3641  MMOFlags |= MachineMemOperand::MOInvariant;
3642  if (isDereferenceable)
3643  MMOFlags |= MachineMemOperand::MODereferenceable;
3644  MMOFlags |= TLI.getMMOFlags(I);
3645 
3646  SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A,
3647  MachinePointerInfo(SV, Offsets[i]), Alignment,
3648  MMOFlags, AAInfo, Ranges);
3649 
3650  Values[i] = L;
3651  Chains[ChainI] = L.getValue(1);
3652  }
3653 
3654  if (!ConstantMemory) {
3655  SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3656  makeArrayRef(Chains.data(), ChainI));
3657  if (isVolatile)
3658  DAG.setRoot(Chain);
3659  else
3660  PendingLoads.push_back(Chain);
3661  }
3662 
3663  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl,
3664  DAG.getVTList(ValueVTs), Values));
3665 }
3666 
3667 void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
3668  assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
3669  "call visitStoreToSwiftError when backend supports swifterror");
3670 
3671  SmallVector<EVT, 4> ValueVTs;
3672  SmallVector<uint64_t, 4> Offsets;
3673  const Value *SrcV = I.getOperand(0);
3674  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
3675  SrcV->getType(), ValueVTs, &Offsets);
3676  assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
3677  "expect a single EVT for swifterror");
3678 
3679  SDValue Src = getValue(SrcV);
3680  // Create a virtual register, then update the virtual register.
3681  unsigned VReg; bool CreatedVReg;
3682  std::tie(VReg, CreatedVReg) = FuncInfo.getOrCreateSwiftErrorVRegDefAt(&I);
3683  // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
3684  // Chain can be getRoot or getControlRoot.
3685  SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
3686  SDValue(Src.getNode(), Src.getResNo()));
3687  DAG.setRoot(CopyNode);
3688  if (CreatedVReg)
3689  FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg);
3690 }
3691 
3692 void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
3693  assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
3694  "call visitLoadFromSwiftError when backend supports swifterror");
3695 
3696  assert(!I.isVolatile() &&
3697  I.getMetadata(LLVMContext::MD_nontemporal) == nullptr &&
3698  I.getMetadata(LLVMContext::MD_invariant_load) == nullptr &&
3699  "Support volatile, non temporal, invariant for load_from_swift_error");
3700 
3701  const Value *SV = I.getOperand(0);
3702  Type *Ty = I.getType();
3703  AAMDNodes AAInfo;
3704  I.getAAMetadata(AAInfo);
3705  assert((!AA || !AA->pointsToConstantMemory(MemoryLocation(
3706  SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) &&
3707  "load_from_swift_error should not be constant memory");
3708 
3709  SmallVector<EVT, 4> ValueVTs;
3710  SmallVector<uint64_t, 4> Offsets;
3711  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty,
3712  ValueVTs, &Offsets);
3713  assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
3714  "expect a single EVT for swifterror");
3715 
3716  // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
3717  SDValue L = DAG.getCopyFromReg(
3718  getRoot(), getCurSDLoc(),
3719  FuncInfo.getOrCreateSwiftErrorVRegUseAt(&I, FuncInfo.MBB, SV).first,
3720  ValueVTs[0]);
3721 
3722  setValue(&I, L);
3723 }
3724 
3725 void SelectionDAGBuilder::visitStore(const StoreInst &I) {
3726  if (I.isAtomic())
3727  return visitAtomicStore(I);
3728 
3729  const Value *SrcV = I.getOperand(0);
3730  const Value *PtrV = I.getOperand(1);
3731 
3732  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3733  if (TLI.supportSwiftError()) {
3734  // Swifterror values can come from either a function parameter with
3735  // swifterror attribute or an alloca with swifterror attribute.
3736  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
3737  if (Arg->hasSwiftErrorAttr())
3738  return visitStoreToSwiftError(I);
3739  }
3740 
3741  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
3742  if (Alloca->isSwiftError())
3743  return visitStoreToSwiftError(I);
3744  }
3745  }
3746 
3747  SmallVector<EVT, 4> ValueVTs;
3748  SmallVector<uint64_t, 4> Offsets;
3749  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
3750  SrcV->getType(), ValueVTs, &Offsets);
3751  unsigned NumValues = ValueVTs.size();
3752  if (NumValues == 0)
3753  return;
3754 
3755  // Get the lowered operands. Note that we do this after
3756  // checking if NumResults is zero, because with zero results
3757  // the operands won't have values in the map.
3758  SDValue Src = getValue(SrcV);
3759  SDValue Ptr = getValue(PtrV);
3760 
3761  SDValue Root = getRoot();
3762  SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
3763  SDLoc dl = getCurSDLoc();
3764  EVT PtrVT = Ptr.getValueType();
3765  unsigned Alignment = I.getAlignment();
3766  AAMDNodes AAInfo;
3767  I.getAAMetadata(AAInfo);
3768 
3769  auto MMOFlags = MachineMemOperand::MONone;
3770  if (I.isVolatile())
3771  MMOFlags |= MachineMemOperand::MOVolatile;
3772  if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
3773  MMOFlags |= MachineMemOperand::MONonTemporal;
3774  MMOFlags |= TLI.getMMOFlags(I);
3775 
3776  // An aggregate load cannot wrap around the address space, so offsets to its
3777  // parts don't wrap either.
3778  SDNodeFlags Flags;
3779  Flags.setNoUnsignedWrap(true);
3780 
3781  unsigned ChainI = 0;
3782  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
3783  // See visitLoad comments.
3784  if (ChainI == MaxParallelChains) {
3785  SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3786  makeArrayRef(Chains.data(), ChainI));
3787  Root = Chain;
3788  ChainI = 0;
3789  }
3790  SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
3791  DAG.getConstant(Offsets[i], dl, PtrVT), Flags);
3792  SDValue St = DAG.getStore(
3793  Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add,
3794  MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo);
3795  Chains[ChainI] = St;
3796  }
3797 
3798  SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3799  makeArrayRef(Chains.data(), ChainI));
3800  DAG.setRoot(StoreNode);
3801 }
3802 
3803 void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
3804  bool IsCompressing) {
3805  SDLoc sdl = getCurSDLoc();
3806 
3807  auto getMaskedStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3808  unsigned& Alignment) {
3809  // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
3810  Src0 = I.getArgOperand(0);
3811  Ptr = I.getArgOperand(1);
3812  Alignment = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
3813  Mask = I.getArgOperand(3);
3814  };
3815  auto getCompressingStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3816  unsigned& Alignment) {
3817  // llvm.masked.compressstore.*(Src0, Ptr, Mask)
3818  Src0 = I.getArgOperand(0);
3819  Ptr = I.getArgOperand(1);
3820  Mask = I.getArgOperand(2);
3821  Alignment = 0;
3822  };
3823 
3824  Value *PtrOperand, *MaskOperand, *Src0Operand;
3825  unsigned Alignment;
3826  if (IsCompressing)
3827  getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3828  else
3829  getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3830 
3831  SDValue Ptr = getValue(PtrOperand);
3832  SDValue Src0 = getValue(Src0Operand);
3833  SDValue Mask = getValue(MaskOperand);
3834 
3835  EVT VT = Src0.getValueType();
3836  if (!Alignment)
3837  Alignment = DAG.getEVTAlignment(VT);
3838 
3839  AAMDNodes AAInfo;
3840  I.getAAMetadata(AAInfo);
3841 
3842  MachineMemOperand *MMO =
3843  DAG.getMachineFunction().
3844  getMachineMemOperand(MachinePointerInfo(PtrOperand),
3845  MachineMemOperand::MOStore, VT.getStoreSize(),
3846  Alignment, AAInfo);
3847  SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT,
3848  MMO, false /* Truncating */,
3849  IsCompressing);
3850  DAG.setRoot(StoreNode);
3851  setValue(&I, StoreNode);
3852 }
3853 
3854 // Get a uniform base for the Gather/Scatter intrinsic.
3855 // The first argument of the Gather/Scatter intrinsic is a vector of pointers.
3856 // We try to represent it as a base pointer + vector of indices.
3857 // Usually, the vector of pointers comes from a 'getelementptr' instruction.
3858 // The first operand of the GEP may be a single pointer or a vector of pointers
3859 // Example:
3860 // %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
3861 // or
3862 // %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind
3863 // %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
3864 //
3865 // When the first GEP operand is a single pointer, it is the uniform base we
3866 // are looking for. If the first operand of the GEP is a splat vector, we
3867 // extract the splat value and use it as the uniform base.
3868 // In all other cases the function returns 'false'.
3869 static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
3870  SelectionDAGBuilder* SDB) {
3871  SelectionDAG& DAG = SDB->DAG;
3872  LLVMContext &Context = *DAG.getContext();
3873 
3874  assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
3875  const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
3876  if (!GEP)
3877  return false;
3878 
3879  const Value *GEPPtr = GEP->getPointerOperand();
3880  if (!GEPPtr->getType()->isVectorTy())
3881  Ptr = GEPPtr;
3882  else if (!(Ptr = getSplatValue(GEPPtr)))
3883  return false;
3884 
3885  unsigned FinalIndex = GEP->getNumOperands() - 1;
3886  Value *IndexVal = GEP->getOperand(FinalIndex);
3887 
3888  // Ensure all the other indices are 0.
3889  for (unsigned i = 1; i < FinalIndex; ++i) {
3890  auto *C = dyn_cast<ConstantInt>(GEP->getOperand(i));
3891  if (!C || !C->isZero())
3892  return false;
3893  }
3894 
3895  // The operands of the GEP may be defined in another basic block.
3896  // In this case we'll not find nodes for the operands.
3897  if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal))
3898  return false;
3899 
3900  Base = SDB->getValue(Ptr);
3901  Index = SDB->getValue(IndexVal);
3902 
3903  if (!Index.getValueType().isVector()) {
3904  unsigned GEPWidth = GEP->getType()->getVectorNumElements();
3905  EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
3906  Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index);
3907  }
3908  return true;
3909 }
3910 
3911 void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
3912  SDLoc sdl = getCurSDLoc();
3913 
3914  // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
3915  const Value *Ptr = I.getArgOperand(1);
3916  SDValue Src0 = getValue(I.getArgOperand(0));
3917  SDValue Mask = getValue(I.getArgOperand(3));
3918  EVT VT = Src0.getValueType();
3919  unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue();
3920  if (!Alignment)
3921  Alignment = DAG.getEVTAlignment(VT);
3922  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3923 
3924  AAMDNodes AAInfo;
3925  I.getAAMetadata(AAInfo);
3926 
3927  SDValue Base;
3928  SDValue Index;
3929  const Value *BasePtr = Ptr;
3930  bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
3931 
3932  const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
3933  MachineMemOperand *MMO = DAG.getMachineFunction().
3934  getMachineMemOperand(MachinePointerInfo(MemOpBasePtr),
3935  MachineMemOperand::MOStore, VT.getStoreSize(),
3936  Alignment, AAInfo);
3937  if (!UniformBase) {
3938  Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
3939  Index = getValue(Ptr);
3940  }
3941  SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index };
3942  SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
3943  Ops, MMO);
3944  DAG.setRoot(Scatter);
3945  setValue(&I, Scatter);
3946 }
3947 
3948 void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
3949  SDLoc sdl = getCurSDLoc();
3950 
3951  auto getMaskedLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3952  unsigned& Alignment) {
3953  // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
3954  Ptr = I.getArgOperand(0);
3955  Alignment = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
3956  Mask = I.getArgOperand(2);
3957  Src0 = I.getArgOperand(3);
3958  };
3959  auto getExpandingLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3960  unsigned& Alignment) {
3961  // @llvm.masked.expandload.*(Ptr, Mask, Src0)
3962  Ptr = I.getArgOperand(0);
3963  Alignment = 0;
3964  Mask = I.getArgOperand(1);
3965  Src0 = I.getArgOperand(2);
3966  };
3967 
3968  Value *PtrOperand, *MaskOperand, *Src0Operand;
3969  unsigned Alignment;
3970  if (IsExpanding)
3971  getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3972  else
3973  getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3974 
3975  SDValue Ptr = getValue(PtrOperand);
3976  SDValue Src0 = getValue(Src0Operand);
3977  SDValue Mask = getValue(MaskOperand);
3978 
3979  EVT VT = Src0.getValueType();
3980  if (!Alignment)
3981  Alignment = DAG.getEVTAlignment(VT);
3982 
3983  AAMDNodes AAInfo;
3984  I.getAAMetadata(AAInfo);
3985  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
3986 
3987  // Do not serialize masked loads of constant memory with anything.
3988  bool AddToChain = !AA || !AA->pointsToConstantMemory(MemoryLocation(
3989  PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo));
3990  SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
3991 
3992  MachineMemOperand *MMO =
3993  DAG.getMachineFunction().
3994  getMachineMemOperand(MachinePointerInfo(PtrOperand),
3995  MachineMemOperand::MOLoad, VT.getStoreSize(),
3996  Alignment, AAInfo, Ranges);
3997 
3998  SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
3999  ISD::NON_EXTLOAD, IsExpanding);
4000  if (AddToChain) {
4001  SDValue OutChain = Load.getValue(1);
4002  DAG.setRoot(OutChain);
4003  }
4004  setValue(&I, Load);
4005 }
4006 
4007 void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
4008  SDLoc sdl = getCurSDLoc();
4009 
4010  // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
4011  const Value *Ptr = I.getArgOperand(0);
4012  SDValue Src0 = getValue(I.getArgOperand(3));
4013  SDValue Mask = getValue(I.getArgOperand(2));
4014 
4015  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4016  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
4017  unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue();
4018  if (!Alignment)
4019  Alignment = DAG.getEVTAlignment(VT);
4020 
4021  AAMDNodes AAInfo;
4022  I.getAAMetadata(AAInfo);
4023  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
4024 
4025  SDValue Root = DAG.getRoot();
4026  SDValue Base;
4027  SDValue Index;
4028  const Value *BasePtr = Ptr;
4029  bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
4030  bool ConstantMemory = false;
4031  if (UniformBase &&
4032  AA && AA->pointsToConstantMemory(MemoryLocation(
4033  BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()),
4034  AAInfo))) {
4035  // Do not serialize (non-volatile) loads of constant memory with anything.
4036  Root = DAG.getEntryNode();
4037  ConstantMemory = true;
4038  }
4039 
4040  MachineMemOperand *MMO =
4041  DAG.getMachineFunction().
4042  getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr),
4043  MachineMemOperand::MOLoad, VT.getStoreSize(),
4044  Alignment, AAInfo, Ranges);
4045 
4046  if (!UniformBase) {
4047  Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
4048  Index = getValue(Ptr);
4049  }
4050  SDValue Ops[] = { Root, Src0, Mask, Base, Index };
4051  SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
4052  Ops, MMO);
4053 
4054  SDValue OutChain = Gather.getValue(1);
4055  if (!ConstantMemory)
4056  PendingLoads.push_back(OutChain);
4057  setValue(&I, Gather);
4058 }
4059 
4060 void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
4061  SDLoc dl = getCurSDLoc();
4062  AtomicOrdering SuccessOrder = I.getSuccessOrdering();
4063  AtomicOrdering FailureOrder = I.getFailureOrdering();
4064  SyncScope::ID SSID = I.getSyncScopeID();
4065 
4066  SDValue InChain = getRoot();
4067 
4068  MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
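  // The cmpxchg node produces three results: the loaded value (MemVT), an i1
  // success flag, and the output chain.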
4069  SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
4070  SDValue L = DAG.getAtomicCmpSwap(
4071  ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain,
4072  getValue(I.getPointerOperand()), getValue(I.getCompareOperand()),
4073  getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()),
4074  /*Alignment=*/ 0, SuccessOrder, FailureOrder, SSID);
4075 
4076  SDValue OutChain = L.getValue(2);
4077 
4078  setValue(&I, L);
4079  DAG.setRoot(OutChain);
4080 }
4081 
4082 void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
4083  SDLoc dl = getCurSDLoc();
4084  ISD::NodeType NT;
4085  switch (I.getOperation()) {
4086  default: llvm_unreachable("Unknown atomicrmw operation");
4087  case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
4088  case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
4089  case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
4090  case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break;
4091  case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
4092  case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break;
4093  case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break;
4094  case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break;
4095  case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break;
4096  case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
4097  case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
4098  }
4099  AtomicOrdering Order = I.getOrdering();
4100  SyncScope::ID SSID = I.getSyncScopeID();
4101 
4102  SDValue InChain = getRoot();
4103 
4104  SDValue L =
4105  DAG.getAtomic(NT, dl,
4106  getValue(I.getValOperand()).getSimpleValueType(),
4107  InChain,
4108  getValue(I.getPointerOperand()),
4109  getValue(I.getValOperand()),
4110  I.getPointerOperand(),
4111  /* Alignment=*/ 0, Order, SSID);
4112 
4113  SDValue OutChain = L.getValue(1);
4114 
4115  setValue(&I, L);
4116  DAG.setRoot(OutChain);
4117 }
4118 
4119 void SelectionDAGBuilder::visitFence(const FenceInst &I) {
4120  SDLoc dl = getCurSDLoc();
4121  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4122  SDValue Ops[3];
4123  Ops[0] = getRoot();
4124  Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl,
4125  TLI.getFenceOperandTy(DAG.getDataLayout()));
4126  Ops[2] = DAG.getConstant(I.getSyncScopeID(), dl,
4127  TLI.getFenceOperandTy(DAG.getDataLayout()));
4128  DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
4129 }
4130 
4131 void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
4132  SDLoc dl = getCurSDLoc();
4133  AtomicOrdering Order = I.getOrdering();
4134  SyncScope::ID SSID = I.getSyncScopeID();
4135 
4136  SDValue InChain = getRoot();
4137 
4138  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4139  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
4140 
4141  if (!TLI.supportsUnalignedAtomics() &&
4142  I.getAlignment() < VT.getStoreSize())
4143  report_fatal_error("Cannot generate unaligned atomic load");
4144 
4145  MachineMemOperand *MMO =
4146  DAG.getMachineFunction().
4147  getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
4148  MachineMemOperand::MOVolatile |
4149  MachineMemOperand::MOLoad,
4150  VT.getStoreSize(),
4151  I.getAlignment() ? I.getAlignment() :
4152  DAG.getEVTAlignment(VT),
4153  AAMDNodes(), nullptr, SSID, Order);
4154 
4155  InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
4156  SDValue L =
4157  DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
4158  getValue(I.getPointerOperand()), MMO);
4159 
4160  SDValue OutChain = L.getValue(1);
4161 
4162  setValue(&I, L);
4163  DAG.setRoot(OutChain);
4164 }
4165 
4166 void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
4167  SDLoc dl = getCurSDLoc();
4168 
4169  AtomicOrdering Order = I.getOrdering();
4170  SyncScope::ID SSID = I.getSyncScopeID();
4171 
4172  SDValue InChain = getRoot();
4173 
4174  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4175  EVT VT =
4176  TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
4177 
4178  if (I.getAlignment() < VT.getStoreSize())
4179  report_fatal_error("Cannot generate unaligned atomic store");
4180 
4181  SDValue OutChain =
4182  DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
4183  InChain,
4184  getValue(I.getPointerOperand()),
4185  getValue(I.getValueOperand()),
4186  I.getPointerOperand(), I.getAlignment(),
4187  Order, SSID);
4188 
4189  DAG.setRoot(OutChain);
4190 }
4191 
4192 /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
4193 /// node.
4194 void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
4195  unsigned Intrinsic) {
4196  // Ignore the callsite's attributes. A specific call site may be marked with
4197  // readnone, but the lowering code will expect the chain based on the
4198  // definition.
4199  const Function *F = I.getCalledFunction();
4200  bool HasChain = !F->doesNotAccessMemory();
4201  bool OnlyLoad = HasChain && F->onlyReadsMemory();
4202 
4203  // Build the operand list.
4204  SmallVector<SDValue, 8> Ops;
4205  if (HasChain) { // If this intrinsic has side-effects, chainify it.
4206  if (OnlyLoad) {
4207  // We don't need to serialize loads against other loads.
4208  Ops.push_back(DAG.getRoot());
4209  } else {
4210  Ops.push_back(getRoot());
4211  }
4212  }
4213 
4214  // Info is set by getTgtMemIntrinsic
4215  TargetLowering::IntrinsicInfo Info;
4216  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4217  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
4218  DAG.getMachineFunction(),
4219  Intrinsic);
4220 
4221  // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
4222  if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
4223  Info.opc == ISD::INTRINSIC_W_CHAIN)
4224  Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
4225  TLI.getPointerTy(DAG.getDataLayout())));
4226 
4227  // Add all operands of the call to the operand list.
4228  for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
4229  SDValue Op = getValue(I.getArgOperand(i));
4230  Ops.push_back(Op);
4231  }
4232 
4233  SmallVector<EVT, 4> ValueVTs;
4234  ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
4235 
4236  if (HasChain)
4237  ValueVTs.push_back(MVT::Other);
4238 
4239  SDVTList VTs = DAG.getVTList(ValueVTs);
4240 
4241  // Create the node.
4242  SDValue Result;
4243  if (IsTgtIntrinsic) {
4244  // This is target intrinsic that touches memory
4245  Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs,
4246  Ops, Info.memVT,
4247  MachinePointerInfo(Info.ptrVal, Info.offset), Info.align,
4248  Info.flags, Info.size);
4249  } else if (!HasChain) {
4250  Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
4251  } else if (!I.getType()->isVoidTy()) {
4252  Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
4253  } else {
4254  Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
4255  }
4256 
4257  if (HasChain) {
4258  SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
4259  if (OnlyLoad)
4260  PendingLoads.push_back(Chain);
4261  else
4262  DAG.setRoot(Chain);
4263  }
4264 
4265  if (!I.getType()->isVoidTy()) {
4266  if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
4267  EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy);
4268  Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
4269  } else
4270  Result = lowerRangeToAssertZExt(DAG, I, Result);
4271 
4272  setValue(&I, Result);
4273  }
4274 }
4275 
4276 /// GetSignificand - Get the significand and build it into a floating-point
4277 /// number with exponent of 1:
4278 ///
4279 /// Op = (Op & 0x007fffff) | 0x3f800000;
4280 ///
4281 /// where Op is the i32 bit pattern of the floating-point value.
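/// For example, for Op = 0x40490fdb (3.14159274f), masking the mantissa and
/// OR-ing in 0x3f800000 gives 0x3fc90fdb, i.e. the significand 1.57079637f.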
4282 static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) {
4283  SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
4284  DAG.getConstant(0x007fffff, dl, MVT::i32));
4285  SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
4286  DAG.getConstant(0x3f800000, dl, MVT::i32));
4287  return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
4288 }
4289 
4290 /// GetExponent - Get the exponent:
4291 ///
4292 /// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
4293 ///
4294 /// where Op is the i32 bit pattern of the floating-point value.
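/// For example, for Op = 0x40490fdb (3.14159274f), (Op & 0x7f800000) >> 23 is
/// the biased exponent 128, so the result is (float)(128 - 127) = 1.0f.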
4295 static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
4296  const TargetLowering &TLI, const SDLoc &dl) {
4297  SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
4298  DAG.getConstant(0x7f800000, dl, MVT::i32));
4299  SDValue t1 = DAG.getNode(
4300  ISD::SRL, dl, MVT::i32, t0,
4301  DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout())));
4302  SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
4303  DAG.getConstant(127, dl, MVT::i32));
4304  return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
4305 }
4306 
4307 /// getF32Constant - Get 32-bit floating point constant.
4308 static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
4309  const SDLoc &dl) {
4310  return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl,
4311  MVT::f32);
4312 }
4313 
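/// getLimitedPrecisionExp2 - Compute 2^t0 for limited-precision mode by
/// splitting t0 into integer and fractional parts, using the identity
/// 2^t0 = 2^IntegerPartOfX * 2^FractionalPartOfX: the integer part is added
/// directly into the exponent bits of the result, and 2^FractionalPartOfX is
/// approximated by a minimax polynomial.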
4314 static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
4315  SelectionDAG &DAG) {
4316  // TODO: What fast-math-flags should be set on the floating-point nodes?
4317 
4318  // IntegerPartOfX = (int32_t)t0;
4319  SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
4320 
4321  // FractionalPartOfX = t0 - (float)IntegerPartOfX;
4322  SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
4323  SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
4324 
4325  // IntegerPartOfX <<= 23;
4326  IntegerPartOfX = DAG.getNode(
4327  ISD::SHL, dl, MVT::i32, IntegerPartOfX,
4328  DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy(
4329  DAG.getDataLayout())));
4330 
4331  SDValue TwoToFractionalPartOfX;
4332  if (LimitFloatPrecision <= 6) {
4333  // For floating-point precision of 6:
4334  //
4335  // TwoToFractionalPartOfX =
4336  // 0.997535578f +
4337  // (0.735607626f + 0.252464424f * x) * x;
4338  //
4339  // error 0.0144103317, which is 6 bits
4340  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4341  getF32Constant(DAG, 0x3e814304, dl));
4342  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4343  getF32Constant(DAG, 0x3f3c50c8, dl));
4344  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4345  TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4346  getF32Constant(DAG, 0x3f7f5e7e, dl));
4347  } else if (LimitFloatPrecision <= 12) {
4348  // For floating-point precision of 12:
4349  //
4350  // TwoToFractionalPartOfX =
4351  // 0.999892986f +
4352  // (0.696457318f +
4353  // (0.224338339f + 0.792043434e-1f * x) * x) * x;
4354  //
4355  // error 0.000107046256, which is 13 to 14 bits
4356  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4357  getF32Constant(DAG, 0x3da235e3, dl));
4358  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4359  getF32Constant(DAG, 0x3e65b8f3, dl));
4360  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4361  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4362  getF32Constant(DAG, 0x3f324b07, dl));
4363  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4364  TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4365  getF32Constant(DAG, 0x3f7ff8fd, dl));
4366  } else { // LimitFloatPrecision <= 18
4367  // For floating-point precision of 18:
4368  //
4369  // TwoToFractionalPartOfX =
4370  // 0.999999982f +
4371  // (0.693148872f +
4372  // (0.240227044f +
4373  // (0.554906021e-1f +
4374  // (0.961591928e-2f +
4375  // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
4376  // error 2.47208000*10^(-7), which is better than 18 bits
4377  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4378  getF32Constant(DAG, 0x3924b03e, dl));
4379  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4380  getF32Constant(DAG, 0x3ab24b87, dl));
4381  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4382  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4383  getF32Constant(DAG, 0x3c1d8c17, dl));
4384  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4385  SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4386  getF32Constant(DAG, 0x3d634a1d, dl));
4387  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4388  SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4389  getF32Constant(DAG, 0x3e75fe14, dl));
4390  SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4391  SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
4392  getF32Constant(DAG, 0x3f317234, dl));
4393  SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
4394  TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
4395  getF32Constant(DAG, 0x3f800000, dl));
4396  }
4397 
4398  // Add the exponent into the result in integer domain.
4399  SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX);
4400  return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
4401  DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
4402 }
4403 
4404 /// expandExp - Lower an exp intrinsic. Handles the special sequences for
4405 /// limited-precision mode.
4406 static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4407  const TargetLowering &TLI) {
4408  if (Op.getValueType() == MVT::f32 &&
4409  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4410 
4411  // Put the exponent in the right bit position for later addition to the
4412  // final result:
4413  //
4414  // #define LOG2OFe 1.4426950f
4415  // t0 = Op * LOG2OFe
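  // This uses the identity exp(x) = 2^(x * log2(e)); the constant 0x3fb8aa3b
  // is log2(e) ~= 1.4426950f.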
4416 
4417  // TODO: What fast-math-flags should be set here?
4418  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
4419  getF32Constant(DAG, 0x3fb8aa3b, dl));
4420  return getLimitedPrecisionExp2(t0, dl, DAG);
4421  }
4422 
4423  // No special expansion.
4424  return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op);
4425 }
4426 
4427 /// expandLog - Lower a log intrinsic. Handles the special sequences for
4428 /// limited-precision mode.
4429 static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4430  const TargetLowering &TLI) {
4431  // TODO: What fast-math-flags should be set on the floating-point nodes?
4432 
4433  if (Op.getValueType() == MVT::f32 &&
4434  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4435  SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4436 
4437  // Scale the exponent by log(2) [0.69314718f].
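    // This uses the decomposition log(x) = e*log(2) + log(m) for x = 2^e * m
    // with 1 <= m < 2; the constant 0x3f317218 is log(2) ~= 0.69314718f.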
4438  SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
4439  SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
4440  getF32Constant(DAG, 0x3f317218, dl));
4441 
4442  // Get the significand and build it into a floating-point number with
4443  // exponent of 1.
4444  SDValue X = GetSignificand(DAG, Op1, dl);
4445 
4446  SDValue LogOfMantissa;
4447  if (LimitFloatPrecision <= 6) {
4448  // For floating-point precision of 6:
4449  //
4450  // LogofMantissa =
4451  // -1.1609546f +
4452  // (1.4034025f - 0.23903021f * x) * x;
4453  //
4454  // error 0.0034276066, which is better than 8 bits
4455  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4456  getF32Constant(DAG, 0xbe74c456, dl));
4457  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4458  getF32Constant(DAG, 0x3fb3a2b1, dl));
4459  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4460  LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4461  getF32Constant(DAG, 0x3f949a29, dl));
4462  } else if (LimitFloatPrecision <= 12) {
4463  // For floating-point precision of 12:
4464  //
4465  // LogOfMantissa =
4466  // -1.7417939f +
4467  // (2.8212026f +
4468  // (-1.4699568f +
4469  // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
4470  //
4471  // error 0.000061011436, which is 14 bits
4472  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4473  getF32Constant(DAG, 0xbd67b6d6, dl));
4474  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4475  getF32Constant(DAG, 0x3ee4f4b8, dl));
4476  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4477  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4478  getF32Constant(DAG, 0x3fbc278b, dl));
4479  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4480  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4481  getF32Constant(DAG, 0x40348e95, dl));
4482  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4483  LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4484  getF32Constant(DAG, 0x3fdef31a, dl));
4485  } else { // LimitFloatPrecision <= 18
4486  // For floating-point precision of 18:
4487  //
4488  // LogOfMantissa =
4489  // -2.1072184f +
4490  // (4.2372794f +
4491  // (-3.7029485f +
4492  // (2.2781945f +
4493  // (-0.87823314f +
4494  // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
4495  //
4496  // error 0.0000023660568, which is better than 18 bits
4497  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4498  getF32Constant(DAG, 0xbc91e5ac, dl));
4499  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4500  getF32Constant(DAG, 0x3e4350aa, dl));
4501  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4502  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4503  getF32Constant(DAG, 0x3f60d3e3, dl));
4504  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4505  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4506  getF32Constant(DAG, 0x4011cdf0, dl));
4507  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4508  SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4509  getF32Constant(DAG, 0x406cfd1c, dl));
4510  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4511  SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4512  getF32Constant(DAG, 0x408797cb, dl));
4513  SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4514  LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
4515  getF32Constant(DAG, 0x4006dcab, dl));
4516  }
4517 
4518  return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
4519  }
4520 
4521  // No special expansion.
4522  return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op);
4523 }
4524 
4525 /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
4526 /// limited-precision mode.
4527 static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4528  const TargetLowering &TLI) {
4529  // TODO: What fast-math-flags should be set on the floating-point nodes?
4530 
4531  if (Op.getValueType() == MVT::f32 &&
4532  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4533  SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4534 
4535  // Get the exponent.
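    // Here log2(x) = e + log2(m) for x = 2^e * m with 1 <= m < 2, so unlike
    // expandLog the exponent needs no scaling.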
4536  SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
4537 
4538  // Get the significand and build it into a floating-point number with
4539  // exponent of 1.
4540  SDValue X = GetSignificand(DAG, Op1, dl);
4541 
4542  // Different possible minimax approximations of significand in
4543  // floating-point for various degrees of accuracy over [1,2].
4544  SDValue Log2ofMantissa;
4545  if (LimitFloatPrecision <= 6) {
4546  // For floating-point precision of 6:
4547  //
4548  // Log2ofMantissa = -1.6749035f + (2.0246817f - 0.34484768f * x) * x;
4549  //
4550  // error 0.0049451742, which is more than 7 bits
4551  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4552  getF32Constant(DAG, 0xbeb08fe0, dl));
4553  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4554  getF32Constant(DAG, 0x40019463, dl));
4555  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4556  Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4557  getF32Constant(DAG, 0x3fd6633d, dl));
4558  } else if (LimitFloatPrecision <= 12) {
4559  // For floating-point precision of 12:
4560  //
4561  // Log2ofMantissa =
4562  // -2.51285454f +
4563  // (4.07009056f +
4564  // (-2.12067489f +
4565  // (0.645142248f - 0.816157886e-1f * x) * x) * x) * x;
4566  //
4567  // error 0.0000876136000, which is better than 13 bits
4568  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4569  getF32Constant(DAG, 0xbda7262e, dl));
4570  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4571  getF32Constant(DAG, 0x3f25280b, dl));
4572  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4573  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4574  getF32Constant(DAG, 0x4007b923, dl));
4575  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4576  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4577  getF32Constant(DAG, 0x40823e2f, dl));
4578  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4579  Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4580  getF32Constant(DAG, 0x4020d29c, dl));
4581  } else { // LimitFloatPrecision <= 18
4582  // For floating-point precision of 18:
4583  //
4584  // Log2ofMantissa =
4585  // -3.0400495f +
4586  // (6.1129976f +
4587  // (-5.3420409f +
4588  // (3.2865683f +
4589  // (-1.2669343f +
4590  // (0.27515199f -
4591  // 0.25691327e-1f * x) * x) * x) * x) * x) * x;
4592  //
4593  // error 0.0000018516, which is better than 18 bits
4594  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4595  getF32Constant(DAG, 0xbcd2769e, dl));
4596  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4597  getF32Constant(DAG, 0x3e8ce0b9, dl));
4598  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4599  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4600  getF32Constant(DAG, 0x3fa22ae7, dl));
4601  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4602  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4603  getF32Constant(DAG, 0x40525723, dl));
4604  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4605  SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4606  getF32Constant(DAG, 0x40aaf200, dl));
4607  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4608  SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4609  getF32Constant(DAG, 0x40c39dad, dl));
4610  SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4611  Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
4612  getF32Constant(DAG, 0x4042902c, dl));
4613  }
4614 
4615  return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
4616  }
4617 
4618  // No special expansion.
4619  return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op);
4620 }
4621 
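// For illustration, the degree-2 minimax polynomial from the 6-bit case of
// expandLog2 above (the constants 0xbeb08fe0, 0x40019463 and 0x3fd6633d
// decode to roughly -0.3448f, 2.0247f and 1.6749f), checked on the host
// against log2 over [1, 2):
#include <cmath>
#include <cstdio>

int main() {
  float maxErr = 0.0f;
  for (float x = 1.0f; x < 2.0f; x += 1e-4f) {
    // Same Horner evaluation as the FMUL/FADD/FSUB chain in expandLog2.
    float p = (2.0246817f - 0.34484768f * x) * x - 1.6749035f;
    maxErr = std::fmax(maxErr, std::fabs(p - std::log2(x)));
  }
  std::printf("max error ~ %g\n", maxErr); // ~0.0049, i.e. about 7 bits
}
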
4622 /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
4623 /// limited-precision mode.
4624 static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4625  const TargetLowering &TLI) {
4626  // TODO: What fast-math-flags should be set on the floating-point nodes?
4627 
4628  if (Op.getValueType() == MVT::f32 &&
4629  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4630  SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4631 
4632  // Scale the exponent by log10(2) [0.30102999f].
4633  SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
4634  SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
4635  getF32Constant(DAG, 0x3e9a209a, dl));
4636 
4637  // Get the significand and build it into a floating-point number with
4638  // exponent of 1.
4639  SDValue X = GetSignificand(DAG, Op1, dl);
4640 
4641  SDValue Log10ofMantissa;
4642  if (LimitFloatPrecision <= 6) {
4643  // For floating-point precision of 6:
4644  //
4645  // Log10ofMantissa =
4646  // -0.50419619f +
4647  // (0.60948995f - 0.10380950f * x) * x;
4648  //
4649  // error 0.0014886165, which is 6 bits
4650  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4651  getF32Constant(DAG, 0xbdd49a13, dl));
4652  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4653  getF32Constant(DAG, 0x3f1c0789, dl));
4654  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4655  Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4656  getF32Constant(DAG, 0x3f011300, dl));
4657  } else if (LimitFloatPrecision <= 12) {
4658  // For floating-point precision of 12:
4659  //
4660  // Log10ofMantissa =
4661  // -0.64831180f +
4662  // (0.91751397f +
4663  // (-0.31664806f + 0.47637168e-1f * x) * x) * x;
4664  //
4665  // error 0.00019228036, which is better than 12 bits
4666  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4667  getF32Constant(DAG, 0x3d431f31, dl));
4668  SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
4669  getF32Constant(DAG, 0x3ea21fb2, dl));
4670  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4671  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4672  getF32Constant(DAG, 0x3f6ae232, dl));
4673  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4674  Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
4675  getF32Constant(DAG, 0x3f25f7c3, dl));
4676  } else { // LimitFloatPrecision <= 18
4677  // For floating-point precision of 18:
4678  //
4679  // Log10ofMantissa =
4680  // -0.84299375f +
4681  // (1.5327582f +
4682  // (-1.0688956f +
4683  // (0.49102474f +
4684  // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
4685  //
4686  // error 0.0000037995730, which is better than 18 bits
4687  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4688  getF32Constant(DAG, 0x3c5d51ce, dl));
4689  SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
4690  getF32Constant(DAG, 0x3e00685a, dl));
4691  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4692  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4693  getF32Constant(DAG, 0x3efb6798, dl));
4694  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4695  SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
4696  getF32Constant(DAG, 0x3f88d192, dl));
4697  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4698  SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4699  getF32Constant(DAG, 0x3fc4316c, dl));
4700  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4701  Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
4702  getF32Constant(DAG, 0x3f57ce70, dl));
4703  }
4704 
4705  return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
4706  }
4707 
4708  // No special expansion.
4709  return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op);
4710 }
4711 
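// Sketch of the identity expandLog10 uses above: log10(f) = E * log10(2) +
// log10(m), where 0x3e9a209a is log10(2) ~ 0.30102999f. A host-side check:
#include <cmath>
#include <cstdio>

int main() {
  float f = 42.0f;
  int E = std::ilogb(f);        // unbiased exponent, as GetExponent yields
  float m = std::scalbn(f, -E); // significand in [1, 2), as GetSignificand yields
  std::printf("%f vs %f\n", std::log10(f), E * 0.30102999f + std::log10(m));
}
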
4712 /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
4713 /// limited-precision mode.
4714 static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4715  const TargetLowering &TLI) {
4716  if (Op.getValueType() == MVT::f32 &&
4717  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
4718  return getLimitedPrecisionExp2(Op, dl, DAG);
4719 
4720  // No special expansion.
4721  return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op);
4722 }
4723 
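// Sketch of the scheme behind the getLimitedPrecisionExp2 helper used above:
// 2^x = 2^ipart * 2^fpart, where 2^fpart is approximated by a small
// polynomial (std::exp2 stands in for it here, and std::floor for the
// fp-to-int conversion) and ipart is folded straight into the f32 exponent
// field with integer arithmetic:
#include <cmath>
#include <cstdint>
#include <cstring>
#include <cstdio>

int main() {
  float x = 4.7f;
  int ipart = (int)std::floor(x);
  float fpart = x - (float)ipart;  // in [0, 1)
  float poly = std::exp2(fpart);   // minimax polynomial in the real code
  uint32_t bits;
  std::memcpy(&bits, &poly, sizeof bits);
  bits += (uint32_t)ipart << 23;   // scale by 2^ipart via the exponent field
  float result;
  std::memcpy(&result, &bits, sizeof result);
  std::printf("%f vs %f\n", result, std::exp2(x));
}
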
4724 /// expandPow - Lower a pow intrinsic. Handles the special sequence for
4725 /// limited-precision mode when the base is exactly 10.0f.
4726 static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
4727  SelectionDAG &DAG, const TargetLowering &TLI) {
4728  bool IsExp10 = false;
4729  if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
4730  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4731  if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
4732  APFloat Ten(10.0f);
4733  IsExp10 = LHSC->isExactlyValue(Ten);
4734  }
4735  }
4736 
4737  // TODO: What fast-math-flags should be set on the FMUL node?
4738  if (IsExp10) {
4739  // Put the exponent in the right bit position for later addition to the
4740  // final result:
4741  //
4742  // #define LOG2OF10 3.3219281f
4743  // t0 = RHS * LOG2OF10;
4744  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
4745  getF32Constant(DAG, 0x40549a78, dl));
4746  return getLimitedPrecisionExp2(t0, dl, DAG);
4747  }
4748 
4749  // No special expansion.
4750  return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
4751 }
4752 
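// The IsExp10 path above relies on the identity 10^x == 2^(x * log2(10));
// 0x40549a78 is log2(10) ~ 3.3219281f. A host-side check:
#include <cmath>
#include <cstdio>

int main() {
  float x = 2.5f;
  std::printf("%f vs %f\n", std::pow(10.0f, x), std::exp2(x * 3.3219281f));
}
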
4753 /// ExpandPowI - Expand a llvm.powi intrinsic.
4754 static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
4755  SelectionDAG &DAG) {
4756  // If RHS is a constant, we can expand this out to a multiplication tree;
4757  // otherwise we end up lowering to a call to __powidf2 (for example). When
4758  // optimizing for size, we only do this if the expansion would produce a
4759  // small number of multiplies; otherwise we fall back to the libcall.
4760  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4761  // Get the exponent as a positive value.
4762  unsigned Val = RHSC->getSExtValue();
4763  if ((int)Val < 0) Val = -Val;
4764 
4765  // powi(x, 0) -> 1.0
4766  if (Val == 0)
4767  return DAG.getConstantFP(1.0, DL, LHS.getValueType());
4768 
4769  const Function &F = DAG.getMachineFunction().getFunction();
4770  if (!F.optForSize() ||
4771  // If optimizing for size, don't insert too many multiplies.
4772  // This inserts up to 5 multiplies.
4773  countPopulation(Val) + Log2_32(Val) < 7) {
4774  // We use the simple binary decomposition method to generate the multiply
4775  // sequence. There are more optimal ways to do this (for example,
4776  // powi(x,15) generates one more multiply than it should), but this has
4777  // the benefit of being both really simple and much better than a libcall.
4778  SDValue Res; // Logically starts equal to 1.0
4779  SDValue CurSquare = LHS;
4780  // TODO: Intrinsics should have fast-math-flags that propagate to these
4781  // nodes.
4782  while (Val) {
4783  if (Val & 1) {
4784  if (Res.getNode())
4785  Res = DAG.getNode(ISD::FMUL, DL, Res.getValueType(), Res, CurSquare);
4786  else
4787  Res = CurSquare; // 1.0*CurSquare.
4788  }
4789 
4790  CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
4791  CurSquare, CurSquare);
4792  Val >>= 1;
4793  }
4794 
4795  // If the original exponent was negative, invert the result
4796  // (e.g. powi(x, -3) yields 1/(x*x*x)).
4796  if (RHSC->getSExtValue() < 0)
4797  Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
4798  DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res);
4799  return Res;
4800  }
4801  }
4802 
4803  // Otherwise, expand to a libcall.
4804  return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
4805 }
4806 
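// A standalone sketch of the binary decomposition loop above, on host
// doubles: each iteration squares CurSquare, and set bits of the exponent
// multiply it into the result, so powi(x, 13) with 13 = 0b1101 becomes
// x * x^4 * x^8. The cost is popcount(n) - 1 result multiplies plus
// floor(log2(n)) squarings, which is what the "popcount + log2 < 7"
// size heuristic bounds at five FMULs.
#include <cstdio>

static double powiSketch(double x, unsigned n) {
  double res = 1.0; // the DAG version elides this initial 1.0
  double curSquare = x;
  while (n) {
    if (n & 1)
      res *= curSquare;
    curSquare *= curSquare;
    n >>= 1;
  }
  return res;
}

int main() { std::printf("%f\n", powiSketch(2.0, 13)); } // prints 8192.000000
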
4807 // getUnderlyingArgReg - Find underlying register used for a truncated or
4808 // bitcasted argument.
4809 static unsigned getUnderlyingArgReg(const SDValue &N) {
4810  switch (N.getOpcode()) {
4811  case ISD::CopyFromReg:
4812  return cast<RegisterSDNode>(N.getOperand(1))->getReg();
4813  case ISD::BITCAST:
4814  case ISD::AssertZext:
4815  case ISD::AssertSext:
4816  case ISD::TRUNCATE:
4817  return getUnderlyingArgReg(N.getOperand(0));
4818  default:
4819  return 0;
4820  }
4821 }
4822 
4823 /// If the DbgValueInst is a dbg_value of a function argument, create the
4824 /// corresponding DBG_VALUE machine instruction for it now. At the end of
4825 /// instruction selection, they will be inserted into the entry BB.
4826 bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
4827  const Value *V, DILocalVariable *Variable, DIExpression *Expr,
4828  DILocation *DL, bool IsDbgDeclare, const SDValue &N) {
4829  const Argument *Arg = dyn_cast<Argument>(V);
4830  if (!Arg)
4831  return false;
4832 
4833  MachineFunction &MF = DAG.getMachineFunction();
4834  const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
4835 
4836  bool IsIndirect = false;
4837  Optional<MachineOperand> Op;
4838  // Some arguments' frame index is recorded during argument lowering.
4839  int FI = FuncInfo.getArgumentFrameIndex(Arg);
4840  if (FI != std::numeric_limits<int>::max())
4841  Op = MachineOperand::CreateFI(FI);
4842 
4843  if (!Op && N.getNode()) {
4844  unsigned Reg = getUnderlyingArgReg(N);
4845  if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
4846  MachineRegisterInfo &RegInfo = MF.getRegInfo();
4847  unsigned PR = RegInfo.getLiveInPhysReg(Reg);
4848  if (PR)
4849  Reg = PR;
4850  }
4851  if (Reg) {
4852  Op = MachineOperand::CreateReg(Reg, false);
4853  IsIndirect = IsDbgDeclare;
4854  }
4855  }
4856 
4857  if (!Op && N.getNode())
4858  // Check if frame index is available.
4859  if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
4860  if (FrameIndexSDNode *FINode =
4861  dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
4862  Op = MachineOperand::CreateFI(FINode->getIndex());
4863 
4864  if (!Op) {
4865  // Check if ValueMap has reg number.
4866  DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
4867  if (VMI != FuncInfo.ValueMap.end()) {
4868  const auto &TLI = DAG.getTargetLoweringInfo();
4869  RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
4870  V->getType(), isABIRegCopy(V));
4871  unsigned NumRegs =
4872  std::accumulate(RFV.RegCount.begin(), RFV.RegCount.end(), 0);
4873  if (NumRegs > 1) {
4874  unsigned I = 0;
4875  unsigned Offset = 0;
4876  auto RegisterVT = RFV.RegVTs.begin();
4877  for (auto RegCount : RFV.RegCount) {
4878  unsigned RegisterSize = (RegisterVT++)->getSizeInBits();
4879  for (unsigned E = I + RegCount; I != E; ++I) {
4880  // The vregs are guaranteed to be allocated in sequence.
4881  Op = MachineOperand::CreateReg(VMI->second + I, false);
4882  auto FragmentExpr = DIExpression::createFragmentExpression(
4883  Expr, Offset, RegisterSize);
4884  if (!FragmentExpr)
4885  continue;
4886  FuncInfo.ArgDbgValues.push_back(
4887  BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare,
4888  Op->getReg(), Variable, *FragmentExpr));
4889  Offset += RegisterSize;
4890  }
4891  }
4892  return true;
4893  }
4894  Op = MachineOperand::CreateReg(VMI->second, false);
4895  IsIndirect = IsDbgDeclare;
4896  }
4897  }
4898 
4899  if (!Op)
4900  return false;
4901 
4902  assert(Variable->isValidLocationForIntrinsic(DL) &&
4903  "Expected inlined-at fields to agree");
4904  if (Op->isReg())
4905  FuncInfo.ArgDbgValues.push_back(
4906  BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
4907  Op->getReg(), Variable, Expr));
4908  else
4909  FuncInfo.ArgDbgValues.push_back(
4910  BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE))
4911  .add(*Op)
4912  .addImm(0)
4913  .addMetadata(Variable)
4914  .addMetadata(Expr));
4915 
4916  return true;
4917 }
4918 
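// Illustration of the multi-register path above: a value spread over several
// consecutive vregs gets one DBG_VALUE per register, each tagged with a
// fragment (offset, size) carved out by DIExpression::createFragmentExpression.
// Hypothetical layout for an i128 argument split into two 64-bit registers:
#include <cstdio>

int main() {
  const unsigned RegSizes[] = {64, 64}; // bits per register, illustrative
  unsigned Offset = 0;
  for (unsigned Size : RegSizes) {
    std::printf("DBG_VALUE vreg, fragment(offset=%u, size=%u)\n", Offset, Size);
    Offset += Size;
  }
}
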
4919 /// Return the appropriate SDDbgValue based on N.
4920 SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
4921  DILocalVariable *Variable,
4922  DIExpression *Expr,
4923  const DebugLoc &dl,
4924  unsigned DbgSDNodeOrder) {
4925  if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
4926  // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
4927  // stack slot locations as such instead of as indirectly addressed
4928  // locations.
4929  return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), dl,
4930  DbgSDNodeOrder);
4931  }
4932  return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, dl,
4933  DbgSDNodeOrder);
4934 }
4935 
4936 // Visual Studio defines setjmp as _setjmp
4937 #if defined(_MSC_VER) && defined(setjmp) && \
4938  !defined(setjmp_undefined_for_msvc)
4939 # pragma push_macro("setjmp")
4940 # undef setjmp
4941 # define setjmp_undefined_for_msvc
4942 #endif
4943 
4944 /// Lower the call to the specified intrinsic function. If we want to emit this
4945 /// as a call to a named external function, return the name. Otherwise, lower it
4946 /// and return null.
4947 const char *
4948 SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
4949  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4950  SDLoc sdl = getCurSDLoc();
4951  DebugLoc dl = getCurDebugLoc();
4952  SDValue Res;
4953 
4954  switch (Intrinsic) {
4955  default:
4956  // By default, turn this into a target intrinsic node.
4957  visitTargetIntrinsic(I, Intrinsic);
4958  return nullptr;
4959  case Intrinsic::vastart: visitVAStart(I); return nullptr;
4960  case Intrinsic::vaend: visitVAEnd(I); return nullptr;
4961  case Intrinsic::vacopy: visitVACopy(I); return nullptr;
4962  case Intrinsic::returnaddress:
4963  setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl,
4964  TLI.getPointerTy(DAG.getDataLayout()),
4965  getValue(I.getArgOperand(0))));
4966  return nullptr;
4967  case Intrinsic::addressofreturnaddress:
4968  setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
4969  TLI.getPointerTy(DAG.getDataLayout())));
4970  return nullptr;
4971  case Intrinsic::frameaddress:
4972  setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
4973  TLI.getPointerTy(DAG.getDataLayout()),
4974  getValue(I.getArgOperand(0))));
4975  return nullptr;
4976  case Intrinsic::read_register: {
4977  Value *Reg = I.getArgOperand(0);
4978  SDValue Chain = getRoot();
4979  SDValue RegName =
4980  DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
4981  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
4982  Res = DAG.getNode(ISD::READ_REGISTER, sdl,
4983  DAG.getVTList(VT, MVT::Other), Chain, RegName);
4984  setValue(&I, Res);
4985  DAG.setRoot(Res.getValue(1));
4986  return nullptr;
4987  }
4988  case Intrinsic::write_register: {
4989  Value *Reg = I.getArgOperand(0);
4990  Value *RegValue = I.getArgOperand(1);
4991  SDValue Ch