//===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements routines for translating from LLVM IR into SelectionDAG IR.
//
//===----------------------------------------------------------------------===//

#include "SelectionDAGBuilder.h"
#include "SDNodeDbgValue.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <limits>
#include <numeric>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "isel"

/// LimitFloatPrecision - Generate low-precision inline sequences for
/// some float libcalls (6, 8 or 12 bits).
static unsigned LimitFloatPrecision;

static cl::opt<unsigned, true>
    LimitFPPrecision("limit-float-precision",
                     cl::desc("Generate low-precision inline sequences "
                              "for some float libcalls"),
                     cl::location(LimitFloatPrecision), cl::Hidden,
                     cl::init(0));

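// Usage note (illustrative, not part of the original source): cl::opt options
// are registered globally, so any tool linking this code exposes the flag,
// e.g.
//   llc -limit-float-precision=8 input.ll
// Only the small bit counts mentioned in the comment above select a
// precision-limited expansion; the default of 0 leaves the libcalls alone.
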
static cl::opt<unsigned> SwitchPeelThreshold(
    "switch-peel-threshold", cl::Hidden, cl::init(66),
    cl::desc("Set the case probability threshold for peeling the case from a "
             "switch statement. A value greater than 100 will void this "
             "optimization"));

// Limit the width of DAG chains. This is important in general to prevent
// DAG-based analysis from blowing up. For example, alias analysis and
// load clustering may not complete in reasonable time. It is difficult to
// recognize and avoid this situation within each individual analysis, and
// future analyses are likely to have the same behavior. Limiting DAG width is
// the safe approach and will be especially important with global DAGs.
//
// MaxParallelChains default is arbitrarily high to avoid affecting
// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
// sequence over this should have been converted to llvm.memcpy by the
// frontend. It is easy to induce this behavior with .ll code such as:
//   %buffer = alloca [4096 x i8]
//   %data = load [4096 x i8]* %argPtr
//   store [4096 x i8] %data, [4096 x i8]* %buffer
static const unsigned MaxParallelChains = 64;

// True if the Value passed requires ABI mangling as it is a parameter to a
// function or a return value from a function which is not an intrinsic.
static bool isABIRegCopy(const Value *V) {
  const bool IsRetInst = V && isa<ReturnInst>(V);
  const bool IsCallInst = V && isa<CallInst>(V);
  const bool IsInLineAsm =
      IsCallInst && static_cast<const CallInst *>(V)->isInlineAsm();
  const bool IsIndirectFunctionCall =
      IsCallInst && !IsInLineAsm &&
      !static_cast<const CallInst *>(V)->getCalledFunction();
  // It is possible that the call instruction is an inline asm statement or an
  // indirect function call in which case the return value of
  // getCalledFunction() would be nullptr.
  const bool IsIntrinsicCall =
      IsCallInst && !IsInLineAsm && !IsIndirectFunctionCall &&
      static_cast<const CallInst *>(V)->getCalledFunction()->getIntrinsicID() !=
          Intrinsic::not_intrinsic;

  return IsRetInst || (IsCallInst && (!IsInLineAsm && !IsIntrinsicCall));
}
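
// Illustrative examples (not part of the original source) of what the
// predicate above returns for different IR values:
//   ret i32 %r                            --> true  (return instruction)
//   %a = call i32 @foo(i32 %x)            --> true  (direct non-intrinsic call)
//   %b = call i32 %fptr(i32 %x)           --> true  (indirect call)
//   %c = call i32 asm "...", "=r"()       --> false (inline asm)
//   %d = call i32 @llvm.ctpop.i32(i32 %x) --> false (intrinsic call)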

static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                      const SDValue *Parts, unsigned NumParts,
                                      MVT PartVT, EVT ValueVT, const Value *V,
                                      bool IsABIRegCopy);

/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
/// larger than ValueVT then AssertOp can be used to specify whether the extra
/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
/// (ISD::AssertSext).
static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
                                const SDValue *Parts, unsigned NumParts,
                                MVT PartVT, EVT ValueVT, const Value *V,
                                Optional<ISD::NodeType> AssertOp = None,
                                bool IsABIRegCopy = false) {
  if (ValueVT.isVector())
    return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
                                  PartVT, ValueVT, V, IsABIRegCopy);

  assert(NumParts > 0 && "No parts to assemble!");
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Val = Parts[0];

  if (NumParts > 1) {
    // Assemble the value from multiple parts.
    if (ValueVT.isInteger()) {
      unsigned PartBits = PartVT.getSizeInBits();
      unsigned ValueBits = ValueVT.getSizeInBits();

      // Assemble the power of 2 part.
      unsigned RoundParts = NumParts & (NumParts - 1) ?
        1 << Log2_32(NumParts) : NumParts;
      unsigned RoundBits = PartBits * RoundParts;
      EVT RoundVT = RoundBits == ValueBits ?
        ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
      SDValue Lo, Hi;

      EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);

      if (RoundParts > 2) {
        Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
                              PartVT, HalfVT, V);
        Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
                              RoundParts / 2, PartVT, HalfVT, V);
      } else {
        Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
        Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
      }

      if (DAG.getDataLayout().isBigEndian())
        std::swap(Lo, Hi);

      Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);

      if (RoundParts < NumParts) {
        // Assemble the trailing non-power-of-2 part.
        unsigned OddParts = NumParts - RoundParts;
        EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
        Hi = getCopyFromParts(DAG, DL,
                              Parts + RoundParts, OddParts, PartVT, OddVT, V);

        // Combine the round and odd parts.
        Lo = Val;
        if (DAG.getDataLayout().isBigEndian())
          std::swap(Lo, Hi);
        EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
        Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
        Hi =
            DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
                        DAG.getConstant(Lo.getValueSizeInBits(), DL,
                                        TLI.getPointerTy(DAG.getDataLayout())));
        Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
        Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
      }
    } else if (PartVT.isFloatingPoint()) {
      // FP split into multiple FP parts (for ppcf128)
      assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
             "Unexpected split");
      SDValue Lo, Hi;
      Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
      Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
      if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
        std::swap(Lo, Hi);
      Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
    } else {
      // FP split into integer parts (soft fp)
      assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
             !PartVT.isVector() && "Unexpected split");
      EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
      Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V);
    }
  }

  // There is now one part, held in Val. Correct it to match ValueVT.
  // PartEVT is the type of the register class that holds the value.
  // ValueVT is the type of the inline asm operation.
  EVT PartEVT = Val.getValueType();

  if (PartEVT == ValueVT)
    return Val;

  if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
      ValueVT.bitsLT(PartEVT)) {
    // For an FP value in an integer part, we need to truncate to the right
    // width first.
    PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
    Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
  }

  // Handle types that have the same size.
  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

  // Handle types with different sizes.
  if (PartEVT.isInteger() && ValueVT.isInteger()) {
    if (ValueVT.bitsLT(PartEVT)) {
      // For a truncate, see if we have any information to
      // indicate whether the truncated bits will always be
      // zero or sign-extension.
      if (AssertOp.hasValue())
        Val = DAG.getNode(*AssertOp, DL, PartEVT, Val,
                          DAG.getValueType(ValueVT));
      return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
    }
    return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
  }

  if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
    // FP_ROUND's are always exact here.
    if (ValueVT.bitsLT(Val.getValueType()))
      return DAG.getNode(
          ISD::FP_ROUND, DL, ValueVT, Val,
          DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())));

    return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
  }

  llvm_unreachable("Unknown mismatch!");
}
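
// Worked example (illustrative, little-endian): assembling an i64 from two
// i32 parts takes the NumParts == 2 path above:
//   Lo = BITCAST i32 Parts[0], Hi = BITCAST i32 Parts[1]
//   Val = BUILD_PAIR i64 Lo, Hi
// With three i32 parts and ValueVT = i96, RoundParts is 2: the first two
// parts form an i64 as above, the third part becomes the odd part, and the
// two pieces are merged via ANY_EXTEND/ZERO_EXTEND to i96, SHL by 64, and OR.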

static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
                                              const Twine &ErrMsg) {
  const Instruction *I = dyn_cast_or_null<Instruction>(V);
  if (!V)
    return Ctx.emitError(ErrMsg);

  const char *AsmError = ", possible invalid constraint for vector type";
  if (const CallInst *CI = dyn_cast<CallInst>(I))
    if (isa<InlineAsm>(CI->getCalledValue()))
      return Ctx.emitError(I, ErrMsg + AsmError);

  return Ctx.emitError(I, ErrMsg);
}

/// getCopyFromPartsVector - Create a value that contains the specified legal
/// parts combined into the value they represent. If the parts combine to a
/// type larger than ValueVT then AssertOp can be used to specify whether the
/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
/// ValueVT (ISD::AssertSext).
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                      const SDValue *Parts, unsigned NumParts,
                                      MVT PartVT, EVT ValueVT, const Value *V,
                                      bool IsABIRegCopy) {
  assert(ValueVT.isVector() && "Not a vector value");
  assert(NumParts > 0 && "No parts to assemble!");
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Val = Parts[0];

  // Handle a multi-element vector.
  if (NumParts > 1) {
    EVT IntermediateVT;
    MVT RegisterVT;
    unsigned NumIntermediates;
    unsigned NumRegs;

    if (IsABIRegCopy) {
      NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
          *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
          RegisterVT);
    } else {
      NumRegs =
          TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
                                     NumIntermediates, RegisterVT);
    }

    assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
    NumParts = NumRegs; // Silence a compiler warning.
    assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
    assert(RegisterVT.getSizeInBits() ==
           Parts[0].getSimpleValueType().getSizeInBits() &&
           "Part type sizes don't match!");

    // Assemble the parts into intermediate operands.
    SmallVector<SDValue, 8> Ops(NumIntermediates);
    if (NumIntermediates == NumParts) {
      // If the register was not expanded, truncate or copy the value,
      // as appropriate.
      for (unsigned i = 0; i != NumParts; ++i)
        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
                                  PartVT, IntermediateVT, V);
    } else if (NumParts > 0) {
      // If the intermediate type was expanded, build the intermediate
      // operands from the parts.
      assert(NumParts % NumIntermediates == 0 &&
             "Must expand into a divisible number of parts!");
      unsigned Factor = NumParts / NumIntermediates;
      for (unsigned i = 0; i != NumIntermediates; ++i)
        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
                                  PartVT, IntermediateVT, V);
    }

    // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
    // intermediate operands.
    EVT BuiltVectorTy =
        EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(),
                         (IntermediateVT.isVector()
                              ? IntermediateVT.getVectorNumElements() * NumParts
                              : NumIntermediates));
    Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
                                                : ISD::BUILD_VECTOR,
                      DL, BuiltVectorTy, Ops);
  }

  // There is now one part, held in Val. Correct it to match ValueVT.
  EVT PartEVT = Val.getValueType();

  if (PartEVT == ValueVT)
    return Val;

  if (PartEVT.isVector()) {
    // If the element type of the source/dest vectors are the same, but the
    // parts vector has more elements than the value vector, then we have a
    // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
    // elements we want.
    if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
      assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
             "Cannot narrow, it would be a lossy transformation");
      return DAG.getNode(
          ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
          DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
    }

    // Vector/Vector bitcast.
    if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

    assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
           "Cannot handle this kind of promotion");
    // Promoted vector extract
    return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
  }

  // Trivial bitcast if the types are the same size and the destination
  // vector type is legal.
  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
      TLI.isTypeLegal(ValueVT))
    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

  if (ValueVT.getVectorNumElements() != 1) {
    // Certain ABIs require that vectors are passed as integers. For vectors
    // that are the same size, this is an obvious bitcast.
    if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
    } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) {
      // Bitcast Val back to the original type and extract the corresponding
      // vector we want.
      unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits();
      EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(),
                                          ValueVT.getVectorElementType(), Elts);
      Val = DAG.getBitcast(WiderVecType, Val);
      return DAG.getNode(
          ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
          DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
    }

    diagnosePossiblyInvalidConstraint(
        *DAG.getContext(), V, "non-trivial scalar-to-vector conversion");
    return DAG.getUNDEF(ValueVT);
  }

  // Handle cases such as i8 -> <1 x i1>
  EVT ValueSVT = ValueVT.getVectorElementType();
  if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT)
    Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
                                    : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);

  return DAG.getBuildVector(ValueVT, DL, Val);
}
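
// Illustrative case: a <2 x float> value that was passed in a single
// <4 x float> register part. PartEVT (v4f32) and ValueVT (v2f32) share the
// f32 element type, so the widening branch above emits
//   EXTRACT_SUBVECTOR v2f32, Val, 0
// instead of a bitcast, dropping the two padding lanes.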

static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
                                 SDValue Val, SDValue *Parts, unsigned NumParts,
                                 MVT PartVT, const Value *V, bool IsABIRegCopy);

/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
                           SDValue *Parts, unsigned NumParts, MVT PartVT,
                           const Value *V,
                           ISD::NodeType ExtendKind = ISD::ANY_EXTEND,
                           bool IsABIRegCopy = false) {
  EVT ValueVT = Val.getValueType();

  // Handle the vector case separately.
  if (ValueVT.isVector())
    return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
                                IsABIRegCopy);

  unsigned PartBits = PartVT.getSizeInBits();
  unsigned OrigNumParts = NumParts;
  assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
         "Copying to an illegal type!");

  if (NumParts == 0)
    return;

  assert(!ValueVT.isVector() && "Vector case handled elsewhere");
  EVT PartEVT = PartVT;
  if (PartEVT == ValueVT) {
    assert(NumParts == 1 && "No-op copy with multiple parts!");
    Parts[0] = Val;
    return;
  }

  if (NumParts * PartBits > ValueVT.getSizeInBits()) {
    // If the parts cover more bits than the value has, promote the value.
    if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
      assert(NumParts == 1 && "Do not know what to promote to!");
      Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
    } else {
      if (ValueVT.isFloatingPoint()) {
        // FP values need to be bitcast, then extended if they are being put
        // into a larger container.
        ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
        Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
      }
      assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
             ValueVT.isInteger() &&
             "Unknown mismatch!");
      ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
      Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
      if (PartVT == MVT::x86mmx)
        Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    }
  } else if (PartBits == ValueVT.getSizeInBits()) {
    // Different types of the same size.
    assert(NumParts == 1 && PartEVT != ValueVT);
    Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
  } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
    // If the parts cover fewer bits than the value has, truncate the value.
    assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
           ValueVT.isInteger() &&
           "Unknown mismatch!");
    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
    if (PartVT == MVT::x86mmx)
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
  }

  // The value may have changed - recompute ValueVT.
  ValueVT = Val.getValueType();
  assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
         "Failed to tile the value with PartVT!");

  if (NumParts == 1) {
    if (PartEVT != ValueVT) {
      diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
                                        "scalar-to-vector conversion failed");
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    }

    Parts[0] = Val;
    return;
  }

  // Expand the value into multiple parts.
  if (NumParts & (NumParts - 1)) {
    // The number of parts is not a power of 2. Split off and copy the tail.
    assert(PartVT.isInteger() && ValueVT.isInteger() &&
           "Do not know what to expand to!");
    unsigned RoundParts = 1 << Log2_32(NumParts);
    unsigned RoundBits = RoundParts * PartBits;
    unsigned OddParts = NumParts - RoundParts;
    SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
                                 DAG.getIntPtrConstant(RoundBits, DL));
    getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V);

    if (DAG.getDataLayout().isBigEndian())
      // The odd parts were reversed by getCopyToParts - unreverse them.
      std::reverse(Parts + RoundParts, Parts + NumParts);

    NumParts = RoundParts;
    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
  }

  // The number of parts is a power of 2. Repeatedly bisect the value using
  // EXTRACT_ELEMENT.
  Parts[0] = DAG.getNode(ISD::BITCAST, DL,
                         EVT::getIntegerVT(*DAG.getContext(),
                                           ValueVT.getSizeInBits()),
                         Val);

  for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
    for (unsigned i = 0; i < NumParts; i += StepSize) {
      unsigned ThisBits = StepSize * PartBits / 2;
      EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
      SDValue &Part0 = Parts[i];
      SDValue &Part1 = Parts[i+StepSize/2];

      Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
                          ThisVT, Part0, DAG.getIntPtrConstant(1, DL));
      Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
                          ThisVT, Part0, DAG.getIntPtrConstant(0, DL));

      if (ThisBits == PartBits && ThisVT != PartVT) {
        Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
        Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
      }
    }
  }

  if (DAG.getDataLayout().isBigEndian())
    std::reverse(Parts, Parts + OrigNumParts);
}
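
// Worked example (illustrative): splitting an i64 into four i16 parts.
// NumParts == 4 is a power of 2, so the bisection loop above produces
//   pass 1: Parts[0] = lo i32, Parts[2] = hi i32   (EXTRACT_ELEMENT 0/1)
//   pass 2: each i32 is split again into two i16 parts
// leaving Parts[0..3] ordered from least to most significant; the final
// std::reverse flips that order on big-endian targets.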

/// getCopyToPartsVector - Create a series of nodes that contain the specified
/// value split into legal parts.
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                 SDValue Val, SDValue *Parts, unsigned NumParts,
                                 MVT PartVT, const Value *V,
                                 bool IsABIRegCopy) {
  EVT ValueVT = Val.getValueType();
  assert(ValueVT.isVector() && "Not a vector");
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  if (NumParts == 1) {
    EVT PartEVT = PartVT;
    if (PartEVT == ValueVT) {
      // Nothing to do.
    } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
      // Bitconvert vector->vector case.
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    } else if (PartVT.isVector() &&
               PartEVT.getVectorElementType() == ValueVT.getVectorElementType() &&
               PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
      EVT ElementVT = PartVT.getVectorElementType();
      // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
      // undef elements.
      SmallVector<SDValue, 16> Ops;
      for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
        Ops.push_back(DAG.getNode(
            ISD::EXTRACT_VECTOR_ELT, DL, ElementVT, Val,
            DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))));

      for (unsigned i = ValueVT.getVectorNumElements(),
                    e = PartVT.getVectorNumElements(); i != e; ++i)
        Ops.push_back(DAG.getUNDEF(ElementVT));

      Val = DAG.getBuildVector(PartVT, DL, Ops);

      // FIXME: Use CONCAT for 2x -> 4x.

      //SDValue UndefElts = DAG.getUNDEF(VectorTy);
      //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
    } else if (PartVT.isVector() &&
               PartEVT.getVectorElementType().bitsGE(
                   ValueVT.getVectorElementType()) &&
               PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {

      // Promoted vector extract
      Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
    } else {
      if (ValueVT.getVectorNumElements() == 1) {
        Val = DAG.getNode(
            ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
            DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
      } else {
        assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() &&
               "lossy conversion of vector to scalar type");
        EVT IntermediateType =
            EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
        Val = DAG.getBitcast(IntermediateType, Val);
        Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
      }
    }

    assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
    Parts[0] = Val;
    return;
  }

  // Handle a multi-element vector.
  EVT IntermediateVT;
  MVT RegisterVT;
  unsigned NumIntermediates;
  unsigned NumRegs;
  if (IsABIRegCopy) {
    NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
        *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
        RegisterVT);
  } else {
    NumRegs =
        TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
                                   NumIntermediates, RegisterVT);
  }
  unsigned NumElements = ValueVT.getVectorNumElements();

  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
  NumParts = NumRegs; // Silence a compiler warning.
  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");

  // Convert the vector to the appropriate type if necessary.
  unsigned DestVectorNoElts =
      NumIntermediates *
      (IntermediateVT.isVector() ? IntermediateVT.getVectorNumElements() : 1);
  EVT BuiltVectorTy = EVT::getVectorVT(
      *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
  if (Val.getValueType() != BuiltVectorTy)
    Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);

  // Split the vector into intermediate operands.
  SmallVector<SDValue, 8> Ops(NumIntermediates);
  for (unsigned i = 0; i != NumIntermediates; ++i) {
    if (IntermediateVT.isVector())
      Ops[i] =
          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
                      DAG.getConstant(i * (NumElements / NumIntermediates), DL,
                                      TLI.getVectorIdxTy(DAG.getDataLayout())));
    else
      Ops[i] = DAG.getNode(
          ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
          DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
  }

  // Split the intermediate operands into legal parts.
  if (NumParts == NumIntermediates) {
    // If the register was not expanded, promote or copy the value,
    // as appropriate.
    for (unsigned i = 0; i != NumParts; ++i)
      getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V);
  } else if (NumParts > 0) {
    // If the intermediate type was expanded, split the value into
    // legal parts.
    assert(NumIntermediates != 0 && "division by zero");
    assert(NumParts % NumIntermediates == 0 &&
           "Must expand into a divisible number of parts!");
    unsigned Factor = NumParts / NumIntermediates;
    for (unsigned i = 0; i != NumIntermediates; ++i)
      getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V);
  }
}
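
// Illustrative case: copying a <8 x i16> value into two parts on a target
// where getVectorTypeBreakdown yields IntermediateVT = v4i16 and
// NumIntermediates = 2. The extraction loop above emits
//   Ops[0] = EXTRACT_SUBVECTOR v4i16, Val, 0
//   Ops[1] = EXTRACT_SUBVECTOR v4i16, Val, 4
// and each half is then handed to getCopyToParts as a single part.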

RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
                           EVT valuevt, bool IsABIMangledValue)
    : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
      RegCount(1, regs.size()), IsABIMangled(IsABIMangledValue) {}

RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
                           const DataLayout &DL, unsigned Reg, Type *Ty,
                           bool IsABIMangledValue) {
  ComputeValueVTs(TLI, DL, Ty, ValueVTs);

  IsABIMangled = IsABIMangledValue;

  for (EVT ValueVT : ValueVTs) {
    unsigned NumRegs = IsABIMangledValue
                           ? TLI.getNumRegistersForCallingConv(Context, ValueVT)
                           : TLI.getNumRegisters(Context, ValueVT);
    MVT RegisterVT = IsABIMangledValue
                         ? TLI.getRegisterTypeForCallingConv(Context, ValueVT)
                         : TLI.getRegisterType(Context, ValueVT);
    for (unsigned i = 0; i != NumRegs; ++i)
      Regs.push_back(Reg + i);
    RegVTs.push_back(RegisterVT);
    RegCount.push_back(NumRegs);
    Reg += NumRegs;
  }
}
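
// Illustrative note (not from the original source): for a value of IR type
// i64 on a target whose only legal integer register type is i32, the
// constructor above records ValueVTs = {i64}, RegVTs = {i32},
// RegCount = {2}, and Regs = {Reg, Reg+1}, i.e. two consecutive virtual
// registers covering the one value.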

SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
                                      FunctionLoweringInfo &FuncInfo,
                                      const SDLoc &dl, SDValue &Chain,
                                      SDValue *Flag, const Value *V) const {
  // A Value with type {} or [0 x %t] needs no registers.
  if (ValueVTs.empty())
    return SDValue();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Assemble the legal parts into the final values.
  SmallVector<SDValue, 4> Values(ValueVTs.size());
  SmallVector<SDValue, 8> Parts;
  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
    // Copy the legal parts from the registers.
    EVT ValueVT = ValueVTs[Value];
    unsigned NumRegs = RegCount[Value];
    MVT RegisterVT = IsABIMangled
                         ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
                         : RegVTs[Value];

    Parts.resize(NumRegs);
    for (unsigned i = 0; i != NumRegs; ++i) {
      SDValue P;
      if (!Flag) {
        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
      } else {
        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
        *Flag = P.getValue(2);
      }

      Chain = P.getValue(1);
      Parts[i] = P;

      // If the source register was virtual and if we know something about it,
      // add an assert node.
      if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
          !RegisterVT.isInteger() || RegisterVT.isVector())
        continue;

      const FunctionLoweringInfo::LiveOutInfo *LOI =
          FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
      if (!LOI)
        continue;

      unsigned RegSize = RegisterVT.getSizeInBits();
      unsigned NumSignBits = LOI->NumSignBits;
      unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();

      if (NumZeroBits == RegSize) {
        // The current value is a zero.
        // Explicitly express that as it would be easier for
        // optimizations to kick in.
        Parts[i] = DAG.getConstant(0, dl, RegisterVT);
        continue;
      }

      // FIXME: We capture more information than the dag can represent. For
      // now, just use the tightest assertzext/assertsext possible.
      bool isSExt;
      EVT FromVT(MVT::Other);
      if (NumZeroBits) {
        FromVT = EVT::getIntegerVT(*DAG.getContext(), RegSize - NumZeroBits);
        isSExt = false;
      } else if (NumSignBits > 1) {
        FromVT =
            EVT::getIntegerVT(*DAG.getContext(), RegSize - NumSignBits + 1);
        isSExt = true;
      } else {
        continue;
      }
      // Add an assertion node.
      assert(FromVT != MVT::Other);
      Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
                             RegisterVT, P, DAG.getValueType(FromVT));
    }

    Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
                                     NumRegs, RegisterVT, ValueVT, V);
    Part += NumRegs;
    Parts.clear();
  }

  return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
}

void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
                                 const SDLoc &dl, SDValue &Chain, SDValue *Flag,
                                 const Value *V,
                                 ISD::NodeType PreferredExtendType) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  ISD::NodeType ExtendKind = PreferredExtendType;

  // Get the list of the value's legal parts.
  unsigned NumRegs = Regs.size();
  SmallVector<SDValue, 8> Parts(NumRegs);
  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
    unsigned NumParts = RegCount[Value];

    MVT RegisterVT = IsABIMangled
                         ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
                         : RegVTs[Value];

    if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
      ExtendKind = ISD::ZERO_EXTEND;

    getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
                   &Parts[Part], NumParts, RegisterVT, V, ExtendKind);
    Part += NumParts;
  }

  // Copy the parts into the registers.
  SmallVector<SDValue, 8> Chains(NumRegs);
  for (unsigned i = 0; i != NumRegs; ++i) {
    SDValue Part;
    if (!Flag) {
      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
    } else {
      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
      *Flag = Part.getValue(1);
    }

    Chains[i] = Part.getValue(0);
  }

  if (NumRegs == 1 || Flag)
    // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
    // flagged to it. That is the CopyToReg nodes and the user are considered
    // a single scheduling unit. If we create a TokenFactor and return it as
    // chain, then the TokenFactor is both a predecessor (operand) of the
    // user as well as a successor (the TF operands are flagged to the user).
    // c1, f1 = CopyToReg
    // c2, f2 = CopyToReg
    // c3     = TokenFactor c1, c2
    // ...
    //        = op c3, ..., f2
    Chain = Chains[NumRegs-1];
  else
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}

void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
                                        unsigned MatchingIdx, const SDLoc &dl,
                                        SelectionDAG &DAG,
                                        std::vector<SDValue> &Ops) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
  if (HasMatching)
    Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
  else if (!Regs.empty() &&
           TargetRegisterInfo::isVirtualRegister(Regs.front())) {
    // Put the register class of the virtual registers in the flag word. That
    // way, later passes can recompute register class constraints for inline
    // assembly as well as normal instructions.
    // Don't do this for tied operands that can use the regclass information
    // from the def.
    const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
    const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
    Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
  }

  SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
  Ops.push_back(Res);

  if (Code == InlineAsm::Kind_Clobber) {
    // Clobbers should always have a 1:1 mapping with registers, and may
    // reference registers that have illegal (e.g. vector) types. Hence, we
    // shouldn't try to apply any sort of splitting logic to them.
    assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() &&
           "No 1:1 mapping from clobbers to regs?");
    unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
    (void)SP;
    for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) {
      Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I]));
      assert(
          (Regs[I] != SP ||
           DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) &&
          "If we clobbered the stack pointer, MFI should know about it.");
    }
    return;
  }

  for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
    unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
    MVT RegisterVT = RegVTs[Value];
    for (unsigned i = 0; i != NumRegs; ++i) {
      assert(Reg < Regs.size() && "Mismatch in # registers expected");
      unsigned TheReg = Regs[Reg++];
      Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
    }
  }
}

SmallVector<std::pair<unsigned, unsigned>, 4>
RegsForValue::getRegsAndSizes() const {
  SmallVector<std::pair<unsigned, unsigned>, 4> OutVec;
  unsigned I = 0;
  for (auto CountAndVT : zip_first(RegCount, RegVTs)) {
    unsigned RegCount = std::get<0>(CountAndVT);
    MVT RegisterVT = std::get<1>(CountAndVT);
    unsigned RegisterSize = RegisterVT.getSizeInBits();
    for (unsigned E = I + RegCount; I != E; ++I)
      OutVec.push_back(std::make_pair(Regs[I], RegisterSize));
  }
  return OutVec;
}

void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
                               const TargetLibraryInfo *li) {
  AA = aa;
  GFI = gfi;
  LibInfo = li;
  DL = &DAG.getDataLayout();
  Context = DAG.getContext();
  LPadToCallSiteMap.clear();
}

void SelectionDAGBuilder::clear() {
  NodeMap.clear();
  UnusedArgNodeMap.clear();
  PendingLoads.clear();
  PendingExports.clear();
  CurInst = nullptr;
  HasTailCall = false;
  SDNodeOrder = LowestSDNodeOrder;
  StatepointLowering.clear();
}

void SelectionDAGBuilder::clearDanglingDebugInfo() {
  DanglingDebugInfoMap.clear();
}

SDValue SelectionDAGBuilder::getRoot() {
  if (PendingLoads.empty())
    return DAG.getRoot();

  if (PendingLoads.size() == 1) {
    SDValue Root = PendingLoads[0];
    DAG.setRoot(Root);
    PendingLoads.clear();
    return Root;
  }

  // Otherwise, we have to make a token factor node.
  SDValue Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
                             PendingLoads);
  PendingLoads.clear();
  DAG.setRoot(Root);
  return Root;
}

SDValue SelectionDAGBuilder::getControlRoot() {
  SDValue Root = DAG.getRoot();

  if (PendingExports.empty())
    return Root;

  // Turn all of the CopyToReg chains into one factored node.
  if (Root.getOpcode() != ISD::EntryToken) {
    unsigned i = 0, e = PendingExports.size();
    for (; i != e; ++i) {
      assert(PendingExports[i].getNode()->getNumOperands() > 1);
      if (PendingExports[i].getNode()->getOperand(0) == Root)
        break;  // Don't add the root if we already indirectly depend on it.
    }

    if (i == e)
      PendingExports.push_back(Root);
  }

  Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
                     PendingExports);
  PendingExports.clear();
  DAG.setRoot(Root);
  return Root;
}

void SelectionDAGBuilder::visit(const Instruction &I) {
  // Set up outgoing PHI node register values before emitting the terminator.
  if (isa<TerminatorInst>(&I)) {
    HandlePHINodesInSuccessorBlocks(I.getParent());
  }

  // Increase the SDNodeOrder if dealing with a non-debug instruction.
  if (!isa<DbgInfoIntrinsic>(I))
    ++SDNodeOrder;

  CurInst = &I;

  visit(I.getOpcode(), I);

  if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) {
    // Propagate the fast-math-flags of this IR instruction to the DAG node that
    // maps to this instruction.
    // TODO: We could handle all flags (nsw, etc) here.
    // TODO: If an IR instruction maps to >1 node, only the final node will have
    //       flags set.
    if (SDNode *Node = getNodeForIRValue(&I)) {
      SDNodeFlags IncomingFlags;
      IncomingFlags.copyFMF(*FPMO);
      if (!Node->getFlags().isDefined())
        Node->setFlags(IncomingFlags);
      else
        Node->intersectFlagsWith(IncomingFlags);
    }
  }

  if (!isa<TerminatorInst>(&I) && !HasTailCall &&
      !isStatepoint(&I)) // statepoints handle their exports internally
    CopyToExportRegsIfNeeded(&I);

  CurInst = nullptr;
}

void SelectionDAGBuilder::visitPHI(const PHINode &) {
  llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
}

void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
  // Note: this doesn't use InstVisitor, because it has to work with
  // ConstantExpr's in addition to instructions.
  switch (Opcode) {
  default: llvm_unreachable("Unknown instruction type encountered!");
    // Build the switch statement using the Instruction.def file.
#define HANDLE_INST(NUM, OPCODE, CLASS) \
    case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
#include "llvm/IR/Instruction.def"
  }
}

void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
                                                const DIExpression *Expr) {
  for (auto &DDIMI : DanglingDebugInfoMap)
    for (auto &DDI : DDIMI.second)
      if (DDI.getDI()) {
        const DbgValueInst *DI = DDI.getDI();
        DIVariable *DanglingVariable = DI->getVariable();
        DIExpression *DanglingExpr = DI->getExpression();
        if (DanglingVariable == Variable &&
            Expr->fragmentsOverlap(DanglingExpr)) {
          LLVM_DEBUG(dbgs()
                     << "Dropping dangling debug info for " << *DI << "\n");
          DDI = DanglingDebugInfo();
        }
      }
}

// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
// generate the debug data structures now that we've seen its definition.
void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
                                                   SDValue Val) {
  DanglingDebugInfoVector &DDIV = DanglingDebugInfoMap[V];
  for (auto &DDI : DDIV) {
    if (!DDI.getDI())
      continue;
    const DbgValueInst *DI = DDI.getDI();
    DebugLoc dl = DDI.getdl();
    unsigned ValSDNodeOrder = Val.getNode()->getIROrder();
    unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
    DILocalVariable *Variable = DI->getVariable();
    DIExpression *Expr = DI->getExpression();
    assert(Variable->isValidLocationForIntrinsic(dl) &&
           "Expected inlined-at fields to agree");
    SDDbgValue *SDV;
    if (Val.getNode()) {
      if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
        LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order="
                          << DbgSDNodeOrder << "] for:\n  " << *DI << "\n");
        LLVM_DEBUG(dbgs() << "  By mapping to:\n    "; Val.dump());
        // Increase the SDNodeOrder for the DbgValue here to make sure it is
        // inserted after the definition of Val when emitting the instructions
        // after ISel. An alternative could be to teach
        // ScheduleDAGSDNodes::EmitSchedule to delay the insertion properly.
        LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs()
                   << "changing SDNodeOrder from " << DbgSDNodeOrder << " to "
                   << ValSDNodeOrder << "\n");
        SDV = getDbgValue(Val, Variable, Expr, dl,
                          std::max(DbgSDNodeOrder, ValSDNodeOrder));
        DAG.AddDbgValue(SDV, Val.getNode(), false);
      } else
        LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI
                          << " in EmitFuncArgumentDbgValue\n");
    } else
      LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
  }
  DanglingDebugInfoMap[V].clear();
}

/// getCopyFromRegs - If there was a virtual register allocated for the value V
/// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
  DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
  SDValue Result;

  if (It != FuncInfo.ValueMap.end()) {
    unsigned InReg = It->second;

    RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
                     DAG.getDataLayout(), InReg, Ty, isABIRegCopy(V));
    SDValue Chain = DAG.getEntryNode();
    Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
                                 V);
    resolveDanglingDebugInfo(V, Result);
  }

  return Result;
}

/// getValue - Return an SDValue for the given Value.
SDValue SelectionDAGBuilder::getValue(const Value *V) {
  // If we already have an SDValue for this value, use it. It's important
  // to do this first, so that we don't create a CopyFromReg if we already
  // have a regular SDValue.
  SDValue &N = NodeMap[V];
  if (N.getNode()) return N;

  // If there's a virtual register allocated and initialized for this
  // value, use it.
  if (SDValue copyFromReg = getCopyFromRegs(V, V->getType()))
    return copyFromReg;

  // Otherwise create a new SDValue and remember it.
  SDValue Val = getValueImpl(V);
  NodeMap[V] = Val;
  resolveDanglingDebugInfo(V, Val);
  return Val;
}

// Return true if an SDValue exists for the given Value.
bool SelectionDAGBuilder::findValue(const Value *V) const {
  return (NodeMap.find(V) != NodeMap.end()) ||
         (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end());
}

/// getNonRegisterValue - Return an SDValue for the given Value, but
/// don't look in FuncInfo.ValueMap for a virtual register.
SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
  // If we already have an SDValue for this value, use it.
  SDValue &N = NodeMap[V];
  if (N.getNode()) {
    if (isa<ConstantSDNode>(N) || isa<ConstantFPSDNode>(N)) {
      // Remove the debug location from the node as the node is about to be used
      // in a location which may differ from the original debug location. This
      // is relevant to Constant and ConstantFP nodes because they can appear
      // as constant expressions inside PHI nodes.
      N->setDebugLoc(DebugLoc());
    }
    return N;
  }

  // Otherwise create a new SDValue and remember it.
  SDValue Val = getValueImpl(V);
  NodeMap[V] = Val;
  resolveDanglingDebugInfo(V, Val);
  return Val;
}

/// getValueImpl - Helper function for getValue and getNonRegisterValue.
/// Create an SDValue for the given value.
SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  if (const Constant *C = dyn_cast<Constant>(V)) {
    EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
      return DAG.getConstant(*CI, getCurSDLoc(), VT);

    if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
      return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);

    if (isa<ConstantPointerNull>(C)) {
      unsigned AS = V->getType()->getPointerAddressSpace();
      return DAG.getConstant(0, getCurSDLoc(),
                             TLI.getPointerTy(DAG.getDataLayout(), AS));
    }

    if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
      return DAG.getConstantFP(*CFP, getCurSDLoc(), VT);

    if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
      return DAG.getUNDEF(VT);

    if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
      visit(CE->getOpcode(), *CE);
      SDValue N1 = NodeMap[V];
      assert(N1.getNode() && "visit didn't populate the NodeMap!");
      return N1;
    }

    if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
      SmallVector<SDValue, 4> Constants;
      for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
           OI != OE; ++OI) {
        SDNode *Val = getValue(*OI).getNode();
        // If the operand is an empty aggregate, there are no values.
        if (!Val) continue;
        // Add each leaf value from the operand to the Constants list
        // to form a flattened list of all the values.
        for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
          Constants.push_back(SDValue(Val, i));
      }

      return DAG.getMergeValues(Constants, getCurSDLoc());
    }

    if (const ConstantDataSequential *CDS =
            dyn_cast<ConstantDataSequential>(C)) {
      SmallVector<SDValue, 4> Ops;
      for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
        SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
        // Add each leaf value from the operand to the Constants list
        // to form a flattened list of all the values.
        for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
          Ops.push_back(SDValue(Val, i));
      }

      if (isa<ArrayType>(CDS->getType()))
        return DAG.getMergeValues(Ops, getCurSDLoc());
      return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
    }

    if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
      assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
             "Unknown struct or array constant!");

      SmallVector<EVT, 4> ValueVTs;
      ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs);
      unsigned NumElts = ValueVTs.size();
      if (NumElts == 0)
        return SDValue(); // empty struct
      SmallVector<SDValue, 4> Constants(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        EVT EltVT = ValueVTs[i];
        if (isa<UndefValue>(C))
          Constants[i] = DAG.getUNDEF(EltVT);
        else if (EltVT.isFloatingPoint())
          Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
        else
          Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT);
      }

      return DAG.getMergeValues(Constants, getCurSDLoc());
    }

    if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
      return DAG.getBlockAddress(BA, VT);

    VectorType *VecTy = cast<VectorType>(V->getType());
    unsigned NumElements = VecTy->getNumElements();

    // Now that we know the number and type of the elements, get that number of
    // elements into the Ops array based on what kind of constant it is.
    SmallVector<SDValue, 16> Ops;
    if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
      for (unsigned i = 0; i != NumElements; ++i)
        Ops.push_back(getValue(CV->getOperand(i)));
    } else {
      assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
      EVT EltVT =
          TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType());

      SDValue Op;
      if (EltVT.isFloatingPoint())
        Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
      else
        Op = DAG.getConstant(0, getCurSDLoc(), EltVT);
      Ops.assign(NumElements, Op);
    }

    // Create a BUILD_VECTOR node.
    return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
  }

  // If this is a static alloca, generate it as the frameindex instead of
  // computation.
  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
    DenseMap<const AllocaInst*, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end())
      return DAG.getFrameIndex(SI->second,
                               TLI.getFrameIndexTy(DAG.getDataLayout()));
  }

  // If this is an instruction which fast-isel has deferred, select it now.
  if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
    unsigned InReg = FuncInfo.InitializeRegForValue(Inst);

    RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
                     Inst->getType(), isABIRegCopy(V));
    SDValue Chain = DAG.getEntryNode();
    return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
  }

  llvm_unreachable("Can't get register for value!");
}

void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
  auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
  bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
  bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
  bool IsSEH = isAsynchronousEHPersonality(Pers);
  bool IsWasmCXX = Pers == EHPersonality::Wasm_CXX;
  MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
  if (!IsSEH)
    CatchPadMBB->setIsEHScopeEntry();
  // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
  if (IsMSVCCXX || IsCoreCLR)
    CatchPadMBB->setIsEHFuncletEntry();
  // Wasm does not need catchpads anymore
  if (!IsWasmCXX)
    DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other,
                            getControlRoot()));
}

void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
  // Update machine-CFG edge.
  MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
  FuncInfo.MBB->addSuccessor(TargetMBB);

  auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
  bool IsSEH = isAsynchronousEHPersonality(Pers);
  if (IsSEH) {
    // If this is not a fall-through branch or optimizations are switched off,
    // emit the branch.
    if (TargetMBB != NextBlock(FuncInfo.MBB) ||
        TM.getOptLevel() == CodeGenOpt::None)
      DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
                              getControlRoot(), DAG.getBasicBlock(TargetMBB)));
    return;
  }

  // Figure out the funclet membership for the catchret's successor.
  // This will be used by the FuncletLayout pass to determine how to order the
  // BB's.
  // A 'catchret' returns to the outer scope's color.
  Value *ParentPad = I.getCatchSwitchParentPad();
  const BasicBlock *SuccessorColor;
  if (isa<ConstantTokenNone>(ParentPad))
    SuccessorColor = &FuncInfo.Fn->getEntryBlock();
  else
    SuccessorColor = cast<Instruction>(ParentPad)->getParent();
  assert(SuccessorColor && "No parent funclet for catchret!");
  MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
  assert(SuccessorColorMBB && "No MBB for SuccessorColor!");

  // Create the terminator node.
  SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
                            getControlRoot(), DAG.getBasicBlock(TargetMBB),
                            DAG.getBasicBlock(SuccessorColorMBB));
  DAG.setRoot(Ret);
}

void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
  // Don't emit any special code for the cleanuppad instruction. It just marks
  // the start of an EH scope/funclet.
  FuncInfo.MBB->setIsEHScopeEntry();
  FuncInfo.MBB->setIsEHFuncletEntry();
  FuncInfo.MBB->setIsCleanupFuncletEntry();
}

/// When an invoke or a cleanupret unwinds to the next EH pad, there are
/// many places it could ultimately go. In the IR, we have a single unwind
/// destination, but in the machine CFG, we enumerate all the possible blocks.
/// This function skips over imaginary basic blocks that hold catchswitch
/// instructions, and finds all the "real" machine
/// basic block destinations. As those destinations may not be successors of
/// EHPadBB, here we also calculate the edge probability to those destinations.
/// The passed-in Prob is the edge probability to EHPadBB.
static void findUnwindDestinations(
    FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
    BranchProbability Prob,
    SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
        &UnwindDests) {
  EHPersonality Personality =
      classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
  bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
  bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
  bool IsSEH = isAsynchronousEHPersonality(Personality);

  while (EHPadBB) {
    const Instruction *Pad = EHPadBB->getFirstNonPHI();
    BasicBlock *NewEHPadBB = nullptr;
    if (isa<LandingPadInst>(Pad)) {
      // Stop on landingpads. They are not funclets.
      UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
      break;
    } else if (isa<CleanupPadInst>(Pad)) {
      // Stop on cleanup pads. Cleanups are always funclet entries for all known
      // personalities.
      UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
      UnwindDests.back().first->setIsEHScopeEntry();
      UnwindDests.back().first->setIsEHFuncletEntry();
      break;
    } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
      // Add the catchpad handlers to the possible destinations.
      for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
        UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
        // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
        if (IsMSVCCXX || IsCoreCLR)
          UnwindDests.back().first->setIsEHFuncletEntry();
        if (!IsSEH)
          UnwindDests.back().first->setIsEHScopeEntry();
      }
      NewEHPadBB = CatchSwitch->getUnwindDest();
    } else {
      continue;
    }

    BranchProbabilityInfo *BPI = FuncInfo.BPI;
    if (BPI && NewEHPadBB)
      Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
    EHPadBB = NewEHPadBB;
  }
}
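
// Illustrative walk (assuming an MSVC C++ personality, not from the original
// source): given IR such as
//   invoke void @f() to label %cont unwind label %dispatch
//   dispatch:
//     %cs = catchswitch within none [label %handler] unwind to caller
// the loop above skips the imaginary %dispatch block, records %handler's
// machine block as a funclet-entry unwind destination, and stops because the
// catchswitch has no further unwind edge.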

void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
  // Update successor info.
  SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
  auto UnwindDest = I.getUnwindDest();
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
  BranchProbability UnwindDestProb =
      (BPI && UnwindDest)
          ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest)
          : BranchProbability::getZero();
  findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests);
  for (auto &UnwindDest : UnwindDests) {
    UnwindDest.first->setIsEHPad();
    addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second);
  }
  FuncInfo.MBB->normalizeSuccProbs();

  // Create the terminator node.
  SDValue Ret =
      DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
  DAG.setRoot(Ret);
}

void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) {
  report_fatal_error("visitCatchSwitch not yet implemented!");
}
1505 
1506 void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
1507  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1508  auto &DL = DAG.getDataLayout();
1509  SDValue Chain = getControlRoot();
1511  SmallVector<SDValue, 8> OutVals;
1512 
1513  // Calls to @llvm.experimental.deoptimize don't generate a return value, so
1514  // lower
1515  //
1516  // %val = call <ty> @llvm.experimental.deoptimize()
1517  // ret <ty> %val
1518  //
1519  // differently.
1521  LowerDeoptimizingReturn();
1522  return;
1523  }
1524 
1525  if (!FuncInfo.CanLowerReturn) {
1526  unsigned DemoteReg = FuncInfo.DemoteRegister;
1527  const Function *F = I.getParent()->getParent();
1528 
1529  // Emit a store of the return value through the virtual register.
1530  // Leave Outs empty so that LowerReturn won't try to load return
1531  // registers the usual way.
1532  SmallVector<EVT, 1> PtrValueVTs;
1533  ComputeValueVTs(TLI, DL,
1536  PtrValueVTs);
1537 
1538  SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
1539  DemoteReg, PtrValueVTs[0]);
1540  SDValue RetOp = getValue(I.getOperand(0));
1541 
1544  ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets);
1545  unsigned NumValues = ValueVTs.size();
1546 
1547  SmallVector<SDValue, 4> Chains(NumValues);
1548  for (unsigned i = 0; i != NumValues; ++i) {
1549  // An aggregate return value cannot wrap around the address space, so
1550  // offsets to its parts don't wrap either.
1551  SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]);
1552  Chains[i] = DAG.getStore(
1553  Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i),
1554  // FIXME: better loc info would be nice.
1555  Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
1556  }
1557 
1558  Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
1559  MVT::Other, Chains);
1560  } else if (I.getNumOperands() != 0) {
1561  SmallVector<EVT, 4> ValueVTs;
1562  ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs);
1563  unsigned NumValues = ValueVTs.size();
1564  if (NumValues) {
1565  SDValue RetOp = getValue(I.getOperand(0));
1566 
1567  const Function *F = I.getParent()->getParent();
1568 
1569  ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
1570  if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
1571  Attribute::SExt))
1572  ExtendKind = ISD::SIGN_EXTEND;
1573  else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
1574  Attribute::ZExt))
1575  ExtendKind = ISD::ZERO_EXTEND;
1576 
1577  LLVMContext &Context = F->getContext();
1578  bool RetInReg = F->getAttributes().hasAttribute(
1579  AttributeList::ReturnIndex, Attribute::InReg);
1580 
1581  for (unsigned j = 0; j != NumValues; ++j) {
1582  EVT VT = ValueVTs[j];
1583 
1584  if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
1585  VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
1586 
1587  unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, VT);
1588  MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, VT);
1589  SmallVector<SDValue, 4> Parts(NumParts);
1590  getCopyToParts(DAG, getCurSDLoc(),
1591  SDValue(RetOp.getNode(), RetOp.getResNo() + j),
1592  &Parts[0], NumParts, PartVT, &I, ExtendKind, true);
1593 
1594  // 'inreg' on function refers to return value
1595  ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
1596  if (RetInReg)
1597  Flags.setInReg();
1598 
1599  // Propagate extension type if any
1600  if (ExtendKind == ISD::SIGN_EXTEND)
1601  Flags.setSExt();
1602  else if (ExtendKind == ISD::ZERO_EXTEND)
1603  Flags.setZExt();
1604 
1605  for (unsigned i = 0; i < NumParts; ++i) {
1606  Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
1607  VT, /*isfixed=*/true, 0, 0));
1608  OutVals.push_back(Parts[i]);
1609  }
1610  }
1611  }
1612  }
1613 
1614  // Push in swifterror virtual register as the last element of Outs. This makes
1615  // sure swifterror virtual register will be returned in the swifterror
1616  // physical register.
1617  const Function *F = I.getParent()->getParent();
1618  if (TLI.supportSwiftError() &&
1619  F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
1620  assert(FuncInfo.SwiftErrorArg && "Need a swift error argument");
1621  ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
1622  Flags.setSwiftError();
1623  Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
1624  EVT(TLI.getPointerTy(DL)) /*argvt*/,
1625  true /*isfixed*/, 1 /*origidx*/,
1626  0 /*partOffs*/));
1627  // Create SDNode for the swifterror virtual register.
1628  OutVals.push_back(
1629  DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVRegUseAt(
1630  &I, FuncInfo.MBB, FuncInfo.SwiftErrorArg).first,
1631  EVT(TLI.getPointerTy(DL))));
1632  }
1633 
1634  bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg();
1635  CallingConv::ID CallConv =
1636  DAG.getMachineFunction().getFunction().getCallingConv();
1637  Chain = DAG.getTargetLoweringInfo().LowerReturn(
1638  Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);
1639 
1640  // Verify that the target's LowerReturn behaved as expected.
1641  assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
1642  "LowerReturn didn't return a valid chain!");
1643 
1644  // Update the DAG with the new chain value resulting from return lowering.
1645  DAG.setRoot(Chain);
1646 }
1647 
1648 /// CopyToExportRegsIfNeeded - If the given value has virtual registers
1649 /// created for it, emit nodes to copy the value into the virtual
1650 /// registers.
1651 void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
1652  // Skip empty types
1653  if (V->getType()->isEmptyTy())
1654  return;
1655 
1656  DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
1657  if (VMI != FuncInfo.ValueMap.end()) {
1658  assert(!V->use_empty() && "Unused value assigned virtual registers!");
1659  CopyValueToVirtualRegister(V, VMI->second);
1660  }
1661 }
1662 
1663 /// ExportFromCurrentBlock - If this condition isn't known to be exported from
1664 /// the current basic block, add it to ValueMap now so that we'll get a
1665 /// CopyTo/FromReg.
1666 void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
1667  // No need to export constants.
1668  if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
1669 
1670  // Already exported?
1671  if (FuncInfo.isExportedInst(V)) return;
1672 
1673  unsigned Reg = FuncInfo.InitializeRegForValue(V);
1674  CopyValueToVirtualRegister(V, Reg);
1675 }
1676 
1677 bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
1678  const BasicBlock *FromBB) {
1679  // The operands of the setcc have to be in this block. We don't know
1680  // how to export them from some other block.
1681  if (const Instruction *VI = dyn_cast<Instruction>(V)) {
1682  // Can export from current BB.
1683  if (VI->getParent() == FromBB)
1684  return true;
1685 
1686  // Is already exported, noop.
1687  return FuncInfo.isExportedInst(V);
1688  }
1689 
1690  // If this is an argument, we can export it if the BB is the entry block or
1691  // if it is already exported.
1692  if (isa<Argument>(V)) {
1693  if (FromBB == &FromBB->getParent()->getEntryBlock())
1694  return true;
1695 
1696  // Otherwise, can only export this if it is already exported.
1697  return FuncInfo.isExportedInst(V);
1698  }
1699 
1700  // Otherwise, constants can always be exported.
1701  return true;
1702 }
1703 
1704 /// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
1705 BranchProbability
1706 SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,
1707  const MachineBasicBlock *Dst) const {
1708  BranchProbabilityInfo *BPI = FuncInfo.BPI;
1709  const BasicBlock *SrcBB = Src->getBasicBlock();
1710  const BasicBlock *DstBB = Dst->getBasicBlock();
1711  if (!BPI) {
1712  // If BPI is not available, set the default probability as 1 / N, where N is
1713  // the number of successors.
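  // Editor's note (illustrative): a conditional branch with two successors
  // therefore defaults to BranchProbability(1, 2), i.e. 50% per edge.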
1714  auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1);
1715  return BranchProbability(1, SuccSize);
1716  }
1717  return BPI->getEdgeProbability(SrcBB, DstBB);
1718 }
1719 
1720 void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src,
1721  MachineBasicBlock *Dst,
1722  BranchProbability Prob) {
1723  if (!FuncInfo.BPI)
1724  Src->addSuccessorWithoutProb(Dst);
1725  else {
1726  if (Prob.isUnknown())
1727  Prob = getEdgeProbability(Src, Dst);
1728  Src->addSuccessor(Dst, Prob);
1729  }
1730 }
1731 
1732 static bool InBlock(const Value *V, const BasicBlock *BB) {
1733  if (const Instruction *I = dyn_cast<Instruction>(V))
1734  return I->getParent() == BB;
1735  return true;
1736 }
1737 
1738 /// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
1739 /// This function emits a branch and is used at the leaves of an OR or an
1740 /// AND operator tree.
1741 void
1742 SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
1743  MachineBasicBlock *TBB,
1744  MachineBasicBlock *FBB,
1745  MachineBasicBlock *CurBB,
1746  MachineBasicBlock *SwitchBB,
1747  BranchProbability TProb,
1748  BranchProbability FProb,
1749  bool InvertCond) {
1750  const BasicBlock *BB = CurBB->getBasicBlock();
1751 
1752  // If the leaf of the tree is a comparison, merge the condition into
1753  // the caseblock.
1754  if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
1755  // The operands of the cmp have to be in this block. We don't know
1756  // how to export them from some other block. If this is the first block
1757  // of the sequence, no exporting is needed.
1758  if (CurBB == SwitchBB ||
1759  (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
1760  isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
1761  ISD::CondCode Condition;
1762  if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
1763  ICmpInst::Predicate Pred =
1764  InvertCond ? IC->getInversePredicate() : IC->getPredicate();
1765  Condition = getICmpCondCode(Pred);
1766  } else {
1767  const FCmpInst *FC = cast<FCmpInst>(Cond);
1768  FCmpInst::Predicate Pred =
1769  InvertCond ? FC->getInversePredicate() : FC->getPredicate();
1770  Condition = getFCmpCondCode(Pred);
1771  if (TM.Options.NoNaNsFPMath)
1772  Condition = getFCmpCodeWithoutNaN(Condition);
1773  }
1774 
1775  CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
1776  TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
1777  SwitchCases.push_back(CB);
1778  return;
1779  }
1780  }
1781 
1782  // Create a CaseBlock record representing this branch.
1783  ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
1784  CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
1785  nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
1786  SwitchCases.push_back(CB);
1787 }
1788 
1789 /// FindMergedConditions - If Cond is an expression like (X && Y) or (X || Y), emit a sequence of conditional branches at the leaves instead of materializing the boolean value.
1790 void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
1791  MachineBasicBlock *TBB,
1792  MachineBasicBlock *FBB,
1793  MachineBasicBlock *CurBB,
1794  MachineBasicBlock *SwitchBB,
1795  Instruction::BinaryOps Opc,
1796  BranchProbability TProb,
1797  BranchProbability FProb,
1798  bool InvertCond) {
1799  // Skip over a NOT that is not itself part of the tree, and remember to
1800  // invert the op and operands at the next level.
1801  if (BinaryOperator::isNot(Cond) && Cond->hasOneUse()) {
1802  const Value *CondOp = BinaryOperator::getNotArgument(Cond);
1803  if (InBlock(CondOp, CurBB->getBasicBlock())) {
1804  FindMergedConditions(CondOp, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
1805  !InvertCond);
1806  return;
1807  }
1808  }
1809 
1810  const Instruction *BOp = dyn_cast<Instruction>(Cond);
1811  // Compute the effective opcode for Cond, taking into account whether it needs
1812  // to be inverted, e.g.
1813  // and (not (or A, B)), C
1814  // gets lowered as
1815  // and (and (not A, not B), C)
1816  unsigned BOpc = 0;
1817  if (BOp) {
1818  BOpc = BOp->getOpcode();
1819  if (InvertCond) {
1820  if (BOpc == Instruction::And)
1821  BOpc = Instruction::Or;
1822  else if (BOpc == Instruction::Or)
1823  BOpc = Instruction::And;
1824  }
1825  }
1826 
1827  // If this node is not part of the or/and tree, emit it as a branch.
1828  if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
1829  BOpc != unsigned(Opc) || !BOp->hasOneUse() ||
1830  BOp->getParent() != CurBB->getBasicBlock() ||
1831  !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
1832  !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
1833  EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
1834  TProb, FProb, InvertCond);
1835  return;
1836  }
1837 
1838  // Create TmpBB after CurBB.
1839  MachineFunction::iterator BBI(CurBB);
1840  MachineFunction &MF = DAG.getMachineFunction();
1841  MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
1842  CurBB->getParent()->insert(++BBI, TmpBB);
1843 
1844  if (Opc == Instruction::Or) {
1845  // Codegen X | Y as:
1846  // BB1:
1847  // jmp_if_X TBB
1848  // jmp TmpBB
1849  // TmpBB:
1850  // jmp_if_Y TBB
1851  // jmp FBB
1852  //
1853 
1854  // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
1855  // The requirement is that
1856  // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
1857  // = TrueProb for original BB.
1858  // Assuming the original probabilities are A and B, one choice is to set
1859  // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
1860  // A/(1+B) and 2B/(1+B). This choice assumes that
1861  // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
1862  // Another choice is to assume TrueProb for BB1 equals to TrueProb for
1863  // TmpBB, but the math is more complicated.
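  // Editor's note - a worked instance of the math above (illustrative, not
  // from the original source): with TProb = 60% and FProb = 40%, BB1 gets
  // 30%/70%, and TmpBB's {30%, 40%} normalize to 3/7 and 4/7. Checking the
  // requirement: 30% + 70% * (3/7) = 60% = TProb; the closed forms agree,
  // since A/(1+B) = 0.6/1.4 = 3/7 and 2B/(1+B) = 0.8/1.4 = 4/7.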
1864 
1865  auto NewTrueProb = TProb / 2;
1866  auto NewFalseProb = TProb / 2 + FProb;
1867  // Emit the LHS condition.
1868  FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
1869  NewTrueProb, NewFalseProb, InvertCond);
1870 
1871  // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
1872  SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
1873  BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
1874  // Emit the RHS condition into TmpBB.
1875  FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
1876  Probs[0], Probs[1], InvertCond);
1877  } else {
1878  assert(Opc == Instruction::And && "Unknown merge op!");
1879  // Codegen X & Y as:
1880  // BB1:
1881  // jmp_if_X TmpBB
1882  // jmp FBB
1883  // TmpBB:
1884  // jmp_if_Y TBB
1885  // jmp FBB
1886  //
1887  // This requires creation of TmpBB after CurBB.
1888 
1889  // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
1890  // The requirement is that
1891  // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
1892  // = FalseProb for original BB.
1893  // Assuming the original probabilities are A and B, one choice is to set
1894  // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
1895  // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
1896  // TrueProb for BB1 * FalseProb for TmpBB.
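  // Editor's note - worked instance (illustrative, not from the original
  // source): with TProb = 60% and FProb = 40%, BB1 gets 80%/20%, and TmpBB's
  // {60%, 20%} normalize to 3/4 and 1/4. Checking: 20% + 80% * (1/4) = 40%
  // = FProb; likewise 2A/(1+A) = 1.2/1.6 = 3/4 and B/(1+A) = 0.4/1.6 = 1/4.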
1897 
1898  auto NewTrueProb = TProb + FProb / 2;
1899  auto NewFalseProb = FProb / 2;
1900  // Emit the LHS condition.
1901  FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
1902  NewTrueProb, NewFalseProb, InvertCond);
1903 
1904  // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
1905  SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
1906  BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
1907  // Emit the RHS condition into TmpBB.
1908  FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
1909  Probs[0], Probs[1], InvertCond);
1910  }
1911 }
1912 
1913 /// If the set of cases should be emitted as a series of branches, return true.
1914 /// If we should emit this as a bunch of and/or'd together conditions, return
1915 /// false.
1916 bool
1917 SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) {
1918  if (Cases.size() != 2) return true;
1919 
1920  // If this is two comparisons of the same values or'd or and'd together, they
1921  // will get folded into a single comparison, so don't emit two blocks.
1922  if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
1923  Cases[0].CmpRHS == Cases[1].CmpRHS) ||
1924  (Cases[0].CmpRHS == Cases[1].CmpLHS &&
1925  Cases[0].CmpLHS == Cases[1].CmpRHS)) {
1926  return false;
1927  }
1928 
1929  // Handle: (X != null) | (Y != null) --> (X|Y) != 0
1930  // Handle: (X == null) & (Y == null) --> (X|Y) == 0
1931  if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
1932  Cases[0].CC == Cases[1].CC &&
1933  isa<Constant>(Cases[0].CmpRHS) &&
1934  cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
1935  if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
1936  return false;
1937  if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
1938  return false;
1939  }
1940 
1941  return true;
1942 }
1943 
1944 void SelectionDAGBuilder::visitBr(const BranchInst &I) {
1945  MachineBasicBlock *BrMBB = FuncInfo.MBB;
1946 
1947  // Update machine-CFG edges.
1948  MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
1949 
1950  if (I.isUnconditional()) {
1951  // Update machine-CFG edges.
1952  BrMBB->addSuccessor(Succ0MBB);
1953 
1954  // If this is not a fall-through branch or optimizations are switched off,
1955  // emit the branch.
1956  if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None)
1957  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
1958  MVT::Other, getControlRoot(),
1959  DAG.getBasicBlock(Succ0MBB)));
1960 
1961  return;
1962  }
1963 
1964  // If this condition is one of the special cases we handle, do special stuff
1965  // now.
1966  const Value *CondVal = I.getCondition();
1967  MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
1968 
1969  // If this is a series of conditions that are or'd or and'd together, emit
1970  // this as a sequence of branches instead of setcc's with and/or operations.
1971  // As long as jumps are not expensive, this should improve performance.
1972  // For example, instead of something like:
1973  // cmp A, B
1974  // C = seteq
1975  // cmp D, E
1976  // F = setle
1977  // or C, F
1978  // jnz foo
1979  // Emit:
1980  // cmp A, B
1981  // je foo
1982  // cmp D, E
1983  // jle foo
1984  if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
1985  Instruction::BinaryOps Opcode = BOp->getOpcode();
1986  if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
1987  !I.getMetadata(LLVMContext::MD_unpredictable) &&
1988  (Opcode == Instruction::And || Opcode == Instruction::Or)) {
1989  FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
1990  Opcode,
1991  getEdgeProbability(BrMBB, Succ0MBB),
1992  getEdgeProbability(BrMBB, Succ1MBB),
1993  /*InvertCond=*/false);
1994  // If the compares in later blocks need to use values not currently
1995  // exported from this block, export them now. This block should always
1996  // be the first entry.
1997  assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
1998 
1999  // Allow some cases to be rejected.
2000  if (ShouldEmitAsBranches(SwitchCases)) {
2001  for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
2002  ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
2003  ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
2004  }
2005 
2006  // Emit the branch for this block.
2007  visitSwitchCase(SwitchCases[0], BrMBB);
2008  SwitchCases.erase(SwitchCases.begin());
2009  return;
2010  }
2011 
2012  // Okay, we decided not to do this, remove any inserted MBB's and clear
2013  // SwitchCases.
2014  for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
2015  FuncInfo.MF->erase(SwitchCases[i].ThisBB);
2016 
2017  SwitchCases.clear();
2018  }
2019  }
2020 
2021  // Create a CaseBlock record representing this branch.
2022  CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
2023  nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc());
2024 
2025  // Use visitSwitchCase to actually insert the fast branch sequence for this
2026  // cond branch.
2027  visitSwitchCase(CB, BrMBB);
2028 }
2029 
2030 /// visitSwitchCase - Emits the necessary code to represent a single node in
2031 /// the binary search tree resulting from lowering a switch instruction.
2032 void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
2033  MachineBasicBlock *SwitchBB) {
2034  SDValue Cond;
2035  SDValue CondLHS = getValue(CB.CmpLHS);
2036  SDLoc dl = CB.DL;
2037 
2038  // Build the setcc now.
2039  if (!CB.CmpMHS) {
2040  // Fold "(X == true)" to X and "(X == false)" to !X to
2041  // handle common cases produced by branch lowering.
2042  if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
2043  CB.CC == ISD::SETEQ)
2044  Cond = CondLHS;
2045  else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
2046  CB.CC == ISD::SETEQ) {
2047  SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
2048  Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
2049  } else
2050  Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
2051  } else {
2052  assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
2053 
2054  const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
2055  const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
2056 
2057  SDValue CmpOp = getValue(CB.CmpMHS);
2058  EVT VT = CmpOp.getValueType();
2059 
2060  if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
2061  Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT),
2062  ISD::SETLE);
2063  } else {
2064  SDValue SUB = DAG.getNode(ISD::SUB, dl,
2065  VT, CmpOp, DAG.getConstant(Low, dl, VT));
2066  Cond = DAG.getSetCC(dl, MVT::i1, SUB,
2067  DAG.getConstant(High-Low, dl, VT), ISD::SETULE);
2068  }
2069  }
2070 
2071  // Update successor info
2072  addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
2073  // TrueBB and FalseBB are always different unless the incoming IR is
2074  // degenerate. This only happens when running llc on weird IR.
2075  if (CB.TrueBB != CB.FalseBB)
2076  addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb);
2077  SwitchBB->normalizeSuccProbs();
2078 
2079  // If the lhs block is the next block, invert the condition so that we can
2080  // fall through to the lhs instead of the rhs block.
2081  if (CB.TrueBB == NextBlock(SwitchBB)) {
2082  std::swap(CB.TrueBB, CB.FalseBB);
2083  SDValue True = DAG.getConstant(1, dl, Cond.getValueType());
2084  Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
2085  }
2086 
2087  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2088  MVT::Other, getControlRoot(), Cond,
2089  DAG.getBasicBlock(CB.TrueBB));
2090 
2091  // Insert the false branch. Do this even if it's a fall through branch,
2092  // this makes it easier to do DAG optimizations which require inverting
2093  // the branch condition.
2094  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
2095  DAG.getBasicBlock(CB.FalseBB));
2096 
2097  DAG.setRoot(BrCond);
2098 }
2099 
2100 /// visitJumpTable - Emit JumpTable node in the current MBB
2101 void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
2102  // Emit the code for the jump table
2103  assert(JT.Reg != -1U && "Should lower JT Header first!");
2104  EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2105  SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
2106  JT.Reg, PTy);
2107  SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
2108  SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(),
2109  MVT::Other, Index.getValue(1),
2110  Table, Index);
2111  DAG.setRoot(BrJumpTable);
2112 }
2113 
2114 /// visitJumpTableHeader - This function emits necessary code to produce index
2115 /// in the JumpTable from switch case.
2116 void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
2117  JumpTableHeader &JTH,
2118  MachineBasicBlock *SwitchBB) {
2119  SDLoc dl = getCurSDLoc();
2120 
2121  // Subtract the lowest switch case value from the value being switched on and
2122  // conditional branch to default mbb if the result is greater than the
2123  // difference between smallest and largest cases.
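  // Editor's note (illustrative, not from the original source): for cases
  // 10..20, a switched-on value of 15 gives Sub = 5, a valid table index;
  // a value of 25 gives Sub = 15 > (Last - First) = 10, and a value of 5
  // wraps around in the unsigned comparison, so both go to the default block.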
2124  SDValue SwitchOp = getValue(JTH.SValue);
2125  EVT VT = SwitchOp.getValueType();
2126  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
2127  DAG.getConstant(JTH.First, dl, VT));
2128 
2129  // The SDNode we just created, which holds the value being switched on minus
2130  // the smallest case value, needs to be copied to a virtual register so it
2131  // can be used as an index into the jump table in a subsequent basic block.
2132  // This value may be smaller or larger than the target's pointer type, and
2133  // may therefore require extension or truncation.
2134  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2135  SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout()));
2136 
2137  unsigned JumpTableReg =
2138  FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout()));
2139  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl,
2140  JumpTableReg, SwitchOp);
2141  JT.Reg = JumpTableReg;
2142 
2143  // Emit the range check for the jump table, and branch to the default block
2144  // for the switch statement if the value being switched on exceeds the largest
2145  // case in the switch.
2146  SDValue CMP = DAG.getSetCC(
2147  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
2148  Sub.getValueType()),
2149  Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
2150 
2151  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2152  MVT::Other, CopyTo, CMP,
2153  DAG.getBasicBlock(JT.Default));
2154 
2155  // Avoid emitting unnecessary branches to the next block.
2156  if (JT.MBB != NextBlock(SwitchBB))
2157  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
2158  DAG.getBasicBlock(JT.MBB));
2159 
2160  DAG.setRoot(BrCond);
2161 }
2162 
2163 /// Create a LOAD_STACK_GUARD node, and let it carry the target specific global
2164 /// variable if there exists one.
2165 static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
2166  SDValue &Chain) {
2167  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2168  EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
2169  MachineFunction &MF = DAG.getMachineFunction();
2170  Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent());
2171  MachineSDNode *Node =
2172  DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
2173  if (Global) {
2174  MachinePointerInfo MPInfo(Global);
2175  MachineSDNode::mmo_iterator MemRefs = MF.allocateMemRefsArray(1);
2176  auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
2177  MachineMemOperand::MODereferenceable;
2178  *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, PtrTy.getSizeInBits() / 8,
2179  DAG.getEVTAlignment(PtrTy));
2180  Node->setMemRefs(MemRefs, MemRefs + 1);
2181  }
2182  return SDValue(Node, 0);
2183 }
2184 
2185 /// Codegen a new tail for a stack protector check ParentMBB which has had its
2186 /// tail spliced into a stack protector check success bb.
2187 ///
2188 /// For a high level explanation of how this fits into the stack protector
2189 /// generation see the comment on the declaration of class
2190 /// StackProtectorDescriptor.
2191 void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
2192  MachineBasicBlock *ParentBB) {
2193 
2194  // First create the loads to the guard/stack slot for the comparison.
2195  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2196  EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
2197 
2198  MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
2199  int FI = MFI.getStackProtectorIndex();
2200 
2201  SDValue Guard;
2202  SDLoc dl = getCurSDLoc();
2203  SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
2204  const Module &M = *ParentBB->getParent()->getFunction().getParent();
2205  unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext()));
2206 
2207  // Generate code to load the content of the guard slot.
2208  SDValue GuardVal = DAG.getLoad(
2209  PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
2210  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
2211  MachineMemOperand::MOVolatile);
2212 
2213  if (TLI.useStackGuardXorFP())
2214  GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl);
2215 
2216  // Retrieve guard check function, nullptr if instrumentation is inlined.
2217  if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) {
2218  // The target provides a guard check function to validate the guard value.
2219  // Generate a call to that function with the content of the guard slot as
2220  // argument.
2221  auto *Fn = cast<Function>(GuardCheck);
2222  FunctionType *FnTy = Fn->getFunctionType();
2223  assert(FnTy->getNumParams() == 1 && "Invalid function signature");
2224 
2227  Entry.Node = GuardVal;
2228  Entry.Ty = FnTy->getParamType(0);
2229  if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
2230  Entry.IsInReg = true;
2231  Args.push_back(Entry);
2232 
2233  TargetLowering::CallLoweringInfo CLI(DAG);
2234  CLI.setDebugLoc(getCurSDLoc())
2235  .setChain(DAG.getEntryNode())
2236  .setCallee(Fn->getCallingConv(), FnTy->getReturnType(),
2237  getValue(GuardCheck), std::move(Args));
2238 
2239  std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
2240  DAG.setRoot(Result.second);
2241  return;
2242  }
2243 
2244  // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
2245  // Otherwise, emit a volatile load to retrieve the stack guard value.
2246  SDValue Chain = DAG.getEntryNode();
2247  if (TLI.useLoadStackGuardNode()) {
2248  Guard = getLoadStackGuard(DAG, dl, Chain);
2249  } else {
2250  const Value *IRGuard = TLI.getSDagStackGuard(M);
2251  SDValue GuardPtr = getValue(IRGuard);
2252 
2253  Guard =
2254  DAG.getLoad(PtrTy, dl, Chain, GuardPtr, MachinePointerInfo(IRGuard, 0),
2255  Align, MachineMemOperand::MOVolatile);
2256  }
2257 
2258  // Perform the comparison via a subtract/getsetcc.
2259  EVT VT = Guard.getValueType();
2260  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, GuardVal);
2261 
2262  SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
2263  *DAG.getContext(),
2264  Sub.getValueType()),
2265  Sub, DAG.getConstant(0, dl, VT), ISD::SETNE);
2266 
2267  // If the sub is not 0, then we know the guard/stackslot do not equal, so
2268  // branch to failure MBB.
2269  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2270  MVT::Other, GuardVal.getOperand(0),
2271  Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
2272  // Otherwise branch to success MBB.
2273  SDValue Br = DAG.getNode(ISD::BR, dl,
2274  MVT::Other, BrCond,
2275  DAG.getBasicBlock(SPD.getSuccessMBB()));
2276 
2277  DAG.setRoot(Br);
2278 }
2279 
2280 /// Codegen the failure basic block for a stack protector check.
2281 ///
2282 /// A failure stack protector machine basic block consists simply of a call to
2283 /// __stack_chk_fail().
2284 ///
2285 /// For a high level explanation of how this fits into the stack protector
2286 /// generation see the comment on the declaration of class
2287 /// StackProtectorDescriptor.
2288 void
2289 SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
2290  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2291  SDValue Chain =
2292  TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
2293  None, false, getCurSDLoc(), false, false).second;
2294  DAG.setRoot(Chain);
2295 }
2296 
2297 /// visitBitTestHeader - This function emits necessary code to produce value
2298 /// suitable for "bit tests"
2299 void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
2300  MachineBasicBlock *SwitchBB) {
2301  SDLoc dl = getCurSDLoc();
2302 
2303  // Subtract the minimum value
2304  SDValue SwitchOp = getValue(B.SValue);
2305  EVT VT = SwitchOp.getValueType();
2306  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
2307  DAG.getConstant(B.First, dl, VT));
2308 
2309  // Check range
2310  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2311  SDValue RangeCmp = DAG.getSetCC(
2312  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
2313  Sub.getValueType()),
2314  Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT);
2315 
2316  // Determine the type of the test operands.
2317  bool UsePtrType = false;
2318  if (!TLI.isTypeLegal(VT))
2319  UsePtrType = true;
2320  else {
2321  for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
2322  if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
2323  // Switch case ranges are encoded into a series of masks.
2324  // Just use pointer type, it's guaranteed to fit.
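        // Editor's note (illustrative): an i8-typed switch whose cases span
        // nine values produces a mask with bit 8 set (1 << 8 == 256), which
        // isUIntN(8, Mask) rejects, so the pointer type is used instead.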
2325  UsePtrType = true;
2326  break;
2327  }
2328  }
2329  if (UsePtrType) {
2330  VT = TLI.getPointerTy(DAG.getDataLayout());
2331  Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
2332  }
2333 
2334  B.RegVT = VT.getSimpleVT();
2335  B.Reg = FuncInfo.CreateReg(B.RegVT);
2336  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub);
2337 
2338  MachineBasicBlock* MBB = B.Cases[0].ThisBB;
2339 
2340  addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
2341  addSuccessorWithProb(SwitchBB, MBB, B.Prob);
2342  SwitchBB->normalizeSuccProbs();
2343 
2344  SDValue BrRange = DAG.getNode(ISD::BRCOND, dl,
2345  MVT::Other, CopyTo, RangeCmp,
2346  DAG.getBasicBlock(B.Default));
2347 
2348  // Avoid emitting unnecessary branches to the next block.
2349  if (MBB != NextBlock(SwitchBB))
2350  BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange,
2351  DAG.getBasicBlock(MBB));
2352 
2353  DAG.setRoot(BrRange);
2354 }
2355 
2356 /// visitBitTestCase - this function produces one "bit test"
2357 void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
2358  MachineBasicBlock* NextMBB,
2359  BranchProbability BranchProbToNext,
2360  unsigned Reg,
2361  BitTestCase &B,
2362  MachineBasicBlock *SwitchBB) {
2363  SDLoc dl = getCurSDLoc();
2364  MVT VT = BB.RegVT;
2365  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT);
2366  SDValue Cmp;
2367  unsigned PopCount = countPopulation(B.Mask);
2368  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2369  if (PopCount == 1) {
2370  // Testing for a single bit; just compare the shift count with what it
2371  // would need to be to shift a 1 bit in that position.
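    // Editor's note (illustrative): for Mask == 0x10 this emits
    // setcc(ShiftOp, 4, eq), since countTrailingZeros(0x10) == 4.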
2372  Cmp = DAG.getSetCC(
2373  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2374  ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT),
2375  ISD::SETEQ);
2376  } else if (PopCount == BB.Range) {
2377  // There is only one zero bit in the range, test for it directly.
2378  Cmp = DAG.getSetCC(
2379  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2380  ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT),
2381  ISD::SETNE);
2382  } else {
2383  // Make desired shift
2384  SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT,
2385  DAG.getConstant(1, dl, VT), ShiftOp);
2386 
2387  // Emit bit tests and jumps
2388  SDValue AndOp = DAG.getNode(ISD::AND, dl,
2389  VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT));
2390  Cmp = DAG.getSetCC(
2391  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2392  AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE);
2393  }
2394 
2395  // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
2396  addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
2397  // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
2398  addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
2399  // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
2400  // one, as they are relative probabilities (and thus work more like
2401  // weights), so we normalize them to make them sum to one.
2402  SwitchBB->normalizeSuccProbs();
2403 
2404  SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
2405  MVT::Other, getControlRoot(),
2406  Cmp, DAG.getBasicBlock(B.TargetBB));
2407 
2408  // Avoid emitting unnecessary branches to the next block.
2409  if (NextMBB != NextBlock(SwitchBB))
2410  BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd,
2411  DAG.getBasicBlock(NextMBB));
2412 
2413  DAG.setRoot(BrAnd);
2414 }
2415 
2416 void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
2417  MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
2418 
2419  // Retrieve successors. Look through artificial IR level blocks like
2420  // catchswitch for successors.
2421  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
2422  const BasicBlock *EHPadBB = I.getSuccessor(1);
2423 
2424  // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
2425  // have to do anything here to lower funclet bundles.
2426  assert(!I.hasOperandBundlesOtherThan(
2427  {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
2428  "Cannot lower invokes with arbitrary operand bundles yet!");
2429 
2430  const Value *Callee(I.getCalledValue());
2431  const Function *Fn = dyn_cast<Function>(Callee);
2432  if (isa<InlineAsm>(Callee))
2433  visitInlineAsm(&I);
2434  else if (Fn && Fn->isIntrinsic()) {
2435  switch (Fn->getIntrinsicID()) {
2436  default:
2437  llvm_unreachable("Cannot invoke this intrinsic");
2438  case Intrinsic::donothing:
2439  // Ignore invokes to @llvm.donothing: jump directly to the next BB.
2440  break;
2441  case Intrinsic::experimental_patchpoint_void:
2442  case Intrinsic::experimental_patchpoint_i64:
2443  visitPatchpoint(&I, EHPadBB);
2444  break;
2445  case Intrinsic::experimental_gc_statepoint:
2446  LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
2447  break;
2448  }
2449  } else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) {
2450  // Currently we do not lower any intrinsic calls with deopt operand bundles.
2451  // Eventually we will support lowering the @llvm.experimental.deoptimize
2452  // intrinsic, and right now there are no plans to support other intrinsics
2453  // with deopt state.
2454  LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
2455  } else {
2456  LowerCallTo(&I, getValue(Callee), false, EHPadBB);
2457  }
2458 
2459  // If the value of the invoke is used outside of its defining block, make it
2460  // available as a virtual register.
2461  // We already took care of the exported value for the statepoint instruction
2462  // during call to the LowerStatepoint.
2463  if (!isStatepoint(I)) {
2464  CopyToExportRegsIfNeeded(&I);
2465  }
2466 
2467  SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
2468  BranchProbabilityInfo *BPI = FuncInfo.BPI;
2469  BranchProbability EHPadBBProb =
2470  BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
2471  : BranchProbability::getZero();
2472  findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests);
2473 
2474  // Update successor info.
2475  addSuccessorWithProb(InvokeMBB, Return);
2476  for (auto &UnwindDest : UnwindDests) {
2477  UnwindDest.first->setIsEHPad();
2478  addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
2479  }
2480  InvokeMBB->normalizeSuccProbs();
2481 
2482  // Drop into normal successor.
2483  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
2484  MVT::Other, getControlRoot(),
2485  DAG.getBasicBlock(Return)));
2486 }
2487 
2488 void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
2489  llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
2490 }
2491 
2492 void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
2493  assert(FuncInfo.MBB->isEHPad() &&
2494  "Call to landingpad not in landing pad!");
2495 
2496  MachineBasicBlock *MBB = FuncInfo.MBB;
2497  addLandingPadInfo(LP, *MBB);
2498 
2499  // If there aren't registers to copy the values into (e.g., during SjLj
2500  // exceptions), then don't bother to create these DAG nodes.
2501  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2502  const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn();
2503  if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
2504  TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
2505  return;
2506 
2507  // If landingpad's return type is token type, we don't create DAG nodes
2508  // for its exception pointer and selector value. The extraction of exception
2509  // pointer or selector value from token type landingpads is not currently
2510  // supported.
2511  if (LP.getType()->isTokenTy())
2512  return;
2513 
2514  SmallVector<EVT, 2> ValueVTs;
2515  SDLoc dl = getCurSDLoc();
2516  ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs);
2517  assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");
2518 
2519  // Get the two live-in registers as SDValues. The physregs have already been
2520  // copied into virtual registers.
2521  SDValue Ops[2];
2522  if (FuncInfo.ExceptionPointerVirtReg) {
2523  Ops[0] = DAG.getZExtOrTrunc(
2524  DAG.getCopyFromReg(DAG.getEntryNode(), dl,
2525  FuncInfo.ExceptionPointerVirtReg,
2526  TLI.getPointerTy(DAG.getDataLayout())),
2527  dl, ValueVTs[0]);
2528  } else {
2529  Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()));
2530  }
2531  Ops[1] = DAG.getZExtOrTrunc(
2532  DAG.getCopyFromReg(DAG.getEntryNode(), dl,
2533  FuncInfo.ExceptionSelectorVirtReg,
2534  TLI.getPointerTy(DAG.getDataLayout())),
2535  dl, ValueVTs[1]);
2536 
2537  // Merge into one.
2538  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
2539  DAG.getVTList(ValueVTs), Ops);
2540  setValue(&LP, Res);
2541 }
2542 
2543 void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
2544 #ifndef NDEBUG
2545  for (const CaseCluster &CC : Clusters)
2546  assert(CC.Low == CC.High && "Input clusters must be single-case");
2547 #endif
2548 
2549  llvm::sort(Clusters.begin(), Clusters.end(),
2550  [](const CaseCluster &a, const CaseCluster &b) {
2551  return a.Low->getValue().slt(b.Low->getValue());
2552  });
2553 
2554  // Merge adjacent clusters with the same destination.
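  // Editor's note (illustrative, not from the original source): single-case
  // clusters {0->A, 1->A, 2->B, 3->B, 5->B} merge into {[0,1]->A, [2,3]->B,
  // [5,5]->B}; 5 shares B's destination but is not adjacent to 3, so it
  // stays a separate cluster.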
2555  const unsigned N = Clusters.size();
2556  unsigned DstIndex = 0;
2557  for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) {
2558  CaseCluster &CC = Clusters[SrcIndex];
2559  const ConstantInt *CaseVal = CC.Low;
2560  MachineBasicBlock *Succ = CC.MBB;
2561 
2562  if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ &&
2563  (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) {
2564  // If this case has the same successor and is a neighbour, merge it into
2565  // the previous cluster.
2566  Clusters[DstIndex - 1].High = CaseVal;
2567  Clusters[DstIndex - 1].Prob += CC.Prob;
2568  } else {
2569  std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex],
2570  sizeof(Clusters[SrcIndex]));
2571  }
2572  }
2573  Clusters.resize(DstIndex);
2574 }
2575 
2576 void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
2577  MachineBasicBlock *Last) {
2578  // Update JTCases.
2579  for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
2580  if (JTCases[i].first.HeaderBB == First)
2581  JTCases[i].first.HeaderBB = Last;
2582 
2583  // Update BitTestCases.
2584  for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
2585  if (BitTestCases[i].Parent == First)
2586  BitTestCases[i].Parent = Last;
2587 }
2588 
2589 void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
2590  MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
2591 
2592  // Update machine-CFG edges with unique successors.
2593  SmallSet<BasicBlock*, 32> Done;
2594  for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
2595  BasicBlock *BB = I.getSuccessor(i);
2596  bool Inserted = Done.insert(BB).second;
2597  if (!Inserted)
2598  continue;
2599 
2600  MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
2601  addSuccessorWithProb(IndirectBrMBB, Succ);
2602  }
2603  IndirectBrMBB->normalizeSuccProbs();
2604 
2605  DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(),
2606  MVT::Other, getControlRoot(),
2607  getValue(I.getAddress())));
2608 }
2609 
2610 void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
2611  if (!DAG.getTarget().Options.TrapUnreachable)
2612  return;
2613 
2614  // We may be able to ignore unreachable behind a noreturn call.
2615  if (DAG.getTarget().Options.NoTrapAfterNoreturn) {
2616  const BasicBlock &BB = *I.getParent();
2617  if (&I != &BB.front()) {
2618  BasicBlock::const_iterator PredI =
2619  std::prev(BasicBlock::const_iterator(&I));
2620  if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) {
2621  if (Call->doesNotReturn())
2622  return;
2623  }
2624  }
2625  }
2626 
2627  DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
2628 }
2629 
2630 void SelectionDAGBuilder::visitFSub(const User &I) {
2631  // -0.0 - X --> fneg
2632  Type *Ty = I.getType();
2633  if (isa<Constant>(I.getOperand(0)) &&
2634  I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
2635  SDValue Op2 = getValue(I.getOperand(1));
2636  setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(),
2637  Op2.getValueType(), Op2));
2638  return;
2639  }
2640 
2641  visitBinary(I, ISD::FSUB);
2642 }
2643 
2644 /// Checks if the given instruction performs a vector reduction, in which case
2645 /// we have the freedom to alter the elements in the result as long as the
2646 /// reduction over them stays unchanged.
2647 static bool isVectorReductionOp(const User *I) {
2648  const Instruction *Inst = dyn_cast<Instruction>(I);
2649  if (!Inst || !Inst->getType()->isVectorTy())
2650  return false;
2651 
2652  auto OpCode = Inst->getOpcode();
2653  switch (OpCode) {
2654  case Instruction::Add:
2655  case Instruction::Mul:
2656  case Instruction::And:
2657  case Instruction::Or:
2658  case Instruction::Xor:
2659  break;
2660  case Instruction::FAdd:
2661  case Instruction::FMul:
2662  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
2663  if (FPOp->getFastMathFlags().isFast())
2664  break;
2665  LLVM_FALLTHROUGH;
2666  default:
2667  return false;
2668  }
2669 
2670  unsigned ElemNum = Inst->getType()->getVectorNumElements();
2671  unsigned ElemNumToReduce = ElemNum;
2672 
2673  // Do DFS search on the def-use chain from the given instruction. We only
2674  // allow four kinds of operations during the search until we reach the
2675  // instruction that extracts the first element from the vector:
2676  //
2677  // 1. The reduction operation of the same opcode as the given instruction.
2678  //
2679  // 2. PHI node.
2680  //
2681  // 3. ShuffleVector instruction together with a reduction operation that
2682  // does a partial reduction.
2683  //
2684  // 4. ExtractElement that extracts the first element from the vector, and we
2685  // stop searching the def-use chain here.
2686  //
2687  // 3 & 4 above perform a reduction on all elements of the vector. We push defs
2688  // from 1-3 to the stack to continue the DFS. The given instruction is not
2689  // a reduction operation if we meet any other instructions other than those
2690  // listed above.
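  //
  // Editor's note - a sketch of IR this accepts when called on %v, assuming
  // a 4-wide fast-math fadd reduction (illustrative, not from the original
  // source):
  //   %v  = fadd fast <4 x float> %a, %b            ; candidate instruction
  //   %s1 = shufflevector <4 x float> %v, <4 x float> undef,
  //                       <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  //   %r1 = fadd fast <4 x float> %v, %s1           ; reduce to 2 lanes
  //   %s2 = shufflevector <4 x float> %r1, <4 x float> undef,
  //                       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  //   %r2 = fadd fast <4 x float> %r1, %s2          ; reduce to 1 lane
  //   %res = extractelement <4 x float> %r2, i32 0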
2691 
2692  SmallVector<const User *, 16> UsersToVisit{Inst};
2693  SmallPtrSet<const User *, 16> Visited;
2694  bool ReduxExtracted = false;
2695 
2696  while (!UsersToVisit.empty()) {
2697  auto User = UsersToVisit.back();
2698  UsersToVisit.pop_back();
2699  if (!Visited.insert(User).second)
2700  continue;
2701 
2702  for (const auto &U : User->users()) {
2703  auto Inst = dyn_cast<Instruction>(U);
2704  if (!Inst)
2705  return false;
2706 
2707  if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) {
2708  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
2709  if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().isFast())
2710  return false;
2711  UsersToVisit.push_back(U);
2712  } else if (const ShuffleVectorInst *ShufInst =
2713  dyn_cast<ShuffleVectorInst>(U)) {
2714  // Detect the following pattern: A ShuffleVector instruction together
2715  // with a reduction operation that does a partial reduction on the first and
2716  // second ElemNumToReduce / 2 elements, storing the result in the first
2717  // ElemNumToReduce / 2 elements of another vector.
2718 
2719  unsigned ResultElements = ShufInst->getType()->getVectorNumElements();
2720  if (ResultElements < ElemNum)
2721  return false;
2722 
2723  if (ElemNumToReduce == 1)
2724  return false;
2725  if (!isa<UndefValue>(U->getOperand(1)))
2726  return false;
2727  for (unsigned i = 0; i < ElemNumToReduce / 2; ++i)
2728  if (ShufInst->getMaskValue(i) != int(i + ElemNumToReduce / 2))
2729  return false;
2730  for (unsigned i = ElemNumToReduce / 2; i < ElemNum; ++i)
2731  if (ShufInst->getMaskValue(i) != -1)
2732  return false;
2733 
2734  // There is only one user of this ShuffleVector instruction, which
2735  // must be a reduction operation.
2736  if (!U->hasOneUse())
2737  return false;
2738 
2739  auto U2 = dyn_cast<Instruction>(*U->user_begin());
2740  if (!U2 || U2->getOpcode() != OpCode)
2741  return false;
2742 
2743  // Check operands of the reduction operation.
2744  if ((U2->getOperand(0) == U->getOperand(0) && U2->getOperand(1) == U) ||
2745  (U2->getOperand(1) == U->getOperand(0) && U2->getOperand(0) == U)) {
2746  UsersToVisit.push_back(U2);
2747  ElemNumToReduce /= 2;
2748  } else
2749  return false;
2750  } else if (isa<ExtractElementInst>(U)) {
2751  // At this moment we should have reduced all elements in the vector.
2752  if (ElemNumToReduce != 1)
2753  return false;
2754 
2755  const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1));
2756  if (!Val || Val->getZExtValue() != 0)
2757  return false;
2758 
2759  ReduxExtracted = true;
2760  } else
2761  return false;
2762  }
2763  }
2764  return ReduxExtracted;
2765 }
2766 
2767 void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
2768  SDNodeFlags Flags;
2769  if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(&I)) {
2770  Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap());
2771  Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap());
2772  }
2773  if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) {
2774  Flags.setExact(ExactOp->isExact());
2775  }
2776  if (isVectorReductionOp(&I)) {
2777  Flags.setVectorReduction(true);
2778  LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
2779  }
2780 
2781  SDValue Op1 = getValue(I.getOperand(0));
2782  SDValue Op2 = getValue(I.getOperand(1));
2783  SDValue BinNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(),
2784  Op1, Op2, Flags);
2785  setValue(&I, BinNodeValue);
2786 }
2787 
2788 void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
2789  SDValue Op1 = getValue(I.getOperand(0));
2790  SDValue Op2 = getValue(I.getOperand(1));
2791 
2792  EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
2793  Op2.getValueType(), DAG.getDataLayout());
2794 
2795  // Coerce the shift amount to the right type if we can.
2796  if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
2797  unsigned ShiftSize = ShiftTy.getSizeInBits();
2798  unsigned Op2Size = Op2.getValueSizeInBits();
2799  SDLoc DL = getCurSDLoc();
2800 
2801  // If the operand is smaller than the shift count type, promote it.
2802  if (ShiftSize > Op2Size)
2803  Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
2804 
2805  // If the operand is larger than the shift count type but the shift
2806  // count type has enough bits to represent any shift value, truncate
2807  // it now. This is a common case and it exposes the truncate to
2808  // optimization early.
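    // Editor's note (illustrative): an i64 shift amount with an i8
    // shift-amount type hits this case, since ShiftSize (8) >=
    // Log2_32_Ceil(64) (6); every in-range amount survives truncation to i8.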
2809  else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits()))
2810  Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
2811  // Otherwise we'll need to temporarily settle for some other convenient
2812  // type. Type legalization will make adjustments once the shiftee is split.
2813  else
2814  Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
2815  }
2816 
2817  bool nuw = false;
2818  bool nsw = false;
2819  bool exact = false;
2820 
2821  if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {
2822 
2823  if (const OverflowingBinaryOperator *OFBinOp =
2824  dyn_cast<const OverflowingBinaryOperator>(&I)) {
2825  nuw = OFBinOp->hasNoUnsignedWrap();
2826  nsw = OFBinOp->hasNoSignedWrap();
2827  }
2828  if (const PossiblyExactOperator *ExactOp =
2829  dyn_cast<const PossiblyExactOperator>(&I))
2830  exact = ExactOp->isExact();
2831  }
2832  SDNodeFlags Flags;
2833  Flags.setExact(exact);
2834  Flags.setNoSignedWrap(nsw);
2835  Flags.setNoUnsignedWrap(nuw);
2836  SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
2837  Flags);
2838  setValue(&I, Res);
2839 }
2840 
2841 void SelectionDAGBuilder::visitSDiv(const User &I) {
2842  SDValue Op1 = getValue(I.getOperand(0));
2843  SDValue Op2 = getValue(I.getOperand(1));
2844 
2845  SDNodeFlags Flags;
2846  Flags.setExact(isa<PossiblyExactOperator>(&I) &&
2847  cast<PossiblyExactOperator>(&I)->isExact());
2848  setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
2849  Op2, Flags));
2850 }
2851 
2852 void SelectionDAGBuilder::visitICmp(const User &I) {
2853  ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
2854  if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
2855  predicate = IC->getPredicate();
2856  else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
2857  predicate = ICmpInst::Predicate(IC->getPredicate());
2858  SDValue Op1 = getValue(I.getOperand(0));
2859  SDValue Op2 = getValue(I.getOperand(1));
2860  ISD::CondCode Opcode = getICmpCondCode(predicate);
2861 
2862  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2863  I.getType());
2864  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
2865 }
2866 
2867 void SelectionDAGBuilder::visitFCmp(const User &I) {
2868  FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
2869  if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
2870  predicate = FC->getPredicate();
2871  else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
2872  predicate = FCmpInst::Predicate(FC->getPredicate());
2873  SDValue Op1 = getValue(I.getOperand(0));
2874  SDValue Op2 = getValue(I.getOperand(1));
2875 
2876  ISD::CondCode Condition = getFCmpCondCode(predicate);
2877  auto *FPMO = dyn_cast<FPMathOperator>(&I);
2878  if ((FPMO && FPMO->hasNoNaNs()) || TM.Options.NoNaNsFPMath)
2879  Condition = getFCmpCodeWithoutNaN(Condition);
2880 
2881  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2882  I.getType());
2883  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
2884 }
2885 
2886 // Check if the condition of the select is used only by selects (e.g. one
2887 // user, or several selects sharing the same condition).
2888 static bool hasOnlySelectUsers(const Value *Cond) {
2889  return llvm::all_of(Cond->users(), [](const Value *V) {
2890  return isa<SelectInst>(V);
2891  });
2892 }
2893 
2894 void SelectionDAGBuilder::visitSelect(const User &I) {
2895  SmallVector<EVT, 4> ValueVTs;
2896  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
2897  ValueVTs);
2898  unsigned NumValues = ValueVTs.size();
2899  if (NumValues == 0) return;
2900 
2901  SmallVector<SDValue, 4> Values(NumValues);
2902  SDValue Cond = getValue(I.getOperand(0));
2903  SDValue LHSVal = getValue(I.getOperand(1));
2904  SDValue RHSVal = getValue(I.getOperand(2));
2905  auto BaseOps = {Cond};
2906  ISD::NodeType OpCode = Cond.getValueType().isVector() ?
2907  ISD::VSELECT : ISD::SELECT;
2908 
2909  // Min/max matching is only viable if all output VTs are the same.
2910  if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) {
2911  EVT VT = ValueVTs[0];
2912  LLVMContext &Ctx = *DAG.getContext();
2913  auto &TLI = DAG.getTargetLoweringInfo();
2914 
2915  // We care about the legality of the operation after it has been type
2916  // legalized.
2917  while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal &&
2918  VT != TLI.getTypeToTransformTo(Ctx, VT))
2919  VT = TLI.getTypeToTransformTo(Ctx, VT);
2920 
2921  // If the vselect is legal, assume we want to leave this as a vector setcc +
2922  // vselect. Otherwise, if this is going to be scalarized, we want to see if
2923  // min/max is legal on the scalar type.
2924  bool UseScalarMinMax = VT.isVector() &&
2925  !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);
2926 
2927  Value *LHS, *RHS;
2928  auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
2929  ISD::NodeType Opc = ISD::DELETED_NODE;
2930  switch (SPR.Flavor) {
2931  case SPF_UMAX: Opc = ISD::UMAX; break;
2932  case SPF_UMIN: Opc = ISD::UMIN; break;
2933  case SPF_SMAX: Opc = ISD::SMAX; break;
2934  case SPF_SMIN: Opc = ISD::SMIN; break;
2935  case SPF_FMINNUM:
2936  switch (SPR.NaNBehavior) {
2937  case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
2938  case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break;
2939  case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
2940  case SPNB_RETURNS_ANY: {
2941  if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
2942  Opc = ISD::FMINNUM;
2943  else if (TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT))
2944  Opc = ISD::FMINNAN;
2945  else if (UseScalarMinMax)
2946  Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
2947  ISD::FMINNUM : ISD::FMINNAN;
2948  break;
2949  }
2950  }
2951  break;
2952  case SPF_FMAXNUM:
2953  switch (SPR.NaNBehavior) {
2954  case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
2955  case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break;
2956  case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
2957  case SPNB_RETURNS_ANY:
2958 
2959  if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
2960  Opc = ISD::FMAXNUM;
2961  else if (TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT))
2962  Opc = ISD::FMAXNAN;
2963  else if (UseScalarMinMax)
2964  Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
2965  ISD::FMAXNUM : ISD::FMAXNAN;
2966  break;
2967  }
2968  break;
2969  default: break;
2970  }
2971 
2972  if (Opc != ISD::DELETED_NODE &&
2973  (TLI.isOperationLegalOrCustom(Opc, VT) ||
2974  (UseScalarMinMax &&
2975  TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
2976  // If the underlying comparison instruction is used by any other
2977  // instruction, the consumed instructions won't be destroyed, so it is
2978  // not profitable to convert to a min/max.
2979  hasOnlySelectUsers(cast<SelectInst>(I).getCondition())) {
2980  OpCode = Opc;
2981  LHSVal = getValue(LHS);
2982  RHSVal = getValue(RHS);
2983  BaseOps = {};
2984  }
2985  }
2986 
2987  for (unsigned i = 0; i != NumValues; ++i) {
2988  SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
2989  Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
2990  Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
2991  Values[i] = DAG.getNode(OpCode, getCurSDLoc(),
2992  LHSVal.getNode()->getValueType(LHSVal.getResNo()+i),
2993  Ops);
2994  }
2995 
2996  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
2997  DAG.getVTList(ValueVTs), Values));
2998 }
2999 
3000 void SelectionDAGBuilder::visitTrunc(const User &I) {
3001  // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
3002  SDValue N = getValue(I.getOperand(0));
3003  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3004  I.getType());
3005  setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
3006 }
3007 
3008 void SelectionDAGBuilder::visitZExt(const User &I) {
3009  // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
3010  // ZExt also can't be a cast to bool for the same reason. So, nothing much to do.
3011  SDValue N = getValue(I.getOperand(0));
3012  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3013  I.getType());
3014  setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
3015 }
3016 
3017 void SelectionDAGBuilder::visitSExt(const User &I) {
3018  // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
3019  // SExt also can't be a cast to bool for the same reason. So, nothing much to do.
3020  SDValue N = getValue(I.getOperand(0));
3021  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3022  I.getType());
3023  setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
3024 }
3025 
3026 void SelectionDAGBuilder::visitFPTrunc(const User &I) {
3027  // FPTrunc is never a no-op cast, no need to check
3028  SDValue N = getValue(I.getOperand(0));
3029  SDLoc dl = getCurSDLoc();
3030  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3031  EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3032  setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
3033  DAG.getTargetConstant(
3034  0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
3035 }
3036 
3037 void SelectionDAGBuilder::visitFPExt(const User &I) {
3038  // FPExt is never a no-op cast, no need to check
3039  SDValue N = getValue(I.getOperand(0));
3040  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3041  I.getType());
3042  setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N));
3043 }
3044 
3045 void SelectionDAGBuilder::visitFPToUI(const User &I) {
3046  // FPToUI is never a no-op cast, no need to check
3047  SDValue N = getValue(I.getOperand(0));
3048  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3049  I.getType());
3050  setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N));
3051 }
3052 
3053 void SelectionDAGBuilder::visitFPToSI(const User &I) {
3054  // FPToSI is never a no-op cast, no need to check
3055  SDValue N = getValue(I.getOperand(0));
3056  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3057  I.getType());
3058  setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N));
3059 }
3060 
3061 void SelectionDAGBuilder::visitUIToFP(const User &I) {
3062  // UIToFP is never a no-op cast, no need to check
3063  SDValue N = getValue(I.getOperand(0));
3064  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3065  I.getType());
3066  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N));
3067 }
3068 
3069 void SelectionDAGBuilder::visitSIToFP(const User &I) {
3070  // SIToFP is never a no-op cast, no need to check
3071  SDValue N = getValue(I.getOperand(0));
3072  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3073  I.getType());
3074  setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
3075 }
3076 
3077 void SelectionDAGBuilder::visitPtrToInt(const User &I) {
3078  // What to do depends on the size of the integer and the size of the pointer.
3079  // We can either truncate, zero extend, or no-op, accordingly.
3080  SDValue N = getValue(I.getOperand(0));
3081  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3082  I.getType());
3083  setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
3084 }
3085 
3086 void SelectionDAGBuilder::visitIntToPtr(const User &I) {
3087  // What to do depends on the size of the integer and the size of the pointer.
3088  // We can either truncate, zero extend, or no-op, accordingly.
3089  SDValue N = getValue(I.getOperand(0));
3090  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3091  I.getType());
3092  setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
3093 }
3094 
3095 void SelectionDAGBuilder::visitBitCast(const User &I) {
3096  SDValue N = getValue(I.getOperand(0));
3097  SDLoc dl = getCurSDLoc();
3098  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3099  I.getType());
3100 
3101  // BitCast assures us that source and destination are the same size, so this
3102  // either a BITCAST or a no-op.
3103  if (DestVT != N.getValueType())
3104  setValue(&I, DAG.getNode(ISD::BITCAST, dl,
3105  DestVT, N)); // convert types.
3106  // Check if the original LLVM IR Operand was a ConstantInt, because getValue()
3107  // might fold any kind of constant expression to an integer constant and that
3108  // is not what we are looking for. Only recognize a bitcast of a genuine
3109  // constant integer as an opaque constant.
3110  else if (ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
3111  setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false,
3112  /*isOpaque=*/true));
3113  else
3114  setValue(&I, N); // noop cast.
3115 }
3116 
3117 void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
3118  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3119  const Value *SV = I.getOperand(0);
3120  SDValue N = getValue(SV);
3121  EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3122 
3123  unsigned SrcAS = SV->getType()->getPointerAddressSpace();
3124  unsigned DestAS = I.getType()->getPointerAddressSpace();
3125 
3126  if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
3127  N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
3128 
3129  setValue(&I, N);
3130 }
3131 
3132 void SelectionDAGBuilder::visitInsertElement(const User &I) {
3133  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3134  SDValue InVec = getValue(I.getOperand(0));
3135  SDValue InVal = getValue(I.getOperand(1));
3136  SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
3137  TLI.getVectorIdxTy(DAG.getDataLayout()));
3138  setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
3139  TLI.getValueType(DAG.getDataLayout(), I.getType()),
3140  InVec, InVal, InIdx));
3141 }
3142 
3143 void SelectionDAGBuilder::visitExtractElement(const User &I) {
3144  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3145  SDValue InVec = getValue(I.getOperand(0));
3146  SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
3147  TLI.getVectorIdxTy(DAG.getDataLayout()));
3148  setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
3149  TLI.getValueType(DAG.getDataLayout(), I.getType()),
3150  InVec, InIdx));
3151 }
3152 
3153 void SelectionDAGBuilder::visitShuffleVector(const User &I) {
3154  SDValue Src1 = getValue(I.getOperand(0));
3155  SDValue Src2 = getValue(I.getOperand(1));
3156  SDLoc DL = getCurSDLoc();
3157 
3158  SmallVector<int, 8> Mask;
3159  ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
3160  unsigned MaskNumElts = Mask.size();
3161 
3162  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3163  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3164  EVT SrcVT = Src1.getValueType();
3165  unsigned SrcNumElts = SrcVT.getVectorNumElements();
3166 
3167  if (SrcNumElts == MaskNumElts) {
3168  setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask));
3169  return;
3170  }
3171 
3172  // Normalize the shuffle vector since mask and vector length don't match.
3173  if (SrcNumElts < MaskNumElts) {
3174  // Mask is longer than the source vectors. We can concatenate the source
3175  // vectors to make their length match the mask.
3176 
3177  if (MaskNumElts % SrcNumElts == 0) {
3178  // Mask length is a multiple of the source vector length.
3179  // Check if the shuffle is some kind of concatenation of the input
3180  // vectors.
3181  unsigned NumConcat = MaskNumElts / SrcNumElts;
3182  bool IsConcat = true;
3183  SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
3184  for (unsigned i = 0; i != MaskNumElts; ++i) {
3185  int Idx = Mask[i];
3186  if (Idx < 0)
3187  continue;
3188  // Ensure the indices in each SrcVT sized piece are sequential and that
3189  // the same source is used for the whole piece.
3190  if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
3191  (ConcatSrcs[i / SrcNumElts] >= 0 &&
3192  ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
3193  IsConcat = false;
3194  break;
3195  }
3196  // Remember which source this index came from.
3197  ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
3198  }
3199 
3200  // The shuffle is concatenating multiple vectors together. Just emit
3201  // a CONCAT_VECTORS operation.
3202  if (IsConcat) {
3203  SmallVector<SDValue, 8> ConcatOps;
3204  for (auto Src : ConcatSrcs) {
3205  if (Src < 0)
3206  ConcatOps.push_back(DAG.getUNDEF(SrcVT));
3207  else if (Src == 0)
3208  ConcatOps.push_back(Src1);
3209  else
3210  ConcatOps.push_back(Src2);
3211  }
3212  setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps));
3213  return;
3214  }
3215  }
3216 
3217  unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
3218  unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
3219  EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
3220  PaddedMaskNumElts);
3221 
3222  // Pad both vectors with undefs to make them the same length as the mask.
3223  SDValue UndefVal = DAG.getUNDEF(SrcVT);
3224 
3225  SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
3226  SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
3227  MOps1[0] = Src1;
3228  MOps2[0] = Src2;
3229 
3230  Src1 = Src1.isUndef()
3231  ? DAG.getUNDEF(PaddedVT)
3232  : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
3233  Src2 = Src2.isUndef()
3234  ? DAG.getUNDEF(PaddedVT)
3235  : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
3236 
3237  // Readjust mask for new input vector length.
3238  SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
3239  for (unsigned i = 0; i != MaskNumElts; ++i) {
3240  int Idx = Mask[i];
3241  if (Idx >= (int)SrcNumElts)
3242  Idx -= SrcNumElts - PaddedMaskNumElts;
3243  MappedOps[i] = Idx;
3244  }
3245 
3246  SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps);
3247 
3248  // If the concatenated vector was padded, extract a subvector with the
3249  // correct number of elements.
3250  if (MaskNumElts != PaddedMaskNumElts)
3251  Result = DAG.getNode(
3252  ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
3253  DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
3254 
3255  setValue(&I, Result);
3256  return;
3257  }
3258 
3259  if (SrcNumElts > MaskNumElts) {
3260  // Analyze the access pattern of the vector to see if we can extract
3261  // two subvectors and do the shuffle.
3262  int StartIdx[2] = { -1, -1 }; // StartIdx to extract from
3263  bool CanExtract = true;
3264  for (int Idx : Mask) {
3265  unsigned Input = 0;
3266  if (Idx < 0)
3267  continue;
3268 
3269  if (Idx >= (int)SrcNumElts) {
3270  Input = 1;
3271  Idx -= SrcNumElts;
3272  }
3273 
3274  // If all the indices come from the same MaskNumElts sized portion of
3275  // the sources we can use extract. Also make sure the extract wouldn't
3276  // extract past the end of the source.
3277  int NewStartIdx = alignDown(Idx, MaskNumElts);
3278  if (NewStartIdx + MaskNumElts > SrcNumElts ||
3279  (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
3280  CanExtract = false;
3281  // Make sure we always update StartIdx as we use it to track if all
3282  // elements are undef.
3283  StartIdx[Input] = NewStartIdx;
3284  }
3285 
3286  if (StartIdx[0] < 0 && StartIdx[1] < 0) {
3287  setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
3288  return;
3289  }
3290  if (CanExtract) {
3291  // Extract appropriate subvector and generate a vector shuffle
3292  for (unsigned Input = 0; Input < 2; ++Input) {
3293  SDValue &Src = Input == 0 ? Src1 : Src2;
3294  if (StartIdx[Input] < 0)
3295  Src = DAG.getUNDEF(VT);
3296  else {
3297  Src = DAG.getNode(
3298  ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
3299  DAG.getConstant(StartIdx[Input], DL,
3300  TLI.getVectorIdxTy(DAG.getDataLayout())));
3301  }
3302  }
3303 
3304  // Calculate new mask.
3305  SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end());
3306  for (int &Idx : MappedOps) {
3307  if (Idx >= (int)SrcNumElts)
3308  Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
3309  else if (Idx >= 0)
3310  Idx -= StartIdx[0];
3311  }
3312 
3313  setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
3314  return;
3315  }
3316  }
3317 
3318  // We can't use either concat vectors or extract subvectors, so fall back
3319  // to replacing the shuffle with per-element extracts and a build vector.
3321  EVT EltVT = VT.getVectorElementType();
3322  EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
3323  SmallVector<SDValue, 8> Ops;
3324  for (int Idx : Mask) {
3325  SDValue Res;
3326 
3327  if (Idx < 0) {
3328  Res = DAG.getUNDEF(EltVT);
3329  } else {
3330  SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
3331  if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
3332 
3333  Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
3334  EltVT, Src, DAG.getConstant(Idx, DL, IdxVT));
3335  }
3336 
3337  Ops.push_back(Res);
3338  }
3339 
3340  setValue(&I, DAG.getBuildVector(VT, DL, Ops));
3341 }
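// Editor's illustration (added; not in the original source; value names are
// hypothetical): the normalization paths above, sketched on small IR inputs.
//
//   ; SrcNumElts < MaskNumElts with sequential indices -> CONCAT_VECTORS
//   shufflevector <4 x i32> %a, <4 x i32> %b,
//                 <8 x i32> <i32 0, i32 1, i32 2, i32 3,
//                            i32 4, i32 5, i32 6, i32 7>
//
//   ; SrcNumElts > MaskNumElts with indices inside one aligned window per
//   ; source -> EXTRACT_SUBVECTOR of each source + VECTOR_SHUFFLE
//   shufflevector <8 x i32> %a, <8 x i32> %b,
//                 <4 x i32> <i32 4, i32 5, i32 12, i32 13>
//
//   ; any other pattern -> per-element EXTRACT_VECTOR_ELT + BUILD_VECTOR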
3342 
3343 void SelectionDAGBuilder::visitInsertValue(const User &I) {
3344  ArrayRef<unsigned> Indices;
3345  if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I))
3346  Indices = IV->getIndices();
3347  else
3348  Indices = cast<ConstantExpr>(&I)->getIndices();
3349 
3350  const Value *Op0 = I.getOperand(0);
3351  const Value *Op1 = I.getOperand(1);
3352  Type *AggTy = I.getType();
3353  Type *ValTy = Op1->getType();
3354  bool IntoUndef = isa<UndefValue>(Op0);
3355  bool FromUndef = isa<UndefValue>(Op1);
3356 
3357  unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
3358 
3359  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3360  SmallVector<EVT, 4> AggValueVTs;
3361  ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs);
3362  SmallVector<EVT, 4> ValValueVTs;
3363  ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
3364 
3365  unsigned NumAggValues = AggValueVTs.size();
3366  unsigned NumValValues = ValValueVTs.size();
3367  SmallVector<SDValue, 4> Values(NumAggValues);
3368 
3369  // Ignore an insertvalue that produces an empty object
3370  if (!NumAggValues) {
3371  setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
3372  return;
3373  }
3374 
3375  SDValue Agg = getValue(Op0);
3376  unsigned i = 0;
3377  // Copy the beginning value(s) from the original aggregate.
3378  for (; i != LinearIndex; ++i)
3379  Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3380  SDValue(Agg.getNode(), Agg.getResNo() + i);
3381  // Copy values from the inserted value(s).
3382  if (NumValValues) {
3383  SDValue Val = getValue(Op1);
3384  for (; i != LinearIndex + NumValValues; ++i)
3385  Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3386  SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
3387  }
3388  // Copy remaining value(s) from the original aggregate.
3389  for (; i != NumAggValues; ++i)
3390  Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3391  SDValue(Agg.getNode(), Agg.getResNo() + i);
3392 
3393  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
3394  DAG.getVTList(AggValueVTs), Values));
3395 }
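// Editor's illustration (added; not in the original source; value names are
// hypothetical): ComputeLinearIndex flattens the aggregate type into its
// scalar leaves in source order, e.g.
//   %t = insertvalue {i32, {float, double}} %agg, double %v, 1, 1
// has leaves i32 (0), float (1), double (2), so LinearIndex is 2:
// Values[0..1] come from %agg and Values[2] comes from %v.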
3396 
3397 void SelectionDAGBuilder::visitExtractValue(const User &I) {
3398  ArrayRef<unsigned> Indices;
3399  if (const ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&I))
3400  Indices = EV->getIndices();
3401  else
3402  Indices = cast<ConstantExpr>(&I)->getIndices();
3403 
3404  const Value *Op0 = I.getOperand(0);
3405  Type *AggTy = Op0->getType();
3406  Type *ValTy = I.getType();
3407  bool OutOfUndef = isa<UndefValue>(Op0);
3408 
3409  unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
3410 
3411  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3412  SmallVector<EVT, 4> ValValueVTs;
3413  ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
3414 
3415  unsigned NumValValues = ValValueVTs.size();
3416 
3417  // Ignore an extractvalue that produces an empty object
3418  if (!NumValValues) {
3419  setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
3420  return;
3421  }
3422 
3423  SmallVector<SDValue, 4> Values(NumValValues);
3424 
3425  SDValue Agg = getValue(Op0);
3426  // Copy out the selected value(s).
3427  for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
3428  Values[i - LinearIndex] =
3429  OutOfUndef ?
3430  DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
3431  SDValue(Agg.getNode(), Agg.getResNo() + i);
3432 
3433  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
3434  DAG.getVTList(ValValueVTs), Values));
3435 }
3436 
3437 void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
3438  Value *Op0 = I.getOperand(0);
3439  // Note that the pointer operand may be a vector of pointers. Take the scalar
3440  // element which holds a pointer.
3441  unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace();
3442  SDValue N = getValue(Op0);
3443  SDLoc dl = getCurSDLoc();
3444 
3445  // Normalize Vector GEP: all scalar operands should be converted to
3446  // splat vectors.
3447  unsigned VectorWidth = I.getType()->isVectorTy() ?
3448  cast<VectorType>(I.getType())->getVectorNumElements() : 0;
3449 
3450  if (VectorWidth && !N.getValueType().isVector()) {
3451  LLVMContext &Context = *DAG.getContext();
3452  EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth);
3453  N = DAG.getSplatBuildVector(VT, dl, N);
3454  }
3455 
3456  for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
3457  GTI != E; ++GTI) {
3458  const Value *Idx = GTI.getOperand();
3459  if (StructType *StTy = GTI.getStructTypeOrNull()) {
3460  unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
3461  if (Field) {
3462  // N = N + Offset
3463  uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
3464 
3465  // In an inbounds GEP with an offset that is nonnegative even when
3466  // interpreted as signed, assume there is no unsigned overflow.
3467  SDNodeFlags Flags;
3468  if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
3469  Flags.setNoUnsignedWrap(true);
3470 
3471  N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
3472  DAG.getConstant(Offset, dl, N.getValueType()), Flags);
3473  }
3474  } else {
3475  unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
3476  MVT IdxTy = MVT::getIntegerVT(IdxSize);
3477  APInt ElementSize(IdxSize, DL->getTypeAllocSize(GTI.getIndexedType()));
3478 
3479  // If this is a scalar constant or a splat vector of constants,
3480  // handle it quickly.
3481  const auto *CI = dyn_cast<ConstantInt>(Idx);
3482  if (!CI && isa<ConstantDataVector>(Idx) &&
3483  cast<ConstantDataVector>(Idx)->getSplatValue())
3484  CI = cast<ConstantInt>(cast<ConstantDataVector>(Idx)->getSplatValue());
3485 
3486  if (CI) {
3487  if (CI->isZero())
3488  continue;
3489  APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize);
3490  LLVMContext &Context = *DAG.getContext();
3491  SDValue OffsVal = VectorWidth ?
3492  DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) :
3493  DAG.getConstant(Offs, dl, IdxTy);
3494 
3495  // In an inbounds GEP with an offset that is nonnegative even when
3496  // interpreted as signed, assume there is no unsigned overflow.
3497  SDNodeFlags Flags;
3498  if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
3499  Flags.setNoUnsignedWrap(true);
3500 
3501  N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags);
3502  continue;
3503  }
3504 
3505  // N = N + Idx * ElementSize;
3506  SDValue IdxN = getValue(Idx);
3507 
3508  if (!IdxN.getValueType().isVector() && VectorWidth) {
3509  EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth);
3510  IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
3511  }
3512 
3513  // If the index is smaller or larger than intptr_t, truncate or extend
3514  // it.
3515  IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
3516 
3517  // If this is a multiply by a power of two, turn it into a shl
3518  // immediately. This is a very common case.
3519  if (ElementSize != 1) {
3520  if (ElementSize.isPowerOf2()) {
3521  unsigned Amt = ElementSize.logBase2();
3522  IdxN = DAG.getNode(ISD::SHL, dl,
3523  N.getValueType(), IdxN,
3524  DAG.getConstant(Amt, dl, IdxN.getValueType()));
3525  } else {
3526  SDValue Scale = DAG.getConstant(ElementSize, dl, IdxN.getValueType());
3527  IdxN = DAG.getNode(ISD::MUL, dl,
3528  N.getValueType(), IdxN, Scale);
3529  }
3530  }
3531 
3532  N = DAG.getNode(ISD::ADD, dl,
3533  N.getValueType(), N, IdxN);
3534  }
3535  }
3536 
3537  setValue(&I, N);
3538 }
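// Editor's illustration (added; not in the original source; value names are
// hypothetical): on a typical 64-bit target where i32 is 4 bytes, a GEP such
// as
//   %p = getelementptr {i32, i32}, {i32, i32}* %s, i64 1, i32 1
// is lowered by the loop above to two ISD::ADD nodes: the i64 index is
// scaled by the struct's alloc size (8 bytes) and the field index adds the
// field offset (4 bytes), i.e. %p = %s + 1*8 + 4.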
3539 
3540 void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
3541  // If this is a fixed sized alloca in the entry block of the function,
3542  // allocate it statically on the stack.
3543  if (FuncInfo.StaticAllocaMap.count(&I))
3544  return; // getValue will auto-populate this.
3545 
3546  SDLoc dl = getCurSDLoc();
3547  Type *Ty = I.getAllocatedType();
3548  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3549  auto &DL = DAG.getDataLayout();
3550  uint64_t TySize = DL.getTypeAllocSize(Ty);
3551  unsigned Align =
3552  std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment());
3553 
3554  SDValue AllocSize = getValue(I.getArraySize());
3555 
3556  EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace());
3557  if (AllocSize.getValueType() != IntPtr)
3558  AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
3559 
3560  AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr,
3561  AllocSize,
3562  DAG.getConstant(TySize, dl, IntPtr));
3563 
3564  // Handle alignment. If the requested alignment is less than or equal to
3565  // the stack alignment, ignore it. If it is greater than the stack
3566  // alignment, we note this in the DYNAMIC_STACKALLOC node.
3567  unsigned StackAlign =
3568  DAG.getSubtarget().getFrameLowering()->getStackAlignment();
3569  if (Align <= StackAlign)
3570  Align = 0;
3571 
3572  // Round the size of the allocation up to the stack alignment size
3573  // by adding SA-1 to the size. This doesn't overflow because we're computing
3574  // an address inside an alloca.
3575  SDNodeFlags Flags;
3576  Flags.setNoUnsignedWrap(true);
3577  AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
3578  DAG.getConstant(StackAlign - 1, dl, IntPtr), Flags);
3579 
3580  // Mask out the low bits for alignment purposes.
3581  AllocSize =
3582  DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
3583  DAG.getConstant(~(uint64_t)(StackAlign - 1), dl, IntPtr));
3584 
3585  SDValue Ops[] = {getRoot(), AllocSize, DAG.getConstant(Align, dl, IntPtr)};
3586  SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
3587  SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
3588  setValue(&I, DSA);
3589  DAG.setRoot(DSA.getValue(1));
3590 
3591  assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects());
3592 }
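// Editor's illustration (added; not in the original source): the ADD/AND
// pair above is the usual align-up idiom,
//   AllocSize = (AllocSize + StackAlign - 1) & ~(StackAlign - 1),
// e.g. with StackAlign = 16 a 40-byte request becomes (40 + 15) & ~15 = 48.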
3593 
3594 void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
3595  if (I.isAtomic())
3596  return visitAtomicLoad(I);
3597 
3598  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3599  const Value *SV = I.getOperand(0);
3600  if (TLI.supportSwiftError()) {
3601  // Swifterror values can come from either a function parameter with
3602  // swifterror attribute or an alloca with swifterror attribute.
3603  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
3604  if (Arg->hasSwiftErrorAttr())
3605  return visitLoadFromSwiftError(I);
3606  }
3607 
3608  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
3609  if (Alloca->isSwiftError())
3610  return visitLoadFromSwiftError(I);
3611  }
3612  }
3613 
3614  SDValue Ptr = getValue(SV);
3615 
3616  Type *Ty = I.getType();
3617 
3618  bool isVolatile = I.isVolatile();
3619  bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
3620  bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr;
3621  bool isDereferenceable = isDereferenceablePointer(SV, DAG.getDataLayout());
3622  unsigned Alignment = I.getAlignment();
3623 
3624  AAMDNodes AAInfo;
3625  I.getAAMetadata(AAInfo);
3626  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
3627 
3628  SmallVector<EVT, 4> ValueVTs;
3629  SmallVector<uint64_t, 4> Offsets;
3630  ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets);
3631  unsigned NumValues = ValueVTs.size();
3632  if (NumValues == 0)
3633  return;
3634 
3635  SDValue Root;
3636  bool ConstantMemory = false;
3637  if (isVolatile || NumValues > MaxParallelChains)
3638  // Serialize volatile loads with other side effects.
3639  Root = getRoot();
3640  else if (AA && AA->pointsToConstantMemory(MemoryLocation(
3641  SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) {
3642  // Do not serialize (non-volatile) loads of constant memory with anything.
3643  Root = DAG.getEntryNode();
3644  ConstantMemory = true;
3645  } else {
3646  // Do not serialize non-volatile loads against each other.
3647  Root = DAG.getRoot();
3648  }
3649 
3650  SDLoc dl = getCurSDLoc();
3651 
3652  if (isVolatile)
3653  Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
3654 
3655  // An aggregate load cannot wrap around the address space, so offsets to its
3656  // parts don't wrap either.
3657  SDNodeFlags Flags;
3658  Flags.setNoUnsignedWrap(true);
3659 
3660  SmallVector<SDValue, 4> Values(NumValues);
3661  SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
3662  EVT PtrVT = Ptr.getValueType();
3663  unsigned ChainI = 0;
3664  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
3665  // Serializing loads here may result in excessive register pressure, and
3666  // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
3667  // could recover a bit by hoisting nodes upward in the chain by recognizing
3668  // they are side-effect free or do not alias. The optimizer should really
3669  // avoid this case by converting large object/array copies to llvm.memcpy
3670  // (MaxParallelChains should always remain as a failsafe).
3671  if (ChainI == MaxParallelChains) {
3672  assert(PendingLoads.empty() && "PendingLoads must be serialized first");
3673  SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3674  makeArrayRef(Chains.data(), ChainI));
3675  Root = Chain;
3676  ChainI = 0;
3677  }
3678  SDValue A = DAG.getNode(ISD::ADD, dl,
3679  PtrVT, Ptr,
3680  DAG.getConstant(Offsets[i], dl, PtrVT),
3681  Flags);
3682  auto MMOFlags = MachineMemOperand::MONone;
3683  if (isVolatile)
3684  MMOFlags |= MachineMemOperand::MOVolatile;
3685  if (isNonTemporal)
3686  MMOFlags |= MachineMemOperand::MONonTemporal;
3687  if (isInvariant)
3688  MMOFlags |= MachineMemOperand::MOInvariant;
3689  if (isDereferenceable)
3690  MMOFlags |= MachineMemOperand::MODereferenceable;
3691  MMOFlags |= TLI.getMMOFlags(I);
3692 
3693  SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A,
3694  MachinePointerInfo(SV, Offsets[i]), Alignment,
3695  MMOFlags, AAInfo, Ranges);
3696 
3697  Values[i] = L;
3698  Chains[ChainI] = L.getValue(1);
3699  }
3700 
3701  if (!ConstantMemory) {
3702  SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3703  makeArrayRef(Chains.data(), ChainI));
3704  if (isVolatile)
3705  DAG.setRoot(Chain);
3706  else
3707  PendingLoads.push_back(Chain);
3708  }
3709 
3710  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl,
3711  DAG.getVTList(ValueVTs), Values));
3712 }
3713 
3714 void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
3716  "call visitStoreToSwiftError when backend supports swifterror");
3717 
3718  SmallVector<EVT, 4> ValueVTs;
3719  SmallVector<uint64_t, 4> Offsets;
3720  const Value *SrcV = I.getOperand(0);
3721  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
3722  SrcV->getType(), ValueVTs, &Offsets);
3723  assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
3724  "expect a single EVT for swifterror");
3725 
3726  SDValue Src = getValue(SrcV);
3727  // Create a virtual register, then update the virtual register.
3728  unsigned VReg; bool CreatedVReg;
3729  std::tie(VReg, CreatedVReg) = FuncInfo.getOrCreateSwiftErrorVRegDefAt(&I);
3730  // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
3731  // Chain can be getRoot or getControlRoot.
3732  SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
3733  SDValue(Src.getNode(), Src.getResNo()));
3734  DAG.setRoot(CopyNode);
3735  if (CreatedVReg)
3736  FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg);
3737 }
3738 
3739 void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
3741  "call visitLoadFromSwiftError when backend supports swifterror");
3742 
3743  assert(!I.isVolatile() &&
3744  I.getMetadata(LLVMContext::MD_nontemporal) == nullptr &&
3745  I.getMetadata(LLVMContext::MD_invariant_load) == nullptr &&
3746  "Support volatile, non temporal, invariant for load_from_swift_error");
3747 
3748  const Value *SV = I.getOperand(0);
3749  Type *Ty = I.getType();
3750  AAMDNodes AAInfo;
3751  I.getAAMetadata(AAInfo);
3752  assert((!AA || !AA->pointsToConstantMemory(MemoryLocation(
3753  SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) &&
3754  "load_from_swift_error should not be constant memory");
3755 
3759  ValueVTs, &Offsets);
3760  assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
3761  "expect a single EVT for swifterror");
3762 
3763  // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
3764  SDValue L = DAG.getCopyFromReg(
3765  getRoot(), getCurSDLoc(),
3766  FuncInfo.getOrCreateSwiftErrorVRegUseAt(&I, FuncInfo.MBB, SV).first,
3767  ValueVTs[0]);
3768 
3769  setValue(&I, L);
3770 }
3771 
3772 void SelectionDAGBuilder::visitStore(const StoreInst &I) {
3773  if (I.isAtomic())
3774  return visitAtomicStore(I);
3775 
3776  const Value *SrcV = I.getOperand(0);
3777  const Value *PtrV = I.getOperand(1);
3778 
3779  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3780  if (TLI.supportSwiftError()) {
3781  // Swifterror values can come from either a function parameter with
3782  // swifterror attribute or an alloca with swifterror attribute.
3783  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
3784  if (Arg->hasSwiftErrorAttr())
3785  return visitStoreToSwiftError(I);
3786  }
3787 
3788  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
3789  if (Alloca->isSwiftError())
3790  return visitStoreToSwiftError(I);
3791  }
3792  }
3793 
3794  SmallVector<EVT, 4> ValueVTs;
3795  SmallVector<uint64_t, 4> Offsets;
3796  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
3797  SrcV->getType(), ValueVTs, &Offsets);
3798  unsigned NumValues = ValueVTs.size();
3799  if (NumValues == 0)
3800  return;
3801 
3802  // Get the lowered operands. Note that we do this after
3803  // checking if NumValues is zero, because with zero values
3804  // the operands won't have values in the map.
3805  SDValue Src = getValue(SrcV);
3806  SDValue Ptr = getValue(PtrV);
3807 
3808  SDValue Root = getRoot();
3809  SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
3810  SDLoc dl = getCurSDLoc();
3811  EVT PtrVT = Ptr.getValueType();
3812  unsigned Alignment = I.getAlignment();
3813  AAMDNodes AAInfo;
3814  I.getAAMetadata(AAInfo);
3815 
3816  auto MMOFlags = MachineMemOperand::MONone;
3817  if (I.isVolatile())
3818  MMOFlags |= MachineMemOperand::MOVolatile;
3819  if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
3820  MMOFlags |= MachineMemOperand::MONonTemporal;
3821  MMOFlags |= TLI.getMMOFlags(I);
3822 
3823  // An aggregate store cannot wrap around the address space, so offsets to its
3824  // parts don't wrap either.
3825  SDNodeFlags Flags;
3826  Flags.setNoUnsignedWrap(true);
3827 
3828  unsigned ChainI = 0;
3829  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
3830  // See visitLoad comments.
3831  if (ChainI == MaxParallelChains) {
3832  SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3833  makeArrayRef(Chains.data(), ChainI));
3834  Root = Chain;
3835  ChainI = 0;
3836  }
3837  SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
3838  DAG.getConstant(Offsets[i], dl, PtrVT), Flags);
3839  SDValue St = DAG.getStore(
3840  Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add,
3841  MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo);
3842  Chains[ChainI] = St;
3843  }
3844 
3845  SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3846  makeArrayRef(Chains.data(), ChainI));
3847  DAG.setRoot(StoreNode);
3848 }
3849 
3850 void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
3851  bool IsCompressing) {
3852  SDLoc sdl = getCurSDLoc();
3853 
3854  auto getMaskedStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3855  unsigned& Alignment) {
3856  // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
3857  Src0 = I.getArgOperand(0);
3858  Ptr = I.getArgOperand(1);
3859  Alignment = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
3860  Mask = I.getArgOperand(3);
3861  };
3862  auto getCompressingStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3863  unsigned& Alignment) {
3864  // llvm.masked.compressstore.*(Src0, Ptr, Mask)
3865  Src0 = I.getArgOperand(0);
3866  Ptr = I.getArgOperand(1);
3867  Mask = I.getArgOperand(2);
3868  Alignment = 0;
3869  };
3870 
3871  Value *PtrOperand, *MaskOperand, *Src0Operand;
3872  unsigned Alignment;
3873  if (IsCompressing)
3874  getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3875  else
3876  getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3877 
3878  SDValue Ptr = getValue(PtrOperand);
3879  SDValue Src0 = getValue(Src0Operand);
3880  SDValue Mask = getValue(MaskOperand);
3881 
3882  EVT VT = Src0.getValueType();
3883  if (!Alignment)
3884  Alignment = DAG.getEVTAlignment(VT);
3885 
3886  AAMDNodes AAInfo;
3887  I.getAAMetadata(AAInfo);
3888 
3889  MachineMemOperand *MMO =
3890  DAG.getMachineFunction().
3891  getMachineMemOperand(MachinePointerInfo(PtrOperand),
3892  MachineMemOperand::MOStore, VT.getStoreSize(),
3893  Alignment, AAInfo);
3894  SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT,
3895  MMO, false /* Truncating */,
3896  IsCompressing);
3897  DAG.setRoot(StoreNode);
3898  setValue(&I, StoreNode);
3899 }
3900 
3901 // Get a uniform base for the Gather/Scatter intrinsic.
3902 // The first argument of the Gather/Scatter intrinsic is a vector of pointers.
3903 // We try to represent it as a base pointer + vector of indices.
3904 // Usually, the vector of pointers comes from a 'getelementptr' instruction.
3905 // The first operand of the GEP may be a single pointer or a vector of pointers
3906 // Example:
3907 // %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
3908 // or
3909 // %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind
3910 // %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
3911 //
3912 // When the first GEP operand is a single pointer, it is the uniform base we
3913 // are looking for. If the first operand of the GEP is a splat vector, we
3914 // extract the splat value and use it as a uniform base.
3915 // In all other cases the function returns 'false'.
3916 static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
3917  SDValue &Scale, SelectionDAGBuilder* SDB) {
3918  SelectionDAG& DAG = SDB->DAG;
3919  LLVMContext &Context = *DAG.getContext();
3920 
3921  assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
3922  const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
3923  if (!GEP)
3924  return false;
3925 
3926  const Value *GEPPtr = GEP->getPointerOperand();
3927  if (!GEPPtr->getType()->isVectorTy())
3928  Ptr = GEPPtr;
3929  else if (!(Ptr = getSplatValue(GEPPtr)))
3930  return false;
3931 
3932  unsigned FinalIndex = GEP->getNumOperands() - 1;
3933  Value *IndexVal = GEP->getOperand(FinalIndex);
3934 
3935  // Ensure all the other indices are 0.
3936  for (unsigned i = 1; i < FinalIndex; ++i) {
3937  auto *C = dyn_cast<ConstantInt>(GEP->getOperand(i));
3938  if (!C || !C->isZero())
3939  return false;
3940  }
3941 
3942  // The operands of the GEP may be defined in another basic block.
3943  // In this case we'll not find nodes for the operands.
3944  if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal))
3945  return false;
3946 
3947  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3948  const DataLayout &DL = DAG.getDataLayout();
3949  Scale = DAG.getTargetConstant(DL.getTypeAllocSize(GEP->getResultElementType()),
3950  SDB->getCurSDLoc(), TLI.getPointerTy(DL));
3951  Base = SDB->getValue(Ptr);
3952  Index = SDB->getValue(IndexVal);
3953 
3954  if (!Index.getValueType().isVector()) {
3955  unsigned GEPWidth = GEP->getType()->getVectorNumElements();
3956  EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
3957  Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index);
3958  }
3959  return true;
3960 }
3961 
3962 void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
3963  SDLoc sdl = getCurSDLoc();
3964 
3965  // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
3966  const Value *Ptr = I.getArgOperand(1);
3967  SDValue Src0 = getValue(I.getArgOperand(0));
3968  SDValue Mask = getValue(I.getArgOperand(3));
3969  EVT VT = Src0.getValueType();
3970  unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue();
3971  if (!Alignment)
3972  Alignment = DAG.getEVTAlignment(VT);
3973  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3974 
3975  AAMDNodes AAInfo;
3976  I.getAAMetadata(AAInfo);
3977 
3978  SDValue Base;
3979  SDValue Index;
3980  SDValue Scale;
3981  const Value *BasePtr = Ptr;
3982  bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);
3983 
3984  const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
3985  MachineMemOperand *MMO = DAG.getMachineFunction().
3986  getMachineMemOperand(MachinePointerInfo(MemOpBasePtr),
3987  MachineMemOperand::MOStore, VT.getStoreSize(),
3988  Alignment, AAInfo);
3989  if (!UniformBase) {
3990  Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
3991  Index = getValue(Ptr);
3992  Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
3993  }
3994  SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale };
3995  SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
3996  Ops, MMO);
3997  DAG.setRoot(Scatter);
3998  setValue(&I, Scatter);
3999 }
4000 
4001 void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
4002  SDLoc sdl = getCurSDLoc();
4003 
4004  auto getMaskedLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
4005  unsigned& Alignment) {
4006  // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
4007  Ptr = I.getArgOperand(0);
4008  Alignment = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
4009  Mask = I.getArgOperand(2);
4010  Src0 = I.getArgOperand(3);
4011  };
4012  auto getExpandingLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
4013  unsigned& Alignment) {
4014  // @llvm.masked.expandload.*(Ptr, Mask, Src0)
4015  Ptr = I.getArgOperand(0);
4016  Alignment = 0;
4017  Mask = I.getArgOperand(1);
4018  Src0 = I.getArgOperand(2);
4019  };
4020 
4021  Value *PtrOperand, *MaskOperand, *Src0Operand;
4022  unsigned Alignment;
4023  if (IsExpanding)
4024  getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
4025  else
4026  getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
4027 
4028  SDValue Ptr = getValue(PtrOperand);
4029  SDValue Src0 = getValue(Src0Operand);
4030  SDValue Mask = getValue(MaskOperand);
4031 
4032  EVT VT = Src0.getValueType();
4033  if (!Alignment)
4034  Alignment = DAG.getEVTAlignment(VT);
4035 
4036  AAMDNodes AAInfo;
4037  I.getAAMetadata(AAInfo);
4038  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
4039 
4040  // Do not serialize masked loads of constant memory with anything.
4041  bool AddToChain = !AA || !AA->pointsToConstantMemory(MemoryLocation(
4042  PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo));
4043  SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
4044 
4045  MachineMemOperand *MMO =
4046  DAG.getMachineFunction().
4047  getMachineMemOperand(MachinePointerInfo(PtrOperand),
4048  MachineMemOperand::MOLoad, VT.getStoreSize(),
4049  Alignment, AAInfo, Ranges);
4050 
4051  SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
4052  ISD::NON_EXTLOAD, IsExpanding);
4053  if (AddToChain) {
4054  SDValue OutChain = Load.getValue(1);
4055  DAG.setRoot(OutChain);
4056  }
4057  setValue(&I, Load);
4058 }
4059 
4060 void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
4061  SDLoc sdl = getCurSDLoc();
4062 
4063  // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
4064  const Value *Ptr = I.getArgOperand(0);
4065  SDValue Src0 = getValue(I.getArgOperand(3));
4066  SDValue Mask = getValue(I.getArgOperand(2));
4067 
4068  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4069  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
4070  unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue();
4071  if (!Alignment)
4072  Alignment = DAG.getEVTAlignment(VT);
4073 
4074  AAMDNodes AAInfo;
4075  I.getAAMetadata(AAInfo);
4076  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
4077 
4078  SDValue Root = DAG.getRoot();
4079  SDValue Base;
4080  SDValue Index;
4081  SDValue Scale;
4082  const Value *BasePtr = Ptr;
4083  bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);
4084  bool ConstantMemory = false;
4085  if (UniformBase &&
4086  AA && AA->pointsToConstantMemory(MemoryLocation(
4087  BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()),
4088  AAInfo))) {
4089  // Do not serialize (non-volatile) loads of constant memory with anything.
4090  Root = DAG.getEntryNode();
4091  ConstantMemory = true;
4092  }
4093 
4094  MachineMemOperand *MMO =
4095  DAG.getMachineFunction().
4096  getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr),
4097  MachineMemOperand::MOLoad, VT.getStoreSize(),
4098  Alignment, AAInfo, Ranges);
4099 
4100  if (!UniformBase) {
4101  Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
4102  Index = getValue(Ptr);
4103  Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
4104  }
4105  SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale };
4106  SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
4107  Ops, MMO);
4108 
4109  SDValue OutChain = Gather.getValue(1);
4110  if (!ConstantMemory)
4111  PendingLoads.push_back(OutChain);
4112  setValue(&I, Gather);
4113 }
4114 
4115 void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
4116  SDLoc dl = getCurSDLoc();
4117  AtomicOrdering SuccessOrder = I.getSuccessOrdering();
4118  AtomicOrdering FailureOrder = I.getFailureOrdering();
4119  SyncScope::ID SSID = I.getSyncScopeID();
4120 
4121  SDValue InChain = getRoot();
4122 
4123  MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
4124  SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
4125  SDValue L = DAG.getAtomicCmpSwap(
4126  ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain,
4127  getValue(I.getPointerOperand()), getValue(I.getCompareOperand()),
4128  getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()),
4129  /*Alignment=*/ 0, SuccessOrder, FailureOrder, SSID);
4130 
4131  SDValue OutChain = L.getValue(2);
4132 
4133  setValue(&I, L);
4134  DAG.setRoot(OutChain);
4135 }
4136 
4137 void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
4138  SDLoc dl = getCurSDLoc();
4139  ISD::NodeType NT;
4140  switch (I.getOperation()) {
4141  default: llvm_unreachable("Unknown atomicrmw operation");
4142  case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
4143  case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
4144  case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
4145  case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break;
4146  case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
4147  case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break;
4148  case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break;
4149  case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break;
4150  case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break;
4151  case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
4152  case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
4153  }
4154  AtomicOrdering Order = I.getOrdering();
4155  SyncScope::ID SSID = I.getSyncScopeID();
4156 
4157  SDValue InChain = getRoot();
4158 
4159  SDValue L =
4160  DAG.getAtomic(NT, dl,
4161  getValue(I.getValOperand()).getSimpleValueType(),
4162  InChain,
4163  getValue(I.getPointerOperand()),
4164  getValue(I.getValOperand()),
4165  I.getPointerOperand(),
4166  /* Alignment=*/ 0, Order, SSID);
4167 
4168  SDValue OutChain = L.getValue(1);
4169 
4170  setValue(&I, L);
4171  DAG.setRoot(OutChain);
4172 }
4173 
4174 void SelectionDAGBuilder::visitFence(const FenceInst &I) {
4175  SDLoc dl = getCurSDLoc();
4176  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4177  SDValue Ops[3];
4178  Ops[0] = getRoot();
4179  Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl,
4180  TLI.getFenceOperandTy(DAG.getDataLayout()));
4181  Ops[2] = DAG.getConstant(I.getSyncScopeID(), dl,
4182  TLI.getFenceOperandTy(DAG.getDataLayout()));
4183  DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
4184 }
4185 
4186 void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
4187  SDLoc dl = getCurSDLoc();
4188  AtomicOrdering Order = I.getOrdering();
4189  SyncScope::ID SSID = I.getSyncScopeID();
4190 
4191  SDValue InChain = getRoot();
4192 
4193  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4194  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
4195 
4196  if (!TLI.supportsUnalignedAtomics() &&
4197  I.getAlignment() < VT.getStoreSize())
4198  report_fatal_error("Cannot generate unaligned atomic load");
4199 
4200  MachineMemOperand *MMO =
4201  DAG.getMachineFunction().
4202  getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
4203  MachineMemOperand::MOVolatile |
4204  MachineMemOperand::MOLoad,
4205  VT.getStoreSize(),
4206  I.getAlignment() ? I.getAlignment() :
4207  DAG.getEVTAlignment(VT),
4208  AAMDNodes(), nullptr, SSID, Order);
4209 
4210  InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
4211  SDValue L =
4212  DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
4213  getValue(I.getPointerOperand()), MMO);
4214 
4215  SDValue OutChain = L.getValue(1);
4216 
4217  setValue(&I, L);
4218  DAG.setRoot(OutChain);
4219 }
4220 
4221 void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
4222  SDLoc dl = getCurSDLoc();
4223 
4224  AtomicOrdering Order = I.getOrdering();
4225  SyncScope::ID SSID = I.getSyncScopeID();
4226 
4227  SDValue InChain = getRoot();
4228 
4229  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4230  EVT VT =
4231  TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
4232 
4233  if (I.getAlignment() < VT.getStoreSize())
4234  report_fatal_error("Cannot generate unaligned atomic store");
4235 
4236  SDValue OutChain =
4237  DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
4238  InChain,
4239  getValue(I.getPointerOperand()),
4240  getValue(I.getValueOperand()),
4241  I.getPointerOperand(), I.getAlignment(),
4242  Order, SSID);
4243 
4244  DAG.setRoot(OutChain);
4245 }
4246 
4247 /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
4248 /// node.
4249 void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
4250  unsigned Intrinsic) {
4251  // Ignore the callsite's attributes. A specific call site may be marked with
4252  // readnone, but the lowering code will expect the chain based on the
4253  // definition.
4254  const Function *F = I.getCalledFunction();
4255  bool HasChain = !F->doesNotAccessMemory();
4256  bool OnlyLoad = HasChain && F->onlyReadsMemory();
4257 
4258  // Build the operand list.
4259  SmallVector<SDValue, 8> Ops;
4260  if (HasChain) { // If this intrinsic has side-effects, chainify it.
4261  if (OnlyLoad) {
4262  // We don't need to serialize loads against other loads.
4263  Ops.push_back(DAG.getRoot());
4264  } else {
4265  Ops.push_back(getRoot());
4266  }
4267  }
4268 
4269  // Info is set by getTgtMemIntrinsic.
4270  TargetLowering::IntrinsicInfo Info;
4271  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4272  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
4273  DAG.getMachineFunction(),
4274  Intrinsic);
4275 
4276  // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
4277  if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
4278  Info.opc == ISD::INTRINSIC_W_CHAIN)
4279  Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
4280  TLI.getPointerTy(DAG.getDataLayout())));
4281 
4282  // Add all operands of the call to the operand list.
4283  for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
4284  SDValue Op = getValue(I.getArgOperand(i));
4285  Ops.push_back(Op);
4286  }
4287 
4288  SmallVector<EVT, 4> ValueVTs;
4289  ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
4290 
4291  if (HasChain)
4292  ValueVTs.push_back(MVT::Other);
4293 
4294  SDVTList VTs = DAG.getVTList(ValueVTs);
4295 
4296  // Create the node.
4297  SDValue Result;
4298  if (IsTgtIntrinsic) {
4299  // This is a target intrinsic that touches memory.
4300  Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs,
4301  Ops, Info.memVT,
4302  MachinePointerInfo(Info.ptrVal, Info.offset), Info.align,
4303  Info.flags, Info.size);
4304  } else if (!HasChain) {
4305  Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
4306  } else if (!I.getType()->isVoidTy()) {
4307  Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
4308  } else {
4309  Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
4310  }
4311 
4312  if (HasChain) {
4313  SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
4314  if (OnlyLoad)
4315  PendingLoads.push_back(Chain);
4316  else
4317  DAG.setRoot(Chain);
4318  }
4319 
4320  if (!I.getType()->isVoidTy()) {
4321  if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
4322  EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy);
4323  Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
4324  } else
4325  Result = lowerRangeToAssertZExt(DAG, I, Result);
4326 
4327  setValue(&I, Result);
4328  }
4329 }
4330 
4331 /// GetSignificand - Get the significand and build it into a floating-point
4332 /// number with exponent of 1:
4333 ///
4334 /// Op = (Op & 0x007fffff) | 0x3f800000;
4335 ///
4336 /// where Op is the i32 bit pattern of the floating-point value.
4337 static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) {
4338  SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
4339  DAG.getConstant(0x007fffff, dl, MVT::i32));
4340  SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
4341  DAG.getConstant(0x3f800000, dl, MVT::i32));
4342  return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
4343 }
4344 
4345 /// GetExponent - Get the exponent:
4346 ///
4347 /// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
4348 ///
4349 /// where Op is the i32 bit pattern of the floating-point value.
4350 static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
4351  const TargetLowering &TLI, const SDLoc &dl) {
4352  SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
4353  DAG.getConstant(0x7f800000, dl, MVT::i32));
4354  SDValue t1 = DAG.getNode(
4355  ISD::SRL, dl, MVT::i32, t0,
4356  DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout())));
4357  SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
4358  DAG.getConstant(127, dl, MVT::i32));
4359  return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
4360 }
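// Editor's illustration (added; not in the original source): for Op holding
// the bits of 3.14159274f (0x40490fdb), GetExponent computes
//   ((0x40490fdb & 0x7f800000) >> 23) - 127 = 128 - 127 -> 1.0f,
// and GetSignificand computes
//   (0x40490fdb & 0x007fffff) | 0x3f800000 = 0x3fc90fdb -> 1.5707964f,
// consistent with pi = 2^1 * 1.5707964.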
4361 
4362 /// getF32Constant - Get 32-bit floating point constant.
4363 static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
4364  const SDLoc &dl) {
4365  return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl,
4366  MVT::f32);
4367 }
4368 
4370  SelectionDAG &DAG) {
4371  // TODO: What fast-math-flags should be set on the floating-point nodes?
4372 
4373  // IntegerPartOfX = (int32_t)t0;
4374  SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
4375 
4376  // FractionalPartOfX = t0 - (float)IntegerPartOfX;
4377  SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
4378  SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
4379 
4380  // IntegerPartOfX <<= 23;
4381  IntegerPartOfX = DAG.getNode(
4382  ISD::SHL, dl, MVT::i32, IntegerPartOfX,
4383  DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy(
4384  DAG.getDataLayout())));
4385 
4386  SDValue TwoToFractionalPartOfX;
4387  if (LimitFloatPrecision <= 6) {
4388  // For floating-point precision of 6:
4389  //
4390  // TwoToFractionalPartOfX =
4391  // 0.997535578f +
4392  // (0.735607626f + 0.252464424f * x) * x;
4393  //
4394  // error 0.0144103317, which is 6 bits
4395  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4396  getF32Constant(DAG, 0x3e814304, dl));
4397  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4398  getF32Constant(DAG, 0x3f3c50c8, dl));
4399  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4400  TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4401  getF32Constant(DAG, 0x3f7f5e7e, dl));
4402  } else if (LimitFloatPrecision <= 12) {
4403  // For floating-point precision of 12:
4404  //
4405  // TwoToFractionalPartOfX =
4406  // 0.999892986f +
4407  // (0.696457318f +
4408  // (0.224338339f + 0.792043434e-1f * x) * x) * x;
4409  //
4410  // error 0.000107046256, which is 13 to 14 bits
4411  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4412  getF32Constant(DAG, 0x3da235e3, dl));
4413  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4414  getF32Constant(DAG, 0x3e65b8f3, dl));
4415  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4416  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4417  getF32Constant(DAG, 0x3f324b07, dl));
4418  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4419  TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4420  getF32Constant(DAG, 0x3f7ff8fd, dl));
4421  } else { // LimitFloatPrecision <= 18
4422  // For floating-point precision of 18:
4423  //
4424  // TwoToFractionalPartOfX =
4425  // 0.999999982f +
4426  // (0.693148872f +
4427  // (0.240227044f +
4428  // (0.554906021e-1f +
4429  // (0.961591928e-2f +
4430  // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
4431  // error 2.47208000*10^(-7), which is better than 18 bits
4432  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4433  getF32Constant(DAG, 0x3924b03e, dl));
4434  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4435  getF32Constant(DAG, 0x3ab24b87, dl));
4436  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4437  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4438  getF32Constant(DAG, 0x3c1d8c17, dl));
4439  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4440  SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4441  getF32Constant(DAG, 0x3d634a1d, dl));
4442  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4443  SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4444  getF32Constant(DAG, 0x3e75fe14, dl));
4445  SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4446  SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
4447  getF32Constant(DAG, 0x3f317234, dl));
4448  SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
4449  TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
4450  getF32Constant(DAG, 0x3f800000, dl));
4451  }
4452 
4453  // Add the exponent into the result in integer domain.
4454  SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX);
4455  return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
4456  DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
4457 }
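// Editor's note (added; not in the original source): the final integer ADD
// works because IntegerPartOfX was shifted into the IEEE-754 exponent field
// (bit 23), so adding it to the bit pattern of TwoToFractionalPartOfX scales
// that float by 2^IntegerPartOfX without an FMUL, giving
// 2^frac * 2^int = 2^t0 (assuming the biased exponent does not overflow).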
4458 
4459 /// expandExp - Lower an exp intrinsic. Handles the special sequences for
4460 /// limited-precision mode.
4461 static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4462  const TargetLowering &TLI) {
4463  if (Op.getValueType() == MVT::f32 &&
4464  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4465 
4466  // Put the exponent in the right bit position for later addition to the
4467  // final result:
4468  //
4469  // #define LOG2OFe 1.4426950f
4470  // t0 = Op * LOG2OFe
4471 
4472  // TODO: What fast-math-flags should be set here?
4473  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
4474  getF32Constant(DAG, 0x3fb8aa3b, dl));
4475  return getLimitedPrecisionExp2(t0, dl, DAG);
4476  }
4477 
4478  // No special expansion.
4479  return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op);
4480 }
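// Editor's note (added; not in the original source): the expansion uses the
// identity exp(x) = 2^(x * log2(e)); 0x3fb8aa3b is the f32 encoding of
// 1.4426950f = log2(e), so t0 = x * log2(e) feeds getLimitedPrecisionExp2.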
4481 
4482 /// expandLog - Lower a log intrinsic. Handles the special sequences for
4483 /// limited-precision mode.
4484 static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4485  const TargetLowering &TLI) {
4486  // TODO: What fast-math-flags should be set on the floating-point nodes?
4487 
4488  if (Op.getValueType() == MVT::f32 &&
4489  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4490  SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4491 
4492  // Scale the exponent by log(2) [0.69314718f].
4493  SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
4494  SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
4495  getF32Constant(DAG, 0x3f317218, dl));
4496 
4497  // Get the significand and build it into a floating-point number with
4498  // exponent of 1.
4499  SDValue X = GetSignificand(DAG, Op1, dl);
4500 
4501  SDValue LogOfMantissa;
4502  if (LimitFloatPrecision <= 6) {
4503  // For floating-point precision of 6:
4504  //
4505  // LogofMantissa =
4506  // -1.1609546f +
4507  // (1.4034025f - 0.23903021f * x) * x;
4508  //
4509  // error 0.0034276066, which is better than 8 bits
4510  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4511  getF32Constant(DAG, 0xbe74c456, dl));
4512  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4513  getF32Constant(DAG, 0x3fb3a2b1, dl));
4514  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4515  LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4516  getF32Constant(DAG, 0x3f949a29, dl));
4517  } else if (LimitFloatPrecision <= 12) {
4518  // For floating-point precision of 12:
4519  //
4520  // LogOfMantissa =
4521  // -1.7417939f +
4522  // (2.8212026f +
4523  // (-1.4699568f +
4524  // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
4525  //
4526  // error 0.000061011436, which is 14 bits
4527  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4528  getF32Constant(DAG, 0xbd67b6d6, dl));
4529  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4530  getF32Constant(DAG, 0x3ee4f4b8, dl));
4531  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4532  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4533  getF32Constant(DAG, 0x3fbc278b, dl));
4534  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4535  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4536  getF32Constant(DAG, 0x40348e95, dl));
4537  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4538  LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4539  getF32Constant(DAG, 0x3fdef31a, dl));
4540  } else { // LimitFloatPrecision <= 18
4541  // For floating-point precision of 18:
4542  //
4543  // LogOfMantissa =
4544  // -2.1072184f +
4545  // (4.2372794f +
4546  // (-3.7029485f +
4547  // (2.2781945f +
4548  // (-0.87823314f +
4549  // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
4550  //
4551  // error 0.0000023660568, which is better than 18 bits
4552  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4553  getF32Constant(DAG, 0xbc91e5ac, dl));
4554  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4555  getF32Constant(DAG, 0x3e4350aa, dl));
4556  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4557  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4558  getF32Constant(DAG, 0x3f60d3e3, dl));
4559  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4560  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4561  getF32Constant(DAG, 0x4011cdf0, dl));
4562  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4563  SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4564  getF32Constant(DAG, 0x406cfd1c, dl));
4565  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4566  SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4567  getF32Constant(DAG, 0x408797cb, dl));
4568  SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4569  LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
4570  getF32Constant(DAG, 0x4006dcab, dl));
4571  }
4572 
4573  return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
4574  }
4575 
4576  // No special expansion.
4577  return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op);
4578 }
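// The expansion above splits x = m * 2^E with m in [1,2) and computes
// log(x) = E * log(2) + log(m); 0x3f317218 decodes to log(2) = 0.69314718f.
// Each FMUL/FADD/FSUB chain is a Horner evaluation of the minimax polynomial
// quoted in its comment. For instance, the 6-bit case as plain C++ (a sketch
// using the decoded constants; the helper name is hypothetical):
static float editorLogOfMantissa6(float X) {
  // -1.1609546f + (1.4034025f - 0.23903021f * X) * X
  return (1.4034025f - 0.23903021f * X) * X - 1.1609546f;
}
// expandLog then returns E * 0.69314718f + editorLogOfMantissa6(m).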
4579 
4580 /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
4581 /// limited-precision mode.
4582 static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4583  const TargetLowering &TLI) {
4584  // TODO: What fast-math-flags should be set on the floating-point nodes?
4585 
4586  if (Op.getValueType() == MVT::f32 &&
4587  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4588  SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4589 
4590  // Get the exponent.
4591  SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
4592 
4593  // Get the significand and build it into a floating-point number with
4594  // exponent of 1.
4595  SDValue X = GetSignificand(DAG, Op1, dl);
4596 
4597  // Different possible minimax approximations of significand in
4598  // floating-point for various degrees of accuracy over [1,2].
4599  SDValue Log2ofMantissa;
4600  if (LimitFloatPrecision <= 6) {
4601  // For floating-point precision of 6:
4602  //
4603  // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
4604  //
4605  // error 0.0049451742, which is more than 7 bits
4606  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4607  getF32Constant(DAG, 0xbeb08fe0, dl));
4608  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4609  getF32Constant(DAG, 0x40019463, dl));
4610  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4611  Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4612  getF32Constant(DAG, 0x3fd6633d, dl));
4613  } else if (LimitFloatPrecision <= 12) {
4614  // For floating-point precision of 12:
4615  //
4616  // Log2ofMantissa =
4617  // -2.51285454f +
4618  // (4.07009056f +
4619  // (-2.12067489f +
4620  // (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
4621  //
4622  // error 0.0000876136000, which is better than 13 bits
4623  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4624  getF32Constant(DAG, 0xbda7262e, dl));
4625  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4626  getF32Constant(DAG, 0x3f25280b, dl));
4627  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4628  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4629  getF32Constant(DAG, 0x4007b923, dl));
4630  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4631  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4632  getF32Constant(DAG, 0x40823e2f, dl));
4633  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4634  Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4635  getF32Constant(DAG, 0x4020d29c, dl));
4636  } else { // LimitFloatPrecision <= 18
4637  // For floating-point precision of 18:
4638  //
4639  // Log2ofMantissa =
4640  // -3.0400495f +
4641  // (6.1129976f +
4642  // (-5.3420409f +
4643  // (3.2865683f +
4644  // (-1.2669343f +
4645  // (0.27515199f -
4646  // 0.25691327e-1f * x) * x) * x) * x) * x) * x;
4647  //
4648  // error 0.0000018516, which is better than 18 bits
4649  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4650  getF32Constant(DAG, 0xbcd2769e, dl));
4651  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4652  getF32Constant(DAG, 0x3e8ce0b9, dl));
4653  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4654  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4655  getF32Constant(DAG, 0x3fa22ae7, dl));
4656  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4657  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4658  getF32Constant(DAG, 0x40525723, dl));
4659  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4660  SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4661  getF32Constant(DAG, 0x40aaf200, dl));
4662  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4663  SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4664  getF32Constant(DAG, 0x40c39dad, dl));
4665  SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4666  Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
4667  getF32Constant(DAG, 0x4042902c, dl));
4668  }
4669 
4670  return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
4671  }
4672 
4673  // No special expansion.
4674  return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op);
4675 }
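// For log2 the exponent needs no scaling: log2(x) = E + log2(m). GetExponent
// and GetSignificand (defined earlier in this file) recover E and m from the
// i32 bit pattern produced by the BITCAST. A scalar analogue of that split,
// valid for positive normal binary32 inputs (hypothetical helper name):
static void editorSplitF32(float X, float &Exponent, float &Significand) {
  uint32_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits));
  // The biased exponent field minus 127 gives E.
  Exponent =
      static_cast<float>(static_cast<int32_t>((Bits >> 23) & 0xff) - 127);
  // Overwriting the exponent field with 127 (i.e. 2^0) leaves m in [1,2).
  uint32_t MBits = (Bits & 0x007fffffu) | 0x3f800000u;
  std::memcpy(&Significand, &MBits, sizeof(Significand));
}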
4676 
4677 /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
4678 /// limited-precision mode.
4679 static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4680  const TargetLowering &TLI) {
4681  // TODO: What fast-math-flags should be set on the floating-point nodes?
4682 
4683  if (Op.getValueType() == MVT::f32 &&
4684  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4685  SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4686 
4687  // Scale the exponent by log10(2) [0.30102999f].
4688  SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
4689  SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
4690  getF32Constant(DAG, 0x3e9a209a, dl));
4691 
4692  // Get the significand and build it into a floating-point number with
4693  // exponent of 1.
4694  SDValue X = GetSignificand(DAG, Op1, dl);
4695 
4696  SDValue Log10ofMantissa;
4697  if (LimitFloatPrecision <= 6) {
4698  // For floating-point precision of 6:
4699  //
4700  // Log10ofMantissa =
4701  // -0.50419619f +
4702  // (0.60948995f - 0.10380950f * x) * x;
4703  //
4704  // error 0.0014886165, which is 6 bits
4705  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4706  getF32Constant(DAG, 0xbdd49a13, dl));
4707  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4708  getF32Constant(DAG, 0x3f1c0789, dl));
4709  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4710  Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4711  getF32Constant(DAG, 0x3f011300, dl));
4712  } else if (LimitFloatPrecision <= 12) {
4713  // For floating-point precision of 12:
4714  //
4715  // Log10ofMantissa =
4716  // -0.64831180f +
4717  // (0.91751397f +
4718  // (-0.31664806f + 0.47637168e-1f * x) * x) * x;
4719  //
4720  // error 0.00019228036, which is better than 12 bits
4721  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4722  getF32Constant(DAG, 0x3d431f31, dl));
4723  SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
4724  getF32Constant(DAG, 0x3ea21fb2, dl));
4725  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4726  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4727  getF32Constant(DAG, 0x3f6ae232, dl));
4728  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4729  Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
4730  getF32Constant(DAG, 0x3f25f7c3, dl));
4731  } else { // LimitFloatPrecision <= 18
4732  // For floating-point precision of 18:
4733  //
4734  // Log10ofMantissa =
4735  // -0.84299375f +
4736  // (1.5327582f +
4737  // (-1.0688956f +
4738  // (0.49102474f +
4739  // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
4740  //
4741  // error 0.0000037995730, which is better than 18 bits
4742  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4743  getF32Constant(DAG, 0x3c5d51ce, dl));
4744  SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
4745  getF32Constant(DAG, 0x3e00685a, dl));
4746  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4747  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4748  getF32Constant(DAG, 0x3efb6798, dl));
4749  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4750  SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
4751  getF32Constant(DAG, 0x3f88d192, dl));
4752  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4753  SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4754  getF32Constant(DAG, 0x3fc4316c, dl));
4755  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4756  Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
4757  getF32Constant(DAG, 0x3f57ce70, dl));
4758  }
4759 
4760  return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
4761  }
4762 
4763  // No special expansion.
4764  return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op);
4765 }
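// The same scheme once more, with the exponent scaled by log10(2):
// log10(x) = E * log10(2) + log10(m); 0x3e9a209a decodes to 0.30102999f.
// The 6-bit mantissa polynomial from the comment above, as a sketch
// (hypothetical helper name):
static float editorLog10OfMantissa6(float X) {
  // -0.50419619f + (0.60948995f - 0.10380950f * X) * X
  return (0.60948995f - 0.10380950f * X) * X - 0.50419619f;
}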
4766 
4767 /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
4768 /// limited-precision mode.
4769 static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4770  const TargetLowering &TLI) {
4771  if (Op.getValueType() == MVT::f32 &&
4772  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
4773  return getLimitedPrecisionExp2(Op, dl, DAG);
4774 
4775  // No special expansion.
4776  return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op);
4777 }
4778 
4779 /// expandPow - Lower a pow intrinsic. Handles the special sequence for
4780 /// limited-precision mode when the base is exactly 10.0f.
4781 static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
4782  SelectionDAG &DAG, const TargetLowering &TLI) {
4783  bool IsExp10 = false;
4784  if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
4785  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4786  if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
4787  APFloat Ten(10.0f);
4788  IsExp10 = LHSC->isExactlyValue(Ten);
4789  }
4790  }
4791 
4792  // TODO: What fast-math-flags should be set on the FMUL node?
4793  if (IsExp10) {
4794  // Put the exponent in the right bit position for later addition to the
4795  // final result:
4796  //
4797  // #define LOG2OF10 3.3219281f
4798  // t0 = Op * LOG2OF10;
4799  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
4800  getF32Constant(DAG, 0x40549a78, dl));
4801  return getLimitedPrecisionExp2(t0, dl, DAG);
4802  }
4803 
4804  // No special expansion.
4805  return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
4806 }
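// The rewrite is only attempted when the base is the exact constant 10.0f,
// because it depends on the identity 10^x = 2^(x * log2(10)); 0x40549a78
// decodes to log2(10) = 3.3219281f. The identity as scalar C++ (a sketch
// assuming <cmath>; the helper name is hypothetical):
static float editorExp10(float X) {
  return std::exp2(X * 3.3219281f); // 10^x == 2^(x * log2(10))
}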
4807 
4808 /// ExpandPowI - Expand a llvm.powi intrinsic.
4809 static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
4810  SelectionDAG &DAG) {
4811  // If RHS is a constant, we can expand this out to a multiplication tree,
4812  // otherwise we end up lowering to a call to __powidf2 (for example). When
4813  // optimizing for size, we only want to do this if the expansion would produce
4814  // a small number of multiplies, otherwise we do the full expansion.
4815  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4816  // Get the exponent as a positive value.
4817  unsigned Val = RHSC->getSExtValue();
4818  if ((int)Val < 0) Val = -Val;
4819 
4820  // powi(x, 0) -> 1.0
4821  if (Val == 0)
4822  return DAG.getConstantFP(1.0, DL, LHS.getValueType());
4823 
4824  const Function &F = DAG.getMachineFunction().getFunction();
4825  if (!F.optForSize() ||
4826  // If optimizing for size, don't insert too many multiplies.
4827  // This inserts up to 5 multiplies.
4828  countPopulation(Val) + Log2_32(Val) < 7) {
4829  // We use the simple binary decomposition method to generate the multiply
4830  // sequence. There are more optimal ways to do this (for example,
4831  // powi(x,15) generates one more multiply than it should), but this has
4832  // the benefit of being both really simple and much better than a libcall.
4833  SDValue Res; // Logically starts equal to 1.0
4834  SDValue CurSquare = LHS;
4835  // TODO: Intrinsics should have fast-math-flags that propagate to these
4836  // nodes.
4837  while (Val) {
4838  if (Val & 1) {
4839  if (Res.getNode())
4840  Res = DAG.getNode(ISD::FMUL, DL, Res.getValueType(), Res, CurSquare);
4841  else
4842  Res = CurSquare; // 1.0*CurSquare.
4843  }
4844 
4845  CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
4846  CurSquare, CurSquare);
4847  Val >>= 1;
4848  }
4849 
4850  // If the original was negative, invert the result, producing 1/(x*x*x).
4851  if (RHSC->getSExtValue() < 0)
4852  Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
4853  DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res);
4854  return Res;
4855  }
4856  }
4857 
4858  // Otherwise, expand to a libcall.
4859  return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
4860 }
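// The loop above is binary exponentiation: CurSquare is squared once per bit
// of Val, and folded into Res whenever that bit is set. A scalar sketch of
// the same sequence (hypothetical helper; it mirrors the FMUL/FDIV nodes,
// including the unsigned negation used for the magnitude):
static double editorPowi(double X, int N) {
  unsigned Val = static_cast<unsigned>(N);
  if (N < 0)
    Val = 0u - Val; // magnitude, computed in unsigned arithmetic as above
  double Res = 1.0, CurSquare = X;
  while (Val) {
    if (Val & 1)
      Res *= CurSquare;     // fold in x^(2^i) for each set bit i
    CurSquare *= CurSquare; // x, x^2, x^4, ...
    Val >>= 1;
  }
  return N < 0 ? 1.0 / Res : Res; // powi(x, -n) == 1 / powi(x, n)
}
// The size heuristic counts exactly the FMUL nodes this creates:
// countPopulation(Val) + Log2_32(Val). For Val == 13 (0b1101) that is
// 3 + 3 == 6, which passes the "< 7" check; the final squaring is dead,
// leaving the 5 live multiplies the comment above mentions.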
4861 
4862 // getUnderlyingArgReg - Find underlying register used for a truncated or
4863 // bitcasted argument.
4864 static unsigned getUnderlyingArgReg(const SDValue &N) {
4865  switch (N.getOpcode()) {
4866  case ISD::CopyFromReg:
4867  return cast<RegisterSDNode>(N.getOperand(1))->getReg();
4868  case ISD::BITCAST:
4869  case ISD::AssertZext:
4870  case ISD::AssertSext:
4871  case ISD::TRUNCATE:
4872  return getUnderlyingArgReg(N.getOperand(0));
4873  default:
4874  return 0;
4875  }
4876 }
4877 
4878 /// If the DbgValueInst is a dbg_value of a function argument, create the
4879 /// corresponding DBG_VALUE machine instruction for it now. At the end of
4880 /// instruction selection, they will be inserted to the entry BB.
4881 bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
4882  const Value *V, DILocalVariable *Variable, DIExpression *Expr,
4883  DILocation *DL, bool IsDbgDeclare, const SDValue &N) {
4884  const Argument *Arg = dyn_cast<Argument>(V);
4885  if (!Arg)
4886  return false;
4887 
4888  MachineFunction &MF = DAG.getMachineFunction();
4889  const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
4890 
4891  bool IsIndirect = false;
4892  Optional<MachineOperand> Op;
4893  // Some arguments' frame index is recorded during argument lowering.
4894  int FI = FuncInfo.getArgumentFrameIndex(Arg);
4895  if (FI != std::numeric_limits<int>::max())
4896  Op = MachineOperand::CreateFI(FI);
4897 
4898  if (!Op && N.getNode()) {
4899  unsigned Reg = getUnderlyingArgReg(N);
4900  if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
4901  MachineRegisterInfo &RegInfo = MF.getRegInfo();
4902  unsigned PR = RegInfo.getLiveInPhysReg(Reg);
4903  if (PR)
4904  Reg = PR;
4905  }
4906  if (Reg) {
4907  Op = MachineOperand::CreateReg(Reg, false);
4908  IsIndirect = IsDbgDeclare;
4909  }
4910  }
4911 
4912  if (!Op && N.getNode())
4913  // Check if frame index is available.
4914  if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
4915  if (FrameIndexSDNode *FINode =
4916  dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
4917  Op = MachineOperand::CreateFI(FINode->getIndex());
4918 
4919  if (!Op) {
4920  // Check if ValueMap has reg number.
4921  DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
4922  if (VMI != FuncInfo.ValueMap.end()) {
4923  const auto &TLI = DAG.getTargetLoweringInfo();
4924  RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
4925  V->getType(), isABIRegCopy(V));
4926  if (RFV.occupiesMultipleRegs()) {
4927  unsigned Offset = 0;
4928  for (auto RegAndSize : RFV.getRegsAndSizes()) {
4929  Op = MachineOperand::CreateReg(RegAndSize.first, false);
4930  auto FragmentExpr = DIExpression::createFragmentExpression(
4931  Expr, Offset, RegAndSize.second);
4932  if (!FragmentExpr)
4933  continue;
4934  FuncInfo.ArgDbgValues.push_back(
4935  BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare,
4936  Op->getReg(), Variable, *FragmentExpr));
4937  Offset += RegAndSize.second;
4938  }
4939  return true;
4940  }
4941  Op = MachineOperand::CreateReg(VMI->second, false);
4942  IsIndirect = IsDbgDeclare;
4943  }
4944  }
4945 
4946  if (!Op)
4947  return false;
4948 
4949  assert(Variable->isValidLocationForIntrinsic(DL) &&
4950  "Expected inlined-at fields to agree");
4951  IsIndirect = (Op->isReg()) ? IsIndirect : true;
4952  FuncInfo.ArgDbgValues.push_back(
4953  BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
4954  *Op, Variable, Expr));
4955 
4956  return true;
4957 }
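// When a value occupies several registers, the multi-register path above
// emits one DBG_VALUE per register, each carrying a fragment expression at a
// running bit offset so the debugger can reassemble the whole variable. A
// sketch of that bookkeeping (EditorFragment and editorMakeFragments are
// hypothetical stand-ins for DIExpression::createFragmentExpression):
struct EditorFragment {
  unsigned Reg;
  unsigned OffsetInBits;
  unsigned SizeInBits;
};
static SmallVector<EditorFragment, 4>
editorMakeFragments(ArrayRef<std::pair<unsigned, unsigned>> RegsAndSizes) {
  SmallVector<EditorFragment, 4> Frags;
  unsigned Offset = 0; // running bit offset within the variable
  for (const auto &RS : RegsAndSizes) {
    Frags.push_back({RS.first, Offset, RS.second});
    Offset += RS.second; // the next register covers the following bits
  }
  return Frags; // e.g. an i128 in two 64-bit regs -> [0, 64) and [64, 128)
}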
4958 
4959 /// Return the appropriate SDDbgValue based on N.
4960 SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
4961  DILocalVariable *Variable,
4962  DIExpression *Expr,
4963  const DebugLoc &dl,
4964  unsigned DbgSDNodeOrder) {
4965  if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
4966  // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
4967  // stack slot locations as such instead of as indirectly addressed
4968  // locations.
4969  return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), dl,
4970  DbgSDNodeOrder);
4971  }
4972  return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, dl,
4973  DbgSDNodeOrder);
4974 }
4975