1 //===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a DAG pattern matching instruction selector for X86,
10 // converting from a legalized dag to a X86 dag.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "X86.h"
15 #include "X86MachineFunctionInfo.h"
16 #include "X86RegisterInfo.h"
17 #include "X86Subtarget.h"
18 #include "X86TargetMachine.h"
19 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Config/llvm-config.h"
24 #include "llvm/IR/ConstantRange.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/Instructions.h"
27 #include "llvm/IR/Intrinsics.h"
28 #include "llvm/IR/Type.h"
29 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/KnownBits.h"
36 #include <stdint.h>
37 using namespace llvm;
38 
39 #define DEBUG_TYPE "x86-isel"
40 
41 STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
42 
43 static cl::opt<bool> AndImmShrink("x86-and-imm-shrink", cl::init(true),
44  cl::desc("Enable setting constant bits to reduce size of mask immediates"),
45  cl::Hidden);
46 
47 //===----------------------------------------------------------------------===//
48 // Pattern Matcher Implementation
49 //===----------------------------------------------------------------------===//
50 
51 namespace {
52  /// This corresponds to X86AddressMode, but uses SDValue's instead of register
53  /// numbers for the leaves of the matched tree.
54  struct X86ISelAddressMode {
55  enum {
56  RegBase,
57  FrameIndexBase
58  } BaseType;
59 
60  // This is really a union, discriminated by BaseType!
61  SDValue Base_Reg;
62  int Base_FrameIndex;
63 
64  unsigned Scale;
65  SDValue IndexReg;
66  int32_t Disp;
67  SDValue Segment;
68  const GlobalValue *GV;
69  const Constant *CP;
70  const BlockAddress *BlockAddr;
71  const char *ES;
72  MCSymbol *MCSym;
73  int JT;
74  unsigned Align; // CP alignment.
75  unsigned char SymbolFlags; // X86II::MO_*
76  bool NegateIndex = false;
77 
78  X86ISelAddressMode()
79  : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
80  Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
81  MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {}
82 
83  bool hasSymbolicDisplacement() const {
84  return GV != nullptr || CP != nullptr || ES != nullptr ||
85  MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
86  }
87 
88  bool hasBaseOrIndexReg() const {
89  return BaseType == FrameIndexBase ||
90  IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
91  }
92 
93  /// Return true if this addressing mode is already RIP-relative.
94  bool isRIPRelative() const {
95  if (BaseType != RegBase) return false;
96  if (RegisterSDNode *RegNode =
97  dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
98  return RegNode->getReg() == X86::RIP;
99  return false;
100  }
101 
102  void setBaseReg(SDValue Reg) {
103  BaseType = RegBase;
104  Base_Reg = Reg;
105  }
106 
107 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
108  void dump(SelectionDAG *DAG = nullptr) {
109  dbgs() << "X86ISelAddressMode " << this << '\n';
110  dbgs() << "Base_Reg ";
111  if (Base_Reg.getNode())
112  Base_Reg.getNode()->dump(DAG);
113  else
114  dbgs() << "nul\n";
115  if (BaseType == FrameIndexBase)
116  dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n';
117  dbgs() << " Scale " << Scale << '\n'
118  << "IndexReg ";
119  if (NegateIndex)
120  dbgs() << "negate ";
121  if (IndexReg.getNode())
122  IndexReg.getNode()->dump(DAG);
123  else
124  dbgs() << "nul\n";
125  dbgs() << " Disp " << Disp << '\n'
126  << "GV ";
127  if (GV)
128  GV->dump();
129  else
130  dbgs() << "nul";
131  dbgs() << " CP ";
132  if (CP)
133  CP->dump();
134  else
135  dbgs() << "nul";
136  dbgs() << '\n'
137  << "ES ";
138  if (ES)
139  dbgs() << ES;
140  else
141  dbgs() << "nul";
142  dbgs() << " MCSym ";
143  if (MCSym)
144  dbgs() << MCSym;
145  else
146  dbgs() << "nul";
147  dbgs() << " JT" << JT << " Align" << Align << '\n';
148  }
149 #endif
150  };
151 }
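// Illustrative note (not part of the original source): the fields of
// X86ISelAddressMode map onto the general x86 memory operand form
//   Segment : [Base_Reg + Scale * IndexReg + Disp + symbol]
// where the symbolic part of the displacement comes from at most one of GV,
// CP, ES, MCSym, JT or BlockAddr, as hasSymbolicDisplacement() checks.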
152 
153 namespace {
154  //===--------------------------------------------------------------------===//
155  /// ISel - X86-specific code to select X86 machine instructions for
156  /// SelectionDAG operations.
157  ///
158  class X86DAGToDAGISel final : public SelectionDAGISel {
159  /// Keep a pointer to the X86Subtarget around so that we can
160  /// make the right decision when generating code for different targets.
161  const X86Subtarget *Subtarget;
162 
163  /// If true, selector should try to optimize for code size instead of
164  /// performance.
165  bool OptForSize;
166 
167  /// If true, selector should try to optimize for minimum code size.
168  bool OptForMinSize;
169 
170  /// Disable direct TLS access through segment registers.
171  bool IndirectTlsSegRefs;
172 
173  public:
174  explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
175  : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), OptForSize(false),
176  OptForMinSize(false), IndirectTlsSegRefs(false) {}
177 
178  StringRef getPassName() const override {
179  return "X86 DAG->DAG Instruction Selection";
180  }
181 
182  bool runOnMachineFunction(MachineFunction &MF) override {
183  // Reset the subtarget each time through.
184  Subtarget = &MF.getSubtarget<X86Subtarget>();
185  IndirectTlsSegRefs = MF.getFunction().hasFnAttribute(
186  "indirect-tls-seg-refs");
187 
188  // OptFor[Min]Size are used in pattern predicates that isel is matching.
189  OptForSize = MF.getFunction().hasOptSize();
190  OptForMinSize = MF.getFunction().hasMinSize();
191  assert((!OptForMinSize || OptForSize) &&
192  "OptForMinSize implies OptForSize");
193 
194  SelectionDAGISel::runOnMachineFunction(MF);
195  return true;
196  }
197 
198  void EmitFunctionEntryCode() override;
199 
200  bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;
201 
202  void PreprocessISelDAG() override;
203  void PostprocessISelDAG() override;
204 
205 // Include the pieces autogenerated from the target description.
206 #include "X86GenDAGISel.inc"
207 
208  private:
209  void Select(SDNode *N) override;
210 
211  bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
212  bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
213  bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
214  bool matchAddress(SDValue N, X86ISelAddressMode &AM);
215  bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
216  bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth);
217  bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
218  unsigned Depth);
219  bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
220  bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
221  SDValue &Scale, SDValue &Index, SDValue &Disp,
222  SDValue &Segment);
223  bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
224  SDValue &Scale, SDValue &Index, SDValue &Disp,
225  SDValue &Segment);
226  bool selectMOV64Imm32(SDValue N, SDValue &Imm);
227  bool selectLEAAddr(SDValue N, SDValue &Base,
228  SDValue &Scale, SDValue &Index, SDValue &Disp,
229  SDValue &Segment);
230  bool selectLEA64_32Addr(SDValue N, SDValue &Base,
231  SDValue &Scale, SDValue &Index, SDValue &Disp,
232  SDValue &Segment);
233  bool selectTLSADDRAddr(SDValue N, SDValue &Base,
234  SDValue &Scale, SDValue &Index, SDValue &Disp,
235  SDValue &Segment);
236  bool selectScalarSSELoad(SDNode *Root, SDNode *Parent, SDValue N,
237  SDValue &Base, SDValue &Scale,
238  SDValue &Index, SDValue &Disp,
239  SDValue &Segment,
240  SDValue &NodeWithChain);
241  bool selectRelocImm(SDValue N, SDValue &Op);
242 
243  bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
244  SDValue &Base, SDValue &Scale,
245  SDValue &Index, SDValue &Disp,
246  SDValue &Segment);
247 
248  // Convenience method where P is also root.
249  bool tryFoldLoad(SDNode *P, SDValue N,
250  SDValue &Base, SDValue &Scale,
251  SDValue &Index, SDValue &Disp,
252  SDValue &Segment) {
253  return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment);
254  }
255 
256  /// Implement addressing mode selection for inline asm expressions.
257  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
258  unsigned ConstraintID,
259  std::vector<SDValue> &OutOps) override;
260 
261  void emitSpecialCodeForMain();
262 
263  inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL,
264  MVT VT, SDValue &Base, SDValue &Scale,
265  SDValue &Index, SDValue &Disp,
266  SDValue &Segment) {
267  if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
268  Base = CurDAG->getTargetFrameIndex(
269  AM.Base_FrameIndex, TLI->getPointerTy(CurDAG->getDataLayout()));
270  else if (AM.Base_Reg.getNode())
271  Base = AM.Base_Reg;
272  else
273  Base = CurDAG->getRegister(0, VT);
274 
275  Scale = getI8Imm(AM.Scale, DL);
276 
277  // Negate the index if needed.
278  if (AM.NegateIndex) {
279  unsigned NegOpc = VT == MVT::i64 ? X86::NEG64r : X86::NEG32r;
280  SDValue Neg = SDValue(CurDAG->getMachineNode(NegOpc, DL, VT, MVT::i32,
281  AM.IndexReg), 0);
282  AM.IndexReg = Neg;
283  }
284 
285  if (AM.IndexReg.getNode())
286  Index = AM.IndexReg;
287  else
288  Index = CurDAG->getRegister(0, VT);
289 
290  // These are 32-bit even in 64-bit mode since RIP-relative offset
291  // is 32-bit.
292  if (AM.GV)
293  Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
294  MVT::i32, AM.Disp,
295  AM.SymbolFlags);
296  else if (AM.CP)
297  Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
298  AM.Align, AM.Disp, AM.SymbolFlags);
299  else if (AM.ES) {
300  assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
301  Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
302  } else if (AM.MCSym) {
303  assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
304  assert(AM.SymbolFlags == 0 && "oo");
305  Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
306  } else if (AM.JT != -1) {
307  assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
308  Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
309  } else if (AM.BlockAddr)
310  Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
311  AM.SymbolFlags);
312  else
313  Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32);
314 
315  if (AM.Segment.getNode())
316  Segment = AM.Segment;
317  else
318  Segment = CurDAG->getRegister(0, MVT::i16);
319  }
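// Illustrative note: the five values produced above (Base, Scale, Index,
// Disp, Segment) follow the standard ordering of an x86 memory reference on
// MachineInstrs; e.g. a load from [rbx + 4*rcx + 16] would get the operands
// (Base=%rbx, Scale=4, Index=%rcx, Disp=16, Segment=noreg).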
320 
321  // Utility function to determine whether we should avoid selecting
322  // immediate forms of instructions for better code size or not.
323  // At a high level, we'd like to avoid such instructions when
324  // we have similar constants used within the same basic block
325  // that can be kept in a register.
326  //
327  bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
328  uint32_t UseCount = 0;
329 
330  // Do not want to hoist if we're not optimizing for size.
331  // TODO: We'd like to remove this restriction.
332  // See the comment in X86InstrInfo.td for more info.
333  if (!OptForSize)
334  return false;
335 
336  // Walk all the users of the immediate.
337  for (SDNode::use_iterator UI = N->use_begin(),
338  UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) {
339 
340  SDNode *User = *UI;
341 
342  // This user is already selected. Count it as a legitimate use and
343  // move on.
344  if (User->isMachineOpcode()) {
345  UseCount++;
346  continue;
347  }
348 
349  // We want to count stores of immediates as real uses.
350  if (User->getOpcode() == ISD::STORE &&
351  User->getOperand(1).getNode() == N) {
352  UseCount++;
353  continue;
354  }
355 
356  // We don't currently match users that have > 2 operands (except
357  // for stores, which are handled above)
358  // Those instructions won't match in ISEL, for now, and would
359  // be counted incorrectly.
360  // This may change in the future as we add additional instruction
361  // types.
362  if (User->getNumOperands() != 2)
363  continue;
364 
365  // If this can match to INC/DEC, don't count it as a use.
366  if (User->getOpcode() == ISD::ADD &&
367  (isOneConstant(SDValue(N, 0)) || isAllOnesConstant(SDValue(N, 0))))
368  continue;
369 
370  // Immediates that are used for offsets as part of stack
371  // manipulation should be left alone. These are typically
372  // used to indicate SP offsets for argument passing and
373  // will get pulled into stores/pushes (implicitly).
374  if (User->getOpcode() == X86ISD::ADD ||
375  User->getOpcode() == ISD::ADD ||
376  User->getOpcode() == X86ISD::SUB ||
377  User->getOpcode() == ISD::SUB) {
378 
379  // Find the other operand of the add/sub.
380  SDValue OtherOp = User->getOperand(0);
381  if (OtherOp.getNode() == N)
382  OtherOp = User->getOperand(1);
383 
384  // Don't count if the other operand is SP.
385  RegisterSDNode *RegNode;
386  if (OtherOp->getOpcode() == ISD::CopyFromReg &&
387  (RegNode = dyn_cast_or_null<RegisterSDNode>(
388  OtherOp->getOperand(1).getNode())))
389  if ((RegNode->getReg() == X86::ESP) ||
390  (RegNode->getReg() == X86::RSP))
391  continue;
392  }
393 
394  // ... otherwise, count this and move on.
395  UseCount++;
396  }
397 
398  // If we have more than 1 use, then recommend for hoisting.
399  return (UseCount > 1);
400  }
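// Hypothetical example of the size trade-off above: under -Os, a constant
// such as 0x12345678 that feeds two or more instructions in a block is
// cheaper to materialize once,
//   movl $0x12345678, %ecx
//   addl %ecx, %eax
//   cmpl %ecx, %edx
// than to repeat the 4-byte immediate in each instruction, so the helper
// returns true (recommend hoisting) when it counts more than one such use.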
401 
402  /// Return a target constant with the specified value of type i8.
403  inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) {
404  return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
405  }
406 
407  /// Return a target constant with the specified value, of type i32.
408  inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
409  return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
410  }
411 
412  /// Return a target constant with the specified value, of type i64.
413  inline SDValue getI64Imm(uint64_t Imm, const SDLoc &DL) {
414  return CurDAG->getTargetConstant(Imm, DL, MVT::i64);
415  }
416 
417  SDValue getExtractVEXTRACTImmediate(SDNode *N, unsigned VecWidth,
418  const SDLoc &DL) {
419  assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
420  uint64_t Index = N->getConstantOperandVal(1);
421  MVT VecVT = N->getOperand(0).getSimpleValueType();
422  return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
423  }
424 
425  SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth,
426  const SDLoc &DL) {
427  assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
428  uint64_t Index = N->getConstantOperandVal(2);
429  MVT VecVT = N->getSimpleValueType(0);
430  return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
431  }
432 
433  // Helper to detect unneeded and instructions on shift amounts. Called
434  // from PatFrags in tablegen.
435  bool isUnneededShiftMask(SDNode *N, unsigned Width) const {
436  assert(N->getOpcode() == ISD::AND && "Unexpected opcode");
437  const APInt &Val = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
438 
439  if (Val.countTrailingOnes() >= Width)
440  return true;
441 
442  APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero;
443  return Mask.countTrailingOnes() >= Width;
444  }
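// Illustrative example: x86 shifts only consume the low bits of the amount,
// so for a 32-bit shift Width is 5 and a mask like (and %cl, 31) is unneeded
// because countTrailingOnes(31) == 5 >= Width; the PatFrags calling this
// helper can then drop the AND.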
445 
446  /// Return an SDNode that returns the value of the global base register.
447  /// Output instructions required to initialize the global base register,
448  /// if necessary.
449  SDNode *getGlobalBaseReg();
450 
451  /// Return a reference to the TargetMachine, casted to the target-specific
452  /// type.
453  const X86TargetMachine &getTargetMachine() const {
454  return static_cast<const X86TargetMachine &>(TM);
455  }
456 
457  /// Return a reference to the TargetInstrInfo, casted to the target-specific
458  /// type.
459  const X86InstrInfo *getInstrInfo() const {
460  return Subtarget->getInstrInfo();
461  }
462 
463  /// Address-mode matching performs shift-of-and to and-of-shift
464  /// reassociation in order to expose more scaled addressing
465  /// opportunities.
466  bool ComplexPatternFuncMutatesDAG() const override {
467  return true;
468  }
469 
470  bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const;
471 
472  /// Returns whether this is a relocatable immediate in the range
473  /// [-2^Width .. 2^Width-1].
474  template <unsigned Width> bool isSExtRelocImm(SDNode *N) const {
475  if (auto *CN = dyn_cast<ConstantSDNode>(N))
476  return isInt<Width>(CN->getSExtValue());
477  return isSExtAbsoluteSymbolRef(Width, N);
478  }
479 
480  // Indicates we should prefer to use a non-temporal load for this load.
481  bool useNonTemporalLoad(LoadSDNode *N) const {
482  if (!N->isNonTemporal())
483  return false;
484 
485  unsigned StoreSize = N->getMemoryVT().getStoreSize();
486 
487  if (N->getAlignment() < StoreSize)
488  return false;
489 
490  switch (StoreSize) {
491  default: llvm_unreachable("Unsupported store size");
492  case 4:
493  case 8:
494  return false;
495  case 16:
496  return Subtarget->hasSSE41();
497  case 32:
498  return Subtarget->hasAVX2();
499  case 64:
500  return Subtarget->hasAVX512();
501  }
502  }
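// Note (summarizing the switch above): only 16/32/64-byte non-temporal loads
// have a dedicated instruction ((V)MOVNTDQA), gated on SSE4.1, AVX2 and
// AVX-512 respectively; the 4- and 8-byte cases fall back to normal loads,
// and under-aligned accesses are rejected before the switch.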
503 
504  bool foldLoadStoreIntoMemOperand(SDNode *Node);
505  MachineSDNode *matchBEXTRFromAndImm(SDNode *Node);
506  bool matchBitExtract(SDNode *Node);
507  bool shrinkAndImmediate(SDNode *N);
508  bool isMaskZeroExtended(SDNode *N) const;
509  bool tryShiftAmountMod(SDNode *N);
510  bool combineIncDecVector(SDNode *Node);
511  bool tryShrinkShlLogicImm(SDNode *N);
512  bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
513 
514  MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
515  const SDLoc &dl, MVT VT, SDNode *Node);
516  MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
517  const SDLoc &dl, MVT VT, SDNode *Node,
518  SDValue &InFlag);
519 
520  bool tryOptimizeRem8Extend(SDNode *N);
521 
522  bool onlyUsesZeroFlag(SDValue Flags) const;
523  bool hasNoSignFlagUses(SDValue Flags) const;
524  bool hasNoCarryFlagUses(SDValue Flags) const;
525  };
526 }
527 
528 
529 // Returns true if this masked compare can be implemented legally with this
530 // type.
531 static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
532  unsigned Opcode = N->getOpcode();
533  if (Opcode == X86ISD::CMPM || Opcode == ISD::SETCC ||
534  Opcode == X86ISD::CMPM_SAE || Opcode == X86ISD::VFPCLASS) {
535  // We can get 256-bit 8 element types here without VLX being enabled. When
536  // this happens we will use 512-bit operations and the mask will not be
537  // zero extended.
538  EVT OpVT = N->getOperand(0).getValueType();
539  if (OpVT.is256BitVector() || OpVT.is128BitVector())
540  return Subtarget->hasVLX();
541 
542  return true;
543  }
544  // Scalar opcodes use 128 bit registers, but aren't subject to the VLX check.
545  if (Opcode == X86ISD::VFPCLASSS || Opcode == X86ISD::FSETCCM ||
546  Opcode == X86ISD::FSETCCM_SAE)
547  return true;
548 
549  return false;
550 }
551 
552 // Returns true if we can assume the writer of the mask has zero extended it
553 // for us.
554 bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const {
555  // If this is an AND, check if we have a compare on either side. As long as
556  // one side guarantees the mask is zero extended, the AND will preserve those
557  // zeros.
558  if (N->getOpcode() == ISD::AND)
559  return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) ||
560  isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget);
561 
562  return isLegalMaskCompare(N, Subtarget);
563 }
564 
565 bool
566 X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
567  if (OptLevel == CodeGenOpt::None) return false;
568 
569  if (!N.hasOneUse())
570  return false;
571 
572  if (N.getOpcode() != ISD::LOAD)
573  return true;
574 
575  // Don't fold non-temporal loads if we have an instruction for them.
576  if (useNonTemporalLoad(cast<LoadSDNode>(N)))
577  return false;
578 
579  // If N is a load, do additional profitability checks.
580  if (U == Root) {
581  switch (U->getOpcode()) {
582  default: break;
583  case X86ISD::ADD:
584  case X86ISD::ADC:
585  case X86ISD::SUB:
586  case X86ISD::SBB:
587  case X86ISD::AND:
588  case X86ISD::XOR:
589  case X86ISD::OR:
590  case ISD::ADD:
591  case ISD::ADDCARRY:
592  case ISD::AND:
593  case ISD::OR:
594  case ISD::XOR: {
595  SDValue Op1 = U->getOperand(1);
596 
597  // If the other operand is a 8-bit immediate we should fold the immediate
598  // instead. This reduces code size.
599  // e.g.
600  // movl 4(%esp), %eax
601  // addl $4, %eax
602  // vs.
603  // movl $4, %eax
604  // addl 4(%esp), %eax
605  // The former is 2 bytes shorter. In case where the increment is 1, then
606  // the saving can be 4 bytes (by using incl %eax).
607  if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1)) {
608  if (Imm->getAPIntValue().isSignedIntN(8))
609  return false;
610 
611  // If this is a 64-bit AND with an immediate that fits in 32-bits,
612  // prefer using the smaller and over folding the load. This is needed to
613  // make sure immediates created by shrinkAndImmediate are always folded.
614  // Ideally we would narrow the load during DAG combine and get the
615  // best of both worlds.
616  if (U->getOpcode() == ISD::AND &&
617  Imm->getAPIntValue().getBitWidth() == 64 &&
618  Imm->getAPIntValue().isIntN(32))
619  return false;
620 
621  // If this is really a zext_inreg that can be represented with a movzx
622  // instruction, prefer that.
623  // TODO: We could shrink the load and fold if it is non-volatile.
624  if (U->getOpcode() == ISD::AND &&
625  (Imm->getAPIntValue() == UINT8_MAX ||
626  Imm->getAPIntValue() == UINT16_MAX ||
627  Imm->getAPIntValue() == UINT32_MAX))
628  return false;
629 
630  // ADD/SUB can negate the immediate and use the opposite operation
631  // to fit 128 into a sign extended 8 bit immediate.
632  if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB) &&
633  (-Imm->getAPIntValue()).isSignedIntN(8))
634  return false;
635  }
636 
637  // If the other operand is a TLS address, we should fold it instead.
638  // This produces
639  // movl %gs:0, %eax
640  // leal i@NTPOFF(%eax), %eax
641  // instead of
642  // movl $i@NTPOFF, %eax
643  // addl %gs:0, %eax
644  // if the block also has an access to a second TLS address this will save
645  // a load.
646  // FIXME: This is probably also true for non-TLS addresses.
647  if (Op1.getOpcode() == X86ISD::Wrapper) {
648  SDValue Val = Op1.getOperand(0);
649  if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
650  return false;
651  }
652 
653  // Don't fold load if this matches the BTS/BTR/BTC patterns.
654  // BTS: (or X, (shl 1, n))
655  // BTR: (and X, (rotl -2, n))
656  // BTC: (xor X, (shl 1, n))
657  if (U->getOpcode() == ISD::OR || U->getOpcode() == ISD::XOR) {
658  if (U->getOperand(0).getOpcode() == ISD::SHL &&
659  isOneConstant(U->getOperand(0).getOperand(0)))
660  return false;
661 
662  if (U->getOperand(1).getOpcode() == ISD::SHL &&
663  isOneConstant(U->getOperand(1).getOperand(0)))
664  return false;
665  }
666  if (U->getOpcode() == ISD::AND) {
667  SDValue U0 = U->getOperand(0);
668  SDValue U1 = U->getOperand(1);
669  if (U0.getOpcode() == ISD::ROTL) {
670  auto *C = dyn_cast<ConstantSDNode>(U0.getOperand(0));
671  if (C && C->getSExtValue() == -2)
672  return false;
673  }
674 
675  if (U1.getOpcode() == ISD::ROTL) {
676  auto *C = dyn_cast<ConstantSDNode>(U1.getOperand(0));
677  if (C && C->getSExtValue() == -2)
678  return false;
679  }
680  }
681 
682  break;
683  }
684  case ISD::SHL:
685  case ISD::SRA:
686  case ISD::SRL:
687  // Don't fold a load into a shift by immediate. The BMI2 instructions
688  // support folding a load, but not an immediate. The legacy instructions
689  // support folding an immediate, but can't fold a load. Folding an
690  // immediate is preferable to folding a load.
691  if (isa<ConstantSDNode>(U->getOperand(1)))
692  return false;
693 
694  break;
695  }
696  }
697 
698  // Prevent folding a load if this can be implemented with an insert_subreg or
699  // a move that implicitly zeroes.
700  if (Root->getOpcode() == ISD::INSERT_SUBVECTOR &&
701  isNullConstant(Root->getOperand(2)) &&
702  (Root->getOperand(0).isUndef() ||
703  ISD::isBuildVectorAllZeros(Root->getOperand(0).getNode())))
704  return false;
705 
706  return true;
707 }
708 
709 /// Replace the original chain operand of the call with
710 /// load's chain operand and move load below the call's chain operand.
711 static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
712  SDValue Call, SDValue OrigChain) {
713  SmallVector<SDValue, 8> Ops;
714  SDValue Chain = OrigChain.getOperand(0);
715  if (Chain.getNode() == Load.getNode())
716  Ops.push_back(Load.getOperand(0));
717  else {
718  assert(Chain.getOpcode() == ISD::TokenFactor &&
719  "Unexpected chain operand");
720  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
721  if (Chain.getOperand(i).getNode() == Load.getNode())
722  Ops.push_back(Load.getOperand(0));
723  else
724  Ops.push_back(Chain.getOperand(i));
725  SDValue NewChain =
726  CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
727  Ops.clear();
728  Ops.push_back(NewChain);
729  }
730  Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
731  CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
732  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
733  Load.getOperand(1), Load.getOperand(2));
734 
735  Ops.clear();
736  Ops.push_back(SDValue(Load.getNode(), 1));
737  Ops.append(Call->op_begin() + 1, Call->op_end());
738  CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
739 }
740 
741 /// Return true if call address is a load and it can be
742 /// moved below CALLSEQ_START and the chains leading up to the call.
743 /// Return the CALLSEQ_START by reference as a second output.
744 /// In the case of a tail call, there isn't a callseq node between the call
745 /// chain and the load.
746 static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
747  // The transformation is somewhat dangerous if the call's chain was glued to
748  // the call. After MoveBelowOrigChain the load is moved between the call and
749  // the chain, this can create a cycle if the load is not folded. So it is
750  // *really* important that we are sure the load will be folded.
751  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
752  return false;
753  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
754  if (!LD ||
755  !LD->isSimple() ||
756  LD->getAddressingMode() != ISD::UNINDEXED ||
757  LD->getExtensionType() != ISD::NON_EXTLOAD)
758  return false;
759 
760  // Now let's find the callseq_start.
761  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
762  if (!Chain.hasOneUse())
763  return false;
764  Chain = Chain.getOperand(0);
765  }
766 
767  if (!Chain.getNumOperands())
768  return false;
769  // Since we are not checking for AA here, conservatively abort if the chain
770  // writes to memory. It's not safe to move the callee (a load) across a store.
771  if (isa<MemSDNode>(Chain.getNode()) &&
772  cast<MemSDNode>(Chain.getNode())->writeMem())
773  return false;
774  if (Chain.getOperand(0).getNode() == Callee.getNode())
775  return true;
776  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
777  Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
778  Callee.getValue(1).hasOneUse())
779  return true;
780  return false;
781 }
782 
783 void X86DAGToDAGISel::PreprocessISelDAG() {
784  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
785  E = CurDAG->allnodes_end(); I != E; ) {
786  SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
787 
788  // If this is a target specific AND node with no flag usages, turn it back
789  // into ISD::AND to enable test instruction matching.
790  if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) {
791  SDValue Res = CurDAG->getNode(ISD::AND, SDLoc(N), N->getValueType(0),
792  N->getOperand(0), N->getOperand(1));
793  --I;
794  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
795  ++I;
796  CurDAG->DeleteNode(N);
797  continue;
798  }
799 
800  switch (N->getOpcode()) {
801  case ISD::FP_TO_SINT:
802  case ISD::FP_TO_UINT: {
803  // Replace vector fp_to_s/uint with their X86 specific equivalent so we
804  // don't need 2 sets of patterns.
805  if (!N->getSimpleValueType(0).isVector())
806  break;
807 
808  unsigned NewOpc;
809  switch (N->getOpcode()) {
810  default: llvm_unreachable("Unexpected opcode!");
811  case ISD::FP_TO_SINT: NewOpc = X86ISD::CVTTP2SI; break;
812  case ISD::FP_TO_UINT: NewOpc = X86ISD::CVTTP2UI; break;
813  }
814  SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
815  N->getOperand(0));
816  --I;
817  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
818  ++I;
819  CurDAG->DeleteNode(N);
820  continue;
821  }
822  case ISD::SHL:
823  case ISD::SRA:
824  case ISD::SRL: {
825  // Replace vector shifts with their X86 specific equivalent so we don't
826  // need 2 sets of patterns.
827  if (!N->getValueType(0).isVector())
828  break;
829 
830  unsigned NewOpc;
831  switch (N->getOpcode()) {
832  default: llvm_unreachable("Unexpected opcode!");
833  case ISD::SHL: NewOpc = X86ISD::VSHLV; break;
834  case ISD::SRA: NewOpc = X86ISD::VSRAV; break;
835  case ISD::SRL: NewOpc = X86ISD::VSRLV; break;
836  }
837  SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
838  N->getOperand(0), N->getOperand(1));
839  --I;
840  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
841  ++I;
842  CurDAG->DeleteNode(N);
843  continue;
844  }
845  case ISD::ANY_EXTEND:
846  case ISD::ANY_EXTEND_VECTOR_INREG: {
847  // Replace vector any extend with the zero extend equivalents so we don't
848  // need 2 sets of patterns. Ignore vXi1 extensions.
849  if (!N->getValueType(0).isVector() ||
850  N->getOperand(0).getValueType().getVectorElementType() == MVT::i1)
851  break;
852 
853  unsigned NewOpc = N->getOpcode() == ISD::ANY_EXTEND
854  ? ISD::ZERO_EXTEND
855  : ISD::ZERO_EXTEND_VECTOR_INREG;
856 
857  SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
858  N->getOperand(0));
859  --I;
860  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
861  ++I;
862  CurDAG->DeleteNode(N);
863  continue;
864  }
865  case ISD::FCEIL:
866  case ISD::FFLOOR:
867  case ISD::FTRUNC:
868  case ISD::FNEARBYINT:
869  case ISD::FRINT: {
870  // Replace fp rounding with their X86 specific equivalent so we don't
871  // need 2 sets of patterns.
872  unsigned Imm;
873  switch (N->getOpcode()) {
874  default: llvm_unreachable("Unexpected opcode!");
875  case ISD::FCEIL: Imm = 0xA; break;
876  case ISD::FFLOOR: Imm = 0x9; break;
877  case ISD::FTRUNC: Imm = 0xB; break;
878  case ISD::FNEARBYINT: Imm = 0xC; break;
879  case ISD::FRINT: Imm = 0x4; break;
880  }
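// Note (an assumption about the encoding, not stated in the original): these
// values follow the ROUNDPS/VRNDSCALE immediate convention, where bits 1:0
// select the rounding mode (01 = floor, 10 = ceil, 11 = trunc), bit 2 selects
// the current MXCSR rounding mode (used for rint/nearbyint), and bit 3
// suppresses the precision exception (set for every case here except FRINT).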
881  SDLoc dl(N);
882  SDValue Res = CurDAG->getNode(
883  X86ISD::VRNDSCALE, dl, N->getValueType(0), N->getOperand(0),
884  CurDAG->getTargetConstant(Imm, dl, MVT::i8));
885  --I;
886  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
887  ++I;
888  CurDAG->DeleteNode(N);
889  continue;
890  }
891  case X86ISD::FANDN:
892  case X86ISD::FAND:
893  case X86ISD::FOR:
894  case X86ISD::FXOR: {
895  // Widen scalar fp logic ops to vector to reduce isel patterns.
896  // FIXME: Can we do this during lowering/combine.
897  MVT VT = N->getSimpleValueType(0);
898  if (VT.isVector() || VT == MVT::f128)
899  break;
900 
901  MVT VecVT = VT == MVT::f64 ? MVT::v2f64 : MVT::v4f32;
902  SDLoc dl(N);
903  SDValue Op0 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
904  N->getOperand(0));
905  SDValue Op1 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
906  N->getOperand(1));
907 
908  SDValue Res;
909  if (Subtarget->hasSSE2()) {
910  EVT IntVT = EVT(VecVT).changeVectorElementTypeToInteger();
911  Op0 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op0);
912  Op1 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op1);
913  unsigned Opc;
914  switch (N->getOpcode()) {
915  default: llvm_unreachable("Unexpected opcode!");
916  case X86ISD::FANDN: Opc = X86ISD::ANDNP; break;
917  case X86ISD::FAND: Opc = ISD::AND; break;
918  case X86ISD::FOR: Opc = ISD::OR; break;
919  case X86ISD::FXOR: Opc = ISD::XOR; break;
920  }
921  Res = CurDAG->getNode(Opc, dl, IntVT, Op0, Op1);
922  Res = CurDAG->getNode(ISD::BITCAST, dl, VecVT, Res);
923  } else {
924  Res = CurDAG->getNode(N->getOpcode(), dl, VecVT, Op0, Op1);
925  }
926  Res = CurDAG->getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res,
927  CurDAG->getIntPtrConstant(0, dl));
928  --I;
929  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
930  ++I;
931  CurDAG->DeleteNode(N);
932  continue;
933  }
934  }
935 
936  if (OptLevel != CodeGenOpt::None &&
937  // Only do this when the target can fold the load into the call or
938  // jmp.
939  !Subtarget->useRetpolineIndirectCalls() &&
940  ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
941  (N->getOpcode() == X86ISD::TC_RETURN &&
942  (Subtarget->is64Bit() ||
943  !getTargetMachine().isPositionIndependent())))) {
944  /// Also try moving call address load from outside callseq_start to just
945  /// before the call to allow it to be folded.
946  ///
947  /// [Load chain]
948  /// ^
949  /// |
950  /// [Load]
951  /// ^ ^
952  /// | |
953  /// / \--
954  /// / |
955  ///[CALLSEQ_START] |
956  /// ^ |
957  /// | |
958  /// [LOAD/C2Reg] |
959  /// | |
960  /// \ /
961  /// \ /
962  /// [CALL]
963  bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
964  SDValue Chain = N->getOperand(0);
965  SDValue Load = N->getOperand(1);
966  if (!isCalleeLoad(Load, Chain, HasCallSeq))
967  continue;
968  moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
969  ++NumLoadMoved;
970  continue;
971  }
972 
973  // Lower fpround and fpextend nodes that target the FP stack to be store and
974  // load to the stack. This is a gross hack. We would like to simply mark
975  // these as being illegal, but when we do that, legalize produces these when
976  // it expands calls, then expands these in the same legalize pass. We would
977  // like dag combine to be able to hack on these between the call expansion
978  // and the node legalization. As such this pass basically does "really
979  // late" legalization of these inline with the X86 isel pass.
980  // FIXME: This should only happen when not compiled with -O0.
981  switch (N->getOpcode()) {
982  default: continue;
983  case ISD::FP_ROUND:
984  case ISD::FP_EXTEND:
985  {
986  MVT SrcVT = N->getOperand(0).getSimpleValueType();
987  MVT DstVT = N->getSimpleValueType(0);
988 
989  // If any of the sources are vectors, no fp stack involved.
990  if (SrcVT.isVector() || DstVT.isVector())
991  continue;
992 
993  // If the source and destination are SSE registers, then this is a legal
994  // conversion that should not be lowered.
995  const X86TargetLowering *X86Lowering =
996  static_cast<const X86TargetLowering *>(TLI);
997  bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
998  bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
999  if (SrcIsSSE && DstIsSSE)
1000  continue;
1001 
1002  if (!SrcIsSSE && !DstIsSSE) {
1003  // If this is an FPStack extension, it is a noop.
1004  if (N->getOpcode() == ISD::FP_EXTEND)
1005  continue;
1006  // If this is a value-preserving FPStack truncation, it is a noop.
1007  if (N->getConstantOperandVal(1))
1008  continue;
1009  }
1010 
1011  // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
1012  // FPStack has extload and truncstore. SSE can fold direct loads into other
1013  // operations. Based on this, decide what we want to do.
1014  MVT MemVT;
1015  if (N->getOpcode() == ISD::FP_ROUND)
1016  MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
1017  else
1018  MemVT = SrcIsSSE ? SrcVT : DstVT;
1019 
1020  SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
1021  SDLoc dl(N);
1022 
1023  // FIXME: optimize the case where the src/dest is a load or store?
1024 
1025  SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
1026  MemTmp, MachinePointerInfo(), MemVT);
1027  SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
1028  MachinePointerInfo(), MemVT);
1029 
1030  // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
1031  // extload we created. This will cause general havok on the dag because
1032  // anything below the conversion could be folded into other existing nodes.
1033  // To avoid invalidating 'I', back it up to the convert node.
1034  --I;
1035  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1036  break;
1037  }
1038 
1039  // The sequence of events for lowering STRICT_FP versions of these nodes requires
1040  // dealing with the chain differently, as there is already a preexisting chain.
1041  case ISD::STRICT_FP_ROUND:
1042  case ISD::STRICT_FP_EXTEND:
1043  {
1044  MVT SrcVT = N->getOperand(1).getSimpleValueType();
1045  MVT DstVT = N->getSimpleValueType(0);
1046 
1047  // If any of the sources are vectors, no fp stack involved.
1048  if (SrcVT.isVector() || DstVT.isVector())
1049  continue;
1050 
1051  // If the source and destination are SSE registers, then this is a legal
1052  // conversion that should not be lowered.
1053  const X86TargetLowering *X86Lowering =
1054  static_cast<const X86TargetLowering *>(TLI);
1055  bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
1056  bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
1057  if (SrcIsSSE && DstIsSSE)
1058  continue;
1059 
1060  if (!SrcIsSSE && !DstIsSSE) {
1061  // If this is an FPStack extension, it is a noop.
1062  if (N->getOpcode() == ISD::STRICT_FP_EXTEND)
1063  continue;
1064  // If this is a value-preserving FPStack truncation, it is a noop.
1065  if (N->getConstantOperandVal(2))
1066  continue;
1067  }
1068 
1069  // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
1070  // FPStack has extload and truncstore. SSE can fold direct loads into other
1071  // operations. Based on this, decide what we want to do.
1072  MVT MemVT;
1073  if (N->getOpcode() == ISD::STRICT_FP_ROUND)
1074  MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
1075  else
1076  MemVT = SrcIsSSE ? SrcVT : DstVT;
1077 
1078  SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
1079  SDLoc dl(N);
1080 
1081  // FIXME: optimize the case where the src/dest is a load or store?
1082 
1083  // Since the operation is StrictFP, use the preexisting chain.
1084  SDValue Store = CurDAG->getTruncStore(N->getOperand(0), dl, N->getOperand(1),
1085  MemTmp, MachinePointerInfo(), MemVT);
1086  SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
1087  MachinePointerInfo(), MemVT);
1088 
1089  // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
1090  // extload we created. This will cause general havok on the dag because
1091  // anything below the conversion could be folded into other existing nodes.
1092  // To avoid invalidating 'I', back it up to the convert node.
1093  --I;
1094  CurDAG->ReplaceAllUsesWith(N, Result.getNode());
1095  break;
1096  }
1097  }
1098 
1099 
1100  // Now that we did that, the node is dead. Increment the iterator to the
1101  // next node to process, then delete N.
1102  ++I;
1103  CurDAG->DeleteNode(N);
1104  }
1105 
1106  // The load+call transform above can leave some dead nodes in the graph. Make
1107  // sure we remove them. It's possible some of the other transforms do too, so
1108  // just remove dead nodes unconditionally.
1109  CurDAG->RemoveDeadNodes();
1110 }
1111 
1112 // Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
1113 bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) {
1114  unsigned Opc = N->getMachineOpcode();
1115  if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 &&
1116  Opc != X86::MOVSX64rr8)
1117  return false;
1118 
1119  SDValue N0 = N->getOperand(0);
1120 
1121  // We need to be extracting the lower bit of an extend.
1122  if (!N0.isMachineOpcode() ||
1123  N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG ||
1124  N0.getConstantOperandVal(1) != X86::sub_8bit)
1125  return false;
1126 
1127  // We're looking for either a movsx or movzx to match the original opcode.
1128  unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX
1129  : X86::MOVSX32rr8_NOREX;
1130  SDValue N00 = N0.getOperand(0);
1131  if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc)
1132  return false;
1133 
1134  if (Opc == X86::MOVSX64rr8) {
1135  // If we had a sign extend from 8 to 64 bits. We still need to go from 32
1136  // to 64.
1137  MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N),
1138  MVT::i64, N00);
1139  ReplaceUses(N, Extend);
1140  } else {
1141  // Ok we can drop this extend and just use the original extend.
1142  ReplaceUses(N, N00.getNode());
1143  }
1144 
1145  return true;
1146 }
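// Illustrative example: after an 8-bit divrem the remainder lives in AH, so a
// MOVZX32rr8_NOREX/MOVSX32rr8_NOREX is emitted to copy it out; a second
// movzx/movsx of the low 8 bits of that result is redundant and can reuse the
// first extend, with only the 64-bit sign-extend case still needing a
// 32->64 MOVSX64rr32.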
1147 
1148 void X86DAGToDAGISel::PostprocessISelDAG() {
1149  // Skip peepholes at -O0.
1150  if (TM.getOptLevel() == CodeGenOpt::None)
1151  return;
1152 
1153  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
1154 
1155  bool MadeChange = false;
1156  while (Position != CurDAG->allnodes_begin()) {
1157  SDNode *N = &*--Position;
1158  // Skip dead nodes and any non-machine opcodes.
1159  if (N->use_empty() || !N->isMachineOpcode())
1160  continue;
1161 
1162  if (tryOptimizeRem8Extend(N)) {
1163  MadeChange = true;
1164  continue;
1165  }
1166 
1167  // Look for a TESTrr+ANDrr pattern where both operands of the test are
1168  // the same. Rewrite to remove the AND.
1169  unsigned Opc = N->getMachineOpcode();
1170  if ((Opc == X86::TEST8rr || Opc == X86::TEST16rr ||
1171  Opc == X86::TEST32rr || Opc == X86::TEST64rr) &&
1172  N->getOperand(0) == N->getOperand(1) &&
1173  N->isOnlyUserOf(N->getOperand(0).getNode()) &&
1174  N->getOperand(0).isMachineOpcode()) {
1175  SDValue And = N->getOperand(0);
1176  unsigned N0Opc = And.getMachineOpcode();
1177  if (N0Opc == X86::AND8rr || N0Opc == X86::AND16rr ||
1178  N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) {
1179  MachineSDNode *Test = CurDAG->getMachineNode(Opc, SDLoc(N),
1180  MVT::i32,
1181  And.getOperand(0),
1182  And.getOperand(1));
1183  ReplaceUses(N, Test);
1184  MadeChange = true;
1185  continue;
1186  }
1187  if (N0Opc == X86::AND8rm || N0Opc == X86::AND16rm ||
1188  N0Opc == X86::AND32rm || N0Opc == X86::AND64rm) {
1189  unsigned NewOpc;
1190  switch (N0Opc) {
1191  case X86::AND8rm: NewOpc = X86::TEST8mr; break;
1192  case X86::AND16rm: NewOpc = X86::TEST16mr; break;
1193  case X86::AND32rm: NewOpc = X86::TEST32mr; break;
1194  case X86::AND64rm: NewOpc = X86::TEST64mr; break;
1195  }
1196 
1197  // Need to swap the memory and register operand.
1198  SDValue Ops[] = { And.getOperand(1),
1199  And.getOperand(2),
1200  And.getOperand(3),
1201  And.getOperand(4),
1202  And.getOperand(5),
1203  And.getOperand(0),
1204  And.getOperand(6) /* Chain */ };
1205  MachineSDNode *Test = CurDAG->getMachineNode(NewOpc, SDLoc(N),
1206  MVT::i32, MVT::Other, Ops);
1207  ReplaceUses(N, Test);
1208  MadeChange = true;
1209  continue;
1210  }
1211  }
1212 
1213  // Look for a KAND+KORTEST and turn it into KTEST if only the zero flag is
1214  // used. We're doing this late so we can prefer to fold the AND into masked
1215  // comparisons. Doing that can be better for the live range of the mask
1216  // register.
1217  if ((Opc == X86::KORTESTBrr || Opc == X86::KORTESTWrr ||
1218  Opc == X86::KORTESTDrr || Opc == X86::KORTESTQrr) &&
1219  N->getOperand(0) == N->getOperand(1) &&
1220  N->isOnlyUserOf(N->getOperand(0).getNode()) &&
1221  N->getOperand(0).isMachineOpcode() &&
1222  onlyUsesZeroFlag(SDValue(N, 0))) {
1223  SDValue And = N->getOperand(0);
1224  unsigned N0Opc = And.getMachineOpcode();
1225  // KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other
1226  // KAND instructions and KTEST use the same ISA feature.
1227  if (N0Opc == X86::KANDBrr ||
1228  (N0Opc == X86::KANDWrr && Subtarget->hasDQI()) ||
1229  N0Opc == X86::KANDDrr || N0Opc == X86::KANDQrr) {
1230  unsigned NewOpc;
1231  switch (Opc) {
1232  default: llvm_unreachable("Unexpected opcode!");
1233  case X86::KORTESTBrr: NewOpc = X86::KTESTBrr; break;
1234  case X86::KORTESTWrr: NewOpc = X86::KTESTWrr; break;
1235  case X86::KORTESTDrr: NewOpc = X86::KTESTDrr; break;
1236  case X86::KORTESTQrr: NewOpc = X86::KTESTQrr; break;
1237  }
1238  MachineSDNode *KTest = CurDAG->getMachineNode(NewOpc, SDLoc(N),
1239  MVT::i32,
1240  And.getOperand(0),
1241  And.getOperand(1));
1242  ReplaceUses(N, KTest);
1243  MadeChange = true;
1244  continue;
1245  }
1246  }
1247 
1248  // Attempt to remove vectors moves that were inserted to zero upper bits.
1249  if (Opc != TargetOpcode::SUBREG_TO_REG)
1250  continue;
1251 
1252  unsigned SubRegIdx = N->getConstantOperandVal(2);
1253  if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm)
1254  continue;
1255 
1256  SDValue Move = N->getOperand(1);
1257  if (!Move.isMachineOpcode())
1258  continue;
1259 
1260  // Make sure its one of the move opcodes we recognize.
1261  switch (Move.getMachineOpcode()) {
1262  default:
1263  continue;
1264  case X86::VMOVAPDrr: case X86::VMOVUPDrr:
1265  case X86::VMOVAPSrr: case X86::VMOVUPSrr:
1266  case X86::VMOVDQArr: case X86::VMOVDQUrr:
1267  case X86::VMOVAPDYrr: case X86::VMOVUPDYrr:
1268  case X86::VMOVAPSYrr: case X86::VMOVUPSYrr:
1269  case X86::VMOVDQAYrr: case X86::VMOVDQUYrr:
1270  case X86::VMOVAPDZ128rr: case X86::VMOVUPDZ128rr:
1271  case X86::VMOVAPSZ128rr: case X86::VMOVUPSZ128rr:
1272  case X86::VMOVDQA32Z128rr: case X86::VMOVDQU32Z128rr:
1273  case X86::VMOVDQA64Z128rr: case X86::VMOVDQU64Z128rr:
1274  case X86::VMOVAPDZ256rr: case X86::VMOVUPDZ256rr:
1275  case X86::VMOVAPSZ256rr: case X86::VMOVUPSZ256rr:
1276  case X86::VMOVDQA32Z256rr: case X86::VMOVDQU32Z256rr:
1277  case X86::VMOVDQA64Z256rr: case X86::VMOVDQU64Z256rr:
1278  break;
1279  }
1280 
1281  SDValue In = Move.getOperand(0);
1282  if (!In.isMachineOpcode() ||
1283  In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END)
1284  continue;
1285 
1286  // Make sure the instruction has a VEX, XOP, or EVEX prefix. This covers
1287  // the SHA instructions which use a legacy encoding.
1288  uint64_t TSFlags = getInstrInfo()->get(In.getMachineOpcode()).TSFlags;
1289  if ((TSFlags & X86II::EncodingMask) != X86II::VEX &&
1290  (TSFlags & X86II::EncodingMask) != X86II::EVEX &&
1291  (TSFlags & X86II::EncodingMask) != X86II::XOP)
1292  continue;
1293 
1294  // The producing instruction is another vector instruction, so we can drop
1295  // the move.
1296  CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));
1297  MadeChange = true;
1298  }
1299 
1300  if (MadeChange)
1301  CurDAG->RemoveDeadNodes();
1302 }
1303 
1304 
1305 /// Emit any code that needs to be executed only in the main function.
1306 void X86DAGToDAGISel::emitSpecialCodeForMain() {
1307  if (Subtarget->isTargetCygMing()) {
1308  TargetLowering::ArgListTy Args;
1309  auto &DL = CurDAG->getDataLayout();
1310 
1311  TargetLowering::CallLoweringInfo CLI(*CurDAG);
1312  CLI.setChain(CurDAG->getRoot())
1313  .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
1314  CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
1315  std::move(Args));
1316  const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
1317  std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
1318  CurDAG->setRoot(Result.second);
1319  }
1320 }
1321 
1322 void X86DAGToDAGISel::EmitFunctionEntryCode() {
1323  // If this is main, emit special code for main.
1324  const Function &F = MF->getFunction();
1325  if (F.hasExternalLinkage() && F.getName() == "main")
1326  emitSpecialCodeForMain();
1327 }
1328 
1329 static bool isDispSafeForFrameIndex(int64_t Val) {
1330  // On 64-bit platforms, we can run into an issue where a frame index
1331  // includes a displacement that, when added to the explicit displacement,
1332  // will overflow the displacement field. Assuming that the frame index
1333  // displacement fits into a 31-bit integer (which is only slightly more
1334  // aggressive than the current fundamental assumption that it fits into
1335  // a 32-bit integer), a 31-bit disp should always be safe.
1336  return isInt<31>(Val);
1337 }
1338 
1339 bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
1340  X86ISelAddressMode &AM) {
1341  // If there's no offset to fold, we don't need to do any work.
1342  if (Offset == 0)
1343  return false;
1344 
1345  // Cannot combine ExternalSymbol displacements with integer offsets.
1346  if (AM.ES || AM.MCSym)
1347  return true;
1348 
1349  int64_t Val = AM.Disp + Offset;
1350  CodeModel::Model M = TM.getCodeModel();
1351  if (Subtarget->is64Bit()) {
1352  if (!X86::isOffsetSuitableForCodeModel(Val, M,
1353  AM.hasSymbolicDisplacement()))
1354  return true;
1355  // In addition to the checks required for a register base, check that
1356  // we do not try to use an unsafe Disp with a frame index.
1357  if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
1358  !isDispSafeForFrameIndex(Val))
1359  return true;
1360  }
1361  AM.Disp = Val;
1362  return false;
1363 
1364 }
1365 
1366 bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
1367  SDValue Address = N->getOperand(1);
1368 
1369  // load gs:0 -> GS segment register.
1370  // load fs:0 -> FS segment register.
1371  //
1372  // This optimization is valid because the GNU TLS model defines that
1373  // gs:0 (or fs:0 on X86-64) contains its own address.
1374  // For more information see http://people.redhat.com/drepper/tls.pdf
1375  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
1376  if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
1377  !IndirectTlsSegRefs &&
1378  (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
1379  Subtarget->isTargetFuchsia()))
1380  switch (N->getPointerInfo().getAddrSpace()) {
1381  case 256:
1382  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1383  return false;
1384  case 257:
1385  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1386  return false;
1387  // Address space 258 is not handled here, because it is not used to
1388  // address TLS areas.
1389  }
1390 
1391  return true;
1392 }
1393 
1394 /// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
1395 /// mode. These wrap things that will resolve down into a symbol reference.
1396 /// If no match is possible, this returns true, otherwise it returns false.
1397 bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
1398  // If the addressing mode already has a symbol as the displacement, we can
1399  // never match another symbol.
1400  if (AM.hasSymbolicDisplacement())
1401  return true;
1402 
1403  bool IsRIPRelTLS = false;
1404  bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP;
1405  if (IsRIPRel) {
1406  SDValue Val = N.getOperand(0);
1407  if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
1408  IsRIPRelTLS = true;
1409  }
1410 
1411  // We can't use an addressing mode in the 64-bit large code model.
1412  // Global TLS addressing is an exception. In the medium code model,
1413  // we can use a mode when RIP wrappers are present.
1414  // That signifies access to globals that are known to be "near",
1415  // such as the GOT itself.
1416  CodeModel::Model M = TM.getCodeModel();
1417  if (Subtarget->is64Bit() &&
1418  ((M == CodeModel::Large && !IsRIPRelTLS) ||
1419  (M == CodeModel::Medium && !IsRIPRel)))
1420  return true;
1421 
1422  // Base and index reg must be 0 in order to use %rip as base.
1423  if (IsRIPRel && AM.hasBaseOrIndexReg())
1424  return true;
1425 
1426  // Make a local copy in case we can't do this fold.
1427  X86ISelAddressMode Backup = AM;
1428 
1429  int64_t Offset = 0;
1430  SDValue N0 = N.getOperand(0);
1431  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
1432  AM.GV = G->getGlobal();
1433  AM.SymbolFlags = G->getTargetFlags();
1434  Offset = G->getOffset();
1435  } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
1436  AM.CP = CP->getConstVal();
1437  AM.Align = CP->getAlignment();
1438  AM.SymbolFlags = CP->getTargetFlags();
1439  Offset = CP->getOffset();
1440  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
1441  AM.ES = S->getSymbol();
1442  AM.SymbolFlags = S->getTargetFlags();
1443  } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
1444  AM.MCSym = S->getMCSymbol();
1445  } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
1446  AM.JT = J->getIndex();
1447  AM.SymbolFlags = J->getTargetFlags();
1448  } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
1449  AM.BlockAddr = BA->getBlockAddress();
1450  AM.SymbolFlags = BA->getTargetFlags();
1451  Offset = BA->getOffset();
1452  } else
1453  llvm_unreachable("Unhandled symbol reference node.");
1454 
1455  if (foldOffsetIntoAddress(Offset, AM)) {
1456  AM = Backup;
1457  return true;
1458  }
1459 
1460  if (IsRIPRel)
1461  AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
1462 
1463  // Commit the changes now that we know this fold is safe.
1464  return false;
1465 }
1466 
1467 /// Add the specified node to the specified addressing mode, returning true if
1468 /// it cannot be done. This just pattern matches for the addressing mode.
1469 bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
1470  if (matchAddressRecursively(N, AM, 0))
1471  return true;
1472 
1473  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
1474  // a smaller encoding and avoids a scaled-index.
1475  if (AM.Scale == 2 &&
1476  AM.BaseType == X86ISelAddressMode::RegBase &&
1477  AM.Base_Reg.getNode() == nullptr) {
1478  AM.Base_Reg = AM.IndexReg;
1479  AM.Scale = 1;
1480  }
1481 
1482  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
1483  // because it has a smaller encoding.
1484  // TODO: Which other code models can use this?
1485  switch (TM.getCodeModel()) {
1486  default: break;
1487  case CodeModel::Small:
1488  case CodeModel::Kernel:
1489  if (Subtarget->is64Bit() &&
1490  AM.Scale == 1 &&
1491  AM.BaseType == X86ISelAddressMode::RegBase &&
1492  AM.Base_Reg.getNode() == nullptr &&
1493  AM.IndexReg.getNode() == nullptr &&
1494  AM.SymbolFlags == X86II::MO_NO_FLAG &&
1495  AM.hasSymbolicDisplacement())
1496  AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
1497  break;
1498  }
1499 
1500  return false;
1501 }
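// Illustrative examples of the two post-processing steps above:
//   leal (,%ecx,2), %eax  ->  leal (%ecx,%ecx), %eax   (drops the disp32 that
//                                                       a base-less SIB needs)
//   movl foo, %eax        ->  movl foo(%rip), %eax     (64-bit small/kernel)
// both pick an encoding that is no larger and usually smaller.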
1502 
1503 bool X86DAGToDAGISel::matchAdd(SDValue &N, X86ISelAddressMode &AM,
1504  unsigned Depth) {
1505  // Add an artificial use to this node so that we can keep track of
1506  // it if it gets CSE'd with a different node.
1507  HandleSDNode Handle(N);
1508 
1509  X86ISelAddressMode Backup = AM;
1510  if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
1511  !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
1512  return false;
1513  AM = Backup;
1514 
1515  // Try again after commuting the operands.
1516  if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1) &&
1517  !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
1518  return false;
1519  AM = Backup;
1520 
1521  // If we couldn't fold both operands into the address at the same time,
1522  // see if we can just put each operand into a register and fold at least
1523  // the add.
1524  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1525  !AM.Base_Reg.getNode() &&
1526  !AM.IndexReg.getNode()) {
1527  N = Handle.getValue();
1528  AM.Base_Reg = N.getOperand(0);
1529  AM.IndexReg = N.getOperand(1);
1530  AM.Scale = 1;
1531  return false;
1532  }
1533  N = Handle.getValue();
1534  return true;
1535 }
1536 
1537 // Insert a node into the DAG at least before the Pos node's position. This
1538 // will reposition the node as needed, and will assign it a node ID that is <=
1539 // the Pos node's ID. Note that this does *not* preserve the uniqueness of node
1540 // IDs! The selection DAG must no longer depend on their uniqueness when this
1541 // is used.
1542 static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
1543  if (N->getNodeId() == -1 ||
1544  (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) >
1545  SelectionDAGISel::getUninvalidatedNodeId(Pos.getNode()))) {
1546  DAG.RepositionNode(Pos->getIterator(), N.getNode());
1547  // Mark Node as invalid for pruning as after this it may be a successor to a
1548  // selected node but otherwise be in the same position of Pos.
1549  // Conservatively mark it with the same -abs(Id) to assure node id
1550  // invariant is preserved.
1551  N->setNodeId(Pos->getNodeId());
1552  SelectionDAGISel::InvalidateNodeId(N.getNode());
1553  }
1554 }
1555 
1556 // Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
1557 // safe. This allows us to convert the shift and and into an h-register
1558 // extract and a scaled index. Returns false if the simplification is
1559 // performed.
1560 static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
1561  uint64_t Mask,
1562  SDValue Shift, SDValue X,
1563  X86ISelAddressMode &AM) {
1564  if (Shift.getOpcode() != ISD::SRL ||
1565  !isa<ConstantSDNode>(Shift.getOperand(1)) ||
1566  !Shift.hasOneUse())
1567  return true;
1568 
1569  int ScaleLog = 8 - Shift.getConstantOperandVal(1);
1570  if (ScaleLog <= 0 || ScaleLog >= 4 ||
1571  Mask != (0xffu << ScaleLog))
1572  return true;
1573 
1574  MVT VT = N.getSimpleValueType();
1575  SDLoc DL(N);
1576  SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
1577  SDValue NewMask = DAG.getConstant(0xff, DL, VT);
1578  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
1579  SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
1580  SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
1581  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);
1582 
1583  // Insert the new nodes into the topological ordering. We must do this in
1584  // a valid topological ordering as nothing is going to go back and re-sort
1585  // these nodes. We continually insert before 'N' in sequence as this is
1586  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1587  // hierarchy left to express.
1588  insertDAGNode(DAG, N, Eight);
1589  insertDAGNode(DAG, N, Srl);
1590  insertDAGNode(DAG, N, NewMask);
1591  insertDAGNode(DAG, N, And);
1592  insertDAGNode(DAG, N, ShlCount);
1593  insertDAGNode(DAG, N, Shl);
1594  DAG.ReplaceAllUsesWith(N, Shl);
1595  DAG.RemoveDeadNode(N.getNode());
1596  AM.IndexReg = And;
1597  AM.Scale = (1 << ScaleLog);
1598  return false;
1599 }
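// Worked example, not part of the original source: with C1 = 2 the input is
// (x >> 6) & 0x3fc, i.e. (x >> (8-2)) & (0xff << 2). Then ScaleLog = 8 - 6 = 2
// and Mask == 0xff << 2, so the node is rebuilt as ((x >> 8) & 0xff) << 2;
// the (x >> 8) & 0xff piece can later be selected as an h-register extract,
// and the << 2 is absorbed into the addressing mode as AM.Scale = 4.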
1600 
1601 // Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
1602 // allows us to fold the shift into this addressing mode. Returns false if the
1603 // transform succeeded.
1604 static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
1605  X86ISelAddressMode &AM) {
1606  SDValue Shift = N.getOperand(0);
1607 
1608  // Use a signed mask so that shifting right will insert sign bits. These
1609  // bits will be removed when we shift the result left so it doesn't matter
1610  // what we use. This might allow a smaller immediate encoding.
1611  int64_t Mask = cast<ConstantSDNode>(N->getOperand(1))->getSExtValue();
1612 
1613  // If we have an any_extend feeding the AND, look through it to see if there
1614  // is a shift behind it. But only if the AND doesn't use the extended bits.
1615  // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
1616  bool FoundAnyExtend = false;
1617  if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
1618  Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
1619  isUInt<32>(Mask)) {
1620  FoundAnyExtend = true;
1621  Shift = Shift.getOperand(0);
1622  }
1623 
1624  if (Shift.getOpcode() != ISD::SHL ||
1625  !isa<ConstantSDNode>(Shift.getOperand(1)))
1626  return true;
1627 
1628  SDValue X = Shift.getOperand(0);
1629 
1630  // Not likely to be profitable if either the AND or SHIFT node has more
1631  // than one use (unless all uses are for address computation). Besides,
1632  // isel mechanism requires their node ids to be reused.
1633  if (!N.hasOneUse() || !Shift.hasOneUse())
1634  return true;
1635 
1636  // Verify that the shift amount is something we can fold.
1637  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1638  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
1639  return true;
1640 
1641  MVT VT = N.getSimpleValueType();
1642  SDLoc DL(N);
1643  if (FoundAnyExtend) {
1644  SDValue NewX = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X);
1645  insertDAGNode(DAG, N, NewX);
1646  X = NewX;
1647  }
1648 
1649  SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
1650  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
1651  SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
1652 
1653  // Insert the new nodes into the topological ordering. We must do this in
1654  // a valid topological ordering as nothing is going to go back and re-sort
1655  // these nodes. We continually insert before 'N' in sequence as this is
1656  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1657  // hierarchy left to express.
1658  insertDAGNode(DAG, N, NewMask);
1659  insertDAGNode(DAG, N, NewAnd);
1660  insertDAGNode(DAG, N, NewShift);
1661  DAG.ReplaceAllUsesWith(N, NewShift);
1662  DAG.RemoveDeadNode(N.getNode());
1663 
1664  AM.Scale = 1 << ShiftAmt;
1665  AM.IndexReg = NewAnd;
1666  return false;
1667 }
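// Worked example, not part of the original source: for (x << 2) & 0x3fc we
// have ShiftAmt = 2 and Mask = 0x3fc, so the node is rebuilt as
// (x & (0x3fc >> 2)) << 2 = (x & 0xff) << 2. The AND now sits below the
// shift, AM.IndexReg becomes (x & 0xff) and AM.Scale becomes 4, folding the
// shift into the scaled-index field.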
1668 
1669 // Implement some heroics to detect shifts of masked values where the mask can
1670 // be replaced by extending the shift and undoing that in the addressing mode
1671 // scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
1672 // (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
1673 // the addressing mode. This results in code such as:
1674 //
1675 // int f(short *y, int *lookup_table) {
1676 // ...
1677 // return *y + lookup_table[*y >> 11];
1678 // }
1679 //
1680 // Turning into:
1681 // movzwl (%rdi), %eax
1682 // movl %eax, %ecx
1683 // shrl $11, %ecx
1684 // addl (%rsi,%rcx,4), %eax
1685 //
1686 // Instead of:
1687 // movzwl (%rdi), %eax
1688 // movl %eax, %ecx
1689 // shrl $9, %ecx
1690 // andl $124, %rcx
1691 // addl (%rsi,%rcx), %eax
1692 //
1693 // Note that this function assumes the mask is provided as a mask *after* the
1694 // value is shifted. The input chain may or may not match that, but computing
1695 // such a mask is trivial.
1696 static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
1697  uint64_t Mask,
1698  SDValue Shift, SDValue X,
1699  X86ISelAddressMode &AM) {
1700  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
1701  !isa<ConstantSDNode>(Shift.getOperand(1)))
1702  return true;
1703 
1704  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1705  unsigned MaskLZ = countLeadingZeros(Mask);
1706  unsigned MaskTZ = countTrailingZeros(Mask);
1707 
1708  // The amount of shift we're trying to fit into the addressing mode is taken
1709  // from the trailing zeros of the mask.
1710  unsigned AMShiftAmt = MaskTZ;
1711 
1712  // There is nothing we can do here unless the mask is removing some bits.
1713  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
1714  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
1715 
1716  // We also need to ensure that mask is a continuous run of bits.
1717  if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
1718 
1719  // Scale the leading zero count down based on the actual size of the value.
1720  // Also scale it down based on the size of the shift.
1721  unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
1722  if (MaskLZ < ScaleDown)
1723  return true;
1724  MaskLZ -= ScaleDown;
1725 
1726  // The final check is to ensure that any masked out high bits of X are
1727  // already known to be zero. Otherwise, the mask has a semantic impact
1728  // other than masking out a couple of low bits. Unfortunately, because of
1729  // the mask, zero extensions will be removed from operands in some cases.
1730  // This code works extra hard to look through extensions because we can
1731  // replace them with zero extensions cheaply if necessary.
1732  bool ReplacingAnyExtend = false;
1733  if (X.getOpcode() == ISD::ANY_EXTEND) {
1734  unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
1735  X.getOperand(0).getSimpleValueType().getSizeInBits();
1736  // Assume that we'll replace the any-extend with a zero-extend, and
1737  // narrow the search to the extended value.
1738  X = X.getOperand(0);
1739  MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
1740  ReplacingAnyExtend = true;
1741  }
1742  APInt MaskedHighBits =
1743  APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
1744  KnownBits Known = DAG.computeKnownBits(X);
1745  if (MaskedHighBits != Known.Zero) return true;
1746 
1747  // We've identified a pattern that can be transformed into a single shift
1748  // and an addressing mode. Make it so.
1749  MVT VT = N.getSimpleValueType();
1750  if (ReplacingAnyExtend) {
1751  assert(X.getValueType() != VT);
1752  // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
1753  SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
1754  insertDAGNode(DAG, N, NewX);
1755  X = NewX;
1756  }
1757  SDLoc DL(N);
1758  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
1759  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
1760  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
1761  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);
1762 
1763  // Insert the new nodes into the topological ordering. We must do this in
1764  // a valid topological ordering as nothing is going to go back and re-sort
1765  // these nodes. We continually insert before 'N' in sequence as this is
1766  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1767  // hierarchy left to express.
1768  insertDAGNode(DAG, N, NewSRLAmt);
1769  insertDAGNode(DAG, N, NewSRL);
1770  insertDAGNode(DAG, N, NewSHLAmt);
1771  insertDAGNode(DAG, N, NewSHL);
1772  DAG.ReplaceAllUsesWith(N, NewSHL);
1773  DAG.RemoveDeadNode(N.getNode());
1774 
1775  AM.Scale = 1 << AMShiftAmt;
1776  AM.IndexReg = NewSRL;
1777  return false;
1778 }
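// Worked example, not part of the original source, following the
// lookup_table case in the comment above with a zero-extended 16-bit *y:
// DAGCombine turns ((y >> 11) << 2) into (y >> 9) & 0x7c, so this function
// sees ShiftAmt = 9 and Mask = 0x7c. MaskTZ = 2 gives AMShiftAmt = 2, the
// mask is one contiguous run of bits, and the masked-out high bits of y are
// already known zero, so the node is rebuilt as ((y >> 11) << 2) with
// AM.IndexReg = (y >> 11) and AM.Scale = 4, i.e. the shrl $11 plus
// (%rsi,%rcx,4) form shown above.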
1779 
1780 // Transform "(X >> SHIFT) & (MASK << C1)" to
1781 // "((X >> (SHIFT + C1)) & (MASK)) << C1". Everything before the SHL will be
1782 // matched to a BEXTR later. Returns false if the simplification is performed.
1783 static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
1784  uint64_t Mask,
1785  SDValue Shift, SDValue X,
1786  X86ISelAddressMode &AM,
1787  const X86Subtarget &Subtarget) {
1788  if (Shift.getOpcode() != ISD::SRL ||
1789  !isa<ConstantSDNode>(Shift.getOperand(1)) ||
1790  !Shift.hasOneUse() || !N.hasOneUse())
1791  return true;
1792 
1793  // Only do this if BEXTR will be matched by matchBEXTRFromAndImm.
1794  if (!Subtarget.hasTBM() &&
1795  !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR()))
1796  return true;
1797 
1798  // We need to ensure that mask is a continuous run of bits.
1799  if (!isShiftedMask_64(Mask)) return true;
1800 
1801  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1802 
1803  // The amount of shift we're trying to fit into the addressing mode is taken
1804  // from the trailing zeros of the mask.
1805  unsigned AMShiftAmt = countTrailingZeros(Mask);
1806 
1807  // There is nothing we can do here unless the mask is removing some bits.
1808  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
1809  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
1810 
1811  MVT VT = N.getSimpleValueType();
1812  SDLoc DL(N);
1813  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
1814  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
1815  SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, VT);
1816  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, NewSRL, NewMask);
1817  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
1818  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewAnd, NewSHLAmt);
1819 
1820  // Insert the new nodes into the topological ordering. We must do this in
1821  // a valid topological ordering as nothing is going to go back and re-sort
1822  // these nodes. We continually insert before 'N' in sequence as this is
1823  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1824  // hierarchy left to express.
1825  insertDAGNode(DAG, N, NewSRLAmt);
1826  insertDAGNode(DAG, N, NewSRL);
1827  insertDAGNode(DAG, N, NewMask);
1828  insertDAGNode(DAG, N, NewAnd);
1829  insertDAGNode(DAG, N, NewSHLAmt);
1830  insertDAGNode(DAG, N, NewSHL);
1831  DAG.ReplaceAllUsesWith(N, NewSHL);
1832  DAG.RemoveDeadNode(N.getNode());
1833 
1834  AM.Scale = 1 << AMShiftAmt;
1835  AM.IndexReg = NewAnd;
1836  return false;
1837 }
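// Worked example, not part of the original source: for (x >> 4) & 0xfc
// (a shifted mask, 0x3f << 2) on a target where BEXTR is considered cheap,
// ShiftAmt = 4 and AMShiftAmt = 2, so the node is rebuilt as
// (((x >> 6) & 0x3f) << 2). The inner (x >> 6) & 0x3f is what a later BEXTR
// match can pick up, and the << 2 becomes AM.Scale = 4.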
1838 
1839 bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
1840  unsigned Depth) {
1841  SDLoc dl(N);
1842  LLVM_DEBUG({
1843  dbgs() << "MatchAddress: ";
1844  AM.dump(CurDAG);
1845  });
1846  // Limit recursion.
1847  if (Depth > 5)
1848  return matchAddressBase(N, AM);
1849 
1850  // If this is already a %rip relative address, we can only merge immediates
1851  // into it. Instead of handling this in every case, we handle it here.
1852  // RIP relative addressing: %rip + 32-bit displacement!
1853  if (AM.isRIPRelative()) {
1854  // FIXME: JumpTable and ExternalSymbol address currently don't like
1855  // displacements. It isn't very important, but this should be fixed for
1856  // consistency.
1857  if (!(AM.ES || AM.MCSym) && AM.JT != -1)
1858  return true;
1859 
1860  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
1861  if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
1862  return false;
1863  return true;
1864  }
1865 
1866  switch (N.getOpcode()) {
1867  default: break;
1868  case ISD::LOCAL_RECOVER: {
1869  if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
1870  if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
1871  // Use the symbol and don't prefix it.
1872  AM.MCSym = ESNode->getMCSymbol();
1873  return false;
1874  }
1875  break;
1876  }
1877  case ISD::Constant: {
1878  uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
1879  if (!foldOffsetIntoAddress(Val, AM))
1880  return false;
1881  break;
1882  }
1883 
1884  case X86ISD::Wrapper:
1885  case X86ISD::WrapperRIP:
1886  if (!matchWrapper(N, AM))
1887  return false;
1888  break;
1889 
1890  case ISD::LOAD:
1891  if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
1892  return false;
1893  break;
1894 
1895  case ISD::FrameIndex:
1896  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1897  AM.Base_Reg.getNode() == nullptr &&
1898  (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
1899  AM.BaseType = X86ISelAddressMode::FrameIndexBase;
1900  AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
1901  return false;
1902  }
1903  break;
1904 
1905  case ISD::SHL:
1906  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
1907  break;
1908 
1909  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1910  unsigned Val = CN->getZExtValue();
1911  // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
1912  // that the base operand remains free for further matching. If
1913  // the base doesn't end up getting used, a post-processing step
1914  // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
1915  if (Val == 1 || Val == 2 || Val == 3) {
1916  AM.Scale = 1 << Val;
1917  SDValue ShVal = N.getOperand(0);
1918 
1919  // Okay, we know that we have a scale by now. However, if the scaled
1920  // value is an add of something and a constant, we can fold the
1921  // constant into the disp field here.
1922  if (CurDAG->isBaseWithConstantOffset(ShVal)) {
1923  AM.IndexReg = ShVal.getOperand(0);
1924  ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getOperand(1));
1925  uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
1926  if (!foldOffsetIntoAddress(Disp, AM))
1927  return false;
1928  }
1929 
1930  AM.IndexReg = ShVal;
1931  return false;
1932  }
1933  }
1934  break;
1935 
1936  case ISD::SRL: {
1937  // Scale must not be used already.
1938  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
1939 
1940  // We only handle up to 64-bit values here as those are what matter for
1941  // addressing mode optimizations.
1942  assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
1943  "Unexpected value size!");
1944 
1945  SDValue And = N.getOperand(0);
1946  if (And.getOpcode() != ISD::AND) break;
1947  SDValue X = And.getOperand(0);
1948 
1949  // The mask used for the transform is expected to be post-shift, but we
1950  // found the shift first so just apply the shift to the mask before passing
1951  // it down.
1952  if (!isa<ConstantSDNode>(N.getOperand(1)) ||
1953  !isa<ConstantSDNode>(And.getOperand(1)))
1954  break;
1955  uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);
1956 
1957  // Try to fold the mask and shift into the scale, and return false if we
1958  // succeed.
1959  if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
1960  return false;
1961  break;
1962  }
1963 
1964  case ISD::SMUL_LOHI:
1965  case ISD::UMUL_LOHI:
1966  // A mul_lohi where we need the low part can be folded as a plain multiply.
1967  if (N.getResNo() != 0) break;
1968  LLVM_FALLTHROUGH;
1969  case ISD::MUL:
1970  case X86ISD::MUL_IMM:
1971  // X*[3,5,9] -> X+X*[2,4,8]
1972  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1973  AM.Base_Reg.getNode() == nullptr &&
1974  AM.IndexReg.getNode() == nullptr) {
1975  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1976  if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
1977  CN->getZExtValue() == 9) {
1978  AM.Scale = unsigned(CN->getZExtValue())-1;
1979 
1980  SDValue MulVal = N.getOperand(0);
1981  SDValue Reg;
1982 
1983  // Okay, we know that we have a scale by now. However, if the scaled
1984  // value is an add of something and a constant, we can fold the
1985  // constant into the disp field here.
1986  if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
1987  isa<ConstantSDNode>(MulVal.getOperand(1))) {
1988  Reg = MulVal.getOperand(0);
1989  ConstantSDNode *AddVal =
1990  cast<ConstantSDNode>(MulVal.getOperand(1));
1991  uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
1992  if (foldOffsetIntoAddress(Disp, AM))
1993  Reg = N.getOperand(0);
1994  } else {
1995  Reg = N.getOperand(0);
1996  }
1997 
1998  AM.IndexReg = AM.Base_Reg = Reg;
1999  return false;
2000  }
2001  }
2002  break;
2003 
2004  case ISD::SUB: {
2005  // Given A-B, if A can be completely folded into the address and
2006  // the index field with the index field unused, use -B as the index.
2007  // This is a win if A has multiple parts that can be folded into
2008  // the address. Also, this saves a mov if the base register has
2009  // other uses, since it avoids a two-address sub instruction, however
2010  // it costs an additional mov if the index register has other uses.
2011 
2012  // Add an artificial use to this node so that we can keep track of
2013  // it if it gets CSE'd with a different node.
2014  HandleSDNode Handle(N);
2015 
2016  // Test if the LHS of the sub can be folded.
2017  X86ISelAddressMode Backup = AM;
2018  if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) {
2019  N = Handle.getValue();
2020  AM = Backup;
2021  break;
2022  }
2023  N = Handle.getValue();
2024  // Test if the index field is free for use.
2025  if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
2026  AM = Backup;
2027  break;
2028  }
2029 
2030  int Cost = 0;
2031  SDValue RHS = N.getOperand(1);
2032  // If the RHS involves a register with multiple uses, this
2033  // transformation incurs an extra mov, due to the neg instruction
2034  // clobbering its operand.
2035  if (!RHS.getNode()->hasOneUse() ||
2036  RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
2037  RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
2038  RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
2039  (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
2040  RHS.getOperand(0).getValueType() == MVT::i32))
2041  ++Cost;
2042  // If the base is a register with multiple uses, this
2043  // transformation may save a mov.
2044  if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
2045  !AM.Base_Reg.getNode()->hasOneUse()) ||
2046  AM.BaseType == X86ISelAddressMode::FrameIndexBase)
2047  --Cost;
2048  // If the folded LHS was interesting, this transformation saves
2049  // address arithmetic.
2050  if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
2051  ((AM.Disp != 0) && (Backup.Disp == 0)) +
2052  (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
2053  --Cost;
2054  // If it doesn't look like it may be an overall win, don't do it.
2055  if (Cost >= 0) {
2056  AM = Backup;
2057  break;
2058  }
2059 
2060  // Ok, the transformation is legal and appears profitable. Go for it.
2061  // Negation will be emitted later to avoid creating dangling nodes if this
2062  // was an unprofitable LEA.
2063  AM.IndexReg = RHS;
2064  AM.NegateIndex = true;
2065  AM.Scale = 1;
2066  return false;
2067  }
2068 
2069  case ISD::ADD:
2070  if (!matchAdd(N, AM, Depth))
2071  return false;
2072  break;
2073 
2074  case ISD::OR:
2075  // We want to look through a transform in InstCombine and DAGCombiner that
2076  // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
2077  // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3))
2078  // An 'lea' can then be used to match the shift (multiply) and add:
2079  // and $1, %esi
2080  // lea (%rsi, %rdi, 8), %rax
2081  if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) &&
2082  !matchAdd(N, AM, Depth))
2083  return false;
2084  break;
2085 
2086  case ISD::AND: {
2087  // Perform some heroic transforms on an and of a constant-count shift
2088  // with a constant to enable use of the scaled offset field.
2089 
2090  // Scale must not be used already.
2091  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
2092 
2093  // We only handle up to 64-bit values here as those are what matter for
2094  // addressing mode optimizations.
2095  assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
2096  "Unexpected value size!");
2097 
2098  if (!isa<ConstantSDNode>(N.getOperand(1)))
2099  break;
2100 
2101  if (N.getOperand(0).getOpcode() == ISD::SRL) {
2102  SDValue Shift = N.getOperand(0);
2103  SDValue X = Shift.getOperand(0);
2104 
2105  uint64_t Mask = N.getConstantOperandVal(1);
2106 
2107  // Try to fold the mask and shift into an extract and scale.
2108  if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
2109  return false;
2110 
2111  // Try to fold the mask and shift directly into the scale.
2112  if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
2113  return false;
2114 
2115  // Try to fold the mask and shift into BEXTR and scale.
2116  if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
2117  return false;
2118  }
2119 
2120  // Try to swap the mask and shift to place shifts which can be done as
2121  // a scale on the outside of the mask.
2122  if (!foldMaskedShiftToScaledMask(*CurDAG, N, AM))
2123  return false;
2124 
2125  break;
2126  }
2127  case ISD::ZERO_EXTEND: {
2128  // Try to widen a zexted shift left to the same size as its use, so we can
2129  // match the shift as a scale factor.
2130  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
2131  break;
2132  if (N.getOperand(0).getOpcode() != ISD::SHL || !N.getOperand(0).hasOneUse())
2133  break;
2134 
2135  // Give up if the shift is not a valid scale factor [1,2,3].
2136  SDValue Shl = N.getOperand(0);
2137  auto *ShAmtC = dyn_cast<ConstantSDNode>(Shl.getOperand(1));
2138  if (!ShAmtC || ShAmtC->getZExtValue() > 3)
2139  break;
2140 
2141  // The narrow shift must only shift out zero bits (it must be 'nuw').
2142  // That makes it safe to widen to the destination type.
2143  APInt HighZeros = APInt::getHighBitsSet(Shl.getValueSizeInBits(),
2144  ShAmtC->getZExtValue());
2145  if (!CurDAG->MaskedValueIsZero(Shl.getOperand(0), HighZeros))
2146  break;
2147 
2148  // zext (shl nuw i8 %x, C) to i32 --> shl (zext i8 %x to i32), (zext C)
2149  MVT VT = N.getSimpleValueType();
2150  SDLoc DL(N);
2151  SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Shl.getOperand(0));
2152  SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, Shl.getOperand(1));
2153 
2154  // Convert the shift to scale factor.
2155  AM.Scale = 1 << ShAmtC->getZExtValue();
2156  AM.IndexReg = Zext;
2157 
2158  insertDAGNode(*CurDAG, N, Zext);
2159  insertDAGNode(*CurDAG, N, NewShl);
2160  CurDAG->ReplaceAllUsesWith(N, NewShl);
2161  CurDAG->RemoveDeadNode(N.getNode());
2162  return false;
2163  }
2164  }
2165 
2166  return matchAddressBase(N, AM);
2167 }
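// Illustrative note, not part of the original source: for an address such as
// (add %base, (shl %i, 3)), the ISD::ADD case defers to matchAdd, the
// ISD::SHL case records AM.Scale = 8 with AM.IndexReg = %i, and the
// remaining plain operand falls through to matchAddressBase and becomes
// AM.Base_Reg, yielding the familiar (%base,%i,8) operand.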
2168 
2169 /// Helper for MatchAddress. Add the specified node to the
2170 /// specified addressing mode without any further recursion.
2171 bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
2172  // Is the base register already occupied?
2173  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
2174  // If so, check to see if the scale index register is set.
2175  if (!AM.IndexReg.getNode()) {
2176  AM.IndexReg = N;
2177  AM.Scale = 1;
2178  return false;
2179  }
2180 
2181  // Otherwise, we cannot select it.
2182  return true;
2183  }
2184 
2185  // Default, generate it as a register.
2186  AM.BaseType = X86ISelAddressMode::RegBase;
2187  AM.Base_Reg = N;
2188  return false;
2189 }
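// Illustrative note, not part of the original source: this is the terminal
// case of matching. For example, once the SHL case above has claimed the
// index (say scale 8), a remaining plain pointer value lands here and
// becomes the base register; if both base and index are already taken, the
// "true" return tells the caller to back off.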
2190 
2191 /// Helper for selectVectorAddr. Handles things that can be folded into a
2192 /// gather scatter address. The index register and scale should have already
2193 /// been handled.
2194 bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
2195  // TODO: Support other operations.
2196  switch (N.getOpcode()) {
2197  case ISD::Constant: {
2198  uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
2199  if (!foldOffsetIntoAddress(Val, AM))
2200  return false;
2201  break;
2202  }
2203  case X86ISD::Wrapper:
2204  if (!matchWrapper(N, AM))
2205  return false;
2206  break;
2207  }
2208 
2209  return matchAddressBase(N, AM);
2210 }
2211 
2212 bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
2213  SDValue &Scale, SDValue &Index,
2214  SDValue &Disp, SDValue &Segment) {
2215  X86ISelAddressMode AM;
2216  auto *Mgs = cast<X86MaskedGatherScatterSDNode>(Parent);
2217  AM.IndexReg = Mgs->getIndex();
2218  AM.Scale = cast<ConstantSDNode>(Mgs->getScale())->getZExtValue();
2219 
2220  unsigned AddrSpace = cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
2221  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
2222  if (AddrSpace == 256)
2223  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
2224  if (AddrSpace == 257)
2225  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
2226  if (AddrSpace == 258)
2227  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
2228 
2229  SDLoc DL(N);
2230  MVT VT = N.getSimpleValueType();
2231 
2232  // Try to match into the base and displacement fields.
2233  if (matchVectorAddress(N, AM))
2234  return false;
2235 
2236  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
2237  return true;
2238 }
2239 
2240 /// Returns true if it is able to pattern match an addressing mode.
2241 /// It returns the operands which make up the maximal addressing mode it can
2242 /// match by reference.
2243 ///
2244 /// Parent is the parent node of the addr operand that is being matched. It
2245 /// is always a load, store, atomic node, or null. It is only null when
2246 /// checking memory operands for inline asm nodes.
2247 bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
2248  SDValue &Scale, SDValue &Index,
2249  SDValue &Disp, SDValue &Segment) {
2250  X86ISelAddressMode AM;
2251 
2252  if (Parent &&
2253  // This list of opcodes are all the nodes that have an "addr:$ptr" operand
2254  // that are not a MemSDNode, and thus don't have proper addrspace info.
2255  Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
2256  Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
2257  Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
2258  Parent->getOpcode() != X86ISD::ENQCMD && // Fixme
2259  Parent->getOpcode() != X86ISD::ENQCMDS && // Fixme
2260  Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
2261  Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
2262  unsigned AddrSpace =
2263  cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
2264  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
2265  if (AddrSpace == 256)
2266  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
2267  if (AddrSpace == 257)
2268  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
2269  if (AddrSpace == 258)
2270  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
2271  }
2272 
2273  // Save the DL and VT before calling matchAddress, it can invalidate N.
2274  SDLoc DL(N);
2275  MVT VT = N.getSimpleValueType();
2276 
2277  if (matchAddress(N, AM))
2278  return false;
2279 
2280  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
2281  return true;
2282 }
2283 
2284 // We can only fold a load if all nodes between it and the root node have a
2285 // single use. If there are additional uses, we could end up duplicating the
2286 // load.
2287 static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *User) {
2288  while (User != Root) {
2289  if (!User->hasOneUse())
2290  return false;
2291  User = *User->use_begin();
2292  }
2293 
2294  return true;
2295 }
2296 
2297 /// Match a scalar SSE load. In particular, we want to match a load whose top
2298 /// elements are either undef or zeros. The load flavor is derived from the
2299 /// type of N, which is either v4f32 or v2f64.
2300 ///
2301 /// We also return:
2302 /// PatternChainNode: this is the matched node that has a chain input and
2303 /// output.
2304 bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDNode *Parent,
2305  SDValue N, SDValue &Base,
2306  SDValue &Scale, SDValue &Index,
2307  SDValue &Disp, SDValue &Segment,
2308  SDValue &PatternNodeWithChain) {
2309  if (!hasSingleUsesFromRoot(Root, Parent))
2310  return false;
2311 
2312  // We can allow a full vector load here since narrowing a load is ok unless
2313  // it's volatile or atomic.
2314  if (ISD::isNON_EXTLoad(N.getNode())) {
2315  LoadSDNode *LD = cast<LoadSDNode>(N);
2316  if (LD->isSimple() &&
2317  IsProfitableToFold(N, LD, Root) &&
2318  IsLegalToFold(N, Parent, Root, OptLevel)) {
2319  PatternNodeWithChain = N;
2320  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
2321  Segment);
2322  }
2323  }
2324 
2325  // We can also match the special zero extended load opcode.
2326  if (N.getOpcode() == X86ISD::VZEXT_LOAD) {
2327  PatternNodeWithChain = N;
2328  if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
2329  IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
2330  auto *MI = cast<MemIntrinsicSDNode>(PatternNodeWithChain);
2331  return selectAddr(MI, MI->getBasePtr(), Base, Scale, Index, Disp,
2332  Segment);
2333  }
2334  }
2335 
2336  // Need to make sure that the SCALAR_TO_VECTOR and load are both only used
2337  // once. Otherwise the load might get duplicated and the chain output of the
2338  // duplicate load will not be observed by all dependencies.
2339  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR && N.getNode()->hasOneUse()) {
2340  PatternNodeWithChain = N.getOperand(0);
2341  if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
2342  IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
2343  IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
2344  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
2345  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
2346  Segment);
2347  }
2348  }
2349 
2350  return false;
2351 }
2352 
2353 
2354 bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
2355  if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2356  uint64_t ImmVal = CN->getZExtValue();
2357  if (!isUInt<32>(ImmVal))
2358  return false;
2359 
2360  Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i64);
2361  return true;
2362  }
2363 
2364  // In static codegen with small code model, we can get the address of a label
2365  // into a register with 'movl'
2366  if (N->getOpcode() != X86ISD::Wrapper)
2367  return false;
2368 
2369  N = N.getOperand(0);
2370 
2371  // At least GNU as does not accept 'movl' for TPOFF relocations.
2372  // FIXME: We could use 'movl' when we know we are targeting MC.
2373  if (N->getOpcode() == ISD::TargetGlobalTLSAddress)
2374  return false;
2375 
2376  Imm = N;
2377  if (N->getOpcode() != ISD::TargetGlobalAddress)
2378  return TM.getCodeModel() == CodeModel::Small;
2379 
2380  Optional<ConstantRange> CR =
2381  cast<GlobalAddressSDNode>(N)->getGlobal()->getAbsoluteSymbolRange();
2382  if (!CR)
2383  return TM.getCodeModel() == CodeModel::Small;
2384 
2385  return CR->getUnsignedMax().ult(1ull << 32);
2386 }
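// Illustrative note, not part of the original source: a constant such as
// 0x00000000ffffffff passes the isUInt<32> check and can be materialized
// with a 32-bit movl (which implicitly zeroes the upper half), while
// 0x100000000 fails it and needs a full movabsq. For symbolic addresses,
// the small code model, or an absolute-symbol range known to fit in 32 bits,
// justifies the same shortcut.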
2387 
2388 bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
2389  SDValue &Scale, SDValue &Index,
2390  SDValue &Disp, SDValue &Segment) {
2391  // Save the debug loc before calling selectLEAAddr, in case it invalidates N.
2392  SDLoc DL(N);
2393 
2394  if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
2395  return false;
2396 
2397  RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
2398  if (RN && RN->getReg() == 0)
2399  Base = CurDAG->getRegister(0, MVT::i64);
2400  else if (Base.getValueType() == MVT::i32 && !isa<FrameIndexSDNode>(Base)) {
2401  // Base could already be %rip, particularly in the x32 ABI.
2402  SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
2403  MVT::i64), 0);
2404  Base = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
2405  Base);
2406  }
2407 
2408  RN = dyn_cast<RegisterSDNode>(Index);
2409  if (RN && RN->getReg() == 0)
2410  Index = CurDAG->getRegister(0, MVT::i64);
2411  else {
2412  assert(Index.getValueType() == MVT::i32 &&
2413  "Expect to be extending 32-bit registers for use in LEA");
2414  SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
2415  MVT::i64), 0);
2416  Index = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
2417  Index);
2418  }
2419 
2420  return true;
2421 }
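// Illustrative note, not part of the original source: the 64_32 LEA form
// produces a 32-bit result but addresses with 64-bit registers, so a 32-bit
// Base or Index returned by selectLEAAddr is widened here by inserting it
// into the sub_32bit lane of an IMPLICIT_DEF 64-bit value, while a "no
// register" operand (register 0) is simply replaced by its 64-bit
// counterpart.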
2422 
2423 /// Calls SelectAddr and determines if the maximal addressing
2424 /// mode it matches can be cost effectively emitted as an LEA instruction.
2425 bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
2426  SDValue &Base, SDValue &Scale,
2427  SDValue &Index, SDValue &Disp,
2428  SDValue &Segment) {
2429  X86ISelAddressMode AM;
2430 
2431  // Save the DL and VT before calling matchAddress, it can invalidate N.
2432  SDLoc DL(N);
2433  MVT VT = N.getSimpleValueType();
2434 
2435  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
2436  // segments.
2437  SDValue Copy = AM.Segment;
2438  SDValue T = CurDAG->getRegister(0, MVT::i32);
2439  AM.Segment = T;
2440  if (matchAddress(N, AM))
2441  return false;
2442  assert (T == AM.Segment);
2443  AM.Segment = Copy;
2444 
2445  unsigned Complexity = 0;
2446  if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode())
2447  Complexity = 1;
2448  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
2449  Complexity = 4;
2450 
2451  if (AM.IndexReg.getNode())
2452  Complexity++;
2453 
2454  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
2455  // a simple shift.
2456  if (AM.Scale > 1)
2457  Complexity++;
2458 
2459  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
2460  // to a LEA. This is determined with some experimentation but is by no means
2461  // optimal (especially for code size consideration). LEA is nice because of
2462  // its three-address nature. Tweak the cost function again when we can run
2463  // convertToThreeAddress() at register allocation time.
2464  if (AM.hasSymbolicDisplacement()) {
2465  // For X86-64, always use LEA to materialize RIP-relative addresses.
2466  if (Subtarget->is64Bit())
2467  Complexity = 4;
2468  else
2469  Complexity += 2;
2470  }
2471 
2472  // Heuristic: try harder to form an LEA from ADD if the operands set flags.
2473  // Unlike ADD, LEA does not affect flags, so we will be less likely to require
2474  // duplicating flag-producing instructions later in the pipeline.
2475  if (N.getOpcode() == ISD::ADD) {
2476  auto isMathWithFlags = [](SDValue V) {
2477  switch (V.getOpcode()) {
2478  case X86ISD::ADD:
2479  case X86ISD::SUB:
2480  case X86ISD::ADC:
2481  case X86ISD::SBB:
2482  /* TODO: These opcodes can be added safely, but we may want to justify
2483  their inclusion for different reasons (better for reg-alloc).
2484  case X86ISD::SMUL:
2485  case X86ISD::UMUL:
2486  case X86ISD::OR:
2487  case X86ISD::XOR:
2488  case X86ISD::AND:
2489  */
2490  // Value 1 is the flag output of the node - verify it's not dead.
2491  return !SDValue(V.getNode(), 1).use_empty();
2492  default:
2493  return false;
2494  }
2495  };
2496  // TODO: This could be an 'or' rather than 'and' to make the transform more
2497  // likely to happen. We might want to factor in whether there's a
2498  // load folding opportunity for the math op that disappears with LEA.
2499  if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1)))
2500  Complexity++;
2501  }
2502 
2503  if (AM.Disp)
2504  Complexity++;
2505 
2506  // If it isn't worth using an LEA, reject it.
2507  if (Complexity <= 2)
2508  return false;
2509 
2510  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
2511  return true;
2512 }
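// Worked example, not part of the original source: for N = (add %a, %b) with
// no displacement the mode has a base and an index, so Complexity is
// 1 + 1 = 2 and the LEA is rejected in favor of a plain ADD. With a
// displacement as well, e.g. %a + %b + 16, Complexity reaches 3 and the
// three-operand form is emitted as an LEA instead.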
2513 
2514 /// This is only run on TargetGlobalTLSAddress nodes.
2515 bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
2516  SDValue &Scale, SDValue &Index,
2517  SDValue &Disp, SDValue &Segment) {
2518  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
2519  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
2520 
2521  X86ISelAddressMode AM;
2522  AM.GV = GA->getGlobal();
2523  AM.Disp += GA->getOffset();
2524  AM.SymbolFlags = GA->getTargetFlags();
2525 
2526  MVT VT = N.getSimpleValueType();
2527  if (VT == MVT::i32) {
2528  AM.Scale = 1;
2529  AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
2530  }
2531 
2532  getAddressOperands(AM, SDLoc(N), VT, Base, Scale, Index, Disp, Segment);
2533  return true;
2534 }
2535 
2536 bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) {
2537  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
2538  Op = CurDAG->getTargetConstant(CN->getAPIntValue(), SDLoc(CN),
2539  N.getValueType());
2540  return true;
2541  }
2542 
2543  // Keep track of the original value type and whether this value was
2544  // truncated. If we see a truncation from pointer type to VT that truncates
2545  // bits that are known to be zero, we can use a narrow reference.
2546  EVT VT = N.getValueType();
2547  bool WasTruncated = false;
2548  if (N.getOpcode() == ISD::TRUNCATE) {
2549  WasTruncated = true;
2550  N = N.getOperand(0);
2551  }
2552 
2553  if (N.getOpcode() != X86ISD::Wrapper)
2554  return false;
2555 
2556  // We can only use non-GlobalValues as immediates if they were not truncated,
2557  // as we do not have any range information. If we have a GlobalValue and the
2558  // address was not truncated, we can select it as an operand directly.
2559  unsigned Opc = N.getOperand(0)->getOpcode();
2560  if (Opc != ISD::TargetGlobalAddress || !WasTruncated) {
2561  Op = N.getOperand(0);
2562  // We can only select the operand directly if we didn't have to look past a
2563  // truncate.
2564  return !WasTruncated;
2565  }
2566 
2567  // Check that the global's range fits into VT.
2568  auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0));
2569  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2570  if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits()))
2571  return false;
2572 
2573  // Okay, we can use a narrow reference.
2574  Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT,
2575  GA->getOffset(), GA->getTargetFlags());
2576  return true;
2577 }
2578 
2579 bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
2580  SDValue &Base, SDValue &Scale,
2581  SDValue &Index, SDValue &Disp,
2582  SDValue &Segment) {
2583  assert(Root && P && "Unknown root/parent nodes");
2584  if (!ISD::isNON_EXTLoad(N.getNode()) ||
2585  !IsProfitableToFold(N, P, Root) ||
2586  !IsLegalToFold(N, P, Root, OptLevel))
2587  return false;
2588 
2589  return selectAddr(N.getNode(),
2590  N.getOperand(1), Base, Scale, Index, Disp, Segment);
2591 }
2592 
2593 /// Return an SDNode that returns the value of the global base register.
2594 /// Output instructions required to initialize the global base register,
2595 /// if necessary.
2596 SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
2597  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
2598  auto &DL = MF->getDataLayout();
2599  return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
2600 }
2601 
2602 bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
2603  if (N->getOpcode() == ISD::TRUNCATE)
2604  N = N->getOperand(0).getNode();
2605  if (N->getOpcode() != X86ISD::Wrapper)
2606  return false;
2607 
2608  auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0));
2609  if (!GA)
2610  return false;
2611 
2612  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2613  return CR && CR->getSignedMin().sge(-1ull << Width) &&
2614  CR->getSignedMax().slt(1ull << Width);
2615 }
2616 
2617 static X86::CondCode getCondFromNode(SDNode *N) {
2618  assert(N->isMachineOpcode() && "Unexpected node");
2619  X86::CondCode CC = X86::COND_INVALID;
2620  unsigned Opc = N->getMachineOpcode();
2621  if (Opc == X86::JCC_1)
2622  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(1));
2623  else if (Opc == X86::SETCCr)
2624  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(0));
2625  else if (Opc == X86::SETCCm)
2626  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(5));
2627  else if (Opc == X86::CMOV16rr || Opc == X86::CMOV32rr ||
2628  Opc == X86::CMOV64rr)
2629  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(2));
2630  else if (Opc == X86::CMOV16rm || Opc == X86::CMOV32rm ||
2631  Opc == X86::CMOV64rm)
2632  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(6));
2633 
2634  return CC;
2635 }
2636 
2637 /// Test whether the given X86ISD::CMP node has any users that use a flag
2638 /// other than ZF.
2639 bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const {
2640  // Examine each user of the node.
2641  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2642  UI != UE; ++UI) {
2643  // Only check things that use the flags.
2644  if (UI.getUse().getResNo() != Flags.getResNo())
2645  continue;
2646  // Only examine CopyToReg uses that copy to EFLAGS.
2647  if (UI->getOpcode() != ISD::CopyToReg ||
2648  cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2649  return false;
2650  // Examine each user of the CopyToReg use.
2651  for (SDNode::use_iterator FlagUI = UI->use_begin(),
2652  FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
2653  // Only examine the Flag result.
2654  if (FlagUI.getUse().getResNo() != 1) continue;
2655  // Anything unusual: assume conservatively.
2656  if (!FlagUI->isMachineOpcode()) return false;
2657  // Examine the condition code of the user.
2658  X86::CondCode CC = getCondFromNode(*FlagUI);
2659 
2660  switch (CC) {
2661  // Comparisons which only use the zero flag.
2662  case X86::COND_E: case X86::COND_NE:
2663  continue;
2664  // Anything else: assume conservatively.
2665  default:
2666  return false;
2667  }
2668  }
2669  }
2670  return true;
2671 }
2672 
2673 /// Test whether the given X86ISD::CMP node has any uses which require the SF
2674 /// flag to be accurate.
2675 bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
2676  // Examine each user of the node.
2677  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2678  UI != UE; ++UI) {
2679  // Only check things that use the flags.
2680  if (UI.getUse().getResNo() != Flags.getResNo())
2681  continue;
2682  // Only examine CopyToReg uses that copy to EFLAGS.
2683  if (UI->getOpcode() != ISD::CopyToReg ||
2684  cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2685  return false;
2686  // Examine each user of the CopyToReg use.
2687  for (SDNode::use_iterator FlagUI = UI->use_begin(),
2688  FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
2689  // Only examine the Flag result.
2690  if (FlagUI.getUse().getResNo() != 1) continue;
2691  // Anything unusual: assume conservatively.
2692  if (!FlagUI->isMachineOpcode()) return false;
2693  // Examine the condition code of the user.
2694  X86::CondCode CC = getCondFromNode(*FlagUI);
2695 
2696  switch (CC) {
2697  // Comparisons which don't examine the SF flag.
2698  case X86::COND_A: case X86::COND_AE:
2699  case X86::COND_B: case X86::COND_BE:
2700  case X86::COND_E: case X86::COND_NE:
2701  case X86::COND_O: case X86::COND_NO:
2702  case X86::COND_P: case X86::COND_NP:
2703  continue;
2704  // Anything else: assume conservatively.
2705  default:
2706  return false;
2707  }
2708  }
2709  }
2710  return true;
2711 }
2712 
2713 static bool mayUseCarryFlag(X86::CondCode CC) {
2714  switch (CC) {
2715  // Comparisons which don't examine the CF flag.
2716  case X86::COND_O: case X86::COND_NO:
2717  case X86::COND_E: case X86::COND_NE:
2718  case X86::COND_S: case X86::COND_NS:
2719  case X86::COND_P: case X86::COND_NP:
2720  case X86::COND_L: case X86::COND_GE:
2721  case X86::COND_G: case X86::COND_LE:
2722  return false;
2723  // Anything else: assume conservatively.
2724  default:
2725  return true;
2726  }
2727 }
2728 
2729 /// Test whether the given node which sets flags has any uses which require the
2730 /// CF flag to be accurate.
2731 bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
2732  // Examine each user of the node.
2733  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2734  UI != UE; ++UI) {
2735  // Only check things that use the flags.
2736  if (UI.getUse().getResNo() != Flags.getResNo())
2737  continue;
2738 
2739  unsigned UIOpc = UI->getOpcode();
2740 
2741  if (UIOpc == ISD::CopyToReg) {
2742  // Only examine CopyToReg uses that copy to EFLAGS.
2743  if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2744  return false;
2745  // Examine each user of the CopyToReg use.
2746  for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
2747  FlagUI != FlagUE; ++FlagUI) {
2748  // Only examine the Flag result.
2749  if (FlagUI.getUse().getResNo() != 1)
2750  continue;
2751  // Anything unusual: assume conservatively.
2752  if (!FlagUI->isMachineOpcode())
2753  return false;
2754  // Examine the condition code of the user.
2755  X86::CondCode CC = getCondFromNode(*FlagUI);
2756 
2757  if (mayUseCarryFlag(CC))
2758  return false;
2759  }
2760 
2761  // This CopyToReg is ok. Move on to the next user.
2762  continue;
2763  }
2764 
2765  // This might be an unselected node. So look for the pre-isel opcodes that
2766  // use flags.
2767  unsigned CCOpNo;
2768  switch (UIOpc) {
2769  default:
2770  // Something unusual. Be conservative.
2771  return false;
2772  case X86ISD::SETCC: CCOpNo = 0; break;
2773  case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
2774  case X86ISD::CMOV: CCOpNo = 2; break;
2775  case X86ISD::BRCOND: CCOpNo = 2; break;
2776  }
2777 
2778  X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo);
2779  if (mayUseCarryFlag(CC))
2780  return false;
2781  }
2782  return true;
2783 }
2784 
2785 /// Check whether or not the chain ending in StoreNode is suitable for doing
2786 /// the {load; op; store} to modify transformation.
2787 static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
2788  SDValue StoredVal, SelectionDAG *CurDAG,
2789  unsigned LoadOpNo,
2790  LoadSDNode *&LoadNode,
2791  SDValue &InputChain) {
2792  // Is the stored value result 0 of the operation?
2793  if (StoredVal.getResNo() != 0) return false;
2794 
2795  // Are there other uses of the operation other than the store?
2796  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
2797 
2798  // Is the store non-extending and non-indexed?
2799  if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
2800  return false;
2801 
2802  SDValue Load = StoredVal->getOperand(LoadOpNo);
2803  // Is the stored value a non-extending and non-indexed load?
2804  if (!ISD::isNormalLoad(Load.getNode())) return false;
2805 
2806  // Return LoadNode by reference.
2807  LoadNode = cast<LoadSDNode>(Load);
2808 
2809  // Is store the only read of the loaded value?
2810  if (!Load.hasOneUse())
2811  return false;
2812 
2813  // Is the address of the store the same as the load?
2814  if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
2815  LoadNode->getOffset() != StoreNode->getOffset())
2816  return false;
2817 
2818  bool FoundLoad = false;
2819  SmallVector<SDValue, 4> ChainOps;
2820  SmallVector<const SDNode *, 4> LoopWorklist;
2821  SmallPtrSet<const SDNode *, 16> Visited;
2822  const unsigned int Max = 1024;
2823 
2824  // Visualization of Load-Op-Store fusion:
2825  // -------------------------
2826  // Legend:
2827  // *-lines = Chain operand dependencies.
2828  // |-lines = Normal operand dependencies.
2829  // Dependencies flow down and right. n-suffix references multiple nodes.
2830  //
2831  // C Xn C
2832  // * * *
2833  // * * *
2834  // Xn A-LD Yn TF Yn
2835  // * * \ | * |
2836  // * * \ | * |
2837  // * * \ | => A--LD_OP_ST
2838  // * * \| \
2839  // TF OP \
2840  // * | \ Zn
2841  // * | \
2842  // A-ST Zn
2843  //
2844 
2845  // This merge induced dependences from: #1: Xn -> LD, OP, Zn
2846  // #2: Yn -> LD
2847  // #3: ST -> Zn
2848 
2849  // Ensure the transform is safe by checking for the dual
2850  // dependencies to make sure we do not induce a loop.
2851 
2852  // As LD is a predecessor to both OP and ST we can do this by checking:
2853  // a). if LD is a predecessor to a member of Xn or Yn.
2854  // b). if a Zn is a predecessor to ST.
2855 
2856  // However, (b) can only occur through being a chain predecessor to
2857  // ST, which is the same as Zn being a member or predecessor of Xn,
2858  // which is a subset of LD being a predecessor of Xn. So it's
2859  // subsumed by check (a).
2860 
2861  SDValue Chain = StoreNode->getChain();
2862 
2863  // Gather X elements in ChainOps.
2864  if (Chain == Load.getValue(1)) {
2865  FoundLoad = true;
2866  ChainOps.push_back(Load.getOperand(0));
2867  } else if (Chain.getOpcode() == ISD::TokenFactor) {
2868  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
2869  SDValue Op = Chain.getOperand(i);
2870  if (Op == Load.getValue(1)) {
2871  FoundLoad = true;
2872  // Drop Load, but keep its chain. No cycle check necessary.
2873  ChainOps.push_back(Load.getOperand(0));
2874  continue;
2875  }
2876  LoopWorklist.push_back(Op.getNode());
2877  ChainOps.push_back(Op);
2878  }
2879  }
2880 
2881  if (!FoundLoad)
2882  return false;
2883 
2884  // Worklist is currently Xn. Add Yn to worklist.
2885  for (SDValue Op : StoredVal->ops())
2886  if (Op.getNode() != LoadNode)
2887  LoopWorklist.push_back(Op.getNode());
2888 
2889  // Check (a) if Load is a predecessor to Xn + Yn
2890  if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
2891  true))
2892  return false;
2893 
2894  InputChain =
2895  CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps);
2896  return true;
2897 }
2898 
2899 // Change a chain of {load; op; store} of the same value into a simple op
2900 // through memory of that value, if the uses of the modified value and its
2901 // address are suitable.
2902 //
2903 // The tablegen pattern memory operand pattern is currently not able to match
2904 // the case where the EFLAGS on the original operation are used.
2905 //
2906 // To move this to tablegen, we'll need to improve tablegen to allow flags to
2907 // be transferred from a node in the pattern to the result node, probably with
2908 // a new keyword. For example, we have this
2909 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2910 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2911 // (implicit EFLAGS)]>;
2912 // but maybe need something like this
2913 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2914 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2915 // (transferrable EFLAGS)]>;
2916 //
2917 // Until then, we manually fold these and instruction select the operation
2918 // here.
2919 bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
2920  StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
2921  SDValue StoredVal = StoreNode->getOperand(1);
2922  unsigned Opc = StoredVal->getOpcode();
2923 
2924  // Before we try to select anything, make sure this is memory operand size
2925  // and opcode we can handle. Note that this must match the code below that
2926  // actually lowers the opcodes.
2927  EVT MemVT = StoreNode->getMemoryVT();
2928  if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
2929  MemVT != MVT::i8)
2930  return false;
2931 
2932  bool IsCommutable = false;
2933  bool IsNegate = false;
2934  switch (Opc) {
2935  default:
2936  return false;
2937  case X86ISD::SUB:
2938  IsNegate = isNullConstant(StoredVal.getOperand(0));
2939  break;
2940  case X86ISD::SBB:
2941  break;
2942  case X86ISD::ADD:
2943  case X86ISD::ADC:
2944  case X86ISD::AND:
2945  case X86ISD::OR:
2946  case X86ISD::XOR:
2947  IsCommutable = true;
2948  break;
2949  }
2950 
2951  unsigned LoadOpNo = IsNegate ? 1 : 0;
2952  LoadSDNode *LoadNode = nullptr;
2953  SDValue InputChain;
2954  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
2955  LoadNode, InputChain)) {
2956  if (!IsCommutable)
2957  return false;
2958 
2959  // This operation is commutable, try the other operand.
2960  LoadOpNo = 1;
2961  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
2962  LoadNode, InputChain))
2963  return false;
2964  }
2965 
2966  SDValue Base, Scale, Index, Disp, Segment;
2967  if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp,
2968  Segment))
2969  return false;
2970 
2971  auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
2972  unsigned Opc8) {
2973  switch (MemVT.getSimpleVT().SimpleTy) {
2974  case MVT::i64:
2975  return Opc64;
2976  case MVT::i32:
2977  return Opc32;
2978  case MVT::i16:
2979  return Opc16;
2980  case MVT::i8:
2981  return Opc8;
2982  default:
2983  llvm_unreachable("Invalid size!");
2984  }
2985  };
2986 
2987  MachineSDNode *Result;
2988  switch (Opc) {
2989  case X86ISD::SUB:
2990  // Handle negate.
2991  if (IsNegate) {
2992  unsigned NewOpc = SelectOpcode(X86::NEG64m, X86::NEG32m, X86::NEG16m,
2993  X86::NEG8m);
2994  const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
2995  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
2996  MVT::Other, Ops);
2997  break;
2998  }
2999  LLVM_FALLTHROUGH;
3000  case X86ISD::ADD:
3001  // Try to match inc/dec.
3002  if (!Subtarget->slowIncDec() || OptForSize) {
3003  bool IsOne = isOneConstant(StoredVal.getOperand(1));
3004  bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
3005  // ADD/SUB with 1/-1 can use inc/dec when the carry flag isn't used.
3006  if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) {
3007  unsigned NewOpc =
3008  ((Opc == X86ISD::ADD) == IsOne)
3009  ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
3010  : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
3011  const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
3012  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
3013  MVT::Other, Ops);
3014  break;
3015  }
3016  }
3017  LLVM_FALLTHROUGH;
3018  case X86ISD::ADC:
3019  case X86ISD::SBB:
3020  case X86ISD::AND:
3021  case X86ISD::OR:
3022  case X86ISD::XOR: {
3023  auto SelectRegOpcode = [SelectOpcode](unsigned Opc) {
3024  switch (Opc) {
3025  case X86ISD::ADD:
3026  return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr,
3027  X86::ADD8mr);
3028  case X86ISD::ADC:
3029  return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr,
3030  X86::ADC8mr);
3031  case X86ISD::SUB:
3032  return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr,
3033  X86::SUB8mr);
3034  case X86ISD::SBB:
3035  return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr,
3036  X86::SBB8mr);
3037  case X86ISD::AND:
3038  return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr,
3039  X86::AND8mr);
3040  case X86ISD::OR:
3041  return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr);
3042  case X86ISD::XOR:
3043  return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr,
3044  X86::XOR8mr);
3045  default:
3046  llvm_unreachable("Invalid opcode!");
3047  }
3048  };
3049  auto SelectImm8Opcode = [SelectOpcode](unsigned Opc) {
3050  switch (Opc) {
3051  case X86ISD::ADD:
3052  return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8, 0);
3053  case X86ISD::ADC:
3054  return SelectOpcode(X86::ADC64mi8, X86::ADC32mi8, X86::ADC16mi8, 0);
3055  case X86ISD::SUB:
3056  return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8, 0);
3057  case X86ISD::SBB:
3058  return SelectOpcode(X86::SBB64mi8, X86::SBB32mi8, X86::SBB16mi8, 0);
3059  case X86ISD::AND:
3060  return SelectOpcode(X86::AND64mi8, X86::AND32mi8, X86::AND16mi8, 0);
3061  case X86ISD::OR:
3062  return SelectOpcode(X86::OR64mi8, X86::OR32mi8, X86::OR16mi8, 0);
3063  case X86ISD::XOR:
3064  return SelectOpcode(X86::XOR64mi8, X86::XOR32mi8, X86::XOR16mi8, 0);
3065  default:
3066  llvm_unreachable("Invalid opcode!");
3067  }
3068  };
3069  auto SelectImmOpcode = [SelectOpcode](unsigned Opc) {
3070  switch (Opc) {
3071  case X86ISD::ADD:
3072  return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi,
3073  X86::ADD8mi);
3074  case X86ISD::ADC:
3075  return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi,
3076  X86::ADC8mi);
3077  case X86ISD::SUB:
3078  return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi,
3079  X86::SUB8mi);
3080  case X86ISD::SBB:
3081  return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi,
3082  X86::SBB8mi);
3083  case X86ISD::AND:
3084  return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi,
3085  X86::AND8mi);
3086  case X86ISD::OR:
3087  return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi,
3088  X86::OR8mi);
3089  case X86ISD::XOR:
3090  return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi,
3091  X86::XOR8mi);
3092  default:
3093  llvm_unreachable("Invalid opcode!");
3094  }
3095  };
3096 
3097  unsigned NewOpc = SelectRegOpcode(Opc);
3098  SDValue Operand = StoredVal->getOperand(1-LoadOpNo);
3099 
3100  // See if the operand is a constant that we can fold into an immediate
3101  // operand.
3102  if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
3103  int64_t OperandV = OperandC->getSExtValue();
3104 
3105  // Check if we can shrink the operand enough to fit in an immediate (or
3106  // fit into a smaller immediate) by negating it and switching the
3107  // operation.
3108  if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) &&
3109  ((MemVT != MVT::i8 && !isInt<8>(OperandV) && isInt<8>(-OperandV)) ||
3110  (MemVT == MVT::i64 && !isInt<32>(OperandV) &&
3111  isInt<32>(-OperandV))) &&
3112  hasNoCarryFlagUses(StoredVal.getValue(1))) {
3113  OperandV = -OperandV;
3114  Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
3115  }
3116 
3117  // First try to fit this into an Imm8 operand. If it doesn't fit, then try
3118  // the larger immediate operand.
3119  if (MemVT != MVT::i8 && isInt<8>(OperandV)) {
3120  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
3121  NewOpc = SelectImm8Opcode(Opc);
3122  } else if (MemVT != MVT::i64 || isInt<32>(OperandV)) {
3123  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
3124  NewOpc = SelectImmOpcode(Opc);
3125  }
3126  }
3127 
3128  if (Opc == X86ISD::ADC || Opc == X86ISD::SBB) {
3129  SDValue CopyTo =
3130  CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS,
3131  StoredVal.getOperand(2), SDValue());
3132 
3133  const SDValue Ops[] = {Base, Scale, Index, Disp,
3134  Segment, Operand, CopyTo, CopyTo.getValue(1)};
3135  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
3136  Ops);
3137  } else {
3138  const SDValue Ops[] = {Base, Scale, Index, Disp,
3139  Segment, Operand, InputChain};
3140  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
3141  Ops);
3142  }
3143  break;
3144  }
3145  default:
3146  llvm_unreachable("Invalid opcode!");
3147  }
3148 
3149  MachineMemOperand *MemOps[] = {StoreNode->getMemOperand(),
3150  LoadNode->getMemOperand()};
3151  CurDAG->setNodeMemRefs(Result, MemOps);
3152 
3153  // Update Load Chain uses as well.
3154  ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1));
3155  ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
3156  ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
3157  CurDAG->RemoveDeadNode(Node);
3158  return true;
3159 }
3160 
3161 // See if this is an X & Mask that we can match to BEXTR/BZHI.
3162 // Where Mask is one of the following patterns:
3163 // a) x & (1 << nbits) - 1
3164 // b) x & ~(-1 << nbits)
3165 // c) x & (-1 >> (32 - y))
3166 // d) x << (32 - y) >> (32 - y)
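// For example, pattern (a) with nbits = 5 is x & ((1 << 5) - 1), which keeps
// the low 5 bits of x. With BMI2 this becomes BZHI (zero the bits of the
// source from the given bit index upward); with only BMI it becomes BEXTR,
// whose control operand packs a start position in bits 7:0 and a length in
// bits 15:8 (here start = 0 and length = 5).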
3167 bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
3168  assert(
3169  (Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) &&
3170  "Should be either an and-mask, or right-shift after clearing high bits.");
3171 
3172  // BEXTR is a BMI instruction, BZHI is a BMI2 instruction. We need at least one.
3173  if (!Subtarget->hasBMI() && !Subtarget->hasBMI2())
3174  return false;
3175 
3176  MVT NVT = Node->getSimpleValueType(0);
3177 
3178  // Only supported for 32 and 64 bits.
3179  if (NVT != MVT::i32 && NVT != MVT::i64)
3180  return false;
3181 
3182  SDValue NBits;
3183 
3184  // If we have BMI2's BZHI, we are ok with multi-use patterns.
3185  // Else, if we only have BMI1's BEXTR, we require one-use.
3186  const bool CanHaveExtraUses = Subtarget->hasBMI2();
3187  auto checkUses = [CanHaveExtraUses](SDValue Op, unsigned NUses) {
3188  return CanHaveExtraUses ||
3189  Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo());
3190  };
3191  auto checkOneUse = [checkUses](SDValue Op) { return checkUses(Op, 1); };
3192  auto checkTwoUse = [checkUses](SDValue Op) { return checkUses(Op, 2); };
3193 
3194  auto peekThroughOneUseTruncation = [checkOneUse](SDValue V) {
3195  if (V->getOpcode() == ISD::TRUNCATE && checkOneUse(V)) {
3196  assert(V.getSimpleValueType() == MVT::i32 &&
3197  V.getOperand(0).getSimpleValueType() == MVT::i64 &&
3198  "Expected i64 -> i32 truncation");
3199  V = V.getOperand(0);
3200  }
3201  return V;
3202  };
3203 
3204  // a) x & ((1 << nbits) + (-1))
3205  auto matchPatternA = [checkOneUse, peekThroughOneUseTruncation,
3206  &NBits](SDValue Mask) -> bool {
3207  // Match `add`. Must only have one use!
3208  if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))
3209  return false;
3210  // We should be adding an all-ones constant (i.e. subtracting one).
3211  if (!isAllOnesConstant(Mask->getOperand(1)))
3212  return false;
3213  // Match `1 << nbits`. Might be truncated. Must only have one use!
3214  SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
3215  if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
3216  return false;
3217  if (!isOneConstant(M0->getOperand(0)))
3218  return false;
3219  NBits = M0->getOperand(1);
3220  return true;
3221  };
3222 
3223  auto isAllOnes = [this, peekThroughOneUseTruncation, NVT](SDValue V) {
3224  V = peekThroughOneUseTruncation(V);
3225  return CurDAG->MaskedValueIsAllOnes(
3226  V, APInt::getLowBitsSet(V.getSimpleValueType().getSizeInBits(),
3227  NVT.getSizeInBits()));
3228  };
3229 
3230  // b) x & ~(-1 << nbits)
3231  auto matchPatternB = [checkOneUse, isAllOnes, peekThroughOneUseTruncation,
3232  &NBits](SDValue Mask) -> bool {
3233  // Match `~()`. Must only have one use!
3234  if (Mask.getOpcode() != ISD::XOR || !checkOneUse(Mask))
3235  return false;
3236  // The -1 only has to be all-ones for the final Node's NVT.
3237  if (!isAllOnes(Mask->getOperand(1)))
3238  return false;
3239  // Match `-1 << nbits`. Might be truncated. Must only have one use!
3240  SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
3241  if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
3242  return false;
3243  // The -1 only has to be all-ones for the final Node's NVT.
3244  if (!isAllOnes(M0->getOperand(0)))
3245  return false;
3246  NBits = M0->getOperand(1);
3247  return true;
3248  };
3249 
3250  // Match potentially-truncated (bitwidth - y)
3251  auto matchShiftAmt = [checkOneUse, &NBits](SDValue ShiftAmt,
3252  unsigned Bitwidth) {
3253  // Skip over a truncate of the shift amount.
3254  if (ShiftAmt.getOpcode() == ISD::TRUNCATE) {
3255  ShiftAmt = ShiftAmt.getOperand(0);
3256  // The trunc should have been the only user of the real shift amount.
3257  if (!checkOneUse(ShiftAmt))
3258  return false;
3259  }
3260  // Match the shift amount as: (bitwidth - y). It should go away, too.
3261  if (ShiftAmt.getOpcode() != ISD::SUB)
3262  return false;
3263  auto V0 = dyn_cast<ConstantSDNode>(ShiftAmt.getOperand(0));
3264  if (!V0 || V0->getZExtValue() != Bitwidth)
3265  return false;
3266  NBits = ShiftAmt.getOperand(1);
3267  return true;
3268  };
3269 
3270  // c) x & (-1 >> (32 - y))
3271  auto matchPatternC = [checkOneUse, peekThroughOneUseTruncation,
3272  matchShiftAmt](SDValue Mask) -> bool {
3273  // The mask itself may be truncated.
3274  Mask = peekThroughOneUseTruncation(Mask);
3275  unsigned Bitwidth = Mask.getSimpleValueType().getSizeInBits();
3276  // Match `l>>`. Must only have one use!
3277  if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask))
3278  return false;
3279  // We should be shifting a truly all-ones constant.
3280  if (!isAllOnesConstant(Mask.getOperand(0)))
3281  return false;
3282  SDValue M1 = Mask.getOperand(1);
3283  // The shift amount should not be used externally.
3284  if (!checkOneUse(M1))
3285  return false;
3286  return matchShiftAmt(M1, Bitwidth);
3287  };
3288 
3289  SDValue X;
3290 
3291  // d) x << (32 - y) >> (32 - y)
3292  auto matchPatternD = [checkOneUse, checkTwoUse, matchShiftAmt,
3293  &X](SDNode *Node) -> bool {
3294  if (Node->getOpcode() != ISD::SRL)
3295  return false;
3296  SDValue N0 = Node->getOperand(0);
3297  if (N0->getOpcode() != ISD::SHL || !checkOneUse(N0))
3298  return false;
3299  unsigned Bitwidth = N0.getSimpleValueType().getSizeInBits();
3300  SDValue N1 = Node->getOperand(1);
3301  SDValue N01 = N0->getOperand(1);
3302  // Both of the shifts must be by the exact same value.
3303  // There should not be any uses of the shift amount outside of the pattern.
3304  if (N1 != N01 || !checkTwoUse(N1))
3305  return false;
3306  if (!matchShiftAmt(N1, Bitwidth))
3307  return false;
3308  X = N0->getOperand(0);
3309  return true;
3310  };
3311 
3312  auto matchLowBitMask = [matchPatternA, matchPatternB,
3313  matchPatternC](SDValue Mask) -> bool {
3314  return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask);
3315  };
3316 
3317  if (Node->getOpcode() == ISD::AND) {
3318  X = Node->getOperand(0);
3319  SDValue Mask = Node->getOperand(1);
3320 
3321  if (matchLowBitMask(Mask)) {
3322  // Great.
3323  } else {
3324  std::swap(X, Mask);
3325  if (!matchLowBitMask(Mask))
3326  return false;
3327  }
3328  } else if (!matchPatternD(Node))
3329  return false;
3330 
3331  SDLoc DL(Node);
3332 
3333  // Truncate the shift amount.
3334  NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits);
3335  insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3336 
3337  // Insert 8-bit NBits into lowest 8 bits of 32-bit register.
3338  // All the other bits are undefined, we do not care about them.
3339  SDValue ImplDef = SDValue(
3340  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0);
3341  insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef);
3342 
3343  SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit, DL, MVT::i32);
3344  insertDAGNode(*CurDAG, SDValue(Node, 0), SRIdxVal);
3345  NBits = SDValue(
3346  CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::i32, ImplDef,
3347  NBits, SRIdxVal), 0);
3348  insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3349 
3350  if (Subtarget->hasBMI2()) {
3351  // Great, just emit the BZHI.
3352  if (NVT != MVT::i32) {
3353  // But have to place the bit count into the wide-enough register first.
3354  NBits = CurDAG->getNode(ISD::ANY_EXTEND, DL, NVT, NBits);
3355  insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3356  }
3357 
3358  SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, NVT, X, NBits);
3359  ReplaceNode(Node, Extract.getNode());
3360  SelectCode(Extract.getNode());
3361  return true;
3362  }
3363 
3364  // Else, if we do *NOT* have BMI2, let's find out if the 'X' is
3365  // *logically* shifted (potentially with a one-use trunc in between),
3366  // and the truncation was the only use of the shift,
3367  // and if so look past one-use truncation.
3368  {
3369  SDValue RealX = peekThroughOneUseTruncation(X);
3370  // FIXME: only if the shift is one-use?
3371  if (RealX != X && RealX.getOpcode() == ISD::SRL)
3372  X = RealX;
3373  }
3374 
3375  MVT XVT = X.getSimpleValueType();
3376 
3377  // Else, emitting BEXTR requires one more step.
3378  // The 'control' of BEXTR has the pattern of:
3379  // [15...8 bit][ 7...0 bit] location
3380  // [ bit count][ shift] name
3381  // I.e. 0b00000010'00000001 means (x >> 0b1) & 0b11
3382 
3383  // Shift NBits left by 8 bits, thus producing 'control'.
3384  // This makes the low 8 bits zero.
3385  SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8);
3386  SDValue Control = CurDAG->getNode(ISD::SHL, DL, MVT::i32, NBits, C8);
3387  insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
3388 
3389  // If the 'X' is *logically* shifted, we can fold that shift into 'control'.
3390  // FIXME: only if the shift is one-use?
3391  if (X.getOpcode() == ISD::SRL) {
3392  SDValue ShiftAmt = X.getOperand(1);
3393  X = X.getOperand(0);
3394 
3395  assert(ShiftAmt.getValueType() == MVT::i8 &&
3396  "Expected shift amount to be i8");
3397 
3398  // Now, *zero*-extend the shift amount. The bits 8...15 *must* be zero!
3399  // We could zext to i16 in some form, but we intentionally don't do that.
3400  SDValue OrigShiftAmt = ShiftAmt;
3401  ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShiftAmt);
3402  insertDAGNode(*CurDAG, OrigShiftAmt, ShiftAmt);
3403 
3404  // And now 'or' these low 8 bits of shift amount into the 'control'.
3405  Control = CurDAG->getNode(ISD::OR, DL, MVT::i32, Control, ShiftAmt);
3406  insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
3407  }
3408 
3409  // But have to place the 'control' into the wide-enough register first.
3410  if (XVT != MVT::i32) {
3411  Control = CurDAG->getNode(ISD::ANY_EXTEND, DL, XVT, Control);
3412  insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
3413  }
3414 
3415  // And finally, form the BEXTR itself.
3416  SDValue Extract = CurDAG->getNode(X86ISD::BEXTR, DL, XVT, X, Control);
3417 
3418  // The 'X' was originally truncated. Do that now.
3419  if (XVT != NVT) {
3420  insertDAGNode(*CurDAG, SDValue(Node, 0), Extract);
3421  Extract = CurDAG->getNode(ISD::TRUNCATE, DL, NVT, Extract);
3422  }
3423 
3424  ReplaceNode(Node, Extract.getNode());
3425  SelectCode(Extract.getNode());
3426 
3427  return true;
3428 }
3429 
3430 // See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
3431 MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
3432  MVT NVT = Node->getSimpleValueType(0);
3433  SDLoc dl(Node);
3434 
3435  SDValue N0 = Node->getOperand(0);
3436  SDValue N1 = Node->getOperand(1);
3437 
3438  // If we have TBM we can use an immediate for the control. If we have BMI
3439  // we should only do this if the BEXTR instruction is implemented well.
3440  // Otherwise moving the control into a register makes this more costly.
3441  // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
3442  // hoisting the move immediate would make it worthwhile with a less optimal
3443  // BEXTR?
3444  if (!Subtarget->hasTBM() &&
3445  !(Subtarget->hasBMI() && Subtarget->hasFastBEXTR()))
3446  return nullptr;
3447 
3448  // Must have a shift right.
3449  if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
3450  return nullptr;
3451 
3452  // Shift can't have additional users.
3453  if (!N0->hasOneUse())
3454  return nullptr;
3455 
3456  // Only supported for 32 and 64 bits.
3457  if (NVT != MVT::i32 && NVT != MVT::i64)
3458  return nullptr;
3459 
3460  // Shift amount and RHS of and must be constant.
3461  ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
3462  ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
3463  if (!MaskCst || !ShiftCst)
3464  return nullptr;
3465 
3466  // And RHS must be a mask.
3467  uint64_t Mask = MaskCst->getZExtValue();
3468  if (!isMask_64(Mask))
3469  return nullptr;
3470 
3471  uint64_t Shift = ShiftCst->getZExtValue();
3472  uint64_t MaskSize = countPopulation(Mask);
3473 
3474  // Don't interfere with something that can be handled by extracting AH.
3475  // TODO: If we are able to fold a load, BEXTR might still be better than AH.
3476  if (Shift == 8 && MaskSize == 8)
3477  return nullptr;
3478 
3479  // Make sure we are only using bits that were in the original value, not
3480  // shifted in.
3481  if (Shift + MaskSize > NVT.getSizeInBits())
3482  return nullptr;
3483 
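// For example, (x >> 4) & 0xff has Shift = 4 and MaskSize = 8, giving a
// control value of 4 | (8 << 8) = 0x0804: start at bit 4, extract 8 bits.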
3484  SDValue New = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
3485  unsigned ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
3486  unsigned MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
3487 
3488  // BMI requires the immediate to be placed in a register.
3489  if (!Subtarget->hasTBM()) {
3490  ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
3491  MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
3492  unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
3493  New = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, New), 0);
3494  }
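// (TBM's BEXTRI encodes the control as an imm32; BMI's BEXTR takes the
// control in a register, hence the extra MOV32ri above.)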
3495 
3496  MachineSDNode *NewNode;
3497  SDValue Input = N0->getOperand(0);
3498  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3499  if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3500  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, New, Input.getOperand(0) };
3501  SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
3502  NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3503  // Update the chain.
3504  ReplaceUses(Input.getValue(1), SDValue(NewNode, 2));
3505  // Record the mem-refs
3506  CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
3507  } else {
3508  NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, New);
3509  }
3510 
3511  return NewNode;
3512 }
3513 
3514 // Emit a PCMPISTR(I/M) instruction.
3515 MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
3516  bool MayFoldLoad, const SDLoc &dl,
3517  MVT VT, SDNode *Node) {
3518  SDValue N0 = Node->getOperand(0);
3519  SDValue N1 = Node->getOperand(1);
3520  SDValue Imm = Node->getOperand(2);
3521  const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
3522  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
3523 
3524  // Try to fold a load. No need to check alignment.
3525  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3526  if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3527  SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
3528  N1.getOperand(0) };
3529  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other);
3530  MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3531  // Update the chain.
3532  ReplaceUses(N1.getValue(1), SDValue(CNode, 2));
3533  // Record the mem-refs
3534  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
3535  return CNode;
3536  }
3537 
3538  SDValue Ops[] = { N0, N1, Imm };
3539  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32);
3540  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
3541  return CNode;
3542 }
3543 
3544 // Emit a PCMPESTR(I/M) instruction. Also return the Glue result in case we need
3545 // to emit a second instruction after this one. This is needed since we have two
3546 // copyToReg nodes glued before this and we need to continue that glue through.
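// (PCMPESTRI/PCMPESTRM read the explicit string lengths from EAX and EDX,
// which is why those CopyToReg nodes are glued ahead of this node.)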
3547 MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
3548  bool MayFoldLoad, const SDLoc &dl,
3549  MVT VT, SDNode *Node,
3550  SDValue &InFlag) {
3551  SDValue N0 = Node->getOperand(0);
3552  SDValue N2 = Node->getOperand(2);
3553  SDValue Imm = Node->getOperand(4);
3554  const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
3555  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
3556 
3557  // Try to fold a load. No need to check alignment.
3558  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3559  if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3560  SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
3561  N2.getOperand(0), InFlag };
3562  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue);
3563  MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3564  InFlag = SDValue(CNode, 3);
3565  // Update the chain.
3566  ReplaceUses(N2.getValue(1), SDValue(CNode, 2));
3567  // Record the mem-refs
3568  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()});
3569  return CNode;
3570  }
3571 
3572  SDValue Ops[] = { N0, N2, Imm, InFlag };
3573  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Glue);
3574  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
3575  InFlag = SDValue(CNode, 2);
3576  return CNode;
3577 }
3578 
3579 bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3580  EVT VT = N->getValueType(0);
3581 
3582  // Only handle scalar shifts.
3583  if (VT.isVector())
3584  return false;
3585 
3586  // Narrower shifts only mask to 5 bits in hardware.
3587  unsigned Size = VT == MVT::i64 ? 64 : 32;
3588 
3589  SDValue OrigShiftAmt = N->getOperand(1);
3590  SDValue ShiftAmt = OrigShiftAmt;
3591  SDLoc DL(N);
3592 
3593  // Skip over a truncate of the shift amount.
3594  if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
3595  ShiftAmt = ShiftAmt->getOperand(0);
3596 
3597  // This function is called after X86DAGToDAGISel::matchBitExtract(),
3598  // so we are not afraid that we might mess up BZHI/BEXTR pattern.
3599 
3600  SDValue NewShiftAmt;
3601  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3602  SDValue Add0 = ShiftAmt->getOperand(0);
3603  SDValue Add1 = ShiftAmt->getOperand(1);
3604  // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3605  // to avoid the ADD/SUB.
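// For example, an i32 shift by (y + 32) is the same as a shift by y, because
// the hardware masks the shift amount to the low 5 bits (6 bits for i64).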
3606  if (isa<ConstantSDNode>(Add1) &&
3607  cast<ConstantSDNode>(Add1)->getZExtValue() % Size == 0) {
3608  NewShiftAmt = Add0;
3609  // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
3610  // generate a NEG instead of a SUB of a constant.
3611  } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3612  isa<ConstantSDNode>(Add0) &&
3613  cast<ConstantSDNode>(Add0)->getZExtValue() != 0 &&
3614  cast<ConstantSDNode>(Add0)->getZExtValue() % Size == 0) {
3615  // Insert a negate op.
3616  // TODO: This isn't guaranteed to replace the sub if there is a logic cone
3617  // that uses it that's not a shift.
3618  EVT SubVT = ShiftAmt.getValueType();
3619  SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
3620  SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, Add1);
3621  NewShiftAmt = Neg;
3622 
3623  // Insert these operands into a valid topological order so they can
3624  // get selected independently.
3625  insertDAGNode(*CurDAG, OrigShiftAmt, Zero);
3626  insertDAGNode(*CurDAG, OrigShiftAmt, Neg);
3627  } else
3628  return false;
3629  } else
3630  return false;
3631 
3632  if (NewShiftAmt.getValueType() != MVT::i8) {
3633  // Need to truncate the shift amount.
3634  NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
3635  // Add to a correct topological ordering.
3636  insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
3637  }
3638 
3639  // Insert a new mask to keep the shift amount legal. This should be removed
3640  // by isel patterns.
3641  NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
3642  CurDAG->getConstant(Size - 1, DL, MVT::i8));
3643  // Place in a correct topological ordering.
3644  insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
3645 
3646  SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
3647  NewShiftAmt);
3648  if (UpdatedNode != N) {
3649  // If we found an existing node, we should replace ourselves with that node
3650  // and wait for it to be selected after its other users.
3651  ReplaceNode(N, UpdatedNode);
3652  return true;
3653  }
3654 
3655  // If the original shift amount is now dead, delete it so that we don't run
3656  // it through isel.
3657  if (OrigShiftAmt.getNode()->use_empty())
3658  CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());
3659 
3660  // Now that we've optimized the shift amount, defer to normal isel to get
3661  // load folding and legacy vs BMI2 selection without repeating it here.
3662  SelectCode(N);
3663  return true;
3664 }
3665 
3666 bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
3667  MVT NVT = N->getSimpleValueType(0);
3668  unsigned Opcode = N->getOpcode();
3669  SDLoc dl(N);
3670 
3671  // For operations of the form (x << C1) op C2, check if we can use a smaller
3672  // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
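// For example, (x << 8) | 0x0f00 becomes (x | 0x0f) << 8, letting the OR use
// an imm8 encoding instead of an imm32.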
3673  SDValue Shift = N->getOperand(0);
3674  SDValue N1 = N->getOperand(1);
3675 
3676  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
3677  if (!Cst)
3678  return false;
3679 
3680  int64_t Val = Cst->getSExtValue();
3681 
3682  // If we have an any_extend feeding the AND, look through it to see if there
3683  // is a shift behind it. But only if the AND doesn't use the extended bits.
3684  // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
3685  bool FoundAnyExtend = false;
3686  if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
3687  Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
3688  isUInt<32>(Val)) {
3689  FoundAnyExtend = true;
3690  Shift = Shift.getOperand(0);
3691  }
3692 
3693  if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
3694  return false;
3695 
3696  // i8 is unshrinkable, i16 should be promoted to i32.
3697  if (NVT != MVT::i32 && NVT != MVT::i64)
3698  return false;
3699 
3700  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
3701  if (!ShlCst)
3702  return false;
3703 
3704  uint64_t ShAmt = ShlCst->getZExtValue();
3705 
3706  // Make sure that we don't change the operation by removing bits.
3707  // This only matters for OR and XOR, AND is unaffected.
3708  uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1;
3709  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
3710  return false;
3711 
3712  // Check the minimum bitwidth for the new constant.
3713  // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
3714  auto CanShrinkImmediate = [&](int64_t &ShiftedVal) {
3715  if (Opcode == ISD::AND) {
3716  // AND32ri is the same as AND64ri32 with zext imm.
3717  // Try this before sign extended immediates below.
3718  ShiftedVal = (uint64_t)Val >> ShAmt;
3719  if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
3720  return true;
3721  // Also swap order when the AND can become MOVZX.
3722  if (ShiftedVal == UINT8_MAX || ShiftedVal == UINT16_MAX)
3723  return true;
3724  }
3725  ShiftedVal = Val >> ShAmt;
3726  if ((!isInt<8>(Val) && isInt<8>(ShiftedVal)) ||
3727  (!isInt<32>(Val) && isInt<32>(ShiftedVal)))
3728  return true;
3729  if (Opcode != ISD::AND) {
3730  // MOV32ri+OR64r/XOR64r is cheaper than MOV64ri64+OR64rr/XOR64rr
3731  ShiftedVal = (uint64_t)Val >> ShAmt;
3732  if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
3733  return true;
3734  }
3735  return false;
3736  };
3737 
3738  int64_t ShiftedVal;
3739  if (!CanShrinkImmediate(ShiftedVal))
3740  return false;
3741 
3742  // Ok, we can reorder to get a smaller immediate.
3743 
3744  // But, it's possible the original immediate allowed an AND to become MOVZX.
3745  // Doing this late to delay the MaskedValueIsZero call as long as
3746  // possible.
3747  if (Opcode == ISD::AND) {
3748  // Find the smallest zext this could possibly be.
3749  unsigned ZExtWidth = Cst->getAPIntValue().getActiveBits();
3750  ZExtWidth = PowerOf2Ceil(std::max(ZExtWidth, 8U));
3751 
3752  // Figure out which bits need to be zero to achieve that mask.
3753  APInt NeededMask = APInt::getLowBitsSet(NVT.getSizeInBits(),
3754  ZExtWidth);
3755  NeededMask &= ~Cst->getAPIntValue();
3756 
3757  if (CurDAG->MaskedValueIsZero(N->getOperand(0), NeededMask))
3758  return false;
3759  }
3760 
3761  SDValue X = Shift.getOperand(0);
3762  if (FoundAnyExtend) {
3763  SDValue NewX = CurDAG->getNode(ISD::ANY_EXTEND, dl, NVT, X);
3764  insertDAGNode(*CurDAG, SDValue(N, 0), NewX);
3765  X = NewX;
3766  }
3767 
3768  SDValue NewCst = CurDAG->getConstant(ShiftedVal, dl, NVT);
3769  insertDAGNode(*CurDAG, SDValue(N, 0), NewCst);
3770  SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, X, NewCst);
3771  insertDAGNode(*CurDAG, SDValue(N, 0), NewBinOp);
3772  SDValue NewSHL = CurDAG->getNode(ISD::SHL, dl, NVT, NewBinOp,
3773  Shift.getOperand(1));
3774  ReplaceNode(N, NewSHL.getNode());
3775  SelectCode(NewSHL.getNode());
3776  return true;
3777 }
3778 
3779 /// Convert vector increment or decrement to sub/add with an all-ones constant:
3780 /// add X, <1, 1...> --> sub X, <-1, -1...>
3781 /// sub X, <1, 1...> --> add X, <-1, -1...>
3782 /// The all-ones vector constant can be materialized using a pcmpeq instruction
3783 /// that is commonly recognized as an idiom (has no register dependency), so
3784 /// that's better/smaller than loading a splat 1 constant.
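/// For example, "paddd %xmm1, %xmm0" with xmm1 = <1,1,1,1> becomes
/// "pcmpeqd %xmm1, %xmm1" (all-ones, with no dependency on the old value of
/// xmm1) followed by "psubd %xmm1, %xmm0".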
3785 bool X86DAGToDAGISel::combineIncDecVector(SDNode *Node) {
3786  assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB) &&
3787  "Unexpected opcode for increment/decrement transform");
3788 
3789  EVT VT = Node->getValueType(0);
3790  assert(VT.isVector() && "Should only be called for vectors.");
3791 
3792  SDValue X = Node->getOperand(0);
3793  SDValue OneVec = Node->getOperand(1);
3794 
3795  APInt SplatVal;
3796  if (!X86::isConstantSplat(OneVec, SplatVal) || !SplatVal.isOneValue())
3797  return false;
3798 
3799  SDLoc DL(Node);
3800  SDValue OneConstant, AllOnesVec;
3801 
3802  APInt Ones = APInt::getAllOnesValue(32);
3803  assert(VT.getSizeInBits() % 32 == 0 &&
3804  "Expected bit count to be a multiple of 32");
3805  OneConstant = CurDAG->getConstant(Ones, DL, MVT::i32);
3806  insertDAGNode(*CurDAG, X, OneConstant);
3807 
3808  unsigned NumElts = VT.getSizeInBits() / 32;
3809  assert(NumElts > 0 && "Expected to get non-empty vector.");
3810  AllOnesVec = CurDAG->getSplatBuildVector(MVT::getVectorVT(MVT::i32, NumElts),
3811  DL, OneConstant);
3812  insertDAGNode(*CurDAG, X, AllOnesVec);
3813 
3814  AllOnesVec = CurDAG->getBitcast(VT, AllOnesVec);
3815  insertDAGNode(*CurDAG, X, AllOnesVec);
3816 
3817  unsigned NewOpcode = Node->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD;
3818  SDValue NewNode = CurDAG->getNode(NewOpcode, DL, VT, X, AllOnesVec);
3819 
3820  ReplaceNode(Node, NewNode.getNode());
3821  SelectCode(NewNode.getNode());
3822  return true;
3823 }
3824 
3825 /// If the high bits of an 'and' operand are known zero, try setting the
3826 /// high bits of an 'and' constant operand to produce a smaller encoding by
3827 /// creating a small, sign-extended negative immediate rather than a large
3828 /// positive one. This reverses a transform in SimplifyDemandedBits that
3829 /// shrinks mask constants by clearing bits. There is also a possibility that
3830 /// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that
3831 /// case, just replace the 'and'. Return 'true' if the node is replaced.
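/// For example, with an i32 mask of 0x00000ff0 whose other operand has its
/// top 20 bits known zero, the mask can become 0xfffffff0 (-16), which
/// encodes as a sign-extended imm8 instead of an imm32.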
3832 bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
3833  // i8 is unshrinkable, i16 should be promoted to i32, and vector ops don't
3834  // have immediate operands.
3835  MVT VT = And->getSimpleValueType(0);
3836  if (VT != MVT::i32 && VT != MVT::i64)
3837  return false;
3838 
3839  auto *And1C = dyn_cast<ConstantSDNode>(And->getOperand(1));
3840  if (!And1C)
3841  return false;
3842 
3843  // Bail out if the mask constant is already negative. It can't shrink any more.
3844  // If the upper 32 bits of a 64 bit mask are all zeros, we have special isel
3845  // patterns to use a 32-bit and instead of a 64-bit and by relying on the
3846  // implicit zeroing of 32 bit ops. So we should check if the lower 32 bits
3847  // are negative too.
3848  APInt MaskVal = And1C->getAPIntValue();
3849  unsigned MaskLZ = MaskVal.countLeadingZeros();
3850  if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32))
3851  return false;
3852 
3853  // Don't extend into the upper 32 bits of a 64 bit mask.
3854  if (VT == MVT::i64 && MaskLZ >= 32) {
3855  MaskLZ -= 32;
3856  MaskVal = MaskVal.trunc(32);
3857  }
3858 
3859  SDValue And0 = And->getOperand(0);
3860  APInt HighZeros = APInt::getHighBitsSet(MaskVal.getBitWidth(), MaskLZ);
3861  APInt NegMaskVal = MaskVal | HighZeros;
3862 
3863  // If a negative constant would not allow a smaller encoding, there's no need
3864  // to continue. Only change the constant when we know it's a win.
3865  unsigned MinWidth = NegMaskVal.getMinSignedBits();
3866  if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getMinSignedBits() <= 32))
3867  return false;
3868 
3869  // Extend masks if we truncated above.
3870  if (VT == MVT::i64 && MaskVal.getBitWidth() < 64) {
3871  NegMaskVal = NegMaskVal.zext(64);
3872  HighZeros = HighZeros.zext(64);
3873  }
3874 
3875  // The variable operand must be all zeros in the top bits to allow using the
3876  // new, negative constant as the mask.
3877  if (!CurDAG->MaskedValueIsZero(And0, HighZeros))
3878  return false;
3879 
3880  // Check if the mask is -1. In that case, this is an unnecessary instruction
3881  // that escaped earlier analysis.
3882  if (NegMaskVal.isAllOnesValue()) {
3883  ReplaceNode(And, And0.getNode());
3884  return true;
3885  }
3886 
3887  // A negative mask allows a smaller encoding. Create a new 'and' node.
3888  SDValue NewMask = CurDAG->getConstant(NegMaskVal, SDLoc(And), VT);
3889  SDValue NewAnd = CurDAG->getNode(ISD::AND, SDLoc(And), VT, And0, NewMask);
3890  ReplaceNode(And, NewAnd.getNode());
3891  SelectCode(NewAnd.getNode());
3892  return true;
3893 }
3894 
3895 static unsigned getVPTESTMOpc(MVT TestVT, bool IsTestN, bool FoldedLoad,
3896  bool FoldedBCast, bool Masked) {
3897  if (Masked) {
3898  if (FoldedLoad) {
3899  switch (TestVT.SimpleTy) {
3900  default: llvm_unreachable("Unexpected VT!");
3901  case MVT::v16i8:
3902  return IsTestN ? X86::VPTESTNMBZ128rmk : X86::VPTESTMBZ128rmk;
3903  case MVT::v8i16:
3904  return IsTestN ? X86::VPTESTNMWZ128rmk : X86::VPTESTMWZ128rmk;
3905  case MVT::v4i32:
3906  return IsTestN ? X86::VPTESTNMDZ128rmk : X86::VPTESTMDZ128rmk;
3907  case MVT::v2i64:
3908  return IsTestN ? X86::VPTESTNMQZ128rmk : X86::VPTESTMQZ128rmk;
3909  case MVT::v32i8:
3910  return IsTestN ? X86::VPTESTNMBZ256rmk : X86::VPTESTMBZ256rmk;
3911  case MVT::v16i16:
3912  return IsTestN ? X86::VPTESTNMWZ256rmk : X86::VPTESTMWZ256rmk;
3913  case MVT::v8i32:
3914  return IsTestN ? X86::VPTESTNMDZ256rmk : X86::VPTESTMDZ256rmk;
3915  case MVT::v4i64:
3916  return IsTestN ? X86::VPTESTNMQZ256rmk : X86::VPTESTMQZ256rmk;
3917  case MVT::v64i8:
3918  return IsTestN ? X86::VPTESTNMBZrmk : X86::VPTESTMBZrmk;
3919  case MVT::v32i16:
3920  return IsTestN ? X86::VPTESTNMWZrmk : X86::VPTESTMWZrmk;
3921  case MVT::v16i32:
3922  return IsTestN ? X86::VPTESTNMDZrmk : X86::VPTESTMDZrmk;
3923  case MVT::v8i64:
3924  return IsTestN ? X86::VPTESTNMQZrmk : X86::VPTESTMQZrmk;
3925  }
3926  }
3927 
3928  if (FoldedBCast) {
3929  switch (TestVT.SimpleTy) {
3930  default: llvm_unreachable("Unexpected VT!");
3931  case MVT::v4i32:
3932  return IsTestN ? X86::VPTESTNMDZ128rmbk : X86::VPTESTMDZ128rmbk;
3933  case MVT::v2i64:
3934  return IsTestN ? X86::VPTESTNMQZ128rmbk : X86::VPTESTMQZ128rmbk;
3935  case MVT::v8i32:
3936  return IsTestN ? X86::VPTESTNMDZ256rmbk : X86::VPTESTMDZ256rmbk;
3937  case MVT::v4i64:
3938  return IsTestN ? X86::VPTESTNMQZ256rmbk : X86::VPTESTMQZ256rmbk;
3939  case MVT::v16i32:
3940  return IsTestN ? X86::VPTESTNMDZrmbk : X86::VPTESTMDZrmbk;
3941  case MVT::v8i64:
3942  return IsTestN ? X86::VPTESTNMQZrmbk : X86::VPTESTMQZrmbk;
3943  }
3944  }
3945 
3946  switch (TestVT.SimpleTy) {
3947  default: llvm_unreachable("Unexpected VT!");
3948  case MVT::v16i8:
3949  return IsTestN ? X86::VPTESTNMBZ128rrk : X86::VPTESTMBZ128rrk;
3950  case MVT::v8i16:
3951  return IsTestN ? X86::VPTESTNMWZ128rrk : X86::VPTESTMWZ128rrk;
3952  case MVT::v4i32:
3953  return IsTestN ? X86::VPTESTNMDZ128rrk : X86::VPTESTMDZ128rrk;
3954  case MVT::v2i64:
3955  return IsTestN ? X86::VPTESTNMQZ128rrk : X86::VPTESTMQZ128rrk;
3956  case MVT::v32i8:
3957  return IsTestN ? X86::VPTESTNMBZ256rrk : X86::VPTESTMBZ256rrk;
3958  case MVT::v16i16:
3959  return IsTestN ? X86::VPTESTNMWZ256rrk : X86::VPTESTMWZ256rrk;
3960  case MVT::v8i32:
3961  return IsTestN ? X86::VPTESTNMDZ256rrk : X86::VPTESTMDZ256rrk;
3962  case MVT::v4i64:
3963  return IsTestN ? X86::VPTESTNMQZ256rrk : X86::VPTESTMQZ256rrk;
3964  case MVT::v64i8:
3965  return IsTestN ? X86::VPTESTNMBZrrk : X86::VPTESTMBZrrk;
3966  case MVT::v32i16:
3967  return IsTestN ? X86::VPTESTNMWZrrk : X86::VPTESTMWZrrk;
3968  case MVT::v16i32:
3969  return IsTestN ? X86::VPTESTNMDZrrk : X86::VPTESTMDZrrk;
3970  case MVT::v8i64:
3971  return IsTestN ? X86::VPTESTNMQZrrk : X86::VPTESTMQZrrk;
3972  }
3973  }
3974 
3975  if (FoldedLoad) {
3976  switch (TestVT.SimpleTy) {
3977  default: llvm_unreachable("Unexpected VT!");
3978  case MVT::v16i8:
3979  return IsTestN ? X86::VPTESTNMBZ128rm : X86::VPTESTMBZ128rm;
3980  case MVT::v8i16:
3981  return IsTestN ? X86::VPTESTNMWZ128rm : X86::VPTESTMWZ128rm;
3982  case MVT::v4i32:
3983  return IsTestN ? X86::VPTESTNMDZ128rm : X86::VPTESTMDZ128rm;
3984  case MVT::v2i64:
3985  return IsTestN ? X86::VPTESTNMQZ128rm : X86::VPTESTMQZ128rm;
3986  case MVT::v32i8:
3987  return IsTestN ? X86::VPTESTNMBZ256rm : X86::VPTESTMBZ256rm;
3988  case MVT::v16i16:
3989  return IsTestN ? X86::VPTESTNMWZ256rm : X86::VPTESTMWZ256rm;
3990  case MVT::v8i32:
3991  return IsTestN ? X86::VPTESTNMDZ256rm : X86::VPTESTMDZ256rm;
3992  case MVT::v4i64:
3993  return IsTestN ? X86::VPTESTNMQZ256rm : X86::VPTESTMQZ256rm;
3994  case MVT::v64i8:
3995  return IsTestN ? X86::VPTESTNMBZrm : X86::VPTESTMBZrm;
3996  case MVT::v32i16:
3997  return IsTestN ? X86::VPTESTNMWZrm : X86::VPTESTMWZrm;
3998  case MVT::v16i32:
3999  return IsTestN ? X86::VPTESTNMDZrm : X86::VPTESTMDZrm;
4000  case MVT::v8i64:
4001  return IsTestN ? X86::VPTESTNMQZrm : X86::VPTESTMQZrm;
4002  }
4003  }
4004 
4005  if (FoldedBCast) {
4006  switch (TestVT.SimpleTy) {
4007  default: llvm_unreachable("Unexpected VT!");
4008  case MVT::v4i32:
4009  return IsTestN ? X86::VPTESTNMDZ128rmb : X86::VPTESTMDZ128rmb;
4010  case MVT::v2i64:
4011  return IsTestN ? X86::VPTESTNMQZ128rmb : X86::VPTESTMQZ128rmb;
4012  case MVT::v8i32:
4013  return IsTestN ? X86::VPTESTNMDZ256rmb : X86::VPTESTMDZ256rmb;
4014  case MVT::v4i64:
4015  return IsTestN ? X86::VPTESTNMQZ256rmb : X86::VPTESTMQZ256rmb;
4016  case MVT::v16i32:
4017  return IsTestN ? X86::VPTESTNMDZrmb : X86::VPTESTMDZrmb;
4018  case MVT::v8i64:
4019  return IsTestN ? X86::VPTESTNMQZrmb : X86::VPTESTMQZrmb;
4020  }
4021  }
4022 
4023  switch (TestVT.SimpleTy) {
4024  default: llvm_unreachable("Unexpected VT!");
4025  case MVT::v16i8:
4026  return IsTestN ? X86::VPTESTNMBZ128rr : X86::VPTESTMBZ128rr;
4027  case MVT::v8i16:
4028  return IsTestN ? X86::VPTESTNMWZ128rr : X86::VPTESTMWZ128rr;
4029  case MVT::v4i32:
4030  return IsTestN ? X86::VPTESTNMDZ128rr : X86::VPTESTMDZ128rr;
4031  case MVT::v2i64:
4032  return IsTestN ? X86::VPTESTNMQZ128rr : X86::VPTESTMQZ128rr;
4033  case MVT::v32i8:
4034  return IsTestN ? X86::VPTESTNMBZ256rr : X86::VPTESTMBZ256rr;
4035  case MVT::v16i16:
4036  return IsTestN ? X86::VPTESTNMWZ256rr : X86::VPTESTMWZ256rr;
4037  case MVT::v8i32:
4038  return IsTestN ? X86::VPTESTNMDZ256rr : X86::VPTESTMDZ256rr;
4039  case MVT::v4i64:
4040  return IsTestN ? X86::VPTESTNMQZ256rr : X86::VPTESTMQZ256rr;
4041  case MVT::v64i8:
4042  return IsTestN ? X86::VPTESTNMBZrr : X86::VPTESTMBZrr;
4043  case MVT::v32i16:
4044  return IsTestN ? X86::VPTESTNMWZrr : X86::VPTESTMWZrr;
4045  case MVT::v16i32:
4046  return IsTestN ? X86::VPTESTNMDZrr : X86::VPTESTMDZrr;
4047  case MVT::v8i64:
4048  return IsTestN ? X86::VPTESTNMQZrr : X86::VPTESTMQZrr;
4049  }
4050 }
4051 
4052 // Try to create VPTESTM instruction. If InMask is not null, it will be used
4053 // to form a masked operation.
4054 bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
4055  SDValue InMask) {
4056  assert(Subtarget->hasAVX512() && "Expected AVX512!");
4057  assert(Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 &&
4058  "Unexpected VT!");
4059 
4060  // Look for equal and not equal compares.
4061  ISD::CondCode CC = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
4062  if (CC != ISD::SETEQ && CC != ISD::SETNE)
4063  return false;
4064 
4065  // See if we're comparing against zero. This should have been canonicalized
4066  // to RHS during lowering.
4067  if (!ISD::isBuildVectorAllZeros(Setcc.getOperand(1).getNode()))
4068  return false;
4069 
4070  SDValue N0 = Setcc.getOperand(0);
4071 
4072  MVT CmpVT = N0.getSimpleValueType();
4073  MVT CmpSVT = CmpVT.getVectorElementType();
4074 
4075  // Start with both operands the same. We'll try to refine this.
4076  SDValue Src0 = N0;
4077  SDValue Src1 = N0;
4078 
4079  {
4080  // Look through single use bitcasts.
4081  SDValue N0Temp = N0;
4082  if (N0Temp.getOpcode() == ISD::BITCAST && N0Temp.hasOneUse())
4083  N0Temp = N0.getOperand(0);
4084 
4085  // Look for single use AND.
4086  if (N0Temp.getOpcode() == ISD::AND && N0Temp.hasOneUse()) {
4087  Src0 = N0Temp.getOperand(0);
4088  Src1 = N0Temp.getOperand(1);
4089  }
4090  }
4091 
4092  // Without VLX we need to widen the load.
4093  bool Widen = !Subtarget->hasVLX() && !CmpVT.is512BitVector();
4094 
4095  // We can only fold loads if the sources are unique.
4096  bool CanFoldLoads = Src0 != Src1;
4097 
4098  // Try to fold loads unless we need to widen.
4099  bool FoldedLoad = false;
4100  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Load;
4101  if (!Widen && CanFoldLoads) {
4102  Load = Src1;
4103  FoldedLoad = tryFoldLoad(Root, N0.getNode(), Load, Tmp0, Tmp1, Tmp2, Tmp3,
4104  Tmp4);
4105  if (!FoldedLoad) {
4106  // AND is commutative.
4107  Load = Src0;
4108  FoldedLoad = tryFoldLoad(Root, N0.getNode(), Load, Tmp0, Tmp1, Tmp2,
4109  Tmp3, Tmp4);
4110  if (FoldedLoad)
4111  std::swap(Src0, Src1);
4112  }
4113  }
4114 
4115  auto findBroadcastedOp = [](SDValue Src, MVT CmpSVT, SDNode *&Parent) {
4116  // Look through single use bitcasts.
4117  if (Src.getOpcode() == ISD::BITCAST && Src.hasOneUse())
4118  Src = Src.getOperand(0);
4119 
4120  if (Src.getOpcode() == X86ISD::VBROADCAST && Src.hasOneUse()) {
4121  Parent = Src.getNode();
4122  Src = Src.getOperand(0);
4123  if (Src.getSimpleValueType() == CmpSVT)
4124  return Src;
4125  }
4126 
4127  return SDValue();
4128  };
4129 
4130  // If we didn't fold a load, try to match broadcast. No widening limitation
4131  // for this. But only 32 and 64 bit types are supported.
4132  bool FoldedBCast = false;
4133  if (!FoldedLoad && CanFoldLoads &&
4134  (CmpSVT == MVT::i32 || CmpSVT == MVT::i64)) {
4135  SDNode *ParentNode = nullptr;
4136  if ((Load = findBroadcastedOp(Src1, CmpSVT, ParentNode))) {
4137  FoldedBCast = tryFoldLoad(Root, ParentNode, Load, Tmp0,
4138  Tmp1, Tmp2, Tmp3, Tmp4);
4139  }
4140 
4141  // Try the other operand.
4142  if (!FoldedBCast) {
4143  if ((Load = findBroadcastedOp(Src0, CmpSVT, ParentNode))) {
4144  FoldedBCast = tryFoldLoad(Root, ParentNode, Load, Tmp0,
4145  Tmp1, Tmp2, Tmp3, Tmp4);
4146  if (FoldedBCast)
4147  std::swap(Src0, Src1);
4148  }
4149  }
4150  }
4151 
4152  auto getMaskRC = [](MVT MaskVT) {
4153  switch (MaskVT.SimpleTy) {
4154  default: llvm_unreachable("Unexpected VT!");
4155  case MVT::v2i1: return X86::VK2RegClassID;
4156  case MVT::v4i1: return X86::VK4RegClassID;
4157  case MVT::v8i1: return X86::VK8RegClassID;
4158  case MVT::v16i1: return X86::VK16RegClassID;
4159  case MVT::v32i1: return X86::VK32RegClassID;
4160  case MVT::v64i1: return X86::VK64RegClassID;
4161  }
4162  };
4163 
4164  bool IsMasked = InMask.getNode() != nullptr;
4165 
4166  SDLoc dl(Root);
4167 
4168  MVT ResVT = Setcc.getSimpleValueType();
4169  MVT MaskVT = ResVT;
4170  if (Widen) {
4171  // Widen the inputs using insert_subreg or copy_to_regclass.
4172  unsigned Scale = CmpVT.is128BitVector() ? 4 : 2;
4173  unsigned SubReg = CmpVT.is128BitVector() ? X86::sub_xmm : X86::sub_ymm;
4174  unsigned NumElts = CmpVT.getVectorNumElements() * Scale;
4175  CmpVT = MVT::getVectorVT(CmpSVT, NumElts);
4176  MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4177  SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, dl,
4178  CmpVT), 0);
4179  Src0 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src0);
4180 
4181  assert(!FoldedLoad && "Shouldn't have folded the load");
4182  if (!FoldedBCast)
4183  Src1 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src1);
4184 
4185  if (IsMasked) {
4186  // Widen the mask.
4187  unsigned RegClass = getMaskRC(MaskVT);
4188  SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
4189  InMask = SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
4190  dl, MaskVT, InMask, RC), 0);
4191  }
4192  }
4193 
4194  bool IsTestN = CC == ISD::SETEQ;
4195  unsigned Opc = getVPTESTMOpc(CmpVT, IsTestN, FoldedLoad, FoldedBCast,
4196  IsMasked);
4197 
4198  MachineSDNode *CNode;
4199  if (FoldedLoad || FoldedBCast) {
4200  SDVTList VTs = CurDAG->getVTList(MaskVT, MVT::Other);
4201 
4202  if (IsMasked) {
4203  SDValue Ops[] = { InMask, Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
4204  Load.getOperand(0) };
4205  CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4206  } else {
4207  SDValue Ops[] = { Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
4208  Load.getOperand(0) };
4209  CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4210  }
4211 
4212  // Update the chain.
4213  ReplaceUses(Load.getValue(1), SDValue(CNode, 1));
4214  // Record the mem-refs
4215  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(Load)->getMemOperand()});
4216  } else {
4217  if (IsMasked)
4218  CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1);
4219  else
4220  CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, Src0, Src1);
4221  }
4222 
4223  // If we widened, we need to shrink the mask VT.
4224  if (Widen) {
4225  unsigned RegClass = getMaskRC(ResVT);
4226  SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
4227  CNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
4228  dl, ResVT, SDValue(CNode, 0), RC);
4229  }
4230 
4231  ReplaceUses(SDValue(Root, 0), SDValue(CNode, 0));
4232  CurDAG->RemoveDeadNode(Root);
4233  return true;
4234 }
4235 
4236 void X86DAGToDAGISel::Select(SDNode *Node) {
4237  MVT NVT = Node->getSimpleValueType(0);
4238  unsigned Opcode = Node->getOpcode();
4239  SDLoc dl(Node);
4240 
4241  if (Node->isMachineOpcode()) {
4242  LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
4243  Node->setNodeId(-1);
4244  return; // Already selected.
4245  }
4246 
4247  switch (Opcode) {
4248  default: break;
4249  case ISD::INTRINSIC_VOID: {
4250  unsigned IntNo = Node->getConstantOperandVal(1);
4251  switch (IntNo) {
4252  default: break;
4253  case Intrinsic::x86_sse3_monitor:
4254  case Intrinsic::x86_monitorx:
4255  case Intrinsic::x86_clzero: {
4256  bool Use64BitPtr = Node->getOperand(2).getValueType() == MVT::i64;
4257 
4258  unsigned Opc = 0;
4259  switch (IntNo) {
4260  default: llvm_unreachable("Unexpected intrinsic!");
4261  case Intrinsic::x86_sse3_monitor:
4262  if (!Subtarget->hasSSE3())
4263  break;
4264  Opc = Use64BitPtr ? X86::MONITOR64rrr : X86::MONITOR32rrr;
4265  break;
4266  case Intrinsic::x86_monitorx:
4267  if (!Subtarget->hasMWAITX())
4268  break;
4269  Opc = Use64BitPtr ? X86::MONITORX64rrr : X86::MONITORX32rrr;
4270  break;
4271  case Intrinsic::x86_clzero:
4272  if (!Subtarget->hasCLZERO())
4273  break;
4274  Opc = Use64BitPtr ? X86::CLZERO64r : X86::CLZERO32r;
4275  break;
4276  }
4277 
4278  if (Opc) {
4279  unsigned PtrReg = Use64BitPtr ? X86::RAX : X86::EAX;
4280  SDValue Chain = CurDAG->getCopyToReg(Node->getOperand(0), dl, PtrReg,
4281  Node->getOperand(2), SDValue());
4282  SDValue InFlag = Chain.getValue(1);
4283 
4284  if (IntNo == Intrinsic::x86_sse3_monitor ||
4285  IntNo == Intrinsic::x86_monitorx) {
4286  // Copy the other two operands to ECX and EDX.
4287  Chain = CurDAG->getCopyToReg(Chain, dl, X86::ECX, Node->getOperand(3),
4288  InFlag);
4289  InFlag = Chain.getValue(1);
4290  Chain = CurDAG->getCopyToReg(Chain, dl, X86::EDX, Node->getOperand(4),
4291  InFlag);
4292  InFlag = Chain.getValue(1);
4293  }
4294 
4295  MachineSDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
4296  { Chain, InFlag});
4297  ReplaceNode(Node, CNode);
4298  return;
4299  }
4300  }
4301  }
4302 
4303  break;
4304  }
4305  case ISD::BRIND: {
4306  if (Subtarget->isTargetNaCl())
4307  // NaCl has its own pass where jmp %r32 is converted to jmp %r64. We
4308  // leave the instruction alone.
4309  break;
4310  if (Subtarget->isTarget64BitILP32()) {
4311  // Converts a 32-bit register to a 64-bit, zero-extended version of
4312  // it. This is needed because x86-64 can do many things, but jmp %r32
4313  // ain't one of them.
4314  const SDValue &Target = Node->getOperand(1);
4315  assert(Target.getValueType() == MVT::i32 && "Unexpected VT!");
4316  SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64));
4317  SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other,
4318  Node->getOperand(0), ZextTarget);
4319  ReplaceNode(Node, Brind.getNode());
4320  SelectCode(ZextTarget.getNode());
4321  SelectCode(Brind.getNode());
4322  return;
4323  }
4324  break;
4325  }
4326  case X86ISD::GlobalBaseReg:
4327  ReplaceNode(Node, getGlobalBaseReg());
4328  return;
4329 
4330  case ISD::BITCAST:
4331  // Just drop all 128/256/512-bit bitcasts.
4332  if (NVT.is512BitVector() || NVT.is256BitVector() || NVT.is128BitVector() ||
4333  NVT == MVT::f128) {
4334  ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
4335  CurDAG->RemoveDeadNode(Node);
4336  return;
4337  }
4338  break;
4339 
4340  case ISD::VSELECT: {
4341  // Replace VSELECT with non-mask conditions with BLENDV.
4342  if (Node->getOperand(0).getValueType().getVectorElementType() == MVT::i1)
4343  break;
4344 
4345  assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!");
4346  SDValue Blendv = CurDAG->getNode(
4347  X86ISD::BLENDV, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
4348  Node->getOperand(1), Node->getOperand(2));
4349  ReplaceNode(Node, Blendv.getNode());
4350  SelectCode(Blendv.getNode());
4351  // We already called ReplaceUses.
4352  return;
4353  }
4354 
4355  case ISD::SRL:
4356  if (matchBitExtract(Node))
4357  return;
4358  LLVM_FALLTHROUGH;
4359  case ISD::SRA:
4360  case ISD::SHL:
4361  if (tryShiftAmountMod(Node))
4362  return;
4363  break;
4364 
4365  case ISD::AND:
4366  if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) {
4367  // Try to form a masked VPTESTM. Operands can be in either order.
4368  SDValue N0 = Node->getOperand(0);
4369  SDValue N1 = Node->getOperand(1);
4370  if (N0.getOpcode() == ISD::SETCC && N0.hasOneUse() &&
4371  tryVPTESTM(Node, N0, N1))
4372  return;
4373  if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
4374  tryVPTESTM(Node, N1, N0))
4375  return;
4376  }
4377 
4378  if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) {
4379  ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
4380  CurDAG->RemoveDeadNode(Node);
4381  return;
4382  }
4383  if (matchBitExtract(Node))
4384  return;
4385  if (AndImmShrink && shrinkAndImmediate(Node))
4386  return;
4387 
4388  LLVM_FALLTHROUGH;
4389  case ISD::OR:
4390  case ISD::XOR:
4391  if (tryShrinkShlLogicImm(Node))
4392  return;
4393 
4394  LLVM_FALLTHROUGH;
4395  case ISD::ADD:
4396  case ISD::SUB: {
4397  if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && NVT.isVector() &&
4398  combineIncDecVector(Node))
4399  return;
4400 
4401  // Try to avoid folding immediates with multiple uses for optsize.
4402  // This code tries to select to register form directly to avoid going
4403  // through the isel table which might fold the immediate. We can't change
4404  // the add/sub/and/or/xor with immediate patterns in the
4405  // tablegen files to check immediate use count without making the patterns
4406  // unavailable to the fast-isel table.
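// For example, under optsize, repeating an add with the same imm32 at every
// use costs more bytes than materializing the constant in a register once
// and using the shorter reg-reg form at each use.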
4407  if (!OptForSize)
4408  break;
4409 
4410  // Only handle i8/i16/i32/i64.
4411  if (NVT != MVT::i8 && NVT != MVT::i16 && NVT != MVT::i32 && NVT != MVT::i64)
4412  break;
4413 
4414  SDValue N0 = Node->getOperand(0);
4415  SDValue N1 = Node->getOperand(1);
4416 
4417  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
4418  if (!Cst)
4419  break;
4420 
4421  int64_t Val = Cst->getSExtValue();
4422 
4423  // Make sure it's an immediate that is considered foldable.
4424  // FIXME: Handle unsigned 32 bit immediates for 64-bit AND.
4425  if (!isInt<8>(Val) && !isInt<32>(Val))
4426  break;
4427 
4428  // If this can match to INC/DEC, let it go.
4429  if (Opcode == ISD::ADD && (Val == 1 || Val == -1))
4430  break;
4431 
4432  // Check if we should avoid folding this immediate.
4433  if (!shouldAvoidImmediateInstFormsForSize(N1.getNode()))
4434  break;
4435 
4436  // We should not fold the immediate. So we need a register form instead.
4437  unsigned ROpc, MOpc;
4438  switch (NVT.SimpleTy) {
4439  default: llvm_unreachable("Unexpected VT!");
4440  case MVT::i8:
4441  switch (Opcode) {
4442  default: llvm_unreachable("Unexpected opcode!");
4443  case ISD::ADD: ROpc = X86::ADD8rr; MOpc = X86::ADD8rm; break;
4444  case ISD::SUB: ROpc = X86::SUB8rr; MOpc = X86::SUB8rm; break;
4445  case ISD::AND: ROpc = X86::AND8rr; MOpc = X86::AND8rm; break;
4446  case ISD::OR: ROpc = X86::OR8rr; MOpc = X86::OR8rm; break;
4447  case ISD::XOR: ROpc = X86::XOR8rr; MOpc = X86::XOR8rm; break;
4448  }
4449  break;
4450  case MVT::i16:
4451  switch (Opcode) {
4452  default: llvm_unreachable("Unexpected opcode!");
4453  case ISD::ADD: ROpc = X86::ADD16rr; MOpc = X86::ADD16rm; break;
4454  case ISD::SUB: ROpc = X86::SUB16rr; MOpc = X86::SUB16rm; break;
4455  case ISD::AND: ROpc = X86::AND16rr; MOpc = X86::AND16rm; break;
4456  case ISD::OR: ROpc = X86::OR16rr; MOpc = X86::OR16rm; break;
4457  case ISD::XOR: ROpc = X86::XOR16rr; MOpc = X86::XOR16rm; break;
4458  }
4459  break;
4460  case MVT::i32:
4461  switch (Opcode) {
4462  default: llvm_unreachable("Unexpected opcode!");
4463  case ISD::ADD: ROpc = X86::ADD32rr; MOpc = X86::ADD32rm; break;
4464  case ISD::SUB: ROpc = X86::SUB32rr; MOpc = X86::SUB32rm; break;
4465  case ISD::AND: ROpc = X86::AND32rr; MOpc = X86::AND32rm; break;
4466  case ISD::OR: ROpc = X86::OR32rr; MOpc = X86::OR32rm; break;
4467  case ISD::XOR: ROpc = X86::XOR32rr; MOpc = X86::XOR32rm; break;
4468  }
4469  break;
4470  case MVT::i64:
4471  switch (Opcode) {
4472  default: llvm_unreachable("Unexpected opcode!");
4473  case ISD::ADD: ROpc = X86::ADD64rr; MOpc = X86::ADD64rm; break;
4474  case ISD::SUB: ROpc = X86::SUB64rr; MOpc = X86::SUB64rm; break;
4475  case ISD::AND: ROpc = X86::AND64rr; MOpc = X86::AND64rm; break;
4476  case ISD::OR: ROpc = X86::OR64rr; MOpc = X86::OR64rm; break;
4477  case ISD::XOR: ROpc = X86::XOR64rr; MOpc = X86::XOR64rm; break;
4478  }
4479  break;
4480  }
4481 
4482  // Ok, this is an AND/OR/XOR/ADD/SUB with a constant.
4483 
4484  // If this is not a subtract, we can still try to fold a load.
4485  if (Opcode != ISD::SUB) {
4486  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4487  if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4488  SDValue Ops[] = { N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
4489  SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
4490  MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4491  // Update the chain.
4492  ReplaceUses(N0.getValue(1), SDValue(CNode, 2));
4493  // Record the mem-refs
4494  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N0)->getMemOperand()});
4495  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4496  CurDAG->RemoveDeadNode(Node);
4497  return;
4498  }
4499  }
4500 
4501  CurDAG->SelectNodeTo(Node, ROpc, NVT, MVT::i32, N0, N1);
4502  return;
4503  }
4504 
4505  case X86ISD::SMUL:
4506  // i16/i32/i64 are handled with isel patterns.
4507  if (NVT != MVT::i8)
4508  break;
4509  LLVM_FALLTHROUGH;
4510  case X86ISD::UMUL: {
4511  SDValue N0 = Node->getOperand(0);
4512  SDValue N1 = Node->getOperand(1);
4513 
4514  unsigned LoReg, ROpc, MOpc;
4515  switch (NVT.SimpleTy) {
4516  default: llvm_unreachable("Unsupported VT!");
4517  case MVT::i8:
4518  LoReg = X86::AL;
4519  ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r;
4520  MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m;
4521  break;
4522  case MVT::i16:
4523  LoReg = X86::AX;
4524  ROpc = X86::MUL16r;
4525  MOpc = X86::MUL16m;
4526  break;
4527  case MVT::i32:
4528  LoReg = X86::EAX;
4529  ROpc = X86::MUL32r;
4530  MOpc = X86::MUL32m;
4531  break;
4532  case MVT::i64:
4533  LoReg = X86::RAX;
4534  ROpc = X86::MUL64r;
4535  MOpc = X86::MUL64m;
4536  break;
4537  }
4538 
4539  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4540  bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4541  // Multiply is commutative.
4542  if (!FoldedLoad) {
4543  FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4544  if (FoldedLoad)
4545  std::swap(N0, N1);
4546  }
4547 
4548  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
4549  N0, SDValue()).getValue(1);
4550 
4551  MachineSDNode *CNode;
4552  if (FoldedLoad) {
4553  // i16/i32/i64 use an instruction that produces a low and high result even
4554  // though only the low result is used.
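// (For example, MUL32r always writes EDX:EAX; the EDX half is simply dead
// here.)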
4555  SDVTList VTs;
4556  if (NVT == MVT::i8)
4557  VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
4558  else
4559  VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other);
4560 
4561  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
4562  InFlag };
4563  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4564 
4565  // Update the chain.
4566  ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3));
4567  // Record the mem-refs
4568  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4569  } else {
4570  // i16/i32/i64 use an instruction that produces a low and high result even
4571  // though only the low result is used.
4572  SDVTList VTs;
4573  if (NVT == MVT::i8)
4574  VTs = CurDAG->getVTList(NVT, MVT::i32);
4575  else
4576  VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
4577 
4578  CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InFlag});
4579  }
4580 
4581  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4582  ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2));
4583  CurDAG->RemoveDeadNode(Node);
4584  return;
4585  }
4586 
4587  case ISD::SMUL_LOHI:
4588  case ISD::UMUL_LOHI: {
4589  SDValue N0 = Node->getOperand(0);
4590  SDValue N1 = Node->getOperand(1);
4591 
4592  unsigned Opc, MOpc;
4593  bool isSigned = Opcode == ISD::SMUL_LOHI;
4594  if (!isSigned) {
4595  switch (NVT.SimpleTy) {
4596  default: llvm_unreachable("Unsupported VT!");
4597  case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
4598  case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
4599  }
4600  } else {
4601  switch (NVT.SimpleTy) {
4602  default: llvm_unreachable("Unsupported VT!");
4603  case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
4604  case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
4605  }
4606  }
4607 
4608  unsigned SrcReg, LoReg, HiReg;
4609  switch (Opc) {
4610  default: llvm_unreachable("Unknown MUL opcode!");
4611  case X86::IMUL32r:
4612  case X86::MUL32r:
4613  SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
4614  break;
4615  case X86::IMUL64r:
4616  case X86::MUL64r:
4617  SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
4618  break;
4619  }
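  // MUL/IMUL leave the low half of the double-width product in EAX/RAX and
  // the high half in EDX/RDX; the CopyFromReg calls below read whichever
  // halves of the *MUL_LOHI node actually have uses.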
4620 
4621  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4622  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4623  // Multiply is commutative.
4624  if (!foldedLoad) {
4625  foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4626  if (foldedLoad)
4627  std::swap(N0, N1);
4628  }
4629 
4630  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
4631  N0, SDValue()).getValue(1);
4632  if (foldedLoad) {
4633  SDValue Chain;
4634  MachineSDNode *CNode = nullptr;
4635  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
4636  InFlag };
4637  SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
4638  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4639  Chain = SDValue(CNode, 0);
4640  InFlag = SDValue(CNode, 1);
4641 
4642  // Update the chain.
4643  ReplaceUses(N1.getValue(1), Chain);
4644  // Record the mem-refs
4645  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4646  } else {
4647  SDValue Ops[] = { N1, InFlag };
4648  SDVTList VTs = CurDAG->getVTList(MVT::Glue);
4649  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4650  InFlag = SDValue(CNode, 0);
4651  }
4652 
4653  // Copy the low half of the result, if it is needed.
4654  if (!SDValue(Node, 0).use_empty()) {
4655  assert(LoReg && "Register for low half is not defined!");
4656  SDValue ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
4657  NVT, InFlag);
4658  InFlag = ResLo.getValue(2);
4659  ReplaceUses(SDValue(Node, 0), ResLo);
4660  LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
4661  dbgs() << '\n');
4662  }
4663  // Copy the high half of the result, if it is needed.
4664  if (!SDValue(Node, 1).use_empty()) {
4665  assert(HiReg && "Register for high half is not defined!");
4666  SDValue ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
4667  NVT, InFlag);
4668  InFlag = ResHi.getValue(2);
4669  ReplaceUses(SDValue(Node, 1), ResHi);
4670  LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
4671  dbgs() << '\n');
4672  }
4673 
4674  CurDAG->RemoveDeadNode(Node);
4675  return;
4676  }
4677 
4678  case ISD::SDIVREM:
4679  case ISD::UDIVREM: {
4680  SDValue N0 = Node->getOperand(0);
4681  SDValue N1 = Node->getOperand(1);
4682 
4683  unsigned Opc, MOpc;
4684  bool isSigned = Opcode == ISD::SDIVREM;
4685  if (!isSigned) {
4686  switch (NVT.SimpleTy) {
4687  default: llvm_unreachable("Unsupported VT!");
4688  case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
4689  case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
4690  case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
4691  case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
4692  }
4693  } else {
4694  switch (NVT.SimpleTy) {
4695  default: llvm_unreachable("Unsupported VT!");
4696  case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
4697  case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
4698  case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
4699  case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
4700  }
4701  }
4702 
4703  unsigned LoReg, HiReg, ClrReg;
4704  unsigned SExtOpcode;
4705  switch (NVT.SimpleTy) {
4706  default: llvm_unreachable("Unsupported VT!");
4707  case MVT::i8:
4708  LoReg = X86::AL; ClrReg = HiReg = X86::AH;
4709  SExtOpcode = 0; // Not used.
4710  break;
4711  case MVT::i16:
4712  LoReg = X86::AX; HiReg = X86::DX;
4713  ClrReg = X86::DX;
4714  SExtOpcode = X86::CWD;
4715  break;
4716  case MVT::i32:
4717  LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
4718  SExtOpcode = X86::CDQ;
4719  break;
4720  case MVT::i64:
4721  LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
4722  SExtOpcode = X86::CQO;
4723  break;
4724  }
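  // x86 division takes a double-width dividend: AX for i8 (quotient returned
  // in AL, remainder in AH) and rDX:rAX for wider types (quotient in rAX,
  // remainder in rDX). CWD/CDQ/CQO sign-extend rAX into rDX for signed
  // division; for unsigned division, or when the sign bit is known zero, the
  // high register is simply zeroed instead.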
4725 
4726  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4727  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4728  bool signBitIsZero = CurDAG->SignBitIsZero(N0);
4729 
4730  SDValue InFlag;
4731  if (NVT == MVT::i8) {
4732  // Special case for div8, just use a move with zero extension to AX to
4733  // clear the upper 8 bits (AH).
4734  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain;
4735  MachineSDNode *Move;
4736  if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4737  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
4738  unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rm8
4739  : X86::MOVZX16rm8;
4740  Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, MVT::Other, Ops);
4741  Chain = SDValue(Move, 1);
4742  ReplaceUses(N0.getValue(1), Chain);
4743  // Record the mem-refs
4744  CurDAG->setNodeMemRefs(Move, {cast<LoadSDNode>(N0)->getMemOperand()});
4745  } else {
4746  unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rr8
4747  : X86::MOVZX16rr8;
4748  Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, N0);
4749  Chain = CurDAG->getEntryNode();
4750  }
4751  Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, SDValue(Move, 0),
4752  SDValue());
4753  InFlag = Chain.getValue(1);
4754  } else {
4755  InFlag =
4756  CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
4757  LoReg, N0, SDValue()).getValue(1);
4758  if (isSigned && !signBitIsZero) {
4759  // Sign extend the low part into the high part.
4760  InFlag =
4761  SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
4762  } else {
4763  // Zero out the high part, effectively zero extending the input.
4764  SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
4765  switch (NVT.SimpleTy) {
4766  case MVT::i16:
4767  ClrNode =
4768  SDValue(CurDAG->getMachineNode(
4769  TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
4770  CurDAG->getTargetConstant(X86::sub_16bit, dl,
4771  MVT::i32)),
4772  0);
4773  break;
4774  case MVT::i32:
4775  break;
4776  case MVT::i64:
4777  ClrNode =
4778  SDValue(CurDAG->getMachineNode(
4779  TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
4780  CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
4781  CurDAG->getTargetConstant(X86::sub_32bit, dl,
4782  MVT::i32)),
4783  0);
4784  break;
4785  default:
4786  llvm_unreachable("Unexpected division source");
4787  }
4788 
4789  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
4790  ClrNode, InFlag).getValue(1);
4791  }
4792  }
4793 
4794  if (foldedLoad) {
4795  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
4796  InFlag };
4797  MachineSDNode *CNode =
4798  CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
4799  InFlag = SDValue(CNode, 1);
4800  // Update the chain.
4801  ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
4802  // Record the mem-refs
4803  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4804  } else {
4805  InFlag =
4806  SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
4807  }
4808 
4809  // Prevent use of AH in a REX instruction by explicitly copying it to
4810  // an ABCD_L register.
4811  //
4812  // The current assumption of the register allocator is that isel
4813  // won't generate explicit references to the GR8_ABCD_H registers. If
4814  // the allocator and/or the backend get enhanced to be more robust in
4815  // that regard, this can be, and should be, removed.
4816  if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
4817  SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
4818  unsigned AHExtOpcode =
4819  isSigned ? X86::MOVSX32rr8_NOREX : X86::MOVZX32rr8_NOREX;
4820 
4821  SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
4822  MVT::Glue, AHCopy, InFlag);
4823  SDValue Result(RNode, 0);
4824  InFlag = SDValue(RNode, 1);
4825 
4826  Result =
4827  CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
4828 
4829  ReplaceUses(SDValue(Node, 1), Result);
4830  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
4831  dbgs() << '\n');
4832  }
4833  // Copy the division (low) result, if it is needed.
4834  if (!SDValue(Node, 0).use_empty()) {
4835  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
4836  LoReg, NVT, InFlag);
4837  InFlag = Result.getValue(2);
4838  ReplaceUses(SDValue(Node, 0), Result);
4839  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
4840  dbgs() << '\n');
4841  }
4842  // Copy the remainder (high) result, if it is needed.
4843  if (!SDValue(Node, 1).use_empty()) {
4844  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
4845  HiReg, NVT, InFlag);
4846  InFlag = Result.getValue(2);
4847  ReplaceUses(SDValue(Node, 1), Result);
4848  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
4849  dbgs() << '\n');
4850  }
4851  CurDAG->RemoveDeadNode(Node);
4852  return;
4853  }
4854 
4855  case X86ISD::CMP: {
4856  SDValue N0 = Node->getOperand(0);
4857  SDValue N1 = Node->getOperand(1);
4858 
4859  // Optimizations for TEST compares.
4860  if (!isNullConstant(N1))
4861  break;
4862 
4863  // Save the original VT of the compare.
4864  MVT CmpVT = N0.getSimpleValueType();
4865 
4866  // If we are comparing (and (shr X, C), Mask) with 0, emit a BEXTR followed
4867  // by a test instruction. The test should be removed later by
4868  // analyzeCompare if we are using only the zero flag.
4869  // TODO: Should we check the users and use the BEXTR flags directly?
4870  if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
4871  if (MachineSDNode *NewNode = matchBEXTRFromAndImm(N0.getNode())) {
4872  unsigned TestOpc = CmpVT == MVT::i64 ? X86::TEST64rr
4873  : X86::TEST32rr;
4874  SDValue BEXTR = SDValue(NewNode, 0);
4875  NewNode = CurDAG->getMachineNode(TestOpc, dl, MVT::i32, BEXTR, BEXTR);
4876  ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
4877  CurDAG->RemoveDeadNode(Node);
4878  return;
4879  }
4880  }
4881 
4882  // We can peek through truncates, but we need to be careful below.
4883  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
4884  N0 = N0.getOperand(0);
4885 
4886  // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
4887  // use a smaller encoding.
4888  // Look past the truncate if CMP is the only use of it.
4889  if (N0.getOpcode() == ISD::AND &&
4890  N0.getNode()->hasOneUse() &&
4891  N0.getValueType() != MVT::i8) {
4892      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
4893      if (!C) break;
4894  uint64_t Mask = C->getZExtValue();
4895 
4896  // Check if we can replace AND+IMM64 with a shift. This is possible for
4897  // masks like 0xFF000000 or 0x00FFFFFF and if we care only about the zero
4898  // flag.
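  // For instance, Mask = 0xFFFFFFFF00000000 (~Mask is a low-bit mask) gives
  // "(X & Mask) == 0" exactly when "(X >> 32) == 0", so and+test becomes
  // shr+test and the 64-bit immediate disappears; a low-bit mask such as
  // 0x00000000FFFFFFFF likewise becomes shl+test.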
4899  if (CmpVT == MVT::i64 && !isInt<32>(Mask) &&
4900  onlyUsesZeroFlag(SDValue(Node, 0))) {
4901  if (isMask_64(~Mask)) {
4902  unsigned TrailingZeros = countTrailingZeros(Mask);
4903  SDValue Imm = CurDAG->getTargetConstant(TrailingZeros, dl, MVT::i64);
4904  SDValue Shift =
4905  SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64, MVT::i32,
4906  N0.getOperand(0), Imm), 0);
4907  MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
4908  MVT::i32, Shift, Shift);
4909  ReplaceNode(Node, Test);
4910  return;
4911  }
4912  if (isMask_64(Mask)) {
4913  unsigned LeadingZeros = countLeadingZeros(Mask);
4914  SDValue Imm = CurDAG->getTargetConstant(LeadingZeros, dl, MVT::i64);
4915  SDValue Shift =
4916  SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64, MVT::i32,
4917  N0.getOperand(0), Imm), 0);
4918  MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
4919  MVT::i32, Shift, Shift);
4920  ReplaceNode(Node, Test);
4921  return;
4922  }
4923  }
4924 
4925  MVT VT;
4926  int SubRegOp;
4927  unsigned ROpc, MOpc;
4928 
4929  // For each of these checks we need to be careful if the sign flag is
4930  // being used. It is only safe to use the sign flag in two conditions,
4931  // either the sign bit in the shrunken mask is zero or the final test
4932  // size is equal to the original compare size.
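  // Concretely: shrinking "testl $0x80, %eax" to "testb $0x80, %al" moves SF
  // from bit 31 of the AND result (always zero here) to bit 7 (possibly one),
  // so the narrower test is only used when the mask's new sign bit is clear,
  // the compare was already that width, or nothing reads SF.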
4933 
4934  if (isUInt<8>(Mask) &&
4935  (!(Mask & 0x80) || CmpVT == MVT::i8 ||
4936  hasNoSignFlagUses(SDValue(Node, 0)))) {
4937  // For example, convert "testl %eax, $8" to "testb %al, $8"
4938  VT = MVT::i8;
4939  SubRegOp = X86::sub_8bit;
4940  ROpc = X86::TEST8ri;
4941  MOpc = X86::TEST8mi;
4942  } else if (OptForMinSize && isUInt<16>(Mask) &&
4943  (!(Mask & 0x8000) || CmpVT == MVT::i16 ||
4944  hasNoSignFlagUses(SDValue(Node, 0)))) {
4945  // For example, convert "testl %eax, $32776" to "testw %ax, $32776".
4946  // NOTE: We only want to form TESTW instructions if optimizing for
4947  // min size. Otherwise we only save one byte and possibly get a length
4948  // changing prefix penalty in the decoders.
4949  VT = MVT::i16;
4950  SubRegOp = X86::sub_16bit;
4951  ROpc = X86::TEST16ri;
4952  MOpc = X86::TEST16mi;
4953  } else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 &&
4954  ((!(Mask & 0x80000000) &&
4955  // Without minsize 16-bit Cmps can get here so we need to
4956  // be sure we calculate the correct sign flag if needed.
4957  (CmpVT != MVT::i16 || !(Mask & 0x8000))) ||
4958  CmpVT == MVT::i32 ||
4959  hasNoSignFlagUses(SDValue(Node, 0)))) {
4960  // For example, convert "testq %rax, $268468232" to "testl %eax, $268468232".
4961  // NOTE: We only want to run that transform if N0 is 32 or 64 bits.
4962  // Otherwise, we find ourselves in a position where we have to do
4963  // promotion. If previous passes did not promote the and, we assume
4964  // they had a good reason not to and do not promote here.
4965  VT = MVT::i32;
4966  SubRegOp = X86::sub_32bit;
4967  ROpc = X86::TEST32ri;
4968  MOpc = X86::TEST32mi;
4969  } else {
4970  // No eligible transformation was found.
4971  break;
4972  }
4973 
4974  SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
4975  SDValue Reg = N0.getOperand(0);
4976 
4977  // Emit a testl or testw.
4978  MachineSDNode *NewNode;
4979  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4980  if (tryFoldLoad(Node, N0.getNode(), Reg, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4981  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
4982  Reg.getOperand(0) };
4983  NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops);
4984  // Update the chain.
4985  ReplaceUses(Reg.getValue(1), SDValue(NewNode, 1));
4986  // Record the mem-refs
4987  CurDAG->setNodeMemRefs(NewNode,
4988  {cast<LoadSDNode>(Reg)->getMemOperand()});
4989  } else {
4990  // Extract the subregister if necessary.
4991  if (N0.getValueType() != VT)
4992  Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
4993 
4994  NewNode = CurDAG->getMachineNode(ROpc, dl, MVT::i32, Reg, Imm);
4995  }
4996  // Replace CMP with TEST.
4997  ReplaceNode(Node, NewNode);
4998  return;
4999  }
5000  break;
5001  }
5002  case X86ISD::PCMPISTR: {
5003  if (!Subtarget->hasSSE42())
5004  break;
5005 
5006  bool NeedIndex = !SDValue(Node, 0).use_empty();
5007  bool NeedMask = !SDValue(Node, 1).use_empty();
5008  // We can't fold a load if we are going to make two instructions.
5009  bool MayFoldLoad = !NeedIndex || !NeedMask;
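  // PCMPISTRI returns its index result in ECX and PCMPISTRM its mask result
  // in XMM0, so satisfying both results requires emitting two separate
  // instructions.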
5010 
5011  MachineSDNode *CNode;
5012  if (NeedMask) {
5013  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrr : X86::PCMPISTRMrr;
5014  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrm : X86::PCMPISTRMrm;
5015  CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node);
5016  ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
5017  }
5018  if (NeedIndex || !NeedMask) {
5019  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : X86::PCMPISTRIrr;
5020  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrm : X86::PCMPISTRIrm;
5021  CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node);
5022  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
5023  }
5024 
5025  // Connect the flag usage to the last instruction created.
5026  ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
5027  CurDAG->RemoveDeadNode(Node);
5028  return;
5029  }
5030  case X86ISD::PCMPESTR: {
5031  if (!Subtarget->hasSSE42())
5032  break;
5033 
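  // PCMPESTRI/PCMPESTRM take the two string lengths explicitly in EAX and
  // EDX (supplied here by operands 1 and 3); the index result is produced in
  // ECX and the mask result in XMM0.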
5034  // Copy the two implicit register inputs.
5035  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EAX,
5036  Node->getOperand(1),
5037  SDValue()).getValue(1);
5038  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
5039  Node->getOperand(3), InFlag).getValue(1);
5040 
5041  bool NeedIndex = !SDValue(Node, 0).use_empty();
5042  bool NeedMask = !SDValue(Node, 1).use_empty();
5043  // We can't fold a load if we are going to make two instructions.
5044  bool MayFoldLoad = !NeedIndex || !NeedMask;
5045 
5046  MachineSDNode *CNode;
5047  if (NeedMask) {
5048  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrr : X86::PCMPESTRMrr;
5049  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrm : X86::PCMPESTRMrm;
5050  CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node,
5051  InFlag);
5052  ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
5053  }
5054  if (NeedIndex || !NeedMask) {
5055  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : X86::PCMPESTRIrr;
5056  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrm : X86::PCMPESTRIrm;
5057  CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node, InFlag);
5058  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
5059  }
5060  // Connect the flag usage to the last instruction created.
5061  ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
5062  CurDAG->RemoveDeadNode(Node);
5063  return;
5064  }
5065 
5066  case ISD::SETCC: {
5067  if (NVT.isVector() && tryVPTESTM(Node, SDValue(Node, 0), SDValue()))
5068  return;
5069 
5070  break;
5071  }
5072 
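  // foldLoadStoreIntoMemOperand attempts to merge a matching {load; op; store}
  // sequence to the same address into a single read-modify-write memory
  // instruction (e.g. "add [mem], reg") when the chains allow it.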
5073  case ISD::STORE:
5074  if (foldLoadStoreIntoMemOperand(Node))
5075  return;
5076  break;
5077  case ISD::FCEIL:
5078  case ISD::FFLOOR:
5079  case ISD::FTRUNC:
5080  case ISD::FNEARBYINT:
5081  case ISD::FRINT: {
5082  // Replace fp rounding ops with their X86-specific equivalents so we don't
5083  // need two sets of patterns.
5084  // FIXME: This can only happen when the nodes started as STRICT_* and have
5085  // been mutated into their non-STRICT equivalents. Eventually this
5086  // mutation will be removed and we should switch the STRICT_ nodes to a
5087  // strict version of RNDSCALE in PreProcessISelDAG.
5088  unsigned Imm;
5089  switch (Node->getOpcode()) {
5090  default: llvm_unreachable("Unexpected opcode!");
5091  case ISD::FCEIL: Imm = 0xA; break;
5092  case ISD::FFLOOR: Imm = 0x9; break;
5093  case ISD::FTRUNC: Imm = 0xB; break;
5094  case ISD::FNEARBYINT: Imm = 0xC; break;
5095  case ISD::FRINT: Imm = 0x4; break;
5096  }
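  // These immediates follow the SSE4.1 ROUND*/RNDSCALE encoding: bits 1:0
  // pick the rounding mode (01 = toward -inf, 10 = toward +inf, 11 =
  // truncate), bit 2 selects the current MXCSR rounding mode instead, and
  // bit 3 suppresses the precision (inexact) exception. Hence 0x9/0xA/0xB
  // for floor/ceil/trunc, 0xC for nearbyint and 0x4 for rint.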
5097  SDLoc dl(Node);
5098  SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, Node->getValueType(0),
5099  Node->getOperand(0),
5100  CurDAG->getTargetConstant(Imm, dl, MVT::i8));
5101  ReplaceNode(Node, Res.getNode());
5102  SelectCode(Res.getNode());
5103  return;
5104  }
5105  }
5106 
5107  SelectCode(Node);
5108 }
5109 
5110 bool X86DAGToDAGISel::
5111 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
5112  std::vector<SDValue> &OutOps) {
5113  SDValue Op0, Op1, Op2, Op3, Op4;
5114  switch (ConstraintID) {
5115  default:
5116  llvm_unreachable("Unexpected asm memory constraint");
5117   case InlineAsm::Constraint_i:
5118     // FIXME: It seems strange that 'i' is needed here since it's supposed to
5119     //        be an immediate and not a memory constraint.
5120     LLVM_FALLTHROUGH;
5121  case InlineAsm::Constraint_o: // offsetable ??
5122  case InlineAsm::Constraint_v: // not offsetable ??
5123  case InlineAsm::Constraint_m: // memory
5124   case InlineAsm::Constraint_X:
5125     if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
5126  return true;
5127  break;
5128  }
5129 
5130  OutOps.push_back(Op0);
5131  OutOps.push_back(Op1);
5132  OutOps.push_back(Op2);
5133  OutOps.push_back(Op3);
5134  OutOps.push_back(Op4);
5135  return false;
5136 }
5137 
5138 /// This pass converts a legalized DAG into an X86-specific DAG,
5139 /// ready for instruction scheduling.
5140 FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
5141                                      CodeGenOpt::Level OptLevel) {
5142  return new X86DAGToDAGISel(TM, OptLevel);
5143 }