1 //===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a DAG pattern matching instruction selector for X86,
10 // converting from a legalized dag to a X86 dag.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "X86.h"
15 #include "X86MachineFunctionInfo.h"
16 #include "X86RegisterInfo.h"
17 #include "X86Subtarget.h"
18 #include "X86TargetMachine.h"
19 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Config/llvm-config.h"
24 #include "llvm/IR/ConstantRange.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/Instructions.h"
27 #include "llvm/IR/Intrinsics.h"
28 #include "llvm/IR/Type.h"
29 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/KnownBits.h"
36 #include <stdint.h>
37 using namespace llvm;
38 
39 #define DEBUG_TYPE "x86-isel"
40 
41 STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
42 
43 static cl::opt<bool> AndImmShrink("x86-and-imm-shrink", cl::init(true),
44  cl::desc("Enable setting constant bits to reduce size of mask immediates"),
45  cl::Hidden);
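// Illustrative example of the idea behind -x86-and-imm-shrink: for a 64-bit
// AND with the mask 0x00000000FFFFFF00, if the upper 32 bits of the other
// operand are already known to be zero, the mask may just as well be
// 0xFFFFFFFFFFFFFF00 (-256), which encodes as a sign-extended 8-bit immediate
// instead of requiring a separately materialized 64-bit constant. Setting
// bits that are provably zero in the input never changes the result of the
// AND.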
46 
47 //===----------------------------------------------------------------------===//
48 // Pattern Matcher Implementation
49 //===----------------------------------------------------------------------===//
50 
51 namespace {
52  /// This corresponds to X86AddressMode, but uses SDValue's instead of register
53  /// numbers for the leaves of the matched tree.
54  struct X86ISelAddressMode {
55  enum {
56  RegBase,
57  FrameIndexBase
58  } BaseType;
59 
60  // This is really a union, discriminated by BaseType!
61  SDValue Base_Reg;
62  int Base_FrameIndex;
63 
64  unsigned Scale;
65  SDValue IndexReg;
66  int32_t Disp;
67  SDValue Segment;
68  const GlobalValue *GV;
69  const Constant *CP;
70  const BlockAddress *BlockAddr;
71  const char *ES;
72  MCSymbol *MCSym;
73  int JT;
74  unsigned Align; // CP alignment.
75  unsigned char SymbolFlags; // X86II::MO_*
76  bool NegateIndex = false;
77 
78  X86ISelAddressMode()
79  : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
80  Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
81  MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {}
82 
83  bool hasSymbolicDisplacement() const {
84  return GV != nullptr || CP != nullptr || ES != nullptr ||
85  MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
86  }
87 
88  bool hasBaseOrIndexReg() const {
89  return BaseType == FrameIndexBase ||
90  IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
91  }
92 
93  /// Return true if this addressing mode is already RIP-relative.
94  bool isRIPRelative() const {
95  if (BaseType != RegBase) return false;
96  if (RegisterSDNode *RegNode =
97  dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
98  return RegNode->getReg() == X86::RIP;
99  return false;
100  }
101 
102  void setBaseReg(SDValue Reg) {
103  BaseType = RegBase;
104  Base_Reg = Reg;
105  }
106 
107 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
108  void dump(SelectionDAG *DAG = nullptr) {
109  dbgs() << "X86ISelAddressMode " << this << '\n';
110  dbgs() << "Base_Reg ";
111  if (Base_Reg.getNode())
112  Base_Reg.getNode()->dump(DAG);
113  else
114  dbgs() << "nul\n";
115  if (BaseType == FrameIndexBase)
116  dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n';
117  dbgs() << " Scale " << Scale << '\n'
118  << "IndexReg ";
119  if (NegateIndex)
120  dbgs() << "negate ";
121  if (IndexReg.getNode())
122  IndexReg.getNode()->dump(DAG);
123  else
124  dbgs() << "nul\n";
125  dbgs() << " Disp " << Disp << '\n'
126  << "GV ";
127  if (GV)
128  GV->dump();
129  else
130  dbgs() << "nul";
131  dbgs() << " CP ";
132  if (CP)
133  CP->dump();
134  else
135  dbgs() << "nul";
136  dbgs() << '\n'
137  << "ES ";
138  if (ES)
139  dbgs() << ES;
140  else
141  dbgs() << "nul";
142  dbgs() << " MCSym ";
143  if (MCSym)
144  dbgs() << MCSym;
145  else
146  dbgs() << "nul";
147  dbgs() << " JT" << JT << " Align" << Align << '\n';
148  }
149 #endif
150  };
151 }
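// For orientation, the fields of X86ISelAddressMode correspond to the general
// x86 memory operand form
//     Segment : [Base_Reg + IndexReg * Scale + Symbol + Disp]
// For example, the (hypothetical) reference %gs:sym+16(%rbx,%rcx,8) would be
// represented roughly as Segment = GS, Base_Reg = RBX, IndexReg = RCX,
// Scale = 8, GV = sym, Disp = 16.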
152 
153 namespace {
154  //===--------------------------------------------------------------------===//
155  /// ISel - X86-specific code to select X86 machine instructions for
156  /// SelectionDAG operations.
157  ///
158  class X86DAGToDAGISel final : public SelectionDAGISel {
159  /// Keep a pointer to the X86Subtarget around so that we can
160  /// make the right decision when generating code for different targets.
161  const X86Subtarget *Subtarget;
162 
163  /// If true, selector should try to optimize for code size instead of
164  /// performance.
165  bool OptForSize;
166 
167  /// If true, selector should try to optimize for minimum code size.
168  bool OptForMinSize;
169 
170  /// Disable direct TLS access through segment registers.
171  bool IndirectTlsSegRefs;
172 
173  public:
174  explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
175  : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), OptForSize(false),
176  OptForMinSize(false), IndirectTlsSegRefs(false) {}
177 
178  StringRef getPassName() const override {
179  return "X86 DAG->DAG Instruction Selection";
180  }
181 
182  bool runOnMachineFunction(MachineFunction &MF) override {
183  // Reset the subtarget each time through.
184  Subtarget = &MF.getSubtarget<X86Subtarget>();
185  IndirectTlsSegRefs = MF.getFunction().hasFnAttribute(
186  "indirect-tls-seg-refs");
187 
188  // OptFor[Min]Size are used in pattern predicates that isel is matching.
189  OptForSize = MF.getFunction().hasOptSize();
190  OptForMinSize = MF.getFunction().hasMinSize();
191  assert((!OptForMinSize || OptForSize) &&
192  "OptForMinSize implies OptForSize");
193 
194  SelectionDAGISel::runOnMachineFunction(MF);
195  return true;
196  }
197 
198  void EmitFunctionEntryCode() override;
199 
200  bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;
201 
202  void PreprocessISelDAG() override;
203  void PostprocessISelDAG() override;
204 
205 // Include the pieces autogenerated from the target description.
206 #include "X86GenDAGISel.inc"
207 
208  private:
209  void Select(SDNode *N) override;
210 
211  bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
212  bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
213  bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
214  bool matchAddress(SDValue N, X86ISelAddressMode &AM);
215  bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
216  bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth);
217  bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
218  unsigned Depth);
219  bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
220  bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
221  SDValue &Scale, SDValue &Index, SDValue &Disp,
222  SDValue &Segment);
223  bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
224  SDValue &Scale, SDValue &Index, SDValue &Disp,
225  SDValue &Segment);
226  bool selectMOV64Imm32(SDValue N, SDValue &Imm);
227  bool selectLEAAddr(SDValue N, SDValue &Base,
228  SDValue &Scale, SDValue &Index, SDValue &Disp,
229  SDValue &Segment);
230  bool selectLEA64_32Addr(SDValue N, SDValue &Base,
231  SDValue &Scale, SDValue &Index, SDValue &Disp,
232  SDValue &Segment);
233  bool selectTLSADDRAddr(SDValue N, SDValue &Base,
234  SDValue &Scale, SDValue &Index, SDValue &Disp,
235  SDValue &Segment);
236  bool selectScalarSSELoad(SDNode *Root, SDNode *Parent, SDValue N,
237  SDValue &Base, SDValue &Scale,
238  SDValue &Index, SDValue &Disp,
239  SDValue &Segment,
240  SDValue &NodeWithChain);
241  bool selectRelocImm(SDValue N, SDValue &Op);
242 
243  bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
244  SDValue &Base, SDValue &Scale,
245  SDValue &Index, SDValue &Disp,
246  SDValue &Segment);
247 
248  // Convenience method where P is also root.
249  bool tryFoldLoad(SDNode *P, SDValue N,
250  SDValue &Base, SDValue &Scale,
251  SDValue &Index, SDValue &Disp,
252  SDValue &Segment) {
253  return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment);
254  }
255 
256  /// Implement addressing mode selection for inline asm expressions.
257  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
258  unsigned ConstraintID,
259  std::vector<SDValue> &OutOps) override;
260 
261  void emitSpecialCodeForMain();
262 
263  inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL,
264  MVT VT, SDValue &Base, SDValue &Scale,
265  SDValue &Index, SDValue &Disp,
266  SDValue &Segment) {
267  if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
268  Base = CurDAG->getTargetFrameIndex(
269  AM.Base_FrameIndex, TLI->getPointerTy(CurDAG->getDataLayout()));
270  else if (AM.Base_Reg.getNode())
271  Base = AM.Base_Reg;
272  else
273  Base = CurDAG->getRegister(0, VT);
274 
275  Scale = getI8Imm(AM.Scale, DL);
276 
277  // Negate the index if needed.
278  if (AM.NegateIndex) {
279  unsigned NegOpc = VT == MVT::i64 ? X86::NEG64r : X86::NEG32r;
280  SDValue Neg = SDValue(CurDAG->getMachineNode(NegOpc, DL, VT, MVT::i32,
281  AM.IndexReg), 0);
282  AM.IndexReg = Neg;
283  }
284 
285  if (AM.IndexReg.getNode())
286  Index = AM.IndexReg;
287  else
288  Index = CurDAG->getRegister(0, VT);
289 
290  // These are 32-bit even in 64-bit mode since RIP-relative offset
291  // is 32-bit.
292  if (AM.GV)
293  Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
294  MVT::i32, AM.Disp,
295  AM.SymbolFlags);
296  else if (AM.CP)
297  Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
298  AM.Align, AM.Disp, AM.SymbolFlags);
299  else if (AM.ES) {
300  assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
301  Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
302  } else if (AM.MCSym) {
303  assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
304  assert(AM.SymbolFlags == 0 && "oo");
305  Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
306  } else if (AM.JT != -1) {
307  assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
308  Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
309  } else if (AM.BlockAddr)
310  Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
311  AM.SymbolFlags);
312  else
313  Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32);
314 
315  if (AM.Segment.getNode())
316  Segment = AM.Segment;
317  else
318  Segment = CurDAG->getRegister(0, MVT::i16);
319  }
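// The five values produced above (Base, Scale, Index, Disp, Segment) form the
// standard operand tuple that X86 memory-form instructions expect, in that
// order. As a sketch, a fully populated reference might come out as
//     Base = %rax, Scale = 4, Index = %rcx, Disp = @foo+8, Segment = %fs
// with register 0 standing in for any component that is absent.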
320 
321  // Utility function to determine whether we should avoid selecting
322  // immediate forms of instructions for better code size or not.
323  // At a high level, we'd like to avoid such instructions when
324  // we have similar constants used within the same basic block
325  // that can be kept in a register.
326  //
327  bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
328  uint32_t UseCount = 0;
329 
330  // Do not want to hoist if we're not optimizing for size.
331  // TODO: We'd like to remove this restriction.
332  // See the comment in X86InstrInfo.td for more info.
333  if (!OptForSize)
334  return false;
335 
336  // Walk all the users of the immediate.
337  for (SDNode::use_iterator UI = N->use_begin(),
338  UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) {
339 
340  SDNode *User = *UI;
341 
342  // This user is already selected. Count it as a legitimate use and
343  // move on.
344  if (User->isMachineOpcode()) {
345  UseCount++;
346  continue;
347  }
348 
349  // We want to count stores of immediates as real uses.
350  if (User->getOpcode() == ISD::STORE &&
351  User->getOperand(1).getNode() == N) {
352  UseCount++;
353  continue;
354  }
355 
356  // We don't currently match users that have > 2 operands (except
357  // for stores, which are handled above)
358  // Those instructions won't match in ISEL, for now, and would
359  // be counted incorrectly.
360  // This may change in the future as we add additional instruction
361  // types.
362  if (User->getNumOperands() != 2)
363  continue;
364 
365  // Immediates that are used for offsets as part of stack
366  // manipulation should be left alone. These are typically
367  // used to indicate SP offsets for argument passing and
368  // will get pulled into stores/pushes (implicitly).
369  if (User->getOpcode() == X86ISD::ADD ||
370  User->getOpcode() == ISD::ADD ||
371  User->getOpcode() == X86ISD::SUB ||
372  User->getOpcode() == ISD::SUB) {
373 
374  // Find the other operand of the add/sub.
375  SDValue OtherOp = User->getOperand(0);
376  if (OtherOp.getNode() == N)
377  OtherOp = User->getOperand(1);
378 
379  // Don't count if the other operand is SP.
380  RegisterSDNode *RegNode;
381  if (OtherOp->getOpcode() == ISD::CopyFromReg &&
382  (RegNode = dyn_cast_or_null<RegisterSDNode>(
383  OtherOp->getOperand(1).getNode())))
384  if ((RegNode->getReg() == X86::ESP) ||
385  (RegNode->getReg() == X86::RSP))
386  continue;
387  }
388 
389  // ... otherwise, count this and move on.
390  UseCount++;
391  }
392 
393  // If we have more than 1 use, then recommend for hoisting.
394  return (UseCount > 1);
395  }
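// Rough illustration of the size rationale above: a block containing
//     movl $12345678, (%rdi)
//     movl $12345678, (%rsi)
// carries a 4-byte immediate in each store. Materializing the constant once,
//     movl $12345678, %eax
//     movl %eax, (%rdi)
//     movl %eax, (%rsi)
// pays for one extra mov but drops an imm32 from every use, which becomes a
// net win at -Os/-Oz as soon as the immediate has more than one use.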
396 
397  /// Return a target constant with the specified value of type i8.
398  inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) {
399  return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
400  }
401 
402  /// Return a target constant with the specified value, of type i32.
403  inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
404  return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
405  }
406 
407  /// Return a target constant with the specified value, of type i64.
408  inline SDValue getI64Imm(uint64_t Imm, const SDLoc &DL) {
409  return CurDAG->getTargetConstant(Imm, DL, MVT::i64);
410  }
411 
412  SDValue getExtractVEXTRACTImmediate(SDNode *N, unsigned VecWidth,
413  const SDLoc &DL) {
414  assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
415  uint64_t Index = N->getConstantOperandVal(1);
416  MVT VecVT = N->getOperand(0).getSimpleValueType();
417  return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
418  }
419 
420  SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth,
421  const SDLoc &DL) {
422  assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
423  uint64_t Index = N->getConstantOperandVal(2);
424  MVT VecVT = N->getSimpleValueType(0);
425  return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
426  }
427 
428  // Helper to detect unneeded 'and' instructions on shift amounts. Called
429  // from PatFrags in tablegen.
430  bool isUnneededShiftMask(SDNode *N, unsigned Width) const {
431  assert(N->getOpcode() == ISD::AND && "Unexpected opcode");
432  const APInt &Val = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
433 
434  if (Val.countTrailingOnes() >= Width)
435  return true;
436 
437  APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero;
438  return Mask.countTrailingOnes() >= Width;
439  }
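// Worked example for isUnneededShiftMask: hardware shifts only consume the
// low bits of the count, so for a 32-bit shift the relevant width is 5 and a
// pattern such as (shl X, (and Count, 31)) needs no explicit AND, because 31
// has five trailing ones. The second check covers masks with "holes": e.g. a
// mask of 0x1D still qualifies when bit 1 of the count is known zero, since
// OR-ing in the known-zero bits yields 0x1F.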
440 
441  /// Return an SDNode that returns the value of the global base register.
442  /// Output instructions required to initialize the global base register,
443  /// if necessary.
444  SDNode *getGlobalBaseReg();
445 
446  /// Return a reference to the TargetMachine, casted to the target-specific
447  /// type.
448  const X86TargetMachine &getTargetMachine() const {
449  return static_cast<const X86TargetMachine &>(TM);
450  }
451 
452  /// Return a reference to the TargetInstrInfo, casted to the target-specific
453  /// type.
454  const X86InstrInfo *getInstrInfo() const {
455  return Subtarget->getInstrInfo();
456  }
457 
458  /// Address-mode matching performs shift-of-and to and-of-shift
459  /// reassociation in order to expose more scaled addressing
460  /// opportunities.
461  bool ComplexPatternFuncMutatesDAG() const override {
462  return true;
463  }
464 
465  bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const;
466 
467  /// Returns whether this is a relocatable immediate in the range
468  /// [-2^Width .. 2^Width-1].
469  template <unsigned Width> bool isSExtRelocImm(SDNode *N) const {
470  if (auto *CN = dyn_cast<ConstantSDNode>(N))
471  return isInt<Width>(CN->getSExtValue());
472  return isSExtAbsoluteSymbolRef(Width, N);
473  }
474 
475  // Indicates we should prefer to use a non-temporal load for this load.
476  bool useNonTemporalLoad(LoadSDNode *N) const {
477  if (!N->isNonTemporal())
478  return false;
479 
480  unsigned StoreSize = N->getMemoryVT().getStoreSize();
481 
482  if (N->getAlignment() < StoreSize)
483  return false;
484 
485  switch (StoreSize) {
486  default: llvm_unreachable("Unsupported store size");
487  case 4:
488  case 8:
489  return false;
490  case 16:
491  return Subtarget->hasSSE41();
492  case 32:
493  return Subtarget->hasAVX2();
494  case 64:
495  return Subtarget->hasAVX512();
496  }
497  }
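// The feature checks above line up with the non-temporal load instructions
// that actually exist: 16-byte MOVNTDQA arrived with SSE4.1, the 32-byte form
// with AVX2, and the 64-byte form with AVX-512. There is no non-temporal load
// narrower than 16 bytes, hence the 4- and 8-byte cases return false.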
498 
499  bool foldLoadStoreIntoMemOperand(SDNode *Node);
500  MachineSDNode *matchBEXTRFromAndImm(SDNode *Node);
501  bool matchBitExtract(SDNode *Node);
502  bool shrinkAndImmediate(SDNode *N);
503  bool isMaskZeroExtended(SDNode *N) const;
504  bool tryShiftAmountMod(SDNode *N);
505  bool tryShrinkShlLogicImm(SDNode *N);
506  bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
507 
508  MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
509  const SDLoc &dl, MVT VT, SDNode *Node);
510  MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
511  const SDLoc &dl, MVT VT, SDNode *Node,
512  SDValue &InFlag);
513 
514  bool tryOptimizeRem8Extend(SDNode *N);
515 
516  bool onlyUsesZeroFlag(SDValue Flags) const;
517  bool hasNoSignFlagUses(SDValue Flags) const;
518  bool hasNoCarryFlagUses(SDValue Flags) const;
519  };
520 }
521 
522 
523 // Returns true if this masked compare can be implemented legally with this
524 // type.
525 static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
526  unsigned Opcode = N->getOpcode();
527  if (Opcode == X86ISD::CMPM || Opcode == ISD::SETCC ||
528  Opcode == X86ISD::CMPM_SAE || Opcode == X86ISD::VFPCLASS) {
529  // We can get 256-bit 8 element types here without VLX being enabled. When
530  // this happens we will use 512-bit operations and the mask will not be
531  // zero extended.
532  EVT OpVT = N->getOperand(0).getValueType();
533  if (OpVT.is256BitVector() || OpVT.is128BitVector())
534  return Subtarget->hasVLX();
535 
536  return true;
537  }
538  // Scalar opcodes use 128 bit registers, but aren't subject to the VLX check.
539  if (Opcode == X86ISD::VFPCLASSS || Opcode == X86ISD::FSETCCM ||
540  Opcode == X86ISD::FSETCCM_SAE)
541  return true;
542 
543  return false;
544 }
545 
546 // Returns true if we can assume the writer of the mask has zero extended it
547 // for us.
548 bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const {
549  // If this is an AND, check if we have a compare on either side. As long as
550  // one side guarantees the mask is zero extended, the AND will preserve those
551  // zeros.
552  if (N->getOpcode() == ISD::AND)
553  return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) ||
554  isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget);
555 
556  return isLegalMaskCompare(N, Subtarget);
557 }
558 
559 bool
560 X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
561  if (OptLevel == CodeGenOpt::None) return false;
562 
563  if (!N.hasOneUse())
564  return false;
565 
566  if (N.getOpcode() != ISD::LOAD)
567  return true;
568 
569  // Don't fold non-temporal loads if we have an instruction for them.
570  if (useNonTemporalLoad(cast<LoadSDNode>(N)))
571  return false;
572 
573  // If N is a load, do additional profitability checks.
574  if (U == Root) {
575  switch (U->getOpcode()) {
576  default: break;
577  case X86ISD::ADD:
578  case X86ISD::ADC:
579  case X86ISD::SUB:
580  case X86ISD::SBB:
581  case X86ISD::AND:
582  case X86ISD::XOR:
583  case X86ISD::OR:
584  case ISD::ADD:
585  case ISD::ADDCARRY:
586  case ISD::AND:
587  case ISD::OR:
588  case ISD::XOR: {
589  SDValue Op1 = U->getOperand(1);
590 
591  // If the other operand is an 8-bit immediate we should fold the immediate
592  // instead. This reduces code size.
593  // e.g.
594  // movl 4(%esp), %eax
595  // addl $4, %eax
596  // vs.
597  // movl $4, %eax
598  // addl 4(%esp), %eax
599  // The former is 2 bytes shorter. In the case where the increment is 1,
600  // the saving can be 4 bytes (by using incl %eax).
601  if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1)) {
602  if (Imm->getAPIntValue().isSignedIntN(8))
603  return false;
604 
605  // If this is a 64-bit AND with an immediate that fits in 32-bits,
606  // prefer using the smaller and over folding the load. This is needed to
607  // make sure immediates created by shrinkAndImmediate are always folded.
608  // Ideally we would narrow the load during DAG combine and get the
609  // best of both worlds.
610  if (U->getOpcode() == ISD::AND &&
611  Imm->getAPIntValue().getBitWidth() == 64 &&
612  Imm->getAPIntValue().isIntN(32))
613  return false;
614 
615  // If this is really a zext_inreg that can be represented with a movzx
616  // instruction, prefer that.
617  // TODO: We could shrink the load and fold if it is non-volatile.
618  if (U->getOpcode() == ISD::AND &&
619  (Imm->getAPIntValue() == UINT8_MAX ||
620  Imm->getAPIntValue() == UINT16_MAX ||
621  Imm->getAPIntValue() == UINT32_MAX))
622  return false;
623 
624  // For ADD/SUB, we can negate the immediate and use the opposite operation
625  // to fit 128 into a sign-extended 8-bit immediate.
626  if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB) &&
627  (-Imm->getAPIntValue()).isSignedIntN(8))
628  return false;
629  }
630 
631  // If the other operand is a TLS address, we should fold it instead.
632  // This produces
633  // movl %gs:0, %eax
634  // leal i@NTPOFF(%eax), %eax
635  // instead of
636  // movl $i@NTPOFF, %eax
637  // addl %gs:0, %eax
638  // if the block also has an access to a second TLS address this will save
639  // a load.
640  // FIXME: This is probably also true for non-TLS addresses.
641  if (Op1.getOpcode() == X86ISD::Wrapper) {
642  SDValue Val = Op1.getOperand(0);
643  if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
644  return false;
645  }
646 
647  // Don't fold load if this matches the BTS/BTR/BTC patterns.
648  // BTS: (or X, (shl 1, n))
649  // BTR: (and X, (rotl -2, n))
650  // BTC: (xor X, (shl 1, n))
651  if (U->getOpcode() == ISD::OR || U->getOpcode() == ISD::XOR) {
652  if (U->getOperand(0).getOpcode() == ISD::SHL &&
653  isOneConstant(U->getOperand(0).getOperand(0)))
654  return false;
655 
656  if (U->getOperand(1).getOpcode() == ISD::SHL &&
657  isOneConstant(U->getOperand(1).getOperand(0)))
658  return false;
659  }
660  if (U->getOpcode() == ISD::AND) {
661  SDValue U0 = U->getOperand(0);
662  SDValue U1 = U->getOperand(1);
663  if (U0.getOpcode() == ISD::ROTL) {
664  auto *C = dyn_cast<ConstantSDNode>(U0.getOperand(0));
665  if (C && C->getSExtValue() == -2)
666  return false;
667  }
668 
669  if (U1.getOpcode() == ISD::ROTL) {
670  auto *C = dyn_cast<ConstantSDNode>(U1.getOperand(0));
671  if (C && C->getSExtValue() == -2)
672  return false;
673  }
674  }
675 
676  break;
677  }
678  case ISD::SHL:
679  case ISD::SRA:
680  case ISD::SRL:
681  // Don't fold a load into a shift by immediate. The BMI2 instructions
682  // support folding a load, but not an immediate. The legacy instructions
683  // support folding an immediate, but can't fold a load. Folding an
684  // immediate is preferable to folding a load.
685  if (isa<ConstantSDNode>(U->getOperand(1)))
686  return false;
687 
688  break;
689  }
690  }
691 
692  // Prevent folding a load if this can be implemented with an insert_subreg or
693  // a move that implicitly zeroes.
694  if (Root->getOpcode() == ISD::INSERT_SUBVECTOR &&
695  isNullConstant(Root->getOperand(2)) &&
696  (Root->getOperand(0).isUndef() ||
697  ISD::isBuildVectorAllZeros(Root->getOperand(0).getNode())))
698  return false;
699 
700  return true;
701 }
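// Illustration of the BTS/BTR/BTC guard above: an OR whose other operand is
// (shl 1, n) is normally selected as a single bts on the register form of X.
// If the load producing X were folded into the OR as a memory operand, that
// pattern could no longer match, so the fold is declined.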
702 
703 /// Replace the original chain operand of the call with
704 /// load's chain operand and move load below the call's chain operand.
705 static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
706  SDValue Call, SDValue OrigChain) {
707  SmallVector<SDValue, 8> Ops;
708  SDValue Chain = OrigChain.getOperand(0);
709  if (Chain.getNode() == Load.getNode())
710  Ops.push_back(Load.getOperand(0));
711  else {
712  assert(Chain.getOpcode() == ISD::TokenFactor &&
713  "Unexpected chain operand");
714  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
715  if (Chain.getOperand(i).getNode() == Load.getNode())
716  Ops.push_back(Load.getOperand(0));
717  else
718  Ops.push_back(Chain.getOperand(i));
719  SDValue NewChain =
720  CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
721  Ops.clear();
722  Ops.push_back(NewChain);
723  }
724  Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
725  CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
726  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
727  Load.getOperand(1), Load.getOperand(2));
728 
729  Ops.clear();
730  Ops.push_back(SDValue(Load.getNode(), 1));
731  Ops.append(Call->op_begin() + 1, Call->op_end());
732  CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
733 }
734 
735 /// Return true if call address is a load and it can be
736 /// moved below CALLSEQ_START and the chains leading up to the call.
737 /// Return the CALLSEQ_START by reference as a second output.
738 /// In the case of a tail call, there isn't a callseq node between the call
739 /// chain and the load.
740 static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
741  // The transformation is somewhat dangerous if the call's chain was glued to
742  // the call. After MoveBelowOrigChain the load is moved between the call and
743  // the chain, this can create a cycle if the load is not folded. So it is
744  // *really* important that we are sure the load will be folded.
745  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
746  return false;
747  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
748  if (!LD ||
749  LD->isVolatile() ||
750  LD->getAddressingMode() != ISD::UNINDEXED ||
751  LD->getExtensionType() != ISD::NON_EXTLOAD)
752  return false;
753 
754  // Now let's find the callseq_start.
755  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
756  if (!Chain.hasOneUse())
757  return false;
758  Chain = Chain.getOperand(0);
759  }
760 
761  if (!Chain.getNumOperands())
762  return false;
763  // Since we are not checking for AA here, conservatively abort if the chain
764  // writes to memory. It's not safe to move the callee (a load) across a store.
765  if (isa<MemSDNode>(Chain.getNode()) &&
766  cast<MemSDNode>(Chain.getNode())->writeMem())
767  return false;
768  if (Chain.getOperand(0).getNode() == Callee.getNode())
769  return true;
770  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
771  Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
772  Callee.getValue(1).hasOneUse())
773  return true;
774  return false;
775 }
776 
777 void X86DAGToDAGISel::PreprocessISelDAG() {
778  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
779  E = CurDAG->allnodes_end(); I != E; ) {
780  SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
781 
782  // If this is a target specific AND node with no flag usages, turn it back
783  // into ISD::AND to enable test instruction matching.
784  if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) {
785  SDValue Res = CurDAG->getNode(ISD::AND, SDLoc(N), N->getValueType(0),
786  N->getOperand(0), N->getOperand(1));
787  --I;
788  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
789  ++I;
790  CurDAG->DeleteNode(N);
791  continue;
792  }
793 
794  switch (N->getOpcode()) {
795  case ISD::FP_TO_SINT:
796  case ISD::FP_TO_UINT: {
797  // Replace vector fp_to_s/uint with their X86 specific equivalent so we
798  // don't need 2 sets of patterns.
799  if (!N->getSimpleValueType(0).isVector())
800  break;
801 
802  unsigned NewOpc;
803  switch (N->getOpcode()) {
804  default: llvm_unreachable("Unexpected opcode!");
805  case ISD::FP_TO_SINT: NewOpc = X86ISD::CVTTP2SI; break;
806  case ISD::FP_TO_UINT: NewOpc = X86ISD::CVTTP2UI; break;
807  }
808  SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
809  N->getOperand(0));
810  --I;
811  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
812  ++I;
813  CurDAG->DeleteNode(N);
814  continue;
815  }
816  case ISD::SHL:
817  case ISD::SRA:
818  case ISD::SRL: {
819  // Replace vector shifts with their X86 specific equivalent so we don't
820  // need 2 sets of patterns.
821  if (!N->getValueType(0).isVector())
822  break;
823 
824  unsigned NewOpc;
825  switch (N->getOpcode()) {
826  default: llvm_unreachable("Unexpected opcode!");
827  case ISD::SHL: NewOpc = X86ISD::VSHLV; break;
828  case ISD::SRA: NewOpc = X86ISD::VSRAV; break;
829  case ISD::SRL: NewOpc = X86ISD::VSRLV; break;
830  }
831  SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
832  N->getOperand(0), N->getOperand(1));
833  --I;
834  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
835  ++I;
836  CurDAG->DeleteNode(N);
837  continue;
838  }
839  case ISD::ANY_EXTEND:
840  case ISD::ANY_EXTEND_VECTOR_INREG: {
841  // Replace vector any extend with the zero extend equivalents so we don't
842  // need 2 sets of patterns. Ignore vXi1 extensions.
843  if (!N->getValueType(0).isVector() ||
844  N->getOperand(0).getValueType().getVectorElementType() == MVT::i1)
845  break;
846 
847  unsigned NewOpc = N->getOpcode() == ISD::ANY_EXTEND
848  ? ISD::ZERO_EXTEND
849  : ISD::ZERO_EXTEND_VECTOR_INREG;
850 
851  SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
852  N->getOperand(0));
853  --I;
854  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
855  ++I;
856  CurDAG->DeleteNode(N);
857  continue;
858  }
859  case ISD::FCEIL:
860  case ISD::FFLOOR:
861  case ISD::FTRUNC:
862  case ISD::FNEARBYINT:
863  case ISD::FRINT: {
864  // Replace fp rounding with their X86 specific equivalent so we don't
865  // need 2 sets of patterns.
866  unsigned Imm;
867  switch (N->getOpcode()) {
868  default: llvm_unreachable("Unexpected opcode!");
869  case ISD::FCEIL: Imm = 0xA; break;
870  case ISD::FFLOOR: Imm = 0x9; break;
871  case ISD::FTRUNC: Imm = 0xB; break;
872  case ISD::FNEARBYINT: Imm = 0xC; break;
873  case ISD::FRINT: Imm = 0x4; break;
874  }
875  SDLoc dl(N);
876  SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl,
877  N->getValueType(0),
878  N->getOperand(0),
879  CurDAG->getConstant(Imm, dl, MVT::i8));
880  --I;
881  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
882  ++I;
883  CurDAG->DeleteNode(N);
884  continue;
885  }
886  case X86ISD::FANDN:
887  case X86ISD::FAND:
888  case X86ISD::FOR:
889  case X86ISD::FXOR: {
890  // Widen scalar fp logic ops to vector to reduce isel patterns.
891  // FIXME: Can we do this during lowering/combine.
892  MVT VT = N->getSimpleValueType(0);
893  if (VT.isVector() || VT == MVT::f128)
894  break;
895 
896  MVT VecVT = VT == MVT::f64 ? MVT::v2f64 : MVT::v4f32;
897  SDLoc dl(N);
898  SDValue Op0 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
899  N->getOperand(0));
900  SDValue Op1 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
901  N->getOperand(1));
902 
903  SDValue Res;
904  if (Subtarget->hasSSE2()) {
905  EVT IntVT = EVT(VecVT).changeVectorElementTypeToInteger();
906  Op0 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op0);
907  Op1 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op1);
908  unsigned Opc;
909  switch (N->getOpcode()) {
910  default: llvm_unreachable("Unexpected opcode!");
911  case X86ISD::FANDN: Opc = X86ISD::ANDNP; break;
912  case X86ISD::FAND: Opc = ISD::AND; break;
913  case X86ISD::FOR: Opc = ISD::OR; break;
914  case X86ISD::FXOR: Opc = ISD::XOR; break;
915  }
916  Res = CurDAG->getNode(Opc, dl, IntVT, Op0, Op1);
917  Res = CurDAG->getNode(ISD::BITCAST, dl, VecVT, Res);
918  } else {
919  Res = CurDAG->getNode(N->getOpcode(), dl, VecVT, Op0, Op1);
920  }
921  Res = CurDAG->getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res,
922  CurDAG->getIntPtrConstant(0, dl));
923  --I;
924  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
925  ++I;
926  CurDAG->DeleteNode(N);
927  continue;
928  }
929  }
930 
931  if (OptLevel != CodeGenOpt::None &&
932  // Only do this when the target can fold the load into the call or
933  // jmp.
934  !Subtarget->useRetpolineIndirectCalls() &&
935  ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
936  (N->getOpcode() == X86ISD::TC_RETURN &&
937  (Subtarget->is64Bit() ||
938  !getTargetMachine().isPositionIndependent())))) {
939  /// Also try moving call address load from outside callseq_start to just
940  /// before the call to allow it to be folded.
941  ///
942  /// [Load chain]
943  /// ^
944  /// |
945  /// [Load]
946  /// ^ ^
947  /// | |
948  /// / \--
949  /// / |
950  ///[CALLSEQ_START] |
951  /// ^ |
952  /// | |
953  /// [LOAD/C2Reg] |
954  /// | |
955  /// \ /
956  /// \ /
957  /// [CALL]
958  bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
959  SDValue Chain = N->getOperand(0);
960  SDValue Load = N->getOperand(1);
961  if (!isCalleeLoad(Load, Chain, HasCallSeq))
962  continue;
963  moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
964  ++NumLoadMoved;
965  continue;
966  }
967 
968  // Lower fpround and fpextend nodes that target the FP stack to be a store and
969  // load to the stack. This is a gross hack. We would like to simply mark
970  // these as being illegal, but when we do that, legalize produces these when
971  // it expands calls, then expands these in the same legalize pass. We would
972  // like dag combine to be able to hack on these between the call expansion
973  // and the node legalization. As such this pass basically does "really
974  // late" legalization of these inline with the X86 isel pass.
975  // FIXME: This should only happen when not compiled with -O0.
976  switch (N->getOpcode()) {
977  default: continue;
978  case ISD::FP_ROUND:
979  case ISD::FP_EXTEND:
980  {
981  MVT SrcVT = N->getOperand(0).getSimpleValueType();
982  MVT DstVT = N->getSimpleValueType(0);
983 
984  // If any of the sources are vectors, no fp stack involved.
985  if (SrcVT.isVector() || DstVT.isVector())
986  continue;
987 
988  // If the source and destination are SSE registers, then this is a legal
989  // conversion that should not be lowered.
990  const X86TargetLowering *X86Lowering =
991  static_cast<const X86TargetLowering *>(TLI);
992  bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
993  bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
994  if (SrcIsSSE && DstIsSSE)
995  continue;
996 
997  if (!SrcIsSSE && !DstIsSSE) {
998  // If this is an FPStack extension, it is a noop.
999  if (N->getOpcode() == ISD::FP_EXTEND)
1000  continue;
1001  // If this is a value-preserving FPStack truncation, it is a noop.
1002  if (N->getConstantOperandVal(1))
1003  continue;
1004  }
1005 
1006  // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
1007  // FPStack has extload and truncstore. SSE can fold direct loads into other
1008  // operations. Based on this, decide what we want to do.
1009  MVT MemVT;
1010  if (N->getOpcode() == ISD::FP_ROUND)
1011  MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
1012  else
1013  MemVT = SrcIsSSE ? SrcVT : DstVT;
1014 
1015  SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
1016  SDLoc dl(N);
1017 
1018  // FIXME: optimize the case where the src/dest is a load or store?
1019 
1020  SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
1021  MemTmp, MachinePointerInfo(), MemVT);
1022  SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
1023  MachinePointerInfo(), MemVT);
1024 
1025  // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
1026  // extload we created. This will cause general havoc on the dag because
1027  // anything below the conversion could be folded into other existing nodes.
1028  // To avoid invalidating 'I', back it up to the convert node.
1029  --I;
1030  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1031  break;
1032  }
1033 
1034  // The sequence of events for lowering STRICT_FP versions of these nodes requires
1035  // dealing with the chain differently, as there is already a preexisting chain.
1036  case ISD::STRICT_FP_ROUND:
1037  case ISD::STRICT_FP_EXTEND:
1038  {
1039  MVT SrcVT = N->getOperand(1).getSimpleValueType();
1040  MVT DstVT = N->getSimpleValueType(0);
1041 
1042  // If any of the sources are vectors, no fp stack involved.
1043  if (SrcVT.isVector() || DstVT.isVector())
1044  continue;
1045 
1046  // If the source and destination are SSE registers, then this is a legal
1047  // conversion that should not be lowered.
1048  const X86TargetLowering *X86Lowering =
1049  static_cast<const X86TargetLowering *>(TLI);
1050  bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
1051  bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
1052  if (SrcIsSSE && DstIsSSE)
1053  continue;
1054 
1055  if (!SrcIsSSE && !DstIsSSE) {
1056  // If this is an FPStack extension, it is a noop.
1057  if (N->getOpcode() == ISD::STRICT_FP_EXTEND)
1058  continue;
1059  // If this is a value-preserving FPStack truncation, it is a noop.
1060  if (N->getConstantOperandVal(2))
1061  continue;
1062  }
1063 
1064  // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
1065  // FPStack has extload and truncstore. SSE can fold direct loads into other
1066  // operations. Based on this, decide what we want to do.
1067  MVT MemVT;
1068  if (N->getOpcode() == ISD::STRICT_FP_ROUND)
1069  MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
1070  else
1071  MemVT = SrcIsSSE ? SrcVT : DstVT;
1072 
1073  SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
1074  SDLoc dl(N);
1075 
1076  // FIXME: optimize the case where the src/dest is a load or store?
1077 
1078  // Since the operation is StrictFP, use the preexisting chain.
1079  SDValue Store = CurDAG->getTruncStore(N->getOperand(0), dl, N->getOperand(1),
1080  MemTmp, MachinePointerInfo(), MemVT);
1081  SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
1082  MachinePointerInfo(), MemVT);
1083 
1084  // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
1085  // extload we created. This will cause general havoc on the dag because
1086  // anything below the conversion could be folded into other existing nodes.
1087  // To avoid invalidating 'I', back it up to the convert node.
1088  --I;
1089  CurDAG->ReplaceAllUsesWith(N, Result.getNode());
1090  break;
1091  }
1092  }
1093 
1094 
1095  // Now that we did that, the node is dead. Increment the iterator to the
1096  // next node to process, then delete N.
1097  ++I;
1098  CurDAG->DeleteNode(N);
1099  }
1100 
1101  // The load+call transform above can leave some dead nodes in the graph. Make
1102  // sure we remove them. It's possible some of the other transforms do too, so
1103  // just remove dead nodes unconditionally.
1104  CurDAG->RemoveDeadNodes();
1105 }
1106 
1107 // Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
1108 bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) {
1109  unsigned Opc = N->getMachineOpcode();
1110  if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 &&
1111  Opc != X86::MOVSX64rr8)
1112  return false;
1113 
1114  SDValue N0 = N->getOperand(0);
1115 
1116  // We need to be extracting the low 8 bits of an extend.
1117  if (!N0.isMachineOpcode() ||
1118  N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG ||
1119  N0.getConstantOperandVal(1) != X86::sub_8bit)
1120  return false;
1121 
1122  // We're looking for either a movsx or movzx to match the original opcode.
1123  unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX
1124  : X86::MOVSX32rr8_NOREX;
1125  SDValue N00 = N0.getOperand(0);
1126  if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc)
1127  return false;
1128 
1129  if (Opc == X86::MOVSX64rr8) {
1130  // If we had a sign extend from 8 to 64 bits, we still need to go from 32
1131  // to 64.
1132  MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N),
1133  MVT::i64, N00);
1134  ReplaceUses(N, Extend);
1135  } else {
1136  // Ok we can drop this extend and just use the original extend.
1137  ReplaceUses(N, N00.getNode());
1138  }
1139 
1140  return true;
1141 }
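// Example of the redundancy handled here: reading AH after an 8-bit divrem is
// done with a MOVZX32rr8_NOREX (or the MOVSX form). If that 32-bit result is
// then truncated back to 8 bits via EXTRACT_SUBREG and zero/sign-extended
// again, the second extend repeats what the first already did, so we reuse
// the first extend (adding only a 32->64 MOVSX when the outer extend was to
// 64 bits).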
1142 
1143 void X86DAGToDAGISel::PostprocessISelDAG() {
1144  // Skip peepholes at -O0.
1145  if (TM.getOptLevel() == CodeGenOpt::None)
1146  return;
1147 
1148  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
1149 
1150  bool MadeChange = false;
1151  while (Position != CurDAG->allnodes_begin()) {
1152  SDNode *N = &*--Position;
1153  // Skip dead nodes and any non-machine opcodes.
1154  if (N->use_empty() || !N->isMachineOpcode())
1155  continue;
1156 
1157  if (tryOptimizeRem8Extend(N)) {
1158  MadeChange = true;
1159  continue;
1160  }
1161 
1162  // Look for a TESTrr+ANDrr pattern where both operands of the test are
1163  // the same. Rewrite to remove the AND.
1164  unsigned Opc = N->getMachineOpcode();
1165  if ((Opc == X86::TEST8rr || Opc == X86::TEST16rr ||
1166  Opc == X86::TEST32rr || Opc == X86::TEST64rr) &&
1167  N->getOperand(0) == N->getOperand(1) &&
1168  N->isOnlyUserOf(N->getOperand(0).getNode()) &&
1169  N->getOperand(0).isMachineOpcode()) {
1170  SDValue And = N->getOperand(0);
1171  unsigned N0Opc = And.getMachineOpcode();
1172  if (N0Opc == X86::AND8rr || N0Opc == X86::AND16rr ||
1173  N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) {
1174  MachineSDNode *Test = CurDAG->getMachineNode(Opc, SDLoc(N),
1175  MVT::i32,
1176  And.getOperand(0),
1177  And.getOperand(1));
1178  ReplaceUses(N, Test);
1179  MadeChange = true;
1180  continue;
1181  }
1182  if (N0Opc == X86::AND8rm || N0Opc == X86::AND16rm ||
1183  N0Opc == X86::AND32rm || N0Opc == X86::AND64rm) {
1184  unsigned NewOpc;
1185  switch (N0Opc) {
1186  case X86::AND8rm: NewOpc = X86::TEST8mr; break;
1187  case X86::AND16rm: NewOpc = X86::TEST16mr; break;
1188  case X86::AND32rm: NewOpc = X86::TEST32mr; break;
1189  case X86::AND64rm: NewOpc = X86::TEST64mr; break;
1190  }
1191 
1192  // Need to swap the memory and register operand.
1193  SDValue Ops[] = { And.getOperand(1),
1194  And.getOperand(2),
1195  And.getOperand(3),
1196  And.getOperand(4),
1197  And.getOperand(5),
1198  And.getOperand(0),
1199  And.getOperand(6) /* Chain */ };
1200  MachineSDNode *Test = CurDAG->getMachineNode(NewOpc, SDLoc(N),
1201  MVT::i32, MVT::Other, Ops);
1202  ReplaceUses(N, Test);
1203  MadeChange = true;
1204  continue;
1205  }
1206  }
1207 
1208  // Look for a KAND+KORTEST and turn it into KTEST if only the zero flag is
1209  // used. We're doing this late so we can prefer to fold the AND into masked
1210  // comparisons. Doing that can be better for the live range of the mask
1211  // register.
1212  if ((Opc == X86::KORTESTBrr || Opc == X86::KORTESTWrr ||
1213  Opc == X86::KORTESTDrr || Opc == X86::KORTESTQrr) &&
1214  N->getOperand(0) == N->getOperand(1) &&
1215  N->isOnlyUserOf(N->getOperand(0).getNode()) &&
1216  N->getOperand(0).isMachineOpcode() &&
1217  onlyUsesZeroFlag(SDValue(N, 0))) {
1218  SDValue And = N->getOperand(0);
1219  unsigned N0Opc = And.getMachineOpcode();
1220  // KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other
1221  // KAND instructions and KTEST use the same ISA feature.
1222  if (N0Opc == X86::KANDBrr ||
1223  (N0Opc == X86::KANDWrr && Subtarget->hasDQI()) ||
1224  N0Opc == X86::KANDDrr || N0Opc == X86::KANDQrr) {
1225  unsigned NewOpc;
1226  switch (Opc) {
1227  default: llvm_unreachable("Unexpected opcode!");
1228  case X86::KORTESTBrr: NewOpc = X86::KTESTBrr; break;
1229  case X86::KORTESTWrr: NewOpc = X86::KTESTWrr; break;
1230  case X86::KORTESTDrr: NewOpc = X86::KTESTDrr; break;
1231  case X86::KORTESTQrr: NewOpc = X86::KTESTQrr; break;
1232  }
1233  MachineSDNode *KTest = CurDAG->getMachineNode(NewOpc, SDLoc(N),
1234  MVT::i32,
1235  And.getOperand(0),
1236  And.getOperand(1));
1237  ReplaceUses(N, KTest);
1238  MadeChange = true;
1239  continue;
1240  }
1241  }
1242 
1243  // Attempt to remove vector moves that were inserted to zero upper bits.
1244  if (Opc != TargetOpcode::SUBREG_TO_REG)
1245  continue;
1246 
1247  unsigned SubRegIdx = N->getConstantOperandVal(2);
1248  if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm)
1249  continue;
1250 
1251  SDValue Move = N->getOperand(1);
1252  if (!Move.isMachineOpcode())
1253  continue;
1254 
1255  // Make sure it's one of the move opcodes we recognize.
1256  switch (Move.getMachineOpcode()) {
1257  default:
1258  continue;
1259  case X86::VMOVAPDrr: case X86::VMOVUPDrr:
1260  case X86::VMOVAPSrr: case X86::VMOVUPSrr:
1261  case X86::VMOVDQArr: case X86::VMOVDQUrr:
1262  case X86::VMOVAPDYrr: case X86::VMOVUPDYrr:
1263  case X86::VMOVAPSYrr: case X86::VMOVUPSYrr:
1264  case X86::VMOVDQAYrr: case X86::VMOVDQUYrr:
1265  case X86::VMOVAPDZ128rr: case X86::VMOVUPDZ128rr:
1266  case X86::VMOVAPSZ128rr: case X86::VMOVUPSZ128rr:
1267  case X86::VMOVDQA32Z128rr: case X86::VMOVDQU32Z128rr:
1268  case X86::VMOVDQA64Z128rr: case X86::VMOVDQU64Z128rr:
1269  case X86::VMOVAPDZ256rr: case X86::VMOVUPDZ256rr:
1270  case X86::VMOVAPSZ256rr: case X86::VMOVUPSZ256rr:
1271  case X86::VMOVDQA32Z256rr: case X86::VMOVDQU32Z256rr:
1272  case X86::VMOVDQA64Z256rr: case X86::VMOVDQU64Z256rr:
1273  break;
1274  }
1275 
1276  SDValue In = Move.getOperand(0);
1277  if (!In.isMachineOpcode() ||
1278  In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END)
1279  continue;
1280 
1281  // Make sure the instruction has a VEX, XOP, or EVEX prefix. This rules out
1282  // the SHA instructions, which use a legacy encoding.
1283  uint64_t TSFlags = getInstrInfo()->get(In.getMachineOpcode()).TSFlags;
1284  if ((TSFlags & X86II::EncodingMask) != X86II::VEX &&
1285  (TSFlags & X86II::EncodingMask) != X86II::EVEX &&
1286  (TSFlags & X86II::EncodingMask) != X86II::XOP)
1287  continue;
1288 
1289  // The producing instruction is another vector instruction, so we can drop
1290  // the move.
1291  CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));
1292  MadeChange = true;
1293  }
1294 
1295  if (MadeChange)
1296  CurDAG->RemoveDeadNodes();
1297 }
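// A concrete instance of the TESTrr+ANDrr peephole above:
//     andl %esi, %edi
//     testl %edi, %edi
// becomes
//     testl %esi, %edi
// when the AND result has no other users, since TEST sets the same flags as
// AND without needing the intermediate register value.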
1298 
1299 
1300 /// Emit any code that needs to be executed only in the main function.
1301 void X86DAGToDAGISel::emitSpecialCodeForMain() {
1302  if (Subtarget->isTargetCygMing()) {
1303  TargetLowering::ArgListTy Args;
1304  auto &DL = CurDAG->getDataLayout();
1305 
1306  TargetLowering::CallLoweringInfo CLI(*CurDAG);
1307  CLI.setChain(CurDAG->getRoot())
1308  .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
1309  CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
1310  std::move(Args));
1311  const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
1312  std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
1313  CurDAG->setRoot(Result.second);
1314  }
1315 }
1316 
1317 void X86DAGToDAGISel::EmitFunctionEntryCode() {
1318  // If this is main, emit special code for main.
1319  const Function &F = MF->getFunction();
1320  if (F.hasExternalLinkage() && F.getName() == "main")
1321  emitSpecialCodeForMain();
1322 }
1323 
1324 static bool isDispSafeForFrameIndex(int64_t Val) {
1325  // On 64-bit platforms, we can run into an issue where a frame index
1326  // includes a displacement that, when added to the explicit displacement,
1327  // will overflow the displacement field. Assuming that the frame index
1328  // displacement fits into a 31-bit integer (which is only slightly more
1329  // aggressive than the current fundamental assumption that it fits into
1330  // a 32-bit integer), a 31-bit disp should always be safe.
1331  return isInt<31>(Val);
1332 }
1333 
1334 bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
1335  X86ISelAddressMode &AM) {
1336  // If there's no offset to fold, we don't need to do any work.
1337  if (Offset == 0)
1338  return false;
1339 
1340  // Cannot combine ExternalSymbol displacements with integer offsets.
1341  if (AM.ES || AM.MCSym)
1342  return true;
1343 
1344  int64_t Val = AM.Disp + Offset;
1345  CodeModel::Model M = TM.getCodeModel();
1346  if (Subtarget->is64Bit()) {
1347  if (!X86::isOffsetSuitableForCodeModel(Val, M,
1348  AM.hasSymbolicDisplacement()))
1349  return true;
1350  // In addition to the checks required for a register base, check that
1351  // we do not try to use an unsafe Disp with a frame index.
1352  if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
1353  !isDispSafeForFrameIndex(Val))
1354  return true;
1355  }
1356  AM.Disp = Val;
1357  return false;
1358 
1359 }
1360 
1361 bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
1362  SDValue Address = N->getOperand(1);
1363 
1364  // load gs:0 -> GS segment register.
1365  // load fs:0 -> FS segment register.
1366  //
1367  // This optimization is valid because the GNU TLS model defines that
1368  // gs:0 (or fs:0 on X86-64) contains its own address.
1369  // For more information see http://people.redhat.com/drepper/tls.pdf
1370  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
1371  if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
1372  !IndirectTlsSegRefs &&
1373  (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
1374  Subtarget->isTargetFuchsia()))
1375  switch (N->getPointerInfo().getAddrSpace()) {
1376  case 256:
1377  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1378  return false;
1379  case 257:
1380  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1381  return false;
1382  // Address space 258 is not handled here, because it is not used to
1383  // address TLS areas.
1384  }
1385 
1386  return true;
1387 }
1388 
1389 /// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
1390 /// mode. These wrap things that will resolve down into a symbol reference.
1391 /// If no match is possible, this returns true, otherwise it returns false.
1392 bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
1393  // If the addressing mode already has a symbol as the displacement, we can
1394  // never match another symbol.
1395  if (AM.hasSymbolicDisplacement())
1396  return true;
1397 
1398  bool IsRIPRelTLS = false;
1399  bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP;
1400  if (IsRIPRel) {
1401  SDValue Val = N.getOperand(0);
1402  if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
1403  IsRIPRelTLS = true;
1404  }
1405 
1406  // We can't use an addressing mode in the 64-bit large code model.
1407  // Global TLS addressing is an exception. In the medium code model,
1408  // we can use such a mode when RIP wrappers are present.
1409  // That signifies access to globals that are known to be "near",
1410  // such as the GOT itself.
1411  CodeModel::Model M = TM.getCodeModel();
1412  if (Subtarget->is64Bit() &&
1413  ((M == CodeModel::Large && !IsRIPRelTLS) ||
1414  (M == CodeModel::Medium && !IsRIPRel)))
1415  return true;
1416 
1417  // Base and index reg must be 0 in order to use %rip as base.
1418  if (IsRIPRel && AM.hasBaseOrIndexReg())
1419  return true;
1420 
1421  // Make a local copy in case we can't do this fold.
1422  X86ISelAddressMode Backup = AM;
1423 
1424  int64_t Offset = 0;
1425  SDValue N0 = N.getOperand(0);
1426  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
1427  AM.GV = G->getGlobal();
1428  AM.SymbolFlags = G->getTargetFlags();
1429  Offset = G->getOffset();
1430  } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
1431  AM.CP = CP->getConstVal();
1432  AM.Align = CP->getAlignment();
1433  AM.SymbolFlags = CP->getTargetFlags();
1434  Offset = CP->getOffset();
1435  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
1436  AM.ES = S->getSymbol();
1437  AM.SymbolFlags = S->getTargetFlags();
1438  } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
1439  AM.MCSym = S->getMCSymbol();
1440  } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
1441  AM.JT = J->getIndex();
1442  AM.SymbolFlags = J->getTargetFlags();
1443  } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
1444  AM.BlockAddr = BA->getBlockAddress();
1445  AM.SymbolFlags = BA->getTargetFlags();
1446  Offset = BA->getOffset();
1447  } else
1448  llvm_unreachable("Unhandled symbol reference node.");
1449 
1450  if (foldOffsetIntoAddress(Offset, AM)) {
1451  AM = Backup;
1452  return true;
1453  }
1454 
1455  if (IsRIPRel)
1456  AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
1457 
1458  // Commit the changes now that we know this fold is safe.
1459  return false;
1460 }
1461 
1462 /// Add the specified node to the specified addressing mode, returning true if
1463 /// it cannot be done. This just pattern matches for the addressing mode.
1464 bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
1465  if (matchAddressRecursively(N, AM, 0))
1466  return true;
1467 
1468  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
1469  // a smaller encoding and avoids a scaled-index.
1470  if (AM.Scale == 2 &&
1471  AM.BaseType == X86ISelAddressMode::RegBase &&
1472  AM.Base_Reg.getNode() == nullptr) {
1473  AM.Base_Reg = AM.IndexReg;
1474  AM.Scale = 1;
1475  }
1476 
1477  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
1478  // because it has a smaller encoding.
1479  // TODO: Which other code models can use this?
1480  switch (TM.getCodeModel()) {
1481  default: break;
1482  case CodeModel::Small:
1483  case CodeModel::Kernel:
1484  if (Subtarget->is64Bit() &&
1485  AM.Scale == 1 &&
1486  AM.BaseType == X86ISelAddressMode::RegBase &&
1487  AM.Base_Reg.getNode() == nullptr &&
1488  AM.IndexReg.getNode() == nullptr &&
1489  AM.SymbolFlags == X86II::MO_NO_FLAG &&
1490  AM.hasSymbolicDisplacement())
1491  AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
1492  break;
1493  }
1494 
1495  return false;
1496 }
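// For instance, the Scale == 2 post-processing above turns
//     leal (,%eax,2), %ecx     # index with no base forces a 32-bit disp
// into
//     leal (%eax,%eax), %ecx   # base+index, no displacement needed
// which is the smaller encoding the comment refers to.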
1497 
1498 bool X86DAGToDAGISel::matchAdd(SDValue &N, X86ISelAddressMode &AM,
1499  unsigned Depth) {
1500  // Add an artificial use to this node so that we can keep track of
1501  // it if it gets CSE'd with a different node.
1502  HandleSDNode Handle(N);
1503 
1504  X86ISelAddressMode Backup = AM;
1505  if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
1506  !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
1507  return false;
1508  AM = Backup;
1509 
1510  // Try again after commuting the operands.
1511  if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1) &&
1512  !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
1513  return false;
1514  AM = Backup;
1515 
1516  // If we couldn't fold both operands into the address at the same time,
1517  // see if we can just put each operand into a register and fold at least
1518  // the add.
1519  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1520  !AM.Base_Reg.getNode() &&
1521  !AM.IndexReg.getNode()) {
1522  N = Handle.getValue();
1523  AM.Base_Reg = N.getOperand(0);
1524  AM.IndexReg = N.getOperand(1);
1525  AM.Scale = 1;
1526  return false;
1527  }
1528  N = Handle.getValue();
1529  return true;
1530 }
1531 
1532 // Insert a node into the DAG at least before the Pos node's position. This
1533 // will reposition the node as needed, and will assign it a node ID that is <=
1534 // the Pos node's ID. Note that this does *not* preserve the uniqueness of node
1535 // IDs! The selection DAG must no longer depend on their uniqueness when this
1536 // is used.
1537 static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
1538  if (N->getNodeId() == -1 ||
1539  (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) >
1540  SelectionDAGISel::getUninvalidatedNodeId(Pos.getNode()))) {
1541  DAG.RepositionNode(Pos->getIterator(), N.getNode());
1542  // Mark Node as invalid for pruning as after this it may be a successor to a
1543  // selected node but otherwise be in the same position of Pos.
1544  // Conservatively mark it with the same -abs(Id) to assure node id
1545  // invariant is preserved.
1546  N->setNodeId(Pos->getNodeId());
1547  SelectionDAGISel::InvalidateNodeId(N.getNode());
1548  }
1549 }
1550 
1551 // Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
1552 // safe. This allows us to convert the shift and and into an h-register
1553 // extract and a scaled index. Returns false if the simplification is
1554 // performed.
1555 static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
1556  uint64_t Mask,
1557  SDValue Shift, SDValue X,
1558  X86ISelAddressMode &AM) {
1559  if (Shift.getOpcode() != ISD::SRL ||
1560  !isa<ConstantSDNode>(Shift.getOperand(1)) ||
1561  !Shift.hasOneUse())
1562  return true;
1563 
1564  int ScaleLog = 8 - Shift.getConstantOperandVal(1);
1565  if (ScaleLog <= 0 || ScaleLog >= 4 ||
1566  Mask != (0xffu << ScaleLog))
1567  return true;
1568 
1569  MVT VT = N.getSimpleValueType();
1570  SDLoc DL(N);
1571  SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
1572  SDValue NewMask = DAG.getConstant(0xff, DL, VT);
1573  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
1574  SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
1575  SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
1576  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);
1577 
1578  // Insert the new nodes into the topological ordering. We must do this in
1579  // a valid topological ordering as nothing is going to go back and re-sort
1580  // these nodes. We continually insert before 'N' in sequence as this is
1581  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1582  // hierarchy left to express.
1583  insertDAGNode(DAG, N, Eight);
1584  insertDAGNode(DAG, N, Srl);
1585  insertDAGNode(DAG, N, NewMask);
1586  insertDAGNode(DAG, N, And);
1587  insertDAGNode(DAG, N, ShlCount);
1588  insertDAGNode(DAG, N, Shl);
1589  DAG.ReplaceAllUsesWith(N, Shl);
1590  DAG.RemoveDeadNode(N.getNode());
1591  AM.IndexReg = And;
1592  AM.Scale = (1 << ScaleLog);
1593  return false;
1594 }
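// A worked example of the transform above: with C1 == 2 the input is
// (x >> 6) & 0x3fc. ScaleLog is 8 - 6 == 2 and 0xff << 2 == 0x3fc, so the
// node is rewritten to ((x >> 8) & 0xff) << 2; the addressing mode then uses
// IndexReg = (x >> 8) & 0xff with Scale = 4, which maps onto an h-register
// extract plus a scaled index.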
1595 
1596 // Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
1597 // allows us to fold the shift into this addressing mode. Returns false if the
1598 // transform succeeded.
1599 static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
1600  X86ISelAddressMode &AM) {
1601  SDValue Shift = N.getOperand(0);
1602 
1603  // Use a signed mask so that shifting right will insert sign bits. These
1604  // bits will be removed when we shift the result left so it doesn't matter
1605  // what we use. This might allow a smaller immediate encoding.
1606  int64_t Mask = cast<ConstantSDNode>(N->getOperand(1))->getSExtValue();
1607 
1608  // If we have an any_extend feeding the AND, look through it to see if there
1609  // is a shift behind it. But only if the AND doesn't use the extended bits.
1610  // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
1611  bool FoundAnyExtend = false;
1612  if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
1613  Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
1614  isUInt<32>(Mask)) {
1615  FoundAnyExtend = true;
1616  Shift = Shift.getOperand(0);
1617  }
1618 
1619  if (Shift.getOpcode() != ISD::SHL ||
1620  !isa<ConstantSDNode>(Shift.getOperand(1)))
1621  return true;
1622 
1623  SDValue X = Shift.getOperand(0);
1624 
1625  // Not likely to be profitable if either the AND or SHIFT node has more
1626  // than one use (unless all uses are for address computation). Besides,
1627  // isel mechanism requires their node ids to be reused.
1628  if (!N.hasOneUse() || !Shift.hasOneUse())
1629  return true;
1630 
1631  // Verify that the shift amount is something we can fold.
1632  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1633  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
1634  return true;
1635 
1636  MVT VT = N.getSimpleValueType();
1637  SDLoc DL(N);
1638  if (FoundAnyExtend) {
1639  SDValue NewX = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X);
1640  insertDAGNode(DAG, N, NewX);
1641  X = NewX;
1642  }
1643 
1644  SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
1645  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
1646  SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
1647 
1648  // Insert the new nodes into the topological ordering. We must do this in
1649  // a valid topological ordering as nothing is going to go back and re-sort
1650  // these nodes. We continually insert before 'N' in sequence as this is
1651  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1652  // hierarchy left to express.
1653  insertDAGNode(DAG, N, NewMask);
1654  insertDAGNode(DAG, N, NewAnd);
1655  insertDAGNode(DAG, N, NewShift);
1656  DAG.ReplaceAllUsesWith(N, NewShift);
1657  DAG.RemoveDeadNode(N.getNode());
1658 
1659  AM.Scale = 1 << ShiftAmt;
1660  AM.IndexReg = NewAnd;
1661  return false;
1662 }
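// A worked example of the transform above: (x << 2) & 0x3fc becomes
// (x & 0xff) << 2. The AND now uses the smaller immediate 0x3fc >> 2 == 0xff,
// and the SHL by 2 is absorbed into the addressing mode as
// IndexReg = x & 0xff, Scale = 4.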
1663 
1664 // Implement some heroics to detect shifts of masked values where the mask can
1665 // be replaced by extending the shift and undoing that in the addressing mode
1666 // scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
1667 // (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
1668 // the addressing mode. This results in code such as:
1669 //
1670 // int f(short *y, int *lookup_table) {
1671 // ...
1672 // return *y + lookup_table[*y >> 11];
1673 // }
1674 //
1675 // Turning into:
1676 // movzwl (%rdi), %eax
1677 // movl %eax, %ecx
1678 // shrl $11, %ecx
1679 // addl (%rsi,%rcx,4), %eax
1680 //
1681 // Instead of:
1682 // movzwl (%rdi), %eax
1683 // movl %eax, %ecx
1684 // shrl $9, %ecx
1685  // andl $124, %ecx
1686 // addl (%rsi,%rcx), %eax
1687 //
1688 // Note that this function assumes the mask is provided as a mask *after* the
1689 // value is shifted. The input chain may or may not match that, but computing
1690 // such a mask is trivial.
1691 static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
1692  uint64_t Mask,
1693  SDValue Shift, SDValue X,
1694  X86ISelAddressMode &AM) {
1695  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
1696  !isa<ConstantSDNode>(Shift.getOperand(1)))
1697  return true;
1698 
1699  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1700  unsigned MaskLZ = countLeadingZeros(Mask);
1701  unsigned MaskTZ = countTrailingZeros(Mask);
1702 
1703  // The amount of shift we're trying to fit into the addressing mode is taken
1704  // from the trailing zeros of the mask.
1705  unsigned AMShiftAmt = MaskTZ;
1706 
1707  // There is nothing we can do here unless the mask is removing some bits.
1708  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
1709  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
1710 
1711  // We also need to ensure that the mask is a contiguous run of bits.
1712  if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
1713 
1714  // Scale the leading zero count down based on the actual size of the value.
1715  // Also scale it down based on the size of the shift.
1716  unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
1717  if (MaskLZ < ScaleDown)
1718  return true;
1719  MaskLZ -= ScaleDown;
1720 
1721  // The final check is to ensure that any masked out high bits of X are
1722  // already known to be zero. Otherwise, the mask has a semantic impact
1723  // other than masking out a couple of low bits. Unfortunately, because of
1724  // the mask, zero extensions will be removed from operands in some cases.
1725  // This code works extra hard to look through extensions because we can
1726  // replace them with zero extensions cheaply if necessary.
1727  bool ReplacingAnyExtend = false;
1728  if (X.getOpcode() == ISD::ANY_EXTEND) {
1729  unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
1730  X.getOperand(0).getSimpleValueType().getSizeInBits();
1731  // Assume that we'll replace the any-extend with a zero-extend, and
1732  // narrow the search to the extended value.
1733  X = X.getOperand(0);
1734  MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
1735  ReplacingAnyExtend = true;
1736  }
1737  APInt MaskedHighBits =
1738  APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
1739  KnownBits Known = DAG.computeKnownBits(X);
1740  if (MaskedHighBits != Known.Zero) return true;
1741 
1742  // We've identified a pattern that can be transformed into a single shift
1743  // and an addressing mode. Make it so.
1744  MVT VT = N.getSimpleValueType();
1745  if (ReplacingAnyExtend) {
1746  assert(X.getValueType() != VT);
1747  // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
1748  SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
1749  insertDAGNode(DAG, N, NewX);
1750  X = NewX;
1751  }
1752  SDLoc DL(N);
1753  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
1754  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
1755  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
1756  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);
1757 
1758  // Insert the new nodes into the topological ordering. We must do this in
1759  // a valid topological ordering as nothing is going to go back and re-sort
1760  // these nodes. We continually insert before 'N' in sequence as this is
1761  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1762  // hierarchy left to express.
1763  insertDAGNode(DAG, N, NewSRLAmt);
1764  insertDAGNode(DAG, N, NewSRL);
1765  insertDAGNode(DAG, N, NewSHLAmt);
1766  insertDAGNode(DAG, N, NewSHL);
1767  DAG.ReplaceAllUsesWith(N, NewSHL);
1768  DAG.RemoveDeadNode(N.getNode());
1769 
1770  AM.Scale = 1 << AMShiftAmt;
1771  AM.IndexReg = NewSRL;
1772  return false;
1773 }
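// A worked example using the lookup_table code above: the canonicalized node
// is (and (srl x, 9), 0x7c) with x zero-extended from i16. ShiftAmt = 9,
// the mask has 2 trailing zeros, and 0x7c >> 2 == 0x1f is a contiguous run,
// so AMShiftAmt = 2. The masked-out high bits of x are already known zero,
// so the node becomes (shl (srl x, 11), 2) and the addressing mode uses
// IndexReg = x >> 11 with Scale = 4 -- the shrl $11 / (%rsi,%rcx,4) sequence
// shown above.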
1774 
1775 // Transform "(X >> SHIFT) & (MASK << C1)" to
1776 // "((X >> (SHIFT + C1)) & (MASK)) << C1". Everything before the SHL will be
1777 // matched to a BEXTR later. Returns false if the simplification is performed.
1778 static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
1779  uint64_t Mask,
1780  SDValue Shift, SDValue X,
1781  X86ISelAddressMode &AM,
1782  const X86Subtarget &Subtarget) {
1783  if (Shift.getOpcode() != ISD::SRL ||
1784  !isa<ConstantSDNode>(Shift.getOperand(1)) ||
1785  !Shift.hasOneUse() || !N.hasOneUse())
1786  return true;
1787 
1788  // Only do this if BEXTR will be matched by matchBEXTRFromAndImm.
1789  if (!Subtarget.hasTBM() &&
1790  !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR()))
1791  return true;
1792 
1793  // We need to ensure that the mask is a contiguous run of bits.
1794  if (!isShiftedMask_64(Mask)) return true;
1795 
1796  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1797 
1798  // The amount of shift we're trying to fit into the addressing mode is taken
1799  // from the trailing zeros of the mask.
1800  unsigned AMShiftAmt = countTrailingZeros(Mask);
1801 
1802  // There is nothing we can do here unless the mask is removing some bits.
1803  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
1804  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
1805 
1806  MVT VT = N.getSimpleValueType();
1807  SDLoc DL(N);
1808  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
1809  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
1810  SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, VT);
1811  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, NewSRL, NewMask);
1812  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
1813  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewAnd, NewSHLAmt);
1814 
1815  // Insert the new nodes into the topological ordering. We must do this in
1816  // a valid topological ordering as nothing is going to go back and re-sort
1817  // these nodes. We continually insert before 'N' in sequence as this is
1818  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1819  // hierarchy left to express.
1820  insertDAGNode(DAG, N, NewSRLAmt);
1821  insertDAGNode(DAG, N, NewSRL);
1822  insertDAGNode(DAG, N, NewMask);
1823  insertDAGNode(DAG, N, NewAnd);
1824  insertDAGNode(DAG, N, NewSHLAmt);
1825  insertDAGNode(DAG, N, NewSHL);
1826  DAG.ReplaceAllUsesWith(N, NewSHL);
1827  DAG.RemoveDeadNode(N.getNode());
1828 
1829  AM.Scale = 1 << AMShiftAmt;
1830  AM.IndexReg = NewAnd;
1831  return false;
1832 }
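// A worked example, assuming a target with TBM or BMI plus fast BEXTR:
// (x >> 5) & 0x1fe is rewritten to ((x >> 6) & 0xff) << 1. The inner srl+and
// is the shape matchBEXTRFromAndImm can later turn into a BEXTR, while the
// final shl by 1 is absorbed as IndexReg = (x >> 6) & 0xff, Scale = 2.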
1833 
1834 bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
1835  unsigned Depth) {
1836  SDLoc dl(N);
1837  LLVM_DEBUG({
1838  dbgs() << "MatchAddress: ";
1839  AM.dump(CurDAG);
1840  });
1841  // Limit recursion.
1842  if (Depth > 5)
1843  return matchAddressBase(N, AM);
1844 
1845  // If this is already a %rip relative address, we can only merge immediates
1846  // into it. Instead of handling this in every case, we handle it here.
1847  // RIP relative addressing: %rip + 32-bit displacement!
1848  if (AM.isRIPRelative()) {
1849  // FIXME: JumpTable and ExternalSymbol address currently don't like
1850  // displacements. It isn't very important, but this should be fixed for
1851  // consistency.
1852  if (!(AM.ES || AM.MCSym) && AM.JT != -1)
1853  return true;
1854 
1855  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
1856  if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
1857  return false;
1858  return true;
1859  }
1860 
1861  switch (N.getOpcode()) {
1862  default: break;
1863  case ISD::LOCAL_RECOVER: {
1864  if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
1865  if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
1866  // Use the symbol and don't prefix it.
1867  AM.MCSym = ESNode->getMCSymbol();
1868  return false;
1869  }
1870  break;
1871  }
1872  case ISD::Constant: {
1873  uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
1874  if (!foldOffsetIntoAddress(Val, AM))
1875  return false;
1876  break;
1877  }
1878 
1879  case X86ISD::Wrapper:
1880  case X86ISD::WrapperRIP:
1881  if (!matchWrapper(N, AM))
1882  return false;
1883  break;
1884 
1885  case ISD::LOAD:
1886  if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
1887  return false;
1888  break;
1889 
1890  case ISD::FrameIndex:
1891  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1892  AM.Base_Reg.getNode() == nullptr &&
1893  (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
1894  AM.BaseType = X86ISelAddressMode::FrameIndexBase;
1895  AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
1896  return false;
1897  }
1898  break;
1899 
1900  case ISD::SHL:
1901  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
1902  break;
1903 
1904  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1905  unsigned Val = CN->getZExtValue();
1906  // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
1907  // that the base operand remains free for further matching. If
1908  // the base doesn't end up getting used, a post-processing step
1909  // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
1910  if (Val == 1 || Val == 2 || Val == 3) {
1911  AM.Scale = 1 << Val;
1912  SDValue ShVal = N.getOperand(0);
1913 
1914  // Okay, we know that we have a scale by now. However, if the scaled
1915  // value is an add of something and a constant, we can fold the
1916  // constant into the disp field here.
1917  if (CurDAG->isBaseWithConstantOffset(ShVal)) {
1918  AM.IndexReg = ShVal.getOperand(0);
1919  ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getOperand(1));
1920  uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
1921  if (!foldOffsetIntoAddress(Disp, AM))
1922  return false;
1923  }
1924 
1925  AM.IndexReg = ShVal;
1926  return false;
1927  }
1928  }
1929  break;
1930 
1931  case ISD::SRL: {
1932  // Scale must not be used already.
1933  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
1934 
1935  // We only handle up to 64-bit values here as those are what matter for
1936  // addressing mode optimizations.
1937  assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
1938  "Unexpected value size!");
1939 
1940  SDValue And = N.getOperand(0);
1941  if (And.getOpcode() != ISD::AND) break;
1942  SDValue X = And.getOperand(0);
1943 
1944  // The mask used for the transform is expected to be post-shift, but we
1945  // found the shift first so just apply the shift to the mask before passing
1946  // it down.
1947  if (!isa<ConstantSDNode>(N.getOperand(1)) ||
1948  !isa<ConstantSDNode>(And.getOperand(1)))
1949  break;
1950  uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);
1951 
1952  // Try to fold the mask and shift into the scale, and return false if we
1953  // succeed.
1954  if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
1955  return false;
1956  break;
1957  }
1958 
1959  case ISD::SMUL_LOHI:
1960  case ISD::UMUL_LOHI:
1961  // A mul_lohi where we need the low part can be folded as a plain multiply.
1962  if (N.getResNo() != 0) break;
1963  LLVM_FALLTHROUGH;
1964  case ISD::MUL:
1965  case X86ISD::MUL_IMM:
1966  // X*[3,5,9] -> X+X*[2,4,8]
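 // For example, (mul (add t, 42), 9) becomes Base = IndexReg = t, Scale = 8
 // and Disp = 42 * 9 = 378, i.e. roughly leal 378(%rax,%rax,8), %ecx with t
 // in %rax (register names are only an example).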
1967  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1968  AM.Base_Reg.getNode() == nullptr &&
1969  AM.IndexReg.getNode() == nullptr) {
1970  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1971  if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
1972  CN->getZExtValue() == 9) {
1973  AM.Scale = unsigned(CN->getZExtValue())-1;
1974 
1975  SDValue MulVal = N.getOperand(0);
1976  SDValue Reg;
1977 
1978  // Okay, we know that we have a scale by now. However, if the scaled
1979  // value is an add of something and a constant, we can fold the
1980  // constant into the disp field here.
1981  if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
1982  isa<ConstantSDNode>(MulVal.getOperand(1))) {
1983  Reg = MulVal.getOperand(0);
1984  ConstantSDNode *AddVal =
1985  cast<ConstantSDNode>(MulVal.getOperand(1));
1986  uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
1987  if (foldOffsetIntoAddress(Disp, AM))
1988  Reg = N.getOperand(0);
1989  } else {
1990  Reg = N.getOperand(0);
1991  }
1992 
1993  AM.IndexReg = AM.Base_Reg = Reg;
1994  return false;
1995  }
1996  }
1997  break;
1998 
1999  case ISD::SUB: {
2000  // Given A-B, if A can be completely folded into the address while
2001  // leaving the index field unused, use -B as the index.
2002  // This is a win if A has multiple parts that can be folded into
2003  // the address. Also, this saves a mov if the base register has
2004  // other uses, since it avoids a two-address sub instruction, however
2005  // it costs an additional mov if the index register has other uses.
2006 
2007  // Add an artificial use to this node so that we can keep track of
2008  // it if it gets CSE'd with a different node.
2009  HandleSDNode Handle(N);
2010 
2011  // Test if the LHS of the sub can be folded.
2012  X86ISelAddressMode Backup = AM;
2013  if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) {
2014  N = Handle.getValue();
2015  AM = Backup;
2016  break;
2017  }
2018  N = Handle.getValue();
2019  // Test if the index field is free for use.
2020  if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
2021  AM = Backup;
2022  break;
2023  }
2024 
2025  int Cost = 0;
2026  SDValue RHS = N.getOperand(1);
2027  // If the RHS involves a register with multiple uses, this
2028  // transformation incurs an extra mov, due to the neg instruction
2029  // clobbering its operand.
2030  if (!RHS.getNode()->hasOneUse() ||
2031  RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
2032  RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
2033  RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
2034  (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
2035  RHS.getOperand(0).getValueType() == MVT::i32))
2036  ++Cost;
2037  // If the base is a register with multiple uses, this
2038  // transformation may save a mov.
2039  if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
2040  !AM.Base_Reg.getNode()->hasOneUse()) ||
2041  AM.BaseType == X86ISelAddressMode::FrameIndexBase)
2042  --Cost;
2043  // If the folded LHS was interesting, this transformation saves
2044  // address arithmetic.
2045  if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
2046  ((AM.Disp != 0) && (Backup.Disp == 0)) +
2047  (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
2048  --Cost;
2049  // If it doesn't look like it may be an overall win, don't do it.
2050  if (Cost >= 0) {
2051  AM = Backup;
2052  break;
2053  }
2054 
2055  // Ok, the transformation is legal and appears profitable. Go for it.
2056  // Negation will be emitted later to avoid creating dangling nodes if this
2057  // was an unprofitable LEA.
2058  AM.IndexReg = RHS;
2059  AM.NegateIndex = true;
2060  AM.Scale = 1;
2061  return false;
2062  }
2063 
2064  case ISD::ADD:
2065  if (!matchAdd(N, AM, Depth))
2066  return false;
2067  break;
2068 
2069  case ISD::OR:
2070  // We want to look through a transform in InstCombine and DAGCombiner that
2071  // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
2072  // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3))
2073  // An 'lea' can then be used to match the shift (multiply) and add:
2074  // and $1, %esi
2075  // lea (%rsi, %rdi, 8), %rax
2076  if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) &&
2077  !matchAdd(N, AM, Depth))
2078  return false;
2079  break;
2080 
2081  case ISD::AND: {
2082  // Perform some heroic transforms on an and of a constant-count shift
2083  // with a constant to enable use of the scaled offset field.
2084 
2085  // Scale must not be used already.
2086  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
2087 
2088  // We only handle up to 64-bit values here as those are what matter for
2089  // addressing mode optimizations.
2090  assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
2091  "Unexpected value size!");
2092 
2093  if (!isa<ConstantSDNode>(N.getOperand(1)))
2094  break;
2095 
2096  if (N.getOperand(0).getOpcode() == ISD::SRL) {
2097  SDValue Shift = N.getOperand(0);
2098  SDValue X = Shift.getOperand(0);
2099 
2100  uint64_t Mask = N.getConstantOperandVal(1);
2101 
2102  // Try to fold the mask and shift into an extract and scale.
2103  if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
2104  return false;
2105 
2106  // Try to fold the mask and shift directly into the scale.
2107  if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
2108  return false;
2109 
2110  // Try to fold the mask and shift into BEXTR and scale.
2111  if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
2112  return false;
2113  }
2114 
2115  // Try to swap the mask and shift to place shifts which can be done as
2116  // a scale on the outside of the mask.
2117  if (!foldMaskedShiftToScaledMask(*CurDAG, N, AM))
2118  return false;
2119 
2120  break;
2121  }
2122  case ISD::ZERO_EXTEND: {
2123  // Try to widen a zexted shift left to the same size as its use, so we can
2124  // match the shift as a scale factor.
2125  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
2126  break;
2127  if (N.getOperand(0).getOpcode() != ISD::SHL || !N.getOperand(0).hasOneUse())
2128  break;
2129 
2130  // Give up if the shift is not a valid scale factor [1,2,3].
2131  SDValue Shl = N.getOperand(0);
2132  auto *ShAmtC = dyn_cast<ConstantSDNode>(Shl.getOperand(1));
2133  if (!ShAmtC || ShAmtC->getZExtValue() > 3)
2134  break;
2135 
2136  // The narrow shift must only shift out zero bits (it must be 'nuw').
2137  // That makes it safe to widen to the destination type.
2138  APInt HighZeros = APInt::getHighBitsSet(Shl.getValueSizeInBits(),
2139  ShAmtC->getZExtValue());
2140  if (!CurDAG->MaskedValueIsZero(Shl.getOperand(0), HighZeros))
2141  break;
2142 
2143  // zext (shl nuw i8 %x, C) to i32 --> shl (zext i8 %x to i32), (zext C)
2144  MVT VT = N.getSimpleValueType();
2145  SDLoc DL(N);
2146  SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Shl.getOperand(0));
2147  SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, Shl.getOperand(1));
2148 
2149  // Convert the shift to scale factor.
2150  AM.Scale = 1 << ShAmtC->getZExtValue();
2151  AM.IndexReg = Zext;
2152 
2153  insertDAGNode(*CurDAG, N, Zext);
2154  insertDAGNode(*CurDAG, N, NewShl);
2155  CurDAG->ReplaceAllUsesWith(N, NewShl);
2156  CurDAG->RemoveDeadNode(N.getNode());
2157  return false;
2158  }
2159  }
2160 
2161  return matchAddressBase(N, AM);
2162 }
2163 
2164 /// Helper for MatchAddress. Add the specified node to the
2165 /// specified addressing mode without any further recursion.
2166 bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
2167  // Is the base register already occupied?
2168  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
2169  // If so, check to see if the scale index register is set.
2170  if (!AM.IndexReg.getNode()) {
2171  AM.IndexReg = N;
2172  AM.Scale = 1;
2173  return false;
2174  }
2175 
2176  // Otherwise, we cannot select it.
2177  return true;
2178  }
2179 
2180  // Default, generate it as a register.
2181  AM.BaseType = X86ISelAddressMode::RegBase;
2182  AM.Base_Reg = N;
2183  return false;
2184 }
2185 
2186 /// Helper for selectVectorAddr. Handles things that can be folded into a
2187 /// gather scatter address. The index register and scale should have already
2188 /// been handled.
2189 bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
2190  // TODO: Support other operations.
2191  switch (N.getOpcode()) {
2192  case ISD::Constant: {
2193  uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
2194  if (!foldOffsetIntoAddress(Val, AM))
2195  return false;
2196  break;
2197  }
2198  case X86ISD::Wrapper:
2199  if (!matchWrapper(N, AM))
2200  return false;
2201  break;
2202  }
2203 
2204  return matchAddressBase(N, AM);
2205 }
2206 
2207 bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
2208  SDValue &Scale, SDValue &Index,
2209  SDValue &Disp, SDValue &Segment) {
2210  X86ISelAddressMode AM;
2211  auto *Mgs = cast<X86MaskedGatherScatterSDNode>(Parent);
2212  AM.IndexReg = Mgs->getIndex();
2213  AM.Scale = cast<ConstantSDNode>(Mgs->getScale())->getZExtValue();
2214 
2215  unsigned AddrSpace = cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
2216  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
2217  if (AddrSpace == 256)
2218  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
2219  if (AddrSpace == 257)
2220  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
2221  if (AddrSpace == 258)
2222  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
2223 
2224  SDLoc DL(N);
2225  MVT VT = N.getSimpleValueType();
2226 
2227  // Try to match into the base and displacement fields.
2228  if (matchVectorAddress(N, AM))
2229  return false;
2230 
2231  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
2232  return true;
2233 }
2234 
2235 /// Returns true if it is able to pattern match an addressing mode.
2236 /// It returns the operands which make up the maximal addressing mode it can
2237 /// match by reference.
2238 ///
2239 /// Parent is the parent node of the addr operand that is being matched. It
2240 /// is always a load, store, atomic node, or null. It is only null when
2241 /// checking memory operands for inline asm nodes.
2242 bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
2243  SDValue &Scale, SDValue &Index,
2244  SDValue &Disp, SDValue &Segment) {
2245  X86ISelAddressMode AM;
2246 
2247  if (Parent &&
2248  // This list of opcodes are all the nodes that have an "addr:$ptr" operand
2249  // that are not a MemSDNode, and thus don't have proper addrspace info.
2250  Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
2251  Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
2252  Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
2253  Parent->getOpcode() != X86ISD::ENQCMD && // Fixme
2254  Parent->getOpcode() != X86ISD::ENQCMDS && // Fixme
2255  Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
2256  Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
2257  unsigned AddrSpace =
2258  cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
2259  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
2260  if (AddrSpace == 256)
2261  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
2262  if (AddrSpace == 257)
2263  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
2264  if (AddrSpace == 258)
2265  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
2266  }
2267 
2268  // Save the DL and VT before calling matchAddress, it can invalidate N.
2269  SDLoc DL(N);
2270  MVT VT = N.getSimpleValueType();
2271 
2272  if (matchAddress(N, AM))
2273  return false;
2274 
2275  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
2276  return true;
2277 }
2278 
2279 // We can only fold a load if all nodes between it and the root node have a
2280 // single use. If there are additional uses, we could end up duplicating the
2281 // load.
2282 static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *User) {
2283  while (User != Root) {
2284  if (!User->hasOneUse())
2285  return false;
2286  User = *User->use_begin();
2287  }
2288 
2289  return true;
2290 }
2291 
2292 /// Match a scalar SSE load. In particular, we want to match a load whose top
2293 /// elements are either undef or zeros. The load flavor is derived from the
2294 /// type of N, which is either v4f32 or v2f64.
2295 ///
2296 /// We also return:
2297 /// PatternChainNode: this is the matched node that has a chain input and
2298 /// output.
2299 bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDNode *Parent,
2300  SDValue N, SDValue &Base,
2301  SDValue &Scale, SDValue &Index,
2302  SDValue &Disp, SDValue &Segment,
2303  SDValue &PatternNodeWithChain) {
2304  if (!hasSingleUsesFromRoot(Root, Parent))
2305  return false;
2306 
2307  // We can allow a full vector load here since narrowing a load is ok unless
2308  // it's volatile.
2309  if (ISD::isNON_EXTLoad(N.getNode())) {
2310  LoadSDNode *LD = cast<LoadSDNode>(N);
2311  if (!LD->isVolatile() &&
2312  IsProfitableToFold(N, LD, Root) &&
2313  IsLegalToFold(N, Parent, Root, OptLevel)) {
2314  PatternNodeWithChain = N;
2315  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
2316  Segment);
2317  }
2318  }
2319 
2320  // We can also match the special zero extended load opcode.
2321  if (N.getOpcode() == X86ISD::VZEXT_LOAD) {
2322  PatternNodeWithChain = N;
2323  if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
2324  IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
2325  auto *MI = cast<MemIntrinsicSDNode>(PatternNodeWithChain);
2326  return selectAddr(MI, MI->getBasePtr(), Base, Scale, Index, Disp,
2327  Segment);
2328  }
2329  }
2330 
2331  // Need to make sure that the SCALAR_TO_VECTOR and load are both only used
2332  // once. Otherwise the load might get duplicated and the chain output of the
2333  // duplicate load will not be observed by all dependencies.
2334  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR && N.getNode()->hasOneUse()) {
2335  PatternNodeWithChain = N.getOperand(0);
2336  if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
2337  IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
2338  IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
2339  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
2340  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
2341  Segment);
2342  }
2343  }
2344 
2345  return false;
2346 }
2347 
2348 
2349 bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
2350  if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2351  uint64_t ImmVal = CN->getZExtValue();
2352  if (!isUInt<32>(ImmVal))
2353  return false;
2354 
2355  Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i64);
2356  return true;
2357  }
2358 
2359  // In static codegen with small code model, we can get the address of a label
2360  // into a register with 'movl'
2361  if (N->getOpcode() != X86ISD::Wrapper)
2362  return false;
2363 
2364  N = N.getOperand(0);
2365 
2366  // At least GNU as does not accept 'movl' for TPOFF relocations.
2367  // FIXME: We could use 'movl' when we know we are targeting MC.
2368  if (N->getOpcode() == ISD::TargetGlobalTLSAddress)
2369  return false;
2370 
2371  Imm = N;
2372  if (N->getOpcode() != ISD::TargetGlobalAddress)
2373  return TM.getCodeModel() == CodeModel::Small;
2374 
2375  Optional<ConstantRange> CR =
2376  cast<GlobalAddressSDNode>(N)->getGlobal()->getAbsoluteSymbolRange();
2377  if (!CR)
2378  return TM.getCodeModel() == CodeModel::Small;
2379 
2380  return CR->getUnsignedMax().ult(1ull << 32);
2381 }
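// An illustrative note: this relies on the fact that a 32-bit mov such as
// movl $0x12345678, %eax implicitly zeroes the upper 32 bits of %rax, so any
// 64-bit value known to fit in 32 unsigned bits -- including suitably ranged
// absolute symbols -- can be materialized with the shorter movl encoding.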
2382 
2383 bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
2384  SDValue &Scale, SDValue &Index,
2385  SDValue &Disp, SDValue &Segment) {
2386  // Save the debug loc before calling selectLEAAddr, in case it invalidates N.
2387  SDLoc DL(N);
2388 
2389  if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
2390  return false;
2391 
2392  RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
2393  if (RN && RN->getReg() == 0)
2394  Base = CurDAG->getRegister(0, MVT::i64);
2395  else if (Base.getValueType() == MVT::i32 && !isa<FrameIndexSDNode>(Base)) {
2396  // Base could already be %rip, particularly in the x32 ABI.
2397  SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
2398  MVT::i64), 0);
2399  Base = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
2400  Base);
2401  }
2402 
2403  RN = dyn_cast<RegisterSDNode>(Index);
2404  if (RN && RN->getReg() == 0)
2405  Index = CurDAG->getRegister(0, MVT::i64);
2406  else {
2407  assert(Index.getValueType() == MVT::i32 &&
2408  "Expect to be extending 32-bit registers for use in LEA");
2409  SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
2410  MVT::i64), 0);
2411  Index = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
2412  Index);
2413  }
2414 
2415  return true;
2416 }
2417 
2418 /// Calls SelectAddr and determines if the maximal addressing
2419 /// mode it matches can be cost effectively emitted as an LEA instruction.
2420 bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
2421  SDValue &Base, SDValue &Scale,
2422  SDValue &Index, SDValue &Disp,
2423  SDValue &Segment) {
2424  X86ISelAddressMode AM;
2425 
2426  // Save the DL and VT before calling matchAddress, it can invalidate N.
2427  SDLoc DL(N);
2428  MVT VT = N.getSimpleValueType();
2429 
2430  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
2431  // segments.
2432  SDValue Copy = AM.Segment;
2433  SDValue T = CurDAG->getRegister(0, MVT::i32);
2434  AM.Segment = T;
2435  if (matchAddress(N, AM))
2436  return false;
2437  assert (T == AM.Segment);
2438  AM.Segment = Copy;
2439 
2440  unsigned Complexity = 0;
2441  if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode())
2442  Complexity = 1;
2443  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
2444  Complexity = 4;
2445 
2446  if (AM.IndexReg.getNode())
2447  Complexity++;
2448 
2449  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
2450  // a simple shift.
2451  if (AM.Scale > 1)
2452  Complexity++;
2453 
2454  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
2455  // to a LEA. This is determined with some experimentation but is by no means
2456  // optimal (especially for code size consideration). LEA is nice because of
2457  // its three-address nature. Tweak the cost function again when we can run
2458  // convertToThreeAddress() at register allocation time.
2459  if (AM.hasSymbolicDisplacement()) {
2460  // For X86-64, always use LEA to materialize RIP-relative addresses.
2461  if (Subtarget->is64Bit())
2462  Complexity = 4;
2463  else
2464  Complexity += 2;
2465  }
2466 
2467  // Heuristic: try harder to form an LEA from ADD if the operands set flags.
2468  // Unlike ADD, LEA does not affect flags, so we will be less likely to require
2469  // duplicating flag-producing instructions later in the pipeline.
2470  if (N.getOpcode() == ISD::ADD) {
2471  auto isMathWithFlags = [](SDValue V) {
2472  switch (V.getOpcode()) {
2473  case X86ISD::ADD:
2474  case X86ISD::SUB:
2475  case X86ISD::ADC:
2476  case X86ISD::SBB:
2477  /* TODO: These opcodes can be added safely, but we may want to justify
2478  their inclusion for different reasons (better for reg-alloc).
2479  case X86ISD::SMUL:
2480  case X86ISD::UMUL:
2481  case X86ISD::OR:
2482  case X86ISD::XOR:
2483  case X86ISD::AND:
2484  */
2485  // Value 1 is the flag output of the node - verify it's not dead.
2486  return !SDValue(V.getNode(), 1).use_empty();
2487  default:
2488  return false;
2489  }
2490  };
2491  // TODO: This could be an 'or' rather than 'and' to make the transform more
2492  // likely to happen. We might want to factor in whether there's a
2493  // load folding opportunity for the math op that disappears with LEA.
2494  if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1)))
2495  Complexity++;
2496  }
2497 
2498  if (AM.Disp)
2499  Complexity++;
2500 
2501  // If it isn't worth using an LEA, reject it.
2502  if (Complexity <= 2)
2503  return false;
2504 
2505  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
2506  return true;
2507 }
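// Illustrative examples of the complexity heuristic above: a plain base+index
// form such as (%rdi,%rsi) scores 1 + 1 = 2 and is rejected, since an add is
// at least as good; base + scaled index + displacement such as
// 16(%rdi,%rsi,4) scores 1 + 1 + 1 + 1 = 4 and is emitted as an LEA.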
2508 
2509 /// This is only run on TargetGlobalTLSAddress nodes.
2510 bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
2511  SDValue &Scale, SDValue &Index,
2512  SDValue &Disp, SDValue &Segment) {
2513  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
2514  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
2515 
2516  X86ISelAddressMode AM;
2517  AM.GV = GA->getGlobal();
2518  AM.Disp += GA->getOffset();
2519  AM.SymbolFlags = GA->getTargetFlags();
2520 
2521  MVT VT = N.getSimpleValueType();
2522  if (VT == MVT::i32) {
2523  AM.Scale = 1;
2524  AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
2525  }
2526 
2527  getAddressOperands(AM, SDLoc(N), VT, Base, Scale, Index, Disp, Segment);
2528  return true;
2529 }
2530 
2531 bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) {
2532  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
2533  Op = CurDAG->getTargetConstant(CN->getAPIntValue(), SDLoc(CN),
2534  N.getValueType());
2535  return true;
2536  }
2537 
2538  // Keep track of the original value type and whether this value was
2539  // truncated. If we see a truncation from pointer type to VT that truncates
2540  // bits that are known to be zero, we can use a narrow reference.
2541  EVT VT = N.getValueType();
2542  bool WasTruncated = false;
2543  if (N.getOpcode() == ISD::TRUNCATE) {
2544  WasTruncated = true;
2545  N = N.getOperand(0);
2546  }
2547 
2548  if (N.getOpcode() != X86ISD::Wrapper)
2549  return false;
2550 
2551  // We can only use non-GlobalValues as immediates if they were not truncated,
2552  // as we do not have any range information. If we have a GlobalValue and the
2553  // address was not truncated, we can select it as an operand directly.
2554  unsigned Opc = N.getOperand(0)->getOpcode();
2555  if (Opc != ISD::TargetGlobalAddress || !WasTruncated) {
2556  Op = N.getOperand(0);
2557  // We can only select the operand directly if we didn't have to look past a
2558  // truncate.
2559  return !WasTruncated;
2560  }
2561 
2562  // Check that the global's range fits into VT.
2563  auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0));
2564  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2565  if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits()))
2566  return false;
2567 
2568  // Okay, we can use a narrow reference.
2569  Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT,
2570  GA->getOffset(), GA->getTargetFlags());
2571  return true;
2572 }
2573 
2574 bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
2575  SDValue &Base, SDValue &Scale,
2576  SDValue &Index, SDValue &Disp,
2577  SDValue &Segment) {
2578  if (!ISD::isNON_EXTLoad(N.getNode()) ||
2579  !IsProfitableToFold(N, P, Root) ||
2580  !IsLegalToFold(N, P, Root, OptLevel))
2581  return false;
2582 
2583  return selectAddr(N.getNode(),
2584  N.getOperand(1), Base, Scale, Index, Disp, Segment);
2585 }
2586 
2587 /// Return an SDNode that returns the value of the global base register.
2588 /// Output instructions required to initialize the global base register,
2589 /// if necessary.
2590 SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
2591  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
2592  auto &DL = MF->getDataLayout();
2593  return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
2594 }
2595 
2596 bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
2597  if (N->getOpcode() == ISD::TRUNCATE)
2598  N = N->getOperand(0).getNode();
2599  if (N->getOpcode() != X86ISD::Wrapper)
2600  return false;
2601 
2602  auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0));
2603  if (!GA)
2604  return false;
2605 
2606  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2607  return CR && CR->getSignedMin().sge(-1ull << Width) &&
2608  CR->getSignedMax().slt(1ull << Width);
2609 }
2610 
2611 static X86::CondCode getCondFromNode(SDNode *N) {
2612  assert(N->isMachineOpcode() && "Unexpected node");
2613  X86::CondCode CC = X86::COND_INVALID;
2614  unsigned Opc = N->getMachineOpcode();
2615  if (Opc == X86::JCC_1)
2616  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(1));
2617  else if (Opc == X86::SETCCr)
2618  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(0));
2619  else if (Opc == X86::SETCCm)
2620  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(5));
2621  else if (Opc == X86::CMOV16rr || Opc == X86::CMOV32rr ||
2622  Opc == X86::CMOV64rr)
2623  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(2));
2624  else if (Opc == X86::CMOV16rm || Opc == X86::CMOV32rm ||
2625  Opc == X86::CMOV64rm)
2626  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(6));
2627 
2628  return CC;
2629 }
2630 
2631 /// Test whether the given X86ISD::CMP node has any users that use a flag
2632 /// other than ZF.
2633 bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const {
2634  // Examine each user of the node.
2635  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2636  UI != UE; ++UI) {
2637  // Only check things that use the flags.
2638  if (UI.getUse().getResNo() != Flags.getResNo())
2639  continue;
2640  // Only examine CopyToReg uses that copy to EFLAGS.
2641  if (UI->getOpcode() != ISD::CopyToReg ||
2642  cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2643  return false;
2644  // Examine each user of the CopyToReg use.
2645  for (SDNode::use_iterator FlagUI = UI->use_begin(),
2646  FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
2647  // Only examine the Flag result.
2648  if (FlagUI.getUse().getResNo() != 1) continue;
2649  // Anything unusual: assume conservatively.
2650  if (!FlagUI->isMachineOpcode()) return false;
2651  // Examine the condition code of the user.
2652  X86::CondCode CC = getCondFromNode(*FlagUI);
2653 
2654  switch (CC) {
2655  // Comparisons which only use the zero flag.
2656  case X86::COND_E: case X86::COND_NE:
2657  continue;
2658  // Anything else: assume conservatively.
2659  default:
2660  return false;
2661  }
2662  }
2663  }
2664  return true;
2665 }
2666 
2667 /// Test whether the given X86ISD::CMP node has any uses which require the SF
2668 /// flag to be accurate.
2669 bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
2670  // Examine each user of the node.
2671  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2672  UI != UE; ++UI) {
2673  // Only check things that use the flags.
2674  if (UI.getUse().getResNo() != Flags.getResNo())
2675  continue;
2676  // Only examine CopyToReg uses that copy to EFLAGS.
2677  if (UI->getOpcode() != ISD::CopyToReg ||
2678  cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2679  return false;
2680  // Examine each user of the CopyToReg use.
2681  for (SDNode::use_iterator FlagUI = UI->use_begin(),
2682  FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
2683  // Only examine the Flag result.
2684  if (FlagUI.getUse().getResNo() != 1) continue;
2685  // Anything unusual: assume conservatively.
2686  if (!FlagUI->isMachineOpcode()) return false;
2687  // Examine the condition code of the user.
2688  X86::CondCode CC = getCondFromNode(*FlagUI);
2689 
2690  switch (CC) {
2691  // Comparisons which don't examine the SF flag.
2692  case X86::COND_A: case X86::COND_AE:
2693  case X86::COND_B: case X86::COND_BE:
2694  case X86::COND_E: case X86::COND_NE:
2695  case X86::COND_O: case X86::COND_NO:
2696  case X86::COND_P: case X86::COND_NP:
2697  continue;
2698  // Anything else: assume conservatively.
2699  default:
2700  return false;
2701  }
2702  }
2703  }
2704  return true;
2705 }
2706 
2707 static bool mayUseCarryFlag(X86::CondCode CC) {
2708  switch (CC) {
2709  // Comparisons which don't examine the CF flag.
2710  case X86::COND_O: case X86::COND_NO:
2711  case X86::COND_E: case X86::COND_NE:
2712  case X86::COND_S: case X86::COND_NS:
2713  case X86::COND_P: case X86::COND_NP:
2714  case X86::COND_L: case X86::COND_GE:
2715  case X86::COND_G: case X86::COND_LE:
2716  return false;
2717  // Anything else: assume conservatively.
2718  default:
2719  return true;
2720  }
2721 }
2722 
2723 /// Test whether the given node which sets flags has any uses which require the
2724 /// CF flag to be accurate.
2725  bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
2726  // Examine each user of the node.
2727  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2728  UI != UE; ++UI) {
2729  // Only check things that use the flags.
2730  if (UI.getUse().getResNo() != Flags.getResNo())
2731  continue;
2732 
2733  unsigned UIOpc = UI->getOpcode();
2734 
2735  if (UIOpc == ISD::CopyToReg) {
2736  // Only examine CopyToReg uses that copy to EFLAGS.
2737  if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2738  return false;
2739  // Examine each user of the CopyToReg use.
2740  for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
2741  FlagUI != FlagUE; ++FlagUI) {
2742  // Only examine the Flag result.
2743  if (FlagUI.getUse().getResNo() != 1)
2744  continue;
2745  // Anything unusual: assume conservatively.
2746  if (!FlagUI->isMachineOpcode())
2747  return false;
2748  // Examine the condition code of the user.
2749  X86::CondCode CC = getCondFromNode(*FlagUI);
2750 
2751  if (mayUseCarryFlag(CC))
2752  return false;
2753  }
2754 
2755  // This CopyToReg is ok. Move on to the next user.
2756  continue;
2757  }
2758 
2759  // This might be an unselected node. So look for the pre-isel opcodes that
2760  // use flags.
2761  unsigned CCOpNo;
2762  switch (UIOpc) {
2763  default:
2764  // Something unusual. Be conservative.
2765  return false;
2766  case X86ISD::SETCC: CCOpNo = 0; break;
2767  case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
2768  case X86ISD::CMOV: CCOpNo = 2; break;
2769  case X86ISD::BRCOND: CCOpNo = 2; break;
2770  }
2771 
2772  X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo);
2773  if (mayUseCarryFlag(CC))
2774  return false;
2775  }
2776  return true;
2777 }
2778 
2779 /// Check whether or not the chain ending in StoreNode is suitable for doing
2780 /// the {load; op; store} to modify transformation.
2781 static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
2782  SDValue StoredVal, SelectionDAG *CurDAG,
2783  unsigned LoadOpNo,
2784  LoadSDNode *&LoadNode,
2785  SDValue &InputChain) {
2786  // Is the stored value result 0 of the operation?
2787  if (StoredVal.getResNo() != 0) return false;
2788 
2789  // Are there other uses of the operation other than the store?
2790  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
2791 
2792  // Is the store non-extending and non-indexed?
2793  if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
2794  return false;
2795 
2796  SDValue Load = StoredVal->getOperand(LoadOpNo);
2797  // Is the stored value a non-extending and non-indexed load?
2798  if (!ISD::isNormalLoad(Load.getNode())) return false;
2799 
2800  // Return LoadNode by reference.
2801  LoadNode = cast<LoadSDNode>(Load);
2802 
2803  // Is store the only read of the loaded value?
2804  if (!Load.hasOneUse())
2805  return false;
2806 
2807  // Is the address of the store the same as the load?
2808  if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
2809  LoadNode->getOffset() != StoreNode->getOffset())
2810  return false;
2811 
2812  bool FoundLoad = false;
2813  SmallVector<SDValue, 4> ChainOps;
2814  SmallVector<const SDNode *, 4> LoopWorklist;
2815  SmallPtrSet<const SDNode *, 16> Visited;
2816  const unsigned int Max = 1024;
2817 
2818  // Visualization of Load-Op-Store fusion:
2819  // -------------------------
2820  // Legend:
2821  // *-lines = Chain operand dependencies.
2822  // |-lines = Normal operand dependencies.
2823  // Dependencies flow down and right. n-suffix references multiple nodes.
2824  //
2825  // C Xn C
2826  // * * *
2827  // * * *
2828  // Xn A-LD Yn TF Yn
2829  // * * \ | * |
2830  // * * \ | * |
2831  // * * \ | => A--LD_OP_ST
2832  // * * \| \
2833  // TF OP \
2834  // * | \ Zn
2835  // * | \
2836  // A-ST Zn
2837  //
2838 
2839  // This merge induced dependences from: #1: Xn -> LD, OP, Zn
2840  // #2: Yn -> LD
2841  // #3: ST -> Zn
2842 
2843  // Ensure the transform is safe by checking for the dual
2844  // dependencies to make sure we do not induce a loop.
2845 
2846  // As LD is a predecessor to both OP and ST we can do this by checking:
2847  // a). if LD is a predecessor to a member of Xn or Yn.
2848  // b). if a Zn is a predecessor to ST.
2849 
2850  // However, (b) can only occur through being a chain predecessor to
2851  // ST, which is the same as Zn being a member or predecessor of Xn,
2852  // which is a subset of LD being a predecessor of Xn. So it's
2853  // subsumed by check (a).
2854 
2855  SDValue Chain = StoreNode->getChain();
2856 
2857  // Gather X elements in ChainOps.
2858  if (Chain == Load.getValue(1)) {
2859  FoundLoad = true;
2860  ChainOps.push_back(Load.getOperand(0));
2861  } else if (Chain.getOpcode() == ISD::TokenFactor) {
2862  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
2863  SDValue Op = Chain.getOperand(i);
2864  if (Op == Load.getValue(1)) {
2865  FoundLoad = true;
2866  // Drop Load, but keep its chain. No cycle check necessary.
2867  ChainOps.push_back(Load.getOperand(0));
2868  continue;
2869  }
2870  LoopWorklist.push_back(Op.getNode());
2871  ChainOps.push_back(Op);
2872  }
2873  }
2874 
2875  if (!FoundLoad)
2876  return false;
2877 
2878  // Worklist is currently Xn. Add Yn to worklist.
2879  for (SDValue Op : StoredVal->ops())
2880  if (Op.getNode() != LoadNode)
2881  LoopWorklist.push_back(Op.getNode());
2882 
2883  // Check (a) if Load is a predecessor to Xn + Yn
2884  if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
2885  true))
2886  return false;
2887 
2888  InputChain =
2889  CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps);
2890  return true;
2891 }
2892 
2893 // Change a chain of {load; op; store} of the same value into a simple op
2894 // through memory of that value, if the uses of the modified value and its
2895 // address are suitable.
2896 //
2897 // The tablegen pattern memory operand pattern is currently not able to match
2898 // the case where the EFLAGS on the original operation are used.
2899 //
2900 // To move this to tablegen, we'll need to improve tablegen to allow flags to
2901 // be transferred from a node in the pattern to the result node, probably with
2902 // a new keyword. For example, we have this
2903 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2904 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2905 // (implicit EFLAGS)]>;
2906 // but maybe need something like this
2907 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2908 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2909 // (transferrable EFLAGS)]>;
2910 //
2911 // Until then, we manually fold these and instruction select the operation
2912 // here.
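// For example, a chain of the form
//   t = load i32 [addr]; v = add t, 5; store v, [addr]
// collapses into the single memory-operand instruction addl $5, (%rdi),
// provided the loaded value has no other uses and the chains line up as
// checked by isFusableLoadOpStorePattern above.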
2913 bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
2914  StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
2915  SDValue StoredVal = StoreNode->getOperand(1);
2916  unsigned Opc = StoredVal->getOpcode();
2917 
2918  // Before we try to select anything, make sure this is memory operand size
2919  // and opcode we can handle. Note that this must match the code below that
2920  // actually lowers the opcodes.
2921  EVT MemVT = StoreNode->getMemoryVT();
2922  if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
2923  MemVT != MVT::i8)
2924  return false;
2925 
2926  bool IsCommutable = false;
2927  bool IsNegate = false;
2928  switch (Opc) {
2929  default:
2930  return false;
2931  case X86ISD::SUB:
2932  IsNegate = isNullConstant(StoredVal.getOperand(0));
2933  break;
2934  case X86ISD::SBB:
2935  break;
2936  case X86ISD::ADD:
2937  case X86ISD::ADC:
2938  case X86ISD::AND:
2939  case X86ISD::OR:
2940  case X86ISD::XOR:
2941  IsCommutable = true;
2942  break;
2943  }
2944 
2945  unsigned LoadOpNo = IsNegate ? 1 : 0;
2946  LoadSDNode *LoadNode = nullptr;
2947  SDValue InputChain;
2948  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
2949  LoadNode, InputChain)) {
2950  if (!IsCommutable)
2951  return false;
2952 
2953  // This operation is commutable, try the other operand.
2954  LoadOpNo = 1;
2955  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
2956  LoadNode, InputChain))
2957  return false;
2958  }
2959 
2960  SDValue Base, Scale, Index, Disp, Segment;
2961  if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp,
2962  Segment))
2963  return false;
2964 
2965  auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
2966  unsigned Opc8) {
2967  switch (MemVT.getSimpleVT().SimpleTy) {
2968  case MVT::i64:
2969  return Opc64;
2970  case MVT::i32:
2971  return Opc32;
2972  case MVT::i16:
2973  return Opc16;
2974  case MVT::i8:
2975  return Opc8;
2976  default:
2977  llvm_unreachable("Invalid size!");
2978  }
2979  };
2980 
2981  MachineSDNode *Result;
2982  switch (Opc) {
2983  case X86ISD::SUB:
2984  // Handle negate.
2985  if (IsNegate) {
2986  unsigned NewOpc = SelectOpcode(X86::NEG64m, X86::NEG32m, X86::NEG16m,
2987  X86::NEG8m);
2988  const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
2989  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
2990  MVT::Other, Ops);
2991  break;
2992  }
2993  LLVM_FALLTHROUGH;
2994  case X86ISD::ADD:
2995  // Try to match inc/dec.
2996  if (!Subtarget->slowIncDec() || OptForSize) {
2997  bool IsOne = isOneConstant(StoredVal.getOperand(1));
2998  bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
2999  // ADD/SUB with 1/-1 can use inc/dec when the carry flag isn't used.
3000  if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) {
3001  unsigned NewOpc =
3002  ((Opc == X86ISD::ADD) == IsOne)
3003  ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
3004  : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
3005  const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
3006  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
3007  MVT::Other, Ops);
3008  break;
3009  }
3010  }
3011  LLVM_FALLTHROUGH;
3012  case X86ISD::ADC:
3013  case X86ISD::SBB:
3014  case X86ISD::AND:
3015  case X86ISD::OR:
3016  case X86ISD::XOR: {
3017  auto SelectRegOpcode = [SelectOpcode](unsigned Opc) {
3018  switch (Opc) {
3019  case X86ISD::ADD:
3020  return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr,
3021  X86::ADD8mr);
3022  case X86ISD::ADC:
3023  return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr,
3024  X86::ADC8mr);
3025  case X86ISD::SUB:
3026  return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr,
3027  X86::SUB8mr);
3028  case X86ISD::SBB:
3029  return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr,
3030  X86::SBB8mr);
3031  case X86ISD::AND:
3032  return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr,
3033  X86::AND8mr);
3034  case X86ISD::OR:
3035  return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr);
3036  case X86ISD::XOR:
3037  return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr,
3038  X86::XOR8mr);
3039  default:
3040  llvm_unreachable("Invalid opcode!");
3041  }
3042  };
3043  auto SelectImm8Opcode = [SelectOpcode](unsigned Opc) {
3044  switch (Opc) {
3045  case X86ISD::ADD:
3046  return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8, 0);
3047  case X86ISD::ADC:
3048  return SelectOpcode(X86::ADC64mi8, X86::ADC32mi8, X86::ADC16mi8, 0);
3049  case X86ISD::SUB:
3050  return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8, 0);
3051  case X86ISD::SBB:
3052  return SelectOpcode(X86::SBB64mi8, X86::SBB32mi8, X86::SBB16mi8, 0);
3053  case X86ISD::AND:
3054  return SelectOpcode(X86::AND64mi8, X86::AND32mi8, X86::AND16mi8, 0);
3055  case X86ISD::OR:
3056  return SelectOpcode(X86::OR64mi8, X86::OR32mi8, X86::OR16mi8, 0);
3057  case X86ISD::XOR:
3058  return SelectOpcode(X86::XOR64mi8, X86::XOR32mi8, X86::XOR16mi8, 0);
3059  default:
3060  llvm_unreachable("Invalid opcode!");
3061  }
3062  };
3063  auto SelectImmOpcode = [SelectOpcode](unsigned Opc) {
3064  switch (Opc) {
3065  case X86ISD::ADD:
3066  return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi,
3067  X86::ADD8mi);
3068  case X86ISD::ADC:
3069  return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi,
3070  X86::ADC8mi);
3071  case X86ISD::SUB:
3072  return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi,
3073  X86::SUB8mi);
3074  case X86ISD::SBB:
3075  return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi,
3076  X86::SBB8mi);
3077  case X86ISD::AND:
3078  return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi,
3079  X86::AND8mi);
3080  case X86ISD::OR:
3081  return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi,
3082  X86::OR8mi);
3083  case X86ISD::XOR:
3084  return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi,
3085  X86::XOR8mi);
3086  default:
3087  llvm_unreachable("Invalid opcode!");
3088  }
3089  };
3090 
3091  unsigned NewOpc = SelectRegOpcode(Opc);
3092  SDValue Operand = StoredVal->getOperand(1-LoadOpNo);
3093 
3094  // See if the operand is a constant that we can fold into an immediate
3095  // operand.
3096  if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
3097  int64_t OperandV = OperandC->getSExtValue();
3098 
3099  // Check if we can shrink the operand enough to fit in an immediate (or
3100  // fit into a smaller immediate) by negating it and switching the
3101  // operation.
3102  if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) &&
3103  ((MemVT != MVT::i8 && !isInt<8>(OperandV) && isInt<8>(-OperandV)) ||
3104  (MemVT == MVT::i64 && !isInt<32>(OperandV) &&
3105  isInt<32>(-OperandV))) &&
3106  hasNoCarryFlagUses(StoredVal.getValue(1))) {
3107  OperandV = -OperandV;
3108  Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
3109  }
3110 
3111  // First try to fit this into an Imm8 operand. If it doesn't fit, then try
3112  // the larger immediate operand.
3113  if (MemVT != MVT::i8 && isInt<8>(OperandV)) {
3114  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
3115  NewOpc = SelectImm8Opcode(Opc);
3116  } else if (MemVT != MVT::i64 || isInt<32>(OperandV)) {
3117  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
3118  NewOpc = SelectImmOpcode(Opc);
3119  }
3120  }
3121 
3122  if (Opc == X86ISD::ADC || Opc == X86ISD::SBB) {
3123  SDValue CopyTo =
3124  CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS,
3125  StoredVal.getOperand(2), SDValue());
3126 
3127  const SDValue Ops[] = {Base, Scale, Index, Disp,
3128  Segment, Operand, CopyTo, CopyTo.getValue(1)};
3129  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
3130  Ops);
3131  } else {
3132  const SDValue Ops[] = {Base, Scale, Index, Disp,
3133  Segment, Operand, InputChain};
3134  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
3135  Ops);
3136  }
3137  break;
3138  }
3139  default:
3140  llvm_unreachable("Invalid opcode!");
3141  }
3142 
3143  MachineMemOperand *MemOps[] = {StoreNode->getMemOperand(),
3144  LoadNode->getMemOperand()};
3145  CurDAG->setNodeMemRefs(Result, MemOps);
3146 
3147  // Update Load Chain uses as well.
3148  ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1));
3149  ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
3150  ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
3151  CurDAG->RemoveDeadNode(Node);
3152  return true;
3153 }
3154 
3155 // See if this is an X & Mask that we can match to BEXTR/BZHI.
3156 // Where Mask is one of the following patterns:
3157 // a) x & (1 << nbits) - 1
3158 // b) x & ~(-1 << nbits)
3159 // c) x & (-1 >> (32 - y))
3160 // d) x << (32 - y) >> (32 - y)
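// All four forms keep only the low 'nbits' (or 'y') bits of x, i.e. they
// compute x & ((1 << nbits) - 1), which is what BZHI does directly and what
// BEXTR does with a start position of 0.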
3161 bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
3162  assert(
3163  (Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) &&
3164  "Should be either an and-mask, or right-shift after clearing high bits.");
3165 
 3166  // BEXTR is a BMI instruction, BZHI is a BMI2 instruction. We need at least one.
3167  if (!Subtarget->hasBMI() && !Subtarget->hasBMI2())
3168  return false;
3169 
3170  MVT NVT = Node->getSimpleValueType(0);
3171 
3172  // Only supported for 32 and 64 bits.
3173  if (NVT != MVT::i32 && NVT != MVT::i64)
3174  return false;
3175 
3176  SDValue NBits;
3177 
 3178  // If we have BMI2's BZHI, we are ok with multi-use patterns.
3179  // Else, if we only have BMI1's BEXTR, we require one-use.
3180  const bool CanHaveExtraUses = Subtarget->hasBMI2();
3181  auto checkUses = [CanHaveExtraUses](SDValue Op, unsigned NUses) {
3182  return CanHaveExtraUses ||
3183  Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo());
3184  };
3185  auto checkOneUse = [checkUses](SDValue Op) { return checkUses(Op, 1); };
3186  auto checkTwoUse = [checkUses](SDValue Op) { return checkUses(Op, 2); };
3187 
3188  auto peekThroughOneUseTruncation = [checkOneUse](SDValue V) {
3189  if (V->getOpcode() == ISD::TRUNCATE && checkOneUse(V)) {
3190  assert(V.getSimpleValueType() == MVT::i32 &&
3191  V.getOperand(0).getSimpleValueType() == MVT::i64 &&
3192  "Expected i64 -> i32 truncation");
3193  V = V.getOperand(0);
3194  }
3195  return V;
3196  };
3197 
3198  // a) x & ((1 << nbits) + (-1))
3199  auto matchPatternA = [checkOneUse, peekThroughOneUseTruncation,
3200  &NBits](SDValue Mask) -> bool {
3201  // Match `add`. Must only have one use!
3202  if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))
3203  return false;
 3204  // We should be adding an all-ones constant (i.e. subtracting one).
3205  if (!isAllOnesConstant(Mask->getOperand(1)))
3206  return false;
3207  // Match `1 << nbits`. Might be truncated. Must only have one use!
3208  SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
3209  if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
3210  return false;
3211  if (!isOneConstant(M0->getOperand(0)))
3212  return false;
3213  NBits = M0->getOperand(1);
3214  return true;
3215  };
3216 
3217  auto isAllOnes = [this, peekThroughOneUseTruncation, NVT](SDValue V) {
3218  V = peekThroughOneUseTruncation(V);
3219  return CurDAG->MaskedValueIsAllOnes(
3220  V, APInt::getLowBitsSet(V.getSimpleValueType().getSizeInBits(),
3221  NVT.getSizeInBits()));
3222  };
3223 
3224  // b) x & ~(-1 << nbits)
3225  auto matchPatternB = [checkOneUse, isAllOnes, peekThroughOneUseTruncation,
3226  &NBits](SDValue Mask) -> bool {
3227  // Match `~()`. Must only have one use!
3228  if (Mask.getOpcode() != ISD::XOR || !checkOneUse(Mask))
3229  return false;
3230  // The -1 only has to be all-ones for the final Node's NVT.
3231  if (!isAllOnes(Mask->getOperand(1)))
3232  return false;
3233  // Match `-1 << nbits`. Might be truncated. Must only have one use!
3234  SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
3235  if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
3236  return false;
3237  // The -1 only has to be all-ones for the final Node's NVT.
3238  if (!isAllOnes(M0->getOperand(0)))
3239  return false;
3240  NBits = M0->getOperand(1);
3241  return true;
3242  };
3243 
3244  // Match potentially-truncated (bitwidth - y)
3245  auto matchShiftAmt = [checkOneUse, &NBits](SDValue ShiftAmt,
3246  unsigned Bitwidth) {
3247  // Skip over a truncate of the shift amount.
3248  if (ShiftAmt.getOpcode() == ISD::TRUNCATE) {
3249  ShiftAmt = ShiftAmt.getOperand(0);
3250  // The trunc should have been the only user of the real shift amount.
3251  if (!checkOneUse(ShiftAmt))
3252  return false;
3253  }
3254  // Match the shift amount as: (bitwidth - y). It should go away, too.
3255  if (ShiftAmt.getOpcode() != ISD::SUB)
3256  return false;
3257  auto V0 = dyn_cast<ConstantSDNode>(ShiftAmt.getOperand(0));
3258  if (!V0 || V0->getZExtValue() != Bitwidth)
3259  return false;
3260  NBits = ShiftAmt.getOperand(1);
3261  return true;
3262  };
3263 
3264  // c) x & (-1 >> (32 - y))
3265  auto matchPatternC = [checkOneUse, peekThroughOneUseTruncation,
3266  matchShiftAmt](SDValue Mask) -> bool {
3267  // The mask itself may be truncated.
3268  Mask = peekThroughOneUseTruncation(Mask);
3269  unsigned Bitwidth = Mask.getSimpleValueType().getSizeInBits();
3270  // Match `l>>`. Must only have one use!
3271  if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask))
3272  return false;
 3273  // We should be shifting a truly all-ones constant.
3274  if (!isAllOnesConstant(Mask.getOperand(0)))
3275  return false;
3276  SDValue M1 = Mask.getOperand(1);
3277  // The shift amount should not be used externally.
3278  if (!checkOneUse(M1))
3279  return false;
3280  return matchShiftAmt(M1, Bitwidth);
3281  };
3282 
3283  SDValue X;
3284 
3285  // d) x << (32 - y) >> (32 - y)
3286  auto matchPatternD = [checkOneUse, checkTwoUse, matchShiftAmt,
3287  &X](SDNode *Node) -> bool {
3288  if (Node->getOpcode() != ISD::SRL)
3289  return false;
3290  SDValue N0 = Node->getOperand(0);
3291  if (N0->getOpcode() != ISD::SHL || !checkOneUse(N0))
3292  return false;
3293  unsigned Bitwidth = N0.getSimpleValueType().getSizeInBits();
3294  SDValue N1 = Node->getOperand(1);
3295  SDValue N01 = N0->getOperand(1);
3296  // Both of the shifts must be by the exact same value.
3297  // There should not be any uses of the shift amount outside of the pattern.
3298  if (N1 != N01 || !checkTwoUse(N1))
3299  return false;
3300  if (!matchShiftAmt(N1, Bitwidth))
3301  return false;
3302  X = N0->getOperand(0);
3303  return true;
3304  };
3305 
3306  auto matchLowBitMask = [matchPatternA, matchPatternB,
3307  matchPatternC](SDValue Mask) -> bool {
3308  return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask);
3309  };
3310 
3311  if (Node->getOpcode() == ISD::AND) {
3312  X = Node->getOperand(0);
3313  SDValue Mask = Node->getOperand(1);
3314 
3315  if (matchLowBitMask(Mask)) {
3316  // Great.
3317  } else {
3318  std::swap(X, Mask);
3319  if (!matchLowBitMask(Mask))
3320  return false;
3321  }
3322  } else if (!matchPatternD(Node))
3323  return false;
3324 
3325  SDLoc DL(Node);
3326 
3327  // Truncate the shift amount.
3328  NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits);
3329  insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3330 
3331  // Insert 8-bit NBits into lowest 8 bits of 32-bit register.
 3332  // All the other bits are undefined; we do not care about them.
3333  SDValue ImplDef = SDValue(
3334  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0);
3335  insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef);
3336  NBits = CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, MVT::i32, ImplDef,
3337  NBits);
3338  insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3339 
3340  if (Subtarget->hasBMI2()) {
 3341  // Great, just emit the BZHI.
3342  if (NVT != MVT::i32) {
 3343  // But we have to place the bit count into a wide-enough register first.
3344  NBits = CurDAG->getNode(ISD::ANY_EXTEND, DL, NVT, NBits);
3345  insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3346  }
3347 
3348  SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, NVT, X, NBits);
3349  ReplaceNode(Node, Extract.getNode());
3350  SelectCode(Extract.getNode());
3351  return true;
3352  }
3353 
 3354  // Else, if we do *NOT* have BMI2, find out whether 'X' is *logically*
 3355  // shifted (potentially with a one-use trunc in between), and whether the
 3356  // truncation was the only use of the shift; if so, look past the
 3357  // one-use truncation.
3358  {
3359  SDValue RealX = peekThroughOneUseTruncation(X);
3360  // FIXME: only if the shift is one-use?
3361  if (RealX != X && RealX.getOpcode() == ISD::SRL)
3362  X = RealX;
3363  }
3364 
3365  MVT XVT = X.getSimpleValueType();
3366 
3367  // Else, emitting BEXTR requires one more step.
3368  // The 'control' of BEXTR has the pattern of:
3369  // [15...8 bit][ 7...0 bit] location
3370  // [ bit count][ shift] name
3371  // I.e. 0b000000011'00000001 means (x >> 0b1) & 0b11
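// For example, a control of 0x0804 (bit count 8, shift 4) computes
// (x >> 4) & 0xFF.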
3372 
3373  // Shift NBits left by 8 bits, thus producing 'control'.
 3374  // This leaves the low 8 bits zero.
3375  SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8);
3376  SDValue Control = CurDAG->getNode(ISD::SHL, DL, MVT::i32, NBits, C8);
3377  insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
3378 
3379  // If the 'X' is *logically* shifted, we can fold that shift into 'control'.
3380  // FIXME: only if the shift is one-use?
3381  if (X.getOpcode() == ISD::SRL) {
3382  SDValue ShiftAmt = X.getOperand(1);
3383  X = X.getOperand(0);
3384 
3385  assert(ShiftAmt.getValueType() == MVT::i8 &&
3386  "Expected shift amount to be i8");
3387 
3388  // Now, *zero*-extend the shift amount. The bits 8...15 *must* be zero!
3389  // We could zext to i16 in some form, but we intentionally don't do that.
3390  SDValue OrigShiftAmt = ShiftAmt;
3391  ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShiftAmt);
3392  insertDAGNode(*CurDAG, OrigShiftAmt, ShiftAmt);
3393 
3394  // And now 'or' these low 8 bits of shift amount into the 'control'.
3395  Control = CurDAG->getNode(ISD::OR, DL, MVT::i32, Control, ShiftAmt);
3396  insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
3397  }
3398 
 3399  // But we have to place the 'control' into a wide-enough register first.
3400  if (XVT != MVT::i32) {
3401  Control = CurDAG->getNode(ISD::ANY_EXTEND, DL, XVT, Control);
3402  insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
3403  }
3404 
3405  // And finally, form the BEXTR itself.
3406  SDValue Extract = CurDAG->getNode(X86ISD::BEXTR, DL, XVT, X, Control);
3407 
 3408  // The 'X' was originally truncated; truncate the result back down now.
3409  if (XVT != NVT) {
3410  insertDAGNode(*CurDAG, SDValue(Node, 0), Extract);
3411  Extract = CurDAG->getNode(ISD::TRUNCATE, DL, NVT, Extract);
3412  }
3413 
3414  ReplaceNode(Node, Extract.getNode());
3415  SelectCode(Extract.getNode());
3416 
3417  return true;
3418 }
3419 
3420 // See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
3421 MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
3422  MVT NVT = Node->getSimpleValueType(0);
3423  SDLoc dl(Node);
3424 
3425  SDValue N0 = Node->getOperand(0);
3426  SDValue N1 = Node->getOperand(1);
3427 
3428  // If we have TBM we can use an immediate for the control. If we have BMI
3429  // we should only do this if the BEXTR instruction is implemented well.
3430  // Otherwise moving the control into a register makes this more costly.
3431  // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
3432  // hoisting the move immediate would make it worthwhile with a less optimal
3433  // BEXTR?
3434  if (!Subtarget->hasTBM() &&
3435  !(Subtarget->hasBMI() && Subtarget->hasFastBEXTR()))
3436  return nullptr;
3437 
3438  // Must have a shift right.
3439  if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
3440  return nullptr;
3441 
3442  // Shift can't have additional users.
3443  if (!N0->hasOneUse())
3444  return nullptr;
3445 
3446  // Only supported for 32 and 64 bits.
3447  if (NVT != MVT::i32 && NVT != MVT::i64)
3448  return nullptr;
3449 
3450  // Shift amount and RHS of and must be constant.
3451  ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
3452  ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
3453  if (!MaskCst || !ShiftCst)
3454  return nullptr;
3455 
 3456  // The 'and' RHS must be a mask.
3457  uint64_t Mask = MaskCst->getZExtValue();
3458  if (!isMask_64(Mask))
3459  return nullptr;
3460 
3461  uint64_t Shift = ShiftCst->getZExtValue();
3462  uint64_t MaskSize = countPopulation(Mask);
3463 
3464  // Don't interfere with something that can be handled by extracting AH.
3465  // TODO: If we are able to fold a load, BEXTR might still be better than AH.
3466  if (Shift == 8 && MaskSize == 8)
3467  return nullptr;
3468 
3469  // Make sure we are only using bits that were in the original value, not
3470  // shifted in.
3471  if (Shift + MaskSize > NVT.getSizeInBits())
3472  return nullptr;
3473 
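// The control packs the shift amount into bits 7:0 and the mask length into
// bits 15:8; e.g. (x >> 4) & 0xFF yields a control value of 0x0804.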
3474  SDValue New = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
3475  unsigned ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
3476  unsigned MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
3477 
 3478  // BMI requires the immediate to be placed in a register.
3479  if (!Subtarget->hasTBM()) {
3480  ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
3481  MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
3482  unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
3483  New = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, New), 0);
3484  }
3485 
3486  MachineSDNode *NewNode;
3487  SDValue Input = N0->getOperand(0);
3488  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3489  if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3490  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, New, Input.getOperand(0) };
3491  SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
3492  NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3493  // Update the chain.
3494  ReplaceUses(Input.getValue(1), SDValue(NewNode, 2));
3495  // Record the mem-refs
3496  CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
3497  } else {
3498  NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, New);
3499  }
3500 
3501  return NewNode;
3502 }
3503 
 3504 // Emit a PCMPISTR(I/M) instruction.
3505 MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
3506  bool MayFoldLoad, const SDLoc &dl,
3507  MVT VT, SDNode *Node) {
3508  SDValue N0 = Node->getOperand(0);
3509  SDValue N1 = Node->getOperand(1);
3510  SDValue Imm = Node->getOperand(2);
3511  const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
3512  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
3513 
3514  // Try to fold a load. No need to check alignment.
3515  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3516  if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3517  SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
3518  N1.getOperand(0) };
3519  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other);
3520  MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3521  // Update the chain.
3522  ReplaceUses(N1.getValue(1), SDValue(CNode, 2));
3523  // Record the mem-refs
3524  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
3525  return CNode;
3526  }
3527 
3528  SDValue Ops[] = { N0, N1, Imm };
3529  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32);
3530  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
3531  return CNode;
3532 }
3533 
 3534 // Emit a PCMPESTR(I/M) instruction. Also return the Glue result in case we need
3535 // to emit a second instruction after this one. This is needed since we have two
3536 // copyToReg nodes glued before this and we need to continue that glue through.
3537 MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
3538  bool MayFoldLoad, const SDLoc &dl,
3539  MVT VT, SDNode *Node,
3540  SDValue &InFlag) {
3541  SDValue N0 = Node->getOperand(0);
3542  SDValue N2 = Node->getOperand(2);
3543  SDValue Imm = Node->getOperand(4);
3544  const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
3545  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
3546 
3547  // Try to fold a load. No need to check alignment.
3548  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3549  if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3550  SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
3551  N2.getOperand(0), InFlag };
3552  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue);
3553  MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3554  InFlag = SDValue(CNode, 3);
3555  // Update the chain.
3556  ReplaceUses(N2.getValue(1), SDValue(CNode, 2));
3557  // Record the mem-refs
3558  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()});
3559  return CNode;
3560  }
3561 
3562  SDValue Ops[] = { N0, N2, Imm, InFlag };
3563  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Glue);
3564  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
3565  InFlag = SDValue(CNode, 2);
3566  return CNode;
3567 }
3568 
3569 bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3570  EVT VT = N->getValueType(0);
3571 
3572  // Only handle scalar shifts.
3573  if (VT.isVector())
3574  return false;
3575 
3576  // Narrower shifts only mask to 5 bits in hardware.
3577  unsigned Size = VT == MVT::i64 ? 64 : 32;
3578 
3579  SDValue OrigShiftAmt = N->getOperand(1);
3580  SDValue ShiftAmt = OrigShiftAmt;
3581  SDLoc DL(N);
3582 
3583  // Skip over a truncate of the shift amount.
3584  if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
3585  ShiftAmt = ShiftAmt->getOperand(0);
3586 
3587  // This function is called after X86DAGToDAGISel::matchBitExtract(),
 3588  // so we are not afraid that we might mess up a BZHI/BEXTR pattern.
3589 
3590  SDValue NewShiftAmt;
3591  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3592  SDValue Add0 = ShiftAmt->getOperand(0);
3593  SDValue Add1 = ShiftAmt->getOperand(1);
3594  // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3595  // to avoid the ADD/SUB.
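// For example, with Size == 32 the hardware only uses the shift amount
// modulo 32, so (shl x, (add y, 32)) can shift by just y, and
// (shl x, (sub 32, y)) can shift by (0 - y).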
3596  if (isa<ConstantSDNode>(Add1) &&
3597  cast<ConstantSDNode>(Add1)->getZExtValue() % Size == 0) {
3598  NewShiftAmt = Add0;
3599  // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
3600  // generate a NEG instead of a SUB of a constant.
3601  } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3602  isa<ConstantSDNode>(Add0) &&
3603  cast<ConstantSDNode>(Add0)->getZExtValue() != 0 &&
3604  cast<ConstantSDNode>(Add0)->getZExtValue() % Size == 0) {
3605  // Insert a negate op.
3606  // TODO: This isn't guaranteed to replace the sub if there is a logic cone
3607  // that uses it that's not a shift.
3608  EVT SubVT = ShiftAmt.getValueType();
3609  SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
3610  SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, Add1);
3611  NewShiftAmt = Neg;
3612 
3613  // Insert these operands into a valid topological order so they can
3614  // get selected independently.
3615  insertDAGNode(*CurDAG, OrigShiftAmt, Zero);
3616  insertDAGNode(*CurDAG, OrigShiftAmt, Neg);
3617  } else
3618  return false;
3619  } else
3620  return false;
3621 
3622  if (NewShiftAmt.getValueType() != MVT::i8) {
3623  // Need to truncate the shift amount.
3624  NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
3625  // Add to a correct topological ordering.
3626  insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
3627  }
3628 
3629  // Insert a new mask to keep the shift amount legal. This should be removed
3630  // by isel patterns.
3631  NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
3632  CurDAG->getConstant(Size - 1, DL, MVT::i8));
3633  // Place in a correct topological ordering.
3634  insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
3635 
3636  SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
3637  NewShiftAmt);
3638  if (UpdatedNode != N) {
3639  // If we found an existing node, we should replace ourselves with that node
3640  // and wait for it to be selected after its other users.
3641  ReplaceNode(N, UpdatedNode);
3642  return true;
3643  }
3644 
3645  // If the original shift amount is now dead, delete it so that we don't run
3646  // it through isel.
3647  if (OrigShiftAmt.getNode()->use_empty())
3648  CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());
3649 
3650  // Now that we've optimized the shift amount, defer to normal isel to get
3651  // load folding and legacy vs BMI2 selection without repeating it here.
3652  SelectCode(N);
3653  return true;
3654 }
3655 
3656 bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
3657  MVT NVT = N->getSimpleValueType(0);
3658  unsigned Opcode = N->getOpcode();
3659  SDLoc dl(N);
3660 
3661  // For operations of the form (x << C1) op C2, check if we can use a smaller
3662  // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
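// For example, (or (shl x, 8), 0x1F00) becomes (shl (or x, 0x1F), 8): the
// shifted-down constant 0x1F fits in a sign-extended imm8, while 0x1F00 would
// need a full imm32.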
3663  SDValue Shift = N->getOperand(0);
3664  SDValue N1 = N->getOperand(1);
3665 
 3666  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
 3667  if (!Cst)
3668  return false;
3669 
3670  int64_t Val = Cst->getSExtValue();
3671 
3672  // If we have an any_extend feeding the AND, look through it to see if there
3673  // is a shift behind it. But only if the AND doesn't use the extended bits.
3674  // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
3675  bool FoundAnyExtend = false;
3676  if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
3677  Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
3678  isUInt<32>(Val)) {
3679  FoundAnyExtend = true;
3680  Shift = Shift.getOperand(0);
3681  }
3682 
3683  if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
3684  return false;
3685 
3686  // i8 is unshrinkable, i16 should be promoted to i32.
3687  if (NVT != MVT::i32 && NVT != MVT::i64)
3688  return false;
3689 
3690  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
3691  if (!ShlCst)
3692  return false;
3693 
3694  uint64_t ShAmt = ShlCst->getZExtValue();
3695 
3696  // Make sure that we don't change the operation by removing bits.
 3697  // This only matters for OR and XOR; AND is unaffected.
3698  uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1;
3699  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
3700  return false;
3701 
3702  // Check the minimum bitwidth for the new constant.
3703  // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
3704  auto CanShrinkImmediate = [&](int64_t &ShiftedVal) {
3705  if (Opcode == ISD::AND) {
3706  // AND32ri is the same as AND64ri32 with zext imm.
3707  // Try this before sign extended immediates below.
3708  ShiftedVal = (uint64_t)Val >> ShAmt;
3709  if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
3710  return true;
3711  // Also swap order when the AND can become MOVZX.
3712  if (ShiftedVal == UINT8_MAX || ShiftedVal == UINT16_MAX)
3713  return true;
3714  }
3715  ShiftedVal = Val >> ShAmt;
3716  if ((!isInt<8>(Val) && isInt<8>(ShiftedVal)) ||
3717  (!isInt<32>(Val) && isInt<32>(ShiftedVal)))
3718  return true;
3719  if (Opcode != ISD::AND) {
3720  // MOV32ri+OR64r/XOR64r is cheaper than MOV64ri64+OR64rr/XOR64rr
3721  ShiftedVal = (uint64_t)Val >> ShAmt;
3722  if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
3723  return true;
3724  }
3725  return false;
3726  };
3727 
3728  int64_t ShiftedVal;
3729  if (!CanShrinkImmediate(ShiftedVal))
3730  return false;
3731 
3732  // Ok, we can reorder to get a smaller immediate.
3733 
 3734  // But it's possible the original immediate allowed an AND to become MOVZX.
 3735  // Do this check late so the MaskedValueIsZero call is made as late as
 3736  // possible.
3737  if (Opcode == ISD::AND) {
3738  // Find the smallest zext this could possibly be.
3739  unsigned ZExtWidth = Cst->getAPIntValue().getActiveBits();
3740  ZExtWidth = PowerOf2Ceil(std::max(ZExtWidth, 8U));
3741 
3742  // Figure out which bits need to be zero to achieve that mask.
3743  APInt NeededMask = APInt::getLowBitsSet(NVT.getSizeInBits(),
3744  ZExtWidth);
3745  NeededMask &= ~Cst->getAPIntValue();
3746 
3747  if (CurDAG->MaskedValueIsZero(N->getOperand(0), NeededMask))
3748  return false;
3749  }
3750 
3751  SDValue X = Shift.getOperand(0);
3752  if (FoundAnyExtend) {
3753  SDValue NewX = CurDAG->getNode(ISD::ANY_EXTEND, dl, NVT, X);
3754  insertDAGNode(*CurDAG, SDValue(N, 0), NewX);
3755  X = NewX;
3756  }
3757 
3758  SDValue NewCst = CurDAG->getConstant(ShiftedVal, dl, NVT);
3759  insertDAGNode(*CurDAG, SDValue(N, 0), NewCst);
3760  SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, X, NewCst);
3761  insertDAGNode(*CurDAG, SDValue(N, 0), NewBinOp);
3762  SDValue NewSHL = CurDAG->getNode(ISD::SHL, dl, NVT, NewBinOp,
3763  Shift.getOperand(1));
3764  ReplaceNode(N, NewSHL.getNode());
3765  SelectCode(NewSHL.getNode());
3766  return true;
3767 }
3768 
3769 /// If the high bits of an 'and' operand are known zero, try setting the
3770 /// high bits of an 'and' constant operand to produce a smaller encoding by
3771 /// creating a small, sign-extended negative immediate rather than a large
3772 /// positive one. This reverses a transform in SimplifyDemandedBits that
3773 /// shrinks mask constants by clearing bits. There is also a possibility that
3774 /// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that
3775 /// case, just replace the 'and'. Return 'true' if the node is replaced.
3776 bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
3777  // i8 is unshrinkable, i16 should be promoted to i32, and vector ops don't
3778  // have immediate operands.
3779  MVT VT = And->getSimpleValueType(0);
3780  if (VT != MVT::i32 && VT != MVT::i64)
3781  return false;
3782 
3783  auto *And1C = dyn_cast<ConstantSDNode>(And->getOperand(1));
3784  if (!And1C)
3785  return false;
3786 
 3787  // Bail out if the mask constant is already negative; it can't shrink any further.
3788  // If the upper 32 bits of a 64 bit mask are all zeros, we have special isel
3789  // patterns to use a 32-bit and instead of a 64-bit and by relying on the
3790  // implicit zeroing of 32 bit ops. So we should check if the lower 32 bits
3791  // are negative too.
3792  APInt MaskVal = And1C->getAPIntValue();
3793  unsigned MaskLZ = MaskVal.countLeadingZeros();
3794  if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32))
3795  return false;
3796 
3797  // Don't extend into the upper 32 bits of a 64 bit mask.
3798  if (VT == MVT::i64 && MaskLZ >= 32) {
3799  MaskLZ -= 32;
3800  MaskVal = MaskVal.trunc(32);
3801  }
3802 
3803  SDValue And0 = And->getOperand(0);
3804  APInt HighZeros = APInt::getHighBitsSet(MaskVal.getBitWidth(), MaskLZ);
3805  APInt NegMaskVal = MaskVal | HighZeros;
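// For example, a 32-bit 'and' with mask 0x00000FF0 needs an imm32, but if the
// top 20 bits of the other operand are known zero the mask can be widened to
// 0xFFFFFFF0 (-16), which encodes as a sign-extended imm8.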
3806 
3807  // If a negative constant would not allow a smaller encoding, there's no need
3808  // to continue. Only change the constant when we know it's a win.
3809  unsigned MinWidth = NegMaskVal.getMinSignedBits();
3810  if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getMinSignedBits() <= 32))
3811  return false;
3812 
3813  // Extend masks if we truncated above.
3814  if (VT == MVT::i64 && MaskVal.getBitWidth() < 64) {
3815  NegMaskVal = NegMaskVal.zext(64);
3816  HighZeros = HighZeros.zext(64);
3817  }
3818 
3819  // The variable operand must be all zeros in the top bits to allow using the
3820  // new, negative constant as the mask.
3821  if (!CurDAG->MaskedValueIsZero(And0, HighZeros))
3822  return false;
3823 
3824  // Check if the mask is -1. In that case, this is an unnecessary instruction
3825  // that escaped earlier analysis.
3826  if (NegMaskVal.isAllOnesValue()) {
3827  ReplaceNode(And, And0.getNode());
3828  return true;
3829  }
3830 
3831  // A negative mask allows a smaller encoding. Create a new 'and' node.
3832  SDValue NewMask = CurDAG->getConstant(NegMaskVal, SDLoc(And), VT);
3833  SDValue NewAnd = CurDAG->getNode(ISD::AND, SDLoc(And), VT, And0, NewMask);
3834  ReplaceNode(And, NewAnd.getNode());
3835  SelectCode(NewAnd.getNode());
3836  return true;
3837 }
3838 
3839 static unsigned getVPTESTMOpc(MVT TestVT, bool IsTestN, bool FoldedLoad,
3840  bool FoldedBCast, bool Masked) {
3841  if (Masked) {
3842  if (FoldedLoad) {
3843  switch (TestVT.SimpleTy) {
3844  default: llvm_unreachable("Unexpected VT!");
3845  case MVT::v16i8:
3846  return IsTestN ? X86::VPTESTNMBZ128rmk : X86::VPTESTMBZ128rmk;
3847  case MVT::v8i16:
3848  return IsTestN ? X86::VPTESTNMWZ128rmk : X86::VPTESTMWZ128rmk;
3849  case MVT::v4i32:
3850  return IsTestN ? X86::VPTESTNMDZ128rmk : X86::VPTESTMDZ128rmk;
3851  case MVT::v2i64:
3852  return IsTestN ? X86::VPTESTNMQZ128rmk : X86::VPTESTMQZ128rmk;
3853  case MVT::v32i8:
3854  return IsTestN ? X86::VPTESTNMBZ256rmk : X86::VPTESTMBZ256rmk;
3855  case MVT::v16i16:
3856  return IsTestN ? X86::VPTESTNMWZ256rmk : X86::VPTESTMWZ256rmk;
3857  case MVT::v8i32:
3858  return IsTestN ? X86::VPTESTNMDZ256rmk : X86::VPTESTMDZ256rmk;
3859  case MVT::v4i64:
3860  return IsTestN ? X86::VPTESTNMQZ256rmk : X86::VPTESTMQZ256rmk;
3861  case MVT::v64i8:
3862  return IsTestN ? X86::VPTESTNMBZrmk : X86::VPTESTMBZrmk;
3863  case MVT::v32i16:
3864  return IsTestN ? X86::VPTESTNMWZrmk : X86::VPTESTMWZrmk;
3865  case MVT::v16i32:
3866  return IsTestN ? X86::VPTESTNMDZrmk : X86::VPTESTMDZrmk;
3867  case MVT::v8i64:
3868  return IsTestN ? X86::VPTESTNMQZrmk : X86::VPTESTMQZrmk;
3869  }
3870  }
3871 
3872  if (FoldedBCast) {
3873  switch (TestVT.SimpleTy) {
3874  default: llvm_unreachable("Unexpected VT!");
3875  case MVT::v4i32:
3876  return IsTestN ? X86::VPTESTNMDZ128rmbk : X86::VPTESTMDZ128rmbk;
3877  case MVT::v2i64:
3878  return IsTestN ? X86::VPTESTNMQZ128rmbk : X86::VPTESTMQZ128rmbk;
3879  case MVT::v8i32:
3880  return IsTestN ? X86::VPTESTNMDZ256rmbk : X86::VPTESTMDZ256rmbk;
3881  case MVT::v4i64:
3882  return IsTestN ? X86::VPTESTNMQZ256rmbk : X86::VPTESTMQZ256rmbk;
3883  case MVT::v16i32:
3884  return IsTestN ? X86::VPTESTNMDZrmbk : X86::VPTESTMDZrmbk;
3885  case MVT::v8i64:
3886  return IsTestN ? X86::VPTESTNMQZrmbk : X86::VPTESTMQZrmbk;
3887  }
3888  }
3889 
3890  switch (TestVT.SimpleTy) {
3891  default: llvm_unreachable("Unexpected VT!");
3892  case MVT::v16i8:
3893  return IsTestN ? X86::VPTESTNMBZ128rrk : X86::VPTESTMBZ128rrk;
3894  case MVT::v8i16:
3895  return IsTestN ? X86::VPTESTNMWZ128rrk : X86::VPTESTMWZ128rrk;
3896  case MVT::v4i32:
3897  return IsTestN ? X86::VPTESTNMDZ128rrk : X86::VPTESTMDZ128rrk;
3898  case MVT::v2i64:
3899  return IsTestN ? X86::VPTESTNMQZ128rrk : X86::VPTESTMQZ128rrk;
3900  case MVT::v32i8:
3901  return IsTestN ? X86::VPTESTNMBZ256rrk : X86::VPTESTMBZ256rrk;
3902  case MVT::v16i16:
3903  return IsTestN ? X86::VPTESTNMWZ256rrk : X86::VPTESTMWZ256rrk;
3904  case MVT::v8i32:
3905  return IsTestN ? X86::VPTESTNMDZ256rrk : X86::VPTESTMDZ256rrk;
3906  case MVT::v4i64:
3907  return IsTestN ? X86::VPTESTNMQZ256rrk : X86::VPTESTMQZ256rrk;
3908  case MVT::v64i8:
3909  return IsTestN ? X86::VPTESTNMBZrrk : X86::VPTESTMBZrrk;
3910  case MVT::v32i16:
3911  return IsTestN ? X86::VPTESTNMWZrrk : X86::VPTESTMWZrrk;
3912  case MVT::v16i32:
3913  return IsTestN ? X86::VPTESTNMDZrrk : X86::VPTESTMDZrrk;
3914  case MVT::v8i64:
3915  return IsTestN ? X86::VPTESTNMQZrrk : X86::VPTESTMQZrrk;
3916  }
3917  }
3918 
3919  if (FoldedLoad) {
3920  switch (TestVT.SimpleTy) {
3921  default: llvm_unreachable("Unexpected VT!");
3922  case MVT::v16i8:
3923  return IsTestN ? X86::VPTESTNMBZ128rm : X86::VPTESTMBZ128rm;
3924  case MVT::v8i16:
3925  return IsTestN ? X86::VPTESTNMWZ128rm : X86::VPTESTMWZ128rm;
3926  case MVT::v4i32:
3927  return IsTestN ? X86::VPTESTNMDZ128rm : X86::VPTESTMDZ128rm;
3928  case MVT::v2i64:
3929  return IsTestN ? X86::VPTESTNMQZ128rm : X86::VPTESTMQZ128rm;
3930  case MVT::v32i8:
3931  return IsTestN ? X86::VPTESTNMBZ256rm : X86::VPTESTMBZ256rm;
3932  case MVT::v16i16:
3933  return IsTestN ? X86::VPTESTNMWZ256rm : X86::VPTESTMWZ256rm;
3934  case MVT::v8i32:
3935  return IsTestN ? X86::VPTESTNMDZ256rm : X86::VPTESTMDZ256rm;
3936  case MVT::v4i64:
3937  return IsTestN ? X86::VPTESTNMQZ256rm : X86::VPTESTMQZ256rm;
3938  case MVT::v64i8:
3939  return IsTestN ? X86::VPTESTNMBZrm : X86::VPTESTMBZrm;
3940  case MVT::v32i16:
3941  return IsTestN ? X86::VPTESTNMWZrm : X86::VPTESTMWZrm;
3942  case MVT::v16i32:
3943  return IsTestN ? X86::VPTESTNMDZrm : X86::VPTESTMDZrm;
3944  case MVT::v8i64:
3945  return IsTestN ? X86::VPTESTNMQZrm : X86::VPTESTMQZrm;
3946  }
3947  }
3948 
3949  if (FoldedBCast) {
3950  switch (TestVT.SimpleTy) {
3951  default: llvm_unreachable("Unexpected VT!");
3952  case MVT::v4i32:
3953  return IsTestN ? X86::VPTESTNMDZ128rmb : X86::VPTESTMDZ128rmb;
3954  case MVT::v2i64:
3955  return IsTestN ? X86::VPTESTNMQZ128rmb : X86::VPTESTMQZ128rmb;
3956  case MVT::v8i32:
3957  return IsTestN ? X86::VPTESTNMDZ256rmb : X86::VPTESTMDZ256rmb;
3958  case MVT::v4i64:
3959  return IsTestN ? X86::VPTESTNMQZ256rmb : X86::VPTESTMQZ256rmb;
3960  case MVT::v16i32:
3961  return IsTestN ? X86::VPTESTNMDZrmb : X86::VPTESTMDZrmb;
3962  case MVT::v8i64:
3963  return IsTestN ? X86::VPTESTNMQZrmb : X86::VPTESTMQZrmb;
3964  }
3965  }
3966 
3967  switch (TestVT.SimpleTy) {
3968  default: llvm_unreachable("Unexpected VT!");
3969  case MVT::v16i8:
3970  return IsTestN ? X86::VPTESTNMBZ128rr : X86::VPTESTMBZ128rr;
3971  case MVT::v8i16:
3972  return IsTestN ? X86::VPTESTNMWZ128rr : X86::VPTESTMWZ128rr;
3973  case MVT::v4i32:
3974  return IsTestN ? X86::VPTESTNMDZ128rr : X86::VPTESTMDZ128rr;
3975  case MVT::v2i64:
3976  return IsTestN ? X86::VPTESTNMQZ128rr : X86::VPTESTMQZ128rr;
3977  case MVT::v32i8:
3978  return IsTestN ? X86::VPTESTNMBZ256rr : X86::VPTESTMBZ256rr;
3979  case MVT::v16i16:
3980  return IsTestN ? X86::VPTESTNMWZ256rr : X86::VPTESTMWZ256rr;
3981  case MVT::v8i32:
3982  return IsTestN ? X86::VPTESTNMDZ256rr : X86::VPTESTMDZ256rr;
3983  case MVT::v4i64:
3984  return IsTestN ? X86::VPTESTNMQZ256rr : X86::VPTESTMQZ256rr;
3985  case MVT::v64i8:
3986  return IsTestN ? X86::VPTESTNMBZrr : X86::VPTESTMBZrr;
3987  case MVT::v32i16:
3988  return IsTestN ? X86::VPTESTNMWZrr : X86::VPTESTMWZrr;
3989  case MVT::v16i32:
3990  return IsTestN ? X86::VPTESTNMDZrr : X86::VPTESTMDZrr;
3991  case MVT::v8i64:
3992  return IsTestN ? X86::VPTESTNMQZrr : X86::VPTESTMQZrr;
3993  }
3994 }
3995 
 3996 // Try to create a VPTESTM instruction. If InMask is not null, it will be used
3997 // to form a masked operation.
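// For example, (setcc (and x, y), 0, ne) becomes VPTESTM, and the SETEQ form
// becomes VPTESTNM (see IsTestN below).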
3998 bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
3999  SDValue InMask) {
4000  assert(Subtarget->hasAVX512() && "Expected AVX512!");
4002  "Unexpected VT!");
4003 
4004  // Look for equal and not equal compares.
4005  ISD::CondCode CC = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
4006  if (CC != ISD::SETEQ && CC != ISD::SETNE)
4007  return false;
4008 
4009  // See if we're comparing against zero. This should have been canonicalized
4010  // to RHS during lowering.
 4011  if (!ISD::isBuildVectorAllZeros(Setcc.getOperand(1).getNode()))
 4012  return false;
4013 
4014  SDValue N0 = Setcc.getOperand(0);
4015 
4016  MVT CmpVT = N0.getSimpleValueType();
4017  MVT CmpSVT = CmpVT.getVectorElementType();
4018 
4019  // Start with both operands the same. We'll try to refine this.
4020  SDValue Src0 = N0;
4021  SDValue Src1 = N0;
4022 
4023  {
4024  // Look through single use bitcasts.
4025  SDValue N0Temp = N0;
4026  if (N0Temp.getOpcode() == ISD::BITCAST && N0Temp.hasOneUse())
4027  N0Temp = N0.getOperand(0);
4028 
4029  // Look for single use AND.
4030  if (N0Temp.getOpcode() == ISD::AND && N0Temp.hasOneUse()) {
4031  Src0 = N0Temp.getOperand(0);
4032  Src1 = N0Temp.getOperand(1);
4033  }
4034  }
4035 
4036  // Without VLX we need to widen the load.
4037  bool Widen = !Subtarget->hasVLX() && !CmpVT.is512BitVector();
4038 
4039  // We can only fold loads if the sources are unique.
4040  bool CanFoldLoads = Src0 != Src1;
4041 
4042  // Try to fold loads unless we need to widen.
4043  bool FoldedLoad = false;
4044  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Load;
4045  if (!Widen && CanFoldLoads) {
4046  Load = Src1;
4047  FoldedLoad = tryFoldLoad(Root, N0.getNode(), Load, Tmp0, Tmp1, Tmp2, Tmp3,
4048  Tmp4);
4049  if (!FoldedLoad) {
 4050  // AND is commutative.
4051  Load = Src0;
4052  FoldedLoad = tryFoldLoad(Root, N0.getNode(), Load, Tmp0, Tmp1, Tmp2,
4053  Tmp3, Tmp4);
4054  if (FoldedLoad)
4055  std::swap(Src0, Src1);
4056  }
4057  }
4058 
4059  auto findBroadcastedOp = [](SDValue Src, MVT CmpSVT, SDNode *&Parent) {
4060  // Look through single use bitcasts.
4061  if (Src.getOpcode() == ISD::BITCAST && Src.hasOneUse())
4062  Src = Src.getOperand(0);
4063 
4064  if (Src.getOpcode() == X86ISD::VBROADCAST && Src.hasOneUse()) {
4065  Parent = Src.getNode();
4066  Src = Src.getOperand(0);
4067  if (Src.getSimpleValueType() == CmpSVT)
4068  return Src;
4069  }
4070 
4071  return SDValue();
4072  };
4073 
 4074  // If we didn't fold a load, try to match a broadcast. No widening limitation
4075  // for this. But only 32 and 64 bit types are supported.
4076  bool FoldedBCast = false;
4077  if (!FoldedLoad && CanFoldLoads &&
4078  (CmpSVT == MVT::i32 || CmpSVT == MVT::i64)) {
4079  SDNode *ParentNode = nullptr;
4080  if ((Load = findBroadcastedOp(Src1, CmpSVT, ParentNode))) {
4081  FoldedBCast = tryFoldLoad(Root, ParentNode, Load, Tmp0,
4082  Tmp1, Tmp2, Tmp3, Tmp4);
4083  }
4084 
4085  // Try the other operand.
4086  if (!FoldedBCast) {
4087  if ((Load = findBroadcastedOp(Src0, CmpSVT, ParentNode))) {
4088  FoldedBCast = tryFoldLoad(Root, ParentNode, Load, Tmp0,
4089  Tmp1, Tmp2, Tmp3, Tmp4);
4090  if (FoldedBCast)
4091  std::swap(Src0, Src1);
4092  }
4093  }
4094  }
4095 
4096  auto getMaskRC = [](MVT MaskVT) {
4097  switch (MaskVT.SimpleTy) {
4098  default: llvm_unreachable("Unexpected VT!");
4099  case MVT::v2i1: return X86::VK2RegClassID;
4100  case MVT::v4i1: return X86::VK4RegClassID;
4101  case MVT::v8i1: return X86::VK8RegClassID;
4102  case MVT::v16i1: return X86::VK16RegClassID;
4103  case MVT::v32i1: return X86::VK32RegClassID;
4104  case MVT::v64i1: return X86::VK64RegClassID;
4105  }
4106  };
4107 
4108  bool IsMasked = InMask.getNode() != nullptr;
4109 
4110  SDLoc dl(Root);
4111 
4112  MVT ResVT = Setcc.getSimpleValueType();
4113  MVT MaskVT = ResVT;
4114  if (Widen) {
4115  // Widen the inputs using insert_subreg or copy_to_regclass.
4116  unsigned Scale = CmpVT.is128BitVector() ? 4 : 2;
4117  unsigned SubReg = CmpVT.is128BitVector() ? X86::sub_xmm : X86::sub_ymm;
4118  unsigned NumElts = CmpVT.getVectorNumElements() * Scale;
4119  CmpVT = MVT::getVectorVT(CmpSVT, NumElts);
4120  MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4121  SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, dl,
4122  CmpVT), 0);
4123  Src0 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src0);
4124 
4125  assert(!FoldedLoad && "Shouldn't have folded the load");
4126  if (!FoldedBCast)
4127  Src1 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src1);
4128 
4129  if (IsMasked) {
4130  // Widen the mask.
4131  unsigned RegClass = getMaskRC(MaskVT);
4132  SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
4133  InMask = SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
4134  dl, MaskVT, InMask, RC), 0);
4135  }
4136  }
4137 
4138  bool IsTestN = CC == ISD::SETEQ;
4139  unsigned Opc = getVPTESTMOpc(CmpVT, IsTestN, FoldedLoad, FoldedBCast,
4140  IsMasked);
4141 
4142  MachineSDNode *CNode;
4143  if (FoldedLoad || FoldedBCast) {
4144  SDVTList VTs = CurDAG->getVTList(MaskVT, MVT::Other);
4145 
4146  if (IsMasked) {
4147  SDValue Ops[] = { InMask, Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
4148  Load.getOperand(0) };
4149  CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4150  } else {
4151  SDValue Ops[] = { Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
4152  Load.getOperand(0) };
4153  CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4154  }
4155 
4156  // Update the chain.
4157  ReplaceUses(Load.getValue(1), SDValue(CNode, 1));
4158  // Record the mem-refs
4159  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(Load)->getMemOperand()});
4160  } else {
4161  if (IsMasked)
4162  CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1);
4163  else
4164  CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, Src0, Src1);
4165  }
4166 
4167  // If we widened, we need to shrink the mask VT.
4168  if (Widen) {
4169  unsigned RegClass = getMaskRC(ResVT);
4170  SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
4171  CNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
4172  dl, ResVT, SDValue(CNode, 0), RC);
4173  }
4174 
4175  ReplaceUses(SDValue(Root, 0), SDValue(CNode, 0));
4176  CurDAG->RemoveDeadNode(Root);
4177  return true;
4178 }
4179 
4180 void X86DAGToDAGISel::Select(SDNode *Node) {
4181  MVT NVT = Node->getSimpleValueType(0);
4182  unsigned Opcode = Node->getOpcode();
4183  SDLoc dl(Node);
4184 
4185  if (Node->isMachineOpcode()) {
4186  LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
4187  Node->setNodeId(-1);
4188  return; // Already selected.
4189  }
4190 
4191  switch (Opcode) {
4192  default: break;
4193  case ISD::INTRINSIC_VOID: {
4194  unsigned IntNo = Node->getConstantOperandVal(1);
4195  switch (IntNo) {
4196  default: break;
4197  case Intrinsic::x86_sse3_monitor:
4198  case Intrinsic::x86_monitorx:
4199  case Intrinsic::x86_clzero: {
4200  bool Use64BitPtr = Node->getOperand(2).getValueType() == MVT::i64;
4201 
4202  unsigned Opc = 0;
4203  switch (IntNo) {
4204  case Intrinsic::x86_sse3_monitor:
4205  if (!Subtarget->hasSSE3())
4206  break;
4207  Opc = Use64BitPtr ? X86::MONITOR64rrr : X86::MONITOR32rrr;
4208  break;
4209  case Intrinsic::x86_monitorx:
4210  if (!Subtarget->hasMWAITX())
4211  break;
4212  Opc = Use64BitPtr ? X86::MONITORX64rrr : X86::MONITORX32rrr;
4213  break;
4214  case Intrinsic::x86_clzero:
4215  if (!Subtarget->hasCLZERO())
4216  break;
4217  Opc = Use64BitPtr ? X86::CLZERO64r : X86::CLZERO32r;
4218  break;
4219  }
4220 
4221  if (Opc) {
4222  unsigned PtrReg = Use64BitPtr ? X86::RAX : X86::EAX;
4223  SDValue Chain = CurDAG->getCopyToReg(Node->getOperand(0), dl, PtrReg,
4224  Node->getOperand(2), SDValue());
4225  SDValue InFlag = Chain.getValue(1);
4226 
4227  if (IntNo == Intrinsic::x86_sse3_monitor ||
4228  IntNo == Intrinsic::x86_monitorx) {
4229  // Copy the other two operands to ECX and EDX.
4230  Chain = CurDAG->getCopyToReg(Chain, dl, X86::ECX, Node->getOperand(3),
4231  InFlag);
4232  InFlag = Chain.getValue(1);
4233  Chain = CurDAG->getCopyToReg(Chain, dl, X86::EDX, Node->getOperand(4),
4234  InFlag);
4235  InFlag = Chain.getValue(1);
4236  }
4237 
4238  MachineSDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
4239  { Chain, InFlag});
4240  ReplaceNode(Node, CNode);
4241  return;
4242  }
4243  }
4244  }
4245 
4246  break;
4247  }
4248  case ISD::BRIND: {
4249  if (Subtarget->isTargetNaCl())
 4250  // NaCl has its own pass where jmp %r32 is converted to jmp %r64. We
4251  // leave the instruction alone.
4252  break;
4253  if (Subtarget->isTarget64BitILP32()) {
4254  // Converts a 32-bit register to a 64-bit, zero-extended version of
4255  // it. This is needed because x86-64 can do many things, but jmp %r32
4256  // ain't one of them.
4257  const SDValue &Target = Node->getOperand(1);
 4258  assert(Target.getSimpleValueType() == llvm::MVT::i32);
 4259  SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64));
4260  SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other,
4261  Node->getOperand(0), ZextTarget);
4262  ReplaceNode(Node, Brind.getNode());
4263  SelectCode(ZextTarget.getNode());
4264  SelectCode(Brind.getNode());
4265  return;
4266  }
4267  break;
4268  }
4269  case X86ISD::GlobalBaseReg:
4270  ReplaceNode(Node, getGlobalBaseReg());
4271  return;
4272 
4273  case ISD::BITCAST:
4274  // Just drop all 128/256/512-bit bitcasts.
4275  if (NVT.is512BitVector() || NVT.is256BitVector() || NVT.is128BitVector() ||
4276  NVT == MVT::f128) {
4277  ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
4278  CurDAG->RemoveDeadNode(Node);
4279  return;
4280  }
4281  break;
4282 
4283  case ISD::VSELECT: {
 4284  // Replace VSELECT with non-mask conditions with BLENDV.
 4285  if (Node->getOperand(0).getValueType().getVectorElementType() == MVT::i1)
 4286  break;
4287 
4288  assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!");
4289  SDValue Blendv = CurDAG->getNode(
4290  X86ISD::BLENDV, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
4291  Node->getOperand(1), Node->getOperand(2));
4292  ReplaceNode(Node, Blendv.getNode());
4293  SelectCode(Blendv.getNode());
4294  // We already called ReplaceUses.
4295  return;
4296  }
4297 
4298  case ISD::SRL:
4299  if (matchBitExtract(Node))
4300  return;
 4301  LLVM_FALLTHROUGH;
 4302  case ISD::SRA:
4303  case ISD::SHL:
4304  if (tryShiftAmountMod(Node))
4305  return;
4306  break;
4307 
4308  case ISD::AND:
4309  if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) {
4310  // Try to form a masked VPTESTM. Operands can be in either order.
4311  SDValue N0 = Node->getOperand(0);
4312  SDValue N1 = Node->getOperand(1);
4313  if (N0.getOpcode() == ISD::SETCC && N0.hasOneUse() &&
4314  tryVPTESTM(Node, N0, N1))
4315  return;
4316  if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
4317  tryVPTESTM(Node, N1, N0))
4318  return;
4319  }
4320 
4321  if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) {
4322  ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
4323  CurDAG->RemoveDeadNode(Node);
4324  return;
4325  }
4326  if (matchBitExtract(Node))
4327  return;
4328  if (AndImmShrink && shrinkAndImmediate(Node))
4329  return;
4330 
 4331  LLVM_FALLTHROUGH;
 4332  case ISD::OR:
4333  case ISD::XOR:
4334  if (tryShrinkShlLogicImm(Node))
4335  return;
4336 
 4337  LLVM_FALLTHROUGH;
 4338  case ISD::ADD:
4339  case ISD::SUB: {
4340  // Try to avoid folding immediates with multiple uses for optsize.
4341  // This code tries to select to register form directly to avoid going
4342  // through the isel table which might fold the immediate. We can't change
 4343  // the add/sub/and/or/xor with-immediate patterns in the
4344  // tablegen files to check immediate use count without making the patterns
4345  // unavailable to the fast-isel table.
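// For example, at minsize a constant such as 1234 that is shared by several
// instructions is smaller to materialize once in a register and reuse via the
// rr forms than to repeat as an imm32 in every instruction.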
4346  if (!OptForSize)
4347  break;
4348 
4349  // Only handle i8/i16/i32/i64.
4350  if (NVT != MVT::i8 && NVT != MVT::i16 && NVT != MVT::i32 && NVT != MVT::i64)
4351  break;
4352 
4353  SDValue N0 = Node->getOperand(0);
4354  SDValue N1 = Node->getOperand(1);
 4355 
 4356  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
 4357  if (!Cst)
4358  break;
4359 
4360  int64_t Val = Cst->getSExtValue();
4361 
 4362  // Make sure it's an immediate that is considered foldable.
4363  // FIXME: Handle unsigned 32 bit immediates for 64-bit AND.
4364  if (!isInt<8>(Val) && !isInt<32>(Val))
4365  break;
4366 
4367  // Check if we should avoid folding this immediate.
4368  if (!shouldAvoidImmediateInstFormsForSize(N1.getNode()))
4369  break;
4370 
4371  // We should not fold the immediate. So we need a register form instead.
4372  unsigned ROpc, MOpc;
4373  switch (NVT.SimpleTy) {
4374  default: llvm_unreachable("Unexpected VT!");
4375  case MVT::i8:
4376  switch (Opcode) {
4377  default: llvm_unreachable("Unexpected opcode!");
4378  case ISD::ADD: ROpc = X86::ADD8rr; MOpc = X86::ADD8rm; break;
4379  case ISD::SUB: ROpc = X86::SUB8rr; MOpc = X86::SUB8rm; break;
4380  case ISD::AND: ROpc = X86::AND8rr; MOpc = X86::AND8rm; break;
4381  case ISD::OR: ROpc = X86::OR8rr; MOpc = X86::OR8rm; break;
4382  case ISD::XOR: ROpc = X86::XOR8rr; MOpc = X86::XOR8rm; break;
4383  }
4384  break;
4385  case MVT::i16:
4386  switch (Opcode) {
4387  default: llvm_unreachable("Unexpected opcode!");
4388  case ISD::ADD: ROpc = X86::ADD16rr; MOpc = X86::ADD16rm; break;
4389  case ISD::SUB: ROpc = X86::SUB16rr; MOpc = X86::SUB16rm; break;
4390  case ISD::AND: ROpc = X86::AND16rr; MOpc = X86::AND16rm; break;
4391  case ISD::OR: ROpc = X86::OR16rr; MOpc = X86::OR16rm; break;
4392  case ISD::XOR: ROpc = X86::XOR16rr; MOpc = X86::XOR16rm; break;
4393  }
4394  break;
4395  case MVT::i32:
4396  switch (Opcode) {
4397  default: llvm_unreachable("Unexpected opcode!");
4398  case ISD::ADD: ROpc = X86::ADD32rr; MOpc = X86::ADD32rm; break;
4399  case ISD::SUB: ROpc = X86::SUB32rr; MOpc = X86::SUB32rm; break;
4400  case ISD::AND: ROpc = X86::AND32rr; MOpc = X86::AND32rm; break;
4401  case ISD::OR: ROpc = X86::OR32rr; MOpc = X86::OR32rm; break;
4402  case ISD::XOR: ROpc = X86::XOR32rr; MOpc = X86::XOR32rm; break;
4403  }
4404  break;
4405  case MVT::i64:
4406  switch (Opcode) {
4407  default: llvm_unreachable("Unexpected opcode!");
4408  case ISD::ADD: ROpc = X86::ADD64rr; MOpc = X86::ADD64rm; break;
4409  case ISD::SUB: ROpc = X86::SUB64rr; MOpc = X86::SUB64rm; break;
4410  case ISD::AND: ROpc = X86::AND64rr; MOpc = X86::AND64rm; break;
4411  case ISD::OR: ROpc = X86::OR64rr; MOpc = X86::OR64rm; break;
4412  case ISD::XOR: ROpc = X86::XOR64rr; MOpc = X86::XOR64rm; break;
4413  }
4414  break;
4415  }
4416 
 4417  // OK, this is an AND/OR/XOR/ADD/SUB with a constant.
4418 
 4419  // If this is not a subtract, we can still try to fold a load.
4420  if (Opcode != ISD::SUB) {
4421  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4422  if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4423  SDValue Ops[] = { N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
4424  SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
4425  MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4426  // Update the chain.
4427  ReplaceUses(N0.getValue(1), SDValue(CNode, 2));
4428  // Record the mem-refs
4429  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N0)->getMemOperand()});
4430  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4431  CurDAG->RemoveDeadNode(Node);
4432  return;
4433  }
4434  }
4435 
4436  CurDAG->SelectNodeTo(Node, ROpc, NVT, MVT::i32, N0, N1);
4437  return;
4438  }
4439 
4440  case X86ISD::SMUL:
4441  // i16/i32/i64 are handled with isel patterns.
4442  if (NVT != MVT::i8)
4443  break;
 4444  LLVM_FALLTHROUGH;
 4445  case X86ISD::UMUL: {
4446  SDValue N0 = Node->getOperand(0);
4447  SDValue N1 = Node->getOperand(1);
4448 
4449  unsigned LoReg, ROpc, MOpc;
4450  switch (NVT.SimpleTy) {
4451  default: llvm_unreachable("Unsupported VT!");
4452  case MVT::i8:
4453  LoReg = X86::AL;
4454  ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r;
4455  MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m;
4456  break;
4457  case MVT::i16:
4458  LoReg = X86::AX;
4459  ROpc = X86::MUL16r;
4460  MOpc = X86::MUL16m;
4461  break;
4462  case MVT::i32:
4463  LoReg = X86::EAX;
4464  ROpc = X86::MUL32r;
4465  MOpc = X86::MUL32m;
4466  break;
4467  case MVT::i64:
4468  LoReg = X86::RAX;
4469  ROpc = X86::MUL64r;
4470  MOpc = X86::MUL64m;
4471  break;
4472  }
4473 
4474  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4475  bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
 4476  // Multiply is commutative.
4477  if (!FoldedLoad) {
4478  FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4479  if (FoldedLoad)
4480  std::swap(N0, N1);
4481  }
4482 
4483  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
4484  N0, SDValue()).getValue(1);
4485 
4486  MachineSDNode *CNode;
4487  if (FoldedLoad) {
4488  // i16/i32/i64 use an instruction that produces a low and high result even
4489  // though only the low result is used.
4490  SDVTList VTs;
4491  if (NVT == MVT::i8)
4492  VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
4493  else
4494  VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other);
4495 
4496  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
4497  InFlag };
4498  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4499 
4500  // Update the chain.
4501  ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3));
4502  // Record the mem-refs
4503  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4504  } else {
4505  // i16/i32/i64 use an instruction that produces a low and high result even
4506  // though only the low result is used.
4507  SDVTList VTs;
4508  if (NVT == MVT::i8)
4509  VTs = CurDAG->getVTList(NVT, MVT::i32);
4510  else
4511  VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
4512 
4513  CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InFlag});
4514  }
4515 
4516  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4517  ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2));
4518  CurDAG->RemoveDeadNode(Node);
4519  return;
4520  }
4521 
4522  case ISD::SMUL_LOHI:
4523  case ISD::UMUL_LOHI: {
4524  SDValue N0 = Node->getOperand(0);
4525  SDValue N1 = Node->getOperand(1);
4526 
4527  unsigned Opc, MOpc;
4528  bool isSigned = Opcode == ISD::SMUL_LOHI;
4529  if (!isSigned) {
4530  switch (NVT.SimpleTy) {
4531  default: llvm_unreachable("Unsupported VT!");
4532  case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
4533  case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
4534  }
4535  } else {
4536  switch (NVT.SimpleTy) {
4537  default: llvm_unreachable("Unsupported VT!");
4538  case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
4539  case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
4540  }
4541  }
4542 
4543  unsigned SrcReg, LoReg, HiReg;
4544  switch (Opc) {
4545  default: llvm_unreachable("Unknown MUL opcode!");
4546  case X86::IMUL32r:
4547  case X86::MUL32r:
4548  SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
4549  break;
4550  case X86::IMUL64r:
4551  case X86::MUL64r:
4552  SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
4553  break;
4554  }
4555 
4556  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4557  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
 4558  // Multiply is commutative.
4559  if (!foldedLoad) {
4560  foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4561  if (foldedLoad)
4562  std::swap(N0, N1);
4563  }
4564 
4565  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
4566  N0, SDValue()).getValue(1);
4567  if (foldedLoad) {
4568  SDValue Chain;
4569  MachineSDNode *CNode = nullptr;
4570  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
4571  InFlag };
4572  SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
4573  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4574  Chain = SDValue(CNode, 0);
4575  InFlag = SDValue(CNode, 1);
4576 
4577  // Update the chain.
4578  ReplaceUses(N1.getValue(1), Chain);
4579  // Record the mem-refs
4580  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4581  } else {
4582  SDValue Ops[] = { N1, InFlag };
4583  SDVTList VTs = CurDAG->getVTList(MVT::Glue);
4584  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4585  InFlag = SDValue(CNode, 0);
4586  }
4587 
4588  // Copy the low half of the result, if it is needed.
4589  if (!SDValue(Node, 0).use_empty()) {
4590  assert(LoReg && "Register for low half is not defined!");
4591  SDValue ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
4592  NVT, InFlag);
4593  InFlag = ResLo.getValue(2);
4594  ReplaceUses(SDValue(Node, 0), ResLo);
4595  LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
4596  dbgs() << '\n');
4597  }
4598  // Copy the high half of the result, if it is needed.
4599  if (!SDValue(Node, 1).use_empty()) {
4600  assert(HiReg && "Register for high half is not defined!");
4601  SDValue ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
4602  NVT, InFlag);
4603  InFlag = ResHi.getValue(2);
4604  ReplaceUses(SDValue(Node, 1), ResHi);
4605  LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
4606  dbgs() << '\n');
4607  }
4608 
4609  CurDAG->RemoveDeadNode(Node);
4610  return;
4611  }
4612 
4613  case ISD::SDIVREM:
4614  case ISD::UDIVREM: {
4615  SDValue N0 = Node->getOperand(0);
4616  SDValue N1 = Node->getOperand(1);
4617 
4618  unsigned Opc, MOpc;
4619  bool isSigned = Opcode == ISD::SDIVREM;
4620  if (!isSigned) {
4621  switch (NVT.SimpleTy) {
4622  default: llvm_unreachable("Unsupported VT!");
4623  case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
4624  case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
4625  case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
4626  case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
4627  }
4628  } else {
4629  switch (NVT.SimpleTy) {
4630  default: llvm_unreachable("Unsupported VT!");
4631  case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
4632  case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
4633  case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
4634  case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
4635  }
4636  }
4637 
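  // DIV/IDIV divide the double-width value in rDX:rAX (AX for the 8-bit
  // forms) by the operand, leaving the quotient in rAX/AL and the remainder
  // in rDX/AH; CBW/CWD/CDQ/CQO sign-extend the low half into the high half
  // for the signed case.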
4638  unsigned LoReg, HiReg, ClrReg;
4639  unsigned SExtOpcode;
4640  switch (NVT.SimpleTy) {
4641  default: llvm_unreachable("Unsupported VT!");
4642  case MVT::i8:
4643  LoReg = X86::AL; ClrReg = HiReg = X86::AH;
4644  SExtOpcode = X86::CBW;
4645  break;
4646  case MVT::i16:
4647  LoReg = X86::AX; HiReg = X86::DX;
4648  ClrReg = X86::DX;
4649  SExtOpcode = X86::CWD;
4650  break;
4651  case MVT::i32:
4652  LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
4653  SExtOpcode = X86::CDQ;
4654  break;
4655  case MVT::i64:
4656  LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
4657  SExtOpcode = X86::CQO;
4658  break;
4659  }
4660 
4661  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4662  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4663  bool signBitIsZero = CurDAG->SignBitIsZero(N0);
4664 
4665  SDValue InFlag;
4666  if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
4667  // Special case for div8, just use a move with zero extension to AX to
4668  // clear the upper 8 bits (AH).
4669  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain;
4670  MachineSDNode *Move;
4671  if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4672  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
4673  Move = CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
4674  MVT::Other, Ops);
4675  Chain = SDValue(Move, 1);
4676  ReplaceUses(N0.getValue(1), Chain);
4677  // Record the mem-refs
4678  CurDAG->setNodeMemRefs(Move, {cast<LoadSDNode>(N0)->getMemOperand()});
4679  } else {
4680  Move = CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0);
4681  Chain = CurDAG->getEntryNode();
4682  }
4683  Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, SDValue(Move, 0),
4684  SDValue());
4685  InFlag = Chain.getValue(1);
4686  } else {
4687  InFlag =
4688  CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
4689  LoReg, N0, SDValue()).getValue(1);
4690  if (isSigned && !signBitIsZero) {
4691  // Sign extend the low part into the high part.
4692  InFlag =
4693  SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
4694  } else {
4695  // Zero out the high part, effectively zero extending the input.
4696  SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
4697  switch (NVT.SimpleTy) {
4698  case MVT::i16:
4699  ClrNode =
4700  SDValue(CurDAG->getMachineNode(
4701  TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
4702  CurDAG->getTargetConstant(X86::sub_16bit, dl,
4703  MVT::i32)),
4704  0);
4705  break;
4706  case MVT::i32:
4707  break;
4708  case MVT::i64:
4709  ClrNode =
4710  SDValue(CurDAG->getMachineNode(
4711  TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
4712  CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
4713  CurDAG->getTargetConstant(X86::sub_32bit, dl,
4714  MVT::i32)),
4715  0);
4716  break;
4717  default:
4718  llvm_unreachable("Unexpected division source");
4719  }
4720 
4721  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
4722  ClrNode, InFlag).getValue(1);
4723  }
4724  }
4725 
4726  if (foldedLoad) {
4727  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
4728  InFlag };
4729  MachineSDNode *CNode =
4730  CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
4731  InFlag = SDValue(CNode, 1);
4732  // Update the chain.
4733  ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
4734  // Record the mem-refs
4735  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4736  } else {
4737  InFlag =
4738  SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
4739  }
4740 
4741  // Prevent use of AH in a REX instruction by explicitly copying it to
4742  // an ABCD_L register.
4743  //
4744  // The current assumption of the register allocator is that isel
4745  // won't generate explicit references to the GR8_ABCD_H registers. If
4746  // the allocator and/or the backend get enhanced to be more robust in
4747  // that regard, this can be, and should be, removed.
4748  if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
4749  SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
4750  unsigned AHExtOpcode =
4751  isSigned ? X86::MOVSX32rr8_NOREX : X86::MOVZX32rr8_NOREX;
4752 
4753  SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
4754  MVT::Glue, AHCopy, InFlag);
4755  SDValue Result(RNode, 0);
4756  InFlag = SDValue(RNode, 1);
4757 
4758  Result =
4759  CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
4760 
4761  ReplaceUses(SDValue(Node, 1), Result);
4762  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
4763  dbgs() << '\n');
4764  }
4765  // Copy the division (low) result, if it is needed.
4766  if (!SDValue(Node, 0).use_empty()) {
4767  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
4768  LoReg, NVT, InFlag);
4769  InFlag = Result.getValue(2);
4770  ReplaceUses(SDValue(Node, 0), Result);
4771  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
4772  dbgs() << '\n');
4773  }
4774  // Copy the remainder (high) result, if it is needed.
4775  if (!SDValue(Node, 1).use_empty()) {
4776  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
4777  HiReg, NVT, InFlag);
4778  InFlag = Result.getValue(2);
4779  ReplaceUses(SDValue(Node, 1), Result);
4780  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
4781  dbgs() << '\n');
4782  }
4783  CurDAG->RemoveDeadNode(Node);
4784  return;
4785  }
4786 
4787  case X86ISD::CMP: {
4788  SDValue N0 = Node->getOperand(0);
4789  SDValue N1 = Node->getOperand(1);
4790 
4791  // Optimizations for TEST compares.
4792  if (!isNullConstant(N1))
4793  break;
4794 
4795  // Save the original VT of the compare.
4796  MVT CmpVT = N0.getSimpleValueType();
4797 
4798  // If we are comparing (and (shr X, C), Mask) with 0, emit a BEXTR followed
4799  // by a test instruction. The test should be removed later by
4800  // analyzeCompare if we are using only the zero flag.
4801  // TODO: Should we check the users and use the BEXTR flags directly?
4802  if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
4803  if (MachineSDNode *NewNode = matchBEXTRFromAndImm(N0.getNode())) {
4804  unsigned TestOpc = CmpVT == MVT::i64 ? X86::TEST64rr
4805  : X86::TEST32rr;
4806  SDValue BEXTR = SDValue(NewNode, 0);
4807  NewNode = CurDAG->getMachineNode(TestOpc, dl, MVT::i32, BEXTR, BEXTR);
4808  ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
4809  CurDAG->RemoveDeadNode(Node);
4810  return;
4811  }
4812  }
4813 
4814  // We can peek through truncates, but we need to be careful below.
4815  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
4816  N0 = N0.getOperand(0);
4817 
4818  // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
4819  // use a smaller encoding.
4820  // Look past the truncate if CMP is the only use of it.
4821  if (N0.getOpcode() == ISD::AND &&
4822  N0.getNode()->hasOneUse() &&
4823  N0.getValueType() != MVT::i8) {
4824  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
4825  if (!C) break;
4826  uint64_t Mask = C->getZExtValue();
4827 
4828  // Check if we can replace AND+IMM64 with a shift. This is possible for
4829  // masks like 0xFF000000 or 0x00FFFFFF and if we care only about the zero
4830  // flag.
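  // For example, (X & 0xFFFFFF0000000000) == 0 can be tested as
  // (X >> 40) == 0, and (X & 0x0000FFFFFFFFFFFF) == 0 as (X << 16) == 0.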
4831  if (CmpVT == MVT::i64 && !isInt<32>(Mask) &&
4832  onlyUsesZeroFlag(SDValue(Node, 0))) {
4833  if (isMask_64(~Mask)) {
4834  unsigned TrailingZeros = countTrailingZeros(Mask);
4835  SDValue Imm = CurDAG->getTargetConstant(TrailingZeros, dl, MVT::i64);
4836  SDValue Shift =
4837  SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64, MVT::i32,
4838  N0.getOperand(0), Imm), 0);
4839  MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
4840  MVT::i32, Shift, Shift);
4841  ReplaceNode(Node, Test);
4842  return;
4843  }
4844  if (isMask_64(Mask)) {
4845  unsigned LeadingZeros = countLeadingZeros(Mask);
4846  SDValue Imm = CurDAG->getTargetConstant(LeadingZeros, dl, MVT::i64);
4847  SDValue Shift =
4848  SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64, MVT::i32,
4849  N0.getOperand(0), Imm), 0);
4850  MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
4851  MVT::i32, Shift, Shift);
4852  ReplaceNode(Node, Test);
4853  return;
4854  }
4855  }
4856 
4857  MVT VT;
4858  int SubRegOp;
4859  unsigned ROpc, MOpc;
4860 
4861  // For each of these checks we need to be careful if the sign flag is
4862  // being used. It is only safe to use the sign flag in two conditions,
4863  // either the sign bit in the shrunken mask is zero or the final test
4864  // size is equal to the original compare size.
4865 
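  // For example, narrowing "testl $0x80, %eax" to "testb $0x80, %al" moves
  // the masked bit into the sign position of the narrower test, so it is
  // only safe when SF is unused or the test width matches the compare width.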
4866  if (isUInt<8>(Mask) &&
4867  (!(Mask & 0x80) || CmpVT == MVT::i8 ||
4868  hasNoSignFlagUses(SDValue(Node, 0)))) {
4869  // For example, convert "testl %eax, $8" to "testb %al, $8"
4870  VT = MVT::i8;
4871  SubRegOp = X86::sub_8bit;
4872  ROpc = X86::TEST8ri;
4873  MOpc = X86::TEST8mi;
4874  } else if (OptForMinSize && isUInt<16>(Mask) &&
4875  (!(Mask & 0x8000) || CmpVT == MVT::i16 ||
4876  hasNoSignFlagUses(SDValue(Node, 0)))) {
4877  // For example, "testl %eax, $32776" to "testw %ax, $32776".
4878  // NOTE: We only want to form TESTW instructions if optimizing for
4879  // min size. Otherwise we only save one byte and possibly get a length
4880  // changing prefix penalty in the decoders.
4881  VT = MVT::i16;
4882  SubRegOp = X86::sub_16bit;
4883  ROpc = X86::TEST16ri;
4884  MOpc = X86::TEST16mi;
4885  } else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 &&
4886  ((!(Mask & 0x80000000) &&
4887  // Without minsize 16-bit Cmps can get here so we need to
4888  // be sure we calculate the correct sign flag if needed.
4889  (CmpVT != MVT::i16 || !(Mask & 0x8000))) ||
4890  CmpVT == MVT::i32 ||
4891  hasNoSignFlagUses(SDValue(Node, 0)))) {
4892  // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
4893  // NOTE: We only want to run that transform if N0 is 32 or 64 bits.
4894  // Otherwise, we find ourselves in a position where we have to do
4895  // promotion. If previous passes did not promote the and, we assume
4896  // they had a good reason not to and do not promote here.
4897  VT = MVT::i32;
4898  SubRegOp = X86::sub_32bit;
4899  ROpc = X86::TEST32ri;
4900  MOpc = X86::TEST32mi;
4901  } else {
4902  // No eligible transformation was found.
4903  break;
4904  }
4905 
4906  SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
4907  SDValue Reg = N0.getOperand(0);
4908 
4909  // Emit a testl or testw.
4910  MachineSDNode *NewNode;
4911  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4912  if (tryFoldLoad(Node, N0.getNode(), Reg, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4913  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
4914  Reg.getOperand(0) };
4915  NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops);
4916  // Update the chain.
4917  ReplaceUses(Reg.getValue(1), SDValue(NewNode, 1));
4918  // Record the mem-refs
4919  CurDAG->setNodeMemRefs(NewNode,
4920  {cast<LoadSDNode>(Reg)->getMemOperand()});
4921  } else {
4922  // Extract the subregister if necessary.
4923  if (N0.getValueType() != VT)
4924  Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
4925 
4926  NewNode = CurDAG->getMachineNode(ROpc, dl, MVT::i32, Reg, Imm);
4927  }
4928  // Replace CMP with TEST.
4929  ReplaceNode(Node, NewNode);
4930  return;
4931  }
4932  break;
4933  }
4934  case X86ISD::PCMPISTR: {
4935  if (!Subtarget->hasSSE42())
4936  break;
4937 
4938  bool NeedIndex = !SDValue(Node, 0).use_empty();
4939  bool NeedMask = !SDValue(Node, 1).use_empty();
4940  // We can't fold a load if we are going to make two instructions.
4941  bool MayFoldLoad = !NeedIndex || !NeedMask;
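  // PCMPISTRI returns its index result in ECX while PCMPISTRM returns the
  // mask in XMM0, so when both results are live two separate machine
  // instructions have to be emitted below.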
4942 
4943  MachineSDNode *CNode;
4944  if (NeedMask) {
4945  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrr : X86::PCMPISTRMrr;
4946  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrm : X86::PCMPISTRMrm;
4947  CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node);
4948  ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
4949  }
4950  if (NeedIndex || !NeedMask) {
4951  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : X86::PCMPISTRIrr;
4952  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrm : X86::PCMPISTRIrm;
4953  CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node);
4954  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4955  }
4956 
4957  // Connect the flag usage to the last instruction created.
4958  ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
4959  CurDAG->RemoveDeadNode(Node);
4960  return;
4961  }
4962  case X86ISD::PCMPESTR: {
4963  if (!Subtarget->hasSSE42())
4964  break;
4965 
4966  // Copy the two implicit register inputs.
4967  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EAX,
4968  Node->getOperand(1),
4969  SDValue()).getValue(1);
4970  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
4971  Node->getOperand(3), InFlag).getValue(1);
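  // PCMPESTRI/PCMPESTRM take the explicit lengths of the two string operands
  // in EAX and EDX, which is what the two copies above set up.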
4972 
4973  bool NeedIndex = !SDValue(Node, 0).use_empty();
4974  bool NeedMask = !SDValue(Node, 1).use_empty();
4975  // We can't fold a load if we are going to make two instructions.
4976  bool MayFoldLoad = !NeedIndex || !NeedMask;
4977 
4978  MachineSDNode *CNode;
4979  if (NeedMask) {
4980  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrr : X86::PCMPESTRMrr;
4981  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrm : X86::PCMPESTRMrm;
4982  CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node,
4983  InFlag);
4984  ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
4985  }
4986  if (NeedIndex || !NeedMask) {
4987  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : X86::PCMPESTRIrr;
4988  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrm : X86::PCMPESTRIrm;
4989  CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node, InFlag);
4990  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4991  }
4992  // Connect the flag usage to the last instruction created.
4993  ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
4994  CurDAG->RemoveDeadNode(Node);
4995  return;
4996  }
4997 
4998  case ISD::SETCC: {
4999  if (NVT.isVector() && tryVPTESTM(Node, SDValue(Node, 0), SDValue()))
5000  return;
5001 
5002  break;
5003  }
5004 
5005  case ISD::STORE:
5006  if (foldLoadStoreIntoMemOperand(Node))
5007  return;
5008  break;
5009  case ISD::FCEIL:
5010  case ISD::FFLOOR:
5011  case ISD::FTRUNC:
5012  case ISD::FNEARBYINT:
5013  case ISD::FRINT: {
5014  // Replace fp rounding with their X86 specific equivalent so we don't
5015  // need 2 sets of patterns.
5016  // FIXME: This can only happen when the nodes started as STRICT_* and have
5017  // been mutated into their non-STRICT equivalents. Eventually this
5018  // mutation will be removed and we should switch the STRICT_ nodes to a
5019  // strict version of RNDSCALE in PreProcessISelDAG.
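  // The immediate below uses the SSE4.1 ROUNDSS/ROUNDPS control encoding:
  // bits 1:0 select the rounding mode (or MXCSR.RC when bit 2 is set) and
  // bit 3 suppresses the precision exception, which is why FNEARBYINT uses
  // 0xC while FRINT uses 0x4.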
5020  unsigned Imm;
5021  switch (Node->getOpcode()) {
5022  default: llvm_unreachable("Unexpected opcode!");
5023  case ISD::FCEIL: Imm = 0xA; break;
5024  case ISD::FFLOOR: Imm = 0x9; break;
5025  case ISD::FTRUNC: Imm = 0xB; break;
5026  case ISD::FNEARBYINT: Imm = 0xC; break;
5027  case ISD::FRINT: Imm = 0x4; break;
5028  }
5029  SDLoc dl(Node);
5030  SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl,
5031  Node->getValueType(0),
5032  Node->getOperand(0),
5033  CurDAG->getConstant(Imm, dl, MVT::i8));
5034  ReplaceNode(Node, Res.getNode());
5035  SelectCode(Res.getNode());
5036  return;
5037  }
5038  }
5039 
5040  SelectCode(Node);
5041 }
5042 
5043 bool X86DAGToDAGISel::
5044 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
5045  std::vector<SDValue> &OutOps) {
5046  SDValue Op0, Op1, Op2, Op3, Op4;
5047  switch (ConstraintID) {
5048  default:
5049  llvm_unreachable("Unexpected asm memory constraint");
5050  case InlineAsm::Constraint_i:
5051  // FIXME: It seems strange that 'i' is needed here since it's supposed to
5052  // be an immediate and not a memory constraint.
5053  LLVM_FALLTHROUGH;
5054  case InlineAsm::Constraint_o: // offsetable ??
5055  case InlineAsm::Constraint_v: // not offsetable ??
5056  case InlineAsm::Constraint_m: // memory
5057  case InlineAsm::Constraint_X:
5058  if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
5059  return true;
5060  break;
5061  }
5062 
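  // Every X86 memory operand is expanded to the five-operand form
  // {base, scale, index, displacement, segment}, so all five values are
  // handed back to the inline-asm lowering.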
5063  OutOps.push_back(Op0);
5064  OutOps.push_back(Op1);
5065  OutOps.push_back(Op2);
5066  OutOps.push_back(Op3);
5067  OutOps.push_back(Op4);
5068  return false;
5069 }
5070 
5071 /// This pass converts a legalized DAG into a X86-specific DAG,
5072 /// ready for instruction scheduling.
5073 FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
5074  CodeGenOpt::Level OptLevel) {
5075  return new X86DAGToDAGISel(TM, OptLevel);
5076 }