LLVM  13.0.0git
TargetLowering.cpp
//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include <cctype>
using namespace llvm;

/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}

const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}

bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}

/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &F = DAG.getMachineFunction().getFunction();

  // First, check if tail calls have been disabled in this function.
  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
    return false;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore NoAlias and NonNull because they don't affect the
  // call sequence.
  AttributeList CallerAttrs = F.getAttributes();
  if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
          .removeAttribute(Attribute::NoAlias)
          .removeAttribute(Attribute::NonNull)
          .hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
      CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
    return false;

  // Check if the only use is a function return node.
  return isUsedByReturnOnly(Node, Chain);
}
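
// Editorial note (illustrative, not from the upstream source): the attribute
// check above is what rejects hypothetical IR such as
//
//   define zeroext i8 @caller() {
//     %v = tail call i8 @callee()
//     ret i8 %v
//   }
//
// where turning the call into a true tail call would drop the zero extension
// that @caller's return ABI promises.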

bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
    const uint32_t *CallerPreservedMask,
    const SmallVectorImpl<CCValAssign> &ArgLocs,
    const SmallVectorImpl<SDValue> &OutVals) const {
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    if (!ArgLoc.isRegLoc())
      continue;
    MCRegister Reg = ArgLoc.getLocReg();
    // Only look at callee saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;
    // Check that we pass the value used for the caller.
    // (We look for a CopyFromReg reading a virtual register that is used
    //  for the function live-in value of register Reg)
    SDValue Value = OutVals[I];
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
      return false;
  }
  return true;
}
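
// Editorial sketch (hypothetical target and registers): assuming R4 is both
// callee-saved and used to pass an argument, the only OutVal this accepts for
// R4 is a CopyFromReg of the virtual register holding the caller's own R4
// live-in — i.e. the caller forwards its incoming R4 unchanged, so the
// callee-saved value survives a tail call.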

/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  ByValType = nullptr;
  if (IsByVal) {
    ByValType = Call->getParamByValType(ArgIdx);
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  PreallocatedType = nullptr;
  if (IsPreallocated)
    PreallocatedType = Call->getParamPreallocatedType(ArgIdx);
}
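
// Editorial example (hypothetical IR, not from the upstream source): for
//
//   call void @f(i32 signext %x, %struct.S* byval(%struct.S) align 8 %p)
//
// the queries above give argument 0 IsSExt = true, and argument 1
// IsByVal = true with ByValType = %struct.S and the byval alignment.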

/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
                            ArrayRef<SDValue> Ops,
                            MakeLibCallOptions CallOptions,
                            const SDLoc &dl,
                            SDValue InChain) const {
  if (!InChain)
    InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  TargetLowering::ArgListEntry Entry;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    Entry.Node = NewOp;
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
                                                 CallOptions.IsSExt);
    Entry.IsZExt = !Entry.IsSExt;

    if (CallOptions.IsSoften &&
        !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(Entry);
  }

  if (LC == RTLIB::UNKNOWN_LIBCALL)
    report_fatal_error("Unsupported library call operation!");
  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                         getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  TargetLowering::CallLoweringInfo CLI(DAG);
  bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
  bool zeroExtend = !signExtend;

  if (CallOptions.IsSoften &&
      !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
    signExtend = zeroExtend = false;
  }

  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setDiscardResult(!CallOptions.IsReturnValueUsed)
      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}
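
// Editorial usage sketch (hypothetical operands): a target softening a float
// add might call this as
//
//   TargetLowering::MakeLibCallOptions CallOptions;
//   SDValue Ops[2] = {LHS, RHS};
//   std::pair<SDValue, SDValue> Res =
//       makeLibCall(DAG, RTLIB::ADD_F32, MVT::f32, Ops, CallOptions, dl);
//
// where Res.first is the libcall's result value and Res.second is the chain.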

bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
    return false;

  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::i64;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::i64;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector loads / stores for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      bool Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
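
// Editorial worked example (assuming a 64-bit target with fast misaligned
// accesses): for an 11-byte memcpy with overlap allowed, the loop above picks
// MemOps = {i64, i32}, letting the trailing i32 overlap the i64 by one byte;
// with overlap disallowed it would instead produce {i64, i16, i8}.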

/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS) const {
  SDValue Chain;
  return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
                             OldRHS, Chain);
}

void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
  // not supporting it. We can update this code when libgcc provides such
  // functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
         && "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    ShouldInvertCC = true;
    LLVM_FALLTHROUGH;
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    LLVM_FALLTHROUGH;
  case ISD::SETUEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getCmpLibcallCC(LC2);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    NewRHS = SDValue();
  }
}
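
// Editorial worked example: softening (setcc f32 %a, %b, setueq) selects
// LC1 = UO_F32 (__unordsf2) and LC2 = OEQ_F32 (__eqsf2); the code above then
// builds (or (setcc call1, 0, ne), (setcc call2, 0, eq)), i.e. "unordered or
// equal", from the two libcall results.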

/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
  // In non-pic modes, just use the address of a block.
  if (!isPositionIndependent())
    return MachineJumpTableInfo::EK_BlockAddress;

  // In PIC mode, if the target supports a GPRel32 directive, use it.
  if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
    return MachineJumpTableInfo::EK_GPRel32BlockAddress;

  // Otherwise, use a label difference.
  return MachineJumpTableInfo::EK_LabelDifference32;
}

SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  // If our PIC model is GP relative, use the global offset table as the base.
  unsigned JTEncoding = getJumpTableEncoding();

  if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
      (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));

  return Table;
}

/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI, MCContext &Ctx) const {
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}

bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  const TargetMachine &TM = getTargetMachine();
  const GlobalValue *GV = GA->getGlobal();

  // If the address is not even local to this DSO we will have to load it from
  // a got and then add the offset.
  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    return false;

  // If the code is position independent we will have to add a base register.
  if (isPositionIndependent())
    return false;

  // Otherwise we can do it.
  return true;
}
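
// Editorial example: for a non-PIC, dso_local global, an address such as
// @g + 4 can be folded into a single relocated immediate, which is what
// returning true here permits; under PIC the required base register means the
// offset cannot simply be folded into the relocation, hence the early false.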

//===----------------------------------------------------------------------===//
// Optimization Methods
//===----------------------------------------------------------------------===//

/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C)
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}
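
// Editorial worked example: if only the low byte of (or X, 0x1234) is
// demanded, C is not a subset of DemandedBits, so the constant shrinks to
// DemandedBits & C = 0x34 and the node is rebuilt as (or X, 0x34).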

bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            TargetLoweringOpt &TLO) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnesValue(VT.getVectorNumElements())
                           : APInt(1, 1);
  return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}

/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
/// generalized for targets with other types of implicit widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &Demanded,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (Op.getValueType().isVector())
    return false;

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned DemandedSize = Demanded.getActiveBits();
  unsigned SmallVTBits = DemandedSize;
  if (!isPowerOf2_32(SmallVTBits))
    SmallVTBits = NextPowerOf2(SmallVTBits);
  for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
        TLI.isZExtFree(SmallVT, Op.getValueType())) {
      // We found a type with free casts.
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}
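
// Editorial worked example (assuming i16 is legal and i32<->i16 truncate/zext
// are free on the target): for a 32-bit (add x, y) whose users only demand
// the low 16 bits, the search above rewrites it as
//   (any_extend (add (trunc x), (trunc y)))
// so the addition happens at i16.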

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();

  // TODO: We can probably do more work on calculating the known bits and
  // simplifying the operations for scalable vectors, but for now we just
  // bail out.
  if (VT.isScalableVector()) {
    // Pretend we don't know anything for now.
    Known = KnownBits(DemandedBits.getBitWidth());
    return false;
  }

  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnesValue(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
                              AssumeSingleUse);
}

// TODO: Can we merge SelectionDAG::GetDemandedBits into this?
// TODO: Under what circumstances can we create nodes? Constant folding?
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(Op.getValueType());

  unsigned NumElts = DemandedElts.getBitWidth();
  unsigned BitWidth = DemandedBits.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (SrcVT == DstVT)
      return Src;

    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
    if (NumSrcEltBits == NumDstEltBits)
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);

    // TODO - bigendian once we have test coverage.
    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 &&
        DAG.getDataLayout().isLittleEndian()) {
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned Offset = i * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
        if (!Sub.isNullValue()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // TODO - bigendian once we have test coverage.
    if ((NumSrcEltBits % NumDstEltBits) == 0 &&
        DAG.getDataLayout().isLittleEndian()) {
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::SHL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (const APInt *MaxSA =
            DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = MaxSA->getZExtValue();
      unsigned NumSignBits =
          DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return Op0;
    }
    break;
  }
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return Op0;
    }
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // If none of the extended bits are demanded, eliminate the sextinreg.
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExBits = ExVT.getScalarSizeInBits();
    if (DemandedBits.getActiveBits() <= ExBits)
      return Op0;
    // If the input is already sign extended, just drop the extension.
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
    if (NumSignBits >= (BitWidth - ExBits + 1))
      return Op0;
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    // If we only want the lowest element and none of the extended bits, then
    // we can return the bitcasted source vector.
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (DemandedElts == 1 && DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
        DAG.getDataLayout().isLittleEndian() &&
        DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
      return DAG.getBitcast(DstVT, Src);
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    // If we don't demand the inserted element, return the base vector.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    // If we don't demand the inserted subvector, return the base vector.
    SDValue Vec = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    if (DemandedElts.extractBits(NumSubElts, Idx) == 0)
      return Vec;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts are from one operand and are inline,
    // then we can use the operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
      IdentityLHS &= (M == (int)i);
      IdentityRHS &= ((M - NumElts) == i);
    }

    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
              Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}
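
// Editorial worked example (hypothetical values): if Op = (and X, Y) and
// computeKnownBits proves Y's bits are all ones wherever DemandedBits is set,
// the ISD::AND case above hands back X directly. The helper deliberately
// returns existing operands (or bitcasts of them) rather than building new
// arithmetic, since Op itself may have other users that still need all bits.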

SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
    unsigned Depth) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnesValue(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
    unsigned Depth) const {
  APInt DemandedBits = APInt::getAllOnesValue(Op.getScalarValueSizeInBits());
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in Old and New. Otherwise, analyze the expression and
/// return a mask of Known bits for the expression (used to simplify the
/// caller). The Known bits may only be accurate for those bits in the
/// OriginalDemandedBits and OriginalDemandedElts.
bool TargetLowering::SimplifyDemandedBits(
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth, bool AssumeSingleUse) const {
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
  assert(Op.getScalarValueSizeInBits() == BitWidth &&
         "Mask size mismatches value type size!");

  // Don't know anything.
  Known = KnownBits(BitWidth);

  // TODO: We can probably do more work on calculating the known bits and
  // simplifying the operations for scalable vectors, but for now we just
  // bail out.
  if (Op.getValueType().isScalableVector())
    return false;

  unsigned NumElts = OriginalDemandedElts.getBitWidth();
  assert((!Op.getValueType().isVector() ||
          NumElts == Op.getValueType().getVectorNumElements()) &&
         "Unexpected vector size");

  APInt DemandedBits = OriginalDemandedBits;
  APInt DemandedElts = OriginalDemandedElts;
  SDLoc dl(Op);
  auto &DL = TLO.DAG.getDataLayout();

  // Undef operand.
  if (Op.isUndef())
    return false;

  if (Op.getOpcode() == ISD::Constant) {
    // We know all of the bits for a constant!
    Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
    return false;
  }

  if (Op.getOpcode() == ISD::ConstantFP) {
    // We know all of the bits for a floating point constant!
    Known = KnownBits::makeConstant(
        cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
    return false;
  }

  // Other users may use these bits.
  EVT VT = Op.getValueType();
  if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
    if (Depth != 0) {
      // If not at the root, just compute the Known bits to
      // simplify things downstream.
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false;
    }
    // If this is the root being simplified, allow it to have multiple uses,
    // just set the DemandedBits/Elts to all bits.
    DemandedBits = APInt::getAllOnesValue(BitWidth);
    DemandedElts = APInt::getAllOnesValue(NumElts);
  } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
    // Not demanding any bits/elts from Op.
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
    // Limit search depth.
    return false;
  }

  KnownBits Known2;
  switch (Op.getOpcode()) {
  case ISD::TargetConstant:
    llvm_unreachable("Can't simplify this node");
  case ISD::SCALAR_TO_VECTOR: {
    if (!DemandedElts[0])
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

    KnownBits SrcKnown;
    SDValue Src = Op.getOperand(0);
    unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
    APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
    if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
      return true;

    // Upper elements are undef, so only get the knownbits if we just demand
    // the bottom element.
    if (DemandedElts == 1)
      Known = SrcKnown.anyextOrTrunc(BitWidth);
    break;
  }
  case ISD::BUILD_VECTOR:
    // Collect the known bits that are shared by every demanded element.
    // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    return false; // Don't fall through, will infinitely loop.
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    if (getTargetConstantFromLoad(LD)) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false; // Don't fall through, will infinitely loop.
    } else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
      // If this is a ZEXTLoad and we are looking at the loaded value.
      EVT MemVT = LD->getMemoryVT();
      unsigned MemBits = MemVT.getScalarSizeInBits();
      Known.Zero.setBitsFrom(MemBits);
      return false; // Don't fall through, will infinitely loop.
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();

    // If index isn't constant, assume we need all vector elements AND the
    // inserted element.
    APInt DemandedVecElts(DemandedElts);
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
      unsigned Idx = CIdx->getZExtValue();
      DemandedVecElts.clearBit(Idx);

      // Inserted element is not required.
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);
    }

    KnownBits KnownScl;
    unsigned NumSclBits = Scl.getScalarValueSizeInBits();
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    Known = KnownScl.anyextOrTrunc(BitWidth);

    KnownBits KnownVec;
    if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
                             Depth + 1))
      return true;

    if (!!DemandedVecElts)
      Known = KnownBits::commonBits(Known, KnownVec);

    return false;
  }
  case ISD::INSERT_SUBVECTOR: {
    // Demand any elements from the subvector and the remainder from the src
    // it's inserted into.
    SDValue Src = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    APInt DemandedSrcElts = DemandedElts;
    DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);

    KnownBits KnownSub, KnownSrc;
    if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
                             Depth + 1))
      return true;

    Known.Zero.setAllBits();
    Known.One.setAllBits();
    if (!!DemandedSubElts)
      Known = KnownBits::commonBits(Known, KnownSub);
    if (!!DemandedSrcElts)
      Known = KnownBits::commonBits(Known, KnownSrc);

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() ||
        !DemandedSrcElts.isAllOnesValue()) {
      SDValue NewSub = SimplifyMultipleUseDemandedBits(
          Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
      SDValue NewSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (NewSub || NewSrc) {
        NewSub = NewSub ? NewSub : Sub;
        NewSrc = NewSrc ? NewSrc : Src;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
                                        Op.getOperand(2));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    // Offset the demanded elts by the subvector index.
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType().isScalableVector())
      break;
    uint64_t Idx = Op.getConstantOperandVal(1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);

    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
                             Depth + 1))
      return true;

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnesValue() || !DemandedSrcElts.isAllOnesValue()) {
      SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (DemandedSrc) {
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
                                        Op.getOperand(1));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      APInt DemandedSubElts =
          DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
                               Known2, TLO, Depth + 1))
        return true;
      // Known bits are shared by every demanded subvector element.
      if (!!DemandedSubElts)
        Known = KnownBits::commonBits(Known, Known2);
    }
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands.
    APInt DemandedLHS(NumElts, 0);
    APInt DemandedRHS(NumElts, 0);
    for (unsigned i = 0; i != NumElts; ++i) {
      if (!DemandedElts[i])
        continue;
      int M = ShuffleMask[i];
      if (M < 0) {
        // For UNDEF elements, we don't know anything about the common state of
        // the shuffle result.
        DemandedLHS.clearAllBits();
        DemandedRHS.clearAllBits();
        break;
      }
      assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
      if (M < (int)NumElts)
        DemandedLHS.setBit(M);
      else
        DemandedRHS.setBit(M - NumElts);
    }

    if (!!DemandedLHS || !!DemandedRHS) {
      SDValue Op0 = Op.getOperand(0);
      SDValue Op1 = Op.getOperand(1);

      Known.Zero.setAllBits();
      Known.One.setAllBits();
      if (!!DemandedLHS) {
        if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = KnownBits::commonBits(Known, Known2);
      }
      if (!!DemandedRHS) {
        if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = KnownBits::commonBits(Known, Known2);
      }

      // Attempt to avoid multi-use ops if we don't need anything from them.
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If the RHS is a constant, check to see if the LHS would be zero without
    // using the bits from the RHS. Below, we use knowledge about the RHS to
    // simplify the LHS, here we're using information from the LHS to simplify
    // the RHS.
    if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
      // Do not increment Depth here; that can cause an infinite loop.
      KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
      // If the LHS already has zeros where RHSC does, this 'and' is dead.
      if ((LHSKnown.Zero & DemandedBits) ==
          (~RHSC->getAPIntValue() & DemandedBits))
        return TLO.CombineTo(Op, Op0);

      // If any of the set bits in the RHS are known zero on the LHS, shrink
      // the constant.
      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
                                 DemandedElts, TLO))
        return true;

      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
      // constant, but if this 'and' is only clearing bits that were just set by
      // the xor, then this 'and' can be eliminated by shrinking the mask of
      // the xor. For example, for a 32-bit X:
      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
          LHSKnown.One == ~RHSC->getAPIntValue()) {
        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
        return TLO.CombineTo(Op, Xor);
      }
    }

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
      return TLO.CombineTo(Op, Op1);
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
                               TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    Known &= Known2;
    break;
  }
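  // Editorial worked example for the "dead and" early-out above: with i32
  // X = (srl Y, 24), the top 24 bits of X are known zero, so
  // Op = (and X, 0xFF) clears nothing that isn't already zero and is
  // replaced by X itself via TLO.CombineTo.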
  case ISD::OR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'or'.
    if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    Known |= Known2;
    break;
  }
  case ISD::XOR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'xor'.
    if (DemandedBits.isSubsetOf(Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // If all of the unknown bits are known to be zero on one side or the
    // other, turn this into an *inclusive* or.
    // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));

    ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
    if (C) {
      // If one side is a constant, and all of the set bits in the constant are
      // also known set on the other side, turn this into an AND, as we know
      // the bits will be cleared.
      // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
      // NB: it is okay if more bits are known than are requested
      if (C->getAPIntValue() == Known2.One) {
        SDValue ANDC =
            TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
      }

      // If the RHS is a constant, see if we can change it. Don't alter a -1
      // constant because that's a 'not' op, and that is better for combining
      // and codegen.
      if (!C->isAllOnesValue() &&
          DemandedBits.isSubsetOf(C->getAPIntValue())) {
        // We're flipping all demanded bits. Flip the undemanded bits too.
        SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
        return TLO.CombineTo(Op, New);
      }
    }

    // If we can't turn this into a 'not', try to shrink the constant.
    if (!C || !C->isAllOnesValue())
      if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
        return true;

    Known ^= Known2;
    break;
  }
  case ISD::SELECT:
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known = KnownBits::commonBits(Known, Known2);
    break;
  case ISD::SELECT_CC:
    if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known = KnownBits::commonBits(Known, Known2);
    break;
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return TLO.CombineTo(Op, Op0);

      // TODO: Should we check for other forms of sign-bit comparisons?
      // Examples: X <= -1, X >= 0
    }
    if (getBooleanContents(Op0.getValueType()) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
  case ISD::SHL: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    EVT ShiftVT = Op1.getValueType();

    if (const APInt *SA =
            TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = SA->getZExtValue();
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
      // single shift. We can do this if the bottom bits (which are shifted
      // out) are never demanded.
      // TODO - support non-uniform vector amounts.
      if (Op0.getOpcode() == ISD::SRL) {
        if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
          if (const APInt *SA2 =
                  TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
            unsigned C1 = SA2->getZExtValue();
            unsigned Opc = ISD::SHL;
            int Diff = ShAmt - C1;
            if (Diff < 0) {
              Diff = -Diff;
              Opc = ISD::SRL;
            }
            SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
            return TLO.CombineTo(
                Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
          }
        }
      }

      // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
      // are not demanded. This will likely allow the anyext to be folded away.
      // TODO - support non-uniform vector amounts.
      if (Op0.getOpcode() == ISD::ANY_EXTEND) {
        SDValue InnerOp = Op0.getOperand(0);
        EVT InnerVT = InnerOp.getValueType();
        unsigned InnerBits = InnerVT.getScalarSizeInBits();
        if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
            isTypeDesirableForOp(ISD::SHL, InnerVT)) {
          EVT ShTy = getShiftAmountTy(InnerVT, DL);
          if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
            ShTy = InnerVT;
          SDValue NarrowShl =
              TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
                              TLO.DAG.getConstant(ShAmt, dl, ShTy));
          return TLO.CombineTo(
              Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
        }

        // Repeat the SHL optimization above in cases where an extension
        // intervenes: (shl (anyext (shr x, c1)), c2) to
        // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
        // aren't demanded (as above) and that the shifted upper c1 bits of
        // x aren't demanded.
        // TODO - support non-uniform vector amounts.
        if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
            InnerOp.hasOneUse()) {
          if (const APInt *SA2 =
                  TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
            unsigned InnerShAmt = SA2->getZExtValue();
            if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
                DemandedBits.getActiveBits() <=
                    (InnerBits - InnerShAmt + ShAmt) &&
                DemandedBits.countTrailingZeros() >= ShAmt) {
              SDValue NewSA =
                  TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
              SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
                                               InnerOp.getOperand(0));
              return TLO.CombineTo(
                  Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
            }
          }
        }
      }

      APInt InDemandedMask = DemandedBits.lshr(ShAmt);
      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero <<= ShAmt;
      Known.One <<= ShAmt;
      // low bits known zero.
      Known.Zero.setLowBits(ShAmt);

      // Try shrinking the operation as long as the shift amount will still be
      // in range.
      if ((ShAmt < DemandedBits.getActiveBits()) &&
          ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
        return true;
    }

    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (const APInt *MaxSA =
            TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = MaxSA->getZExtValue();
      unsigned NumSignBits =
          TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return TLO.CombineTo(Op, Op0);
    }
    break;
  }
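  // Editorial worked example for the paired-shift rewrite above: for i32
  // Op = (shl (srl X, 4), 8) where none of the low 8 bits are demanded,
  // C1 = 4 and ShAmt = 8 give Diff = 4, so the two shifts collapse into a
  // single (shl X, 4).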
1533  case ISD::SRL: {
1534  SDValue Op0 = Op.getOperand(0);
1535  SDValue Op1 = Op.getOperand(1);
1536  EVT ShiftVT = Op1.getValueType();
1537 
1538  if (const APInt *SA =
1539  TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1540  unsigned ShAmt = SA->getZExtValue();
1541  if (ShAmt == 0)
1542  return TLO.CombineTo(Op, Op0);
1543 
1544  // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1545  // single shift. We can do this if the top bits (which are shifted out)
1546  // are never demanded.
1547  // TODO - support non-uniform vector amounts.
1548  if (Op0.getOpcode() == ISD::SHL) {
1549  if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1550  if (const APInt *SA2 =
1551  TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1552  unsigned C1 = SA2->getZExtValue();
1553  unsigned Opc = ISD::SRL;
1554  int Diff = ShAmt - C1;
1555  if (Diff < 0) {
1556  Diff = -Diff;
1557  Opc = ISD::SHL;
1558  }
1559  SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1560  return TLO.CombineTo(
1561  Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1562  }
1563  }
1564  }
1565 
1566  APInt InDemandedMask = (DemandedBits << ShAmt);
1567 
1568  // If the shift is exact, then it does demand the low bits (and knows that
1569  // they are zero).
1570  if (Op->getFlags().hasExact())
1571  InDemandedMask.setLowBits(ShAmt);
1572 
1573  // Compute the new bits that are at the top now.
1574  if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1575  Depth + 1))
1576  return true;
1577  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1578  Known.Zero.lshrInPlace(ShAmt);
1579  Known.One.lshrInPlace(ShAmt);
1580  // High bits known zero.
1581  Known.Zero.setHighBits(ShAmt);
1582  }
1583  break;
1584  }
1585  case ISD::SRA: {
1586  SDValue Op0 = Op.getOperand(0);
1587  SDValue Op1 = Op.getOperand(1);
1588  EVT ShiftVT = Op1.getValueType();
1589 
1590  // If we only want bits that already match the signbit then we don't need
1591  // to shift.
1592  unsigned NumHiDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
1593  if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
1594  NumHiDemandedBits)
1595  return TLO.CombineTo(Op, Op0);
1596 
1597  // If this is an arithmetic shift right and only the low-bit is set, we can
1598  // always convert this into a logical shr, even if the shift amount is
1599  // variable. The low bit of the shift cannot be an input sign bit unless
1600  // the shift amount is >= the size of the datatype, which is undefined.
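// E.g. (illustrative): if only bit 0 of (sra x, c) is demanded, bit 0 of
// (srl x, c) is identical for every in-range c, so the logical shift is
// safe even though c is unknown.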
1601  if (DemandedBits.isOneValue())
1602  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
1603 
1604  if (const APInt *SA =
1605  TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1606  unsigned ShAmt = SA->getZExtValue();
1607  if (ShAmt == 0)
1608  return TLO.CombineTo(Op, Op0);
1609 
1610  APInt InDemandedMask = (DemandedBits << ShAmt);
1611 
1612  // If the shift is exact, then it does demand the low bits (and knows that
1613  // they are zero).
1614  if (Op->getFlags().hasExact())
1615  InDemandedMask.setLowBits(ShAmt);
1616 
1617  // If any of the demanded bits are produced by the sign extension, we also
1618  // demand the input sign bit.
1619  if (DemandedBits.countLeadingZeros() < ShAmt)
1620  InDemandedMask.setSignBit();
1621 
1622  if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1623  Depth + 1))
1624  return true;
1625  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1626  Known.Zero.lshrInPlace(ShAmt);
1627  Known.One.lshrInPlace(ShAmt);
1628 
1629  // If the input sign bit is known to be zero, or if none of the top bits
1630  // are demanded, turn this into an unsigned shift right.
1631  if (Known.Zero[BitWidth - ShAmt - 1] ||
1632  DemandedBits.countLeadingZeros() >= ShAmt) {
1633  SDNodeFlags Flags;
1634  Flags.setExact(Op->getFlags().hasExact());
1635  return TLO.CombineTo(
1636  Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
1637  }
1638 
1639  int Log2 = DemandedBits.exactLogBase2();
1640  if (Log2 >= 0) {
1641  // The bit must come from the sign.
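// E.g. (illustrative, i8): demanding only bit 6 of (sra x, 5) selects a
// sign-extended copy of bit 7, and (srl x, 1) produces the same bit 6.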
1642  SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
1643  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
1644  }
1645 
1646  if (Known.One[BitWidth - ShAmt - 1])
1647  // New bits are known one.
1648  Known.One.setHighBits(ShAmt);
1649 
1650  // Attempt to avoid multi-use ops if we don't need anything from them.
1651  if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1652  SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1653  Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1654  if (DemandedOp0) {
1655  SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
1656  return TLO.CombineTo(Op, NewOp);
1657  }
1658  }
1659  }
1660  break;
1661  }
1662  case ISD::FSHL:
1663  case ISD::FSHR: {
1664  SDValue Op0 = Op.getOperand(0);
1665  SDValue Op1 = Op.getOperand(1);
1666  SDValue Op2 = Op.getOperand(2);
1667  bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
1668 
1669  if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
1670  unsigned Amt = SA->getAPIntValue().urem(BitWidth);
1671 
1672  // For fshl, 0-shift returns the 1st arg.
1673  // For fshr, 0-shift returns the 2nd arg.
1674  if (Amt == 0) {
1675  if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
1676  Known, TLO, Depth + 1))
1677  return true;
1678  break;
1679  }
1680 
1681  // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
1682  // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
1683  APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
1684  APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
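// E.g. (illustrative, BitWidth = 8): for fshl with Amt = 3 and
// DemandedBits = 0b00011000, only bits 0-1 of Op0 are demanded
// (Demanded0 = 0b11) and no bits of Op1 are (Demanded1 = 0).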
1685  if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
1686  Depth + 1))
1687  return true;
1688  if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
1689  Depth + 1))
1690  return true;
1691 
1692  Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
1693  Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
1694  Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
1695  Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
1696  Known.One |= Known2.One;
1697  Known.Zero |= Known2.Zero;
1698  }
1699 
1700  // For pow-2 bitwidths we only demand the bottom modulo amt bits.
1701  if (isPowerOf2_32(BitWidth)) {
1702  APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
1703  if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
1704  Known2, TLO, Depth + 1))
1705  return true;
1706  }
1707  break;
1708  }
1709  case ISD::ROTL:
1710  case ISD::ROTR: {
1711  SDValue Op0 = Op.getOperand(0);
1712  SDValue Op1 = Op.getOperand(1);
1713 
1714  // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
1715  if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
1716  return TLO.CombineTo(Op, Op0);
1717 
1718  // For pow-2 bitwidths we only demand the bottom modulo amt bits.
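// E.g. (illustrative): a 32-bit rotate ignores all but the low 5 bits of
// its amount, so DemandedAmtBits below is 31 (bits 0-4).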
1719  if (isPowerOf2_32(BitWidth)) {
1720  APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
1721  if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
1722  Depth + 1))
1723  return true;
1724  }
1725  break;
1726  }
1727  case ISD::UMIN: {
1728  // Check if one arg is always less than (or equal to) the other arg.
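// E.g. (illustrative): if the known bits prove Op0 <= 15 and Op1 >= 16,
// the comparison below is decided and the umin folds to Op0.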
1729  SDValue Op0 = Op.getOperand(0);
1730  SDValue Op1 = Op.getOperand(1);
1731  KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
1732  KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
1733  Known = KnownBits::umin(Known0, Known1);
1734  if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1))
1735  return TLO.CombineTo(Op, IsULE.getValue() ? Op0 : Op1);
1736  if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1))
1737  return TLO.CombineTo(Op, IsULT.getValue() ? Op0 : Op1);
1738  break;
1739  }
1740  case ISD::UMAX: {
1741  // Check if one arg is always greater than (or equal to) the other arg.
1742  SDValue Op0 = Op.getOperand(0);
1743  SDValue Op1 = Op.getOperand(1);
1744  KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
1745  KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
1746  Known = KnownBits::umax(Known0, Known1);
1747  if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
1748  return TLO.CombineTo(Op, IsUGE.getValue() ? Op0 : Op1);
1749  if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
1750  return TLO.CombineTo(Op, IsUGT.getValue() ? Op0 : Op1);
1751  break;
1752  }
1753  case ISD::BITREVERSE: {
1754  SDValue Src = Op.getOperand(0);
1755  APInt DemandedSrcBits = DemandedBits.reverseBits();
1756  if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
1757  Depth + 1))
1758  return true;
1759  Known.One = Known2.One.reverseBits();
1760  Known.Zero = Known2.Zero.reverseBits();
1761  break;
1762  }
1763  case ISD::BSWAP: {
1764  SDValue Src = Op.getOperand(0);
1765  APInt DemandedSrcBits = DemandedBits.byteSwap();
1766  if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
1767  Depth + 1))
1768  return true;
1769  Known.One = Known2.One.byteSwap();
1770  Known.Zero = Known2.Zero.byteSwap();
1771  break;
1772  }
1773  case ISD::CTPOP: {
1774  // If only 1 bit is demanded, replace with PARITY as long as we're before
1775  // op legalization.
1776  // FIXME: Limit to scalars for now.
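// E.g. (illustrative): bit 0 of ctpop(x) is the XOR of all bits of x,
// which is exactly what ISD::PARITY computes.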
1777  if (DemandedBits.isOneValue() && !TLO.LegalOps && !VT.isVector())
1778  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
1779  Op.getOperand(0)));
1780 
1781  Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1782  break;
1783  }
1784  case ISD::SIGN_EXTEND_INREG: {
1785  SDValue Op0 = Op.getOperand(0);
1786  EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1787  unsigned ExVTBits = ExVT.getScalarSizeInBits();
1788 
1789  // If we only care about the highest bit, don't bother shifting right.
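// E.g. (illustrative): for (sext_inreg x, i8) on i32 with only bit 31
// demanded, (shl x, 24) moves the i8 sign bit (bit 7) straight to bit 31.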
1790  if (DemandedBits.isSignMask()) {
1791  unsigned NumSignBits =
1792  TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1793  bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
1794  // However, if the input is already sign extended, we expect the sign
1795  // extension to be dropped altogether later and do not simplify.
1796  if (!AlreadySignExtended) {
1797  // Compute the correct shift amount type, which must be getShiftAmountTy
1798  // for scalar types after legalization.
1799  EVT ShiftAmtTy = VT;
1800  if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
1801  ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
1802 
1803  SDValue ShiftAmt =
1804  TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy);
1805  return TLO.CombineTo(Op,
1806  TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
1807  }
1808  }
1809 
1810  // If none of the extended bits are demanded, eliminate the sextinreg.
1811  if (DemandedBits.getActiveBits() <= ExVTBits)
1812  return TLO.CombineTo(Op, Op0);
1813 
1814  APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
1815 
1816  // Since the sign extended bits are demanded, we know that the sign
1817  // bit is demanded.
1818  InputDemandedBits.setBit(ExVTBits - 1);
1819 
1820  if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
1821  return true;
1822  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1823 
1824  // If the sign bit of the input is known set or clear, then we know the
1825  // top bits of the result.
1826 
1827  // If the input sign bit is known zero, convert this into a zero extension.
1828  if (Known.Zero[ExVTBits - 1])
1829  return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
1830 
1831  APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
1832  if (Known.One[ExVTBits - 1]) { // Input sign bit known set
1833  Known.One.setBitsFrom(ExVTBits);
1834  Known.Zero &= Mask;
1835  } else { // Input sign bit unknown
1836  Known.Zero &= Mask;
1837  Known.One &= Mask;
1838  }
1839  break;
1840  }
1841  case ISD::BUILD_PAIR: {
1842  EVT HalfVT = Op.getOperand(0).getValueType();
1843  unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
1844 
1845  APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
1846  APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
1847 
1848  KnownBits KnownLo, KnownHi;
1849 
1850  if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
1851  return true;
1852 
1853  if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
1854  return true;
1855 
1856  Known.Zero = KnownLo.Zero.zext(BitWidth) |
1857  KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
1858 
1859  Known.One = KnownLo.One.zext(BitWidth) |
1860  KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
1861  break;
1862  }
1863  case ISD::ZERO_EXTEND:
1864  case ISD::ZERO_EXTEND_VECTOR_INREG: {
1865  SDValue Src = Op.getOperand(0);
1866  EVT SrcVT = Src.getValueType();
1867  unsigned InBits = SrcVT.getScalarSizeInBits();
1868  unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1869  bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
1870 
1871  // If none of the top bits are demanded, convert this into an any_extend.
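// E.g. (illustrative): (zext i8 x to i32) with only bits 0-7 demanded can
// become (anyext x) - the zeroed upper bits are never observed.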
1872  if (DemandedBits.getActiveBits() <= InBits) {
1873  // If we only need the non-extended bits of the bottom element
1874  // then we can just bitcast to the result.
1875  if (IsVecInReg && DemandedElts == 1 &&
1876  VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1877  TLO.DAG.getDataLayout().isLittleEndian())
1878  return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1879 
1880  unsigned Opc =
1881  IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
1882  if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
1883  return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1884  }
1885 
1886  APInt InDemandedBits = DemandedBits.trunc(InBits);
1887  APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1888  if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1889  Depth + 1))
1890  return true;
1891  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1892  assert(Known.getBitWidth() == InBits && "Src width has changed?");
1893  Known = Known.zext(BitWidth);
1894 
1895  // Attempt to avoid multi-use ops if we don't need anything from them.
1896  if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1897  Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
1898  return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
1899  break;
1900  }
1901  case ISD::SIGN_EXTEND:
1902  case ISD::SIGN_EXTEND_VECTOR_INREG: {
1903  SDValue Src = Op.getOperand(0);
1904  EVT SrcVT = Src.getValueType();
1905  unsigned InBits = SrcVT.getScalarSizeInBits();
1906  unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1907  bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
1908 
1909  // If none of the top bits are demanded, convert this into an any_extend.
1910  if (DemandedBits.getActiveBits() <= InBits) {
1911  // If we only need the non-extended bits of the bottom element
1912  // then we can just bitcast to the result.
1913  if (IsVecInReg && DemandedElts == 1 &&
1914  VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1915  TLO.DAG.getDataLayout().isLittleEndian())
1916  return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1917 
1918  unsigned Opc =
1919  IsVecInReg ? ISD::SIGN_EXTEND_VECTOR_INREG : ISD::SIGN_EXTEND;
1920  if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
1921  return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1922  }
1923 
1924  APInt InDemandedBits = DemandedBits.trunc(InBits);
1925  APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1926 
1927  // Since some of the sign extended bits are demanded, we know that the sign
1928  // bit is demanded.
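// E.g. (illustrative): any demanded bit of (sext i8 x to i32) at or above
// bit 8 is a copy of bit 7 of x, so bit 7 joins the demanded input bits.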
1929  InDemandedBits.setBit(InBits - 1);
1930 
1931  if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1932  Depth + 1))
1933  return true;
1934  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1935  assert(Known.getBitWidth() == InBits && "Src width has changed?");
1936 
1937  // If the sign bit is known one, the top bits match.
1938  Known = Known.sext(BitWidth);
1939 
1940  // If the sign bit is known zero, convert this to a zero extend.
1941  if (Known.isNonNegative()) {
1942  unsigned Opc =
1943  IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
1944  if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
1945  return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1946  }
1947 
1948  // Attempt to avoid multi-use ops if we don't need anything from them.
1949  if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1950  Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
1951  return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
1952  break;
1953  }
1954  case ISD::ANY_EXTEND:
1955  case ISD::ANY_EXTEND_VECTOR_INREG: {
1956  SDValue Src = Op.getOperand(0);
1957  EVT SrcVT = Src.getValueType();
1958  unsigned InBits = SrcVT.getScalarSizeInBits();
1959  unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1960  bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
1961 
1962  // If we only need the bottom element then we can just bitcast.
1963  // TODO: Handle ANY_EXTEND?
1964  if (IsVecInReg && DemandedElts == 1 &&
1965  VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1966  TLO.DAG.getDataLayout().isLittleEndian())
1967  return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1968 
1969  APInt InDemandedBits = DemandedBits.trunc(InBits);
1970  APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1971  if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1972  Depth + 1))
1973  return true;
1974  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1975  assert(Known.getBitWidth() == InBits && "Src width has changed?");
1976  Known = Known.anyext(BitWidth);
1977 
1978  // Attempt to avoid multi-use ops if we don't need anything from them.
1979  if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1980  Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
1981  return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
1982  break;
1983  }
1984  case ISD::TRUNCATE: {
1985  SDValue Src = Op.getOperand(0);
1986 
1987  // Simplify the input, using demanded bit information, and compute the known
1988  // zero/one bits live out.
1989  unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
1990  APInt TruncMask = DemandedBits.zext(OperandBitWidth);
1991  if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
1992  Depth + 1))
1993  return true;
1994  Known = Known.trunc(BitWidth);
1995 
1996  // Attempt to avoid multi-use ops if we don't need anything from them.
1997  if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1998  Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
1999  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2000 
2001  // If the input is only used by this truncate, see if we can shrink it based
2002  // on the known demanded bits.
2003  if (Src.getNode()->hasOneUse()) {
2004  switch (Src.getOpcode()) {
2005  default:
2006  break;
2007  case ISD::SRL:
2008  // Shrink SRL by a constant if none of the high bits shifted in are
2009  // demanded.
2010  if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2011  // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2012  // undesirable.
2013  break;
2014 
2015  const APInt *ShAmtC =
2016  TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
2017  if (!ShAmtC || ShAmtC->uge(BitWidth))
2018  break;
2019  uint64_t ShVal = ShAmtC->getZExtValue();
2020 
2021  APInt HighBits =
2022  APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2023  HighBits.lshrInPlace(ShVal);
2024  HighBits = HighBits.trunc(BitWidth);
2025 
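// Illustrative example: in (trunc (srl (i64 x), 8) to i32), result bits
// 24-31 come from x bits 32-39, which (srl (trunc x to i32), 8) would
// zero-fill instead; the fold is therefore only valid when those HighBits
// are not demanded.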
2026  if (!(HighBits & DemandedBits)) {
2027  // None of the shifted in bits are needed. Add a truncate of the
2028  // shift input, then shift it.
2029  SDValue NewShAmt = TLO.DAG.getConstant(
2030  ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
2031  SDValue NewTrunc =
2032  TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2033  return TLO.CombineTo(
2034  Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2035  }
2036  break;
2037  }
2038  }
2039 
2040  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2041  break;
2042  }
2043  case ISD::AssertZext: {
2044  // AssertZext demands all of the high bits, plus any of the low bits
2045  // demanded by its users.
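// E.g. (illustrative): (AssertZext x, i8) guarantees that bits 8 and up
// are zero, so they become known zero regardless of what is known about x.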
2046  EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2047  APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2048  if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2049  TLO, Depth + 1))
2050  return true;
2051  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2052 
2053  Known.Zero |= ~InMask;
2054  break;
2055  }
2056  case ISD::EXTRACT_VECTOR_ELT: {
2057  SDValue Src = Op.getOperand(0);
2058  SDValue Idx = Op.getOperand(1);
2059  ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2060  unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2061 
2062  if (SrcEltCnt.isScalable())
2063  return false;
2064 
2065  // Demand the bits from every vector element without a constant index.
2066  unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2067  APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
2068  if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2069  if (CIdx->getAPIntValue().ult(NumSrcElts))
2070  DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2071 
2072  // If BitWidth > EltBitWidth the value is any-extended, so we do not know
2073  // anything about the extended bits.
2074  APInt DemandedSrcBits = DemandedBits;
2075  if (BitWidth > EltBitWidth)
2076  DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2077 
2078  if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2079  Depth + 1))
2080  return true;
2081 
2082  // Attempt to avoid multi-use ops if we don't need anything from them.
2083  if (!DemandedSrcBits.isAllOnesValue() ||
2084  !DemandedSrcElts.isAllOnesValue()) {
2085  if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2086  Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2087  SDValue NewOp =
2088  TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2089  return TLO.CombineTo(Op, NewOp);
2090  }
2091  }
2092 
2093  Known = Known2;
2094  if (BitWidth > EltBitWidth)
2095  Known = Known.anyext(BitWidth);
2096  break;
2097  }
2098  case ISD::BITCAST: {
2099  SDValue Src = Op.getOperand(0);
2100  EVT SrcVT = Src.getValueType();
2101  unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2102 
2103  // If this is an FP->Int bitcast and if the sign bit is the only
2104  // thing demanded, turn this into a FGETSIGN.
2105  if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2106  DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2107  SrcVT.isFloatingPoint()) {
2108  bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2109  bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2110  if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2111  SrcVT != MVT::f128) {
2112  // Cannot eliminate/lower SHL for f128 yet.
2113  EVT Ty = OpVTLegal ? VT : MVT::i32;
2114  // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2115  // place. We expect the SHL to be eliminated by other optimizations.
2116  SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2117  unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2118  if (!OpVTLegal && OpVTSizeInBits > 32)
2119  Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2120  unsigned ShVal = Op.getValueSizeInBits() - 1;
2121  SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2122  return TLO.CombineTo(Op,
2123  TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2124  }
2125  }
2126 
2127  // Bitcast from a vector using SimplifyDemandedBits/SimplifyDemandedVectorElts.
2128  // Demand the elt/bit if any of the original elts/bits are demanded.
2129  // TODO - bigendian once we have test coverage.
2130  if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 &&
2131  TLO.DAG.getDataLayout().isLittleEndian()) {
2132  unsigned Scale = BitWidth / NumSrcEltBits;
2133  unsigned NumSrcElts = SrcVT.getVectorNumElements();
2134  APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
2135  APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
2136  for (unsigned i = 0; i != Scale; ++i) {
2137  unsigned Offset = i * NumSrcEltBits;
2138  APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
2139  if (!Sub.isNullValue()) {
2140  DemandedSrcBits |= Sub;
2141  for (unsigned j = 0; j != NumElts; ++j)
2142  if (DemandedElts[j])
2143  DemandedSrcElts.setBit((j * Scale) + i);
2144  }
2145  }
2146 
2147  APInt KnownSrcUndef, KnownSrcZero;
2148  if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2149  KnownSrcZero, TLO, Depth + 1))
2150  return true;
2151 
2152  KnownBits KnownSrcBits;
2153  if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2154  KnownSrcBits, TLO, Depth + 1))
2155  return true;
2156  } else if ((NumSrcEltBits % BitWidth) == 0 &&
2157  TLO.DAG.getDataLayout().isLittleEndian()) {
2158  unsigned Scale = NumSrcEltBits / BitWidth;
2159  unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2160  APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
2161  APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
2162  for (unsigned i = 0; i != NumElts; ++i)
2163  if (DemandedElts[i]) {
2164  unsigned Offset = (i % Scale) * BitWidth;
2165  DemandedSrcBits.insertBits(DemandedBits, Offset);
2166  DemandedSrcElts.setBit(i / Scale);
2167  }
2168 
2169  if (SrcVT.isVector()) {
2170  APInt KnownSrcUndef, KnownSrcZero;
2171  if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2172  KnownSrcZero, TLO, Depth + 1))
2173  return true;
2174  }
2175 
2176  KnownBits KnownSrcBits;
2177  if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2178  KnownSrcBits, TLO, Depth + 1))
2179  return true;
2180  }
2181 
2182  // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2183  // recursive call where Known may be useful to the caller.
2184  if (Depth > 0) {
2185  Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2186  return false;
2187  }
2188  break;
2189  }
2190  case ISD::ADD:
2191  case ISD::MUL:
2192  case ISD::SUB: {
2193  // Add, Sub, and Mul don't demand any bits in positions beyond that
2194  // of the highest bit demanded of them.
2195  SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2196  SDNodeFlags Flags = Op.getNode()->getFlags();
2197  unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
2198  APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2199  if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
2200  Depth + 1) ||
2201  SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
2202  Depth + 1) ||
2203  // See if the operation should be performed at a smaller bit width.
2204  ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2205  if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2206  // Disable the nsw and nuw flags. We can no longer guarantee that we
2207  // won't wrap after simplification.
2208  Flags.setNoSignedWrap(false);
2209  Flags.setNoUnsignedWrap(false);
2210  SDValue NewOp =
2211  TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2212  return TLO.CombineTo(Op, NewOp);
2213  }
2214  return true;
2215  }
2216 
2217  // Attempt to avoid multi-use ops if we don't need anything from them.
2218  if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
2219  SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2220  Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2221  SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2222  Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2223  if (DemandedOp0 || DemandedOp1) {
2224  Flags.setNoSignedWrap(false);
2225  Flags.setNoUnsignedWrap(false);
2226  Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2227  Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2228  SDValue NewOp =
2229  TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2230  return TLO.CombineTo(Op, NewOp);
2231  }
2232  }
2233 
2234  // If we have a constant operand, we may be able to turn it into -1 if we
2235  // do not demand the high bits. This can make the constant smaller to
2236  // encode, allow more general folding, or match specialized instruction
2237  // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2238  // is probably not useful (and could be detrimental).
2239  ConstantSDNode *C = isConstOrConstSplat(Op1);
2240  APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2241  if (C && !C->isAllOnesValue() && !C->isOne() &&
2242  (C->getAPIntValue() | HighMask).isAllOnesValue()) {
2243  SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2244  // Disable the nsw and nuw flags. We can no longer guarantee that we
2245  // won't wrap after simplification.
2246  Flags.setNoSignedWrap(false);
2247  Flags.setNoUnsignedWrap(false);
2248  SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
2249  return TLO.CombineTo(Op, NewOp);
2250  }
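// E.g. (illustrative, i32): if only the low 24 bits of
// (add x, 0x00FFFFFF) are demanded, the constant may be replaced by -1,
// giving (add x, -1).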
2251 
2252  LLVM_FALLTHROUGH;
2253  }
2254  default:
2255  if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
2256  if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2257  Known, TLO, Depth))
2258  return true;
2259  break;
2260  }
2261 
2262  // Just use computeKnownBits to compute output bits.
2263  Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2264  break;
2265  }
2266 
2267  // If we know the value of all of the demanded bits, return this as a
2268  // constant.
2269  if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2270  // Avoid folding to a constant if any OpaqueConstant is involved.
2271  const SDNode *N = Op.getNode();
2272  for (SDNode *Op :
2273  llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) {
2274  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
2275  if (C->isOpaque())
2276  return false;
2277  }
2278  if (VT.isInteger())
2279  return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2280  if (VT.isFloatingPoint())
2281  return TLO.CombineTo(
2282  Op,
2283  TLO.DAG.getConstantFP(
2284  APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
2285  }
2286 
2287  return false;
2288 }
2289 
2290 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2291  const APInt &DemandedElts,
2292  APInt &KnownUndef,
2293  APInt &KnownZero,
2294  DAGCombinerInfo &DCI) const {
2295  SelectionDAG &DAG = DCI.DAG;
2296  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2297  !DCI.isBeforeLegalizeOps());
2298 
2299  bool Simplified =
2300  SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2301  if (Simplified) {
2302  DCI.AddToWorklist(Op.getNode());
2303  DCI.CommitTargetLoweringOpt(TLO);
2304  }
2305 
2306  return Simplified;
2307 }
2308 
2309 /// Given a vector binary operation and known undefined elements for each input
2310 /// operand, compute whether each element of the output is undefined.
2311 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2312  const APInt &UndefOp0,
2313  const APInt &UndefOp1) {
2314  EVT VT = BO.getValueType();
2315  assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
2316  "Vector binop only");
2317 
2318  EVT EltVT = VT.getVectorElementType();
2319  unsigned NumElts = VT.getVectorNumElements();
2320  assert(UndefOp0.getBitWidth() == NumElts &&
2321  UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2322 
2323  auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2324  const APInt &UndefVals) {
2325  if (UndefVals[Index])
2326  return DAG.getUNDEF(EltVT);
2327 
2328  if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
2329  // Try hard to make sure that the getNode() call is not creating temporary
2330  // nodes. Ignore opaque integers because they do not constant fold.
2331  SDValue Elt = BV->getOperand(Index);
2332  auto *C = dyn_cast<ConstantSDNode>(Elt);
2333  if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2334  return Elt;
2335  }
2336 
2337  return SDValue();
2338  };
2339 
2340  APInt KnownUndef = APInt::getNullValue(NumElts);
2341  for (unsigned i = 0; i != NumElts; ++i) {
2342  // If both inputs for this element are either constant or undef and match
2343  // the element type, compute the constant/undef result for this element of
2344  // the vector.
2345  // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2346  // not handle FP constants. The code within getNode() should be refactored
2347  // to avoid the danger of creating a bogus temporary node here.
2348  SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
2349  SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
2350  if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
2351  if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
2352  KnownUndef.setBit(i);
2353  }
2354  return KnownUndef;
2355 }
2356 
2357 bool TargetLowering::SimplifyDemandedVectorElts(
2358  SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
2359  APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
2360  bool AssumeSingleUse) const {
2361  EVT VT = Op.getValueType();
2362  unsigned Opcode = Op.getOpcode();
2363  APInt DemandedElts = OriginalDemandedElts;
2364  unsigned NumElts = DemandedElts.getBitWidth();
2365  assert(VT.isVector() && "Expected vector op");
2366 
2367  KnownUndef = KnownZero = APInt::getNullValue(NumElts);
2368 
2369  // TODO: For now we assume we know nothing about scalable vectors.
2370  if (VT.isScalableVector())
2371  return false;
2372 
2373  assert(VT.getVectorNumElements() == NumElts &&
2374  "Mask size mismatches value type element count!");
2375 
2376  // Undef operand.
2377  if (Op.isUndef()) {
2378  KnownUndef.setAllBits();
2379  return false;
2380  }
2381 
2382  // If Op has other users, assume that all elements are needed.
2383  if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
2384  DemandedElts.setAllBits();
2385 
2386  // Not demanding any elements from Op.
2387  if (DemandedElts == 0) {
2388  KnownUndef.setAllBits();
2389  return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2390  }
2391 
2392  // Limit search depth.
2393  if (Depth >= SelectionDAG::MaxRecursionDepth)
2394  return false;
2395 
2396  SDLoc DL(Op);
2397  unsigned EltSizeInBits = VT.getScalarSizeInBits();
2398 
2399  // Helper for demanding the specified elements and all the bits of both binary
2400  // operands.
2401  auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
2402  SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
2403  TLO.DAG, Depth + 1);
2404  SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
2405  TLO.DAG, Depth + 1);
2406  if (NewOp0 || NewOp1) {
2407  SDValue NewOp = TLO.DAG.getNode(
2408  Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1);
2409  return TLO.CombineTo(Op, NewOp);
2410  }
2411  return false;
2412  };
2413 
2414  switch (Opcode) {
2415  case ISD::SCALAR_TO_VECTOR: {
2416  if (!DemandedElts[0]) {
2417  KnownUndef.setAllBits();
2418  return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2419  }
2420  KnownUndef.setHighBits(NumElts - 1);
2421  break;
2422  }
2423  case ISD::BITCAST: {
2424  SDValue Src = Op.getOperand(0);
2425  EVT SrcVT = Src.getValueType();
2426 
2427  // We only handle vectors here.
2428  // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
2429  if (!SrcVT.isVector())
2430  break;
2431 
2432  // Fast handling of 'identity' bitcasts.
2433  unsigned NumSrcElts = SrcVT.getVectorNumElements();
2434  if (NumSrcElts == NumElts)
2435  return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
2436  KnownZero, TLO, Depth + 1);
2437 
2438  APInt SrcZero, SrcUndef;
2439  APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);
2440 
2441  // Bitcast from a 'large element' src vector to a 'small element' vector:
2442  // we must demand a source element if any DemandedElt maps to it.
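// E.g. (illustrative): for v2i32 -> v4i16 (little endian), demanding i16
// element 1 demands i32 element 0; if that source element is zero, both
// i16 elements 0 and 1 are known zero.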
2443  if ((NumElts % NumSrcElts) == 0) {
2444  unsigned Scale = NumElts / NumSrcElts;
2445  for (unsigned i = 0; i != NumElts; ++i)
2446  if (DemandedElts[i])
2447  SrcDemandedElts.setBit(i / Scale);
2448 
2449  if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2450  TLO, Depth + 1))
2451  return true;
2452 
2453  // Try calling SimplifyDemandedBits, converting demanded elts to the bits
2454  // of the large element.
2455  // TODO - bigendian once we have test coverage.
2456  if (TLO.DAG.getDataLayout().isLittleEndian()) {
2457  unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
2458  APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
2459  for (unsigned i = 0; i != NumElts; ++i)
2460  if (DemandedElts[i]) {
2461  unsigned Ofs = (i % Scale) * EltSizeInBits;
2462  SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
2463  }
2464 
2465  KnownBits Known;
2466  if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
2467  TLO, Depth + 1))
2468  return true;
2469  }
2470 
2471  // If the src element is zero/undef then the output elements it covers will
2472  // be as well - only demanded elements are guaranteed to be correct.
2473  for (unsigned i = 0; i != NumSrcElts; ++i) {
2474  if (SrcDemandedElts[i]) {
2475  if (SrcZero[i])
2476  KnownZero.setBits(i * Scale, (i + 1) * Scale);
2477  if (SrcUndef[i])
2478  KnownUndef.setBits(i * Scale, (i + 1) * Scale);
2479  }
2480  }
2481  }
2482 
2483  // Bitcast from a 'small element' src vector to a 'large element' vector:
2484  // we demand all the smaller source elements covered by the larger demanded
2485  // element of this vector.
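// E.g. (illustrative): for v4i16 -> v2i32, demanding i32 element 1
// demands i16 elements 2 and 3, and element 1 is known zero/undef only
// if both of those source elements are.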
2486  if ((NumSrcElts % NumElts) == 0) {
2487  unsigned Scale = NumSrcElts / NumElts;
2488  for (unsigned i = 0; i != NumElts; ++i)
2489  if (DemandedElts[i])
2490  SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);
2491 
2492  if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2493  TLO, Depth + 1))
2494  return true;
2495 
2496  // If all the src elements covering an output element are zero/undef, then
2497  // the output element will be as well, assuming it was demanded.
2498  for (unsigned i = 0; i != NumElts; ++i) {
2499  if (DemandedElts[i]) {
2500  if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
2501  KnownZero.setBit(i);
2502  if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
2503  KnownUndef.setBit(i);
2504  }
2505  }
2506  }
2507  break;
2508  }
2509  case ISD::BUILD_VECTOR: {
2510  // Check all elements and simplify any unused elements with UNDEF.
2511  if (!DemandedElts.isAllOnesValue()) {
2512  // Don't simplify BROADCASTS.
2513  if (llvm::any_of(Op->op_values(),
2514  [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
2515  SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
2516  bool Updated = false;
2517  for (unsigned i = 0; i != NumElts; ++i) {
2518  if (!DemandedElts[i] && !Ops[i].isUndef()) {
2519  Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
2520  KnownUndef.setBit(i);
2521  Updated = true;
2522  }
2523  }
2524  if (Updated)
2525  return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
2526  }
2527  }
2528  for (unsigned i = 0; i != NumElts; ++i) {
2529  SDValue SrcOp = Op.getOperand(i);
2530  if (SrcOp.isUndef()) {
2531  KnownUndef.setBit(i);
2532  } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
2533  (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
2534  KnownZero.setBit(i);
2535  }
2536  }
2537  break;
2538  }
2539  case ISD::CONCAT_VECTORS: {
2540  EVT SubVT = Op.getOperand(0).getValueType();
2541  unsigned NumSubVecs = Op.getNumOperands();
2542  unsigned NumSubElts = SubVT.getVectorNumElements();
2543  for (unsigned i = 0; i != NumSubVecs; ++i) {
2544  SDValue SubOp = Op.getOperand(i);
2545  APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
2546  APInt SubUndef, SubZero;
2547  if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
2548  Depth + 1))
2549  return true;
2550  KnownUndef.insertBits(SubUndef, i * NumSubElts);
2551  KnownZero.insertBits(SubZero, i * NumSubElts);
2552  }
2553  break;
2554  }
2555  case ISD::INSERT_SUBVECTOR: {
2556  // Demand any elements from the subvector and the remainder from the src
2557  // it's inserted into.
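// E.g. (illustrative): inserting a v2i32 subvector at index 2 of a v8i32,
// demanded result elements 2-3 come from the subvector and the rest from
// the source, so bits 2-3 are cleared in DemandedSrcElts below.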
2558  SDValue Src = Op.getOperand(0);
2559  SDValue Sub = Op.getOperand(1);
2560  uint64_t Idx = Op.getConstantOperandVal(2);
2561  unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
2562  APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
2563  APInt DemandedSrcElts = DemandedElts;
2564  DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
2565 
2566  APInt SubUndef, SubZero;
2567  if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
2568  Depth + 1))
2569  return true;
2570 
2571  // If none of the src operand elements are demanded, replace it with undef.
2572  if (!DemandedSrcElts && !Src.isUndef())
2573  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
2574  TLO.DAG.getUNDEF(VT), Sub,
2575  Op.getOperand(2)));
2576 
2577  if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
2578  TLO, Depth + 1))
2579  return true;
2580  KnownUndef.insertBits(SubUndef, Idx);
2581  KnownZero.insertBits(SubZero, Idx);
2582 
2583  // Attempt to avoid multi-use ops if we don't need anything from them.
2584  if (!DemandedSrcElts.isAllOnesValue() ||
2585  !DemandedSubElts.isAllOnesValue()) {
2586  SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
2587  Src, DemandedSrcElts, TLO.DAG, Depth + 1);
2588  SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
2589  Sub, DemandedSubElts, TLO.DAG, Depth + 1);
2590  if (NewSrc || NewSub) {
2591  NewSrc = NewSrc ? NewSrc : Src;
2592  NewSub = NewSub ? NewSub : Sub;
2593  SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
2594  NewSub, Op.getOperand(2));
2595  return TLO.CombineTo(Op, NewOp);
2596  }
2597  }
2598  break;
2599  }
2600  case ISD::EXTRACT_SUBVECTOR: {
2601  // Offset the demanded elts by the subvector index.
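// E.g. (illustrative): extracting a v4i32 subvector at index 4 of a
// v8i32, demanding result elements {0, 2} demands source elements {4, 6}.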
2602  SDValue Src = Op.getOperand(0);
2603  if (Src.getValueType().isScalableVector())
2604  break;
2605  uint64_t Idx = Op.getConstantOperandVal(1);
2606  unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
2607  APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
2608 
2609  APInt SrcUndef, SrcZero;
2610  if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
2611  Depth + 1))
2612  return true;
2613  KnownUndef = SrcUndef.extractBits(NumElts, Idx);
2614  KnownZero = SrcZero.extractBits(NumElts, Idx);
2615 
2616  // Attempt to avoid multi-use ops if we don't need anything from them.
2617  if (!DemandedElts.isAllOnesValue()) {
2618  SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
2619  Src, DemandedSrcElts, TLO.DAG, Depth + 1);
2620  if (NewSrc) {
2621  SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
2622  Op.getOperand(1));
2623  return TLO.CombineTo(Op, NewOp);
2624  }
2625  }
2626  break;
2627  }
2628  case ISD::INSERT_VECTOR_ELT: {
2629  SDValue Vec = Op.getOperand(0);
2630  SDValue Scl = Op.getOperand(1);
2631  auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
2632 
2633  // For a legal, constant insertion index, if we don't need this insertion
2634  // then strip it, else remove it from the demanded elts.
2635  if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
2636  unsigned Idx = CIdx->getZExtValue();
2637  if (!DemandedElts[Idx])
2638  return TLO.CombineTo(Op, Vec);
2639 
2640  APInt DemandedVecElts(DemandedElts);
2641  DemandedVecElts.clearBit(Idx);
2642  if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
2643  KnownZero, TLO, Depth + 1))
2644  return true;
2645 
2646  KnownUndef.setBitVal(Idx, Scl.isUndef());
2647 
2648  KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
2649  break;
2650  }
2651 
2652  APInt VecUndef, VecZero;
2653  if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
2654  Depth + 1))
2655  return true;
2656  // Without knowing the insertion index we can't set KnownUndef/KnownZero.
2657  break;
2658  }
2659  case ISD::VSELECT: {
2660  // Try to transform the select condition based on the current demanded
2661  // elements.
2662  // TODO: If a condition element is undef, we can choose from one arm of the
2663  // select (and if one arm is undef, then we can propagate that to the
2664  // result).
2665  // TODO - add support for constant vselect masks (see IR version of this).
2666  APInt UnusedUndef, UnusedZero;
2667  if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
2668  UnusedZero, TLO, Depth + 1))
2669  return true;
2670 
2671  // See if we can simplify either vselect operand.
2672  APInt DemandedLHS(DemandedElts);
2673  APInt DemandedRHS(DemandedElts);
2674  APInt UndefLHS, ZeroLHS;
2675  APInt UndefRHS, ZeroRHS;
2676  if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
2677  ZeroLHS, TLO, Depth + 1))
2678  return true;
2679  if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
2680  ZeroRHS, TLO, Depth + 1))
2681  return true;
2682 
2683  KnownUndef = UndefLHS & UndefRHS;
2684  KnownZero = ZeroLHS & ZeroRHS;
2685  break;
2686  }
2687  case ISD::VECTOR_SHUFFLE: {
2688  ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
2689 
2690  // Collect demanded elements from the shuffle operands.
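// E.g. (illustrative): for a v4i32 shuffle with mask <0, 5, 2, 7>,
// demanding all result elements demands elements {0, 2} of the LHS and
// {1, 3} of the RHS.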
2691  APInt DemandedLHS(NumElts, 0);
2692  APInt DemandedRHS(NumElts, 0);
2693  for (unsigned i = 0; i != NumElts; ++i) {
2694  int M = ShuffleMask[i];
2695  if (M < 0 || !DemandedElts[i])
2696  continue;
2697  assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
2698  if (M < (int)NumElts)
2699  DemandedLHS.setBit(M);
2700  else
2701  DemandedRHS.setBit(M - NumElts);
2702  }
2703 
2704  // See if we can simplify either shuffle operand.
2705  APInt UndefLHS, ZeroLHS;
2706  APInt UndefRHS, ZeroRHS;
2707  if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
2708  ZeroLHS, TLO, Depth + 1))
2709  return true;
2710  if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
2711  ZeroRHS, TLO, Depth + 1))
2712  return true;
2713 
2714  // Simplify mask using undef elements from LHS/RHS.
2715  bool Updated = false;
2716  bool IdentityLHS = true, IdentityRHS = true;
2717  SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
2718  for (unsigned i = 0; i != NumElts; ++i) {
2719  int &M = NewMask[i];
2720  if (M < 0)
2721  continue;
2722  if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
2723  (M >= (int)NumElts && UndefRHS[M - NumElts])) {
2724  Updated = true;
2725  M = -1;
2726  }
2727  IdentityLHS &= (M < 0) || (M == (int)i);
2728  IdentityRHS &= (M < 0) || ((M - NumElts) == i);
2729  }
2730 
2731  // Update legal shuffle masks based on demanded elements if it won't reduce
2732  // to Identity, which can cause premature removal of the shuffle mask.
2733  if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
2734  SDValue LegalShuffle =
2735  buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1),
2736  NewMask, TLO.DAG);
2737  if (LegalShuffle)
2738  return TLO.CombineTo(Op, LegalShuffle);
2739  }
2740 
2741  // Propagate undef/zero elements from LHS/RHS.
2742  for (unsigned i = 0; i != NumElts; ++i) {
2743  int M = ShuffleMask[i];
2744  if (M < 0) {
2745  KnownUndef.setBit(i);
2746  } else if (M < (int)NumElts) {
2747  if (UndefLHS[M])
2748  KnownUndef.setBit(i);
2749  if (ZeroLHS[M])
2750  KnownZero.setBit(i);
2751  } else {
2752  if (UndefRHS[M - NumElts])
2753  KnownUndef.setBit(i);
2754  if (ZeroRHS[M - NumElts])
2755  KnownZero.setBit(i);
2756  }
2757  }
2758  break;
2759  }
2760  case ISD::ANY_EXTEND_VECTOR_INREG:
2761  case ISD::SIGN_EXTEND_VECTOR_INREG:
2762  case ISD::ZERO_EXTEND_VECTOR_INREG: {
2763  APInt SrcUndef, SrcZero;
2764  SDValue Src = Op.getOperand(0);
2765  unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
2766  APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
2767  if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
2768  Depth + 1))
2769  return true;
2770  KnownZero = SrcZero.zextOrTrunc(NumElts);
2771  KnownUndef = SrcUndef.zextOrTrunc(NumElts);
2772 
2773  if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
2774  Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
2775  DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) {
2776  // aext - if we just need the bottom element then we can bitcast.
2777  return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2778  }
2779 
2780  if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
2781  // zext(undef) upper bits are guaranteed to be zero.
2782  if (DemandedElts.isSubsetOf(KnownUndef))
2783  return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
2784  KnownUndef.clearAllBits();
2785  }
2786  break;
2787  }
2788 
2789  // TODO: There are more binop opcodes that could be handled here - MIN,
2790  // MAX, saturated math, etc.
2791  case ISD::OR:
2792  case ISD::XOR:
2793  case ISD::ADD:
2794  case ISD::SUB:
2795  case ISD::FADD:
2796  case ISD::FSUB:
2797  case ISD::FMUL:
2798  case ISD::FDIV:
2799  case ISD::FREM: {
2800  SDValue Op0 = Op.getOperand(0);
2801  SDValue Op1 = Op.getOperand(1);
2802 
2803  APInt UndefRHS, ZeroRHS;
2804  if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
2805  Depth + 1))
2806  return true;
2807  APInt UndefLHS, ZeroLHS;
2808  if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
2809  Depth + 1))
2810  return true;
2811 
2812  KnownZero = ZeroLHS & ZeroRHS;
2813  KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
2814 
2815  // Attempt to avoid multi-use ops if we don't need anything from them.
2816  // TODO - use KnownUndef to relax the demandedelts?
2817  if (!DemandedElts.isAllOnesValue())
2818  if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
2819  return true;
2820  break;
2821  }
2822  case ISD::SHL:
2823  case ISD::SRL:
2824  case ISD::SRA:
2825  case ISD::ROTL:
2826  case ISD::ROTR: {
2827  SDValue Op0 = Op.getOperand(0);
2828  SDValue Op1 = Op.getOperand(1);
2829 
2830  APInt UndefRHS, ZeroRHS;
2831  if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
2832  Depth + 1))
2833  return true;
2834  APInt UndefLHS, ZeroLHS;
2835  if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
2836  Depth + 1))
2837  return true;
2838 
2839  KnownZero = ZeroLHS;
2840  KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
2841 
2842  // Attempt to avoid multi-use ops if we don't need anything from them.
2843  // TODO - use KnownUndef to relax the demandedelts?
2844  if (!DemandedElts.isAllOnesValue())
2845  if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
2846  return true;
2847  break;
2848  }
2849  case ISD::MUL:
2850  case ISD::AND: {
2851  SDValue Op0 = Op.getOperand(0);
2852  SDValue Op1 = Op.getOperand(1);
2853 
2854  APInt SrcUndef, SrcZero;
2855  if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
2856  Depth + 1))
2857  return true;
2858  if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero,
2859  TLO, Depth + 1))
2860  return true;
2861 
2862  // If either side has a zero element, then the result element is zero, even
2863  // if the other is an UNDEF.
2864  // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
2865  // and then handle 'and' nodes with the rest of the binop opcodes.
2866  KnownZero |= SrcZero;
2867  KnownUndef &= SrcUndef;
2868  KnownUndef &= ~KnownZero;
2869 
2870  // Attempt to avoid multi-use ops if we don't need anything from them.
2871  // TODO - use KnownUndef to relax the demandedelts?
2872  if (!DemandedElts.isAllOnesValue())
2873  if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
2874  return true;
2875  break;
2876  }
2877  case ISD::TRUNCATE:
2878  case ISD::SIGN_EXTEND:
2879  case ISD::ZERO_EXTEND:
2880  if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
2881  KnownZero, TLO, Depth + 1))
2882  return true;
2883 
2884  if (Op.getOpcode() == ISD::ZERO_EXTEND) {
2885  // zext(undef) upper bits are guaranteed to be zero.
2886  if (DemandedElts.isSubsetOf(KnownUndef))
2887  return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
2888  KnownUndef.clearAllBits();
2889  }
2890  break;
2891  default: {
2892  if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
2893  if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
2894  KnownZero, TLO, Depth))
2895  return true;
2896  } else {
2897  KnownBits Known;
2898  APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
2899  if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
2900  TLO, Depth, AssumeSingleUse))
2901  return true;
2902  }
2903  break;
2904  }
2905  }
2906  assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
2907 
2908  // Constant fold all undef cases.
2909  // TODO: Handle zero cases as well.
2910  if (DemandedElts.isSubsetOf(KnownUndef))
2911  return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2912 
2913  return false;
2914 }
2915 
2916 /// Determine which of the bits specified in Mask are known to be either zero or
2917 /// one and return them in the Known.
2918 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
2919  KnownBits &Known,
2920  const APInt &DemandedElts,
2921  const SelectionDAG &DAG,
2922  unsigned Depth) const {
2923  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2924  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2925  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2926  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2927  "Should use MaskedValueIsZero if you don't know whether Op"
2928  " is a target node!");
2929  Known.resetAll();
2930 }
2931 
2932 void TargetLowering::computeKnownBitsForTargetInstr(
2933  GISelKnownBits &Analysis, Register R, KnownBits &Known,
2934  const APInt &DemandedElts, const MachineRegisterInfo &MRI,
2935  unsigned Depth) const {
2936  Known.resetAll();
2937 }
2938 
2939 void TargetLowering::computeKnownBitsForFrameIndex(
2940  const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
2941  // The low bits are known zero if the pointer is aligned.
2942  Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
2943 }
2944 
2945 Align TargetLowering::computeKnownAlignForTargetInstr(
2946  GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
2947  unsigned Depth) const {
2948  return Align(1);
2949 }
2950 
2951 /// This method can be implemented by targets that want to expose additional
2952 /// information about sign bits to the DAG Combiner.
2953 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
2954  const APInt &,
2955  const SelectionDAG &,
2956  unsigned Depth) const {
2957  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2958  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2959  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2960  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2961  "Should use ComputeNumSignBits if you don't know whether Op"
2962  " is a target node!");
2963  return 1;
2964 }
2965 
2966 unsigned TargetLowering::computeNumSignBitsForTargetInstr(
2967  GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
2968  const MachineRegisterInfo &MRI, unsigned Depth) const {
2969  return 1;
2970 }
2971 
2972 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
2973  SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
2974  TargetLoweringOpt &TLO, unsigned Depth) const {
2975  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2976  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2977  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2978  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2979  "Should use SimplifyDemandedVectorElts if you don't know whether Op"
2980  " is a target node!");
2981  return false;
2982 }
2983 
2984 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
2985  SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
2986  KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
2987  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2988  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2989  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2990  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2991  "Should use SimplifyDemandedBits if you don't know whether Op"
2992  " is a target node!");
2993  computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
2994  return false;
2995 }
2996 
2997 SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
2998  SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
2999  SelectionDAG &DAG, unsigned Depth) const {
3000  assert(
3001  (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3002  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3003  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3004  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3005  "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3006  " is a target node!");
3007  return SDValue();
3008 }
3009 
3010 SDValue
3011 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3012  SDValue N1, MutableArrayRef<int> Mask,
3013  SelectionDAG &DAG) const {
3014  bool LegalMask = isShuffleMaskLegal(Mask, VT);
3015  if (!LegalMask) {
3016  std::swap(N0, N1);
3017  ShuffleVectorSDNode::commuteMask(Mask);
3018  LegalMask = isShuffleMaskLegal(Mask, VT);
3019  }
3020 
3021  if (!LegalMask)
3022  return SDValue();
3023 
3024  return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3025 }
3026 
3027 const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
3028  return nullptr;
3029 }
3030 
3031 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
3032  const SelectionDAG &DAG,
3033  bool SNaN,
3034  unsigned Depth) const {
3035  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3036  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3037  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3038  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3039  "Should use isKnownNeverNaN if you don't know whether Op"
3040  " is a target node!");
3041  return false;
3042 }
3043 
3044 // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3045 // work with truncating build vectors and vectors with elements of less than
3046 // 8 bits.
3047 bool TargetLowering::isConstTrueVal(const SDNode *N) const {
3048  if (!N)
3049  return false;
3050 
3051  APInt CVal;
3052  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
3053  CVal = CN->getAPIntValue();
3054  } else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) {
3055  auto *CN = BV->getConstantSplatNode();
3056  if (!CN)
3057  return false;
3058 
3059  // If this is a truncating build vector, truncate the splat value.
3060  // Otherwise, we may fail to match the expected values below.
3061  unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits();
3062  CVal = CN->getAPIntValue();
3063  if (BVEltWidth < CVal.getBitWidth())
3064  CVal = CVal.trunc(BVEltWidth);
3065  } else {
3066  return false;
3067  }
3068 
3069  switch (getBooleanContents(N->getValueType(0))) {
3070  case UndefinedBooleanContent:
3071  return CVal[0];
3072  case ZeroOrOneBooleanContent:
3073  return CVal.isOneValue();
3074  case ZeroOrNegativeOneBooleanContent:
3075  return CVal.isAllOnesValue();
3076  }
3077 
3078  llvm_unreachable("Invalid boolean contents");
3079 }
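// For example, under ZeroOrOneBooleanContent a splat of i32 1 is "true",
// under ZeroOrNegativeOneBooleanContent only all-ones (-1) is, and under
// UndefinedBooleanContent only bit 0 of the constant is inspected.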
3080 
3081 bool TargetLowering::isConstFalseVal(const SDNode *N) const {
3082  if (!N)
3083  return false;
3084 
3085  const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3086  if (!CN) {
3087  const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3088  if (!BV)
3089  return false;
3090 
3091  // Only interested in constant splats. We don't care about undef
3092  // elements in identifying boolean constants, and getConstantSplatNode
3093  // returns NULL if all ops are undef.
3094  CN = BV->getConstantSplatNode();
3095  if (!CN)
3096  return false;
3097  }
3098 
3099  if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3100  return !CN->getAPIntValue()[0];
3101 
3102  return CN->isNullValue();
3103 }
3104 
3105 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
3106  bool SExt) const {
3107  if (VT == MVT::i1)
3108  return N->isOne();
3109 
3110  TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
3111  switch (Cnt) {
3112  case TargetLowering::ZeroOrOneBooleanContent:
3113  // An extended value of 1 is always true, unless its original type is i1,
3114  // in which case it will be sign extended to -1.
3115  return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3116  case TargetLowering::UndefinedBooleanContent:
3117  case TargetLowering::ZeroOrNegativeOneBooleanContent:
3118  return N->isAllOnesValue() && SExt;
3119  }
3120  llvm_unreachable("Unexpected enumeration.");
3121 }
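// Roughly: zext i1 true -> i32 yields 1, an extended "true" when !SExt,
// while sext i1 true -> i32 yields -1, the extended "true" when SExt.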
3122 
3123 /// This helper function of SimplifySetCC tries to optimize the comparison when
3124 /// either operand of the SetCC node is a bitwise-and instruction.
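/// For example, when Y is known to be a power of two:
///   (X & Y) == Y  -->  (X & Y) != 0
/// and, on targets where hasAndNotCompare(Y) holds:
///   (X & Y) == Y  -->  (~X & Y) == 0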
3125 SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
3126  ISD::CondCode Cond, const SDLoc &DL,
3127  DAGCombinerInfo &DCI) const {
3128  // Match these patterns in any of their permutations:
3129  // (X & Y) == Y
3130  // (X & Y) != Y
3131  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
3132  std::swap(N0, N1);
3133 
3134  EVT OpVT = N0.getValueType();
3135  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
3136  (Cond != ISD::SETEQ && Cond != ISD::SETNE))
3137  return SDValue();
3138 
3139  SDValue X, Y;
3140  if (N0.getOperand(0) == N1) {
3141  X = N0.getOperand(1);
3142  Y = N0.getOperand(0);
3143  } else if (N0.getOperand(1) == N1) {
3144  X = N0.getOperand(0);
3145  Y = N0.getOperand(1);
3146  } else {
3147  return SDValue();
3148  }
3149 
3150  SelectionDAG &DAG = DCI.DAG;
3151  SDValue Zero = DAG.getConstant(0, DL, OpVT);
3152  if (DAG.isKnownToBeAPowerOfTwo(Y)) {
3153  // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
3154  // Note that where Y is variable and is known to have at most one bit set
3155  // (for example, if it is Z & 1) we cannot do this; the expressions are not
3156  // equivalent when Y == 0.
3157  assert(OpVT.isInteger());
3158  Cond = ISD::getSetCCInverse(Cond, OpVT);
3159  if (DCI.isBeforeLegalizeOps() ||
3160  isCondCodeLegal(Cond, N0.getSimpleValueType()))
3161  return DAG.getSetCC(DL, VT, N0, Zero, Cond);
3162  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
3163  // If the target supports an 'and-not' or 'and-complement' logic operation,
3164  // try to use that to make a comparison operation more efficient.
3165  // But don't do this transform if the mask is a single bit because there are
3166  // more efficient ways to deal with that case (for example, 'bt' on x86 or
3167  // 'rlwinm' on PPC).
3168 
3169  // Bail out if the compare operand that we want to turn into a zero is
3170  // already a zero (otherwise, infinite loop).
3171  auto *YConst = dyn_cast<ConstantSDNode>(Y);
3172  if (YConst && YConst->isNullValue())
3173  return SDValue();
3174 
3175  // Transform this into: ~X & Y == 0.
3176  SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
3177  SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
3178  return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
3179  }
3180 
3181  return SDValue();
3182 }
3183 
3184 /// There are multiple IR patterns that could be checking whether certain
3185 /// truncation of a signed number would be lossy or not. The pattern that is
3186 /// best at the IR level may not lower optimally. Thus, we want to unfold it.
3187 /// We are looking for the following pattern: (KeptBits is a constant)
3188 /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
3189 /// KeptBits won't be bitwidth(x); that case is constant-folded to true/false.
3190 /// KeptBits also can't be 1; that case would have been folded to %x dstcond 0.
3191 /// We will unfold it into the natural trunc+sext pattern:
3192 /// ((%x << C) a>> C) dstcond %x
3193 /// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
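/// For example, for i16 %x with KeptBits == 8 (so C == 8):
///   (add %x, 128) ult 256  -->  ((%x << 8) a>> 8) == %x
/// i.e. the check "%x fits in 8 signed bits".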
3194 SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
3195  EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
3196  const SDLoc &DL) const {
3197  // We must be comparing with a constant.
3198  ConstantSDNode *C1;
3199  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
3200  return SDValue();
3201 
3202  // N0 should be: add %x, (1 << (KeptBits-1))
3203  if (N0->getOpcode() != ISD::ADD)
3204  return SDValue();
3205 
3206  // And we must be 'add'ing a constant.
3207  ConstantSDNode *C01;
3208  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
3209  return SDValue();
3210 
3211  SDValue X = N0->getOperand(0);
3212  EVT XVT = X.getValueType();
3213 
3214  // Validate constants ...
3215 
3216  APInt I1 = C1->getAPIntValue();
3217 
3218  ISD::CondCode NewCond;
3219  if (Cond == ISD::CondCode::SETULT) {
3220  NewCond = ISD::CondCode::SETEQ;
3221  } else if (Cond == ISD::CondCode::SETULE) {
3222  NewCond = ISD::CondCode::SETEQ;
3223  // But need to 'canonicalize' the constant.
3224  I1 += 1;
3225  } else if (Cond == ISD::CondCode::SETUGT) {
3226  NewCond = ISD::CondCode::SETNE;
3227  // But need to 'canonicalize' the constant.
3228  I1 += 1;
3229  } else if (Cond == ISD::CondCode::SETUGE) {
3230  NewCond = ISD::CondCode::SETNE;
3231  } else
3232  return SDValue();
3233 
3234  APInt I01 = C01->getAPIntValue();
3235 
3236  auto checkConstants = [&I1, &I01]() -> bool {
3237  // Both of them must be power-of-two, and the constant from setcc is bigger.
3238  return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
3239  };
3240 
3241  if (checkConstants()) {
3242  // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
3243  } else {
3244  // What if we invert constants? (and the target predicate)
3245  I1.negate();
3246  I01.negate();
3247  assert(XVT.isInteger());
3248  NewCond = getSetCCInverse(NewCond, XVT);
3249  if (!checkConstants())
3250  return SDValue();
3251  // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
3252  }
3253 
3254  // They are power-of-two, so which bit is set?
3255  const unsigned KeptBits = I1.logBase2();
3256  const unsigned KeptBitsMinusOne = I01.logBase2();
3257 
3258  // Magic!
3259  if (KeptBits != (KeptBitsMinusOne + 1))
3260  return SDValue();
3261  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
3262 
3263  // We don't want to do this in every single case.
3264  SelectionDAG &DAG = DCI.DAG;
3265  if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
3266  XVT, KeptBits))
3267  return SDValue();
3268 
3269  const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
3270  assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
3271 
3272  // Unfold into: ((%x << C) a>> C) cond %x
3273  // Where 'cond' will be either 'eq' or 'ne'.
3274  SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
3275  SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
3276  SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
3277  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
3278 
3279  return T2;
3280 }
3281 
3282 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
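// For example: (X & (1 << Y)) != 0 --> ((X l>> Y) & 1) != 0, trading the
// shifted-constant mask for a shift of X, which can be cheaper on targets
// without a bit-test instruction (subject to the target hook queried below).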
3283 SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
3284  EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
3285  DAGCombinerInfo &DCI, const SDLoc &DL) const {
3286  assert(isConstOrConstSplat(N1C) &&
3287  isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
3288  "Should be a comparison with 0.");
3289  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3290  "Valid only for [in]equality comparisons.");
3291 
3292  unsigned NewShiftOpcode;
3293  SDValue X, C, Y;
3294 
3295  SelectionDAG &DAG = DCI.DAG;
3296  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3297 
3298  // Look for '(C l>>/<< Y)'.
3299  auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
3300  // The shift should be one-use.
3301  if (!V.hasOneUse())
3302  return false;
3303  unsigned OldShiftOpcode = V.getOpcode();
3304  switch (OldShiftOpcode) {
3305  case ISD::SHL:
3306  NewShiftOpcode = ISD::SRL;
3307  break;
3308  case ISD::SRL:
3309  NewShiftOpcode = ISD::SHL;
3310  break;
3311  default:
3312  return false; // must be a logical shift.
3313  }
3314  // We should be shifting a constant.
3315  // FIXME: best to use isConstantOrConstantVector().
3316  C = V.getOperand(0);
3317  ConstantSDNode *CC =
3318  isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
3319  if (!CC)
3320  return false;
3321  Y = V.getOperand(1);
3322 
3323  ConstantSDNode *XC =
3324  isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
3325  return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
3326  X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
3327  };
3328 
3329  // LHS of comparison should be a one-use 'and'.
3330  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
3331  return SDValue();
3332 
3333  X = N0.getOperand(0);
3334  SDValue Mask = N0.getOperand(1);
3335 
3336  // 'and' is commutative!
3337  if (!Match(Mask)) {
3338  std::swap(X, Mask);
3339  if (!Match(Mask))
3340  return SDValue();
3341  }
3342 
3343  EVT VT = X.getValueType();
3344 
3345  // Produce:
3346  // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
3347  SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
3348  SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
3349  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
3350  return T2;
3351 }
3352 
3353 /// Try to fold an equality comparison with a {add/sub/xor} binary operation as
3354 /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
3355 /// handle the commuted versions of these patterns.
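/// For example:
///   (X + Y) == X  -->  Y == 0
///   (X - Y) == Y  -->  X == (Y << 1)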
3356 SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
3357  ISD::CondCode Cond, const SDLoc &DL,
3358  DAGCombinerInfo &DCI) const {
3359  unsigned BOpcode = N0.getOpcode();
3360  assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
3361  "Unexpected binop");
3362  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
3363 
3364  // (X + Y) == X --> Y == 0
3365  // (X - Y) == X --> Y == 0
3366  // (X ^ Y) == X --> Y == 0
3367  SelectionDAG &DAG = DCI.DAG;
3368  EVT OpVT = N0.getValueType();
3369  SDValue X = N0.getOperand(0);
3370  SDValue Y = N0.getOperand(1);
3371  if (X == N1)
3372  return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
3373 
3374  if (Y != N1)
3375  return SDValue();
3376 
3377  // (X + Y) == Y --> X == 0
3378  // (X ^ Y) == Y --> X == 0
3379  if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
3380  return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
3381 
3382  // The shift would not be valid if the operands are boolean (i1).
3383  if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
3384  return SDValue();
3385 
3386  // (X - Y) == Y --> X == Y << 1
3387  EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
3388  !DCI.isBeforeLegalize());
3389  SDValue One = DAG.getConstant(1, DL, ShiftVT);
3390  SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
3391  if (!DCI.isCalledByLegalizer())
3392  DCI.AddToWorklist(YShl1.getNode());
3393  return DAG.getSetCC(DL, VT, X, YShl1, Cond);
3394 }
3395 
3396 static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
3397  SDValue N0, const APInt &C1,
3398  ISD::CondCode Cond, const SDLoc &dl,
3399  SelectionDAG &DAG) {
3400  // Look through truncs that don't change the value of a ctpop.
3401  // FIXME: Add vector support? Need to be careful with setcc result type below.
3402  SDValue CTPOP = N0;
3403  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
3404  N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
3405  CTPOP = N0.getOperand(0);
3406 
3407  if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
3408  return SDValue();
3409 
3410  EVT CTVT = CTPOP.getValueType();
3411  SDValue CTOp = CTPOP.getOperand(0);
3412 
3413  // If this is a vector CTPOP, keep the CTPOP if it is legal.
3414  // TODO: Should we check if CTPOP is legal(or custom) for scalars?
3415  if (VT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
3416  return SDValue();
3417 
3418  // (ctpop x) u< 2 -> (x & x-1) == 0
3419  // (ctpop x) u> 1 -> (x & x-1) != 0
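// More generally, (ctpop x) u< n is expanded into n-1 clear-lowest-bit
// steps; e.g. (ctpop x) u< 3 becomes ((x & (x-1)) & ((x & (x-1)) - 1)) == 0.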
3420  if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
3421  unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
3422  if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
3423  return SDValue();
3424  if (C1 == 0 && (Cond == ISD::SETULT))
3425  return SDValue(); // This is handled elsewhere.
3426 
3427  unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
3428 
3429  SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
3430  SDValue Result = CTOp;
3431  for (unsigned i = 0; i < Passes; i++) {
3432  SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
3433  Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
3434  }
3435  ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
3436  return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
3437  }
3438 
3439  // If ctpop is not supported, expand a power-of-2 comparison based on it.
3440  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
3441  // For scalars, keep CTPOP if it is legal or custom.
3442  if (!VT.isVector() && TLI.isOperationLegalOrCustom(ISD::CTPOP, CTVT))
3443  return SDValue();
3444  // This is based on X86's custom lowering for CTPOP which produces more
3445  // instructions than the expansion here.
3446 
3447  // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
3448  // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
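// e.g. x = 0b0100: x != 0 and (x & (x-1)) == 0, so (ctpop x) == 1 is true;
// x = 0b0110: x & (x-1) = 0b0100 != 0, so it is false.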
3449  SDValue Zero = DAG.getConstant(0, dl, CTVT);
3450  SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
3451  assert(CTVT.isInteger());
3452  ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
3453  SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
3454  SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
3455  SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
3456  SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
3457  unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
3458  return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
3459  }
3460 
3461  return SDValue();
3462 }
3463 
3464 /// Try to simplify a setcc built with the specified operands and cc. If it is
3465 /// unable to simplify it, return a null SDValue.
3466 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
3467  ISD::CondCode Cond, bool foldBooleans,
3468  DAGCombinerInfo &DCI,
3469  const SDLoc &dl) const {
3470  SelectionDAG &DAG = DCI.DAG;
3471  const DataLayout &Layout = DAG.getDataLayout();
3472  EVT OpVT = N0.getValueType();
3473 
3474  // Constant fold or commute setcc.
3475  if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
3476  return Fold;
3477 
3478  // Ensure that the constant occurs on the RHS and fold constant comparisons.
3479  // TODO: Handle non-splat vector constants. All undef causes trouble.
3480  // FIXME: We can't yet fold constant scalable vector splats, so avoid an
3481  // infinite loop here when we encounter one.
3482  ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
3483  if (isConstOrConstSplat(N0) &&
3484  (!OpVT.isScalableVector() || !isConstOrConstSplat(N1)) &&
3485  (DCI.isBeforeLegalizeOps() ||
3486  isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
3487  return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
3488 
3489  // If we have a subtract with the same 2 non-constant operands as this setcc
3490  // -- but in reverse order -- then try to commute the operands of this setcc
3491  // to match. A matching pair of setcc (cmp) and sub may be combined into 1
3492  // instruction on some targets.
3493  if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
3494  (DCI.isBeforeLegalizeOps() ||
3495  isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
3496  DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
3497  !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
3498  return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
3499 
3500  if (auto *N1C = isConstOrConstSplat(N1)) {
3501  const APInt &C1 = N1C->getAPIntValue();
3502 
3503  // Optimize some CTPOP cases.
3504  if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
3505  return V;
3506 
3507  // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
3508  // equality comparison, then we're just comparing whether X itself is
3509  // zero.
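// (For i32, ctlz returns 32 only for x == 0, and bit 5 of the result is set
// only for the value 32, so the srl isolates exactly the x == 0 case.)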
3510  if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
3511  N0.getOperand(0).getOpcode() == ISD::CTLZ &&
3512  isPowerOf2_32(N0.getScalarValueSizeInBits())) {
3513  if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
3514  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3515  ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
3516  if ((C1 == 0) == (Cond == ISD::SETEQ)) {
3517  // (srl (ctlz x), 5) == 0 -> X != 0
3518  // (srl (ctlz x), 5) != 1 -> X != 0
3519  Cond = ISD::SETNE;
3520  } else {
3521  // (srl (ctlz x), 5) != 0 -> X == 0
3522  // (srl (ctlz x), 5) == 1 -> X == 0
3523  Cond = ISD::SETEQ;
3524  }
3525  SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
3526  return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
3527  Cond);
3528  }
3529  }
3530  }
3531  }
3532 
3533  // FIXME: Support vectors.
3534  if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
3535  const APInt &C1 = N1C->getAPIntValue();
3536 
3537  // (zext x) == C --> x == (trunc C)
3538  // (sext x) == C --> x == (trunc C)
3539  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3540  DCI.isBeforeLegalize() && N0->hasOneUse()) {
3541  unsigned MinBits = N0.getValueSizeInBits();
3542  SDValue PreExt;
3543  bool Signed = false;
3544  if (N0->getOpcode() == ISD::ZERO_EXTEND) {
3545  // ZExt
3546  MinBits = N0->getOperand(0).getValueSizeInBits();
3547  PreExt = N0->getOperand(0);
3548  } else if (N0->getOpcode() == ISD::AND) {
3549  // DAGCombine turns costly ZExts into ANDs
3550  if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
3551  if ((C->getAPIntValue()+1).isPowerOf2()) {
3552  MinBits = C->getAPIntValue().countTrailingOnes();
3553  PreExt = N0->getOperand(0);
3554  }
3555  } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
3556  // SExt
3557  MinBits = N0->getOperand(0).getValueSizeInBits();
3558  PreExt = N0->getOperand(0);
3559  Signed = true;
3560  } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
3561  // ZEXTLOAD / SEXTLOAD
3562  if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
3563  MinBits = LN0->getMemoryVT().getSizeInBits();
3564  PreExt = N0;
3565  } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
3566  Signed = true;
3567  MinBits = LN0->getMemoryVT().getSizeInBits();
3568  PreExt = N0;
3569  }
3570  }
3571 
3572  // Figure out how many bits we need to preserve this constant.
3573  unsigned ReqdBits = Signed ?
3574  C1.getBitWidth() - C1.getNumSignBits() + 1 :
3575  C1.getActiveBits();
3576 
3577  // Make sure we're not losing bits from the constant.
3578  if (MinBits > 0 &&
3579  MinBits < C1.getBitWidth() &&
3580  MinBits >= ReqdBits) {
3581  EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
3582  if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
3583  // Will get folded away.
3584  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
3585  if (MinBits == 1 && C1 == 1)
3586  // Invert the condition.
3587  return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
3588  Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3589  SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
3590  return DAG.getSetCC(dl, VT, Trunc, C, Cond);
3591  }
3592 
3593  // If truncating the setcc operands is not desirable, we can still
3594  // simplify the expression in some cases:
3595  // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
3596  // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
3597  // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
3598  // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
3599  // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
3600  // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
3601  SDValue TopSetCC = N0->getOperand(0);
3602  unsigned N0Opc = N0->getOpcode();
3603  bool SExt = (N0Opc == ISD::SIGN_EXTEND);
3604  if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
3605  TopSetCC.getOpcode() == ISD::SETCC &&
3606  (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
3607  (isConstFalseVal(N1C) ||
3608  isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
3609 
3610  bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
3611  (!N1C->isNullValue() && Cond == ISD::SETNE);
3612 
3613  if (!Inverse)
3614  return TopSetCC;
3615 
3616  ISD::CondCode InvCond = ISD::getSetCCInverse(
3617  cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
3618  TopSetCC.getOperand(0).getValueType());
3619  return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
3620  TopSetCC.getOperand(1),
3621  InvCond);
3622  }
3623  }
3624  }
3625 
3626  // If the LHS is '(and load, const)', the RHS is 0, the test is for
3627  // equality or unsigned, and all 1 bits of the const are in the same
3628  // partial word, see if we can shorten the load.
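// e.g. on a little-endian target, ((i32 load p) & 0xFF00) == 0 can shrink
// to ((i8 load p+1) & 0xFF) == 0, an 8-bit load at byte offset 1.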
3629  if (DCI.isBeforeLegalize() &&
3630  !ISD::isSignedIntSetCC(Cond) &&
3631  N0.getOpcode() == ISD::AND && C1 == 0 &&
3632  N0.getNode()->hasOneUse() &&
3633  isa<LoadSDNode>(N0.getOperand(0)) &&
3634  N0.getOperand(0).getNode()->hasOneUse() &&
3635  isa<ConstantSDNode>(N0.getOperand(1))) {
3636  LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
3637  APInt bestMask;
3638  unsigned bestWidth = 0, bestOffset = 0;
3639  if (Lod->isSimple() && Lod->isUnindexed()) {
3640  unsigned origWidth = N0.getValueSizeInBits();
3641  unsigned maskWidth = origWidth;
3642  // We can narrow (e.g.) 16-bit extending loads on a 32-bit target to
3643  // 8 bits, but have to be careful...
3644  if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
3645  origWidth = Lod->getMemoryVT().getSizeInBits();
3646  const APInt &Mask = N0.getConstantOperandAPInt(1);
3647  for (unsigned width = origWidth / 2; width>=8; width /= 2) {
3648  APInt newMask = APInt::getLowBitsSet(maskWidth, width);
3649  for (unsigned offset=0; offset<origWidth/width; offset++) {
3650  if (Mask.isSubsetOf(newMask)) {
3651  if (Layout.isLittleEndian())
3652  bestOffset = (uint64_t)offset * (width/8);
3653  else
3654  bestOffset = (origWidth/width - offset - 1) * (width/8);
3655  bestMask = Mask.lshr(offset * (width/8) * 8);
3656  bestWidth = width;
3657  break;
3658  }
3659  newMask <<= width;
3660  }
3661  }
3662  }
3663  if (bestWidth) {
3664  EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
3665  if (newVT.isRound() &&
3666  shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
3667  SDValue Ptr = Lod->getBasePtr();
3668  if (bestOffset != 0)
3669  Ptr =
3670  DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset), dl);
3671  SDValue NewLoad =
3672  DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
3673  Lod->getPointerInfo().getWithOffset(bestOffset),
3674  Lod->getOriginalAlign());
3675  return DAG.getSetCC(dl, VT,
3676  DAG.getNode(ISD::AND, dl, newVT, NewLoad,
3677  DAG.getConstant(bestMask.trunc(bestWidth),
3678  dl, newVT)),
3679  DAG.getConstant(0LL, dl, newVT), Cond);
3680  }
3681  }
3682  }
3683 
3684  // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
3685  if (N0.getOpcode() == ISD::ZERO_EXTEND) {
3686  unsigned InSize = N0.getOperand(0).getValueSizeInBits();
3687 
3688  // If the comparison constant has bits in the upper part, the
3689  // zero-extended value could never match.
3690  if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
3691  C1.getBitWidth() - InSize))) {
3692  switch (Cond) {
3693  case ISD::SETUGT:
3694  case ISD::SETUGE:
3695  case ISD::SETEQ:
3696  return DAG.getConstant(0, dl, VT);
3697  case ISD::SETULT:
3698  case ISD::SETULE:
3699  case ISD::SETNE:
3700  return DAG.getConstant(1, dl, VT);
3701  case ISD::SETGT:
3702  case ISD::SETGE:
3703  // True if the sign bit of C1 is set.
3704  return DAG.getConstant(C1.isNegative(), dl, VT);
3705  case ISD::SETLT:
3706  case ISD::SETLE:
3707  // True if the sign bit of C1 isn't set.
3708  return DAG.getConstant(C1.isNonNegative(), dl, VT);
3709  default:
3710  break;
3711  }
3712  }
3713 
3714  // Otherwise, we can perform the comparison with the low bits.
3715  switch (Cond) {
3716  case ISD::SETEQ:
3717  case ISD::SETNE:
3718  case ISD::SETUGT:
3719  case ISD::SETUGE:
3720  case ISD::SETULT:
3721  case ISD::SETULE: {
3722  EVT newVT = N0.getOperand(0).getValueType();
3723  if (DCI.isBeforeLegalizeOps() ||
3724  (isOperationLegal(ISD::SETCC, newVT) &&
3725  isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
3726  EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
3727  SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
3728 
3729  SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
3730  NewConst, Cond);
3731  return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
3732  }
3733  break;
3734  }
3735  default:
3736  break; // TODO: Be more careful with signed comparisons.
3737  }
3738  } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3739  (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3740  !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
3741  OpVT)) {
3742  EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
3743  unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
3744  EVT ExtDstTy = N0.getValueType();
3745  unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
3746 
3747  // If the constant doesn't fit into the number of bits for the source of
3748  // the sign extension, it is impossible for both sides to be equal.
3749  if (C1.getMinSignedBits() > ExtSrcTyBits)
3750  return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
3751 
3752  assert(ExtDstTy == N0.getOperand(0).getValueType() &&
3753  ExtDstTy != ExtSrcTy && "Unexpected types!");
3754  APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
3755  SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
3756  DAG.getConstant(Imm, dl, ExtDstTy));
3757  if (!DCI.isCalledByLegalizer())
3758  DCI.AddToWorklist(ZextOp.getNode());
3759  // Otherwise, make this a use of a zext.
3760  return DAG.getSetCC(dl, VT, ZextOp,
3761  DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
3762  } else if ((N1C->isNullValue() || N1C->isOne()) &&
3763  (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3764  // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
3765  if (N0.getOpcode() == ISD::SETCC &&
3766  isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
3767  (N0.getValueType() == MVT::i1 ||
3768  getBooleanContents(N0.getOperand(0).getValueType()) ==
3769  ZeroOrOneBooleanContent)) {
3770  bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
3771  if (TrueWhenTrue)
3772  return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
3773  // Invert the condition.
3774  ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
3775  CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
3776  if (DCI.isBeforeLegalizeOps() ||
3777  isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
3778  return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
3779  }
3780 
3781  if ((N0.getOpcode() == ISD::XOR ||
3782  (N0.getOpcode() == ISD::AND &&
3783  N0.getOperand(0).getOpcode() == ISD::XOR &&
3784  N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
3785  isOneConstant(N0.getOperand(1))) {
3786  // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
3787  // can only do this if the top bits are known zero.
3788  unsigned BitWidth = N0.getValueSizeInBits();
3789  if (DAG.MaskedValueIsZero(N0,
3790  APInt::getHighBitsSet(BitWidth,
3791  BitWidth-1))) {
3792  // Okay, get the un-inverted input value.
3793  SDValue Val;
3794  if (N0.getOpcode() == ISD::XOR) {
3795  Val = N0.getOperand(0);
3796  } else {
3797  assert(N0.getOpcode() == ISD::AND &&
3798  N0.getOperand(0).getOpcode() == ISD::XOR);
3799  // ((X^1)&1)^1 -> X & 1
3800  Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
3801  N0.getOperand(0).getOperand(0),
3802  N0.getOperand(1));
3803  }
3804 
3805  return DAG.getSetCC(dl, VT, Val, N1,
3806  Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3807  }
3808  } else if (N1C->isOne()) {
3809  SDValue Op0 = N0;
3810  if (Op0.getOpcode() == ISD::TRUNCATE)
3811  Op0 = Op0.getOperand(0);
3812 
3813  if ((Op0.getOpcode() == ISD::XOR) &&
3814  Op0.getOperand(0).getOpcode() == ISD::SETCC &&
3815  Op0.getOperand(1).getOpcode() == ISD::SETCC) {
3816  SDValue XorLHS = Op0.getOperand(0);
3817  SDValue XorRHS = Op0.getOperand(1);
3818  // Ensure that the input setccs return an i1 type or 0/1 value.
3819  if (Op0.getValueType() == MVT::i1 ||
3820  (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
3821  ZeroOrOneBooleanContent &&
3822  getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
3823  ZeroOrOneBooleanContent)) {
3824  // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
3825  Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
3826  return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
3827  }
3828  }
3829  if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
3830  // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
3831  if (Op0.getValueType().bitsGT(VT))
3832  Op0 = DAG.getNode(ISD::AND, dl, VT,
3833  DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
3834  DAG.getConstant(1, dl, VT));
3835  else if (Op0.getValueType().bitsLT(VT))
3836  Op0 = DAG.getNode(ISD::AND, dl, VT,
3837  DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
3838  DAG.getConstant(1, dl, VT));
3839 
3840  return DAG.getSetCC(dl, VT, Op0,
3841  DAG.getConstant(0, dl, Op0.getValueType()),
3842  Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3843  }
3844  if (Op0.getOpcode() == ISD::AssertZext &&
3845  cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
3846  return DAG.getSetCC(dl, VT, Op0,
3847  DAG.getConstant(0, dl, Op0.getValueType()),
3848  Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3849  }
3850  }
3851 
3852  // Given:
3853  // icmp eq/ne (urem %x, %y), 0
3854  // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
3855  // icmp eq/ne %x, 0
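// (%x with at most one bit set is 0 or a power of two, whose only divisors
// are powers of two; %y with two or more bits set is not a power of two,
// so the remainder is 0 exactly when %x is 0.)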
3856  if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() &&
3857  (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3858  KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
3859  KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
3860  if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
3861  return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
3862  }
3863 
3864  if (SDValue V =
3865  optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
3866  return V;
3867  }
3868 
3869  // These simplifications apply to splat vectors as well.
3870  // TODO: Handle more splat vector cases.
3871  if (auto *N1C = isConstOrConstSplat(N1)) {
3872  const APInt &C1 = N1C->getAPIntValue();
3873 
3874  APInt MinVal, MaxVal;
3875  unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
3876  if (ISD::isSignedIntSetCC(Cond)) {
3877  MinVal = APInt::getSignedMinValue(OperandBitSize);
3878  MaxVal = APInt::getSignedMaxValue(OperandBitSize);
3879  } else {
3880  MinVal = APInt::getMinValue(OperandBitSize);
3881  MaxVal = APInt::getMaxValue(OperandBitSize);
3882  }
3883 
3884  // Canonicalize GE/LE comparisons to use GT/LT comparisons.
3885  if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
3886  // X >= MIN --> true
3887  if (C1 == MinVal)
3888  return DAG.getBoolConstant(true, dl, VT, OpVT);
3889 
3890  if (!VT.isVector()) { // TODO: Support this for vectors.
3891  // X >= C0 --> X > (C0 - 1)
3892  APInt C = C1 - 1;
3893  ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
3894  if ((DCI.isBeforeLegalizeOps() ||
3895  isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
3896  (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
3897  isLegalICmpImmediate(C.getSExtValue())))) {
3898  return DAG.getSetCC(dl, VT, N0,
3899  DAG.getConstant(C, dl, N1.getValueType()),
3900  NewCC);
3901  }
3902  }
3903  }
3904 
3905  if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
3906  // X <= MAX --> true
3907  if (C1 == MaxVal)
3908  return DAG.getBoolConstant(true, dl, VT, OpVT);
3909 
3910  // X <= C0 --> X < (C0 + 1)
3911  if (!VT.isVector()) { // TODO: Support this for vectors.
3912  APInt C = C1 + 1;
3913  ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
3914  if ((DCI.isBeforeLegalizeOps() ||
3915  isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
3916  (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
3917  isLegalICmpImmediate(C.getSExtValue())))) {
3918  return DAG.getSetCC(dl, VT, N0,
3919  DAG.getConstant(C, dl, N1.getValueType()),
3920  NewCC);
3921  }
3922  }
3923  }
3924 
3925  if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
3926  if (C1 == MinVal)
3927  return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
3928 
3929  // TODO: Support this for vectors after legalize ops.
3930  if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3931  // Canonicalize setlt X, Max --> setne X, Max
3932  if (C1 == MaxVal)
3933  return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
3934 
3935  // If we have setult X, 1, turn it into seteq X, 0
3936  if (C1 == MinVal+1)
3937  return DAG.getSetCC(dl, VT, N0,
3938  DAG.getConstant(MinVal, dl, N0.getValueType()),
3939  ISD::SETEQ);
3940  }
3941  }
3942 
3943  if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
3944  if (C1 == MaxVal)
3945  return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
3946 
3947  // TODO: Support this for vectors after legalize ops.
3948  if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3949  // Canonicalize setgt X, Min --> setne X, Min
3950  if (C1 == MinVal)
3951  return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
3952 
3953  // If we have setugt X, Max-1, turn it into seteq X, Max
3954  if (C1 == MaxVal-1)
3955  return DAG.getSetCC(dl, VT, N0,
3956  DAG.getConstant(MaxVal, dl, N0.getValueType()),
3957  ISD::SETEQ);
3958  }
3959  }
3960 
3961  if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
3962  // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
3963  if (C1.isNullValue())
3964  if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
3965  VT, N0, N1, Cond, DCI, dl))
3966  return CC;
3967 
3968  // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
3969  // For example, when high 32-bits of i64 X are known clear:
3970  // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
3971  // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
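// Concretely, MergeConcat below masks Hi to its low half first, so for i64
// the checks become (Lo | (Hi & 0xFFFFFFFF)) == 0 and
// (Lo & (Hi & 0xFFFFFFFF)) == 0xFFFFFFFF respectively.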
3972  bool CmpZero = N1C->getAPIntValue().isNullValue();
3973  bool CmpNegOne = N1C->getAPIntValue().isAllOnesValue();
3974  if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
3975  // Match or(lo,shl(hi,bw/2)) pattern.
3976  auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
3977  unsigned EltBits = V.getScalarValueSizeInBits();
3978  if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
3979  return false;
3980  SDValue LHS = V.getOperand(0);
3981  SDValue RHS = V.getOperand(1);
3982  APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
3983  // Unshifted element must have zero upper bits.
3984  if (RHS.getOpcode() == ISD::SHL &&
3985  isa<ConstantSDNode>(RHS.getOperand(1)) &&
3986  RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
3987  DAG.MaskedValueIsZero(LHS, HiBits)) {
3988  Lo = LHS;
3989  Hi = RHS.getOperand(0);
3990  return true;
3991  }
3992  if (LHS.getOpcode() == ISD::SHL &&
3993  isa<ConstantSDNode>(LHS.getOperand(1)) &&
3994  LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
3995  DAG.MaskedValueIsZero(RHS, HiBits)) {
3996  Lo = RHS;
3997  Hi = LHS.getOperand(0);
3998  return true;
3999  }
4000  return false;
4001  };
4002 
4003  auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
4004  unsigned EltBits = N0.getScalarValueSizeInBits();
4005  unsigned HalfBits = EltBits / 2;
4006  APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
4007  SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
4008  SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
4009  SDValue NewN0 =
4010  DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
4011  SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
4012  return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
4013  };
4014 
4015  SDValue Lo, Hi;
4016  if (IsConcat(N0, Lo, Hi))
4017  return MergeConcat(Lo, Hi);
4018 
4019  if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
4020  SDValue Lo0, Lo1, Hi0, Hi1;
4021  if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
4022  IsConcat(N0.getOperand(1), Lo1, Hi1)) {
4023  return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
4024  DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
4025  }
4026  }
4027  }
4028  }
4029 
4030  // If we have "setcc X, C0", check to see if we can shrink the immediate
4031  // by changing cc.
4032  // TODO: Support this for vectors after legalize ops.
4033  if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4034  // SETUGT X, SINTMAX -> SETLT X, 0
4035  // SETUGE X, SINTMIN -> SETLT X, 0
4036  if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
4037  (Cond == ISD::SETUGE && C1.isMinSignedValue()))
4038  return DAG.getSetCC(dl, VT, N0,
4039  DAG.getConstant(0, dl, N1.getValueType()),
4040  ISD::SETLT);
4041 
4042  // SETULT X, SINTMIN -> SETGT X, -1
4043  // SETULE X, SINTMAX -> SETGT X, -1
4044  if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
4045  (Cond == ISD::SETULE && C1.isMaxSignedValue()))
4046  return DAG.getSetCC(dl, VT, N0,
4047  DAG.getAllOnesConstant(dl, N1.getValueType()),
4048  ISD::SETGT);
4049  }
4050  }
4051 
4052  // Back to non-vector simplifications.
4053  // TODO: Can we do these for vector splats?
4054  if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4055  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4056  const APInt &C1 = N1C->getAPIntValue();
4057  EVT ShValTy = N0.getValueType();
4058 
4059  // Fold bit comparisons when we can. This will result in an
4060  // incorrect value when boolean false is negative one, unless
4061  // the bitsize is 1 in which case the false value is the same
4062  // in practice regardless of the representation.
4063  if ((VT.getSizeInBits() == 1 ||
4064  getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
4065  (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4066  (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
4067  N0.getOpcode() == ISD::AND) {
4068  if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4069  EVT ShiftTy =
4070  getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
4071  if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
4072  // Perform the xform if the AND RHS is a single bit.
4073  unsigned ShCt = AndRHS->getAPIntValue().logBase2();
4074  if (AndRHS->getAPIntValue().isPowerOf2() &&
4075  !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
4076  return DAG.getNode(ISD::TRUNCATE, dl, VT,
4077  DAG.getNode(ISD::SRL, dl, ShValTy, N0,
4078  DAG.getConstant(ShCt, dl, ShiftTy)));
4079  }
4080  } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
4081  // (X & 8) == 8 --> (X & 8) >> 3
4082  // Perform the xform if C1 is a single bit.
4083  unsigned ShCt = C1.logBase2();
4084  if (C1.isPowerOf2() &&
4085  !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
4086  return DAG.getNode(ISD::TRUNCATE, dl, VT,
4087  DAG.getNode(ISD::SRL, dl, ShValTy, N0,
4088  DAG.getConstant(ShCt, dl, ShiftTy)));
4089  }
4090  }
4091  }
4092  }
4093 
4094  if (C1.getMinSignedBits() <= 64 &&
4095  !isLegalICmpImmediate(C1.getSExtValue())) {
4096  EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
4097  // (X & -256) == 256 -> (X >> 8) == 1
4098  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4099  N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
4100  if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4101  const APInt &AndRHSC = AndRHS->getAPIntValue();
4102  if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
4103  unsigned ShiftBits = AndRHSC.countTrailingZeros();
4104  if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
4105  SDValue Shift =
4106  DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
4107  DAG.getConstant(ShiftBits, dl, ShiftTy));
4108  SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
4109  return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
4110  }
4111  }
4112  }
4113  } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
4114  Cond == ISD::SETULE || Cond == ISD::SETUGT) {
4115  bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
4116  // X < 0x100000000 -> (X >> 32) < 1
4117  // X >= 0x100000000 -> (X >> 32) >= 1
4118  // X <= 0x0ffffffff -> (X >> 32) < 1
4119  // X > 0x0ffffffff -> (X >> 32) >= 1
4120  unsigned ShiftBits;
4121  APInt NewC = C1;
4122  ISD::CondCode NewCond = Cond;
4123  if (AdjOne) {
4124  ShiftBits = C1.countTrailingOnes();
4125  NewC = NewC + 1;
4126  NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
4127  } else {
4128  ShiftBits = C1.countTrailingZeros();
4129  }
4130  NewC.lshrInPlace(ShiftBits);
4131  if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
4132  isLegalICmpImmediate(NewC.getSExtValue()) &&
4133  !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
4134  SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
4135  DAG.getConstant(ShiftBits, dl, ShiftTy));
4136  SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
4137  return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
4138  }
4139  }
4140  }
4141  }
4142 
4143  if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
4144  auto *CFP = cast<ConstantFPSDNode>(N1);
4145  assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
4146 
4147  // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
4148  // constant if knowing that the operand is non-nan is enough. We prefer to
4149  // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
4150  // materialize 0.0.
4151  if (Cond == ISD::SETO || Cond == ISD::SETUO)
4152  return DAG.getSetCC(dl, VT, N0, N0, Cond);
4153 
4154  // setcc (fneg x), C -> setcc swap(pred) x, -C
4155  if (N0.getOpcode() == ISD::FNEG) {
4156  ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
4157  if (DCI.isBeforeLegalizeOps() ||
4158  isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
4159  SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
4160  return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
4161  }
4162  }
4163 
4164  // If the condition is not legal, see if we can find an equivalent one
4165  // which is legal.
4166  if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
4167  // If the comparison was an awkward floating-point == or != and one of
4168  // the comparison operands is infinity or negative infinity, convert the
4169  // condition to a less-awkward <= or >=.
4170  if (CFP->getValueAPF().isInfinity()) {
4171  bool IsNegInf = CFP->getValueAPF().isNegative();
4172  ISD::CondCode NewCond = ISD::SETCC_INVALID;
4173  switch (Cond) {
4174  case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
4175  case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
4176  case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
4177  case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
4178  default: break;
4179  }
4180  if (NewCond != ISD::SETCC_INVALID &&
4181  isCondCodeLegal(NewCond, N0.getSimpleValueType()))
4182  return DAG.getSetCC(dl, VT, N0, N1, NewCond);
4183  }
4184  }
4185  }
4186 
4187  if (N0 == N1) {
4188  // The sext(setcc()) => setcc() optimization relies on the appropriate
4189  // constant being emitted.
4190  assert(!N0.getValueType().isInteger() &&
4191  "Integer types should be handled by FoldSetCC");
4192 
4193  bool EqTrue = ISD::isTrueWhenEqual(Cond);
4194  unsigned UOF = ISD::getUnorderedFlavor(Cond);
4195  if (UOF == 2) // FP operators that are undefined on NaNs.
4196  return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
4197  if (UOF == unsigned(EqTrue))
4198  return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
4199  // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
4200  // if it is not already.
4201  ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
4202  if (NewCond != Cond &&
4203  (DCI.isBeforeLegalizeOps() ||
4204  isCondCodeLegal(NewCond, N0.getSimpleValueType())))
4205  return DAG.getSetCC(dl, VT, N0, N1, NewCond);
4206  }
4207 
4208  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4209  N0.getValueType().isInteger()) {
4210  if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
4211  N0.getOpcode() == ISD::XOR) {
4212  // Simplify (X+Y) == (X+Z) --> Y == Z
4213  if (N0.getOpcode() == N1.getOpcode()) {
4214  if (N0.getOperand(0) == N1.getOperand(0))
4215  return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
4216  if (N0.getOperand(1) == N1.getOperand(1))
4217  return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
4218  if (isCommutativeBinOp(N0.getOpcode())) {
4219  // If X op Y == Y op X, try other combinations.
4220  if (N0.getOperand(0) == N1.getOperand(1))
4221  return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
4222  Cond);
4223  if (N0.getOperand(1) == N1.getOperand(0))
4224  return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
4225  Cond);
4226  }
4227  }
4228 
4229  // If RHS is a legal immediate value for a compare instruction, we need
4230  // to be careful about increasing register pressure needlessly.
4231  bool LegalRHSImm = false;
4232 
4233  if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
4234  if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4235  // Turn (X+C1) == C2 --> X == C2-C1
4236  if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
4237  return DAG.getSetCC(dl, VT, N0.getOperand(0),
4238  DAG.getConstant(RHSC->getAPIntValue()-
4239  LHSR->getAPIntValue(),
4240  dl, N0.getValueType()), Cond);
4241  }
4242 
4243  // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
4244  if (N0.getOpcode() == ISD::XOR)
4245  // If we know that all of the inverted bits are zero, don't bother
4246  // performing the inversion.
4247  if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
4248  return
4249  DAG.getSetCC(dl, VT, N0.getOperand(0),
4250  DAG.getConstant(LHSR->getAPIntValue() ^
4251  RHSC->getAPIntValue(),
4252  dl, N0.getValueType()),
4253  Cond);
4254  }
4255 
4256  // Turn (C1-X) == C2 --> X == C1-C2
4257  if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
4258  if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
4259  return
4260  DAG.getSetCC(dl, VT, N0.getOperand(1),
4261  DAG.getConstant(SUBC->getAPIntValue() -
4262  RHSC->getAPIntValue(),
4263  dl, N0.getValueType()),
4264  Cond);
4265  }
4266  }
4267 
4268  // Could RHSC fold directly into a compare?
4269  if (RHSC->getValueType(0).getSizeInBits() <= 64)
4270  LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
4271  }
4272 
4273  // (X+Y) == X --> Y == 0 and similar folds.
4274  // Don't do this if X is an immediate that can fold into a cmp
4275  // instruction and X+Y has other uses. It could be an induction variable
4276  // chain, and the transform would increase register pressure.
4277  if (!LegalRHSImm || N0.hasOneUse())
4278  if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
4279  return V;
4280  }
4281 
4282  if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
4283  N1.getOpcode() == ISD::XOR)
4284  if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
4285  return V;
4286 
4287  if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
4288  return V;
4289  }
4290 
4291  // Fold remainder of division by a constant.
4292  if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
4293  N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4294  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4295 
4296  // When division is cheap or optimizing for minimum size,
4297  // fall through to DIVREM creation by skipping this fold.
4298  if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) {
4299  if (N0.getOpcode() == ISD::UREM) {
4300  if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
4301  return Folded;
4302  } else if (N0.getOpcode() == ISD::SREM) {
4303  if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
4304  return Folded;
4305  }
4306  }
4307  }
4308 
4309  // Fold away ALL boolean setcc's.
4310  if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
4311  SDValue Temp;
4312  switch (Cond) {
4313  default: llvm_unreachable("Unknown integer setcc!");
4314  case ISD::SETEQ: // X == Y -> ~(X^Y)
4315  Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
4316  N0 = DAG.getNOT(dl, Temp, OpVT);
4317  if (!DCI.isCalledByLegalizer())
4318  DCI.AddToWorklist(Temp.getNode());
4319  break;
4320  case ISD::SETNE: // X != Y --> (X^Y)
4321  N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
4322  break;
4323  case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
4324  case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
4325  Temp = DAG.getNOT(dl, N0, OpVT);
4326  N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
4327  if (!DCI.isCalledByLegalizer())
4328  DCI.AddToWorklist(Temp.getNode());
4329  break;
4330  case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
4331  case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
4332  Temp = DAG.getNOT(dl, N1, OpVT);
4333  N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
4334  if (!DCI.isCalledByLegalizer())
4335  DCI.AddToWorklist(Temp.getNode());
4336  break;
4337  case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
4338  case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
4339  Temp = DAG.getNOT(dl, N0, OpVT);
4340  N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
4341  if (!DCI.isCalledByLegalizer())
4342  DCI.AddToWorklist(Temp.getNode());
4343  break;
4344  case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
4345  case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
4346  Temp = DAG.getNOT(dl, N1, OpVT);
4347  N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
4348  break;
4349  }
4350  if (VT.getScalarType() != MVT::i1) {
4351  if (!DCI.isCalledByLegalizer())
4352  DCI.AddToWorklist(N0.getNode());
4353  // FIXME: If running after legalize, we probably can't do this.
4354  ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
4355  N0 = DAG.getNode(ExtendCode, dl, VT, N0);
4356  }
4357  return N0;
4358  }
4359 
4360  // Could not fold it.
4361  return SDValue();
4362 }
4363 
4364 /// Returns true (and the GlobalValue and the offset) if the node is a
4365 /// GlobalAddress + offset.
4366 bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
4367  int64_t &Offset) const {
4368 
4369  SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
4370 
4371  if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
4372  GA = GASD->getGlobal();
4373  Offset += GASD->getOffset();
4374  return true;
4375  }
4376 
4377  if (N->getOpcode() == ISD::ADD) {
4378  SDValue N1 = N->getOperand(0);
4379  SDValue N2 = N->getOperand(1);
4380  if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
4381  if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
4382  Offset += V->getSExtValue();
4383  return true;
4384  }
4385  } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
4386  if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
4387  Offset += V->getSExtValue();
4388  return true;
4389  }
4390  }
4391  }
4392 
4393  return false;
4394 }
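// For example, (add (GlobalAddress @g + 8), (Constant 4)) reports GA = @g
// and Offset = 12.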
4395 
4396 SDValue TargetLowering::PerformDAGCombine(SDNode *N,
4397  DAGCombinerInfo &DCI) const {
4398  // Default implementation: no optimization.
4399  return SDValue();
4400 }
4401 
4402 //===----------------------------------------------------------------------===//
4403 // Inline Assembler Implementation Methods
4404 //===----------------------------------------------------------------------===//
4405 
4406 TargetLowering::ConstraintType
4407 TargetLowering::getConstraintType(StringRef Constraint) const {
4408  unsigned S = Constraint.size();
4409 
4410  if (S == 1) {
4411  switch (Constraint[0]) {
4412  default: break;
4413  case 'r':
4414  return C_RegisterClass;
4415  case 'm': // memory
4416  case 'o': // offsetable
4417  case 'V': // not offsetable
4418  return C_Memory;
4419  case 'n': // Simple Integer
4420  case 'E': // Floating Point Constant
4421  case 'F': // Floating Point Constant
4422  return C_Immediate;
4423  case 'i': // Simple Integer or Relocatable Constant
4424  case 's': // Relocatable Constant
4425  case 'p': // Address.
4426  case 'X': // Allow ANY value.
4427  case 'I': // Target registers.
4428  case 'J':
4429  case 'K':
4430  case 'L':
4431  case 'M':
4432  case 'N':
4433  case 'O':
4434  case 'P':
4435  case '<':
4436  case '>':
4437  return C_Other;
4438  }
4439  }
4440 
4441  if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
4442  if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
4443  return C_Memory;
4444  return C_Register;
4445  }
4446  return C_Unknown;
4447 }
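// For example, "r" maps to C_RegisterClass, "m" to C_Memory, "n" to
// C_Immediate, "{eax}" to C_Register, and the special "{memory}" spelling
// to C_Memory.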
4448 
4449 /// Try to replace an X constraint, which matches anything, with another that
4450 /// has more specific requirements based on the type of the corresponding
4451 /// operand.
4452 const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
4453