//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/DivisionByConstantInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cctype>
using namespace llvm;

/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}

const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}

bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}

/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &F = DAG.getMachineFunction().getFunction();

  // First, check if tail calls have been disabled in this function.
  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
    return false;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore the following attributes because they don't affect the
  // call sequence.
  AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
                           Attribute::NonNull, Attribute::NoUndef})
    CallerAttrs.removeAttribute(Attr);

  if (CallerAttrs.hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.contains(Attribute::ZExt) ||
      CallerAttrs.contains(Attribute::SExt))
    return false;

  // Check if the only use is a function return node.
  return isUsedByReturnOnly(Node, Chain);
}
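
// Illustrative note (not from the original source): in IR terms, the check
// above accepts a call that feeds straight into the return with matching
// return attributes, e.g.
//   %r = tail call i32 @callee(i32 %x)
//   ret i32 %r
// while a caller whose return is marked zeroext/signext (an extension the
// call itself does not guarantee) is conservatively rejected.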

bool TargetLowering::parametersInCSRMatch(
    const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask,
    const SmallVectorImpl<CCValAssign> &ArgLocs,
    const SmallVectorImpl<SDValue> &OutVals) const {
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    if (!ArgLoc.isRegLoc())
      continue;
    MCRegister Reg = ArgLoc.getLocReg();
    // Only look at callee-saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;
    // Check that we pass the value used for the caller.
    // (We look for a CopyFromReg reading a virtual register that is used
    //  for the function live-in value of register Reg)
    SDValue Value = OutVals[I];
    if (Value->getOpcode() == ISD::AssertZext)
      Value = Value.getOperand(0);
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
      return false;
  }
  return true;
}

/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}
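
// Illustrative example (not from the original source): for a call such as
//   call void @f(ptr sret(%struct.S) %p)
// the code above sets IsSRet and leaves IndirectType pointing at %struct.S;
// the assert guarantees at most one of byval/preallocated/inalloca/sret is
// present on a single argument, so IndirectType is unambiguous.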

/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
                            ArrayRef<SDValue> Ops,
                            MakeLibCallOptions CallOptions,
                            const SDLoc &dl,
                            SDValue InChain) const {
  if (!InChain)
    InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  TargetLowering::ArgListEntry Entry;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    Entry.Node = NewOp;
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
                                                 CallOptions.IsSExt);
    Entry.IsZExt = !Entry.IsSExt;

    if (CallOptions.IsSoften &&
        !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(Entry);
  }

  if (LC == RTLIB::UNKNOWN_LIBCALL)
    report_fatal_error("Unsupported library call operation!");
  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                         getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  TargetLowering::CallLoweringInfo CLI(DAG);
  bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
  bool zeroExtend = !signExtend;

  if (CallOptions.IsSoften &&
      !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
    signExtend = zeroExtend = false;
  }

  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setDiscardResult(!CallOptions.IsReturnValueUsed)
      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}
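
// Usage sketch (hypothetical caller, not from the original source; assumes
// TLI, DAG, LHS, RHS, and dl are in scope and f128 FADD is libcall-lowered):
//   SDValue Ops[2] = {LHS, RHS};
//   TargetLowering::MakeLibCallOptions CallOptions;
//   std::pair<SDValue, SDValue> Res =
//       TLI.makeLibCall(DAG, RTLIB::ADD_F128, MVT::f128, Ops, CallOptions, dl);
//   // Res.first is the returned value, Res.second the output chain.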

bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::i64;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::i64;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector loads / stores for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      bool Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
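
// Worked example (illustrative): for a 13-byte memcpy on a target where i64
// is legal and misaligned accesses are fast, the loop above first emits an
// i64 (8 bytes, 5 remain). With Op.allowOverlap() it can keep VT = i64 and
// issue a second, overlapping 8-byte access covering the tail, giving
// MemOps = {i64, i64}; without overlap it steps down through the type list
// and produces MemOps = {i64, i32, i8} (8 + 4 + 1 = 13).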

/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS) const {
  SDValue Chain;
  return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
                             OldRHS, Chain);
}

void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to
  // libgcc not supporting them. We can update this code when libgcc provides
  // such functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 ||
          VT == MVT::ppcf128) &&
         "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target-specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getCmpLibcallCC(LC2);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    NewRHS = SDValue();
  }
}
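
// Illustrative walk-through (not from the original source): softening an f32
// SETUEQ picks LC1 = RTLIB::UO_F32 (__unordsf2) and LC2 = RTLIB::OEQ_F32
// (__eqsf2). The result is built as
//   (setcc (__unordsf2 a, b), 0, SETNE) | (setcc (__eqsf2 a, b), 0, SETEQ)
// i.e. "unordered OR equal", with NewRHS cleared to signal that NewLHS is
// already a boolean.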

/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
  // In non-pic modes, just use the address of a block.
  if (!isPositionIndependent())
    return MachineJumpTableInfo::EK_BlockAddress;

  // In PIC mode, if the target supports a GPRel32 directive, use it.
  if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
    return MachineJumpTableInfo::EK_GPRel32BlockAddress;

  // Otherwise, use a label difference.
  return MachineJumpTableInfo::EK_LabelDifference32;
}

SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  // If our PIC model is GP relative, use the global offset table as the base.
  unsigned JTEncoding = getJumpTableEncoding();

  if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
      (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));

  return Table;
}

/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI,
                                             MCContext &Ctx) const {
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}

bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  const TargetMachine &TM = getTargetMachine();
  const GlobalValue *GV = GA->getGlobal();

  // If the address is not even local to this DSO we will have to load it from
  // the GOT and then add the offset.
  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    return false;

  // If the code is position independent we will have to add a base register.
  if (isPositionIndependent())
    return false;

  // Otherwise we can do it.
  return true;
}

//===----------------------------------------------------------------------===//
// Optimization Methods
//===----------------------------------------------------------------------===//

/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}
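
// Worked example (illustrative): with DemandedBits = 0x00FF, the node
// (and X, 0x1FF0F) has constant bits (0x1FF00) outside the demanded mask, so
// the switch above rebuilds it as (and X, 0x0F) via DemandedBits & C; an
// (xor X, -1) is left alone because the 'not' form is canonical.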

bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            TargetLoweringOpt &TLO) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}

/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
/// generalized for targets with other types of implicit widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &Demanded,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (Op.getValueType().isVector())
    return false;

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned DemandedSize = Demanded.getActiveBits();
  unsigned SmallVTBits = DemandedSize;
  if (!isPowerOf2_32(SmallVTBits))
    SmallVTBits = NextPowerOf2(SmallVTBits);
  for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
        TLI.isZExtFree(SmallVT, Op.getValueType())) {
      // We found a type with free casts.
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}
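
// Worked example (illustrative): on a 64-bit target where truncating i64 to
// i32 and zero-extending back are both free, an i64 add of which only the low
// 20 bits are demanded is rewritten by the loop above as
//   (any_extend (add (trunc x), (trunc y)))
// at i32, the first power-of-2 width >= 20 bits with free casts.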

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          const APInt &DemandedElts,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified =
      SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();

  // TODO: We can probably do more work on calculating the known bits and
  // simplifying the operations for scalable vectors, but for now we just
  // bail out.
  if (VT.isScalableVector()) {
    // Pretend we don't know anything for now.
    Known = KnownBits(DemandedBits.getBitWidth());
    return false;
  }

  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
                              AssumeSingleUse);
}

// TODO: Under what circumstances can we create nodes? Constant folding?
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  EVT VT = Op.getValueType();

  // Pretend we don't know anything about scalable vectors for now.
  // TODO: We can probably do more work on simplifying the operations for
  // scalable vectors, but for now we just bail out.
  if (VT.isScalableVector())
    return SDValue();

  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(VT);

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = DemandedElts.getBitWidth();
  unsigned BitWidth = DemandedBits.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (SrcVT == DstVT)
      return Src;

    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
    if (NumSrcEltBits == NumDstEltBits)
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);

    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
        unsigned BitOffset = EltOffset * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
        if (!Sub.isZero()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // TODO - bigendian once we have test coverage.
    if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::SHL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (const APInt *MaxSA =
            DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = MaxSA->getZExtValue();
      unsigned NumSignBits =
          DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return Op0;
    }
    break;
  }
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return Op0;
    }
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // If none of the extended bits are demanded, eliminate the sextinreg.
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExBits = ExVT.getScalarSizeInBits();
    if (DemandedBits.getActiveBits() <= ExBits)
      return Op0;
    // If the input is already sign extended, just drop the extension.
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
    if (NumSignBits >= (BitWidth - ExBits + 1))
      return Op0;
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    // If we only want the lowest element and none of the extended bits, then
    // we can return the bitcasted source vector.
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (IsLE && DemandedElts == 1 &&
        DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
        DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
      return DAG.getBitcast(DstVT, Src);
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    // If we don't demand the inserted element, return the base vector.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    SDValue Vec = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    // If we don't demand the inserted subvector, return the base vector.
    if (DemandedSubElts == 0)
      return Vec;
    // If this simply widens the lowest subvector, see if we can do it earlier.
    // TODO: REMOVE ME - SimplifyMultipleUseDemandedBits shouldn't be creating
    // general nodes like this.
    if (Idx == 0 && Vec.isUndef()) {
      if (SDValue NewSub = SimplifyMultipleUseDemandedBits(
              Sub, DemandedBits, DemandedSubElts, DAG, Depth + 1))
        return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                           Op.getOperand(0), NewSub, Op.getOperand(2));
    }
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts are from one operand and are inline,
    // then we can use the operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
      IdentityLHS &= (M == (int)i);
      IdentityRHS &= ((M - NumElts) == i);
    }

    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
              Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}
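
// Worked example (illustrative): for (and X, Y) where only the low byte is
// demanded and computeKnownBits proves Y's low 8 bits are all ones, the AND
// case above returns X directly, since the mask cannot affect any demanded
// bit; no new node is created and the multi-use operand is bypassed.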

SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
    unsigned Depth) const {
  EVT VT = Op.getValueType();

  // Pretend we don't know anything about scalable vectors for now.
  // TODO: We can probably do more work on simplifying the operations for
  // scalable vectors, but for now we just bail out.
  if (VT.isScalableVector())
    return SDValue();

  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
    SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
    unsigned Depth) const {
  APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1),
// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts,
                                 unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  //   add(ext, ext)
  // or one of these as an avgceil
  //   add(add(ext, ext), 1)
  //   add(add(ext, 1), ext)
  //   add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op1, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op2;
      ExtOpB = Op3;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      return true;
    }
    return false;
  };
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA));

  // If the shift is signed (sra):
  //  - Needs >= 2 sign bits for both operands.
  //  - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  //  - Needs >= 1 zero bit for both operands.
  //  - Needs 1 demanded bit zero and >= 2 sign bits.
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  unsigned KnownBits;
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
  EVT VT = Op.getValueType();
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), PowerOf2Ceil(MinWidth));
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT))
    return SDValue();

  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT, DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpA),
                  DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpB));
  return DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT,
                     ResultAVG);
}
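
// Worked example (illustrative): for v8i16 operands a, b that are zero
// extensions from i8, the pattern srl(add(zext a, zext b), 1) has at least 8
// known leading zero bits per operand, so MinWidth is max(16 - 8, 8) = 8 and
// the code above emits zext(avgflooru(trunc a, trunc b)) at v8i8, provided
// AVGFLOORU is legal or custom for v8i8 on the target.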

/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in Old and New. Otherwise, analyze the expression and
/// return a mask of Known bits for the expression (used to simplify the
/// caller). The Known bits may only be accurate for those bits in the
/// OriginalDemandedBits and OriginalDemandedElts.
bool TargetLowering::SimplifyDemandedBits(
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth, bool AssumeSingleUse) const {
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
  assert(Op.getScalarValueSizeInBits() == BitWidth &&
         "Mask size mismatches value type size!");

  // Don't know anything.
  Known = KnownBits(BitWidth);

  // TODO: We can probably do more work on calculating the known bits and
  // simplifying the operations for scalable vectors, but for now we just
  // bail out.
  EVT VT = Op.getValueType();
  if (VT.isScalableVector())
    return false;

  bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = OriginalDemandedElts.getBitWidth();
  assert((!VT.isVector() || NumElts == VT.getVectorNumElements()) &&
         "Unexpected vector size");

  APInt DemandedBits = OriginalDemandedBits;
  APInt DemandedElts = OriginalDemandedElts;
  SDLoc dl(Op);
  auto &DL = TLO.DAG.getDataLayout();

  // Undef operand.
  if (Op.isUndef())
    return false;

  // We can't simplify target constants.
  if (Op.getOpcode() == ISD::TargetConstant)
    return false;

  if (Op.getOpcode() == ISD::Constant) {
    // We know all of the bits for a constant!
    Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
    return false;
  }

  if (Op.getOpcode() == ISD::ConstantFP) {
    // We know all of the bits for a floating point constant!
    Known = KnownBits::makeConstant(
        cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
    return false;
  }

  // Other users may use these bits.
  bool HasMultiUse = false;
  if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
    if (Depth >= SelectionDAG::MaxRecursionDepth) {
      // Limit search depth.
      return false;
    }
    // Allow multiple uses, just set the DemandedBits/Elts to all bits.
    DemandedBits = APInt::getAllOnes(BitWidth);
    DemandedElts = APInt::getAllOnes(NumElts);
    HasMultiUse = true;
  } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
    // Not demanding any bits/elts from Op.
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
    // Limit search depth.
    return false;
  }

  KnownBits Known2;
  switch (Op.getOpcode()) {
  case ISD::SCALAR_TO_VECTOR: {
    if (!DemandedElts[0])
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

    KnownBits SrcKnown;
    SDValue Src = Op.getOperand(0);
    unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
    APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
    if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
      return true;

    // Upper elements are undef, so only get the knownbits if we just demand
    // the bottom element.
    if (DemandedElts == 1)
      Known = SrcKnown.anyextOrTrunc(BitWidth);
    break;
  }
  case ISD::BUILD_VECTOR:
    // Collect the known bits that are shared by every demanded element.
    // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    return false; // Don't fall through, will infinitely loop.
  case ISD::LOAD: {
    auto *LD = cast<LoadSDNode>(Op);
    if (getTargetConstantFromLoad(LD)) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false; // Don't fall through, will infinitely loop.
    }
    if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
      // If this is a ZEXTLoad and we are looking at the loaded value.
      EVT MemVT = LD->getMemoryVT();
      unsigned MemBits = MemVT.getScalarSizeInBits();
      Known.Zero.setBitsFrom(MemBits);
      return false; // Don't fall through, will infinitely loop.
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();

    // If index isn't constant, assume we need all vector elements AND the
    // inserted element.
    APInt DemandedVecElts(DemandedElts);
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
      unsigned Idx = CIdx->getZExtValue();
      DemandedVecElts.clearBit(Idx);

      // Inserted element is not required.
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);
    }

    KnownBits KnownScl;
    unsigned NumSclBits = Scl.getScalarValueSizeInBits();
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    Known = KnownScl.anyextOrTrunc(BitWidth);

    KnownBits KnownVec;
    if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
                             Depth + 1))
      return true;

    if (!!DemandedVecElts)
      Known = KnownBits::commonBits(Known, KnownVec);

    return false;
  }
  case ISD::INSERT_SUBVECTOR: {
    // Demand any elements from the subvector and the remainder from the src
    // it's inserted into.
    SDValue Src = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    APInt DemandedSrcElts = DemandedElts;
    DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);

    KnownBits KnownSub, KnownSrc;
    if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
                             Depth + 1))
      return true;

    Known.Zero.setAllBits();
    Known.One.setAllBits();
    if (!!DemandedSubElts)
      Known = KnownBits::commonBits(Known, KnownSub);
    if (!!DemandedSrcElts)
      Known = KnownBits::commonBits(Known, KnownSrc);

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
        !DemandedSrcElts.isAllOnes()) {
      SDValue NewSub = SimplifyMultipleUseDemandedBits(
          Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
      SDValue NewSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (NewSub || NewSrc) {
        NewSub = NewSub ? NewSub : Sub;
        NewSrc = NewSrc ? NewSrc : Src;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
                                        Op.getOperand(2));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    // Offset the demanded elts by the subvector index.
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType().isScalableVector())
      break;
    uint64_t Idx = Op.getConstantOperandVal(1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);

    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
                             Depth + 1))
      return true;

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
      SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (DemandedSrc) {
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
                                        Op.getOperand(1));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      APInt DemandedSubElts =
          DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
                               Known2, TLO, Depth + 1))
        return true;
      // Known bits are shared by every demanded subvector element.
      if (!!DemandedSubElts)
        Known = KnownBits::commonBits(Known, Known2);
    }
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands.
    APInt DemandedLHS(NumElts, 0);
    APInt DemandedRHS(NumElts, 0);
    for (unsigned i = 0; i != NumElts; ++i) {
      if (!DemandedElts[i])
        continue;
      int M = ShuffleMask[i];
      if (M < 0) {
        // For UNDEF elements, we don't know anything about the common state of
        // the shuffle result.
        DemandedLHS.clearAllBits();
        DemandedRHS.clearAllBits();
        break;
      }
      assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
      if (M < (int)NumElts)
        DemandedLHS.setBit(M);
      else
        DemandedRHS.setBit(M - NumElts);
    }

    if (!!DemandedLHS || !!DemandedRHS) {
      SDValue Op0 = Op.getOperand(0);
      SDValue Op1 = Op.getOperand(1);

      Known.Zero.setAllBits();
      Known.One.setAllBits();
      if (!!DemandedLHS) {
        if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = KnownBits::commonBits(Known, Known2);
      }
      if (!!DemandedRHS) {
        if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = KnownBits::commonBits(Known, Known2);
      }

      // Attempt to avoid multi-use ops if we don't need anything from them.
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If the RHS is a constant, check to see if the LHS would be zero without
    // using the bits from the RHS. Below, we use knowledge about the RHS to
    // simplify the LHS, here we're using information from the LHS to simplify
    // the RHS.
    if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
      // Do not increment Depth here; that can cause an infinite loop.
      KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
      // If the LHS already has zeros where RHSC does, this 'and' is dead.
      if ((LHSKnown.Zero & DemandedBits) ==
          (~RHSC->getAPIntValue() & DemandedBits))
        return TLO.CombineTo(Op, Op0);

      // If any of the set bits in the RHS are known zero on the LHS, shrink
      // the constant.
      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
                                 DemandedElts, TLO))
        return true;

      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
      // constant, but if this 'and' is only clearing bits that were just set by
      // the xor, then this 'and' can be eliminated by shrinking the mask of
      // the xor. For example, for a 32-bit X:
      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
          LHSKnown.One == ~RHSC->getAPIntValue()) {
        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
        return TLO.CombineTo(Op, Xor);
      }
    }

    // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
    // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
    if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR &&
        (Op0.getOperand(0).isUndef() ||
         ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
        Op0->hasOneUse()) {
      unsigned NumSubElts =
          Op0.getOperand(1).getValueType().getVectorNumElements();
      unsigned SubIdx = Op0.getConstantOperandVal(2);
      APInt DemandedSub =
          APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
      KnownBits KnownSubMask =
          TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
      if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
        SDValue NewAnd =
            TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
        SDValue NewInsert =
            TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
                            Op0.getOperand(1), Op0.getOperand(2));
        return TLO.CombineTo(Op, NewInsert);
      }
    }

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
      return TLO.CombineTo(Op, Op1);
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
                               TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    Known &= Known2;
    break;
  }
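
  // Worked example for the preceding ISD::AND case (illustrative): for
  // (and X, 0xFF), if every bit the mask would clear (bits 8 and up) is
  // already known zero in X, the 'and' is dead and X is returned directly;
  // otherwise set bits of the constant that are known zero in X are stripped,
  // e.g. (and X, 0xFF) becomes (and X, 0x0F) when X's bits 4-7 are known zero.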
1447  case ISD::OR: {
1448  SDValue Op0 = Op.getOperand(0);
1449  SDValue Op1 = Op.getOperand(1);
1450 
1451  if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1452  Depth + 1))
1453  return true;
1454  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1455  if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1456  Known2, TLO, Depth + 1))
1457  return true;
1458  assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1459 
1460  // If all of the demanded bits are known zero on one side, return the other.
1461  // These bits cannot contribute to the result of the 'or'.
1462  if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1463  return TLO.CombineTo(Op, Op0);
1464  if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1465  return TLO.CombineTo(Op, Op1);
1466  // If the RHS is a constant, see if we can simplify it.
1467  if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1468  return true;
1469  // If the operation can be done in a smaller type, do so.
1470  if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1471  return true;
1472 
1473  // Attempt to avoid multi-use ops if we don't need anything from them.
1474  if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1475  SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1476  Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1477  SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1478  Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1479  if (DemandedOp0 || DemandedOp1) {
1480  Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1481  Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1482  SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1483  return TLO.CombineTo(Op, NewOp);
1484  }
1485  }
1486 
1487  // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1488  // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1489  if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1490  Op0->hasOneUse() && Op1->hasOneUse()) {
1491  // Attempt to match all commutations - m_c_Or would've been useful!
1492  for (int I = 0; I != 2; ++I) {
1493  SDValue X = Op.getOperand(I).getOperand(0);
1494  SDValue C1 = Op.getOperand(I).getOperand(1);
1495  SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1496  SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1497  if (Alt.getOpcode() == ISD::OR) {
1498  for (int J = 0; J != 2; ++J) {
1499  if (X == Alt.getOperand(J)) {
1500  SDValue Y = Alt.getOperand(1 - J);
1501  if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1502  {C1, C2})) {
1503  SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1504  SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1505  return TLO.CombineTo(
1506  Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1507  }
1508  }
1509  }
1510  }
1511  }
1512  }
1513 
1514  Known |= Known2;
1515  break;
1516  }
1517  case ISD::XOR: {
1518  SDValue Op0 = Op.getOperand(0);
1519  SDValue Op1 = Op.getOperand(1);
1520 
1521  if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1522  Depth + 1))
1523  return true;
1524  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1525  if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1526  Depth + 1))
1527  return true;
1528  assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1529 
1530  // If all of the demanded bits are known zero on one side, return the other.
1531  // These bits cannot contribute to the result of the 'xor'.
1532  if (DemandedBits.isSubsetOf(Known.Zero))
1533  return TLO.CombineTo(Op, Op0);
1534  if (DemandedBits.isSubsetOf(Known2.Zero))
1535  return TLO.CombineTo(Op, Op1);
1536  // If the operation can be done in a smaller type, do so.
1537  if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1538  return true;
1539 
1540  // If all of the unknown bits are known to be zero on one side or the other
1541  // turn this into an *inclusive* or.
1542  // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1543  if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1544  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1545 
1546  ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1547  if (C) {
1548  // If one side is a constant, and all of the set bits in the constant are
1549  // also known set on the other side, turn this into an AND, as we know
1550  // the bits will be cleared.
1551  // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1552  // NB: it is okay if more bits are known than are requested
1553  if (C->getAPIntValue() == Known2.One) {
1554  SDValue ANDC =
1555  TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1556  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1557  }
1558 
1559  // If the RHS is a constant, see if we can change it. Don't alter a -1
1560  // constant because that's a 'not' op, and that is better for combining
1561  // and codegen.
1562  if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1563  // We're flipping all demanded bits. Flip the undemanded bits too.
1564  SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1565  return TLO.CombineTo(Op, New);
1566  }
1567 
1568  unsigned Op0Opcode = Op0.getOpcode();
1569  if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1570  if (ConstantSDNode *ShiftC =
1571  isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1572  // Don't crash on an oversized shift. We cannot guarantee that a
1573  // bogus shift has been simplified to undef.
1574  if (ShiftC->getAPIntValue().ult(BitWidth)) {
1575  uint64_t ShiftAmt = ShiftC->getZExtValue();
1576  APInt Ones = APInt::getAllOnes(BitWidth);
1577  Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1578  : Ones.lshr(ShiftAmt);
1579  const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
1580  if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1581  TLI.isDesirableToCommuteXorWithShift(Op.getNode())) {
1582  // If the xor constant is a demanded mask, do a 'not' before the
1583  // shift:
1584  // xor (X << ShiftC), XorC --> (not X) << ShiftC
1585  // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
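  // Illustrative i8 example: (X << 4) ^ 0xF0 flips exactly the four bits
  // produced by the shift, so it equals (~X) << 4; the shifted-in low
  // zero bits are unaffected either way.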
1586  SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1587  return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1588  Op0.getOperand(1)));
1589  }
1590  }
1591  }
1592  }
1593  }
1594 
1595  // If we can't turn this into a 'not', try to shrink the constant.
1596  if (!C || !C->isAllOnes())
1597  if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1598  return true;
1599 
1600  // Attempt to avoid multi-use ops if we don't need anything from them.
1601  if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1602  SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1603  Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1604  SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1605  Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1606  if (DemandedOp0 || DemandedOp1) {
1607  Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1608  Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1609  SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1610  return TLO.CombineTo(Op, NewOp);
1611  }
1612  }
1613 
1614  Known ^= Known2;
1615  break;
1616  }
1617  case ISD::SELECT:
1618  if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
1619  Depth + 1))
1620  return true;
1621  if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
1622  Depth + 1))
1623  return true;
1624  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1625  assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1626 
1627  // If the operands are constants, see if we can simplify them.
1628  if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1629  return true;
1630 
1631  // Only known if known in both the LHS and RHS.
1632  Known = KnownBits::commonBits(Known, Known2);
1633  break;
1634  case ISD::VSELECT:
1635  if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1636  Known, TLO, Depth + 1))
1637  return true;
1638  if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1639  Known2, TLO, Depth + 1))
1640  return true;
1641  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1642  assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1643 
1644  // Only known if known in both the LHS and RHS.
1645  Known = KnownBits::commonBits(Known, Known2);
1646  break;
1647  case ISD::SELECT_CC:
1648  if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
1649  Depth + 1))
1650  return true;
1651  if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
1652  Depth + 1))
1653  return true;
1654  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1655  assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1656 
1657  // If the operands are constants, see if we can simplify them.
1658  if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1659  return true;
1660 
1661  // Only known if known in both the LHS and RHS.
1662  Known = KnownBits::commonBits(Known, Known2);
1663  break;
1664  case ISD::SETCC: {
1665  SDValue Op0 = Op.getOperand(0);
1666  SDValue Op1 = Op.getOperand(1);
1667  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1668  // If (1) we only need the sign-bit, (2) the setcc operands are the same
1669  // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1670  // -1, we may be able to bypass the setcc.
1671  if (DemandedBits.isSignMask() &&
1672  Op0.getScalarValueSizeInBits() == BitWidth &&
1673  getBooleanContents(Op0.getValueType()) ==
1674  BooleanContent::ZeroOrNegativeOneBooleanContent) {
1675  // If we're testing X < 0, then this compare isn't needed - just use X!
1676  // FIXME: We're limiting to integer types here, but this should also work
1677  // if we don't care about FP signed-zero. The use of SETLT with FP means
1678  // that we don't care about NaNs.
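    // Illustrative example: with ZeroOrNegativeOne booleans, (setlt X, 0)
    // yields -1 exactly when X's sign bit is set, so the sign bit of the
    // setcc result always equals the sign bit of X itself.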
1679  if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1680  (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1681  return TLO.CombineTo(Op, Op0);
1682 
1683  // TODO: Should we check for other forms of sign-bit comparisons?
1684  // Examples: X <= -1, X >= 0
1685  }
1686  if (getBooleanContents(Op0.getValueType()) ==
1687  TargetLowering::ZeroOrOneBooleanContent &&
1688  BitWidth > 1)
1689  Known.Zero.setBitsFrom(1);
1690  break;
1691  }
1692  case ISD::SHL: {
1693  SDValue Op0 = Op.getOperand(0);
1694  SDValue Op1 = Op.getOperand(1);
1695  EVT ShiftVT = Op1.getValueType();
1696 
1697  if (const APInt *SA =
1698  TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1699  unsigned ShAmt = SA->getZExtValue();
1700  if (ShAmt == 0)
1701  return TLO.CombineTo(Op, Op0);
1702 
1703  // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1704  // single shift. We can do this if the bottom bits (which are shifted
1705  // out) are never demanded.
1706  // TODO - support non-uniform vector amounts.
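      // Illustrative i8 example: ((X >>u 2) << 5) agrees with (X << 3) on
      // bits 5-7; the two only differ in bits 3-4, so when the low 5 bits
      // are not demanded a single SHL by 5 - 2 == 3 suffices.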
1707  if (Op0.getOpcode() == ISD::SRL) {
1708  if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1709  if (const APInt *SA2 =
1710  TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1711  unsigned C1 = SA2->getZExtValue();
1712  unsigned Opc = ISD::SHL;
1713  int Diff = ShAmt - C1;
1714  if (Diff < 0) {
1715  Diff = -Diff;
1716  Opc = ISD::SRL;
1717  }
1718  SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1719  return TLO.CombineTo(
1720  Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1721  }
1722  }
1723  }
1724 
1725  // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1726  // are not demanded. This will likely allow the anyext to be folded away.
1727  // TODO - support non-uniform vector amounts.
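      // Illustrative example: (shl (anyext i8 X to i32), 3) with only bits
      // 0-7 demanded can become (anyext (shl i8 X, 3)): the narrow shift
      // already produces every demanded bit.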
1728  if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1729  SDValue InnerOp = Op0.getOperand(0);
1730  EVT InnerVT = InnerOp.getValueType();
1731  unsigned InnerBits = InnerVT.getScalarSizeInBits();
1732  if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1733  isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1734  EVT ShTy = getShiftAmountTy(InnerVT, DL);
1735  if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
1736  ShTy = InnerVT;
1737  SDValue NarrowShl =
1738  TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
1739  TLO.DAG.getConstant(ShAmt, dl, ShTy));
1740  return TLO.CombineTo(
1741  Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1742  }
1743 
1744  // Repeat the SHL optimization above in cases where an extension
1745  // intervenes: (shl (anyext (shr x, c1)), c2) to
1746  // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1747  // aren't demanded (as above) and that the shifted upper c1 bits of
1748  // x aren't demanded.
1749  // TODO - support non-uniform vector amounts.
1750  if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1751  InnerOp.hasOneUse()) {
1752  if (const APInt *SA2 =
1753  TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
1754  unsigned InnerShAmt = SA2->getZExtValue();
1755  if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1756  DemandedBits.getActiveBits() <=
1757  (InnerBits - InnerShAmt + ShAmt) &&
1758  DemandedBits.countTrailingZeros() >= ShAmt) {
1759  SDValue NewSA =
1760  TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1761  SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1762  InnerOp.getOperand(0));
1763  return TLO.CombineTo(
1764  Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1765  }
1766  }
1767  }
1768  }
1769 
1770  APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1771  if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1772  Depth + 1))
1773  return true;
1774  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1775  Known.Zero <<= ShAmt;
1776  Known.One <<= ShAmt;
1777  // low bits known zero.
1778  Known.Zero.setLowBits(ShAmt);
1779 
1780  // Attempt to avoid multi-use ops if we don't need anything from them.
1781  if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1782  SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1783  Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1784  if (DemandedOp0) {
1785  SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1786  return TLO.CombineTo(Op, NewOp);
1787  }
1788  }
1789 
1790  // Try shrinking the operation as long as the shift amount will still be
1791  // in range.
1792  if ((ShAmt < DemandedBits.getActiveBits()) &&
1793  ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1794  return true;
1795  } else {
1796  // This is a variable shift, so we can't shift the demand mask by a known
1797  // amount. But if we are not demanding high bits, then we are not
1798  // demanding those bits from the pre-shifted operand either.
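      // Illustrative i32 example: if only bits 0-15 are demanded of
      // (X shl Amt), bits 16-31 of X can never reach a demanded position
      // (SHL only moves bits upward), so they need not be demanded.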
1799  if (unsigned CTLZ = DemandedBits.countLeadingZeros()) {
1800  APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1801  if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1802  Depth + 1)) {
1803  SDNodeFlags Flags = Op.getNode()->getFlags();
1804  if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1805  // Disable the nsw and nuw flags. We can no longer guarantee that we
1806  // won't wrap after simplification.
1807  Flags.setNoSignedWrap(false);
1808  Flags.setNoUnsignedWrap(false);
1809  Op->setFlags(Flags);
1810  }
1811  return true;
1812  }
1813  Known.resetAll();
1814  }
1815  }
1816 
1817  // If we are only demanding sign bits then we can use the shift source
1818  // directly.
1819  if (const APInt *MaxSA =
1820  TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
1821  unsigned ShAmt = MaxSA->getZExtValue();
1822  unsigned NumSignBits =
1823  TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1824  unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
1825  if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1826  return TLO.CombineTo(Op, Op0);
1827  }
1828  break;
1829  }
1830  case ISD::SRL: {
1831  SDValue Op0 = Op.getOperand(0);
1832  SDValue Op1 = Op.getOperand(1);
1833  EVT ShiftVT = Op1.getValueType();
1834 
1835  // Try to match AVG patterns.
1836  if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
1837  DemandedElts, Depth + 1))
1838  return TLO.CombineTo(Op, AVG);
1839 
1840  if (const APInt *SA =
1841  TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1842  unsigned ShAmt = SA->getZExtValue();
1843  if (ShAmt == 0)
1844  return TLO.CombineTo(Op, Op0);
1845 
1846  // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1847  // single shift. We can do this if the top bits (which are shifted out)
1848  // are never demanded.
1849  // TODO - support non-uniform vector amounts.
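      // Illustrative i8 example: ((X << 2) >>u 5) agrees with (X >>u 3) on
      // bits 0-2; they differ only where the SHL's discarded high bits land,
      // so a single SRL by 5 - 2 == 3 suffices when those aren't demanded.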
1850  if (Op0.getOpcode() == ISD::SHL) {
1851  if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1852  if (const APInt *SA2 =
1853  TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1854  unsigned C1 = SA2->getZExtValue();
1855  unsigned Opc = ISD::SRL;
1856  int Diff = ShAmt - C1;
1857  if (Diff < 0) {
1858  Diff = -Diff;
1859  Opc = ISD::SHL;
1860  }
1861  SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1862  return TLO.CombineTo(
1863  Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1864  }
1865  }
1866  }
1867 
1868  APInt InDemandedMask = (DemandedBits << ShAmt);
1869 
1870  // If the shift is exact, then it does demand the low bits (and knows that
1871  // they are zero).
1872  if (Op->getFlags().hasExact())
1873  InDemandedMask.setLowBits(ShAmt);
1874 
1875  // Compute the new bits that are at the top now.
1876  if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1877  Depth + 1))
1878  return true;
1879  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1880  Known.Zero.lshrInPlace(ShAmt);
1881  Known.One.lshrInPlace(ShAmt);
1882  // High bits known zero.
1883  Known.Zero.setHighBits(ShAmt);
1884 
1885  // Attempt to avoid multi-use ops if we don't need anything from them.
1886  if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1887  SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1888  Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1889  if (DemandedOp0) {
1890  SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
1891  return TLO.CombineTo(Op, NewOp);
1892  }
1893  }
1894  }
1895  break;
1896  }
1897  case ISD::SRA: {
1898  SDValue Op0 = Op.getOperand(0);
1899  SDValue Op1 = Op.getOperand(1);
1900  EVT ShiftVT = Op1.getValueType();
1901 
1902  // If we only want bits that already match the signbit then we don't need
1903  // to shift.
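    // Illustrative i8 example: if only bits 4-7 are demanded and X already
    // has at least 4 sign bits, every demanded bit of (X sra Amt) is a copy
    // of the sign bit, identical to the corresponding bit of X itself.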
1904  unsigned NumHiDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
1905  if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
1906  NumHiDemandedBits)
1907  return TLO.CombineTo(Op, Op0);
1908 
1909  // If this is an arithmetic shift right and only the low-bit is set, we can
1910  // always convert this into a logical shr, even if the shift amount is
1911  // variable. The low bit of the shift cannot be an input sign bit unless
1912  // the shift amount is >= the size of the datatype, which is undefined.
1913  if (DemandedBits.isOne())
1914  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
1915 
1916  // Try to match AVG patterns.
1917  if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
1918  DemandedElts, Depth + 1))
1919  return TLO.CombineTo(Op, AVG);
1920 
1921  if (const APInt *SA =
1922  TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1923  unsigned ShAmt = SA->getZExtValue();
1924  if (ShAmt == 0)
1925  return TLO.CombineTo(Op, Op0);
1926 
1927  APInt InDemandedMask = (DemandedBits << ShAmt);
1928 
1929  // If the shift is exact, then it does demand the low bits (and knows that
1930  // they are zero).
1931  if (Op->getFlags().hasExact())
1932  InDemandedMask.setLowBits(ShAmt);
1933 
1934  // If any of the demanded bits are produced by the sign extension, we also
1935  // demand the input sign bit.
1936  if (DemandedBits.countLeadingZeros() < ShAmt)
1937  InDemandedMask.setSignBit();
1938 
1939  if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1940  Depth + 1))
1941  return true;
1942  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1943  Known.Zero.lshrInPlace(ShAmt);
1944  Known.One.lshrInPlace(ShAmt);
1945 
1946  // If the input sign bit is known to be zero, or if none of the top bits
1947  // are demanded, turn this into an unsigned shift right.
1948  if (Known.Zero[BitWidth - ShAmt - 1] ||
1949  DemandedBits.countLeadingZeros() >= ShAmt) {
1950  SDNodeFlags Flags;
1951  Flags.setExact(Op->getFlags().hasExact());
1952  return TLO.CombineTo(
1953  Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
1954  }
1955 
1956  int Log2 = DemandedBits.exactLogBase2();
1957  if (Log2 >= 0) {
1958  // The bit must come from the sign.
1959  SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
1960  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
1961  }
1962 
1963  if (Known.One[BitWidth - ShAmt - 1])
1964  // New bits are known one.
1965  Known.One.setHighBits(ShAmt);
1966 
1967  // Attempt to avoid multi-use ops if we don't need anything from them.
1968  if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1969  SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1970  Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1971  if (DemandedOp0) {
1972  SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
1973  return TLO.CombineTo(Op, NewOp);
1974  }
1975  }
1976  }
1977  break;
1978  }
1979  case ISD::FSHL:
1980  case ISD::FSHR: {
1981  SDValue Op0 = Op.getOperand(0);
1982  SDValue Op1 = Op.getOperand(1);
1983  SDValue Op2 = Op.getOperand(2);
1984  bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
1985 
1986  if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
1987  unsigned Amt = SA->getAPIntValue().urem(BitWidth);
1988 
1989  // For fshl, 0-shift returns the 1st arg.
1990  // For fshr, 0-shift returns the 2nd arg.
1991  if (Amt == 0) {
1992  if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
1993  Known, TLO, Depth + 1))
1994  return true;
1995  break;
1996  }
1997 
1998  // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
1999  // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
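      // Illustrative i8 example with Amt == 3: fshl result bits 3-7 come
      // from Op0 bits 0-4 (so Demanded0 = DemandedBits >> 3) and result
      // bits 0-2 come from Op1 bits 5-7 (so Demanded1 = DemandedBits << 5).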
2000  APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2001  APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2002  if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2003  Depth + 1))
2004  return true;
2005  if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2006  Depth + 1))
2007  return true;
2008 
2009  Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2010  Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2011  Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2012  Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2013  Known.One |= Known2.One;
2014  Known.Zero |= Known2.Zero;
2015 
2016  // Attempt to avoid multi-use ops if we don't need anything from them.
2017  if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2018  !DemandedElts.isAllOnes()) {
2019  SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2020  Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2021  SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2022  Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2023  if (DemandedOp0 || DemandedOp1) {
2024  DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2025  DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2026  SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2027  DemandedOp1, Op2);
2028  return TLO.CombineTo(Op, NewOp);
2029  }
2030  }
2031  }
2032 
2033  // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2034  if (isPowerOf2_32(BitWidth)) {
2035  APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2036  if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2037  Known2, TLO, Depth + 1))
2038  return true;
2039  }
2040  break;
2041  }
2042  case ISD::ROTL:
2043  case ISD::ROTR: {
2044  SDValue Op0 = Op.getOperand(0);
2045  SDValue Op1 = Op.getOperand(1);
2046  bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2047 
2048  // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
2049  if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2050  return TLO.CombineTo(Op, Op0);
2051 
2052  if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2053  unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2054  unsigned RevAmt = BitWidth - Amt;
2055 
2056  // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2057  // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
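      // Illustrative i8 example: for (rotl X, 2), result bit i is source
      // bit (i - 2) mod 8, so the demanded source mask is just DemandedBits
      // rotated right by 2; a rotate never discards bits.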
2058  APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2059  if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2060  Depth + 1))
2061  return true;
2062 
2063  // rot*(x, 0) --> x
2064  if (Amt == 0)
2065  return TLO.CombineTo(Op, Op0);
2066 
2067  // See if we don't demand either half of the rotated bits.
2068  if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2069  DemandedBits.countTrailingZeros() >= (IsROTL ? Amt : RevAmt)) {
2070  Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2071  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2072  }
2073  if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2074  DemandedBits.countLeadingZeros() >= (IsROTL ? RevAmt : Amt)) {
2075  Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2076  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2077  }
2078  }
2079 
2080  // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2081  if (isPowerOf2_32(BitWidth)) {
2082  APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2083  if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2084  Depth + 1))
2085  return true;
2086  }
2087  break;
2088  }
2089  case ISD::UMIN: {
2090  // Check if one arg is always less than (or equal) to the other arg.
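    // Illustrative example: if Op0's known maximum value is 0x0F and Op1's
    // known minimum value is 0x10, then Op0 <=u Op1 always holds and the
    // umin folds to Op0 outright.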
2091  SDValue Op0 = Op.getOperand(0);
2092  SDValue Op1 = Op.getOperand(1);
2093  KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2094  KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2095  Known = KnownBits::umin(Known0, Known1);
2096  if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2097  return TLO.CombineTo(Op, IsULE.value() ? Op0 : Op1);
2098  if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2099  return TLO.CombineTo(Op, IsULT.value() ? Op0 : Op1);
2100  break;
2101  }
2102  case ISD::UMAX: {
2103  // Check if one arg is always greater than (or equal) to the other arg.
2104  SDValue Op0 = Op.getOperand(0);
2105  SDValue Op1 = Op.getOperand(1);
2106  KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2107  KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2108  Known = KnownBits::umax(Known0, Known1);
2109  if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2110  return TLO.CombineTo(Op, IsUGE.value() ? Op0 : Op1);
2111  if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2112  return TLO.CombineTo(Op, IsUGT.value() ? Op0 : Op1);
2113  break;
2114  }
2115  case ISD::BITREVERSE: {
2116  SDValue Src = Op.getOperand(0);
2117  APInt DemandedSrcBits = DemandedBits.reverseBits();
2118  if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2119  Depth + 1))
2120  return true;
2121  Known.One = Known2.One.reverseBits();
2122  Known.Zero = Known2.Zero.reverseBits();
2123  break;
2124  }
2125  case ISD::BSWAP: {
2126  SDValue Src = Op.getOperand(0);
2127 
2128  // If the only bits demanded come from one byte of the bswap result,
2129  // just shift the input byte into position to eliminate the bswap.
2130  unsigned NLZ = DemandedBits.countLeadingZeros();
2131  unsigned NTZ = DemandedBits.countTrailingZeros();
2132 
2133  // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2134  // we need all the bits down to bit 8. Likewise, round NLZ. If we
2135  // have 14 leading zeros, round to 8.
2136  NLZ = alignDown(NLZ, 8);
2137  NTZ = alignDown(NTZ, 8);
2138  // If we need exactly one byte, we can do this transformation.
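    // Illustrative i32 example: demanding only bits 0-7 of (bswap X) gives
    // NLZ == 24, NTZ == 0; the demanded byte is X's top byte, recovered by
    // a single SRL by NLZ - NTZ == 24.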
2139  if (BitWidth - NLZ - NTZ == 8) {
2140  // Replace this with either a left or right shift to get the byte into
2141  // the right place.
2142  unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2143  if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2144  EVT ShiftAmtTy = getShiftAmountTy(VT, DL);
2145  unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2146  SDValue ShAmt = TLO.DAG.getConstant(ShiftAmount, dl, ShiftAmtTy);
2147  SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2148  return TLO.CombineTo(Op, NewOp);
2149  }
2150  }
2151 
2152  APInt DemandedSrcBits = DemandedBits.byteSwap();
2153  if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2154  Depth + 1))
2155  return true;
2156  Known.One = Known2.One.byteSwap();
2157  Known.Zero = Known2.Zero.byteSwap();
2158  break;
2159  }
2160  case ISD::CTPOP: {
2161  // If only 1 bit is demanded, replace with PARITY as long as we're before
2162  // op legalization.
2163  // FIXME: Limit to scalars for now.
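    // Illustrative example: bit 0 of a population count is the XOR of all
    // input bits, so when only that bit is demanded CTPOP and PARITY agree.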
2164  if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2165  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2166  Op.getOperand(0)));
2167 
2168  Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2169  break;
2170  }
2171  case ISD::SIGN_EXTEND_INREG: {
2172  SDValue Op0 = Op.getOperand(0);
2173  EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2174  unsigned ExVTBits = ExVT.getScalarSizeInBits();
2175 
2176  // If we only care about the highest bit, don't bother shifting right.
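    // Illustrative i32 example: for sext_inreg from i8, the result sign bit
    // is bit 7 of the input, so (Op0 << 24) places it at bit 31 without the
    // usual shl+sra pair.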
2177  if (DemandedBits.isSignMask()) {
2178  unsigned MinSignedBits =
2179  TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2180  bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2181  // However if the input is already sign extended we expect the sign
2182  // extension to be dropped altogether later and do not simplify.
2183  if (!AlreadySignExtended) {
2184  // Compute the correct shift amount type, which must be getShiftAmountTy
2185  // for scalar types after legalization.
2186  SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
2187  getShiftAmountTy(VT, DL));
2188  return TLO.CombineTo(Op,
2189  TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2190  }
2191  }
2192 
2193  // If none of the extended bits are demanded, eliminate the sextinreg.
2194  if (DemandedBits.getActiveBits() <= ExVTBits)
2195  return TLO.CombineTo(Op, Op0);
2196 
2197  APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2198 
2199  // Since the sign extended bits are demanded, we know that the sign
2200  // bit is demanded.
2201  InputDemandedBits.setBit(ExVTBits - 1);
2202 
2203  if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2204  Depth + 1))
2205  return true;
2206  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2207 
2208  // If the sign bit of the input is known set or clear, then we know the
2209  // top bits of the result.
2210 
2211  // If the input sign bit is known zero, convert this into a zero extension.
2212  if (Known.Zero[ExVTBits - 1])
2213  return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2214 
2215  APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2216  if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2217  Known.One.setBitsFrom(ExVTBits);
2218  Known.Zero &= Mask;
2219  } else { // Input sign bit unknown
2220  Known.Zero &= Mask;
2221  Known.One &= Mask;
2222  }
2223  break;
2224  }
2225  case ISD::BUILD_PAIR: {
2226  EVT HalfVT = Op.getOperand(0).getValueType();
2227  unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2228 
2229  APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2230  APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2231 
2232  KnownBits KnownLo, KnownHi;
2233 
2234  if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2235  return true;
2236 
2237  if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2238  return true;
2239 
2240  Known = KnownHi.concat(KnownLo);
2241  break;
2242  }
2243  case ISD::ZERO_EXTEND:
2244  case ISD::ZERO_EXTEND_VECTOR_INREG: {
2245  SDValue Src = Op.getOperand(0);
2246  EVT SrcVT = Src.getValueType();
2247  unsigned InBits = SrcVT.getScalarSizeInBits();
2248  unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2249  bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2250 
2251  // If none of the top bits are demanded, convert this into an any_extend.
2252  if (DemandedBits.getActiveBits() <= InBits) {
2253  // If we only need the non-extended bits of the bottom element
2254  // then we can just bitcast to the result.
2255  if (IsLE && IsVecInReg && DemandedElts == 1 &&
2256  VT.getSizeInBits() == SrcVT.getSizeInBits())
2257  return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2258 
2259  unsigned Opc =
2260  IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2261  if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2262  return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2263  }
2264 
2265  APInt InDemandedBits = DemandedBits.trunc(InBits);
2266  APInt InDemandedElts = DemandedElts.zext(InElts);
2267  if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2268  Depth + 1))
2269  return true;
2270  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2271  assert(Known.getBitWidth() == InBits && "Src width has changed?");
2272  Known = Known.zext(BitWidth);
2273 
2274  // Attempt to avoid multi-use ops if we don't need anything from them.
2275  if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2276  Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2277  return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2278  break;
2279  }
2280  case ISD::SIGN_EXTEND:
2281  case ISD::SIGN_EXTEND_VECTOR_INREG: {
2282  SDValue Src = Op.getOperand(0);
2283  EVT SrcVT = Src.getValueType();
2284  unsigned InBits = SrcVT.getScalarSizeInBits();
2285  unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2286  bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2287 
2288  // If none of the top bits are demanded, convert this into an any_extend.
2289  if (DemandedBits.getActiveBits() <= InBits) {
2290  // If we only need the non-extended bits of the bottom element
2291  // then we can just bitcast to the result.
2292  if (IsLE && IsVecInReg && DemandedElts == 1 &&
2293  VT.getSizeInBits() == SrcVT.getSizeInBits())
2294  return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2295 
2296  unsigned Opc =
2297  IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2298  if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2299  return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2300  }
2301 
2302  APInt InDemandedBits = DemandedBits.trunc(InBits);
2303  APInt InDemandedElts = DemandedElts.zext(InElts);
2304 
2305  // Since some of the sign extended bits are demanded, we know that the sign
2306  // bit is demanded.
2307  InDemandedBits.setBit(InBits - 1);
2308 
2309  if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2310  Depth + 1))
2311  return true;
2312  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2313  assert(Known.getBitWidth() == InBits && "Src width has changed?");
2314 
2315  // If the sign bit is known one, the top bits match.
2316  Known = Known.sext(BitWidth);
2317 
2318  // If the sign bit is known zero, convert this to a zero extend.
2319  if (Known.isNonNegative()) {
2320  unsigned Opc =
2321  IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2322  if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2323  return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2324  }
2325 
2326  // Attempt to avoid multi-use ops if we don't need anything from them.
2327  if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2328  Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2329  return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2330  break;
2331  }
2332  case ISD::ANY_EXTEND:
2333  case ISD::ANY_EXTEND_VECTOR_INREG: {
2334  SDValue Src = Op.getOperand(0);
2335  EVT SrcVT = Src.getValueType();
2336  unsigned InBits = SrcVT.getScalarSizeInBits();
2337  unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2338  bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2339 
2340  // If we only need the bottom element then we can just bitcast.
2341  // TODO: Handle ANY_EXTEND?
2342  if (IsLE && IsVecInReg && DemandedElts == 1 &&
2343  VT.getSizeInBits() == SrcVT.getSizeInBits())
2344  return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2345 
2346  APInt InDemandedBits = DemandedBits.trunc(InBits);
2347  APInt InDemandedElts = DemandedElts.zext(InElts);
2348  if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2349  Depth + 1))
2350  return true;
2351  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2352  assert(Known.getBitWidth() == InBits && "Src width has changed?");
2353  Known = Known.anyext(BitWidth);
2354 
2355  // Attempt to avoid multi-use ops if we don't need anything from them.
2356  if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2357  Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2358  return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2359  break;
2360  }
2361  case ISD::TRUNCATE: {
2362  SDValue Src = Op.getOperand(0);
2363 
2364  // Simplify the input, using demanded bit information, and compute the known
2365  // zero/one bits live out.
2366  unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2367  APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2368  if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2369  Depth + 1))
2370  return true;
2371  Known = Known.trunc(BitWidth);
2372 
2373  // Attempt to avoid multi-use ops if we don't need anything from them.
2374  if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2375  Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2376  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2377 
2378  // If the input is only used by this truncate, see if we can shrink it based
2379  // on the known demanded bits.
2380  switch (Src.getOpcode()) {
2381  default:
2382  break;
2383  case ISD::SRL:
2384  // Shrink SRL by a constant if none of the high bits shifted in are
2385  // demanded.
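      // Illustrative example: (trunc i32 (srl i64 X, 16)) agrees with
      // (srl i32 (trunc i32 X), 16) on bits 0-15; only the zero-filled high
      // bits differ, so the narrow form is used when those aren't demanded.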
2386  if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2387  // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2388  // undesirable.
2389  break;
2390 
2391  if (Src.getNode()->hasOneUse()) {
2392  const APInt *ShAmtC =
2393  TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
2394  if (!ShAmtC || ShAmtC->uge(BitWidth))
2395  break;
2396  uint64_t ShVal = ShAmtC->getZExtValue();
2397 
2398  APInt HighBits =
2399  APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2400  HighBits.lshrInPlace(ShVal);
2401  HighBits = HighBits.trunc(BitWidth);
2402 
2403  if (!(HighBits & DemandedBits)) {
2404  // None of the shifted in bits are needed. Add a truncate of the
2405  // shift input, then shift it.
2406  SDValue NewShAmt = TLO.DAG.getConstant(
2407  ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
2408  SDValue NewTrunc =
2409  TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2410  return TLO.CombineTo(
2411  Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2412  }
2413  }
2414  break;
2415  }
2416 
2417  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2418  break;
2419  }
2420  case ISD::AssertZext: {
2421  // AssertZext demands all of the high bits, plus any of the low bits
2422  // demanded by its users.
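    // Illustrative example: for AssertZext to i8 on an i32 value, InMask is
    // 0xFF; the source is queried with ~InMask | DemandedBits so the
    // asserted high bits stay demanded alongside the caller's low bits.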
2423  EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2424  APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2425  if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2426  TLO, Depth + 1))
2427  return true;
2428  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2429 
2430  Known.Zero |= ~InMask;
2431  Known.One &= (~Known.Zero);
2432  break;
2433  }
2434  case ISD::EXTRACT_VECTOR_ELT: {
2435  SDValue Src = Op.getOperand(0);
2436  SDValue Idx = Op.getOperand(1);
2437  ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2438  unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2439 
2440  if (SrcEltCnt.isScalable())
2441  return false;
2442 
2443  // Demand the bits from every vector element without a constant index.
2444  unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2445  APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2446  if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2447  if (CIdx->getAPIntValue().ult(NumSrcElts))
2448  DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2449 
2450  // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2451  // anything about the extended bits.
2452  APInt DemandedSrcBits = DemandedBits;
2453  if (BitWidth > EltBitWidth)
2454  DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2455 
2456  if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2457  Depth + 1))
2458  return true;
2459 
2460  // Attempt to avoid multi-use ops if we don't need anything from them.
2461  if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2462  if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2463  Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2464  SDValue NewOp =
2465  TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2466  return TLO.CombineTo(Op, NewOp);
2467  }
2468  }
2469 
2470  Known = Known2;
2471  if (BitWidth > EltBitWidth)
2472  Known = Known.anyext(BitWidth);
2473  break;
2474  }
2475  case ISD::BITCAST: {
2476  SDValue Src = Op.getOperand(0);
2477  EVT SrcVT = Src.getValueType();
2478  unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2479 
2480  // If this is an FP->Int bitcast and if the sign bit is the only
2481  // thing demanded, turn this into a FGETSIGN.
2482  if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2483  DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2484  SrcVT.isFloatingPoint()) {
2485  bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2486  bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2487  if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2488  SrcVT != MVT::f128) {
2489  // Cannot eliminate/lower SHL for f128 yet.
2490  EVT Ty = OpVTLegal ? VT : MVT::i32;
2491  // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2492  // place. We expect the SHL to be eliminated by other optimizations.
2493  SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2494  unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2495  if (!OpVTLegal && OpVTSizeInBits > 32)
2496  Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2497  unsigned ShVal = Op.getValueSizeInBits() - 1;
2498  SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2499  return TLO.CombineTo(Op,
2500  TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2501  }
2502  }
2503 
2504  // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2505  // Demand the elt/bit if any of the original elts/bits are demanded.
2506  if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2507  unsigned Scale = BitWidth / NumSrcEltBits;
2508  unsigned NumSrcElts = SrcVT.getVectorNumElements();
2509  APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2510  APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2511  for (unsigned i = 0; i != Scale; ++i) {
2512  unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2513  unsigned BitOffset = EltOffset * NumSrcEltBits;
2514  APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2515  if (!Sub.isZero()) {
2516  DemandedSrcBits |= Sub;
2517  for (unsigned j = 0; j != NumElts; ++j)
2518  if (DemandedElts[j])
2519  DemandedSrcElts.setBit((j * Scale) + i);
2520  }
2521  }
2522 
2523  APInt KnownSrcUndef, KnownSrcZero;
2524  if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2525  KnownSrcZero, TLO, Depth + 1))
2526  return true;
2527 
2528  KnownBits KnownSrcBits;
2529  if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2530  KnownSrcBits, TLO, Depth + 1))
2531  return true;
2532  } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2533  // TODO - bigendian once we have test coverage.
2534  unsigned Scale = NumSrcEltBits / BitWidth;
2535  unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2536  APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2537  APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2538  for (unsigned i = 0; i != NumElts; ++i)
2539  if (DemandedElts[i]) {
2540  unsigned Offset = (i % Scale) * BitWidth;
2541  DemandedSrcBits.insertBits(DemandedBits, Offset);
2542  DemandedSrcElts.setBit(i / Scale);
2543  }
2544 
2545  if (SrcVT.isVector()) {
2546  APInt KnownSrcUndef, KnownSrcZero;
2547  if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2548  KnownSrcZero, TLO, Depth + 1))
2549  return true;
2550  }
2551 
2552  KnownBits KnownSrcBits;
2553  if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2554  KnownSrcBits, TLO, Depth + 1))
2555  return true;
2556  }
2557 
2558  // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2559  // recursive call where Known may be useful to the caller.
2560  if (Depth > 0) {
2561  Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2562  return false;
2563  }
2564  break;
2565  }
2566  case ISD::MUL:
2567  if (DemandedBits.isPowerOf2()) {
2568  // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2569  // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2570  // odd (has LSB set), then the left-shifted low bit of X is the answer.
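      // Illustrative i8 example: demanding only bit 3 of X * 24, note that
      // 24 == 3 << 3 with 3 odd, so bit 3 of the product equals bit 0 of
      // X * 3, which is bit 0 of X: the same bit (X << 3) produces.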
2571  unsigned CTZ = DemandedBits.countTrailingZeros();
2572  ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2573  if (C && C->getAPIntValue().countTrailingZeros() == CTZ) {
2574  EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
2575  SDValue AmtC = TLO.DAG.getConstant(CTZ, dl, ShiftAmtTy);
2576  SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2577  return TLO.CombineTo(Op, Shl);
2578  }
2579  }
2580  // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2581  // X * X is odd iff X is odd.
2582  // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
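    // Illustrative check: 3 * 3 == 9 == 0b1001 (bit 0 set, bit 1 clear) and
    // 4 * 4 == 16 == 0b10000; a square is always 0 or 1 modulo 4, so bit 1
    // of X * X is always zero.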
2583  if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2584  SDValue One = TLO.DAG.getConstant(1, dl, VT);
2585  SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2586  return TLO.CombineTo(Op, And1);
2587  }
2588  [[fallthrough]];
2589  case ISD::ADD:
2590  case ISD::SUB: {
2591  // Add, Sub, and Mul don't demand any bits in positions beyond that
2592  // of the highest bit demanded of them.
2593  SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2594  SDNodeFlags Flags = Op.getNode()->getFlags();
2595  unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
2596  APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2597  if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
2598  Depth + 1) ||
2599  SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
2600  Depth + 1) ||
2601  // See if the operation should be performed at a smaller bit width.
2602  ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2603  if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2604  // Disable the nsw and nuw flags. We can no longer guarantee that we
2605  // won't wrap after simplification.
2606  Flags.setNoSignedWrap(false);
2607  Flags.setNoUnsignedWrap(false);
2608  Op->setFlags(Flags);
2609  }
2610  return true;
2611  }
2612 
2613  // Attempt to avoid multi-use ops if we don't need anything from them.
2614  if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2615  SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2616  Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2617  SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2618  Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2619  if (DemandedOp0 || DemandedOp1) {
2620  Flags.setNoSignedWrap(false);
2621  Flags.setNoUnsignedWrap(false);
2622  Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2623  Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2624  SDValue NewOp =
2625  TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2626  return TLO.CombineTo(Op, NewOp);
2627  }
2628  }
2629 
2630  // If we have a constant operand, we may be able to turn it into -1 if we
2631  // do not demand the high bits. This can make the constant smaller to
2632  // encode, allow more general folding, or match specialized instruction
2633  // patterns (e.g., 'blsr' on x86). Don't bother changing 1 to -1 because that
2634  // is probably not useful (and could be detrimental).
2635  ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
2636  APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2637  if (C && !C->isAllOnes() && !C->isOne() &&
2638  (C->getAPIntValue() | HighMask).isAllOnes()) {
2639  SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2640  // Disable the nsw and nuw flags. We can no longer guarantee that we
2641  // won't wrap after simplification.
2642  Flags.setNoSignedWrap(false);
2643  Flags.setNoUnsignedWrap(false);
2644  SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
2645  return TLO.CombineTo(Op, NewOp);
2646  }
2647 
2648  // Match a multiply with a disguised negated-power-of-2 and convert to
2649  // an equivalent shift-left amount.
2650  // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
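    // Illustrative i8 example: with MulC == -4 (0xFC), X * MulC + Op1
    // becomes Op1 - (X << 2), trading the multiply for a shift and a sub.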
2651  auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2652  if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2653  return 0;
2654 
2655  // Don't touch opaque constants. Also, ignore zero and power-of-2
2656  // multiplies. Those will get folded later.
2657  ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2658  if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2659  !MulC->getAPIntValue().isPowerOf2()) {
2660  APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2661  if (UnmaskedC.isNegatedPowerOf2())
2662  return (-UnmaskedC).logBase2();
2663  }
2664  return 0;
2665  };
2666 
2667  auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y, unsigned ShlAmt) {
2668  EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
2669  SDValue ShlAmtC = TLO.DAG.getConstant(ShlAmt, dl, ShiftAmtTy);
2670  SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2671  SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2672  return TLO.CombineTo(Op, Res);
2673  };
2674 
2675  if (isOperationLegalOrCustom(ISD::SHL, VT)) {
2676  if (Op.getOpcode() == ISD::ADD) {
2677  // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2678  if (unsigned ShAmt = getShiftLeftAmt(Op0))
2679  return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2680  // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2681  if (unsigned ShAmt = getShiftLeftAmt(Op1))
2682  return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2683  }
2684  if (Op.getOpcode() == ISD::SUB) {
2685  // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2686  if (unsigned ShAmt = getShiftLeftAmt(Op1))
2687  return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2688  }
2689  }
2690 
2691  [[fallthrough]];
2692  }
2693  default:
2694  if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
2695  if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2696  Known, TLO, Depth))
2697  return true;
2698  break;
2699  }
2700 
2701  // Just use computeKnownBits to compute output bits.
2702  Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2703  break;
2704  }
2705 
2706  // If we know the value of all of the demanded bits, return this as a
2707  // constant.
2708  if (!isTargetCanonicalConstantNode(Op) &&
2709  DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2710  // Avoid folding to a constant if any OpaqueConstant is involved.
2711  const SDNode *N = Op.getNode();
2712  for (SDNode *Op :
2713  llvm::make_pointer_range(N->ops())) {
2714  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
2715  if (C->isOpaque())
2716  return false;
2717  }
2718  if (VT.isInteger())
2719  return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2720  if (VT.isFloatingPoint())
2721  return TLO.CombineTo(
2722  Op,
2723  TLO.DAG.getConstantFP(
2724  APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
2725  }
2726 
2727  // A multi-use 'all demanded elts' simplify failed to find any knownbits.
2728  // Try again just for the original demanded elts.
2729  // Ensure we do this AFTER constant folding above.
2730  if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2731  Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
2732 
2733  return false;
2734 }
2735 
2736 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2737  const APInt &DemandedElts,
2738  DAGCombinerInfo &DCI) const {
2739  SelectionDAG &DAG = DCI.DAG;
2740  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2741  !DCI.isBeforeLegalizeOps());
2742 
2743  APInt KnownUndef, KnownZero;
2744  bool Simplified =
2745  SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2746  if (Simplified) {
2747  DCI.AddToWorklist(Op.getNode());
2748  DCI.CommitTargetLoweringOpt(TLO);
2749  }
2750 
2751  return Simplified;
2752 }
2753 
2754 /// Given a vector binary operation and known undefined elements for each input
2755 /// operand, compute whether each element of the output is undefined.
2756 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2757  const APInt &UndefOp0,
2758  const APInt &UndefOp1) {
2759  EVT VT = BO.getValueType();
2760  assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
2761  "Vector binop only");
2762 
2763  EVT EltVT = VT.getVectorElementType();
2764  unsigned NumElts = VT.getVectorNumElements();
2765  assert(UndefOp0.getBitWidth() == NumElts &&
2766  UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2767 
2768  auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2769  const APInt &UndefVals) {
2770  if (UndefVals[Index])
2771  return DAG.getUNDEF(EltVT);
2772 
2773  if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
2774  // Try hard to make sure that the getNode() call is not creating temporary
2775  // nodes. Ignore opaque integers because they do not constant fold.
2776  SDValue Elt = BV->getOperand(Index);
2777  auto *C = dyn_cast<ConstantSDNode>(Elt);
2778  if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2779  return Elt;
2780  }
2781 
2782  return SDValue();
2783  };
2784 
2785  APInt KnownUndef = APInt::getZero(NumElts);
2786  for (unsigned i = 0; i != NumElts; ++i) {
2787  // If both inputs for this element are either constant or undef and match
2788  // the element type, compute the constant/undef result for this element of
2789  // the vector.
2790  // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2791  // not handle FP constants. The code within getNode() should be refactored
2792  // to avoid the danger of creating a bogus temporary node here.
2793  SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
2794  SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
2795  if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
2796  if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
2797  KnownUndef.setBit(i);
2798  }
2799  return KnownUndef;
2800 }
2801 
2802 bool TargetLowering::SimplifyDemandedVectorElts(
2803  SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
2804  APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
2805  bool AssumeSingleUse) const {
2806  EVT VT = Op.getValueType();
2807  unsigned Opcode = Op.getOpcode();
2808  APInt DemandedElts = OriginalDemandedElts;
2809  unsigned NumElts = DemandedElts.getBitWidth();
2810  assert(VT.isVector() && "Expected vector op");
2811 
2812  KnownUndef = KnownZero = APInt::getZero(NumElts);
2813 
2814  const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
2815  if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
2816  return false;
2817 
2818  // TODO: For now we assume we know nothing about scalable vectors.
2819  if (VT.isScalableVector())
2820  return false;
2821 
2822  assert(VT.getVectorNumElements() == NumElts &&
2823  "Mask size mismatches value type element count!");
2824 
2825  // Undef operand.
2826  if (Op.isUndef()) {
2827  KnownUndef.setAllBits();
2828  return false;
2829  }
2830 
2831  // If Op has other users, assume that all elements are needed.
2832  if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
2833  DemandedElts.setAllBits();
2834 
2835  // Not demanding any elements from Op.
2836  if (DemandedElts == 0) {
2837  KnownUndef.setAllBits();
2838  return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2839  }
2840 
2841  // Limit search depth.
2842  if (Depth >= SelectionDAG::MaxRecursionDepth)
2843  return false;
2844 
2845  SDLoc DL(Op);
2846  unsigned EltSizeInBits = VT.getScalarSizeInBits();
2847  bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
2848 
2849  // Helper for demanding the specified elements and all the bits of both binary
2850  // operands.
2851  auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
2852  SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
2853  TLO.DAG, Depth + 1);
2854  SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
2855  TLO.DAG, Depth + 1);
2856  if (NewOp0 || NewOp1) {
2857  SDValue NewOp = TLO.DAG.getNode(
2858  Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1);
2859  return TLO.CombineTo(Op, NewOp);
2860  }
2861  return false;
2862  };
2863 
2864  switch (Opcode) {
2865  case ISD::SCALAR_TO_VECTOR: {
2866  if (!DemandedElts[0]) {
2867  KnownUndef.setAllBits();
2868  return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2869  }
2870  SDValue ScalarSrc = Op.getOperand(0);
2871  if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
2872  SDValue Src = ScalarSrc.getOperand(0);
2873  SDValue Idx = ScalarSrc.getOperand(1);
2874  EVT SrcVT = Src.getValueType();
2875 
2876  ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
2877 
2878  if (SrcEltCnt.isScalable())
2879  return false;
2880 
2881  unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2882  if (isNullConstant(Idx)) {
2883  APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
2884  APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
2885  APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
2886  if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2887  TLO, Depth + 1))
2888  return true;
2889  }
2890  }
2891  KnownUndef.setHighBits(NumElts - 1);
2892  break;
2893  }
2894  case ISD::BITCAST: {
2895  SDValue Src = Op.getOperand(0);
2896  EVT SrcVT = Src.getValueType();
2897 
2898  // We only handle vectors here.
2899  // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
2900  if (!SrcVT.isVector())
2901  break;
2902 
2903  // Fast handling of 'identity' bitcasts.
2904  unsigned NumSrcElts = SrcVT.getVectorNumElements();
2905  if (NumSrcElts == NumElts)
2906  return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
2907  KnownZero, TLO, Depth + 1);
2908 
2909  APInt SrcDemandedElts, SrcZero, SrcUndef;
2910 
2911  // Bitcast from 'large element' src vector to 'small element' vector: we
2912  // must demand a source element if any DemandedElt maps to it.
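    // Illustrative example: for (v4i32 bitcast (v2i64 X)), Scale == 2 and
    // demanding result element 3 demands source element 3 / 2 == 1, since
    // each i64 source element feeds two i32 result elements.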
2913  if ((NumElts % NumSrcElts) == 0) {
2914  unsigned Scale = NumElts / NumSrcElts;
2915  SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
2916  if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2917  TLO, Depth + 1))
2918  return true;
2919 
2920  // Try calling SimplifyDemandedBits, converting demanded elts to the bits
2921  // of the large element.
2922  // TODO - bigendian once we have test coverage.
2923  if (IsLE) {
2924  unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
2925  APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
2926  for (unsigned i = 0; i != NumElts; ++i)
2927  if (DemandedElts[i]) {
2928  unsigned Ofs = (i % Scale) * EltSizeInBits;
2929  SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
2930  }
2931 
2932  KnownBits Known;
2933  if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
2934  TLO, Depth + 1))
2935  return true;
2936 
2937  // The bitcast has split each wide element into a number of
2938  // narrow subelements. We have just computed the Known bits
2939  // for wide elements. See if element splitting results in
2940  // some subelements being zero. Only for demanded elements!
2941  for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
2942  if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
2943  .isAllOnes())
2944  continue;
2945  for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
2946  unsigned Elt = Scale * SrcElt + SubElt;
2947  if (DemandedElts[Elt])
2948  KnownZero.setBit(Elt);
2949  }
2950  }
2951  }
2952 
2953  // If the src element is zero/undef then all the output elements will be -
2954  // only demanded elements are guaranteed to be correct.
2955  for (unsigned i = 0; i != NumSrcElts; ++i) {
2956  if (SrcDemandedElts[i]) {
2957  if (SrcZero[i])
2958  KnownZero.setBits(i * Scale, (i + 1) * Scale);
2959  if (SrcUndef[i])
2960  KnownUndef.setBits(i * Scale, (i + 1) * Scale);
2961  }
2962  }
2963  }
2964 
2965  // Bitcast from 'small element' src vector to 'large element' vector: we
2966  // demand all smaller source elements covered by the larger demanded element
2967  // of this vector.
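    // Illustrative example: for (v2i64 bitcast (v4i32 X)), Scale == 2 and
    // demanding result element 1 demands source elements 2 and 3, the two
    // i32 halves that make up that i64 element.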
2968  if ((NumSrcElts % NumElts) == 0) {
2969  unsigned Scale = NumSrcElts / NumElts;
2970  SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
2971  if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2972  TLO, Depth + 1))
2973  return true;
2974 
2975  // If all the src elements covering an output element are zero/undef, then
2976  // the output element will be as well, assuming it was demanded.
2977  for (unsigned i = 0; i != NumElts; ++i) {
2978  if (DemandedElts[i]) {
2979  if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
2980  KnownZero.setBit(i);
2981  if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
2982  KnownUndef.setBit(i);
2983  }
2984  }
2985  }
2986  break;
2987  }
2988  case ISD::BUILD_VECTOR: {
2989  // Check all elements and simplify any unused elements with UNDEF.
2990  if (!DemandedElts.isAllOnes()) {
2991  // Don't simplify BROADCASTS.
2992  if (llvm::any_of(Op->op_values(),
2993  [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
2994  SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
2995  bool Updated = false;
2996  for (unsigned i = 0; i != NumElts; ++i) {
2997  if (!DemandedElts[i] && !Ops[i].isUndef()) {
2998  Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
2999  KnownUndef.setBit(i);
3000  Updated = true;
3001  }
3002  }
3003  if (Updated)
3004  return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3005  }
3006  }
3007  for (unsigned i = 0; i != NumElts; ++i) {
3008  SDValue SrcOp = Op.getOperand(i);
3009  if (SrcOp.isUndef()) {
3010  KnownUndef.setBit(i);
3011  } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
 3012  (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
 3013  KnownZero.setBit(i);
3014  }
3015  }
3016  break;
3017  }
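  // Worked example (illustrative): for t = BUILD_VECTOR a, b, c, d with
  // DemandedElts = 0b0011, the unused operands c and d are rewritten to
  // undef (lanes 2 and 3 become KnownUndef), provided t is not a
  // broadcast of a single repeated value.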
3018  case ISD::CONCAT_VECTORS: {
3019  EVT SubVT = Op.getOperand(0).getValueType();
3020  unsigned NumSubVecs = Op.getNumOperands();
3021  unsigned NumSubElts = SubVT.getVectorNumElements();
3022  for (unsigned i = 0; i != NumSubVecs; ++i) {
3023  SDValue SubOp = Op.getOperand(i);
3024  APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3025  APInt SubUndef, SubZero;
3026  if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3027  Depth + 1))
3028  return true;
3029  KnownUndef.insertBits(SubUndef, i * NumSubElts);
3030  KnownZero.insertBits(SubZero, i * NumSubElts);
3031  }
3032 
3033  // Attempt to avoid multi-use ops if we don't need anything from them.
3034  if (!DemandedElts.isAllOnes()) {
3035  bool FoundNewSub = false;
3036  SmallVector<SDValue, 2> DemandedSubOps;
3037  for (unsigned i = 0; i != NumSubVecs; ++i) {
3038  SDValue SubOp = Op.getOperand(i);
3039  APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3040  SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3041  SubOp, SubElts, TLO.DAG, Depth + 1);
3042  DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3043  FoundNewSub = NewSubOp ? true : FoundNewSub;
3044  }
3045  if (FoundNewSub) {
3046  SDValue NewOp =
3047  TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3048  return TLO.CombineTo(Op, NewOp);
3049  }
3050  }
3051  break;
3052  }
3053  case ISD::INSERT_SUBVECTOR: {
 3054  // Demand any elements from the subvector and the remainder from the src
 3055  // it's inserted into.
3056  SDValue Src = Op.getOperand(0);
3057  SDValue Sub = Op.getOperand(1);
3058  uint64_t Idx = Op.getConstantOperandVal(2);
3059  unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3060  APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3061  APInt DemandedSrcElts = DemandedElts;
3062  DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
3063 
3064  APInt SubUndef, SubZero;
3065  if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3066  Depth + 1))
3067  return true;
3068 
3069  // If none of the src operand elements are demanded, replace it with undef.
3070  if (!DemandedSrcElts && !Src.isUndef())
3071  return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3072  TLO.DAG.getUNDEF(VT), Sub,
3073  Op.getOperand(2)));
3074 
3075  if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3076  TLO, Depth + 1))
3077  return true;
3078  KnownUndef.insertBits(SubUndef, Idx);
3079  KnownZero.insertBits(SubZero, Idx);
3080 
3081  // Attempt to avoid multi-use ops if we don't need anything from them.
3082  if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3083  SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3084  Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3085  SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3086  Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3087  if (NewSrc || NewSub) {
3088  NewSrc = NewSrc ? NewSrc : Src;
3089  NewSub = NewSub ? NewSub : Sub;
3090  SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3091  NewSub, Op.getOperand(2));
3092  return TLO.CombineTo(Op, NewOp);
3093  }
3094  }
3095  break;
3096  }
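  // Worked example (illustrative): for
  //   t = insert_subvector v8i32 %src, v2i32 %sub, 4
  // with DemandedElts = 0b00110000, only the inserted lanes are demanded,
  // so DemandedSrcElts becomes 0 and %src is replaced by undef.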
3097  case ISD::EXTRACT_SUBVECTOR: {
3098  // Offset the demanded elts by the subvector index.
3099  SDValue Src = Op.getOperand(0);
3100  if (Src.getValueType().isScalableVector())
3101  break;
3102  uint64_t Idx = Op.getConstantOperandVal(1);
3103  unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3104  APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3105 
3106  APInt SrcUndef, SrcZero;
3107  if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3108  Depth + 1))
3109  return true;
3110  KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3111  KnownZero = SrcZero.extractBits(NumElts, Idx);
3112 
3113  // Attempt to avoid multi-use ops if we don't need anything from them.
3114  if (!DemandedElts.isAllOnes()) {
3115  SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3116  Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3117  if (NewSrc) {
3118  SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3119  Op.getOperand(1));
3120  return TLO.CombineTo(Op, NewOp);
3121  }
3122  }
3123  break;
3124  }
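  // Worked example (illustrative): extracting v4i32 from v8i32 %src at
  // index 4 with DemandedElts = 0b0101 demands source lanes 4 and 6
  // (DemandedSrcElts = 0b01010000 after the zext and shl by Idx).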
3125  case ISD::INSERT_VECTOR_ELT: {
3126  SDValue Vec = Op.getOperand(0);
3127  SDValue Scl = Op.getOperand(1);
3128  auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3129 
3130  // For a legal, constant insertion index, if we don't need this insertion
3131  // then strip it, else remove it from the demanded elts.
3132  if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3133  unsigned Idx = CIdx->getZExtValue();
3134  if (!DemandedElts[Idx])
3135  return TLO.CombineTo(Op, Vec);
3136 
3137  APInt DemandedVecElts(DemandedElts);
3138  DemandedVecElts.clearBit(Idx);
3139  if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3140  KnownZero, TLO, Depth + 1))
3141  return true;
3142 
3143  KnownUndef.setBitVal(Idx, Scl.isUndef());
3144 
3145  KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3146  break;
3147  }
3148 
3149  APInt VecUndef, VecZero;
3150  if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3151  Depth + 1))
3152  return true;
3153  // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3154  break;
3155  }
3156  case ISD::VSELECT: {
3157  SDValue Sel = Op.getOperand(0);
3158  SDValue LHS = Op.getOperand(1);
3159  SDValue RHS = Op.getOperand(2);
3160 
3161  // Try to transform the select condition based on the current demanded
3162  // elements.
3163  APInt UndefSel, UndefZero;
3164  if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, UndefZero, TLO,
3165  Depth + 1))
3166  return true;
3167 
3168  // See if we can simplify either vselect operand.
3169  APInt DemandedLHS(DemandedElts);
3170  APInt DemandedRHS(DemandedElts);
3171  APInt UndefLHS, ZeroLHS;
3172  APInt UndefRHS, ZeroRHS;
3173  if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3174  Depth + 1))
3175  return true;
3176  if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3177  Depth + 1))
3178  return true;
3179 
3180  KnownUndef = UndefLHS & UndefRHS;
3181  KnownZero = ZeroLHS & ZeroRHS;
3182 
3183  // If we know that the selected element is always zero, we don't need the
3184  // select value element.
3185  APInt DemandedSel = DemandedElts & ~KnownZero;
3186  if (DemandedSel != DemandedElts)
3187  if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, UndefZero, TLO,
3188  Depth + 1))
3189  return true;
3190 
3191  break;
3192  }
3193  case ISD::VECTOR_SHUFFLE: {
3194  SDValue LHS = Op.getOperand(0);
3195  SDValue RHS = Op.getOperand(1);
3196  ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3197 
 3198  // Collect demanded elements from the shuffle operands.
3199  APInt DemandedLHS(NumElts, 0);
3200  APInt DemandedRHS(NumElts, 0);
3201  for (unsigned i = 0; i != NumElts; ++i) {
3202  int M = ShuffleMask[i];
3203  if (M < 0 || !DemandedElts[i])
3204  continue;
3205  assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3206  if (M < (int)NumElts)
3207  DemandedLHS.setBit(M);
3208  else
3209  DemandedRHS.setBit(M - NumElts);
3210  }
3211 
3212  // See if we can simplify either shuffle operand.
3213  APInt UndefLHS, ZeroLHS;
3214  APInt UndefRHS, ZeroRHS;
3215  if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3216  Depth + 1))
3217  return true;
3218  if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3219  Depth + 1))
3220  return true;
3221 
3222  // Simplify mask using undef elements from LHS/RHS.
3223  bool Updated = false;
3224  bool IdentityLHS = true, IdentityRHS = true;
3225  SmallVector<int, 32> NewMask(ShuffleMask);
3226  for (unsigned i = 0; i != NumElts; ++i) {
3227  int &M = NewMask[i];
3228  if (M < 0)
3229  continue;
3230  if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3231  (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3232  Updated = true;
3233  M = -1;
3234  }
3235  IdentityLHS &= (M < 0) || (M == (int)i);
3236  IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3237  }
3238 
 3239  // Update legal shuffle masks based on demanded elements if it won't reduce
 3240  // to an identity mask, which can cause premature removal of the shuffle.
3241  if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3242  SDValue LegalShuffle =
3243  buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3244  if (LegalShuffle)
3245  return TLO.CombineTo(Op, LegalShuffle);
3246  }
3247 
3248  // Propagate undef/zero elements from LHS/RHS.
3249  for (unsigned i = 0; i != NumElts; ++i) {
3250  int M = ShuffleMask[i];
3251  if (M < 0) {
3252  KnownUndef.setBit(i);
3253  } else if (M < (int)NumElts) {
3254  if (UndefLHS[M])
3255  KnownUndef.setBit(i);
3256  if (ZeroLHS[M])
3257  KnownZero.setBit(i);
3258  } else {
3259  if (UndefRHS[M - NumElts])
3260  KnownUndef.setBit(i);
3261  if (ZeroRHS[M - NumElts])
3262  KnownZero.setBit(i);
3263  }
3264  }
3265  break;
3266  }
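  // Worked example (illustrative): for a v4i32 shuffle with mask
  // <0, 5, 2, 7> and DemandedElts = 0b0011, lane 0 reads LHS[0] and
  // lane 1 reads RHS[1], so DemandedLHS = 0b0001 and DemandedRHS = 0b0010;
  // the undemanded entries (mask values 2 and 7) may be set to -1 (undef).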
 3267  case ISD::ANY_EXTEND_VECTOR_INREG:
 3268  case ISD::SIGN_EXTEND_VECTOR_INREG:
 3269  case ISD::ZERO_EXTEND_VECTOR_INREG: {
 3270  APInt SrcUndef, SrcZero;
3271  SDValue Src = Op.getOperand(0);
3272  unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3273  APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3274  if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3275  Depth + 1))
3276  return true;
3277  KnownZero = SrcZero.zextOrTrunc(NumElts);
3278  KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3279 
3280  if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3281  Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3282  DemandedSrcElts == 1) {
3283  // aext - if we just need the bottom element then we can bitcast.
3284  return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3285  }
3286 
3287  if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3288  // zext(undef) upper bits are guaranteed to be zero.
3289  if (DemandedElts.isSubsetOf(KnownUndef))
3290  return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3291  KnownUndef.clearAllBits();
3292 
3293  // zext - if we just need the bottom element then we can mask:
3294  // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3295  if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3296  Op->isOnlyUserOf(Src.getNode()) &&
3297  Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3298  SDLoc DL(Op);
3299  EVT SrcVT = Src.getValueType();
3300  EVT SrcSVT = SrcVT.getScalarType();
3301  SmallVector<SDValue> MaskElts;
3302  MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3303  MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3304  SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3305  if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3306  ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3307  Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3308  return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3309  }
3310  }
3311  }
3312  break;
3313  }
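  // Worked example (illustrative, little-endian): demanding only lane 0 of
  //   v2i64 zero_extend_vector_inreg (and v4i32 %x, <C0,C1,C2,C3>)
  // folds to
  //   v2i64 bitcast (and v4i32 %x, <C0,0,0,0>)
  // since the mask already zeroes the lanes the zext would have zeroed.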
3314 
3315  // TODO: There are more binop opcodes that could be handled here - MIN,
3316  // MAX, saturated math, etc.
3317  case ISD::ADD: {
3318  SDValue Op0 = Op.getOperand(0);
3319  SDValue Op1 = Op.getOperand(1);
3320  if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3321  APInt UndefLHS, ZeroLHS;
3322  if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3323  Depth + 1, /*AssumeSingleUse*/ true))
3324  return true;
3325  }
3326  [[fallthrough]];
3327  }
3328  case ISD::OR:
3329  case ISD::XOR:
3330  case ISD::SUB:
3331  case ISD::FADD:
3332  case ISD::FSUB:
3333  case ISD::FMUL:
3334  case ISD::FDIV:
3335  case ISD::FREM: {
3336  SDValue Op0 = Op.getOperand(0);
3337  SDValue Op1 = Op.getOperand(1);
3338 
3339  APInt UndefRHS, ZeroRHS;
3340  if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3341  Depth + 1))
3342  return true;
3343  APInt UndefLHS, ZeroLHS;
3344  if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3345  Depth + 1))
3346  return true;
3347 
3348  KnownZero = ZeroLHS & ZeroRHS;
3349  KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3350 
3351  // Attempt to avoid multi-use ops if we don't need anything from them.
3352  // TODO - use KnownUndef to relax the demandedelts?
3353  if (!DemandedElts.isAllOnes())
3354  if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3355  return true;
3356  break;
3357  }
3358  case ISD::SHL:
3359  case ISD::SRL:
3360  case ISD::SRA:
3361  case ISD::ROTL:
3362  case ISD::ROTR: {
3363  SDValue Op0 = Op.getOperand(0);
3364  SDValue Op1 = Op.getOperand(1);
3365 
3366  APInt UndefRHS, ZeroRHS;
3367  if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3368  Depth + 1))
3369  return true;
3370  APInt UndefLHS, ZeroLHS;
3371  if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3372  Depth + 1))
3373  return true;
3374 
3375  KnownZero = ZeroLHS;
3376  KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3377 
3378  // Attempt to avoid multi-use ops if we don't need anything from them.
3379  // TODO - use KnownUndef to relax the demandedelts?
3380  if (!DemandedElts.isAllOnes())
3381  if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3382  return true;
3383  break;
3384  }
3385  case ISD::MUL:
3386  case ISD::MULHU:
3387  case ISD::MULHS:
3388  case ISD::AND: {
3389  SDValue Op0 = Op.getOperand(0);
3390  SDValue Op1 = Op.getOperand(1);
3391 
3392  APInt SrcUndef, SrcZero;
3393  if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3394  Depth + 1))
3395  return true;
3396  // If we know that a demanded element was zero in Op1 we don't need to
 3397  // demand it in Op0 - it's guaranteed to be zero.
3398  APInt DemandedElts0 = DemandedElts & ~SrcZero;
3399  if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3400  TLO, Depth + 1))
3401  return true;
3402 
3403  KnownUndef &= DemandedElts0;
3404  KnownZero &= DemandedElts0;
3405 
3406  // If every element pair has a zero/undef then just fold to zero.
3407  // fold (and x, undef) -> 0 / (and x, 0) -> 0
3408  // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3409  if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3410  return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3411 
3412  // If either side has a zero element, then the result element is zero, even
3413  // if the other is an UNDEF.
3414  // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3415  // and then handle 'and' nodes with the rest of the binop opcodes.
3416  KnownZero |= SrcZero;
3417  KnownUndef &= SrcUndef;
3418  KnownUndef &= ~KnownZero;
3419 
3420  // Attempt to avoid multi-use ops if we don't need anything from them.
3421  if (!DemandedElts.isAllOnes())
3422  if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3423  return true;
3424  break;
3425  }
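  // Worked example (illustrative): and v4i32 %x, <0, 0, undef, undef>
  // with all lanes demanded folds directly to the zero vector: lanes 0-1
  // of the constant are known zero and lanes 2-3 known undef, so
  // DemandedElts is a subset of (SrcZero | SrcUndef).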
3426  case ISD::TRUNCATE:
3427  case ISD::SIGN_EXTEND:
3428  case ISD::ZERO_EXTEND:
3429  if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3430  KnownZero, TLO, Depth + 1))
3431  return true;
3432 
3433  if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3434  // zext(undef) upper bits are guaranteed to be zero.
3435  if (DemandedElts.isSubsetOf(KnownUndef))
3436  return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3437  KnownUndef.clearAllBits();
3438  }
3439  break;
3440  default: {
3441  if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3442  if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3443  KnownZero, TLO, Depth))
3444  return true;
3445  } else {
3446  KnownBits Known;
3447  APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3448  if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3449  TLO, Depth, AssumeSingleUse))
3450  return true;
3451  }
3452  break;
3453  }
3454  }
3455  assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3456 
3457  // Constant fold all undef cases.
3458  // TODO: Handle zero cases as well.
3459  if (DemandedElts.isSubsetOf(KnownUndef))
3460  return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3461 
3462  return false;
3463 }
3464 
3465 /// Determine which of the bits specified in Mask are known to be either zero or
 3466 /// one and return them in Known.
 3467 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
 3468  KnownBits &Known,
3469  const APInt &DemandedElts,
3470  const SelectionDAG &DAG,
3471  unsigned Depth) const {
3472  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3473  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3474  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3475  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3476  "Should use MaskedValueIsZero if you don't know whether Op"
3477  " is a target node!");
3478  Known.resetAll();
3479 }
3480 
 3481 void TargetLowering::computeKnownBitsForTargetInstr(
 3482  GISelKnownBits &Analysis, Register R, KnownBits &Known,
3483  const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3484  unsigned Depth) const {
3485  Known.resetAll();
3486 }
3487 
 3488 void TargetLowering::computeKnownBitsForFrameIndex(
 3489  const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3490  // The low bits are known zero if the pointer is aligned.
3491  Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3492 }
3493 
 3494 Align TargetLowering::computeKnownAlignForTargetInstr(
 3495  GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
3496  unsigned Depth) const {
3497  return Align(1);
3498 }
3499 
3500 /// This method can be implemented by targets that want to expose additional
3501 /// information about sign bits to the DAG Combiner.
 3502 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
 3503  const APInt &,
3504  const SelectionDAG &,
3505  unsigned Depth) const {
3506  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3507  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3508  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3509  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3510  "Should use ComputeNumSignBits if you don't know whether Op"
3511  " is a target node!");
3512  return 1;
3513 }
3514 
 3515 unsigned TargetLowering::computeNumSignBitsForTargetInstr(
 3516  GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
3517  const MachineRegisterInfo &MRI, unsigned Depth) const {
3518  return 1;
3519 }
3520 
 3521 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
 3522  SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3523  TargetLoweringOpt &TLO, unsigned Depth) const {
3524  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3525  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3526  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3527  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3528  "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3529  " is a target node!");
3530  return false;
3531 }
3532 
 3533 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
 3534  SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3535  KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3536  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3537  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3538  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3539  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3540  "Should use SimplifyDemandedBits if you don't know whether Op"
3541  " is a target node!");
3542  computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3543  return false;
3544 }
3545 
 3546 SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
 3547  SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3548  SelectionDAG &DAG, unsigned Depth) const {
3549  assert(
3550  (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3551  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3552  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3553  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3554  "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3555  " is a target node!");
3556  return SDValue();
3557 }
3558 
3559 SDValue
 3560 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
 3561  SDValue N1, MutableArrayRef<int> Mask,
 3562  SelectionDAG &DAG) const {
3563  bool LegalMask = isShuffleMaskLegal(Mask, VT);
3564  if (!LegalMask) {
3565  std::swap(N0, N1);
 3566  ShuffleVectorSDNode::commuteMask(Mask);
 3567  LegalMask = isShuffleMaskLegal(Mask, VT);
3568  }
3569 
3570  if (!LegalMask)
3571  return SDValue();
3572 
3573  return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3574 }
3575 
 3576 const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode *) const {
 3577  return nullptr;
3578 }
3579 
 3580 bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
 3581  SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3582  bool PoisonOnly, unsigned Depth) const {
3583  assert(
3584  (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3585  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3586  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3587  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3588  "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3589  " is a target node!");
3590  return false;
3591 }
3592 
 3593 bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
 3594  SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3595  bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
3596  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3597  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3598  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3599  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3600  "Should use canCreateUndefOrPoison if you don't know whether Op"
3601  " is a target node!");
3602  // Be conservative and return true.
3603  return true;
3604 }
3605 
 3606 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
 3607  const SelectionDAG &DAG,
3608  bool SNaN,
3609  unsigned Depth) const {
3610  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3611  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3612  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3613  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3614  "Should use isKnownNeverNaN if you don't know whether Op"
3615  " is a target node!");
3616  return false;
3617 }
3618 
 3619 bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
 3620  const APInt &DemandedElts,
3621  APInt &UndefElts,
3622  unsigned Depth) const {
3623  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3624  Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3625  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3626  Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3627  "Should use isSplatValue if you don't know whether Op"
3628  " is a target node!");
3629  return false;
3630 }
3631 
3632 // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3633 // work with truncating build vectors and vectors with elements of less than
3634 // 8 bits.
 3635 bool TargetLowering::isConstTrueVal(SDValue N) const {
 3636  if (!N)
3637  return false;
3638 
3639  unsigned EltWidth;
3640  APInt CVal;
3641  if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3642  /*AllowTruncation=*/true)) {
3643  CVal = CN->getAPIntValue();
3644  EltWidth = N.getValueType().getScalarSizeInBits();
3645  } else
3646  return false;
3647 
3648  // If this is a truncating splat, truncate the splat value.
3649  // Otherwise, we may fail to match the expected values below.
3650  if (EltWidth < CVal.getBitWidth())
3651  CVal = CVal.trunc(EltWidth);
3652 
3653  switch (getBooleanContents(N.getValueType())) {
 3654  case UndefinedBooleanContent:
 3655  return CVal[0];
 3656  case ZeroOrOneBooleanContent:
 3657  return CVal.isOne();
 3658  case ZeroOrNegativeOneBooleanContent:
 3659  return CVal.isAllOnes();
3660  }
3661 
3662  llvm_unreachable("Invalid boolean contents");
3663 }
3664 
 3665 bool TargetLowering::isConstFalseVal(SDValue N) const {
 3666  if (!N)
3667  return false;
3668 
3669  const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3670  if (!CN) {
3671  const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3672  if (!BV)
3673  return false;
3674 
 3675  // Only interested in constant splats; we don't care about undef
 3676  // elements when identifying boolean constants, and getConstantSplatNode
 3677  // returns NULL if all ops are undef.
3678  CN = BV->getConstantSplatNode();
3679  if (!CN)
3680  return false;
3681  }
3682 
3683  if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3684  return !CN->getAPIntValue()[0];
3685 
3686  return CN->isZero();
3687 }
3688 
 3689 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
 3690  bool SExt) const {
3691  if (VT == MVT::i1)
3692  return N->isOne();
3693 
 3694  TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
 3695  switch (Cnt) {
 3696  case TargetLowering::ZeroOrOneBooleanContent:
 3697  // An extended value of 1 is always true, unless its original type is i1,
 3698  // in which case it will be sign extended to -1.
 3699  return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
 3700  case TargetLowering::UndefinedBooleanContent:
 3701  case TargetLowering::ZeroOrNegativeOneBooleanContent:
 3702  return N->isAllOnes() && SExt;
3703  }
3704  llvm_unreachable("Unexpected enumeration.");
3705 }
3706 
3707 /// This helper function of SimplifySetCC tries to optimize the comparison when
3708 /// either operand of the SetCC node is a bitwise-and instruction.
3709 SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
3710  ISD::CondCode Cond, const SDLoc &DL,
3711  DAGCombinerInfo &DCI) const {
3712  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
3713  std::swap(N0, N1);
3714 
3715  SelectionDAG &DAG = DCI.DAG;
3716  EVT OpVT = N0.getValueType();
3717  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
3718  (Cond != ISD::SETEQ && Cond != ISD::SETNE))
3719  return SDValue();
3720 
3721  // (X & Y) != 0 --> zextOrTrunc(X & Y)
3722  // iff everything but LSB is known zero:
3723  if (Cond == ISD::SETNE && isNullConstant(N1) &&
 3724  getBooleanContents(OpVT) ==
 3725  TargetLowering::ZeroOrOneBooleanContent) {
 3726  unsigned NumEltBits = OpVT.getScalarSizeInBits();
3727  APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
3728  if (DAG.MaskedValueIsZero(N0, UpperBits))
3729  return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
3730  }
3731 
3732  // Match these patterns in any of their permutations:
3733  // (X & Y) == Y
3734  // (X & Y) != Y
3735  SDValue X, Y;
3736  if (N0.getOperand(0) == N1) {
3737  X = N0.getOperand(1);
3738  Y = N0.getOperand(0);
3739  } else if (N0.getOperand(1) == N1) {
3740  X = N0.getOperand(0);
3741  Y = N0.getOperand(1);
3742  } else {
3743  return SDValue();
3744  }
3745 
3746  SDValue Zero = DAG.getConstant(0, DL, OpVT);
3747  if (DAG.isKnownToBeAPowerOfTwo(Y)) {
3748  // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
3749  // Note that where Y is variable and is known to have at most one bit set
3750  // (for example, if it is Z & 1) we cannot do this; the expressions are not
3751  // equivalent when Y == 0.
3752  assert(OpVT.isInteger());
3753  Cond = ISD::getSetCCInverse(Cond, OpVT);
3754  if (DCI.isBeforeLegalizeOps() ||
 3755  isCondCodeLegal(Cond, N0.getSimpleValueType()))
 3756  return DAG.getSetCC(DL, VT, N0, Zero, Cond);
3757  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
3758  // If the target supports an 'and-not' or 'and-complement' logic operation,
3759  // try to use that to make a comparison operation more efficient.
3760  // But don't do this transform if the mask is a single bit because there are
3761  // more efficient ways to deal with that case (for example, 'bt' on x86 or
3762  // 'rlwinm' on PPC).
3763 
3764  // Bail out if the compare operand that we want to turn into a zero is
3765  // already a zero (otherwise, infinite loop).
3766  auto *YConst = dyn_cast<ConstantSDNode>(Y);
3767  if (YConst && YConst->isZero())
3768  return SDValue();
3769 
3770  // Transform this into: ~X & Y == 0.
3771  SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
3772  SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
3773  return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
3774  }
3775 
3776  return SDValue();
3777 }
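// Worked examples (illustrative) of the folds above:
//   (X & 8) == 8   -->  (X & 8) != 0     (Y is a known power of two)
//   (X & Y) == Y   -->  (~X & Y) == 0    (target has an and-not operation)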
3778 
3779 /// There are multiple IR patterns that could be checking whether certain
 3780 /// truncation of a signed number would be lossy or not. The pattern that is
 3781 /// best at the IR level may not lower optimally. Thus, we want to unfold it.
3782 /// We are looking for the following pattern: (KeptBits is a constant)
3783 /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
3784 /// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
3785 /// KeptBits also can't be 1, that would have been folded to %x dstcond 0
3786 /// We will unfold it into the natural trunc+sext pattern:
3787 /// ((%x << C) a>> C) dstcond %x
3788 /// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
3789 SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
3790  EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
3791  const SDLoc &DL) const {
3792  // We must be comparing with a constant.
3793  ConstantSDNode *C1;
3794  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
3795  return SDValue();
3796 
3797  // N0 should be: add %x, (1 << (KeptBits-1))
3798  if (N0->getOpcode() != ISD::ADD)
3799  return SDValue();
3800 
3801  // And we must be 'add'ing a constant.
3802  ConstantSDNode *C01;
3803  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
3804  return SDValue();
3805 
3806  SDValue X = N0->getOperand(0);
3807  EVT XVT = X.getValueType();
3808 
3809  // Validate constants ...
3810 
3811  APInt I1 = C1->getAPIntValue();
3812 
3813  ISD::CondCode NewCond;
3814  if (Cond == ISD::CondCode::SETULT) {
3815  NewCond = ISD::CondCode::SETEQ;
3816  } else if (Cond == ISD::CondCode::SETULE) {
3817  NewCond = ISD::CondCode::SETEQ;
3818  // But need to 'canonicalize' the constant.
3819  I1 += 1;
3820  } else if (Cond == ISD::CondCode::SETUGT) {
3821  NewCond = ISD::CondCode::SETNE;
3822  // But need to 'canonicalize' the constant.
3823  I1 += 1;
3824  } else if (Cond == ISD::CondCode::SETUGE) {
3825  NewCond = ISD::CondCode::SETNE;
3826  } else
3827  return SDValue();
3828 
3829  APInt I01 = C01->getAPIntValue();
3830 
3831  auto checkConstants = [&I1, &I01]() -> bool {
3832  // Both of them must be power-of-two, and the constant from setcc is bigger.
3833  return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
3834  };
3835 
3836  if (checkConstants()) {
3837  // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
3838  } else {
3839  // What if we invert constants? (and the target predicate)
3840  I1.negate();
3841  I01.negate();
3842  assert(XVT.isInteger());
3843  NewCond = getSetCCInverse(NewCond, XVT);
3844  if (!checkConstants())
3845  return SDValue();
3846  // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
3847  }
3848 
3849  // They are power-of-two, so which bit is set?
3850  const unsigned KeptBits = I1.logBase2();
3851  const unsigned KeptBitsMinusOne = I01.logBase2();
3852 
3853  // Magic!
3854  if (KeptBits != (KeptBitsMinusOne + 1))
3855  return SDValue();
3856  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
3857 
3858  // We don't want to do this in every single case.
3859  SelectionDAG &DAG = DCI.DAG;
 3860  if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
 3861  XVT, KeptBits))
3862  return SDValue();
3863 
3864  const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
3865  assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
3866 
3867  // Unfold into: ((%x << C) a>> C) cond %x
3868  // Where 'cond' will be either 'eq' or 'ne'.
3869  SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
3870  SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
3871  SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
3872  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
3873 
3874  return T2;
3875 }
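// Worked example (illustrative) with i16 and KeptBits = 8:
//   icmp ult (add %x, 128), 256
// unfolds to
//   icmp eq (ashr (shl %x, 8), 8), %x
// i.e. "%x is unchanged by truncation to i8 followed by sign extension".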
3876 
3877 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
3878 SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
3879  EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
3880  DAGCombinerInfo &DCI, const SDLoc &DL) const {
3881  assert(isConstOrConstSplat(N1C) &&
3882  isConstOrConstSplat(N1C)->getAPIntValue().isZero() &&
3883  "Should be a comparison with 0.");
3884  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3885  "Valid only for [in]equality comparisons.");
3886 
3887  unsigned NewShiftOpcode;
3888  SDValue X, C, Y;
3889 
3890  SelectionDAG &DAG = DCI.DAG;
3891  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3892 
3893  // Look for '(C l>>/<< Y)'.
3894  auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
3895  // The shift should be one-use.
3896  if (!V.hasOneUse())
3897  return false;
3898  unsigned OldShiftOpcode = V.getOpcode();
3899  switch (OldShiftOpcode) {
3900  case ISD::SHL:
3901  NewShiftOpcode = ISD::SRL;
3902  break;
3903  case ISD::SRL:
3904  NewShiftOpcode = ISD::SHL;
3905  break;
3906  default:
3907  return false; // must be a logical shift.
3908  }
3909  // We should be shifting a constant.
3910  // FIXME: best to use isConstantOrConstantVector().
3911  C = V.getOperand(0);
3912  ConstantSDNode *CC =
3913  isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
3914  if (!CC)
3915  return false;
3916  Y = V.getOperand(1);
3917 
3918  ConstantSDNode *XC =
3919  isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
3920  return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
3921  X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
3922  };
3923 
 3924  // The LHS of the comparison should be a one-use 'and'.
3925  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
3926  return SDValue();
3927 
3928  X = N0.getOperand(0);
3929  SDValue Mask = N0.getOperand(1);
3930 
3931  // 'and' is commutative!
3932  if (!Match(Mask)) {
3933  std::swap(X, Mask);
3934  if (!Match(Mask))
3935  return SDValue();
3936  }
3937 
3938  EVT VT = X.getValueType();
3939 
3940  // Produce:
3941  // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
3942  SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
3943  SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
3944  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
3945  return T2;
3946 }
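// Worked example (illustrative):
//   (X & (0xFF << Y)) == 0  -->  ((X l>> Y) & 0xFF) == 0
// The constant mask is hoisted out of the variable shift, so the 'and'
// uses an immediate mask.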
3947 
3948 /// Try to fold an equality comparison with a {add/sub/xor} binary operation as
3949 /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
3950 /// handle the commuted versions of these patterns.
3951 SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
3952  ISD::CondCode Cond, const SDLoc &DL,
3953  DAGCombinerInfo &DCI) const {
3954  unsigned BOpcode = N0.getOpcode();
3955  assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
3956  "Unexpected binop");
3957  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
3958 
3959  // (X + Y) == X --> Y == 0
3960  // (X - Y) == X --> Y == 0
3961  // (X ^ Y) == X --> Y == 0
3962  SelectionDAG &DAG = DCI.DAG;
3963  EVT OpVT = N0.getValueType();
3964  SDValue X = N0.getOperand(0);
3965  SDValue Y = N0.getOperand(1);
3966  if (X == N1)
3967  return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
3968 
3969  if (Y != N1)
3970  return SDValue();
3971 
3972  // (X + Y) == Y --> X == 0
3973  // (X ^ Y) == Y --> X == 0
3974  if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
3975  return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
3976 
3977  // The shift would not be valid if the operands are boolean (i1).
3978  if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
3979  return SDValue();
3980 
3981  // (X - Y) == Y --> X == Y << 1
3982  EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
3983  !DCI.isBeforeLegalize());
3984  SDValue One = DAG.getConstant(1, DL, ShiftVT);
3985  SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
3986  if (!DCI.isCalledByLegalizer())
3987  DCI.AddToWorklist(YShl1.getNode());
3988  return DAG.getSetCC(DL, VT, X, YShl1, Cond);
3989 }
3990 
 3991 static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
 3992  SDValue N0, const APInt &C1,
3993  ISD::CondCode Cond, const SDLoc &dl,
3994  SelectionDAG &DAG) {
3995  // Look through truncs that don't change the value of a ctpop.
3996  // FIXME: Add vector support? Need to be careful with setcc result type below.
3997  SDValue CTPOP = N0;
3998  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
 3999  N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
 4000  CTPOP = N0.getOperand(0);
4001 
4002  if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4003  return SDValue();
4004 
4005  EVT CTVT = CTPOP.getValueType();
4006  SDValue CTOp = CTPOP.getOperand(0);
4007 
4008  // Expand a power-of-2-or-zero comparison based on ctpop:
4009  // (ctpop x) u< 2 -> (x & x-1) == 0
4010  // (ctpop x) u> 1 -> (x & x-1) != 0
4011  if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4012  // Keep the CTPOP if it is a legal vector op.
4013  if (CTVT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
4014  return SDValue();
4015 
4016  unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4017  if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4018  return SDValue();
4019  if (C1 == 0 && (Cond == ISD::SETULT))
4020  return SDValue(); // This is handled elsewhere.
4021 
4022  unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4023 
4024  SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4025  SDValue Result = CTOp;
4026  for (unsigned i = 0; i < Passes; i++) {
4027  SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4028  Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4029  }
 4030  ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
 4031  return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4032  }
4033 
4034  // Expand a power-of-2 comparison based on ctpop:
4035  // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
4036  // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
4037  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4038  // Keep the CTPOP if it is legal.
4039  if (TLI.isOperationLegal(ISD::CTPOP, CTVT))
4040  return SDValue();
4041 
4042  SDValue Zero = DAG.getConstant(0, dl, CTVT);
4043  SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4044  assert(CTVT.isInteger());
4045  ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
4046  SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4047  SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4048  SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
4049  SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4050  unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
4051  return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
4052  }
4053 
4054  return SDValue();
4055 }
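// Worked example (illustrative): for (ctpop x) u< 2, Passes = 1 and the
// loop emits the classic clear-lowest-set-bit idiom:
//   t = x & (x - 1);   (ctpop x) u< 2  -->  t == 0
// A target raising getCustomCtpopCost can allow e.g. (ctpop x) u< 3,
// which chains a second x & (x - 1) pass before the compare.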
4056 
 4057 static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
 4058  ISD::CondCode Cond, const SDLoc &dl,
4059  SelectionDAG &DAG) {
4060  if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4061  return SDValue();
4062 
4063  auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4064  if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4065  return SDValue();
4066 
4067  auto getRotateSource = [](SDValue X) {
4068  if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4069  return X.getOperand(0);
4070  return SDValue();
4071  };
4072 
4073  // Peek through a rotated value compared against 0 or -1:
4074  // (rot X, Y) == 0/-1 --> X == 0/-1
4075  // (rot X, Y) != 0/-1 --> X != 0/-1
4076  if (SDValue R = getRotateSource(N0))
4077  return DAG.getSetCC(dl, VT, R, N1, Cond);
4078 
4079  // Peek through an 'or' of a rotated value compared against 0:
4080  // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4081  // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4082  //
4083  // TODO: Add the 'and' with -1 sibling.
4084  // TODO: Recurse through a series of 'or' ops to find the rotate.
4085  EVT OpVT = N0.getValueType();
4086  if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4087  if (SDValue R = getRotateSource(N0.getOperand(0))) {
4088  SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4089  return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4090  }
4091  if (SDValue R = getRotateSource(N0.getOperand(1))) {
4092  SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4093  return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4094  }
4095  }
4096 
4097  return SDValue();
4098 }
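// Worked example (illustrative): (rotl X, Y) == 0 --> X == 0. A rotate
// only permutes bits, so the result is all-zeros (or all-ones) exactly
// when the input is.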
4099 
 4100 static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
 4101  ISD::CondCode Cond, const SDLoc &dl,
4102  SelectionDAG &DAG) {
4103  // If we are testing for all-bits-clear, we might be able to do that with
4104  // less shifting since bit-order does not matter.
4105  if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4106  return SDValue();
4107 
4108  auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4109  if (!C1 || !C1->isZero())
4110  return SDValue();
4111 
4112  if (!N0.hasOneUse() ||
4113  (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4114  return SDValue();
4115 
4116  unsigned BitWidth = N0.getScalarValueSizeInBits();
4117  auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4118  if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
4119  return SDValue();
4120 
4121  // Canonicalize fshr as fshl to reduce pattern-matching.
4122  unsigned ShAmt = ShAmtC->getZExtValue();
4123  if (N0.getOpcode() == ISD::FSHR)
4124  ShAmt = BitWidth - ShAmt;
4125 
4126  // Match an 'or' with a specific operand 'Other' in either commuted variant.
4127  SDValue X, Y;
4128  auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4129  if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4130  return false;
4131  if (Or.getOperand(0) == Other) {
4132  X = Or.getOperand(0);
4133  Y = Or.getOperand(1);
4134  return true;
4135  }
4136  if (Or.getOperand(1) == Other) {
4137  X = Or.getOperand(1);
4138  Y = Or.getOperand(0);
4139  return true;
4140  }
4141  return false;
4142  };
4143 
4144  EVT OpVT = N0.getValueType();
4145  EVT ShAmtVT = N0.getOperand(2).getValueType();
4146  SDValue F0 = N0.getOperand(0);
4147  SDValue F1 = N0.getOperand(1);
4148  if (matchOr(F0, F1)) {
4149  // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4150  SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4151  SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4152  SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4153  return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4154  }
4155  if (matchOr(F1, F0)) {
4156  // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4157  SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4158  SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4159  SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4160  return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4161  }
4162 
4163  return SDValue();
4164 }
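// Worked example (illustrative) with i32 and shift amount 8:
//   fshl (or X, Y), X, 8 == 0  -->  (or (shl Y, 8), X) == 0
// The funnel shift only rearranges which bits of X and Y are tested
// against zero, so the test can be rebuilt without it.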
4165 
4166 /// Try to simplify a setcc built with the specified operands and cc. If it is
4167 /// unable to simplify it, return a null SDValue.
 4168 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
 4169  ISD::CondCode Cond, bool foldBooleans,
4170  DAGCombinerInfo &DCI,
4171  const SDLoc &dl) const {
4172  SelectionDAG &DAG = DCI.DAG;
4173  const DataLayout &Layout = DAG.getDataLayout();
4174  EVT OpVT = N0.getValueType();
4175 
4176  // Constant fold or commute setcc.
4177  if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4178  return Fold;
4179 
4180  bool N0ConstOrSplat =
4181  isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4182  bool N1ConstOrSplat =
4183  isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4184 
4185  // Ensure that the constant occurs on the RHS and fold constant comparisons.
4186  // TODO: Handle non-splat vector constants. All undef causes trouble.
4187  // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4188  // infinite loop here when we encounter one.
 4189  ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
 4190  if (N0ConstOrSplat && (!OpVT.isScalableVector() || !N1ConstOrSplat) &&
4191  (DCI.isBeforeLegalizeOps() ||
4192  isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4193  return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4194 
4195  // If we have a subtract with the same 2 non-constant operands as this setcc
4196  // -- but in reverse order -- then try to commute the operands of this setcc
4197  // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4198  // instruction on some targets.
4199  if (!N0ConstOrSplat && !N1ConstOrSplat &&
4200  (DCI.isBeforeLegalizeOps() ||
4201  isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4202  DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4203  !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4204  return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4205 
4206  if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4207  return V;
4208 
4209  if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4210  return V;
4211 
4212  if (auto *N1C = isConstOrConstSplat(N1)) {
4213  const APInt &C1 = N1C->getAPIntValue();
4214 
4215  // Optimize some CTPOP cases.
4216  if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4217  return V;
4218 
4219  // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4220  // equality comparison, then we're just comparing whether X itself is
4221  // zero.
4222  if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4223  N0.getOperand(0).getOpcode() == ISD::CTLZ &&
 4224  isPowerOf2_32(N0.getScalarValueSizeInBits())) {
 4225  if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4226  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4227  ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4228  if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4229  // (srl (ctlz x), 5) == 0 -> X != 0
4230  // (srl (ctlz x), 5) != 1 -> X != 0
4231  Cond = ISD::SETNE;
4232  } else {
4233  // (srl (ctlz x), 5) != 0 -> X == 0
4234  // (srl (ctlz x), 5) == 1 -> X == 0
4235  Cond = ISD::SETEQ;
4236  }
4237  SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4238  return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4239  Cond);
4240  }
4241  }
4242  }
4243  }
4244 
4245  // FIXME: Support vectors.
4246  if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4247  const APInt &C1 = N1C->getAPIntValue();
4248 
4249  // (zext x) == C --> x == (trunc C)
4250  // (sext x) == C --> x == (trunc C)
4251  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4252  DCI.isBeforeLegalize() && N0->hasOneUse()) {
4253  unsigned MinBits = N0.getValueSizeInBits();
4254  SDValue PreExt;
4255  bool Signed = false;
4256  if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4257  // ZExt
4258  MinBits = N0->getOperand(0).getValueSizeInBits();
4259  PreExt = N0->getOperand(0);
4260  } else if (N0->getOpcode() == ISD::AND) {
4261  // DAGCombine turns costly ZExts into ANDs
4262  if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4263  if ((C->getAPIntValue()+1).isPowerOf2()) {
4264  MinBits = C->getAPIntValue().countTrailingOnes();
4265  PreExt = N0->getOperand(0);
4266  }
4267  } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4268  // SExt
4269  MinBits = N0->getOperand(0).getValueSizeInBits();
4270  PreExt = N0->getOperand(0);
4271  Signed = true;
4272  } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4273  // ZEXTLOAD / SEXTLOAD
4274  if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4275  MinBits = LN0->getMemoryVT().getSizeInBits();
4276  PreExt = N0;
4277  } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4278  Signed = true;
4279  MinBits = LN0->getMemoryVT().getSizeInBits();
4280  PreExt = N0;
4281  }
4282  }
4283 
4284  // Figure out how many bits we need to preserve this constant.
4285  unsigned ReqdBits = Signed ? C1.getMinSignedBits() : C1.getActiveBits();
4286 
4287  // Make sure we're not losing bits from the constant.
4288  if (MinBits > 0 &&
4289  MinBits < C1.getBitWidth() &&
4290  MinBits >= ReqdBits) {
4291  EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4292  if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4293  // Will get folded away.
4294  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4295  if (MinBits == 1 && C1 == 1)
4296  // Invert the condition.
4297  return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
 4298  ISD::getSetCCInverse(Cond, MVT::i1));
 4299  SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4300  return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4301  }
4302 
4303  // If truncating the setcc operands is not desirable, we can still
4304  // simplify the expression in some cases:
4305  // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4306  // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4307  // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4308  // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4309  // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4310  // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4311  SDValue TopSetCC = N0->getOperand(0);
4312  unsigned N0Opc = N0->getOpcode();
4313  bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4314  if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4315  TopSetCC.getOpcode() == ISD::SETCC &&
4316  (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4317  (isConstFalseVal(N1) ||
4318  isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4319 
4320  bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4321  (!N1C->isZero() && Cond == ISD::SETNE);
4322 
4323  if (!Inverse)
4324  return TopSetCC;
4325 
 4326  ISD::CondCode InvCond = ISD::getSetCCInverse(
 4327  cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4328  TopSetCC.getOperand(0).getValueType());
4329  return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4330  TopSetCC.getOperand(1),
4331  InvCond);
4332  }
4333  }
4334  }
4335 
4336  // If the LHS is '(and load, const)', the RHS is 0, the test is for
4337  // equality or unsigned, and all 1 bits of the const are in the same
4338  // partial word, see if we can shorten the load.
4339  if (DCI.isBeforeLegalize() &&
 4340  !ISD::isSignedIntSetCC(Cond) &&
 4341  N0.getOpcode() == ISD::AND && C1 == 0 &&
4342  N0.getNode()->hasOneUse() &&
4343  isa<LoadSDNode>(N0.getOperand(0)) &&
4344  N0.getOperand(0).getNode()->hasOneUse() &&
4345  isa<ConstantSDNode>(N0.getOperand(1))) {
4346  LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
4347  APInt bestMask;
4348  unsigned bestWidth = 0, bestOffset = 0;
4349  if (Lod->isSimple() && Lod->isUnindexed()) {
4350  unsigned origWidth = N0.getValueSizeInBits();
4351  unsigned maskWidth = origWidth;
 4352  // We can narrow (e.g.) 16-bit extending loads on a 32-bit target to
4353  // 8 bits, but have to be careful...
4354  if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4355  origWidth = Lod->getMemoryVT().getSizeInBits();
4356  const APInt &Mask = N0.getConstantOperandAPInt(1);
4357  for (unsigned width = origWidth / 2; width>=8; width /= 2) {
4358  APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4359  for (unsigned offset=0; offset<origWidth/width; offset++) {
4360  if (Mask.isSubsetOf(newMask)) {
4361  if (Layout.isLittleEndian())
4362  bestOffset = (uint64_t)offset * (width/8);
4363  else
4364  bestOffset = (origWidth/width - offset - 1) * (width/8);
4365  bestMask = Mask.lshr(offset * (width/8) * 8);
4366  bestWidth = width;
4367  break;
4368  }
4369  newMask <<= width;
4370  }
4371  }
4372  }
4373  if (bestWidth) {
4374  EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4375  if (newVT.isRound() &&
4376  shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
4377  SDValue Ptr = Lod->getBasePtr();
4378  if (bestOffset != 0)
4379  Ptr =
4380  DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset), dl);
4381  SDValue NewLoad =
4382  DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4383  Lod->getPointerInfo().getWithOffset(bestOffset),
4384  Lod->getOriginalAlign());
4385  return DAG.getSetCC(dl, VT,
4386  DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4387  DAG.getConstant(bestMask.trunc(bestWidth),
4388  dl, newVT)),
4389  DAG.getConstant(0LL, dl, newVT), Cond);
4390  }
4391  }
4392  }
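  // Worked example (illustrative, little-endian): for
  //   ((load i32 %p) & 0xFF00) == 0
  // the loop above settles on bestWidth = 8 and bestOffset = 1, so the
  // test becomes ((load i8 (%p + 1)) & 0xFF) == 0.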
4393 
4394  // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4395  if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4396  unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4397 
4398  // If the comparison constant has bits in the upper part, the
4399  // zero-extended value could never match.
4400  if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
4401  C1.getBitWidth() - InSize))) {
4402  switch (Cond) {
4403  case ISD::SETUGT:
4404  case ISD::SETUGE:
4405  case ISD::SETEQ:
4406  return DAG.getConstant(0, dl, VT);
4407  case ISD::SETULT:
4408  case ISD::SETULE:
4409  case ISD::SETNE:
4410  return DAG.getConstant(1, dl, VT);
4411  case ISD::SETGT:
4412  case ISD::SETGE:
4413  // True if the sign bit of C1 is set.
4414  return DAG.getConstant(C1.isNegative(), dl, VT);
4415  case ISD::SETLT:
4416  case ISD::SETLE:
4417  // True if the sign bit of C1 isn't set.
4418  return DAG.getConstant(C1.isNonNegative(), dl, VT);
4419  default:
4420  break;
4421  }
4422  }
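  // Worked example (illustrative): (zext i8 %x to i32) == 511 is always
  // false, since bit 8 can never be set in the zero-extended value;
  // SETEQ therefore folds to the constant 0.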
4423 
4424  // Otherwise, we can perform the comparison with the low bits.
4425  switch (Cond) {
4426  case ISD::SETEQ:
4427  case ISD::SETNE:
4428  case ISD::SETUGT:
4429  case ISD::SETUGE:
4430  case ISD::SETULT:
4431  case ISD::SETULE: {
4432  EVT newVT = N0.getOperand(0).getValueType();
4433  if (DCI.isBeforeLegalizeOps() ||
4434  (isOperationLegal(ISD::SETCC, newVT) &&
4435  isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
4436  EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4437  SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4438 
4439  SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4440  NewConst, Cond);
4441  return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4442  }
4443  break;
4444  }
4445  default:
 4446  break; // TODO: be more careful with signed comparisons.
4447  }
4448  } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4449  (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4450  !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4451  OpVT)) {
4452  EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4453  unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4454  EVT ExtDstTy = N0.getValueType();
4455  unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4456 
4457  // If the constant doesn't fit into the number of bits for the source of
4458  // the sign extension, it is impossible for both sides to be equal.
4459  if (C1.getMinSignedBits() > ExtSrcTyBits)
4460  return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4461 
4462  assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4463  ExtDstTy != ExtSrcTy && "Unexpected types!");
4464  APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4465  SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4466  DAG.getConstant(Imm, dl, ExtDstTy));
4467  if (!DCI.isCalledByLegalizer())
4468  DCI.AddToWorklist(ZextOp.getNode());
4469  // Otherwise, make this a use of a zext.
4470  return DAG.getSetCC(dl, VT, ZextOp,
4471  DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4472  } else if ((N1C->isZero() || N1C->isOne()) &&
4473  (