1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/STLExtras.h"
24#include "llvm/IR/DataLayout.h"
27#include "llvm/IR/LLVMContext.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCExpr.h"
35#include <cctype>
36using namespace llvm;
37
38/// NOTE: The TargetMachine owns TLOF.
39TargetLowering::TargetLowering(const TargetMachine &tm)
40 : TargetLoweringBase(tm) {}
41
42const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
43 return nullptr;
44}
45
46bool TargetLowering::isPositionIndependent() const {
47 return getTargetMachine().isPositionIndependent();
48}
49
50/// Check whether a given call node is in tail position within its function. If
51/// so, it sets Chain to the input chain of the tail call.
52bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
53 SDValue &Chain) const {
54 const Function &F = DAG.getMachineFunction().getFunction();
55
56 // First, check if tail calls have been disabled in this function.
57 if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
58 return false;
59
60 // Conservatively require the attributes of the call to match those of
61 // the return. Ignore following attributes because they don't affect the
62 // call sequence.
63 AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
64 for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
65 Attribute::DereferenceableOrNull, Attribute::NoAlias,
66 Attribute::NonNull, Attribute::NoUndef})
67 CallerAttrs.removeAttribute(Attr);
68
69 if (CallerAttrs.hasAttributes())
70 return false;
71
72 // It's not safe to eliminate the sign / zero extension of the return value.
73 if (CallerAttrs.contains(Attribute::ZExt) ||
74 CallerAttrs.contains(Attribute::SExt))
75 return false;
76
77 // Check if the only use is a function return node.
78 return isUsedByReturnOnly(Node, Chain);
79}
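// Illustrative sketch (not from the upstream file): a frontend can opt a
// function out of the check above by attaching the string attribute queried
// here, for example
//   F.addFnAttr("disable-tail-calls", "true");
// after which every call inside F fails isInTailCallPosition and is emitted
// as an ordinary call.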
80
81bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
82 const uint32_t *CallerPreservedMask,
83 const SmallVectorImpl<CCValAssign> &ArgLocs,
84 const SmallVectorImpl<SDValue> &OutVals) const {
85 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
86 const CCValAssign &ArgLoc = ArgLocs[I];
87 if (!ArgLoc.isRegLoc())
88 continue;
89 MCRegister Reg = ArgLoc.getLocReg();
90 // Only look at callee saved registers.
91 if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
92 continue;
93 // Check that we pass the value used for the caller.
94 // (We look for a CopyFromReg reading a virtual register that is used
95 // for the function live-in value of register Reg)
96 SDValue Value = OutVals[I];
97 if (Value->getOpcode() == ISD::AssertZext)
98 Value = Value.getOperand(0);
99 if (Value->getOpcode() != ISD::CopyFromReg)
100 return false;
101 Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
102 if (MRI.getLiveInPhysReg(ArgReg) != Reg)
103 return false;
104 }
105 return true;
106}
107
108/// Set CallLoweringInfo attribute flags based on a call instruction
109/// and called function attributes.
110void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
111 unsigned ArgIdx) {
112 IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
113 IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
114 IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
115 IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
116 IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
117 IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
118 IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
119 IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
120 IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
121 IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
122 IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
123 IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
124 Alignment = Call->getParamStackAlign(ArgIdx);
125 IndirectType = nullptr;
127 "multiple ABI attributes?");
128 if (IsByVal) {
129 IndirectType = Call->getParamByValType(ArgIdx);
130 if (!Alignment)
131 Alignment = Call->getParamAlign(ArgIdx);
132 }
133 if (IsPreallocated)
134 IndirectType = Call->getParamPreallocatedType(ArgIdx);
135 if (IsInAlloca)
136 IndirectType = Call->getParamInAllocaType(ArgIdx);
137 if (IsSRet)
138 IndirectType = Call->getParamStructRetType(ArgIdx);
139}
140
141/// Generate a libcall taking the given operands as arguments and returning a
142/// result of type RetVT.
143std::pair<SDValue, SDValue>
144TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
145 ArrayRef<SDValue> Ops,
146 MakeLibCallOptions CallOptions,
147 const SDLoc &dl,
148 SDValue InChain) const {
149 if (!InChain)
150 InChain = DAG.getEntryNode();
151
152 TargetLowering::ArgListTy Args;
153 Args.reserve(Ops.size());
154
155 TargetLowering::ArgListEntry Entry;
156 for (unsigned i = 0; i < Ops.size(); ++i) {
157 SDValue NewOp = Ops[i];
158 Entry.Node = NewOp;
159 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
160 Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
161 CallOptions.IsSExt);
162 Entry.IsZExt = !Entry.IsSExt;
163
164 if (CallOptions.IsSoften &&
165 !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
166 Entry.IsSExt = Entry.IsZExt = false;
167 }
168 Args.push_back(Entry);
169 }
170
171 if (LC == RTLIB::UNKNOWN_LIBCALL)
172 report_fatal_error("Unsupported library call operation!");
173 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
174 getPointerTy(DAG.getDataLayout()));
175
176 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
177 TargetLowering::CallLoweringInfo CLI(DAG);
178 bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
179 bool zeroExtend = !signExtend;
180
181 if (CallOptions.IsSoften &&
182 !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
183 signExtend = zeroExtend = false;
184 }
185
186 CLI.setDebugLoc(dl)
187 .setChain(InChain)
188 .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
189 .setNoReturn(CallOptions.DoesNotReturn)
190 .setDiscardResult(!CallOptions.IsReturnValueUsed)
191 .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
192 .setSExtResult(signExtend)
193 .setZExtResult(zeroExtend);
194 return LowerCallTo(CLI);
195}
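// Illustrative sketch (not from the upstream file): a target's custom
// lowering might expand an f32 remainder through makeLibCall roughly like
// this, with DAG, dl, LHS and RHS supplied by the surrounding lowering code:
//   SDValue Ops[2] = {LHS, RHS};
//   TargetLowering::MakeLibCallOptions CallOptions;
//   std::pair<SDValue, SDValue> Res =
//       makeLibCall(DAG, RTLIB::REM_F32, MVT::f32, Ops, CallOptions, dl);
//   SDValue Result = Res.first;    // value returned by the libcall
//   SDValue OutChain = Res.second; // chain after the call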
196
197bool TargetLowering::findOptimalMemOpLowering(
198 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
199 unsigned SrcAS, const AttributeList &FuncAttributes) const {
200 if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
201 Op.getSrcAlign() < Op.getDstAlign())
202 return false;
203
204 EVT VT = getOptimalMemOpType(Op, FuncAttributes);
205
206 if (VT == MVT::Other) {
207 // Use the largest integer type whose alignment constraints are satisfied.
208 // We only need to check DstAlign here as SrcAlign is always greater or
209 // equal to DstAlign (or zero).
210 VT = MVT::i64;
211 if (Op.isFixedDstAlign())
212 while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
213 !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
214 VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
215 assert(VT.isInteger());
216
217 // Find the largest legal integer type.
218 MVT LVT = MVT::i64;
219 while (!isTypeLegal(LVT))
220 LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
221 assert(LVT.isInteger());
222
223 // If the type we've chosen is larger than the largest legal integer type
224 // then use that instead.
225 if (VT.bitsGT(LVT))
226 VT = LVT;
227 }
228
229 unsigned NumMemOps = 0;
230 uint64_t Size = Op.size();
231 while (Size) {
232 unsigned VTSize = VT.getSizeInBits() / 8;
233 while (VTSize > Size) {
234 // For now, only use non-vector loads / stores for the left-over pieces.
235 EVT NewVT = VT;
236 unsigned NewVTSize;
237
238 bool Found = false;
239 if (VT.isVector() || VT.isFloatingPoint()) {
240 NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
241 if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
242 isSafeMemOpType(NewVT.getSimpleVT()))
243 Found = true;
244 else if (NewVT == MVT::i64 &&
245 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
246 isSafeMemOpType(MVT::f64)) {
247 // i64 is usually not legal on 32-bit targets, but f64 may be.
248 NewVT = MVT::f64;
249 Found = true;
250 }
251 }
252
253 if (!Found) {
254 do {
255 NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
256 if (NewVT == MVT::i8)
257 break;
258 } while (!isSafeMemOpType(NewVT.getSimpleVT()));
259 }
260 NewVTSize = NewVT.getSizeInBits() / 8;
261
262 // If the new VT cannot cover all of the remaining bits, then consider
263 // issuing a (or a pair of) unaligned and overlapping load / store.
264 unsigned Fast;
265 if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
266 allowsMisalignedMemoryAccesses(
267 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
268 MachineMemOperand::MONone, &Fast) &&
269 Fast)
270 VTSize = Size;
271 else {
272 VT = NewVT;
273 VTSize = NewVTSize;
274 }
275 }
276
277 if (++NumMemOps > Limit)
278 return false;
279
280 MemOps.push_back(VT);
281 Size -= VTSize;
282 }
283
284 return true;
285}
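// Worked example (assuming i64/i32/i16/i8 are all legal and Limit is not
// hit): a 15-byte fixed-size memcpy is decomposed by the loop above into
// MemOps = { i64, i32, i16, i8 } (8 + 4 + 2 + 1 bytes). When Op.allowOverlap()
// holds and misaligned accesses are fast, it can instead become { i64, i64 }
// with the second access overlapping the first by one byte.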
286
287/// Soften the operands of a comparison. This code is shared among BR_CC,
288/// SELECT_CC, and SETCC handlers.
289void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
290 SDValue &NewLHS, SDValue &NewRHS,
291 ISD::CondCode &CCCode,
292 const SDLoc &dl, const SDValue OldLHS,
293 const SDValue OldRHS) const {
294 SDValue Chain;
295 return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
296 OldRHS, Chain);
297}
298
299void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
300 SDValue &NewLHS, SDValue &NewRHS,
301 ISD::CondCode &CCCode,
302 const SDLoc &dl, const SDValue OldLHS,
303 const SDValue OldRHS,
304 SDValue &Chain,
305 bool IsSignaling) const {
306 // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
307 // not supporting it. We can update this code when libgcc provides such
308 // functions.
309
310 assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
311 && "Unsupported setcc type!");
312
313 // Expand into one or more soft-fp libcall(s).
314 RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
315 bool ShouldInvertCC = false;
316 switch (CCCode) {
317 case ISD::SETEQ:
318 case ISD::SETOEQ:
319 LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
320 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
321 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
322 break;
323 case ISD::SETNE:
324 case ISD::SETUNE:
325 LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
326 (VT == MVT::f64) ? RTLIB::UNE_F64 :
327 (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
328 break;
329 case ISD::SETGE:
330 case ISD::SETOGE:
331 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
332 (VT == MVT::f64) ? RTLIB::OGE_F64 :
333 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
334 break;
335 case ISD::SETLT:
336 case ISD::SETOLT:
337 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
338 (VT == MVT::f64) ? RTLIB::OLT_F64 :
339 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
340 break;
341 case ISD::SETLE:
342 case ISD::SETOLE:
343 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
344 (VT == MVT::f64) ? RTLIB::OLE_F64 :
345 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
346 break;
347 case ISD::SETGT:
348 case ISD::SETOGT:
349 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
350 (VT == MVT::f64) ? RTLIB::OGT_F64 :
351 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
352 break;
353 case ISD::SETO:
354 ShouldInvertCC = true;
355 [[fallthrough]];
356 case ISD::SETUO:
357 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
358 (VT == MVT::f64) ? RTLIB::UO_F64 :
359 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
360 break;
361 case ISD::SETONE:
362 // SETONE = O && UNE
363 ShouldInvertCC = true;
364 [[fallthrough]];
365 case ISD::SETUEQ:
366 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
367 (VT == MVT::f64) ? RTLIB::UO_F64 :
368 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
369 LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
370 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
371 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
372 break;
373 default:
374 // Invert CC for unordered comparisons
375 ShouldInvertCC = true;
376 switch (CCCode) {
377 case ISD::SETULT:
378 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
379 (VT == MVT::f64) ? RTLIB::OGE_F64 :
380 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
381 break;
382 case ISD::SETULE:
383 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
384 (VT == MVT::f64) ? RTLIB::OGT_F64 :
385 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
386 break;
387 case ISD::SETUGT:
388 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
389 (VT == MVT::f64) ? RTLIB::OLE_F64 :
390 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
391 break;
392 case ISD::SETUGE:
393 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
394 (VT == MVT::f64) ? RTLIB::OLT_F64 :
395 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
396 break;
397 default: llvm_unreachable("Do not know how to soften this setcc!");
398 }
399 }
400
401 // Use the target specific return value for comparison lib calls.
402 EVT RetVT = getCmpLibcallReturnType();
403 SDValue Ops[2] = {NewLHS, NewRHS};
404 TargetLowering::MakeLibCallOptions CallOptions;
405 EVT OpsVT[2] = { OldLHS.getValueType(),
406 OldRHS.getValueType() };
407 CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
408 auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
409 NewLHS = Call.first;
410 NewRHS = DAG.getConstant(0, dl, RetVT);
411
412 CCCode = getCmpLibcallCC(LC1);
413 if (ShouldInvertCC) {
414 assert(RetVT.isInteger());
415 CCCode = getSetCCInverse(CCCode, RetVT);
416 }
417
418 if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
419 // Update Chain.
420 Chain = Call.second;
421 } else {
422 EVT SetCCVT =
423 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
424 SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
425 auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
426 CCCode = getCmpLibcallCC(LC2);
427 if (ShouldInvertCC)
428 CCCode = getSetCCInverse(CCCode, RetVT);
429 NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
430 if (Chain)
431 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
432 Call2.second);
433 NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
434 Tmp.getValueType(), Tmp, NewLHS);
435 NewRHS = SDValue();
436 }
437}
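// Illustrative sketch (not from the upstream file): softening
// (setcc f32 %a, %b, setueq) takes the two-libcall path above, roughly:
//   uo  = __unordsf2(a, b)   // LC1 = RTLIB::UO_F32,  true when uo != 0
//   oeq = __eqsf2(a, b)      // LC2 = RTLIB::OEQ_F32, true when oeq == 0
//   res = (uo != 0) | (oeq == 0)
// Single-libcall predicates such as SETOLT emit only LC1 and compare its
// result against zero using getCmpLibcallCC(LC1).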
438
439/// Return the entry encoding for a jump table in the current function. The
440/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
441unsigned TargetLowering::getJumpTableEncoding() const {
442 // In non-pic modes, just use the address of a block.
443 if (!isPositionIndependent())
444 return MachineJumpTableInfo::EK_BlockAddress;
445
446 // In PIC mode, if the target supports a GPRel32 directive, use it.
447 if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
448 return MachineJumpTableInfo::EK_GPRel32BlockAddress;
449
450 // Otherwise, use a label difference.
451 return MachineJumpTableInfo::EK_LabelDifference32;
452}
453
454SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
455 SelectionDAG &DAG) const {
456 // If our PIC model is GP relative, use the global offset table as the base.
457 unsigned JTEncoding = getJumpTableEncoding();
458
459 if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
460 (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
461 return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
462
463 return Table;
464}
465
466/// This returns the relocation base for the given PIC jumptable, the same as
467/// getPICJumpTableRelocBase, but as an MCExpr.
468const MCExpr *
469TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
470 unsigned JTI, MCContext &Ctx) const {
471 // The normal PIC reloc base is the label at the start of the jump table.
472 return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
473}
474
475bool
476TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
477 const TargetMachine &TM = getTargetMachine();
478 const GlobalValue *GV = GA->getGlobal();
479
480 // If the address is not even local to this DSO we will have to load it from
481 // a got and then add the offset.
482 if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
483 return false;
484
485 // If the code is position independent we will have to add a base register.
486 if (isPositionIndependent())
487 return false;
488
489 // Otherwise we can do it.
490 return true;
491}
492
493//===----------------------------------------------------------------------===//
494// Optimization Methods
495//===----------------------------------------------------------------------===//
496
497/// If the specified instruction has a constant integer operand and there are
498/// bits set in that constant that are not demanded, then clear those bits and
499/// return true.
500bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
501 const APInt &DemandedBits,
502 const APInt &DemandedElts,
503 TargetLoweringOpt &TLO) const {
504 SDLoc DL(Op);
505 unsigned Opcode = Op.getOpcode();
506
507 // Do target-specific constant optimization.
508 if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
509 return TLO.New.getNode();
510
511 // FIXME: ISD::SELECT, ISD::SELECT_CC
512 switch (Opcode) {
513 default:
514 break;
515 case ISD::XOR:
516 case ISD::AND:
517 case ISD::OR: {
518 auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
519 if (!Op1C || Op1C->isOpaque())
520 return false;
521
522 // If this is a 'not' op, don't touch it because that's a canonical form.
523 const APInt &C = Op1C->getAPIntValue();
524 if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
525 return false;
526
527 if (!C.isSubsetOf(DemandedBits)) {
528 EVT VT = Op.getValueType();
529 SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
530 SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
531 return TLO.CombineTo(Op, NewOp);
532 }
533
534 break;
535 }
536 }
537
538 return false;
539}
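// Worked example (editorial sketch, assuming a 32-bit %x): with
// DemandedBits = 0xFF, the node (and i32 %x, 0xFF00) has a constant that is
// not a subset of the demanded bits, so it is rewritten above to
// (and i32 %x, 0x0), which later folds to zero; the analogous
// (or i32 %x, 0xFF00) shrinks its constant to 0 and simplifies away to %x.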
540
541bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
542 const APInt &DemandedBits,
543 TargetLoweringOpt &TLO) const {
544 EVT VT = Op.getValueType();
545 APInt DemandedElts = VT.isVector()
546 ? APInt::getAllOnes(VT.getVectorNumElements())
547 : APInt(1, 1);
548 return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
549}
550
551/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
552/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
553/// generalized for targets with other types of implicit widening casts.
554bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
555 const APInt &DemandedBits,
556 TargetLoweringOpt &TLO) const {
557 assert(Op.getNumOperands() == 2 &&
558 "ShrinkDemandedOp only supports binary operators!");
559 assert(Op.getNode()->getNumValues() == 1 &&
560 "ShrinkDemandedOp only supports nodes with one result!");
561
562 EVT VT = Op.getValueType();
563 SelectionDAG &DAG = TLO.DAG;
564 SDLoc dl(Op);
565
566 // Early return, as this function cannot handle vector types.
567 if (VT.isVector())
568 return false;
569
570 // Don't do this if the node has another user, which may require the
571 // full value.
572 if (!Op.getNode()->hasOneUse())
573 return false;
574
575 // Search for the smallest integer type with free casts to and from
576 // Op's type. For expedience, just check power-of-2 integer types.
577 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
578 unsigned DemandedSize = DemandedBits.getActiveBits();
579 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
580 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
581 EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
582 if (TLI.isTruncateFree(VT, SmallVT) && TLI.isZExtFree(SmallVT, VT)) {
583 // We found a type with free casts.
584 SDValue X = DAG.getNode(
585 Op.getOpcode(), dl, SmallVT,
586 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
587 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
588 assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
589 SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
590 return TLO.CombineTo(Op, Z);
591 }
592 }
593 return false;
594}
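// Worked example (assuming i64 -> i32 truncation and i32 -> i64
// zero-extension are both free on the target): an (add i64 %a, %b) whose
// users demand only the low 32 bits is narrowed by the loop above to
//   (any_extend i64 (add i32 (trunc %a), (trunc %b)))
// letting instruction selection use a 32-bit add.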
595
596bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
597 DAGCombinerInfo &DCI) const {
598 SelectionDAG &DAG = DCI.DAG;
599 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
600 !DCI.isBeforeLegalizeOps());
601 KnownBits Known;
602
603 bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
604 if (Simplified) {
605 DCI.AddToWorklist(Op.getNode());
606 DCI.CommitTargetLoweringOpt(TLO);
607 }
608 return Simplified;
609}
610
611bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
612 const APInt &DemandedElts,
613 DAGCombinerInfo &DCI) const {
614 SelectionDAG &DAG = DCI.DAG;
615 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
616 !DCI.isBeforeLegalizeOps());
617 KnownBits Known;
618
619 bool Simplified =
620 SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
621 if (Simplified) {
622 DCI.AddToWorklist(Op.getNode());
623 DCI.CommitTargetLoweringOpt(TLO);
624 }
625 return Simplified;
626}
627
628bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
629 KnownBits &Known,
630 TargetLoweringOpt &TLO,
631 unsigned Depth,
632 bool AssumeSingleUse) const {
633 EVT VT = Op.getValueType();
634
635 // Since the number of lanes in a scalable vector is unknown at compile time,
636 // we track one bit which is implicitly broadcast to all lanes. This means
637 // that all lanes in a scalable vector are considered demanded.
638 APInt DemandedElts = VT.isFixedLengthVector()
639 ? APInt::getAllOnes(VT.getVectorNumElements())
640 : APInt(1, 1);
641 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
642 AssumeSingleUse);
643}
644
645// TODO: Under what circumstances can we create nodes? Constant folding?
646SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
647 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
648 SelectionDAG &DAG, unsigned Depth) const {
649 EVT VT = Op.getValueType();
650
651 // Limit search depth.
652 if (Depth >= SelectionDAG::MaxRecursionDepth)
653 return SDValue();
654
655 // Ignore UNDEFs.
656 if (Op.isUndef())
657 return SDValue();
658
659 // Not demanding any bits/elts from Op.
660 if (DemandedBits == 0 || DemandedElts == 0)
661 return DAG.getUNDEF(VT);
662
663 bool IsLE = DAG.getDataLayout().isLittleEndian();
664 unsigned NumElts = DemandedElts.getBitWidth();
665 unsigned BitWidth = DemandedBits.getBitWidth();
666 KnownBits LHSKnown, RHSKnown;
667 switch (Op.getOpcode()) {
668 case ISD::BITCAST: {
669 if (VT.isScalableVector())
670 return SDValue();
671
672 SDValue Src = peekThroughBitcasts(Op.getOperand(0));
673 EVT SrcVT = Src.getValueType();
674 EVT DstVT = Op.getValueType();
675 if (SrcVT == DstVT)
676 return Src;
677
678 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
679 unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
680 if (NumSrcEltBits == NumDstEltBits)
681 if (SDValue V = SimplifyMultipleUseDemandedBits(
682 Src, DemandedBits, DemandedElts, DAG, Depth + 1))
683 return DAG.getBitcast(DstVT, V);
684
685 if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
686 unsigned Scale = NumDstEltBits / NumSrcEltBits;
687 unsigned NumSrcElts = SrcVT.getVectorNumElements();
688 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
689 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
690 for (unsigned i = 0; i != Scale; ++i) {
691 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
692 unsigned BitOffset = EltOffset * NumSrcEltBits;
693 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
694 if (!Sub.isZero()) {
695 DemandedSrcBits |= Sub;
696 for (unsigned j = 0; j != NumElts; ++j)
697 if (DemandedElts[j])
698 DemandedSrcElts.setBit((j * Scale) + i);
699 }
700 }
701
702 if (SDValue V = SimplifyMultipleUseDemandedBits(
703 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
704 return DAG.getBitcast(DstVT, V);
705 }
706
707 // TODO - bigendian once we have test coverage.
708 if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
709 unsigned Scale = NumSrcEltBits / NumDstEltBits;
710 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
711 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
712 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
713 for (unsigned i = 0; i != NumElts; ++i)
714 if (DemandedElts[i]) {
715 unsigned Offset = (i % Scale) * NumDstEltBits;
716 DemandedSrcBits.insertBits(DemandedBits, Offset);
717 DemandedSrcElts.setBit(i / Scale);
718 }
719
720 if (SDValue V = SimplifyMultipleUseDemandedBits(
721 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
722 return DAG.getBitcast(DstVT, V);
723 }
724
725 break;
726 }
727 case ISD::AND: {
728 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
729 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
730
731 // If all of the demanded bits are known 1 on one side, return the other.
732 // These bits cannot contribute to the result of the 'and' in this
733 // context.
734 if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
735 return Op.getOperand(0);
736 if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
737 return Op.getOperand(1);
738 break;
739 }
740 case ISD::OR: {
741 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
742 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
743
744 // If all of the demanded bits are known zero on one side, return the
745 // other. These bits cannot contribute to the result of the 'or' in this
746 // context.
747 if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
748 return Op.getOperand(0);
749 if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
750 return Op.getOperand(1);
751 break;
752 }
753 case ISD::XOR: {
754 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
755 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
756
757 // If all of the demanded bits are known zero on one side, return the
758 // other.
759 if (DemandedBits.isSubsetOf(RHSKnown.Zero))
760 return Op.getOperand(0);
761 if (DemandedBits.isSubsetOf(LHSKnown.Zero))
762 return Op.getOperand(1);
763 break;
764 }
765 case ISD::SHL: {
766 // If we are only demanding sign bits then we can use the shift source
767 // directly.
768 if (const APInt *MaxSA =
769 DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
770 SDValue Op0 = Op.getOperand(0);
771 unsigned ShAmt = MaxSA->getZExtValue();
772 unsigned NumSignBits =
773 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
774 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
775 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
776 return Op0;
777 }
778 break;
779 }
780 case ISD::SETCC: {
781 SDValue Op0 = Op.getOperand(0);
782 SDValue Op1 = Op.getOperand(1);
783 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
784 // If (1) we only need the sign-bit, (2) the setcc operands are the same
785 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
786 // -1, we may be able to bypass the setcc.
787 if (DemandedBits.isSignMask() &&
788 Op0.getScalarValueSizeInBits() == BitWidth &&
789 getBooleanContents(Op0.getValueType()) ==
790 BooleanContent::ZeroOrNegativeOneBooleanContent) {
791 // If we're testing X < 0, then this compare isn't needed - just use X!
792 // FIXME: We're limiting to integer types here, but this should also work
793 // if we don't care about FP signed-zero. The use of SETLT with FP means
794 // that we don't care about NaNs.
795 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
796 (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
797 return Op0;
798 }
799 break;
800 }
801 case ISD::SIGN_EXTEND_INREG: {
802 // If none of the extended bits are demanded, eliminate the sextinreg.
803 SDValue Op0 = Op.getOperand(0);
804 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
805 unsigned ExBits = ExVT.getScalarSizeInBits();
806 if (DemandedBits.getActiveBits() <= ExBits &&
808 return Op0;
809 // If the input is already sign extended, just drop the extension.
810 unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
811 if (NumSignBits >= (BitWidth - ExBits + 1))
812 return Op0;
813 break;
814 }
815 case ISD::ANY_EXTEND_VECTOR_INREG:
816 case ISD::SIGN_EXTEND_VECTOR_INREG:
817 case ISD::ZERO_EXTEND_VECTOR_INREG: {
818 if (VT.isScalableVector())
819 return SDValue();
820
821 // If we only want the lowest element and none of extended bits, then we can
822 // return the bitcasted source vector.
823 SDValue Src = Op.getOperand(0);
824 EVT SrcVT = Src.getValueType();
825 EVT DstVT = Op.getValueType();
826 if (IsLE && DemandedElts == 1 &&
827 DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
828 DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
829 return DAG.getBitcast(DstVT, Src);
830 }
831 break;
832 }
833 case ISD::INSERT_VECTOR_ELT: {
834 if (VT.isScalableVector())
835 return SDValue();
836
837 // If we don't demand the inserted element, return the base vector.
838 SDValue Vec = Op.getOperand(0);
839 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
840 EVT VecVT = Vec.getValueType();
841 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
842 !DemandedElts[CIdx->getZExtValue()])
843 return Vec;
844 break;
845 }
846 case ISD::INSERT_SUBVECTOR: {
847 if (VT.isScalableVector())
848 return SDValue();
849
850 SDValue Vec = Op.getOperand(0);
851 SDValue Sub = Op.getOperand(1);
852 uint64_t Idx = Op.getConstantOperandVal(2);
853 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
854 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
855 // If we don't demand the inserted subvector, return the base vector.
856 if (DemandedSubElts == 0)
857 return Vec;
858 break;
859 }
860 case ISD::VECTOR_SHUFFLE: {
861 assert(!VT.isScalableVector());
862 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
863
864 // If all the demanded elts are from one operand and are inline,
865 // then we can use the operand directly.
866 bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
867 for (unsigned i = 0; i != NumElts; ++i) {
868 int M = ShuffleMask[i];
869 if (M < 0 || !DemandedElts[i])
870 continue;
871 AllUndef = false;
872 IdentityLHS &= (M == (int)i);
873 IdentityRHS &= ((M - NumElts) == i);
874 }
875
876 if (AllUndef)
877 return DAG.getUNDEF(Op.getValueType());
878 if (IdentityLHS)
879 return Op.getOperand(0);
880 if (IdentityRHS)
881 return Op.getOperand(1);
882 break;
883 }
884 default:
885 // TODO: Probably okay to remove after audit; here to reduce change size
886 // in initial enablement patch for scalable vectors
887 if (VT.isScalableVector())
888 return SDValue();
889
890 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
891 if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
892 Op, DemandedBits, DemandedElts, DAG, Depth))
893 return V;
894 break;
895 }
896 return SDValue();
897}
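// Illustrative note (not from the upstream file): for (and %x, %m) where the
// demanded bits are already known to be one in %m, the AND case above returns
// %x directly, so a caller can rewire just the use being simplified while %m
// and the original AND stay in place for any other users.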
898
899SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
900 SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
901 unsigned Depth) const {
902 EVT VT = Op.getValueType();
903 // Since the number of lanes in a scalable vector is unknown at compile time,
904 // we track one bit which is implicitly broadcast to all lanes. This means
905 // that all lanes in a scalable vector are considered demanded.
906 APInt DemandedElts = VT.isFixedLengthVector()
907 ? APInt::getAllOnes(VT.getVectorNumElements())
908 : APInt(1, 1);
909 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
910 Depth);
911}
912
913SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
914 SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
915 unsigned Depth) const {
916 APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
917 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
918 Depth);
919}
920
921// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
922// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
923static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
924 const TargetLowering &TLI,
925 const APInt &DemandedBits,
926 const APInt &DemandedElts,
927 unsigned Depth) {
928 assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
929 "SRL or SRA node is required here!");
930 // Is the right shift using an immediate value of 1?
931 ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
932 if (!N1C || !N1C->isOne())
933 return SDValue();
934
935 // We are looking for an avgfloor
936 // add(ext, ext)
937 // or one of these as an avgceil
938 // add(add(ext, ext), 1)
939 // add(add(ext, 1), ext)
940 // add(ext, add(ext, 1))
941 SDValue Add = Op.getOperand(0);
942 if (Add.getOpcode() != ISD::ADD)
943 return SDValue();
944
945 SDValue ExtOpA = Add.getOperand(0);
946 SDValue ExtOpB = Add.getOperand(1);
947 SDValue Add2;
948 auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
949 ConstantSDNode *ConstOp;
950 if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
951 ConstOp->isOne()) {
952 ExtOpA = Op1;
953 ExtOpB = Op3;
954 Add2 = A;
955 return true;
956 }
957 if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
958 ConstOp->isOne()) {
959 ExtOpA = Op1;
960 ExtOpB = Op2;
961 Add2 = A;
962 return true;
963 }
964 return false;
965 };
966 bool IsCeil =
967 (ExtOpA.getOpcode() == ISD::ADD &&
968 MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
969 (ExtOpB.getOpcode() == ISD::ADD &&
970 MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
971
972 // If the shift is signed (sra):
973 // - Needs >= 2 sign bit for both operands.
974 // - Needs >= 2 zero bits.
975 // If the shift is unsigned (srl):
976 // - Needs >= 1 zero bit for both operands.
977 // - Needs 1 demanded bit zero and >= 2 sign bits.
978 unsigned ShiftOpc = Op.getOpcode();
979 bool IsSigned = false;
980 unsigned KnownBits;
981 unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
982 unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
983 unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
984 unsigned NumZeroA =
985 DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
986 unsigned NumZeroB =
987 DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
988 unsigned NumZero = std::min(NumZeroA, NumZeroB);
989
990 switch (ShiftOpc) {
991 default:
992 llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
993 case ISD::SRA: {
994 if (NumZero >= 2 && NumSigned < NumZero) {
995 IsSigned = false;
996 KnownBits = NumZero;
997 break;
998 }
999 if (NumSigned >= 1) {
1000 IsSigned = true;
1001 KnownBits = NumSigned;
1002 break;
1003 }
1004 return SDValue();
1005 }
1006 case ISD::SRL: {
1007 if (NumZero >= 1 && NumSigned < NumZero) {
1008 IsSigned = false;
1009 KnownBits = NumZero;
1010 break;
1011 }
1012 if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
1013 IsSigned = true;
1014 KnownBits = NumSigned;
1015 break;
1016 }
1017 return SDValue();
1018 }
1019 }
1020
1021 unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1022 : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1023
1024 // Find the smallest power-2 type that is legal for this vector size and
1025 // operation, given the original type size and the number of known sign/zero
1026 // bits.
1027 EVT VT = Op.getValueType();
1028 unsigned MinWidth =
1029 std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
1030 EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
1031 if (VT.isVector())
1032 NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
1033 if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT)) {
1034 // If we could not transform, and (both) adds are nuw/nsw, we can use the
1035 // larger type size to do the transform.
1036 if (!TLI.isOperationLegalOrCustom(AVGOpc, VT))
1037 return SDValue();
1038
1039 if (DAG.computeOverflowForAdd(IsSigned, Add.getOperand(0),
1040 Add.getOperand(1)) ==
1041 SelectionDAG::OFK_Never &&
1042 (!Add2 || DAG.computeOverflowForAdd(IsSigned, Add2.getOperand(0),
1043 Add2.getOperand(1)) ==
1044 SelectionDAG::OFK_Never))
1045 NVT = VT;
1046 else
1047 return SDValue();
1048 }
1049
1050 SDLoc DL(Op);
1051 SDValue ResultAVG =
1052 DAG.getNode(AVGOpc, DL, NVT, DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpA),
1053 DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpB));
1054 return DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT,
1055 ResultAVG);
1056}
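// Illustrative sketch (not from the upstream file): with i8 values
// sign-extended to i32,
//   (sra (add (sext %a), (sext %b)), 1)
// matches the avgfloor pattern above and, when ISD::AVGFLOORS is legal for
// the narrowed type, becomes
//   (sign_extend (avgfloors i8 %a, %b))
// The avgceil forms additionally require the matched (+1) inside the add.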
1057
1058/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1059/// result of Op are ever used downstream. If we can use this information to
1060/// simplify Op, create a new simplified DAG node and return true, returning the
1061/// original and new nodes in Old and New. Otherwise, analyze the expression and
1062/// return a mask of Known bits for the expression (used to simplify the
1063/// caller). The Known bits may only be accurate for those bits in the
1064/// OriginalDemandedBits and OriginalDemandedElts.
1065bool TargetLowering::SimplifyDemandedBits(
1066 SDValue Op, const APInt &OriginalDemandedBits,
1067 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1068 unsigned Depth, bool AssumeSingleUse) const {
1069 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1070 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1071 "Mask size mismatches value type size!");
1072
1073 // Don't know anything.
1074 Known = KnownBits(BitWidth);
1075
1076 EVT VT = Op.getValueType();
1077 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1078 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1079 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1080 "Unexpected vector size");
1081
1082 APInt DemandedBits = OriginalDemandedBits;
1083 APInt DemandedElts = OriginalDemandedElts;
1084 SDLoc dl(Op);
1085 auto &DL = TLO.DAG.getDataLayout();
1086
1087 // Undef operand.
1088 if (Op.isUndef())
1089 return false;
1090
1091 // We can't simplify target constants.
1092 if (Op.getOpcode() == ISD::TargetConstant)
1093 return false;
1094
1095 if (Op.getOpcode() == ISD::Constant) {
1096 // We know all of the bits for a constant!
1097 Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
1098 return false;
1099 }
1100
1101 if (Op.getOpcode() == ISD::ConstantFP) {
1102 // We know all of the bits for a floating point constant!
1103 Known = KnownBits::makeConstant(
1104 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1105 return false;
1106 }
1107
1108 // Other users may use these bits.
1109 bool HasMultiUse = false;
1110 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1111 if (Depth >= SelectionDAG::MaxRecursionDepth) {
1112 // Limit search depth.
1113 return false;
1114 }
1115 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1116 DemandedBits = APInt::getAllOnes(BitWidth);
1117 DemandedElts = APInt::getAllOnes(NumElts);
1118 HasMultiUse = true;
1119 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1120 // Not demanding any bits/elts from Op.
1121 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1122 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1123 // Limit search depth.
1124 return false;
1125 }
1126
1127 KnownBits Known2;
1128 switch (Op.getOpcode()) {
1129 case ISD::SCALAR_TO_VECTOR: {
1130 if (VT.isScalableVector())
1131 return false;
1132 if (!DemandedElts[0])
1133 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1134
1135 KnownBits SrcKnown;
1136 SDValue Src = Op.getOperand(0);
1137 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1138 APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1139 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1140 return true;
1141
1142 // Upper elements are undef, so only get the knownbits if we just demand
1143 // the bottom element.
1144 if (DemandedElts == 1)
1145 Known = SrcKnown.anyextOrTrunc(BitWidth);
1146 break;
1147 }
1148 case ISD::BUILD_VECTOR:
1149 // Collect the known bits that are shared by every demanded element.
1150 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1151 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1152 return false; // Don't fall through, will infinitely loop.
1153 case ISD::LOAD: {
1154 auto *LD = cast<LoadSDNode>(Op);
1155 if (getTargetConstantFromLoad(LD)) {
1156 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1157 return false; // Don't fall through, will infinitely loop.
1158 }
1159 if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1160 // If this is a ZEXTLoad and we are looking at the loaded value.
1161 EVT MemVT = LD->getMemoryVT();
1162 unsigned MemBits = MemVT.getScalarSizeInBits();
1163 Known.Zero.setBitsFrom(MemBits);
1164 return false; // Don't fall through, will infinitely loop.
1165 }
1166 break;
1167 }
1168 case ISD::INSERT_VECTOR_ELT: {
1169 if (VT.isScalableVector())
1170 return false;
1171 SDValue Vec = Op.getOperand(0);
1172 SDValue Scl = Op.getOperand(1);
1173 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1174 EVT VecVT = Vec.getValueType();
1175
1176 // If index isn't constant, assume we need all vector elements AND the
1177 // inserted element.
1178 APInt DemandedVecElts(DemandedElts);
1179 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1180 unsigned Idx = CIdx->getZExtValue();
1181 DemandedVecElts.clearBit(Idx);
1182
1183 // Inserted element is not required.
1184 if (!DemandedElts[Idx])
1185 return TLO.CombineTo(Op, Vec);
1186 }
1187
1188 KnownBits KnownScl;
1189 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1190 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1191 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1192 return true;
1193
1194 Known = KnownScl.anyextOrTrunc(BitWidth);
1195
1196 KnownBits KnownVec;
1197 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1198 Depth + 1))
1199 return true;
1200
1201 if (!!DemandedVecElts)
1202 Known = Known.intersectWith(KnownVec);
1203
1204 return false;
1205 }
1206 case ISD::INSERT_SUBVECTOR: {
1207 if (VT.isScalableVector())
1208 return false;
1209 // Demand any elements from the subvector and the remainder from the src it's
1210 // inserted into.
1211 SDValue Src = Op.getOperand(0);
1212 SDValue Sub = Op.getOperand(1);
1213 uint64_t Idx = Op.getConstantOperandVal(2);
1214 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1215 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1216 APInt DemandedSrcElts = DemandedElts;
1217 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
1218
1219 KnownBits KnownSub, KnownSrc;
1220 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1221 Depth + 1))
1222 return true;
1223 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1224 Depth + 1))
1225 return true;
1226
1227 Known.Zero.setAllBits();
1228 Known.One.setAllBits();
1229 if (!!DemandedSubElts)
1230 Known = Known.intersectWith(KnownSub);
1231 if (!!DemandedSrcElts)
1232 Known = Known.intersectWith(KnownSrc);
1233
1234 // Attempt to avoid multi-use src if we don't need anything from it.
1235 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1236 !DemandedSrcElts.isAllOnes()) {
1237 SDValue NewSub = SimplifyMultipleUseDemandedBits(
1238 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1239 SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1240 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1241 if (NewSub || NewSrc) {
1242 NewSub = NewSub ? NewSub : Sub;
1243 NewSrc = NewSrc ? NewSrc : Src;
1244 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1245 Op.getOperand(2));
1246 return TLO.CombineTo(Op, NewOp);
1247 }
1248 }
1249 break;
1250 }
1251 case ISD::EXTRACT_SUBVECTOR: {
1252 if (VT.isScalableVector())
1253 return false;
1254 // Offset the demanded elts by the subvector index.
1255 SDValue Src = Op.getOperand(0);
1256 if (Src.getValueType().isScalableVector())
1257 break;
1258 uint64_t Idx = Op.getConstantOperandVal(1);
1259 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1260 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1261
1262 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1263 Depth + 1))
1264 return true;
1265
1266 // Attempt to avoid multi-use src if we don't need anything from it.
1267 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1268 SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1269 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1270 if (DemandedSrc) {
1271 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1272 Op.getOperand(1));
1273 return TLO.CombineTo(Op, NewOp);
1274 }
1275 }
1276 break;
1277 }
1278 case ISD::CONCAT_VECTORS: {
1279 if (VT.isScalableVector())
1280 return false;
1281 Known.Zero.setAllBits();
1282 Known.One.setAllBits();
1283 EVT SubVT = Op.getOperand(0).getValueType();
1284 unsigned NumSubVecs = Op.getNumOperands();
1285 unsigned NumSubElts = SubVT.getVectorNumElements();
1286 for (unsigned i = 0; i != NumSubVecs; ++i) {
1287 APInt DemandedSubElts =
1288 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1289 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1290 Known2, TLO, Depth + 1))
1291 return true;
1292 // Known bits are shared by every demanded subvector element.
1293 if (!!DemandedSubElts)
1294 Known = Known.intersectWith(Known2);
1295 }
1296 break;
1297 }
1298 case ISD::VECTOR_SHUFFLE: {
1299 assert(!VT.isScalableVector());
1300 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1301
1302 // Collect demanded elements from shuffle operands.
1303 APInt DemandedLHS, DemandedRHS;
1304 if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1305 DemandedRHS))
1306 break;
1307
1308 if (!!DemandedLHS || !!DemandedRHS) {
1309 SDValue Op0 = Op.getOperand(0);
1310 SDValue Op1 = Op.getOperand(1);
1311
1312 Known.Zero.setAllBits();
1313 Known.One.setAllBits();
1314 if (!!DemandedLHS) {
1315 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1316 Depth + 1))
1317 return true;
1318 Known = Known.intersectWith(Known2);
1319 }
1320 if (!!DemandedRHS) {
1321 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1322 Depth + 1))
1323 return true;
1324 Known = Known.intersectWith(Known2);
1325 }
1326
1327 // Attempt to avoid multi-use ops if we don't need anything from them.
1328 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1329 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1330 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1331 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1332 if (DemandedOp0 || DemandedOp1) {
1333 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1334 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1335 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1336 return TLO.CombineTo(Op, NewOp);
1337 }
1338 }
1339 break;
1340 }
1341 case ISD::AND: {
1342 SDValue Op0 = Op.getOperand(0);
1343 SDValue Op1 = Op.getOperand(1);
1344
1345 // If the RHS is a constant, check to see if the LHS would be zero without
1346 // using the bits from the RHS. Below, we use knowledge about the RHS to
1347 // simplify the LHS, here we're using information from the LHS to simplify
1348 // the RHS.
1349 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
1350 // Do not increment Depth here; that can cause an infinite loop.
1351 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1352 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1353 if ((LHSKnown.Zero & DemandedBits) ==
1354 (~RHSC->getAPIntValue() & DemandedBits))
1355 return TLO.CombineTo(Op, Op0);
1356
1357 // If any of the set bits in the RHS are known zero on the LHS, shrink
1358 // the constant.
1359 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1360 DemandedElts, TLO))
1361 return true;
1362
1363 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1364 // constant, but if this 'and' is only clearing bits that were just set by
1365 // the xor, then this 'and' can be eliminated by shrinking the mask of
1366 // the xor. For example, for a 32-bit X:
1367 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1368 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1369 LHSKnown.One == ~RHSC->getAPIntValue()) {
1370 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1371 return TLO.CombineTo(Op, Xor);
1372 }
1373 }
1374
1375 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1376 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1377 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1378 (Op0.getOperand(0).isUndef() ||
1379 ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
1380 Op0->hasOneUse()) {
1381 unsigned NumSubElts =
1382 Op0.getOperand(1).getValueType().getVectorNumElements();
1383 unsigned SubIdx = Op0.getConstantOperandVal(2);
1384 APInt DemandedSub =
1385 APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1386 KnownBits KnownSubMask =
1387 TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1388 if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1389 SDValue NewAnd =
1390 TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1391 SDValue NewInsert =
1392 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1393 Op0.getOperand(1), Op0.getOperand(2));
1394 return TLO.CombineTo(Op, NewInsert);
1395 }
1396 }
1397
1398 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1399 Depth + 1))
1400 return true;
1401 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1402 if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1403 Known2, TLO, Depth + 1))
1404 return true;
1405 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1406
1407 // If all of the demanded bits are known one on one side, return the other.
1408 // These bits cannot contribute to the result of the 'and'.
1409 if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1410 return TLO.CombineTo(Op, Op0);
1411 if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1412 return TLO.CombineTo(Op, Op1);
1413 // If all of the demanded bits in the inputs are known zeros, return zero.
1414 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1415 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1416 // If the RHS is a constant, see if we can simplify it.
1417 if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1418 TLO))
1419 return true;
1420 // If the operation can be done in a smaller type, do so.
1421 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1422 return true;
1423
1424 // Attempt to avoid multi-use ops if we don't need anything from them.
1425 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1426 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1427 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1428 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1429 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1430 if (DemandedOp0 || DemandedOp1) {
1431 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1432 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1433 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1434 return TLO.CombineTo(Op, NewOp);
1435 }
1436 }
1437
1438 Known &= Known2;
1439 break;
1440 }
1441 case ISD::OR: {
1442 SDValue Op0 = Op.getOperand(0);
1443 SDValue Op1 = Op.getOperand(1);
1444
1445 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1446 Depth + 1))
1447 return true;
1448 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1449 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1450 Known2, TLO, Depth + 1))
1451 return true;
1452 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1453
1454 // If all of the demanded bits are known zero on one side, return the other.
1455 // These bits cannot contribute to the result of the 'or'.
1456 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1457 return TLO.CombineTo(Op, Op0);
1458 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1459 return TLO.CombineTo(Op, Op1);
1460 // If the RHS is a constant, see if we can simplify it.
1461 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1462 return true;
1463 // If the operation can be done in a smaller type, do so.
1464 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1465 return true;
1466
1467 // Attempt to avoid multi-use ops if we don't need anything from them.
1468 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1469 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1470 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1471 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1472 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1473 if (DemandedOp0 || DemandedOp1) {
1474 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1475 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1476 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1477 return TLO.CombineTo(Op, NewOp);
1478 }
1479 }
1480
1481 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1482 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1483 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1484 Op0->hasOneUse() && Op1->hasOneUse()) {
1485 // Attempt to match all commutations - m_c_Or would've been useful!
1486 for (int I = 0; I != 2; ++I) {
1487 SDValue X = Op.getOperand(I).getOperand(0);
1488 SDValue C1 = Op.getOperand(I).getOperand(1);
1489 SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1490 SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1491 if (Alt.getOpcode() == ISD::OR) {
1492 for (int J = 0; J != 2; ++J) {
1493 if (X == Alt.getOperand(J)) {
1494 SDValue Y = Alt.getOperand(1 - J);
1495 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1496 {C1, C2})) {
1497 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1498 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1499 return TLO.CombineTo(
1500 Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1501 }
1502 }
1503 }
1504 }
1505 }
1506 }
1507
1508 Known |= Known2;
1509 break;
1510 }
1511 case ISD::XOR: {
1512 SDValue Op0 = Op.getOperand(0);
1513 SDValue Op1 = Op.getOperand(1);
1514
1515 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1516 Depth + 1))
1517 return true;
1518 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1519 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1520 Depth + 1))
1521 return true;
1522 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1523
1524 // If all of the demanded bits are known zero on one side, return the other.
1525 // These bits cannot contribute to the result of the 'xor'.
1526 if (DemandedBits.isSubsetOf(Known.Zero))
1527 return TLO.CombineTo(Op, Op0);
1528 if (DemandedBits.isSubsetOf(Known2.Zero))
1529 return TLO.CombineTo(Op, Op1);
1530 // If the operation can be done in a smaller type, do so.
1531 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1532 return true;
1533
1534 // If all of the unknown bits are known to be zero on one side or the other
1535 // turn this into an *inclusive* or.
1536 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1537 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1538 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1539
1540 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1541 if (C) {
1542 // If one side is a constant, and all of the set bits in the constant are
1543 // also known set on the other side, turn this into an AND, as we know
1544 // the bits will be cleared.
1545 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1546 // NB: it is okay if more bits are known than are requested
1547 if (C->getAPIntValue() == Known2.One) {
1548 SDValue ANDC =
1549 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1550 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1551 }
1552
1553 // If the RHS is a constant, see if we can change it. Don't alter a -1
1554 // constant because that's a 'not' op, and that is better for combining
1555 // and codegen.
1556 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1557 // We're flipping all demanded bits. Flip the undemanded bits too.
1558 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1559 return TLO.CombineTo(Op, New);
1560 }
1561
1562 unsigned Op0Opcode = Op0.getOpcode();
1563 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1564 if (ConstantSDNode *ShiftC =
1565 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1566 // Don't crash on an oversized shift. We can not guarantee that a
1567 // bogus shift has been simplified to undef.
1568 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1569 uint64_t ShiftAmt = ShiftC->getZExtValue();
1570 APInt Ones = APInt::getAllOnes(BitWidth);
1571 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1572 : Ones.lshr(ShiftAmt);
1573 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
1574 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1575 TLI.isDesirableToCommuteXorWithShift(Op.getNode())) {
1576 // If the xor constant is a demanded mask, do a 'not' before the
1577 // shift:
1578 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1579 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1580 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1581 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1582 Op0.getOperand(1)));
1583 }
1584 }
1585 }
1586 }
1587 }
1588
1589 // If we can't turn this into a 'not', try to shrink the constant.
1590 if (!C || !C->isAllOnes())
1591 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1592 return true;
1593
1594 // Attempt to avoid multi-use ops if we don't need anything from them.
1595 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1596 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1597 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1598 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1599 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1600 if (DemandedOp0 || DemandedOp1) {
1601 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1602 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1603 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1604 return TLO.CombineTo(Op, NewOp);
1605 }
1606 }
1607
1608 Known ^= Known2;
1609 break;
1610 }
1611 case ISD::SELECT:
1612 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
1613 Depth + 1))
1614 return true;
1615 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
1616 Depth + 1))
1617 return true;
1618 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1619 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1620
1621 // If the operands are constants, see if we can simplify them.
1622 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1623 return true;
1624
1625 // Only known if known in both the LHS and RHS.
1626 Known = Known.intersectWith(Known2);
1627 break;
1628 case ISD::VSELECT:
1629 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1630 Known, TLO, Depth + 1))
1631 return true;
1632 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1633 Known2, TLO, Depth + 1))
1634 return true;
1635 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1636 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1637
1638 // Only known if known in both the LHS and RHS.
1639 Known = Known.intersectWith(Known2);
1640 break;
1641 case ISD::SELECT_CC:
1642 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
1643 Depth + 1))
1644 return true;
1645 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
1646 Depth + 1))
1647 return true;
1648 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1649 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1650
1651 // If the operands are constants, see if we can simplify them.
1652 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1653 return true;
1654
1655 // Only known if known in both the LHS and RHS.
1656 Known = Known.intersectWith(Known2);
1657 break;
1658 case ISD::SETCC: {
1659 SDValue Op0 = Op.getOperand(0);
1660 SDValue Op1 = Op.getOperand(1);
1661 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1662 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1663 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1664 // -1, we may be able to bypass the setcc.
1665 if (DemandedBits.isSignMask() &&
1666 Op0.getScalarValueSizeInBits() == BitWidth &&
1667 getBooleanContents(Op0.getValueType()) ==
1668 BooleanContent::ZeroOrNegativeOneBooleanContent) {
1669 // If we're testing X < 0, then this compare isn't needed - just use X!
1670 // FIXME: We're limiting to integer types here, but this should also work
1671 // if we don't care about FP signed-zero. The use of SETLT with FP means
1672 // that we don't care about NaNs.
1673 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1674 (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1675 return TLO.CombineTo(Op, Op0);
1676
1677 // TODO: Should we check for other forms of sign-bit comparisons?
1678 // Examples: X <= -1, X >= 0
1679 }
1680 if (getBooleanContents(Op0.getValueType()) ==
1681 TargetLowering::ZeroOrOneBooleanContent &&
1682 BitWidth > 1)
1683 Known.Zero.setBitsFrom(1);
1684 break;
1685 }
1686 case ISD::SHL: {
1687 SDValue Op0 = Op.getOperand(0);
1688 SDValue Op1 = Op.getOperand(1);
1689 EVT ShiftVT = Op1.getValueType();
1690
1691 if (const APInt *SA =
1692 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1693 unsigned ShAmt = SA->getZExtValue();
1694 if (ShAmt == 0)
1695 return TLO.CombineTo(Op, Op0);
1696
1697 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1698 // single shift. We can do this if the bottom bits (which are shifted
1699 // out) are never demanded.
1700 // TODO - support non-uniform vector amounts.
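// e.g. ((X >>u 3) << 5) --> (X << 2) when the low 5 bits of the result
// are not demanded.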
1701 if (Op0.getOpcode() == ISD::SRL) {
1702 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1703 if (const APInt *SA2 =
1704 TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1705 unsigned C1 = SA2->getZExtValue();
1706 unsigned Opc = ISD::SHL;
1707 int Diff = ShAmt - C1;
1708 if (Diff < 0) {
1709 Diff = -Diff;
1710 Opc = ISD::SRL;
1711 }
1712 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1713 return TLO.CombineTo(
1714 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1715 }
1716 }
1717 }
1718
1719 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1720 // are not demanded. This will likely allow the anyext to be folded away.
1721 // TODO - support non-uniform vector amounts.
1722 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1723 SDValue InnerOp = Op0.getOperand(0);
1724 EVT InnerVT = InnerOp.getValueType();
1725 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1726 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1727 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1728 SDValue NarrowShl = TLO.DAG.getNode(
1729 ISD::SHL, dl, InnerVT, InnerOp,
1730 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1731 return TLO.CombineTo(
1732 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1733 }
1734
1735 // Repeat the SHL optimization above in cases where an extension
1736 // intervenes: (shl (anyext (shr x, c1)), c2) to
1737 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1738 // aren't demanded (as above) and that the shifted upper c1 bits of
1739 // x aren't demanded.
1740 // TODO - support non-uniform vector amounts.
1741 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1742 InnerOp.hasOneUse()) {
1743 if (const APInt *SA2 =
1744 TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
1745 unsigned InnerShAmt = SA2->getZExtValue();
1746 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1747 DemandedBits.getActiveBits() <=
1748 (InnerBits - InnerShAmt + ShAmt) &&
1749 DemandedBits.countr_zero() >= ShAmt) {
1750 SDValue NewSA =
1751 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1752 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1753 InnerOp.getOperand(0));
1754 return TLO.CombineTo(
1755 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1756 }
1757 }
1758 }
1759 }
1760
1761 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1762 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1763 Depth + 1))
1764 return true;
1765 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1766 Known.Zero <<= ShAmt;
1767 Known.One <<= ShAmt;
1768 // low bits known zero.
1769 Known.Zero.setLowBits(ShAmt);
1770
1771 // Attempt to avoid multi-use ops if we don't need anything from them.
1772 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1773 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1774 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1775 if (DemandedOp0) {
1776 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1777 return TLO.CombineTo(Op, NewOp);
1778 }
1779 }
1780
1781 // Try shrinking the operation as long as the shift amount will still be
1782 // in range.
1783 if ((ShAmt < DemandedBits.getActiveBits()) &&
1784 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1785 return true;
1786 } else {
1787 // This is a variable shift, so we can't shift the demand mask by a known
1788 // amount. But if we are not demanding high bits, then we are not
1789 // demanding those bits from the pre-shifted operand either.
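// e.g. if only the low 8 bits of (X << Amt) are demanded, then only the
// low 8 bits of X can influence them, whatever Amt turns out to be.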
1790 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1791 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1792 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1793 Depth + 1)) {
1794 SDNodeFlags Flags = Op.getNode()->getFlags();
1795 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1796 // Disable the nsw and nuw flags. We can no longer guarantee that we
1797 // won't wrap after simplification.
1798 Flags.setNoSignedWrap(false);
1799 Flags.setNoUnsignedWrap(false);
1800 Op->setFlags(Flags);
1801 }
1802 return true;
1803 }
1804 Known.resetAll();
1805 }
1806 }
1807
1808 // If we are only demanding sign bits then we can use the shift source
1809 // directly.
1810 if (const APInt *MaxSA =
1811 TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
1812 unsigned ShAmt = MaxSA->getZExtValue();
1813 unsigned NumSignBits =
1814 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1815 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1816 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1817 return TLO.CombineTo(Op, Op0);
1818 }
1819 break;
1820 }
1821 case ISD::SRL: {
1822 SDValue Op0 = Op.getOperand(0);
1823 SDValue Op1 = Op.getOperand(1);
1824 EVT ShiftVT = Op1.getValueType();
1825
1826 // Try to match AVG patterns.
1827 if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
1828 DemandedElts, Depth + 1))
1829 return TLO.CombineTo(Op, AVG);
1830
1831 if (const APInt *SA =
1832 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1833 unsigned ShAmt = SA->getZExtValue();
1834 if (ShAmt == 0)
1835 return TLO.CombineTo(Op, Op0);
1836
1837 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1838 // single shift. We can do this if the top bits (which are shifted out)
1839 // are never demanded.
1840 // TODO - support non-uniform vector amounts.
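// e.g. ((X << 3) >>u 5) --> (X >>u 2) when the top 5 bits of the result
// are not demanded.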
1841 if (Op0.getOpcode() == ISD::SHL) {
1842 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1843 if (const APInt *SA2 =
1844 TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1845 unsigned C1 = SA2->getZExtValue();
1846 unsigned Opc = ISD::SRL;
1847 int Diff = ShAmt - C1;
1848 if (Diff < 0) {
1849 Diff = -Diff;
1850 Opc = ISD::SHL;
1851 }
1852 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1853 return TLO.CombineTo(
1854 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1855 }
1856 }
1857 }
1858
1859 APInt InDemandedMask = (DemandedBits << ShAmt);
1860
1861 // If the shift is exact, then it does demand the low bits (and knows that
1862 // they are zero).
1863 if (Op->getFlags().hasExact())
1864 InDemandedMask.setLowBits(ShAmt);
1865
1866 // Compute the new bits that are at the top now.
1867 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1868 Depth + 1))
1869 return true;
1870 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1871 Known.Zero.lshrInPlace(ShAmt);
1872 Known.One.lshrInPlace(ShAmt);
1873 // High bits known zero.
1874 Known.Zero.setHighBits(ShAmt);
1875
1876 // Attempt to avoid multi-use ops if we don't need anything from them.
1877 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1878 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1879 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1880 if (DemandedOp0) {
1881 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
1882 return TLO.CombineTo(Op, NewOp);
1883 }
1884 }
1885 }
1886 break;
1887 }
1888 case ISD::SRA: {
1889 SDValue Op0 = Op.getOperand(0);
1890 SDValue Op1 = Op.getOperand(1);
1891 EVT ShiftVT = Op1.getValueType();
1892
1893 // If we only want bits that already match the signbit then we don't need
1894 // to shift.
1895 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
1896 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
1897 NumHiDemandedBits)
1898 return TLO.CombineTo(Op, Op0);
1899
1900 // If this is an arithmetic shift right and only the low-bit is set, we can
1901 // always convert this into a logical shr, even if the shift amount is
1902 // variable. The low bit of the shift cannot be an input sign bit unless
1903 // the shift amount is >= the size of the datatype, which is undefined.
1904 if (DemandedBits.isOne())
1905 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
1906
1907 // Try to match AVG patterns.
1908 if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
1909 DemandedElts, Depth + 1))
1910 return TLO.CombineTo(Op, AVG);
1911
1912 if (const APInt *SA =
1913 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1914 unsigned ShAmt = SA->getZExtValue();
1915 if (ShAmt == 0)
1916 return TLO.CombineTo(Op, Op0);
1917
1918 APInt InDemandedMask = (DemandedBits << ShAmt);
1919
1920 // If the shift is exact, then it does demand the low bits (and knows that
1921 // they are zero).
1922 if (Op->getFlags().hasExact())
1923 InDemandedMask.setLowBits(ShAmt);
1924
1925 // If any of the demanded bits are produced by the sign extension, we also
1926 // demand the input sign bit.
1927 if (DemandedBits.countl_zero() < ShAmt)
1928 InDemandedMask.setSignBit();
1929
1930 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1931 Depth + 1))
1932 return true;
1933 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1934 Known.Zero.lshrInPlace(ShAmt);
1935 Known.One.lshrInPlace(ShAmt);
1936
1937 // If the input sign bit is known to be zero, or if none of the top bits
1938 // are demanded, turn this into an unsigned shift right.
1939 if (Known.Zero[BitWidth - ShAmt - 1] ||
1940 DemandedBits.countl_zero() >= ShAmt) {
1941 SDNodeFlags Flags;
1942 Flags.setExact(Op->getFlags().hasExact());
1943 return TLO.CombineTo(
1944 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
1945 }
1946
1947 int Log2 = DemandedBits.exactLogBase2();
1948 if (Log2 >= 0) {
1949 // The bit must come from the sign.
1950 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
1951 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
1952 }
1953
1954 if (Known.One[BitWidth - ShAmt - 1])
1955 // New bits are known one.
1956 Known.One.setHighBits(ShAmt);
1957
1958 // Attempt to avoid multi-use ops if we don't need anything from them.
1959 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1960 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1961 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1962 if (DemandedOp0) {
1963 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
1964 return TLO.CombineTo(Op, NewOp);
1965 }
1966 }
1967 }
1968 break;
1969 }
1970 case ISD::FSHL:
1971 case ISD::FSHR: {
1972 SDValue Op0 = Op.getOperand(0);
1973 SDValue Op1 = Op.getOperand(1);
1974 SDValue Op2 = Op.getOperand(2);
1975 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
1976
1977 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
1978 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
1979
1980 // For fshl, 0-shift returns the 1st arg.
1981 // For fshr, 0-shift returns the 2nd arg.
1982 if (Amt == 0) {
1983 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
1984 Known, TLO, Depth + 1))
1985 return true;
1986 break;
1987 }
1988
1989 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
1990 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
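// e.g. for i8: fshl(A, B, 3) == (A << 3) | (B >> 5) and
//              fshr(A, B, 3) == (A << 5) | (B >> 3).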
1991 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
1992 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
1993 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
1994 Depth + 1))
1995 return true;
1996 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
1997 Depth + 1))
1998 return true;
1999
2000 Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2001 Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2002 Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2003 Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2004 Known = Known.unionWith(Known2);
2005
2006 // Attempt to avoid multi-use ops if we don't need anything from them.
2007 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2008 !DemandedElts.isAllOnes()) {
2009 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2010 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2011 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2012 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2013 if (DemandedOp0 || DemandedOp1) {
2014 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2015 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2016 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2017 DemandedOp1, Op2);
2018 return TLO.CombineTo(Op, NewOp);
2019 }
2020 }
2021 }
2022
2023 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2024 if (isPowerOf2_32(BitWidth)) {
2025 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2026 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2027 Known2, TLO, Depth + 1))
2028 return true;
2029 }
2030 break;
2031 }
2032 case ISD::ROTL:
2033 case ISD::ROTR: {
2034 SDValue Op0 = Op.getOperand(0);
2035 SDValue Op1 = Op.getOperand(1);
2036 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2037
2038 // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
2039 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2040 return TLO.CombineTo(Op, Op0);
2041
2042 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2043 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2044 unsigned RevAmt = BitWidth - Amt;
2045
2046 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2047 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
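// e.g. for i8: rotl(X, 3) == (X << 3) | (X >> 5).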
2048 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2049 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2050 Depth + 1))
2051 return true;
2052
2053 // rot*(x, 0) --> x
2054 if (Amt == 0)
2055 return TLO.CombineTo(Op, Op0);
2056
2057 // See if we don't demand either half of the rotated bits.
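// e.g. rotl(X, 3) --> (X << 3) if the low 3 result bits are not demanded,
// or (X >> 5) if the high 5 result bits are not demanded.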
2058 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2059 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2060 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2061 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2062 }
2063 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2064 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2065 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2066 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2067 }
2068 }
2069
2070 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2071 if (isPowerOf2_32(BitWidth)) {
2072 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2073 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2074 Depth + 1))
2075 return true;
2076 }
2077 break;
2078 }
2079 case ISD::UMIN: {
2080 // Check if one arg is always less than (or equal) to the other arg.
2081 SDValue Op0 = Op.getOperand(0);
2082 SDValue Op1 = Op.getOperand(1);
2083 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2084 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2085 Known = KnownBits::umin(Known0, Known1);
2086 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2087 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2088 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2089 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2090 break;
2091 }
2092 case ISD::UMAX: {
2093 // Check if one arg is always greater than (or equal) to the other arg.
2094 SDValue Op0 = Op.getOperand(0);
2095 SDValue Op1 = Op.getOperand(1);
2096 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2097 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2098 Known = KnownBits::umax(Known0, Known1);
2099 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2100 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2101 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2102 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2103 break;
2104 }
2105 case ISD::BITREVERSE: {
2106 SDValue Src = Op.getOperand(0);
2107 APInt DemandedSrcBits = DemandedBits.reverseBits();
2108 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2109 Depth + 1))
2110 return true;
2111 Known.One = Known2.One.reverseBits();
2112 Known.Zero = Known2.Zero.reverseBits();
2113 break;
2114 }
2115 case ISD::BSWAP: {
2116 SDValue Src = Op.getOperand(0);
2117
2118 // If the only bits demanded come from one byte of the bswap result,
2119 // just shift the input byte into position to eliminate the bswap.
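// e.g. for i32, if only bits 31..24 of bswap(X) are demanded, those bits
// are just bits 7..0 of X, so the bswap can become (X << 24).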
2120 unsigned NLZ = DemandedBits.countl_zero();
2121 unsigned NTZ = DemandedBits.countr_zero();
2122
2123 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2124 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2125 // have 14 leading zeros, round to 8.
2126 NLZ = alignDown(NLZ, 8);
2127 NTZ = alignDown(NTZ, 8);
2128 // If we need exactly one byte, we can do this transformation.
2129 if (BitWidth - NLZ - NTZ == 8) {
2130 // Replace this with either a left or right shift to get the byte into
2131 // the right place.
2132 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2133 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2134 EVT ShiftAmtTy = getShiftAmountTy(VT, DL);
2135 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2136 SDValue ShAmt = TLO.DAG.getConstant(ShiftAmount, dl, ShiftAmtTy);
2137 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2138 return TLO.CombineTo(Op, NewOp);
2139 }
2140 }
2141
2142 APInt DemandedSrcBits = DemandedBits.byteSwap();
2143 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2144 Depth + 1))
2145 return true;
2146 Known.One = Known2.One.byteSwap();
2147 Known.Zero = Known2.Zero.byteSwap();
2148 break;
2149 }
2150 case ISD::CTPOP: {
2151 // If only 1 bit is demanded, replace with PARITY as long as we're before
2152 // op legalization.
2153 // FIXME: Limit to scalars for now.
2154 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2155 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2156 Op.getOperand(0)));
2157
2158 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2159 break;
2160 }
2161 case ISD::SIGN_EXTEND_INREG: {
2162 SDValue Op0 = Op.getOperand(0);
2163 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2164 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2165
2166 // If we only care about the highest bit, don't bother shifting right.
2167 if (DemandedBits.isSignMask()) {
2168 unsigned MinSignedBits =
2169 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2170 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2171 // However if the input is already sign extended we expect the sign
2172 // extension to be dropped altogether later and do not simplify.
2173 if (!AlreadySignExtended) {
2174 // Compute the correct shift amount type, which must be getShiftAmountTy
2175 // for scalar types after legalization.
2176 SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
2177 getShiftAmountTy(VT, DL));
2178 return TLO.CombineTo(Op,
2179 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2180 }
2181 }
2182
2183 // If none of the extended bits are demanded, eliminate the sextinreg.
2184 if (DemandedBits.getActiveBits() <= ExVTBits)
2185 return TLO.CombineTo(Op, Op0);
2186
2187 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2188
2189 // Since the sign extended bits are demanded, we know that the sign
2190 // bit is demanded.
2191 InputDemandedBits.setBit(ExVTBits - 1);
2192
2193 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2194 Depth + 1))
2195 return true;
2196 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2197
2198 // If the sign bit of the input is known set or clear, then we know the
2199 // top bits of the result.
2200
2201 // If the input sign bit is known zero, convert this into a zero extension.
2202 if (Known.Zero[ExVTBits - 1])
2203 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2204
2205 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2206 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2207 Known.One.setBitsFrom(ExVTBits);
2208 Known.Zero &= Mask;
2209 } else { // Input sign bit unknown
2210 Known.Zero &= Mask;
2211 Known.One &= Mask;
2212 }
2213 break;
2214 }
2215 case ISD::BUILD_PAIR: {
2216 EVT HalfVT = Op.getOperand(0).getValueType();
2217 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2218
2219 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2220 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2221
2222 KnownBits KnownLo, KnownHi;
2223
2224 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2225 return true;
2226
2227 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2228 return true;
2229
2230 Known = KnownHi.concat(KnownLo);
2231 break;
2232 }
2233 case ISD::ZERO_EXTEND_VECTOR_INREG:
2234 if (VT.isScalableVector())
2235 return false;
2236 [[fallthrough]];
2237 case ISD::ZERO_EXTEND: {
2238 SDValue Src = Op.getOperand(0);
2239 EVT SrcVT = Src.getValueType();
2240 unsigned InBits = SrcVT.getScalarSizeInBits();
2241 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2242 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2243
2244 // If none of the top bits are demanded, convert this into an any_extend.
2245 if (DemandedBits.getActiveBits() <= InBits) {
2246 // If we only need the non-extended bits of the bottom element
2247 // then we can just bitcast to the result.
2248 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2249 VT.getSizeInBits() == SrcVT.getSizeInBits())
2250 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2251
2252 unsigned Opc =
2253 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2254 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2255 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2256 }
2257
2258 APInt InDemandedBits = DemandedBits.trunc(InBits);
2259 APInt InDemandedElts = DemandedElts.zext(InElts);
2260 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2261 Depth + 1))
2262 return true;
2263 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2264 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2265 Known = Known.zext(BitWidth);
2266
2267 // Attempt to avoid multi-use ops if we don't need anything from them.
2268 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2269 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2270 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2271 break;
2272 }
2273 case ISD::SIGN_EXTEND_VECTOR_INREG:
2274 if (VT.isScalableVector())
2275 return false;
2276 [[fallthrough]];
2277 case ISD::SIGN_EXTEND: {
2278 SDValue Src = Op.getOperand(0);
2279 EVT SrcVT = Src.getValueType();
2280 unsigned InBits = SrcVT.getScalarSizeInBits();
2281 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2282 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2283
2284 // If none of the top bits are demanded, convert this into an any_extend.
2285 if (DemandedBits.getActiveBits() <= InBits) {
2286 // If we only need the non-extended bits of the bottom element
2287 // then we can just bitcast to the result.
2288 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2289 VT.getSizeInBits() == SrcVT.getSizeInBits())
2290 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2291
2292 unsigned Opc =
2293 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2294 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2295 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2296 }
2297
2298 APInt InDemandedBits = DemandedBits.trunc(InBits);
2299 APInt InDemandedElts = DemandedElts.zext(InElts);
2300
2301 // Since some of the sign extended bits are demanded, we know that the sign
2302 // bit is demanded.
2303 InDemandedBits.setBit(InBits - 1);
2304
2305 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2306 Depth + 1))
2307 return true;
2308 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2309 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2310
2311 // If the sign bit is known one, the top bits match.
2312 Known = Known.sext(BitWidth);
2313
2314 // If the sign bit is known zero, convert this to a zero extend.
2315 if (Known.isNonNegative()) {
2316 unsigned Opc =
2317 IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2318 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2319 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2320 }
2321
2322 // Attempt to avoid multi-use ops if we don't need anything from them.
2323 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2324 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2325 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2326 break;
2327 }
2328 case ISD::ANY_EXTEND_VECTOR_INREG:
2329 if (VT.isScalableVector())
2330 return false;
2331 [[fallthrough]];
2332 case ISD::ANY_EXTEND: {
2333 SDValue Src = Op.getOperand(0);
2334 EVT SrcVT = Src.getValueType();
2335 unsigned InBits = SrcVT.getScalarSizeInBits();
2336 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2337 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2338
2339 // If we only need the bottom element then we can just bitcast.
2340 // TODO: Handle ANY_EXTEND?
2341 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2342 VT.getSizeInBits() == SrcVT.getSizeInBits())
2343 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2344
2345 APInt InDemandedBits = DemandedBits.trunc(InBits);
2346 APInt InDemandedElts = DemandedElts.zext(InElts);
2347 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2348 Depth + 1))
2349 return true;
2350 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2351 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2352 Known = Known.anyext(BitWidth);
2353
2354 // Attempt to avoid multi-use ops if we don't need anything from them.
2355 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2356 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2357 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2358 break;
2359 }
2360 case ISD::TRUNCATE: {
2361 SDValue Src = Op.getOperand(0);
2362
2363 // Simplify the input, using demanded bit information, and compute the known
2364 // zero/one bits live out.
2365 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2366 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2367 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2368 Depth + 1))
2369 return true;
2370 Known = Known.trunc(BitWidth);
2371
2372 // Attempt to avoid multi-use ops if we don't need anything from them.
2373 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2374 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2375 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2376
2377 // If the input is only used by this truncate, see if we can shrink it based
2378 // on the known demanded bits.
2379 switch (Src.getOpcode()) {
2380 default:
2381 break;
2382 case ISD::SRL:
2383 // Shrink SRL by a constant if none of the high bits shifted in are
2384 // demanded.
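// e.g. (i32 trunc (i64 srl X, 4)) --> (i32 srl (i32 trunc X), 4) when
// bits 31..28 of the truncated result are not demanded.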
2385 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2386 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2387 // undesirable.
2388 break;
2389
2390 if (Src.getNode()->hasOneUse()) {
2391 const APInt *ShAmtC =
2392 TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
2393 if (!ShAmtC || ShAmtC->uge(BitWidth))
2394 break;
2395 uint64_t ShVal = ShAmtC->getZExtValue();
2396
2397 APInt HighBits =
2398 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2399 HighBits.lshrInPlace(ShVal);
2400 HighBits = HighBits.trunc(BitWidth);
2401
2402 if (!(HighBits & DemandedBits)) {
2403 // None of the shifted in bits are needed. Add a truncate of the
2404 // shift input, then shift it.
2405 SDValue NewShAmt = TLO.DAG.getConstant(
2406 ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
2407 SDValue NewTrunc =
2408 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2409 return TLO.CombineTo(
2410 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2411 }
2412 }
2413 break;
2414 }
2415
2416 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2417 break;
2418 }
2419 case ISD::AssertZext: {
2420 // AssertZext demands all of the high bits, plus any of the low bits
2421 // demanded by its users.
2422 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2423 APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2424 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2425 TLO, Depth + 1))
2426 return true;
2427 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2428
2429 Known.Zero |= ~InMask;
2430 Known.One &= (~Known.Zero);
2431 break;
2432 }
2433 case ISD::EXTRACT_VECTOR_ELT: {
2434 SDValue Src = Op.getOperand(0);
2435 SDValue Idx = Op.getOperand(1);
2436 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2437 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2438
2439 if (SrcEltCnt.isScalable())
2440 return false;
2441
2442 // Demand the bits from every vector element without a constant index.
2443 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2444 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2445 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2446 if (CIdx->getAPIntValue().ult(NumSrcElts))
2447 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2448
2449 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2450 // anything about the extended bits.
2451 APInt DemandedSrcBits = DemandedBits;
2452 if (BitWidth > EltBitWidth)
2453 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2454
2455 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2456 Depth + 1))
2457 return true;
2458
2459 // Attempt to avoid multi-use ops if we don't need anything from them.
2460 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2461 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2462 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2463 SDValue NewOp =
2464 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2465 return TLO.CombineTo(Op, NewOp);
2466 }
2467 }
2468
2469 Known = Known2;
2470 if (BitWidth > EltBitWidth)
2471 Known = Known.anyext(BitWidth);
2472 break;
2473 }
2474 case ISD::BITCAST: {
2475 if (VT.isScalableVector())
2476 return false;
2477 SDValue Src = Op.getOperand(0);
2478 EVT SrcVT = Src.getValueType();
2479 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2480
2481 // If this is an FP->Int bitcast and if the sign bit is the only
2482 // thing demanded, turn this into a FGETSIGN.
2483 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2484 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2485 SrcVT.isFloatingPoint()) {
2486 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2487 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2488 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2489 SrcVT != MVT::f128) {
2490 // Cannot eliminate/lower SHL for f128 yet.
2491 EVT Ty = OpVTLegal ? VT : MVT::i32;
2492 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2493 // place. We expect the SHL to be eliminated by other optimizations.
2494 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2495 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2496 if (!OpVTLegal && OpVTSizeInBits > 32)
2497 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2498 unsigned ShVal = Op.getValueSizeInBits() - 1;
2499 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2500 return TLO.CombineTo(Op,
2501 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2502 }
2503 }
2504
2505 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2506 // Demand the elt/bit if any of the original elts/bits are demanded.
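// e.g. for (i64 bitcast (v2i32 X)) on a little-endian target, demanding
// only bit 40 of the i64 demands bit 8 of element 1 of X.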
2507 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2508 unsigned Scale = BitWidth / NumSrcEltBits;
2509 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2510 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2511 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2512 for (unsigned i = 0; i != Scale; ++i) {
2513 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2514 unsigned BitOffset = EltOffset * NumSrcEltBits;
2515 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2516 if (!Sub.isZero()) {
2517 DemandedSrcBits |= Sub;
2518 for (unsigned j = 0; j != NumElts; ++j)
2519 if (DemandedElts[j])
2520 DemandedSrcElts.setBit((j * Scale) + i);
2521 }
2522 }
2523
2524 APInt KnownSrcUndef, KnownSrcZero;
2525 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2526 KnownSrcZero, TLO, Depth + 1))
2527 return true;
2528
2529 KnownBits KnownSrcBits;
2530 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2531 KnownSrcBits, TLO, Depth + 1))
2532 return true;
2533 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2534 // TODO - bigendian once we have test coverage.
2535 unsigned Scale = NumSrcEltBits / BitWidth;
2536 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2537 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2538 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2539 for (unsigned i = 0; i != NumElts; ++i)
2540 if (DemandedElts[i]) {
2541 unsigned Offset = (i % Scale) * BitWidth;
2542 DemandedSrcBits.insertBits(DemandedBits, Offset);
2543 DemandedSrcElts.setBit(i / Scale);
2544 }
2545
2546 if (SrcVT.isVector()) {
2547 APInt KnownSrcUndef, KnownSrcZero;
2548 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2549 KnownSrcZero, TLO, Depth + 1))
2550 return true;
2551 }
2552
2553 KnownBits KnownSrcBits;
2554 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2555 KnownSrcBits, TLO, Depth + 1))
2556 return true;
2557 }
2558
2559 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2560 // recursive call where Known may be useful to the caller.
2561 if (Depth > 0) {
2562 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2563 return false;
2564 }
2565 break;
2566 }
2567 case ISD::MUL:
2568 if (DemandedBits.isPowerOf2()) {
2569 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2570 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2571 // odd (has LSB set), then the left-shifted low bit of X is the answer.
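// e.g. if only bit 3 of (X * 24) is demanded: 24 == 3 << 3 with 3 odd,
// so bit 3 of the product equals bit 3 of (X << 3).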
2572 unsigned CTZ = DemandedBits.countr_zero();
2573 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2574 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2575 EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
2576 SDValue AmtC = TLO.DAG.getConstant(CTZ, dl, ShiftAmtTy);
2577 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2578 return TLO.CombineTo(Op, Shl);
2579 }
2580 }
2581 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2582 // X * X is odd iff X is odd.
2583 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2584 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2585 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2586 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2587 return TLO.CombineTo(Op, And1);
2588 }
2589 [[fallthrough]];
2590 case ISD::ADD:
2591 case ISD::SUB: {
2592 // Add, Sub, and Mul don't demand any bits in positions beyond that
2593 // of the highest bit demanded of them.
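// e.g. if only the low 16 bits of an i32 add are demanded, only the low
// 16 bits of each operand matter, since carries/borrows only propagate
// towards the high bits.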
2594 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2595 SDNodeFlags Flags = Op.getNode()->getFlags();
2596 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2597 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2598 KnownBits KnownOp0, KnownOp1;
2599 if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, KnownOp0, TLO,
2600 Depth + 1) ||
2601 SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2602 Depth + 1) ||
2603 // See if the operation should be performed at a smaller bit width.
2604 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2605 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2606 // Disable the nsw and nuw flags. We can no longer guarantee that we
2607 // won't wrap after simplification.
2608 Flags.setNoSignedWrap(false);
2609 Flags.setNoUnsignedWrap(false);
2610 Op->setFlags(Flags);
2611 }
2612 return true;
2613 }
2614
2615 // neg x with only low bit demanded is simply x.
2616 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2617 isa<ConstantSDNode>(Op0) && cast<ConstantSDNode>(Op0)->isZero())
2618 return TLO.CombineTo(Op, Op1);
2619
2620 // Attempt to avoid multi-use ops if we don't need anything from them.
2621 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2622 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2623 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2624 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2625 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2626 if (DemandedOp0 || DemandedOp1) {
2627 Flags.setNoSignedWrap(false);
2628 Flags.setNoUnsignedWrap(false);
2629 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2630 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2631 SDValue NewOp =
2632 TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2633 return TLO.CombineTo(Op, NewOp);
2634 }
2635 }
2636
2637 // If we have a constant operand, we may be able to turn it into -1 if we
2638 // do not demand the high bits. This can make the constant smaller to
2639 // encode, allow more general folding, or match specialized instruction
2640 // patterns (e.g., 'blsr' on x86). Don't bother changing 1 to -1 because that
2641 // is probably not useful (and could be detrimental).
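// e.g. for i8, if only the low 7 bits of (add X, 127) are demanded, the
// constant can become -1: (add X, -1) agrees with (add X, 127) on bits 6..0.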
2642 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
2643 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2644 if (C && !C->isAllOnes() && !C->isOne() &&
2645 (C->getAPIntValue() | HighMask).isAllOnes()) {
2646 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2647 // Disable the nsw and nuw flags. We can no longer guarantee that we
2648 // won't wrap after simplification.
2649 Flags.setNoSignedWrap(false);
2650 Flags.setNoUnsignedWrap(false);
2651 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
2652 return TLO.CombineTo(Op, NewOp);
2653 }
2654
2655 // Match a multiply with a disguised negated-power-of-2 and convert to
2656 // an equivalent shift-left amount.
2657 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2658 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2659 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2660 return 0;
2661
2662 // Don't touch opaque constants. Also, ignore zero and power-of-2
2663 // multiplies. Those will get folded later.
2664 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2665 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2666 !MulC->getAPIntValue().isPowerOf2()) {
2667 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2668 if (UnmaskedC.isNegatedPowerOf2())
2669 return (-UnmaskedC).logBase2();
2670 }
2671 return 0;
2672 };
2673
2674 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y, unsigned ShlAmt) {
2675 EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
2676 SDValue ShlAmtC = TLO.DAG.getConstant(ShlAmt, dl, ShiftAmtTy);
2677 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2678 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2679 return TLO.CombineTo(Op, Res);
2680 };
2681
2683 if (Op.getOpcode() == ISD::ADD) {
2684 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2685 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2686 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2687 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2688 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2689 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2690 }
2691 if (Op.getOpcode() == ISD::SUB) {
2692 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2693 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2694 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2695 }
2696 }
2697
2698 if (Op.getOpcode() == ISD::MUL) {
2699 Known = KnownBits::mul(KnownOp0, KnownOp1);
2700 } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2701 Known = KnownBits::computeForAddSub(Op.getOpcode() == ISD::ADD,
2702 Flags.hasNoSignedWrap(), KnownOp0,
2703 KnownOp1);
2704 }
2705 break;
2706 }
2707 default:
2708 // We also ask the target about intrinsics (which could be specific to it).
2709 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2710 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2711 // TODO: Probably okay to remove after audit; here to reduce change size
2712 // in initial enablement patch for scalable vectors
2713 if (Op.getValueType().isScalableVector())
2714 break;
2715 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2716 Known, TLO, Depth))
2717 return true;
2718 break;
2719 }
2720
2721 // Just use computeKnownBits to compute output bits.
2722 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2723 break;
2724 }
2725
2726 // If we know the value of all of the demanded bits, return this as a
2727 // constant.
2728 if (!isTargetCanonicalConstantNode(Op) &&
2729 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2730 // Avoid folding to a constant if any OpaqueConstant is involved.
2731 const SDNode *N = Op.getNode();
2732 for (SDNode *Op :
2733 llvm::make_range(N->op_begin(), N->op_end())) {
2734 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
2735 if (C->isOpaque())
2736 return false;
2737 }
2738 if (VT.isInteger())
2739 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2740 if (VT.isFloatingPoint())
2741 return TLO.CombineTo(
2742 Op,
2743 TLO.DAG.getConstantFP(
2744 APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
2745 }
2746
2747 // A multi use 'all demanded elts' simplify failed to find any knownbits.
2748 // Try again just for the original demanded elts.
2749 // Ensure we do this AFTER constant folding above.
2750 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2751 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
2752
2753 return false;
2754}
2755
2756 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2757 const APInt &DemandedElts,
2758 DAGCombinerInfo &DCI) const {
2759 SelectionDAG &DAG = DCI.DAG;
2760 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2761 !DCI.isBeforeLegalizeOps());
2762
2763 APInt KnownUndef, KnownZero;
2764 bool Simplified =
2765 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2766 if (Simplified) {
2767 DCI.AddToWorklist(Op.getNode());
2768 DCI.CommitTargetLoweringOpt(TLO);
2769 }
2770
2771 return Simplified;
2772}
2773
2774/// Given a vector binary operation and known undefined elements for each input
2775 /// operand, compute whether each element of the output is undefined.
2776 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2777 const APInt &UndefOp0,
2778 const APInt &UndefOp1) {
2779 EVT VT = BO.getValueType();
2780 assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
2781 "Vector binop only");
2782
2783 EVT EltVT = VT.getVectorElementType();
2784 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
2785 assert(UndefOp0.getBitWidth() == NumElts &&
2786 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2787
2788 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2789 const APInt &UndefVals) {
2790 if (UndefVals[Index])
2791 return DAG.getUNDEF(EltVT);
2792
2793 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
2794 // Try hard to make sure that the getNode() call is not creating temporary
2795 // nodes. Ignore opaque integers because they do not constant fold.
2796 SDValue Elt = BV->getOperand(Index);
2797 auto *C = dyn_cast<ConstantSDNode>(Elt);
2798 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2799 return Elt;
2800 }
2801
2802 return SDValue();
2803 };
2804
2805 APInt KnownUndef = APInt::getZero(NumElts);
2806 for (unsigned i = 0; i != NumElts; ++i) {
2807 // If both inputs for this element are either constant or undef and match
2808 // the element type, compute the constant/undef result for this element of
2809 // the vector.
2810 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2811 // not handle FP constants. The code within getNode() should be refactored
2812 // to avoid the danger of creating a bogus temporary node here.
2813 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
2814 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
2815 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
2816 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
2817 KnownUndef.setBit(i);
2818 }
2819 return KnownUndef;
2820}
2821
2822 bool TargetLowering::SimplifyDemandedVectorElts(
2823 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
2824 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
2825 bool AssumeSingleUse) const {
2826 EVT VT = Op.getValueType();
2827 unsigned Opcode = Op.getOpcode();
2828 APInt DemandedElts = OriginalDemandedElts;
2829 unsigned NumElts = DemandedElts.getBitWidth();
2830 assert(VT.isVector() && "Expected vector op");
2831
2832 KnownUndef = KnownZero = APInt::getZero(NumElts);
2833
2834 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
2835 if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
2836 return false;
2837
2838 // TODO: For now we assume we know nothing about scalable vectors.
2839 if (VT.isScalableVector())
2840 return false;
2841
2842 assert(VT.getVectorNumElements() == NumElts &&
2843 "Mask size mismatches value type element count!");
2844
2845 // Undef operand.
2846 if (Op.isUndef()) {
2847 KnownUndef.setAllBits();
2848 return false;
2849 }
2850
2851 // If Op has other users, assume that all elements are needed.
2852 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
2853 DemandedElts.setAllBits();
2854
2855 // Not demanding any elements from Op.
2856 if (DemandedElts == 0) {
2857 KnownUndef.setAllBits();
2858 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2859 }
2860
2861 // Limit search depth.
2862 if (Depth >= SelectionDAG::MaxRecursionDepth)
2863 return false;
2864
2865 SDLoc DL(Op);
2866 unsigned EltSizeInBits = VT.getScalarSizeInBits();
2867 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
2868
2869 // Helper for demanding the specified elements and all the bits of both binary
2870 // operands.
2871 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
2872 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
2873 TLO.DAG, Depth + 1);
2874 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
2875 TLO.DAG, Depth + 1);
2876 if (NewOp0 || NewOp1) {
2877 SDValue NewOp = TLO.DAG.getNode(
2878 Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1);
2879 return TLO.CombineTo(Op, NewOp);
2880 }
2881 return false;
2882 };
2883
2884 switch (Opcode) {
2885 case ISD::SCALAR_TO_VECTOR: {
2886 if (!DemandedElts[0]) {
2887 KnownUndef.setAllBits();
2888 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2889 }
2890 SDValue ScalarSrc = Op.getOperand(0);
2891 if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
2892 SDValue Src = ScalarSrc.getOperand(0);
2893 SDValue Idx = ScalarSrc.getOperand(1);
2894 EVT SrcVT = Src.getValueType();
2895
2896 ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
2897
2898 if (SrcEltCnt.isScalable())
2899 return false;
2900
2901 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2902 if (isNullConstant(Idx)) {
2903 APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
2904 APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
2905 APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
2906 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2907 TLO, Depth + 1))
2908 return true;
2909 }
2910 }
2911 KnownUndef.setHighBits(NumElts - 1);
2912 break;
2913 }
2914 case ISD::BITCAST: {
2915 SDValue Src = Op.getOperand(0);
2916 EVT SrcVT = Src.getValueType();
2917
2918 // We only handle vectors here.
2919 // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
2920 if (!SrcVT.isVector())
2921 break;
2922
2923 // Fast handling of 'identity' bitcasts.
2924 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2925 if (NumSrcElts == NumElts)
2926 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
2927 KnownZero, TLO, Depth + 1);
2928
2929 APInt SrcDemandedElts, SrcZero, SrcUndef;
2930
2931 // Bitcast from 'large element' src vector to 'small element' vector, we
2932 // must demand a source element if any DemandedElt maps to it.
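// e.g. for (v4i32 bitcast (v2i64 X)), demanding i32 elements 2 or 3
// demands i64 element 1 of X.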
2933 if ((NumElts % NumSrcElts) == 0) {
2934 unsigned Scale = NumElts / NumSrcElts;
2935 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
2936 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2937 TLO, Depth + 1))
2938 return true;
2939
2940 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
2941 // of the large element.
2942 // TODO - bigendian once we have test coverage.
2943 if (IsLE) {
2944 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
2945 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
2946 for (unsigned i = 0; i != NumElts; ++i)
2947 if (DemandedElts[i]) {
2948 unsigned Ofs = (i % Scale) * EltSizeInBits;
2949 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
2950 }
2951
2952 KnownBits Known;
2953 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
2954 TLO, Depth + 1))
2955 return true;
2956
2957 // The bitcast has split each wide element into a number of
2958 // narrow subelements. We have just computed the Known bits
2959 // for wide elements. See if element splitting results in
2960 // some subelements being zero. Only for demanded elements!
2961 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
2962 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
2963 .isAllOnes())
2964 continue;
2965 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
2966 unsigned Elt = Scale * SrcElt + SubElt;
2967 if (DemandedElts[Elt])
2968 KnownZero.setBit(Elt);
2969 }
2970 }
2971 }
2972
2973 // If the src element is zero/undef then all the output elements it covers will be too -
2974 // only demanded elements are guaranteed to be correct.
2975 for (unsigned i = 0; i != NumSrcElts; ++i) {
2976 if (SrcDemandedElts[i]) {
2977 if (SrcZero[i])
2978 KnownZero.setBits(i * Scale, (i + 1) * Scale);
2979 if (SrcUndef[i])
2980 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
2981 }
2982 }
2983 }
2984
2985 // Bitcast from 'small element' src vector to 'large element' vector, we
2986 // demand all smaller source elements covered by the larger demanded element
2987 // of this vector.
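// e.g. for (v2i64 bitcast (v4i32 X)), demanding i64 element 0 demands
// i32 elements 0 and 1 of X.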
2988 if ((NumSrcElts % NumElts) == 0) {
2989 unsigned Scale = NumSrcElts / NumElts;
2990 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
2991 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2992 TLO, Depth + 1))
2993 return true;
2994
2995 // If all the src elements covering an output element are zero/undef, then
2996 // the output element will be as well, assuming it was demanded.
2997 for (unsigned i = 0; i != NumElts; ++i) {
2998 if (DemandedElts[i]) {
2999 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3000 KnownZero.setBit(i);
3001 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3002 KnownUndef.setBit(i);
3003 }
3004 }
3005 }
3006 break;
3007 }
3008 case ISD::BUILD_VECTOR: {
3009 // Check all elements and simplify any unused elements with UNDEF.
3010 if (!DemandedElts.isAllOnes()) {
3011 // Don't simplify BROADCASTS.
3012 if (llvm::any_of(Op->op_values(),
3013 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3014 SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
3015 bool Updated = false;
3016 for (unsigned i = 0; i != NumElts; ++i) {
3017 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3018 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3019 KnownUndef.setBit(i);
3020 Updated = true;
3021 }
3022 }
3023 if (Updated)
3024 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3025 }
3026 }
3027 for (unsigned i = 0; i != NumElts; ++i) {
3028 SDValue SrcOp = Op.getOperand(i);
3029 if (SrcOp.isUndef()) {
3030 KnownUndef.setBit(i);
3031 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3032 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
3033 KnownZero.setBit(i);
3034 }
3035 }
3036 break;
3037 }
3038 case ISD::CONCAT_VECTORS: {
3039 EVT SubVT = Op.getOperand(0).getValueType();
3040 unsigned NumSubVecs = Op.getNumOperands();
3041 unsigned NumSubElts = SubVT.getVectorNumElements();
3042 for (unsigned i = 0; i != NumSubVecs; ++i) {
3043 SDValue SubOp = Op.getOperand(i);
3044 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3045 APInt SubUndef, SubZero;
3046 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3047 Depth + 1))
3048 return true;
3049 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3050 KnownZero.insertBits(SubZero, i * NumSubElts);
3051 }
3052
3053 // Attempt to avoid multi-use ops if we don't need anything from them.
3054 if (!DemandedElts.isAllOnes()) {
3055 bool FoundNewSub = false;
3056 SmallVector<SDValue, 2> DemandedSubOps;
3057 for (unsigned i = 0; i != NumSubVecs; ++i) {
3058 SDValue SubOp = Op.getOperand(i);
3059 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3060 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3061 SubOp, SubElts, TLO.DAG, Depth + 1);
3062 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3063 FoundNewSub = NewSubOp ? true : FoundNewSub;
3064 }
3065 if (FoundNewSub) {
3066 SDValue NewOp =
3067 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3068 return TLO.CombineTo(Op, NewOp);
3069 }
3070 }
3071 break;
3072 }
3073 case ISD::INSERT_SUBVECTOR: {
3074 // Demand any elements from the subvector and the remainder from the src it's
3075 // inserted into.
3076 SDValue Src = Op.getOperand(0);
3077 SDValue Sub = Op.getOperand(1);
3078 uint64_t Idx = Op.getConstantOperandVal(2);
3079 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3080 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3081 APInt DemandedSrcElts = DemandedElts;
3082 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
3083
3084 APInt SubUndef, SubZero;
3085 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3086 Depth + 1))
3087 return true;
3088
3089 // If none of the src operand elements are demanded, replace it with undef.
3090 if (!DemandedSrcElts && !Src.isUndef())
3091 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3092 TLO.DAG.getUNDEF(VT), Sub,
3093 Op.getOperand(2)));
3094
3095 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3096 TLO, Depth + 1))
3097 return true;
3098 KnownUndef.insertBits(SubUndef, Idx);
3099 KnownZero.insertBits(SubZero, Idx);
3100
3101 // Attempt to avoid multi-use ops if we don't need anything from them.
3102 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3103 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3104 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3105 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3106 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3107 if (NewSrc || NewSub) {
3108 NewSrc = NewSrc ? NewSrc : Src;
3109 NewSub = NewSub ? NewSub : Sub;
3110 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3111 NewSub, Op.getOperand(2));
3112 return TLO.CombineTo(Op, NewOp);
3113 }
3114 }
3115 break;
3116 }
3117 case ISD::EXTRACT_SUBVECTOR: {
3118 // Offset the demanded elts by the subvector index.
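// Illustrative example: extracting a v4i32 from a v8i32 at index 4 with
// DemandedElts = 0b0011 demands source elements 0b00110000.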
3119 SDValue Src = Op.getOperand(0);
3120 if (Src.getValueType().isScalableVector())
3121 break;
3122 uint64_t Idx = Op.getConstantOperandVal(1);
3123 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3124 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3125
3126 APInt SrcUndef, SrcZero;
3127 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3128 Depth + 1))
3129 return true;
3130 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3131 KnownZero = SrcZero.extractBits(NumElts, Idx);
3132
3133 // Attempt to avoid multi-use ops if we don't need anything from them.
3134 if (!DemandedElts.isAllOnes()) {
3135 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3136 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3137 if (NewSrc) {
3138 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3139 Op.getOperand(1));
3140 return TLO.CombineTo(Op, NewOp);
3141 }
3142 }
3143 break;
3144 }
3145 case ISD::INSERT_VECTOR_ELT: {
3146 SDValue Vec = Op.getOperand(0);
3147 SDValue Scl = Op.getOperand(1);
3148 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3149
3150 // For a legal, constant insertion index, if we don't need this insertion
3151 // then strip it, else remove it from the demanded elts.
3152 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3153 unsigned Idx = CIdx->getZExtValue();
3154 if (!DemandedElts[Idx])
3155 return TLO.CombineTo(Op, Vec);
3156
3157 APInt DemandedVecElts(DemandedElts);
3158 DemandedVecElts.clearBit(Idx);
3159 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3160 KnownZero, TLO, Depth + 1))
3161 return true;
3162
3163 KnownUndef.setBitVal(Idx, Scl.isUndef());
3164
3165 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3166 break;
3167 }
3168
3169 APInt VecUndef, VecZero;
3170 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3171 Depth + 1))
3172 return true;
3173 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3174 break;
3175 }
3176 case ISD::VSELECT: {
3177 SDValue Sel = Op.getOperand(0);
3178 SDValue LHS = Op.getOperand(1);
3179 SDValue RHS = Op.getOperand(2);
3180
3181 // Try to transform the select condition based on the current demanded
3182 // elements.
3183 APInt UndefSel, UndefZero;
3184 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, UndefZero, TLO,
3185 Depth + 1))
3186 return true;
3187
3188 // See if we can simplify either vselect operand.
3189 APInt DemandedLHS(DemandedElts);
3190 APInt DemandedRHS(DemandedElts);
3191 APInt UndefLHS, ZeroLHS;
3192 APInt UndefRHS, ZeroRHS;
3193 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3194 Depth + 1))
3195 return true;
3196 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3197 Depth + 1))
3198 return true;
3199
3200 KnownUndef = UndefLHS & UndefRHS;
3201 KnownZero = ZeroLHS & ZeroRHS;
3202
3203 // If we know that the selected element is always zero, we don't need the
3204 // select value element.
3205 APInt DemandedSel = DemandedElts & ~KnownZero;
3206 if (DemandedSel != DemandedElts)
3207 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, UndefZero, TLO,
3208 Depth + 1))
3209 return true;
3210
3211 break;
3212 }
3213 case ISD::VECTOR_SHUFFLE: {
3214 SDValue LHS = Op.getOperand(0);
3215 SDValue RHS = Op.getOperand(1);
3216 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3217
3218 // Collect demanded elements from shuffle operands.
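// Illustrative example: a v4 shuffle with mask <0, 5, 2, 7> and
// DemandedElts = 0b1010 demands nothing from the LHS and lanes 0b1010 from
// the RHS (mask entries 5 and 7 map to RHS lanes 1 and 3).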
3219 APInt DemandedLHS(NumElts, 0);
3220 APInt DemandedRHS(NumElts, 0);
3221 for (unsigned i = 0; i != NumElts; ++i) {
3222 int M = ShuffleMask[i];
3223 if (M < 0 || !DemandedElts[i])
3224 continue;
3225 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3226 if (M < (int)NumElts)
3227 DemandedLHS.setBit(M);
3228 else
3229 DemandedRHS.setBit(M - NumElts);
3230 }
3231
3232 // See if we can simplify either shuffle operand.
3233 APInt UndefLHS, ZeroLHS;
3234 APInt UndefRHS, ZeroRHS;
3235 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3236 Depth + 1))
3237 return true;
3238 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3239 Depth + 1))
3240 return true;
3241
3242 // Simplify mask using undef elements from LHS/RHS.
3243 bool Updated = false;
3244 bool IdentityLHS = true, IdentityRHS = true;
3245 SmallVector<int, 32> NewMask(ShuffleMask);
3246 for (unsigned i = 0; i != NumElts; ++i) {
3247 int &M = NewMask[i];
3248 if (M < 0)
3249 continue;
3250 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3251 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3252 Updated = true;
3253 M = -1;
3254 }
3255 IdentityLHS &= (M < 0) || (M == (int)i);
3256 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3257 }
3258
3259 // Update legal shuffle masks based on demanded elements, as long as the mask
3260 // won't reduce to an identity shuffle, which could cause its premature removal.
3261 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3262 SDValue LegalShuffle =
3263 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3264 if (LegalShuffle)
3265 return TLO.CombineTo(Op, LegalShuffle);
3266 }
3267
3268 // Propagate undef/zero elements from LHS/RHS.
3269 for (unsigned i = 0; i != NumElts; ++i) {
3270 int M = ShuffleMask[i];
3271 if (M < 0) {
3272 KnownUndef.setBit(i);
3273 } else if (M < (int)NumElts) {
3274 if (UndefLHS[M])
3275 KnownUndef.setBit(i);
3276 if (ZeroLHS[M])
3277 KnownZero.setBit(i);
3278 } else {
3279 if (UndefRHS[M - NumElts])
3280 KnownUndef.setBit(i);
3281 if (ZeroRHS[M - NumElts])
3282 KnownZero.setBit(i);
3283 }
3284 }
3285 break;
3286 }
3287 case ISD::ANY_EXTEND_VECTOR_INREG:
3288 case ISD::SIGN_EXTEND_VECTOR_INREG:
3289 case ISD::ZERO_EXTEND_VECTOR_INREG: {
3290 APInt SrcUndef, SrcZero;
3291 SDValue Src = Op.getOperand(0);
3292 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3293 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3294 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3295 Depth + 1))
3296 return true;
3297 KnownZero = SrcZero.zextOrTrunc(NumElts);
3298 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3299
3300 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3301 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3302 DemandedSrcElts == 1) {
3303 // aext - if we just need the bottom element then we can bitcast.
3304 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3305 }
3306
3307 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3308 // zext(undef) upper bits are guaranteed to be zero.
3309 if (DemandedElts.isSubsetOf(KnownUndef))
3310 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3311 KnownUndef.clearAllBits();
3312
3313 // zext - if we just need the bottom element then we can mask:
3314 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
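// (Here c' is c with every lane other than lane 0 cleared; that is what the
// constant fold of the AND with the build_vector mask below computes.)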
3315 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3316 Op->isOnlyUserOf(Src.getNode()) &&
3317 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3318 SDLoc DL(Op);
3319 EVT SrcVT = Src.getValueType();
3320 EVT SrcSVT = SrcVT.getScalarType();
3321 SmallVector<SDValue> MaskElts;
3322 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3323 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3324 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3325 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3326 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3327 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3328 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3329 }
3330 }
3331 }
3332 break;
3333 }
3334
3335 // TODO: There are more binop opcodes that could be handled here - MIN,
3336 // MAX, saturated math, etc.
3337 case ISD::ADD: {
3338 SDValue Op0 = Op.getOperand(0);
3339 SDValue Op1 = Op.getOperand(1);
3340 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3341 APInt UndefLHS, ZeroLHS;
3342 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3343 Depth + 1, /*AssumeSingleUse*/ true))
3344 return true;
3345 }
3346 [[fallthrough]];
3347 }
3348 case ISD::OR:
3349 case ISD::XOR:
3350 case ISD::SUB:
3351 case ISD::FADD:
3352 case ISD::FSUB:
3353 case ISD::FMUL:
3354 case ISD::FDIV:
3355 case ISD::FREM: {
3356 SDValue Op0 = Op.getOperand(0);
3357 SDValue Op1 = Op.getOperand(1);
3358
3359 APInt UndefRHS, ZeroRHS;
3360 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3361 Depth + 1))
3362 return true;
3363 APInt UndefLHS, ZeroLHS;
3364 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3365 Depth + 1))
3366 return true;
3367
3368 KnownZero = ZeroLHS & ZeroRHS;
3369 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3370
3371 // Attempt to avoid multi-use ops if we don't need anything from them.
3372 // TODO - use KnownUndef to relax the demandedelts?
3373 if (!DemandedElts.isAllOnes())
3374 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3375 return true;
3376 break;
3377 }
3378 case ISD::SHL:
3379 case ISD::SRL:
3380 case ISD::SRA:
3381 case ISD::ROTL:
3382 case ISD::ROTR: {
3383 SDValue Op0 = Op.getOperand(0);
3384 SDValue Op1 = Op.getOperand(1);
3385
3386 APInt UndefRHS, ZeroRHS;
3387 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3388 Depth + 1))
3389 return true;
3390 APInt UndefLHS, ZeroLHS;
3391 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3392 Depth + 1))
3393 return true;
3394
3395 KnownZero = ZeroLHS;
3396 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3397
3398 // Attempt to avoid multi-use ops if we don't need anything from them.
3399 // TODO - use KnownUndef to relax the demandedelts?
3400 if (!DemandedElts.isAllOnes())
3401 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3402 return true;
3403 break;
3404 }
3405 case ISD::MUL:
3406 case ISD::MULHU:
3407 case ISD::MULHS:
3408 case ISD::AND: {
3409 SDValue Op0 = Op.getOperand(0);
3410 SDValue Op1 = Op.getOperand(1);
3411
3412 APInt SrcUndef, SrcZero;
3413 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3414 Depth + 1))
3415 return true;
3416 // If we know that a demanded element was zero in Op1, we don't need to
3417 // demand it in Op0 - it's guaranteed to be zero.
3418 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3419 if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3420 TLO, Depth + 1))
3421 return true;
3422
3423 KnownUndef &= DemandedElts0;
3424 KnownZero &= DemandedElts0;
3425
3426 // If every element pair has a zero/undef then just fold to zero.
3427 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3428 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3429 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3430 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3431
3432 // If either side has a zero element, then the result element is zero, even
3433 // if the other is an UNDEF.
3434 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3435 // and then handle 'and' nodes with the rest of the binop opcodes.
3436 KnownZero |= SrcZero;
3437 KnownUndef &= SrcUndef;
3438 KnownUndef &= ~KnownZero;
3439
3440 // Attempt to avoid multi-use ops if we don't need anything from them.
3441 if (!DemandedElts.isAllOnes())
3442 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3443 return true;
3444 break;
3445 }
3446 case ISD::TRUNCATE:
3447 case ISD::SIGN_EXTEND:
3448 case ISD::ZERO_EXTEND:
3449 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3450 KnownZero, TLO, Depth + 1))
3451 return true;
3452
3453 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3454 // zext(undef) upper bits are guaranteed to be zero.
3455 if (DemandedElts.isSubsetOf(KnownUndef))
3456 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3457 KnownUndef.clearAllBits();
3458 }
3459 break;
3460 default: {
3461 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3462 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3463 KnownZero, TLO, Depth))
3464 return true;
3465 } else {
3466 KnownBits Known;
3467 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3468 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3469 TLO, Depth, AssumeSingleUse))
3470 return true;
3471 }
3472 break;
3473 }
3474 }
3475 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3476
3477 // Constant fold all undef cases.
3478 // TODO: Handle zero cases as well.
3479 if (DemandedElts.isSubsetOf(KnownUndef))
3480 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3481
3482 return false;
3483}
3484
3485/// Determine which of the bits specified in Mask are known to be either zero or
3486/// one and return them in the Known.
3487void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3488 KnownBits &Known,
3489 const APInt &DemandedElts,
3490 const SelectionDAG &DAG,
3491 unsigned Depth) const {
3492 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3493 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3494 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3495 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3496 "Should use MaskedValueIsZero if you don't know whether Op"
3497 " is a target node!");
3498 Known.resetAll();
3499}
3500
3501void TargetLowering::computeKnownBitsForTargetInstr(
3502 GISelKnownBits &Analysis, Register R, KnownBits &Known,
3503 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3504 unsigned Depth) const {
3505 Known.resetAll();
3506}
3507
3508void TargetLowering::computeKnownBitsForFrameIndex(
3509 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3510 // The low bits are known zero if the pointer is aligned.
3511 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3512}
3513
3514Align TargetLowering::computeKnownAlignForTargetInstr(
3515 GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
3516 unsigned Depth) const {
3517 return Align(1);
3518}
3519
3520/// This method can be implemented by targets that want to expose additional
3521/// information about sign bits to the DAG Combiner.
3522unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3523 const APInt &,
3524 const SelectionDAG &,
3525 unsigned Depth) const {
3526 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3527 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3528 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3529 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3530 "Should use ComputeNumSignBits if you don't know whether Op"
3531 " is a target node!");
3532 return 1;
3533}
3534
3535unsigned TargetLowering::computeNumSignBitsForTargetInstr(
3536 GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
3537 const MachineRegisterInfo &MRI, unsigned Depth) const {
3538 return 1;
3539}
3540
3541bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3542 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3543 TargetLoweringOpt &TLO, unsigned Depth) const {
3544 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3545 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3546 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3547 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3548 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3549 " is a target node!");
3550 return false;
3551}
3552
3553bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3554 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3555 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3556 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3557 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3558 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3559 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3560 "Should use SimplifyDemandedBits if you don't know whether Op"
3561 " is a target node!");
3562 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3563 return false;
3564}
3565
3566SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3567 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3568 SelectionDAG &DAG, unsigned Depth) const {
3569 assert(
3570 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3571 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3572 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3573 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3574 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3575 " is a target node!");
3576 return SDValue();
3577}
3578
3579SDValue
3580TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3581 SDValue N1, MutableArrayRef<int> Mask,
3582 SelectionDAG &DAG) const {
3583 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3584 if (!LegalMask) {
3585 std::swap(N0, N1);
3587 LegalMask = isShuffleMaskLegal(Mask, VT);
3588 }
3589
3590 if (!LegalMask)
3591 return SDValue();
3592
3593 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3594}
3595
3596const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
3597 return nullptr;
3598}
3599
3600bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3601 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3602 bool PoisonOnly, unsigned Depth) const {
3603 assert(
3604 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3605 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3606 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3607 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3608 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3609 " is a target node!");
3610 return false;
3611}
3612
3613bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
3614 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3615 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
3616 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3617 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3618 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3619 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3620 "Should use canCreateUndefOrPoison if you don't know whether Op"
3621 " is a target node!");
3622 // Be conservative and return true.
3623 return true;
3624}
3625
3626bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
3627 const SelectionDAG &DAG,
3628 bool SNaN,
3629 unsigned Depth) const {
3630 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3631 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3632 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3633 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3634 "Should use isKnownNeverNaN if you don't know whether Op"
3635 " is a target node!");
3636 return false;
3637}
3638
3639bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
3640 const APInt &DemandedElts,
3641 APInt &UndefElts,
3642 const SelectionDAG &DAG,
3643 unsigned Depth) const {
3644 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3645 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3646 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3647 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3648 "Should use isSplatValue if you don't know whether Op"
3649 " is a target node!");
3650 return false;
3651}
3652
3653// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3654// work with truncating build vectors and vectors with elements of less than
3655// 8 bits.
3656bool TargetLowering::isConstTrueVal(SDValue N) const {
3657 if (!N)
3658 return false;
3659
3660 unsigned EltWidth;
3661 APInt CVal;
3662 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3663 /*AllowTruncation=*/true)) {
3664 CVal = CN->getAPIntValue();
3665 EltWidth = N.getValueType().getScalarSizeInBits();
3666 } else
3667 return false;
3668
3669 // If this is a truncating splat, truncate the splat value.
3670 // Otherwise, we may fail to match the expected values below.
3671 if (EltWidth < CVal.getBitWidth())
3672 CVal = CVal.trunc(EltWidth);
3673
3674 switch (getBooleanContents(N.getValueType())) {
3675 case UndefinedBooleanContent:
3676 return CVal[0];
3677 case ZeroOrOneBooleanContent:
3678 return CVal.isOne();
3679 case ZeroOrNegativeOneBooleanContent:
3680 return CVal.isAllOnes();
3681 }
3682
3683 llvm_unreachable("Invalid boolean contents");
3684}
3685
3686bool TargetLowering::isConstFalseVal(SDValue N) const {
3687 if (!N)
3688 return false;
3689
3690 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3691 if (!CN) {
3692 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3693 if (!BV)
3694 return false;
3695
3696 // Only interested in constant splats; we don't care about undef
3697 // elements in identifying boolean constants, and getConstantSplatNode
3698 // returns NULL if all ops are undef.
3699 CN = BV->getConstantSplatNode();
3700 if (!CN)
3701 return false;
3702 }
3703
3704 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3705 return !CN->getAPIntValue()[0];
3706
3707 return CN->isZero();
3708}
3709
3710bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
3711 bool SExt) const {
3712 if (VT == MVT::i1)
3713 return N->isOne();
3714
3715 TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
3716 switch (Cnt) {
3717 case TargetLowering::ZeroOrOneBooleanContent:
3718 // An extended value of 1 is always true, unless its original type is i1,
3719 // in which case it will be sign extended to -1.
3720 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3721 case TargetLowering::UndefinedBooleanContent:
3722 case TargetLowering::ZeroOrNegativeOneBooleanContent:
3723 return N->isAllOnes() && SExt;
3724 }
3725 llvm_unreachable("Unexpected enumeration.");
3726}
3727
3728/// This helper function of SimplifySetCC tries to optimize the comparison when
3729/// either operand of the SetCC node is a bitwise-and instruction.
3730SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
3731 ISD::CondCode Cond, const SDLoc &DL,
3732 DAGCombinerInfo &DCI) const {
3733 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
3734 std::swap(N0, N1);
3735
3736 SelectionDAG &DAG = DCI.DAG;
3737 EVT OpVT = N0.getValueType();
3738 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
3739 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
3740 return SDValue();
3741
3742 // (X & Y) != 0 --> zextOrTrunc(X & Y)
3743 // iff everything but LSB is known zero:
3744 if (Cond == ISD::SETNE && isNullConstant(N1) &&
3745 (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent ||
3746 getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
3747 unsigned NumEltBits = OpVT.getScalarSizeInBits();
3748 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
3749 if (DAG.MaskedValueIsZero(N0, UpperBits))
3750 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
3751 }
3752
3753 // Try to eliminate a power-of-2 mask constant by converting to a signbit
3754 // test in a narrow type that we can truncate to with no cost. Examples:
3755 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
3756 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
3757 // TODO: This conservatively checks for type legality on the source and
3758 // destination types. That may inhibit optimizations, but it also
3759 // allows setcc->shift transforms that may be more beneficial.
3760 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3761 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
3762 isTypeLegal(OpVT) && N0.hasOneUse()) {
3763 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
3764 AndC->getAPIntValue().getActiveBits());
3765 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
3766 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
3767 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
3768 return DAG.getSetCC(DL, VT, Trunc, Zero,
3769 Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
3770 }
3771 }
3772
3773 // Match these patterns in any of their permutations:
3774 // (X & Y) == Y
3775 // (X & Y) != Y
3776 SDValue X, Y;
3777 if (N0.getOperand(0) == N1) {
3778 X = N0.getOperand(1);
3779 Y = N0.getOperand(0);
3780 } else if (N0.getOperand(1) == N1) {
3781 X = N0.getOperand(0);
3782 Y = N0.getOperand(1);
3783 } else {
3784 return SDValue();
3785 }
3786
3787 SDValue Zero = DAG.getConstant(0, DL, OpVT);
3788 if (DAG.isKnownToBeAPowerOfTwo(Y)) {
3789 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
3790 // Note that where Y is variable and is known to have at most one bit set
3791 // (for example, if it is Z & 1) we cannot do this; the expressions are not
3792 // equivalent when Y == 0.
3793 assert(OpVT.isInteger());
3794 Cond = ISD::getSetCCInverse(Cond, OpVT);
3795 if (DCI.isBeforeLegalizeOps() ||
3796 isCondCodeLegal(Cond, N0.getSimpleValueType()))
3797 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
3798 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
3799 // If the target supports an 'and-not' or 'and-complement' logic operation,
3800 // try to use that to make a comparison operation more efficient.
3801 // But don't do this transform if the mask is a single bit because there are
3802 // more efficient ways to deal with that case (for example, 'bt' on x86 or
3803 // 'rlwinm' on PPC).
3804
3805 // Bail out if the compare operand that we want to turn into a zero is
3806 // already a zero (otherwise, infinite loop).
3807 auto *YConst = dyn_cast<ConstantSDNode>(Y);
3808 if (YConst && YConst->isZero())
3809 return SDValue();
3810
3811 // Transform this into: ~X & Y == 0.
3812 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
3813 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
3814 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
3815 }
3816
3817 return SDValue();
3818}
3819
3820/// There are multiple IR patterns that could be checking whether certain
3821/// truncation of a signed number would be lossy or not. The pattern which is
3822/// best at the IR level may not lower optimally. Thus, we want to unfold it.
3823/// We are looking for the following pattern: (KeptBits is a constant)
3824/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
3825/// KeptBits won't be bitwidth(x); that case will be constant-folded to true/false.
3826/// KeptBits also can't be 1; that would have been folded to %x dstcond 0.
3827/// We will unfold it into the natural trunc+sext pattern:
3828/// ((%x << C) a>> C) dstcond %x
3829/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
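/// For example, with i16 %x and KeptBits == 8 this turns
///   (add %x, 128) u< 256   into   ((%x << 8) a>> 8) == %x.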
3830SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
3831 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
3832 const SDLoc &DL) const {
3833 // We must be comparing with a constant.
3834 ConstantSDNode *C1;
3835 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
3836 return SDValue();
3837
3838 // N0 should be: add %x, (1 << (KeptBits-1))
3839 if (N0->getOpcode() != ISD::ADD)
3840 return SDValue();
3841
3842 // And we must be 'add'ing a constant.
3843 ConstantSDNode *C01;
3844 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
3845 return SDValue();
3846
3847 SDValue X = N0->getOperand(0);
3848 EVT XVT = X.getValueType();
3849
3850 // Validate constants ...
3851
3852 APInt I1 = C1->getAPIntValue();
3853
3854 ISD::CondCode NewCond;
3855 if (Cond == ISD::CondCode::SETULT) {
3856 NewCond = ISD::CondCode::SETEQ;
3857 } else if (Cond == ISD::CondCode::SETULE) {
3858 NewCond = ISD::CondCode::SETEQ;
3859 // But need to 'canonicalize' the constant.
3860 I1 += 1;
3861 } else if (Cond == ISD::CondCode::SETUGT) {
3862 NewCond = ISD::CondCode::SETNE;
3863 // But need to 'canonicalize' the constant.
3864 I1 += 1;
3865 } else if (Cond == ISD::CondCode::SETUGE) {
3866 NewCond = ISD::CondCode::SETNE;
3867 } else
3868 return SDValue();
3869
3870 APInt I01 = C01->getAPIntValue();
3871
3872 auto checkConstants = [&I1, &I01]() -> bool {
3873 // Both of them must be power-of-two, and the constant from setcc is bigger.
3874 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
3875 };
3876
3877 if (checkConstants()) {
3878 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
3879 } else {
3880 // What if we invert constants? (and the target predicate)
3881 I1.negate();
3882 I01.negate();
3883 assert(XVT.isInteger());
3884 NewCond = getSetCCInverse(NewCond, XVT);
3885 if (!checkConstants())
3886 return SDValue();
3887 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
3888 }
3889
3890 // They are power-of-two, so which bit is set?
3891 const unsigned KeptBits = I1.logBase2();
3892 const unsigned KeptBitsMinusOne = I01.logBase2();
3893
3894 // Magic!
3895 if (KeptBits != (KeptBitsMinusOne + 1))
3896 return SDValue();
3897 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
3898
3899 // We don't want to do this in every single case.
3900 SelectionDAG &DAG = DCI.DAG;
3901 if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
3902 XVT, KeptBits))
3903 return SDValue();
3904
3905 const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
3906 assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
3907
3908 // Unfold into: ((%x << C) a>> C) cond %x
3909 // Where 'cond' will be either 'eq' or 'ne'.
3910 SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
3911 SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
3912 SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
3913 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
3914
3915 return T2;
3916}
3917
3918// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
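// For example, (X & (0x80 l>> Y)) != 0 becomes ((X << Y) & 0x80) != 0, so the
// AND mask no longer depends on Y.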
3919SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
3920 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
3921 DAGCombinerInfo &DCI, const SDLoc &DL) const {
3922 assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
3923 "Should be a comparison with 0.");
3924 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3925 "Valid only for [in]equality comparisons.");
3926
3927 unsigned NewShiftOpcode;
3928 SDValue X, C, Y;
3929
3930 SelectionDAG &DAG = DCI.DAG;
3931 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3932
3933 // Look for '(C l>>/<< Y)'.
3934 auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
3935 // The shift should be one-use.
3936 if (!V.hasOneUse())
3937 return false;
3938 unsigned OldShiftOpcode = V.getOpcode();
3939 switch (OldShiftOpcode) {
3940 case ISD::SHL:
3941 NewShiftOpcode = ISD::SRL;
3942 break;
3943 case ISD::SRL:
3944 NewShiftOpcode = ISD::SHL;
3945 break;
3946 default:
3947 return false; // must be a logical shift.
3948 }
3949 // We should be shifting a constant.
3950 // FIXME: best to use isConstantOrConstantVector().
3951 C = V.getOperand(0);
3952 ConstantSDNode *CC =
3953 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
3954 if (!CC)
3955 return false;
3956 Y = V.getOperand(1);
3957
3958 ConstantSDNode *XC =
3959 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
3960 return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
3961 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
3962 };
3963
3964 // The LHS of the comparison should be a one-use 'and'.
3965 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
3966 return SDValue();
3967
3968 X = N0.getOperand(0);
3969 SDValue Mask = N0.getOperand(1);
3970
3971 // 'and' is commutative!
3972 if (!Match(Mask)) {
3973 std::swap(X, Mask);
3974 if (!Match(Mask))
3975 return SDValue();
3976 }
3977
3978 EVT VT = X.getValueType();
3979
3980 // Produce:
3981 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
3982 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
3983 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
3984 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
3985 return T2;
3986}
3987
3988/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
3989/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
3990/// handle the commuted versions of these patterns.
3991SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
3992 ISD::CondCode Cond, const SDLoc &DL,
3993 DAGCombinerInfo &DCI) const {
3994 unsigned BOpcode = N0.getOpcode();
3995 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
3996 "Unexpected binop");
3997 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
3998
3999 // (X + Y) == X --> Y == 0
4000 // (X - Y) == X --> Y == 0
4001 // (X ^ Y) == X --> Y == 0
4002 SelectionDAG &DAG = DCI.DAG;
4003 EVT OpVT = N0.getValueType();
4004 SDValue X = N0.getOperand(0);
4005 SDValue Y = N0.getOperand(1);
4006 if (X == N1)
4007 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4008
4009 if (Y != N1)
4010 return SDValue();
4011
4012 // (X + Y) == Y --> X == 0
4013 // (X ^ Y) == Y --> X == 0
4014 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4015 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4016
4017 // The shift would not be valid if the operands are boolean (i1).
4018 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4019 return SDValue();
4020
4021 // (X - Y) == Y --> X == Y << 1
4022 EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
4023 !DCI.isBeforeLegalize());
4024 SDValue One = DAG.getConstant(1, DL, ShiftVT);
4025 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4026 if (!DCI.isCalledByLegalizer())
4027 DCI.AddToWorklist(YShl1.getNode());
4028 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4029}
4030
4031static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
4032 SDValue N0, const APInt &C1,
4033 ISD::CondCode Cond, const SDLoc &dl,
4034 SelectionDAG &DAG) {
4035 // Look through truncs that don't change the value of a ctpop.
4036 // FIXME: Add vector support? Need to be careful with setcc result type below.
4037 SDValue CTPOP = N0;
4038 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4039 N0.getOperand(0).getOpcode() == ISD::CTPOP)
4040 CTPOP = N0.getOperand(0);
4041
4042 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4043 return SDValue();
4044
4045 EVT CTVT = CTPOP.getValueType();
4046 SDValue CTOp = CTPOP.getOperand(0);
4047
4048 // Expand a power-of-2-or-zero comparison based on ctpop:
4049 // (ctpop x) u< 2 -> (x & x-1) == 0
4050 // (ctpop x) u> 1 -> (x & x-1) != 0
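// When the target's ctpop cost limit allows it, this generalizes, e.g.
// (ctpop x) u< 3 -> ((x & x-1) & ((x & x-1)-1)) == 0; the loop below applies
// one clear-lowest-set-bit step per extra allowed bit.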
4051 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4052 // Keep the CTPOP if it is a legal vector op.
4053 if (CTVT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
4054 return SDValue();
4055
4056 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4057 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4058 return SDValue();
4059 if (C1 == 0 && (Cond == ISD::SETULT))
4060 return SDValue(); // This is handled elsewhere.
4061
4062 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4063
4064 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4065 SDValue Result = CTOp;
4066 for (unsigned i = 0; i < Passes; i++) {
4067 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4068 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4069 }
4070 ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
4071 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4072 }
4073
4074 // Expand a power-of-2 comparison based on ctpop:
4075 // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
4076 // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
4077 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4078 // Keep the CTPOP if it is legal.
4079 if (TLI.isOperationLegal(ISD::CTPOP, CTVT))
4080 return SDValue();
4081
4082 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4083 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4084 assert(CTVT.isInteger());
4085 ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
4086 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4087 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4088 SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
4089 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4090 unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
4091 return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
4092 }
4093
4094 return SDValue();
4095}
4096
4097static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4098 ISD::CondCode Cond, const SDLoc &dl,
4099 SelectionDAG &DAG) {
4100 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4101 return SDValue();
4102
4103 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4104 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4105 return SDValue();
4106
4107 auto getRotateSource = [](SDValue X) {
4108 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4109 return X.getOperand(0);
4110 return SDValue();
4111 };
4112
4113 // Peek through a rotated value compared against 0 or -1:
4114 // (rot X, Y) == 0/-1 --> X == 0/-1
4115 // (rot X, Y) != 0/-1 --> X != 0/-1
4116 if (SDValue R = getRotateSource(N0))
4117 return DAG.getSetCC(dl, VT, R, N1, Cond);
4118
4119 // Peek through an 'or' of a rotated value compared against 0:
4120 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4121 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4122 //
4123 // TODO: Add the 'and' with -1 sibling.
4124 // TODO: Recurse through a series of 'or' ops to find the rotate.
4125 EVT OpVT = N0.getValueType();
4126 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4127 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4128 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4129 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4130 }
4131 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4132 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4133 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4134 }
4135 }
4136
4137 return SDValue();
4138}
4139
4140static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4141 ISD::CondCode Cond, const SDLoc &dl,
4142 SelectionDAG &DAG) {
4143 // If we are testing for all-bits-clear, we might be able to do that with
4144 // less shifting since bit-order does not matter.
4145 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4146 return SDValue();
4147
4148 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4149 if (!C1 || !C1->isZero())
4150 return SDValue();
4151
4152 if (!N0.hasOneUse() ||
4153 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4154 return SDValue();
4155
4156 unsigned BitWidth = N0.getScalarValueSizeInBits();
4157 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4158 if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
4159 return SDValue();
4160
4161 // Canonicalize fshr as fshl to reduce pattern-matching.
4162 unsigned ShAmt = ShAmtC->getZExtValue();
4163 if (N0.getOpcode() == ISD::FSHR)
4164 ShAmt = BitWidth - ShAmt;
4165
4166 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4167 SDValue X, Y;
4168 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4169 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4170 return false;
4171 if (Or.getOperand(0) == Other) {
4172 X = Or.getOperand(0);
4173 Y = Or.getOperand(1);
4174 return true;
4175 }
4176 if (Or.getOperand(1) == Other) {
4177 X = Or.getOperand(1);
4178 Y = Or.getOperand(0);
4179 return true;
4180 }
4181 return false;
4182 };
4183
4184 EVT OpVT = N0.getValueType();
4185 EVT ShAmtVT = N0.getOperand(2).getValueType();
4186 SDValue F0 = N0.getOperand(0);
4187 SDValue F1 = N0.getOperand(1);
4188 if (matchOr(F0, F1)) {
4189 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4190 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4191 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4192 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4193 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4194 }
4195 if (matchOr(F1, F0)) {
4196 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4197 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4198 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4199 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4200 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4201 }
4202
4203 return SDValue();
4204}
4205
4206/// Try to simplify a setcc built with the specified operands and cc. If it is
4207/// unable to simplify it, return a null SDValue.
4208SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4209 ISD::CondCode Cond, bool foldBooleans,
4210 DAGCombinerInfo &DCI,
4211 const SDLoc &dl) const {
4212 SelectionDAG &DAG = DCI.DAG;
4213 const DataLayout &Layout = DAG.getDataLayout();
4214 EVT OpVT = N0.getValueType();
4215 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4216
4217 // Constant fold or commute setcc.
4218 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4219 return Fold;
4220
4221 bool N0ConstOrSplat =
4222 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4223 bool N1ConstOrSplat =
4224 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4225
4226 // Ensure that the constant occurs on the RHS and fold constant comparisons.
4227 // TODO: Handle non-splat vector constants. All undef causes trouble.
4228 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4229 // infinite loop here when we encounter one.
4230 ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
4231 if (N0ConstOrSplat && (!OpVT.isScalableVector() || !N1ConstOrSplat) &&
4232 (DCI.isBeforeLegalizeOps() ||
4233 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4234 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4235
4236 // If we have a subtract with the same 2 non-constant operands as this setcc
4237 // -- but in reverse order -- then try to commute the operands of this setcc
4238 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4239 // instruction on some targets.
4240 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4241 (DCI.isBeforeLegalizeOps() ||
4242 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4243 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4244 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4245 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4246
4247 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4248 return V;
4249
4250 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4251 return V;
4252
4253 if (auto *N1C = isConstOrConstSplat(N1)) {
4254 const APInt &C1 = N1C->getAPIntValue();
4255
4256 // Optimize some CTPOP cases.
4257 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4258 return V;
4259
4260 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4261 // X * Y == 0 --> (X == 0) || (Y == 0)
4262 // X * Y != 0 --> (X != 0) && (Y != 0)
4263 // TODO: This bails out if minsize is set, but if the target doesn't have a
4264 // single instruction multiply for this type, it would likely be
4265 // smaller to decompose.
4266 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4267 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4268 (N0->getFlags().hasNoUnsignedWrap() ||
4269 N0->getFlags().hasNoSignedWrap()) &&
4270 !Attr.hasFnAttr(Attribute::MinSize)) {
4271 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4272 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4273 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4274 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4275 }
4276
4277 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4278 // equality comparison, then we're just comparing whether X itself is
4279 // zero.
4280 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4281 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4282 llvm::has_single_bit<uint32_t>(N0.getScalarValueSizeInBits())) {
4283 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4284 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4285 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4286 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4287 // (srl (ctlz x), 5) == 0 -> X != 0
4288 // (srl (ctlz x), 5) != 1 -> X != 0
4289 Cond = ISD::SETNE;
4290 } else {
4291 // (srl (ctlz x), 5) != 0 -> X == 0
4292 // (srl (ctlz x), 5) == 1 -> X == 0
4293 Cond = ISD::SETEQ;
4294 }
4295 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4296 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4297 Cond);
4298 }
4299 }
4300 }
4301 }
4302
4303 // FIXME: Support vectors.
4304 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4305 const APInt &C1 = N1C->getAPIntValue();
4306
4307 // (zext x) == C --> x == (trunc C)
4308 // (sext x) == C --> x == (trunc C)
4309 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4310 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4311 unsigned MinBits = N0.getValueSizeInBits();
4312 SDValue PreExt;
4313 bool Signed = false;
4314 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4315 // ZExt
4316 MinBits = N0->getOperand(0).getValueSizeInBits();
4317 PreExt = N0->getOperand(0);
4318 } else if (N0->getOpcode() == ISD::AND) {
4319 // DAGCombine turns costly ZExts into ANDs
4320 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4321 if ((C->getAPIntValue()+1).isPowerOf2()) {
4322 MinBits = C->getAPIntValue().countr_one();
4323 PreExt = N0->getOperand(0);
4324 }
4325 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4326 // SExt
4327 MinBits = N0->getOperand(0).getValueSizeInBits();
4328 PreExt = N0->getOperand(0);
4329 Signed = true;
4330 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4331 // ZEXTLOAD / SEXTLOAD
4332 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4333 MinBits = LN0->getMemoryVT().getSizeInBits();
4334 PreExt = N0;
4335 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4336 Signed = true;
4337 MinBits = LN0->getMemoryVT().getSizeInBits();
4338 PreExt = N0;
4339 }
4340 }
4341
4342 // Figure out how many bits we need to preserve this constant.
4343 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4344
4345 // Make sure we're not losing bits from the constant.
4346 if (MinBits > 0 &&
4347 MinBits < C1.getBitWidth() &&
4348 MinBits >= ReqdBits) {
4349 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4350 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4351 // Will get folded away.
4352 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4353 if (MinBits == 1 && C1 == 1)
4354 // Invert the condition.
4355 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4356 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4357 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4358 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4359 }
4360
4361 // If truncating the setcc operands is not desirable, we can still
4362 // simplify the expression in some cases:
4363 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4364 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4365 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4366 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4367 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4368 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4369 SDValue TopSetCC = N0->getOperand(0);
4370 unsigned N0Opc = N0->getOpcode();
4371 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4372 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4373 TopSetCC.getOpcode() == ISD::SETCC &&
4374 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4375 (isConstFalseVal(N1) ||
4376 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4377
4378 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4379 (!N1C->isZero() && Cond == ISD::SETNE);
4380
4381 if (!Inverse)
4382 return TopSetCC;
4383
4384 ISD::CondCode InvCond = ISD::getSetCCInverse(
4385 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4386 TopSetCC.getOperand(0).getValueType());
4387 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4388 TopSetCC.getOperand(1),
4389 InvCond);
4390 }
4391 }
4392 }
4393
4394 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4395 // equality or unsigned, and all 1 bits of the const are in the same
4396 // partial word, see if we can shorten the load.
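// E.g. on a little-endian target, (and (i32 load p), 0xFF00) == 0 can be
// rewritten (when the target allows reducing the load width) to use a 16-bit
// load of the low half, since every set mask bit lies in the first two bytes.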
4397 if (DCI.isBeforeLegalize() &&
4398 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4399 N0.getOpcode() == ISD::AND && C1 == 0 &&
4400 N0.getNode()->hasOneUse() &&
4401 isa<LoadSDNode>(N0.getOperand(0)) &&
4402 N0.getOperand(0).getNode()->hasOneUse() &&
4403 isa<ConstantSDNode>(N0.getOperand(1))) {
4404 LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
4405 APInt bestMask;
4406 unsigned bestWidth = 0, bestOffset = 0;
4407 if (Lod->isSimple() && Lod->isUnindexed()) {
4408 unsigned origWidth = N0.getValueSizeInBits();
4409 unsigned maskWidth = origWidth;
4410 // We can narrow (e.g.) 16-bit extending loads on a 32-bit target to
4411 // 8 bits, but we have to be careful...
4412 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4413 origWidth = Lod->getMemoryVT().getSizeInBits();
4414 const APInt &Mask = N0.getConstantOperandAPInt(1);
4415 for (unsigned width = origWidth / 2; width>=8; width /= 2) {
4416 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4417 for (unsigned offset=0; offset<origWidth/width; offset++) {
4418 if (Mask.isSubsetOf(newMask)) {
4419 if (Layout.isLittleEndian())
4420 bestOffset = (uint64_t)offset * (width/8);
4421 else
4422 bestOffset = (origWidth/width - offset - 1) * (width/8);
4423 bestMask = Mask.lshr(offset * (width/8) * 8);
4424 bestWidth = width;
4425 break;
4426 }
4427 newMask <<= width;
4428 }
4429 }
4430 }
4431 if (bestWidth) {
4432 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4433 if (newVT.isRound() &&
4434 shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
4435 SDValue Ptr = Lod->getBasePtr();
4436 if (bestOffset != 0)
4437 Ptr =
4438 DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset), dl);
4439 SDValue NewLoad =
4440 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4441 Lod->getPointerInfo().getWithOffset(bestOffset),
4442 Lod->getOriginalAlign());
4443 return DAG.getSetCC(dl, VT,
4444 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4445 DAG.getConstant(bestMask.trunc(bestWidth),
4446 dl, newVT)),
4447 DAG.getConstant(0LL, dl, newVT), Cond);
4448 }
4449 }
4450 }
4451
4452 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4453 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4454 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4455
4456 // If the comparison constant has bits in the upper part, the
4457 // zero-extended value could never match.
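// E.g. (zext i8 %x to i32) == 300 is always false and != 300 is always true;
// the switch below folds the unsigned comparisons the same way.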
4458 if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
4459 C1.getBitWidth() - InSize))) {
4460 switch (Cond) {
4461 case ISD::SETUGT:
4462 case ISD::SETUGE:
4463 case ISD::SETEQ:
4464 return DAG.getConstant(0, dl, VT);
4465 case ISD::SETULT:
4466 case ISD::SETULE:
4467 case ISD::SETNE:
4468 return DAG.getConstant(1, dl, VT);
4469 case ISD::SETGT:
4470 case