LLVM 19.0.0git
TargetLowering.cpp
1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/CodeGen/TargetLowering.h"
14#include "llvm/ADT/STLExtras.h"
25#include "llvm/IR/DataLayout.h"
28#include "llvm/IR/LLVMContext.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCExpr.h"
36#include <cctype>
37using namespace llvm;
38
39/// NOTE: The TargetMachine owns TLOF.
40TargetLowering::TargetLowering(const TargetMachine &tm)
41 : TargetLoweringBase(tm) {}
42
43const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
44 return nullptr;
45}
46
47bool TargetLowering::isPositionIndependent() const {
48 return getTargetMachine().isPositionIndependent();
49}
50
51/// Check whether a given call node is in tail position within its function. If
52/// so, it sets Chain to the input chain of the tail call.
53bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
54 SDValue &Chain) const {
55 const Function &F = DAG.getMachineFunction().getFunction();
56
57 // First, check if tail calls have been disabled in this function.
58 if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
59 return false;
60
61 // Conservatively require the attributes of the call to match those of
62 // the return. Ignore following attributes because they don't affect the
63 // call sequence.
64 AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
65 for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
66 Attribute::DereferenceableOrNull, Attribute::NoAlias,
67 Attribute::NonNull, Attribute::NoUndef})
68 CallerAttrs.removeAttribute(Attr);
69
70 if (CallerAttrs.hasAttributes())
71 return false;
72
73 // It's not safe to eliminate the sign / zero extension of the return value.
74 if (CallerAttrs.contains(Attribute::ZExt) ||
75 CallerAttrs.contains(Attribute::SExt))
76 return false;
77
78 // Check if the only use is a function return node.
79 return isUsedByReturnOnly(Node, Chain);
80}
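// Illustrative example (an assumption, not part of the upstream source): for
//   define zeroext i8 @caller() {
//     %r = tail call i8 @callee()
//     ret i8 %r
//   }
// the caller's zeroext return attribute survives the filtering above, so this
// hook returns false: dropping the zero extension at the return is not safe.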
81
82bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
83 const uint32_t *CallerPreservedMask,
84 const SmallVectorImpl<CCValAssign> &ArgLocs,
85 const SmallVectorImpl<SDValue> &OutVals) const {
86 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
87 const CCValAssign &ArgLoc = ArgLocs[I];
88 if (!ArgLoc.isRegLoc())
89 continue;
90 MCRegister Reg = ArgLoc.getLocReg();
91 // Only look at callee saved registers.
92 if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
93 continue;
94 // Check that we pass the value used for the caller.
95 // (We look for a CopyFromReg reading a virtual register that is used
96 // for the function live-in value of register Reg)
97 SDValue Value = OutVals[I];
98 if (Value->getOpcode() == ISD::AssertZext)
99 Value = Value.getOperand(0);
100 if (Value->getOpcode() != ISD::CopyFromReg)
101 return false;
102 Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
103 if (MRI.getLiveInPhysReg(ArgReg) != Reg)
104 return false;
105 }
106 return true;
107}
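// Sketch of the intent (illustrative assumption): if a callee-saved register,
// say r4 on some target, is also used to pass an argument, the tail call is
// only safe when the outgoing value is a CopyFromReg of the caller's own
// live-in for r4, i.e. the caller forwards its incoming r4 unchanged; anything
// else would clobber a register the caller's caller expects to be preserved.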
108
109/// Set CallLoweringInfo attribute flags based on a call instruction
110/// and called function attributes.
111void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
112 unsigned ArgIdx) {
113 IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
114 IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
115 IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
116 IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
117 IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
118 IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
119 IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
120 IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
121 IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
122 IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
123 IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
124 IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
125 Alignment = Call->getParamStackAlign(ArgIdx);
126 IndirectType = nullptr;
128 "multiple ABI attributes?");
129 if (IsByVal) {
130 IndirectType = Call->getParamByValType(ArgIdx);
131 if (!Alignment)
132 Alignment = Call->getParamAlign(ArgIdx);
133 }
134 if (IsPreallocated)
135 IndirectType = Call->getParamPreallocatedType(ArgIdx);
136 if (IsInAlloca)
137 IndirectType = Call->getParamInAllocaType(ArgIdx);
138 if (IsSRet)
139 IndirectType = Call->getParamStructRetType(ArgIdx);
140}
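// Example (illustrative, hypothetical call site): for
//   call void @f(ptr sret(%struct.S) align 8 %out)
// this sets IsSRet, leaves Alignment as the parameter's stack alignment (if
// any), and records %struct.S as IndirectType via getParamStructRetType, so
// later lowering knows which pointee is being returned indirectly.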
141
142/// Generate a libcall taking the given operands as arguments and returning a
143/// result of type RetVT.
144std::pair<SDValue, SDValue>
145TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
146 ArrayRef<SDValue> Ops,
147 MakeLibCallOptions CallOptions,
148 const SDLoc &dl,
149 SDValue InChain) const {
150 if (!InChain)
151 InChain = DAG.getEntryNode();
152
153 TargetLowering::ArgListTy Args;
154 Args.reserve(Ops.size());
155
156 TargetLowering::ArgListEntry Entry;
157 for (unsigned i = 0; i < Ops.size(); ++i) {
158 SDValue NewOp = Ops[i];
159 Entry.Node = NewOp;
160 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
161 Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
162 CallOptions.IsSExt);
163 Entry.IsZExt = !Entry.IsSExt;
164
165 if (CallOptions.IsSoften &&
166 !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
167 Entry.IsSExt = Entry.IsZExt = false;
168 }
169 Args.push_back(Entry);
170 }
171
172 if (LC == RTLIB::UNKNOWN_LIBCALL)
173 report_fatal_error("Unsupported library call operation!");
174 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
175 getPointerTy(DAG.getDataLayout()));
176
177 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
178 TargetLowering::CallLoweringInfo CLI(DAG);
179 bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
180 bool zeroExtend = !signExtend;
181
182 if (CallOptions.IsSoften &&
183 !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
184 signExtend = zeroExtend = false;
185 }
186
187 CLI.setDebugLoc(dl)
188 .setChain(InChain)
189 .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
190 .setNoReturn(CallOptions.DoesNotReturn)
191 .setDiscardResult(!CallOptions.IsReturnValueUsed)
192 .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
193 .setSExtResult(signExtend)
194 .setZExtResult(zeroExtend);
195 return LowerCallTo(CLI);
196}
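// Minimal usage sketch (assumed caller, not from this file): a target lowering
// an f128 addition through a libcall might do
//   SDValue Ops[2] = {LHS, RHS};
//   MakeLibCallOptions CallOptions;
//   std::pair<SDValue, SDValue> R =
//       makeLibCall(DAG, RTLIB::ADD_F128, MVT::f128, Ops, CallOptions, dl);
// where R.first is the call's result value and R.second is the output chain.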
197
198bool TargetLowering::findOptimalMemOpLowering(
199 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
200 unsigned SrcAS, const AttributeList &FuncAttributes) const {
201 if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
202 Op.getSrcAlign() < Op.getDstAlign())
203 return false;
204
205 EVT VT = getOptimalMemOpType(Op, FuncAttributes);
206
207 if (VT == MVT::Other) {
208 // Use the largest integer type whose alignment constraints are satisfied.
209 // We only need to check DstAlign here as SrcAlign is always greater or
210 // equal to DstAlign (or zero).
211 VT = MVT::i64;
212 if (Op.isFixedDstAlign())
213 while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
214 !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
215 VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
216 assert(VT.isInteger());
217
218 // Find the largest legal integer type.
219 MVT LVT = MVT::i64;
220 while (!isTypeLegal(LVT))
221 LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
222 assert(LVT.isInteger());
223
224 // If the type we've chosen is larger than the largest legal integer type
225 // then use that instead.
226 if (VT.bitsGT(LVT))
227 VT = LVT;
228 }
229
230 unsigned NumMemOps = 0;
231 uint64_t Size = Op.size();
232 while (Size) {
233 unsigned VTSize = VT.getSizeInBits() / 8;
234 while (VTSize > Size) {
235 // For now, only use non-vector loads / stores for the left-over pieces.
236 EVT NewVT = VT;
237 unsigned NewVTSize;
238
239 bool Found = false;
240 if (VT.isVector() || VT.isFloatingPoint()) {
241 NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
242 if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
243 isSafeMemOpType(NewVT.getSimpleVT()))
244 Found = true;
245 else if (NewVT == MVT::i64 &&
246 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
247 isSafeMemOpType(MVT::f64)) {
248 // i64 is usually not legal on 32-bit targets, but f64 may be.
249 NewVT = MVT::f64;
250 Found = true;
251 }
252 }
253
254 if (!Found) {
255 do {
256 NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
257 if (NewVT == MVT::i8)
258 break;
259 } while (!isSafeMemOpType(NewVT.getSimpleVT()));
260 }
261 NewVTSize = NewVT.getSizeInBits() / 8;
262
263 // If the new VT cannot cover all of the remaining bits, then consider
264 // issuing a (or a pair of) unaligned and overlapping load / store.
265 unsigned Fast;
266 if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
267 allowsMisalignedMemoryAccesses(
268 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
269 MachineMemOperand::MONone, &Fast) &&
270 Fast)
271 VTSize = Size;
272 else {
273 VT = NewVT;
274 VTSize = NewVTSize;
275 }
276 }
277
278 if (++NumMemOps > Limit)
279 return false;
280
281 MemOps.push_back(VT);
282 Size -= VTSize;
283 }
284
285 return true;
286}
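// Worked example (illustrative): for a 15-byte memcpy with a fixed 8-byte
// aligned destination on a typical 64-bit target, VT starts as i64 and the
// loop above emits {i64, i32, i16, i8}; if Op.allowOverlap() holds and
// misaligned accesses are fast, the tail instead becomes a second, overlapping
// i64, giving {i64, i64}.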
287
288/// Soften the operands of a comparison. This code is shared among BR_CC,
289/// SELECT_CC, and SETCC handlers.
290void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
291 SDValue &NewLHS, SDValue &NewRHS,
292 ISD::CondCode &CCCode,
293 const SDLoc &dl, const SDValue OldLHS,
294 const SDValue OldRHS) const {
295 SDValue Chain;
296 return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
297 OldRHS, Chain);
298}
299
300void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
301 SDValue &NewLHS, SDValue &NewRHS,
302 ISD::CondCode &CCCode,
303 const SDLoc &dl, const SDValue OldLHS,
304 const SDValue OldRHS,
305 SDValue &Chain,
306 bool IsSignaling) const {
307 // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
308 // not supporting it. We can update this code when libgcc provides such
309 // functions.
310
311 assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
312 && "Unsupported setcc type!");
313
314 // Expand into one or more soft-fp libcall(s).
315 RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
316 bool ShouldInvertCC = false;
317 switch (CCCode) {
318 case ISD::SETEQ:
319 case ISD::SETOEQ:
320 LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
321 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
322 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
323 break;
324 case ISD::SETNE:
325 case ISD::SETUNE:
326 LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
327 (VT == MVT::f64) ? RTLIB::UNE_F64 :
328 (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
329 break;
330 case ISD::SETGE:
331 case ISD::SETOGE:
332 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
333 (VT == MVT::f64) ? RTLIB::OGE_F64 :
334 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
335 break;
336 case ISD::SETLT:
337 case ISD::SETOLT:
338 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
339 (VT == MVT::f64) ? RTLIB::OLT_F64 :
340 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
341 break;
342 case ISD::SETLE:
343 case ISD::SETOLE:
344 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
345 (VT == MVT::f64) ? RTLIB::OLE_F64 :
346 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
347 break;
348 case ISD::SETGT:
349 case ISD::SETOGT:
350 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
351 (VT == MVT::f64) ? RTLIB::OGT_F64 :
352 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
353 break;
354 case ISD::SETO:
355 ShouldInvertCC = true;
356 [[fallthrough]];
357 case ISD::SETUO:
358 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
359 (VT == MVT::f64) ? RTLIB::UO_F64 :
360 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
361 break;
362 case ISD::SETONE:
363 // SETONE = O && UNE
364 ShouldInvertCC = true;
365 [[fallthrough]];
366 case ISD::SETUEQ:
367 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
368 (VT == MVT::f64) ? RTLIB::UO_F64 :
369 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
370 LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
371 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
372 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
373 break;
374 default:
375 // Invert CC for unordered comparisons
376 ShouldInvertCC = true;
377 switch (CCCode) {
378 case ISD::SETULT:
379 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
380 (VT == MVT::f64) ? RTLIB::OGE_F64 :
381 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
382 break;
383 case ISD::SETULE:
384 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
385 (VT == MVT::f64) ? RTLIB::OGT_F64 :
386 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
387 break;
388 case ISD::SETUGT:
389 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
390 (VT == MVT::f64) ? RTLIB::OLE_F64 :
391 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
392 break;
393 case ISD::SETUGE:
394 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
395 (VT == MVT::f64) ? RTLIB::OLT_F64 :
396 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
397 break;
398 default: llvm_unreachable("Do not know how to soften this setcc!");
399 }
400 }
401
402 // Use the target specific return value for comparison lib calls.
403 EVT RetVT = getCmpLibcallReturnType();
404 SDValue Ops[2] = {NewLHS, NewRHS};
405 TargetLowering::MakeLibCallOptions CallOptions;
406 EVT OpsVT[2] = { OldLHS.getValueType(),
407 OldRHS.getValueType() };
408 CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
409 auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
410 NewLHS = Call.first;
411 NewRHS = DAG.getConstant(0, dl, RetVT);
412
413 CCCode = getCmpLibcallCC(LC1);
414 if (ShouldInvertCC) {
415 assert(RetVT.isInteger());
416 CCCode = getSetCCInverse(CCCode, RetVT);
417 }
418
419 if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
420 // Update Chain.
421 Chain = Call.second;
422 } else {
423 EVT SetCCVT =
424 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
425 SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
426 auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
427 CCCode = getCmpLibcallCC(LC2);
428 if (ShouldInvertCC)
429 CCCode = getSetCCInverse(CCCode, RetVT);
430 NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
431 if (Chain)
432 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
433 Call2.second);
434 NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
435 Tmp.getValueType(), Tmp, NewLHS);
436 NewRHS = SDValue();
437 }
438}
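// Example of the two-libcall path above (illustrative): softening an f32
// SETUEQ selects LC1 = UO_F32 (__unordsf2) and LC2 = OEQ_F32 (__eqsf2); the
// result is (setcc (__unordsf2 a, b), 0, ne) OR (setcc (__eqsf2 a, b), 0, eq),
// while SETONE takes the inverted condition codes and combines them with AND.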
439
440/// Return the entry encoding for a jump table in the current function. The
441/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
442unsigned TargetLowering::getJumpTableEncoding() const {
443 // In non-pic modes, just use the address of a block.
444 if (!isPositionIndependent())
445 return MachineJumpTableInfo::EK_BlockAddress;
446
447 // In PIC mode, if the target supports a GPRel32 directive, use it.
448 if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
449 return MachineJumpTableInfo::EK_GPRel32BlockAddress;
450
451 // Otherwise, use a label difference.
452 return MachineJumpTableInfo::EK_LabelDifference32;
453}
454
455SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
456 SelectionDAG &DAG) const {
457 // If our PIC model is GP relative, use the global offset table as the base.
458 unsigned JTEncoding = getJumpTableEncoding();
459
460 if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
461 (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
462 return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
463
464 return Table;
465}
466
467/// This returns the relocation base for the given PIC jumptable, the same as
468/// getPICJumpTableRelocBase, but as an MCExpr.
469const MCExpr *
470TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
471 unsigned JTI,MCContext &Ctx) const{
472 // The normal PIC reloc base is the label at the start of the jump table.
473 return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
474}
475
476SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
477 SDValue Addr, int JTI,
478 SelectionDAG &DAG) const {
479 SDValue Chain = Value;
480 // Jump table debug info is only needed if CodeView is enabled.
481 if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
482 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
483 }
484 return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
485}
486
487bool
488TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
489 const TargetMachine &TM = getTargetMachine();
490 const GlobalValue *GV = GA->getGlobal();
491
492 // If the address is not even local to this DSO we will have to load it from
493 // a got and then add the offset.
494 if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
495 return false;
496
497 // If the code is position independent we will have to add a base register.
498 if (isPositionIndependent())
499 return false;
500
501 // Otherwise we can do it.
502 return true;
503}
504
505//===----------------------------------------------------------------------===//
506// Optimization Methods
507//===----------------------------------------------------------------------===//
508
509/// If the specified instruction has a constant integer operand and there are
510/// bits set in that constant that are not demanded, then clear those bits and
511/// return true.
512bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
513 const APInt &DemandedBits,
514 const APInt &DemandedElts,
515 TargetLoweringOpt &TLO) const {
516 SDLoc DL(Op);
517 unsigned Opcode = Op.getOpcode();
518
519 // Early-out if we've ended up calling an undemanded node, leave this to
520 // constant folding.
521 if (DemandedBits.isZero() || DemandedElts.isZero())
522 return false;
523
524 // Do target-specific constant optimization.
525 if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
526 return TLO.New.getNode();
527
528 // FIXME: ISD::SELECT, ISD::SELECT_CC
529 switch (Opcode) {
530 default:
531 break;
532 case ISD::XOR:
533 case ISD::AND:
534 case ISD::OR: {
535 auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
536 if (!Op1C || Op1C->isOpaque())
537 return false;
538
539 // If this is a 'not' op, don't touch it because that's a canonical form.
540 const APInt &C = Op1C->getAPIntValue();
541 if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
542 return false;
543
544 if (!C.isSubsetOf(DemandedBits)) {
545 EVT VT = Op.getValueType();
546 SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
547 SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
548 return TLO.CombineTo(Op, NewOp);
549 }
550
551 break;
552 }
553 }
554
555 return false;
556}
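// Example (illustrative): if Op is (and X, 0x00FF00FF) but only the low byte
// is demanded, the constant is shrunk to 0x000000FF and the node rebuilt,
// which frequently lets later combines remove the 'and' entirely.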
557
558bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
559 const APInt &DemandedBits,
560 TargetLoweringOpt &TLO) const {
561 EVT VT = Op.getValueType();
562 APInt DemandedElts = VT.isVector()
563 ? APInt::getAllOnes(VT.getVectorNumElements())
564 : APInt(1, 1);
565 return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
566}
567
568/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
569/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
570/// but it could be generalized for targets with other types of implicit
571/// widening casts.
572bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
573 const APInt &DemandedBits,
574 TargetLoweringOpt &TLO) const {
575 assert(Op.getNumOperands() == 2 &&
576 "ShrinkDemandedOp only supports binary operators!");
577 assert(Op.getNode()->getNumValues() == 1 &&
578 "ShrinkDemandedOp only supports nodes with one result!");
579
580 EVT VT = Op.getValueType();
581 SelectionDAG &DAG = TLO.DAG;
582 SDLoc dl(Op);
583
584 // Early return, as this function cannot handle vector types.
585 if (VT.isVector())
586 return false;
587
588 // Don't do this if the node has another user, which may require the
589 // full value.
590 if (!Op.getNode()->hasOneUse())
591 return false;
592
593 // Search for the smallest integer type with free casts to and from
594 // Op's type. For expedience, just check power-of-2 integer types.
595 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
596 unsigned DemandedSize = DemandedBits.getActiveBits();
597 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
598 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
599 EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
600 if (TLI.isTruncateFree(VT, SmallVT) && TLI.isZExtFree(SmallVT, VT)) {
601 // We found a type with free casts.
602 SDValue X = DAG.getNode(
603 Op.getOpcode(), dl, SmallVT,
604 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
605 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
606 assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
607 SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
608 return TLO.CombineTo(Op, Z);
609 }
610 }
611 return false;
612}
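// Example (illustrative): for a 64-bit (add X, Y) of which only the low 16
// bits are demanded, and with i16<->i64 truncate/zext free on the target, the
// node is rebuilt as
//   (any_extend (add (trunc X), (trunc Y)))
// trading the wide add for a narrow one plus free casts.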
613
614bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
615 DAGCombinerInfo &DCI) const {
616 SelectionDAG &DAG = DCI.DAG;
617 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
618 !DCI.isBeforeLegalizeOps());
619 KnownBits Known;
620
621 bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
622 if (Simplified) {
623 DCI.AddToWorklist(Op.getNode());
624 DCI.CommitTargetLoweringOpt(TLO);
625 }
626 return Simplified;
627}
628
629bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
630 const APInt &DemandedElts,
631 DAGCombinerInfo &DCI) const {
632 SelectionDAG &DAG = DCI.DAG;
633 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
634 !DCI.isBeforeLegalizeOps());
635 KnownBits Known;
636
637 bool Simplified =
638 SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
639 if (Simplified) {
640 DCI.AddToWorklist(Op.getNode());
641 DCI.CommitTargetLoweringOpt(TLO);
642 }
643 return Simplified;
644}
645
646bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
647 KnownBits &Known,
648 TargetLoweringOpt &TLO,
649 unsigned Depth,
650 bool AssumeSingleUse) const {
651 EVT VT = Op.getValueType();
652
653 // Since the number of lanes in a scalable vector is unknown at compile time,
654 // we track one bit which is implicitly broadcast to all lanes. This means
655 // that all lanes in a scalable vector are considered demanded.
656 APInt DemandedElts = VT.isFixedLengthVector()
657 ? APInt::getAllOnes(VT.getVectorNumElements())
658 : APInt(1, 1);
659 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
660 AssumeSingleUse);
661}
662
663// TODO: Under what circumstances can we create nodes? Constant folding?
664SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
665 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
666 SelectionDAG &DAG, unsigned Depth) const {
667 EVT VT = Op.getValueType();
668
669 // Limit search depth.
670 if (Depth >= SelectionDAG::MaxRecursionDepth)
671 return SDValue();
672
673 // Ignore UNDEFs.
674 if (Op.isUndef())
675 return SDValue();
676
677 // Not demanding any bits/elts from Op.
678 if (DemandedBits == 0 || DemandedElts == 0)
679 return DAG.getUNDEF(VT);
680
681 bool IsLE = DAG.getDataLayout().isLittleEndian();
682 unsigned NumElts = DemandedElts.getBitWidth();
683 unsigned BitWidth = DemandedBits.getBitWidth();
684 KnownBits LHSKnown, RHSKnown;
685 switch (Op.getOpcode()) {
686 case ISD::BITCAST: {
687 if (VT.isScalableVector())
688 return SDValue();
689
690 SDValue Src = peekThroughBitcasts(Op.getOperand(0));
691 EVT SrcVT = Src.getValueType();
692 EVT DstVT = Op.getValueType();
693 if (SrcVT == DstVT)
694 return Src;
695
696 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
697 unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
698 if (NumSrcEltBits == NumDstEltBits)
699 if (SDValue V = SimplifyMultipleUseDemandedBits(
700 Src, DemandedBits, DemandedElts, DAG, Depth + 1))
701 return DAG.getBitcast(DstVT, V);
702
703 if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
704 unsigned Scale = NumDstEltBits / NumSrcEltBits;
705 unsigned NumSrcElts = SrcVT.getVectorNumElements();
706 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
707 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
708 for (unsigned i = 0; i != Scale; ++i) {
709 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
710 unsigned BitOffset = EltOffset * NumSrcEltBits;
711 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
712 if (!Sub.isZero()) {
713 DemandedSrcBits |= Sub;
714 for (unsigned j = 0; j != NumElts; ++j)
715 if (DemandedElts[j])
716 DemandedSrcElts.setBit((j * Scale) + i);
717 }
718 }
719
720 if (SDValue V = SimplifyMultipleUseDemandedBits(
721 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
722 return DAG.getBitcast(DstVT, V);
723 }
724
725 // TODO - bigendian once we have test coverage.
726 if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
727 unsigned Scale = NumSrcEltBits / NumDstEltBits;
728 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
729 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
730 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
731 for (unsigned i = 0; i != NumElts; ++i)
732 if (DemandedElts[i]) {
733 unsigned Offset = (i % Scale) * NumDstEltBits;
734 DemandedSrcBits.insertBits(DemandedBits, Offset);
735 DemandedSrcElts.setBit(i / Scale);
736 }
737
738 if (SDValue V = SimplifyMultipleUseDemandedBits(
739 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
740 return DAG.getBitcast(DstVT, V);
741 }
742
743 break;
744 }
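// Illustrative example of the mapping above (assumed, not upstream text): for
// (bitcast v2i32 X to i64) on a little-endian target with DemandedBits =
// 0xFFFFFFFF, only the low destination bits are used, so DemandedSrcElts ends
// up as just element 0 and the recursion asks X only for that lane.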
745 case ISD::AND: {
746 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
747 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
748
749 // If all of the demanded bits are known 1 on one side, return the other.
750 // These bits cannot contribute to the result of the 'and' in this
751 // context.
752 if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
753 return Op.getOperand(0);
754 if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
755 return Op.getOperand(1);
756 break;
757 }
758 case ISD::OR: {
759 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
760 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
761
762 // If all of the demanded bits are known zero on one side, return the
763 // other. These bits cannot contribute to the result of the 'or' in this
764 // context.
765 if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
766 return Op.getOperand(0);
767 if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
768 return Op.getOperand(1);
769 break;
770 }
771 case ISD::XOR: {
772 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
773 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
774
775 // If all of the demanded bits are known zero on one side, return the
776 // other.
777 if (DemandedBits.isSubsetOf(RHSKnown.Zero))
778 return Op.getOperand(0);
779 if (DemandedBits.isSubsetOf(LHSKnown.Zero))
780 return Op.getOperand(1);
781 break;
782 }
783 case ISD::SHL: {
784 // If we are only demanding sign bits then we can use the shift source
785 // directly.
786 if (const APInt *MaxSA =
787 DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
788 SDValue Op0 = Op.getOperand(0);
789 unsigned ShAmt = MaxSA->getZExtValue();
790 unsigned NumSignBits =
791 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
792 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
793 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
794 return Op0;
795 }
796 break;
797 }
798 case ISD::SETCC: {
799 SDValue Op0 = Op.getOperand(0);
800 SDValue Op1 = Op.getOperand(1);
801 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
802 // If (1) we only need the sign-bit, (2) the setcc operands are the same
803 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
804 // -1, we may be able to bypass the setcc.
805 if (DemandedBits.isSignMask() &&
806 Op0.getScalarValueSizeInBits() == BitWidth &&
807 getBooleanContents(Op0.getValueType()) ==
808 BooleanContent::ZeroOrNegativeOneBooleanContent) {
809 // If we're testing X < 0, then this compare isn't needed - just use X!
810 // FIXME: We're limiting to integer types here, but this should also work
811 // if we don't care about FP signed-zero. The use of SETLT with FP means
812 // that we don't care about NaNs.
813 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
814 (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
815 return Op0;
816 }
817 break;
818 }
819 case ISD::SIGN_EXTEND_INREG: {
820 // If none of the extended bits are demanded, eliminate the sextinreg.
821 SDValue Op0 = Op.getOperand(0);
822 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
823 unsigned ExBits = ExVT.getScalarSizeInBits();
824 if (DemandedBits.getActiveBits() <= ExBits &&
825 shouldRemoveRedundantExtend(Op))
826 return Op0;
827 // If the input is already sign extended, just drop the extension.
828 unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
829 if (NumSignBits >= (BitWidth - ExBits + 1))
830 return Op0;
831 break;
832 }
833 case ISD::ANY_EXTEND_VECTOR_INREG:
834 case ISD::SIGN_EXTEND_VECTOR_INREG:
835 case ISD::ZERO_EXTEND_VECTOR_INREG: {
836 if (VT.isScalableVector())
837 return SDValue();
838
839 // If we only want the lowest element and none of extended bits, then we can
840 // return the bitcasted source vector.
841 SDValue Src = Op.getOperand(0);
842 EVT SrcVT = Src.getValueType();
843 EVT DstVT = Op.getValueType();
844 if (IsLE && DemandedElts == 1 &&
845 DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
846 DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
847 return DAG.getBitcast(DstVT, Src);
848 }
849 break;
850 }
851 case ISD::INSERT_VECTOR_ELT: {
852 if (VT.isScalableVector())
853 return SDValue();
854
855 // If we don't demand the inserted element, return the base vector.
856 SDValue Vec = Op.getOperand(0);
857 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
858 EVT VecVT = Vec.getValueType();
859 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
860 !DemandedElts[CIdx->getZExtValue()])
861 return Vec;
862 break;
863 }
864 case ISD::INSERT_SUBVECTOR: {
865 if (VT.isScalableVector())
866 return SDValue();
867
868 SDValue Vec = Op.getOperand(0);
869 SDValue Sub = Op.getOperand(1);
870 uint64_t Idx = Op.getConstantOperandVal(2);
871 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
872 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
873 // If we don't demand the inserted subvector, return the base vector.
874 if (DemandedSubElts == 0)
875 return Vec;
876 break;
877 }
878 case ISD::VECTOR_SHUFFLE: {
879 assert(!VT.isScalableVector());
880 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
881
882 // If all the demanded elts are from one operand and are inline,
883 // then we can use the operand directly.
884 bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
885 for (unsigned i = 0; i != NumElts; ++i) {
886 int M = ShuffleMask[i];
887 if (M < 0 || !DemandedElts[i])
888 continue;
889 AllUndef = false;
890 IdentityLHS &= (M == (int)i);
891 IdentityRHS &= ((M - NumElts) == i);
892 }
893
894 if (AllUndef)
895 return DAG.getUNDEF(Op.getValueType());
896 if (IdentityLHS)
897 return Op.getOperand(0);
898 if (IdentityRHS)
899 return Op.getOperand(1);
900 break;
901 }
902 default:
903 // TODO: Probably okay to remove after audit; here to reduce change size
904 // in initial enablement patch for scalable vectors
905 if (VT.isScalableVector())
906 return SDValue();
907
908 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
909 if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
910 Op, DemandedBits, DemandedElts, DAG, Depth))
911 return V;
912 break;
913 }
914 return SDValue();
915}
916
917SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
918 SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
919 unsigned Depth) const {
920 EVT VT = Op.getValueType();
921 // Since the number of lanes in a scalable vector is unknown at compile time,
922 // we track one bit which is implicitly broadcast to all lanes. This means
923 // that all lanes in a scalable vector are considered demanded.
924 APInt DemandedElts = VT.isFixedLengthVector()
925 ? APInt::getAllOnes(VT.getVectorNumElements())
926 : APInt(1, 1);
927 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
928 Depth);
929}
930
931SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
932 SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
933 unsigned Depth) const {
934 APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
935 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
936 Depth);
937}
938
939// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
940// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
941static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
942 const TargetLowering &TLI,
943 const APInt &DemandedBits,
944 const APInt &DemandedElts,
945 unsigned Depth) {
946 assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
947 "SRL or SRA node is required here!");
948 // Is the right shift using an immediate value of 1?
949 ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
950 if (!N1C || !N1C->isOne())
951 return SDValue();
952
953 // We are looking for an avgfloor
954 // add(ext, ext)
955 // or one of these as a avgceil
956 // add(add(ext, ext), 1)
957 // add(add(ext, 1), ext)
958 // add(ext, add(ext, 1))
959 SDValue Add = Op.getOperand(0);
960 if (Add.getOpcode() != ISD::ADD)
961 return SDValue();
962
963 SDValue ExtOpA = Add.getOperand(0);
964 SDValue ExtOpB = Add.getOperand(1);
965 SDValue Add2;
966 auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
967 ConstantSDNode *ConstOp;
968 if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
969 ConstOp->isOne()) {
970 ExtOpA = Op1;
971 ExtOpB = Op3;
972 Add2 = A;
973 return true;
974 }
975 if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
976 ConstOp->isOne()) {
977 ExtOpA = Op1;
978 ExtOpB = Op2;
979 Add2 = A;
980 return true;
981 }
982 return false;
983 };
984 bool IsCeil =
985 (ExtOpA.getOpcode() == ISD::ADD &&
986 MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
987 (ExtOpB.getOpcode() == ISD::ADD &&
988 MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
989
990 // If the shift is signed (sra):
991 // - Needs >= 2 sign bit for both operands.
992 // - Needs >= 2 zero bits.
993 // If the shift is unsigned (srl):
994 // - Needs >= 1 zero bit for both operands.
995 // - Needs 1 demanded bit zero and >= 2 sign bits.
996 unsigned ShiftOpc = Op.getOpcode();
997 bool IsSigned = false;
998 unsigned KnownBits;
999 unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
1000 unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
1001 unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
1002 unsigned NumZeroA =
1003 DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
1004 unsigned NumZeroB =
1005 DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
1006 unsigned NumZero = std::min(NumZeroA, NumZeroB);
1007
1008 switch (ShiftOpc) {
1009 default:
1010 llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
1011 case ISD::SRA: {
1012 if (NumZero >= 2 && NumSigned < NumZero) {
1013 IsSigned = false;
1014 KnownBits = NumZero;
1015 break;
1016 }
1017 if (NumSigned >= 1) {
1018 IsSigned = true;
1019 KnownBits = NumSigned;
1020 break;
1021 }
1022 return SDValue();
1023 }
1024 case ISD::SRL: {
1025 if (NumZero >= 1 && NumSigned < NumZero) {
1026 IsSigned = false;
1027 KnownBits = NumZero;
1028 break;
1029 }
1030 if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
1031 IsSigned = true;
1032 KnownBits = NumSigned;
1033 break;
1034 }
1035 return SDValue();
1036 }
1037 }
1038
1039 unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1040 : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1041
1042 // Find the smallest power-2 type that is legal for this vector size and
1043 // operation, given the original type size and the number of known sign/zero
1044 // bits.
1045 EVT VT = Op.getValueType();
1046 unsigned MinWidth =
1047 std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
1048 EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
1049 if (VT.isVector())
1050 NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
1051 if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT)) {
1052 // If we could not transform, and (both) adds are nuw/nsw, we can use the
1053 // larger type size to do the transform.
1054 if (!TLI.isOperationLegalOrCustom(AVGOpc, VT))
1055 return SDValue();
1056 if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
1057 Add.getOperand(1)) &&
1058 (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
1059 Add2.getOperand(1))))
1060 NVT = VT;
1061 else
1062 return SDValue();
1063 }
1064
1065 SDLoc DL(Op);
1066 SDValue ResultAVG =
1067 DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
1068 DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
1069 return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
1070}
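// Worked example (illustrative): for
//   srl (add (zext i8 A to i16), (zext i8 B to i16)), 1
// both addends have at least eight known leading zero bits, so the code above
// selects an unsigned flooring average and, when ISD::AVGFLOORU is legal or
// custom at i8, rewrites the whole expression as
//   zext (avgflooru A, B) to i16
// recovering A and B by truncating the extended operands.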
1071
1072/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1073/// result of Op are ever used downstream. If we can use this information to
1074/// simplify Op, create a new simplified DAG node and return true, returning the
1075/// original and new nodes in Old and New. Otherwise, analyze the expression and
1076/// return a mask of Known bits for the expression (used to simplify the
1077/// caller). The Known bits may only be accurate for those bits in the
1078/// OriginalDemandedBits and OriginalDemandedElts.
1079bool TargetLowering::SimplifyDemandedBits(
1080 SDValue Op, const APInt &OriginalDemandedBits,
1081 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1082 unsigned Depth, bool AssumeSingleUse) const {
1083 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1084 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1085 "Mask size mismatches value type size!");
1086
1087 // Don't know anything.
1088 Known = KnownBits(BitWidth);
1089
1090 EVT VT = Op.getValueType();
1091 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1092 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1093 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1094 "Unexpected vector size");
1095
1096 APInt DemandedBits = OriginalDemandedBits;
1097 APInt DemandedElts = OriginalDemandedElts;
1098 SDLoc dl(Op);
1099
1100 // Undef operand.
1101 if (Op.isUndef())
1102 return false;
1103
1104 // We can't simplify target constants.
1105 if (Op.getOpcode() == ISD::TargetConstant)
1106 return false;
1107
1108 if (Op.getOpcode() == ISD::Constant) {
1109 // We know all of the bits for a constant!
1110 Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1111 return false;
1112 }
1113
1114 if (Op.getOpcode() == ISD::ConstantFP) {
1115 // We know all of the bits for a floating point constant!
1116 Known = KnownBits::makeConstant(
1117 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1118 return false;
1119 }
1120
1121 // Other users may use these bits.
1122 bool HasMultiUse = false;
1123 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1124 if (Depth >= SelectionDAG::MaxRecursionDepth) {
1125 // Limit search depth.
1126 return false;
1127 }
1128 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1129 DemandedBits = APInt::getAllOnes(BitWidth);
1130 DemandedElts = APInt::getAllOnes(NumElts);
1131 HasMultiUse = true;
1132 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1133 // Not demanding any bits/elts from Op.
1134 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1135 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1136 // Limit search depth.
1137 return false;
1138 }
1139
1140 KnownBits Known2;
1141 switch (Op.getOpcode()) {
1142 case ISD::SCALAR_TO_VECTOR: {
1143 if (VT.isScalableVector())
1144 return false;
1145 if (!DemandedElts[0])
1146 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1147
1148 KnownBits SrcKnown;
1149 SDValue Src = Op.getOperand(0);
1150 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1151 APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1152 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1153 return true;
1154
1155 // Upper elements are undef, so only get the knownbits if we just demand
1156 // the bottom element.
1157 if (DemandedElts == 1)
1158 Known = SrcKnown.anyextOrTrunc(BitWidth);
1159 break;
1160 }
1161 case ISD::BUILD_VECTOR:
1162 // Collect the known bits that are shared by every demanded element.
1163 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1164 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1165 return false; // Don't fall through, will infinitely loop.
1166 case ISD::SPLAT_VECTOR: {
1167 SDValue Scl = Op.getOperand(0);
1168 APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1169 KnownBits KnownScl;
1170 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1171 return true;
1172
1173 // Implicitly truncate the bits to match the official semantics of
1174 // SPLAT_VECTOR.
1175 Known = KnownScl.trunc(BitWidth);
1176 break;
1177 }
1178 case ISD::LOAD: {
1179 auto *LD = cast<LoadSDNode>(Op);
1180 if (getTargetConstantFromLoad(LD)) {
1181 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1182 return false; // Don't fall through, will infinitely loop.
1183 }
1184 if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1185 // If this is a ZEXTLoad and we are looking at the loaded value.
1186 EVT MemVT = LD->getMemoryVT();
1187 unsigned MemBits = MemVT.getScalarSizeInBits();
1188 Known.Zero.setBitsFrom(MemBits);
1189 return false; // Don't fall through, will infinitely loop.
1190 }
1191 break;
1192 }
1193 case ISD::INSERT_VECTOR_ELT: {
1194 if (VT.isScalableVector())
1195 return false;
1196 SDValue Vec = Op.getOperand(0);
1197 SDValue Scl = Op.getOperand(1);
1198 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1199 EVT VecVT = Vec.getValueType();
1200
1201 // If index isn't constant, assume we need all vector elements AND the
1202 // inserted element.
1203 APInt DemandedVecElts(DemandedElts);
1204 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1205 unsigned Idx = CIdx->getZExtValue();
1206 DemandedVecElts.clearBit(Idx);
1207
1208 // Inserted element is not required.
1209 if (!DemandedElts[Idx])
1210 return TLO.CombineTo(Op, Vec);
1211 }
1212
1213 KnownBits KnownScl;
1214 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1215 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1216 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1217 return true;
1218
1219 Known = KnownScl.anyextOrTrunc(BitWidth);
1220
1221 KnownBits KnownVec;
1222 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1223 Depth + 1))
1224 return true;
1225
1226 if (!!DemandedVecElts)
1227 Known = Known.intersectWith(KnownVec);
1228
1229 return false;
1230 }
1231 case ISD::INSERT_SUBVECTOR: {
1232 if (VT.isScalableVector())
1233 return false;
1234 // Demand any elements from the subvector and the remainder from the src it's
1235 // inserted into.
1236 SDValue Src = Op.getOperand(0);
1237 SDValue Sub = Op.getOperand(1);
1238 uint64_t Idx = Op.getConstantOperandVal(2);
1239 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1240 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1241 APInt DemandedSrcElts = DemandedElts;
1242 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
1243
1244 KnownBits KnownSub, KnownSrc;
1245 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1246 Depth + 1))
1247 return true;
1248 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1249 Depth + 1))
1250 return true;
1251
1252 Known.Zero.setAllBits();
1253 Known.One.setAllBits();
1254 if (!!DemandedSubElts)
1255 Known = Known.intersectWith(KnownSub);
1256 if (!!DemandedSrcElts)
1257 Known = Known.intersectWith(KnownSrc);
1258
1259 // Attempt to avoid multi-use src if we don't need anything from it.
1260 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1261 !DemandedSrcElts.isAllOnes()) {
1262 SDValue NewSub = SimplifyMultipleUseDemandedBits(
1263 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1264 SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1265 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1266 if (NewSub || NewSrc) {
1267 NewSub = NewSub ? NewSub : Sub;
1268 NewSrc = NewSrc ? NewSrc : Src;
1269 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1270 Op.getOperand(2));
1271 return TLO.CombineTo(Op, NewOp);
1272 }
1273 }
1274 break;
1275 }
1276 case ISD::EXTRACT_SUBVECTOR: {
1277 if (VT.isScalableVector())
1278 return false;
1279 // Offset the demanded elts by the subvector index.
1280 SDValue Src = Op.getOperand(0);
1281 if (Src.getValueType().isScalableVector())
1282 break;
1283 uint64_t Idx = Op.getConstantOperandVal(1);
1284 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1285 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1286
1287 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1288 Depth + 1))
1289 return true;
1290
1291 // Attempt to avoid multi-use src if we don't need anything from it.
1292 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1293 SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1294 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1295 if (DemandedSrc) {
1296 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1297 Op.getOperand(1));
1298 return TLO.CombineTo(Op, NewOp);
1299 }
1300 }
1301 break;
1302 }
1303 case ISD::CONCAT_VECTORS: {
1304 if (VT.isScalableVector())
1305 return false;
1306 Known.Zero.setAllBits();
1307 Known.One.setAllBits();
1308 EVT SubVT = Op.getOperand(0).getValueType();
1309 unsigned NumSubVecs = Op.getNumOperands();
1310 unsigned NumSubElts = SubVT.getVectorNumElements();
1311 for (unsigned i = 0; i != NumSubVecs; ++i) {
1312 APInt DemandedSubElts =
1313 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1314 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1315 Known2, TLO, Depth + 1))
1316 return true;
1317 // Known bits are shared by every demanded subvector element.
1318 if (!!DemandedSubElts)
1319 Known = Known.intersectWith(Known2);
1320 }
1321 break;
1322 }
1323 case ISD::VECTOR_SHUFFLE: {
1324 assert(!VT.isScalableVector());
1325 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1326
1327 // Collect demanded elements from shuffle operands..
1328 APInt DemandedLHS, DemandedRHS;
1329 if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1330 DemandedRHS))
1331 break;
1332
1333 if (!!DemandedLHS || !!DemandedRHS) {
1334 SDValue Op0 = Op.getOperand(0);
1335 SDValue Op1 = Op.getOperand(1);
1336
1337 Known.Zero.setAllBits();
1338 Known.One.setAllBits();
1339 if (!!DemandedLHS) {
1340 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1341 Depth + 1))
1342 return true;
1343 Known = Known.intersectWith(Known2);
1344 }
1345 if (!!DemandedRHS) {
1346 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1347 Depth + 1))
1348 return true;
1349 Known = Known.intersectWith(Known2);
1350 }
1351
1352 // Attempt to avoid multi-use ops if we don't need anything from them.
1353 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1354 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1355 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1356 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1357 if (DemandedOp0 || DemandedOp1) {
1358 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1359 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1360 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1361 return TLO.CombineTo(Op, NewOp);
1362 }
1363 }
1364 break;
1365 }
1366 case ISD::AND: {
1367 SDValue Op0 = Op.getOperand(0);
1368 SDValue Op1 = Op.getOperand(1);
1369
1370 // If the RHS is a constant, check to see if the LHS would be zero without
1371 // using the bits from the RHS. Below, we use knowledge about the RHS to
1372 // simplify the LHS, here we're using information from the LHS to simplify
1373 // the RHS.
1374 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
1375 // Do not increment Depth here; that can cause an infinite loop.
1376 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1377 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1378 if ((LHSKnown.Zero & DemandedBits) ==
1379 (~RHSC->getAPIntValue() & DemandedBits))
1380 return TLO.CombineTo(Op, Op0);
1381
1382 // If any of the set bits in the RHS are known zero on the LHS, shrink
1383 // the constant.
1384 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1385 DemandedElts, TLO))
1386 return true;
1387
1388 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1389 // constant, but if this 'and' is only clearing bits that were just set by
1390 // the xor, then this 'and' can be eliminated by shrinking the mask of
1391 // the xor. For example, for a 32-bit X:
1392 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1393 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1394 LHSKnown.One == ~RHSC->getAPIntValue()) {
1395 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1396 return TLO.CombineTo(Op, Xor);
1397 }
1398 }
1399
1400 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1401 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1402 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1403 (Op0.getOperand(0).isUndef() ||
1404 ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
1405 Op0->hasOneUse()) {
1406 unsigned NumSubElts =
1407 Op0.getOperand(1).getValueType().getVectorNumElements();
1408 unsigned SubIdx = Op0.getConstantOperandVal(2);
1409 APInt DemandedSub =
1410 APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1411 KnownBits KnownSubMask =
1412 TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1413 if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1414 SDValue NewAnd =
1415 TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1416 SDValue NewInsert =
1417 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1418 Op0.getOperand(1), Op0.getOperand(2));
1419 return TLO.CombineTo(Op, NewInsert);
1420 }
1421 }
1422
1423 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1424 Depth + 1))
1425 return true;
1426 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1427 if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1428 Known2, TLO, Depth + 1))
1429 return true;
1430 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1431
1432 // If all of the demanded bits are known one on one side, return the other.
1433 // These bits cannot contribute to the result of the 'and'.
1434 if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1435 return TLO.CombineTo(Op, Op0);
1436 if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1437 return TLO.CombineTo(Op, Op1);
1438 // If all of the demanded bits in the inputs are known zeros, return zero.
1439 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1440 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1441 // If the RHS is a constant, see if we can simplify it.
1442 if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1443 TLO))
1444 return true;
1445 // If the operation can be done in a smaller type, do so.
1446 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1447 return true;
1448
1449 // Attempt to avoid multi-use ops if we don't need anything from them.
1450 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1451 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1452 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1453 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1454 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1455 if (DemandedOp0 || DemandedOp1) {
1456 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1457 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1458 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1459 return TLO.CombineTo(Op, NewOp);
1460 }
1461 }
1462
1463 Known &= Known2;
1464 break;
1465 }
1466 case ISD::OR: {
1467 SDValue Op0 = Op.getOperand(0);
1468 SDValue Op1 = Op.getOperand(1);
1469 SDNodeFlags Flags = Op.getNode()->getFlags();
1470 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1471 Depth + 1)) {
1472 if (Flags.hasDisjoint()) {
1473 Flags.setDisjoint(false);
1474 Op->setFlags(Flags);
1475 }
1476 return true;
1477 }
1478 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1479 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1480 Known2, TLO, Depth + 1)) {
1481 if (Flags.hasDisjoint()) {
1482 Flags.setDisjoint(false);
1483 Op->setFlags(Flags);
1484 }
1485 return true;
1486 }
1487 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1488
1489 // If all of the demanded bits are known zero on one side, return the other.
1490 // These bits cannot contribute to the result of the 'or'.
1491 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1492 return TLO.CombineTo(Op, Op0);
1493 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1494 return TLO.CombineTo(Op, Op1);
1495 // If the RHS is a constant, see if we can simplify it.
1496 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1497 return true;
1498 // If the operation can be done in a smaller type, do so.
1499 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1500 return true;
1501
1502 // Attempt to avoid multi-use ops if we don't need anything from them.
1503 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1504 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1505 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1506 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1507 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1508 if (DemandedOp0 || DemandedOp1) {
1509 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1510 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1511 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1512 return TLO.CombineTo(Op, NewOp);
1513 }
1514 }
1515
1516 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1517 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1518 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1519 Op0->hasOneUse() && Op1->hasOneUse()) {
1520 // Attempt to match all commutations - m_c_Or would've been useful!
1521 for (int I = 0; I != 2; ++I) {
1522 SDValue X = Op.getOperand(I).getOperand(0);
1523 SDValue C1 = Op.getOperand(I).getOperand(1);
1524 SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1525 SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1526 if (Alt.getOpcode() == ISD::OR) {
1527 for (int J = 0; J != 2; ++J) {
1528 if (X == Alt.getOperand(J)) {
1529 SDValue Y = Alt.getOperand(1 - J);
1530 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1531 {C1, C2})) {
1532 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1533 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1534 return TLO.CombineTo(
1535 Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1536 }
1537 }
1538 }
1539 }
1540 }
1541 }
1542
1543 Known |= Known2;
1544 break;
1545 }
1546 case ISD::XOR: {
1547 SDValue Op0 = Op.getOperand(0);
1548 SDValue Op1 = Op.getOperand(1);
1549
1550 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1551 Depth + 1))
1552 return true;
1553 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1554 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1555 Depth + 1))
1556 return true;
1557 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1558
1559 // If all of the demanded bits are known zero on one side, return the other.
1560 // These bits cannot contribute to the result of the 'xor'.
1561 if (DemandedBits.isSubsetOf(Known.Zero))
1562 return TLO.CombineTo(Op, Op0);
1563 if (DemandedBits.isSubsetOf(Known2.Zero))
1564 return TLO.CombineTo(Op, Op1);
1565 // If the operation can be done in a smaller type, do so.
1566 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1567 return true;
1568
1569 // If all of the unknown bits are known to be zero on one side or the other
1570 // turn this into an *inclusive* or.
1571 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1572 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1573 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1574
1575 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1576 if (C) {
1577 // If one side is a constant, and all of the set bits in the constant are
1578 // also known set on the other side, turn this into an AND, as we know
1579 // the bits will be cleared.
1580 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1581 // NB: it is okay if more bits are known than are requested
1582 if (C->getAPIntValue() == Known2.One) {
1583 SDValue ANDC =
1584 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1585 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1586 }
1587
1588 // If the RHS is a constant, see if we can change it. Don't alter a -1
1589 // constant because that's a 'not' op, and that is better for combining
1590 // and codegen.
1591 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1592 // We're flipping all demanded bits. Flip the undemanded bits too.
1593 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1594 return TLO.CombineTo(Op, New);
1595 }
1596
1597 unsigned Op0Opcode = Op0.getOpcode();
1598 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1599 if (ConstantSDNode *ShiftC =
1600 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1601 // Don't crash on an oversized shift. We can not guarantee that a
1602 // bogus shift has been simplified to undef.
1603 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1604 uint64_t ShiftAmt = ShiftC->getZExtValue();
1605 APInt Ones = APInt::getAllOnes(BitWidth);
1606 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1607 : Ones.lshr(ShiftAmt);
1608 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
1609 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1610 TLI.isDesirableToCommuteXorWithShift(Op.getNode())) {
1611 // If the xor constant is a demanded mask, do a 'not' before the
1612 // shift:
1613 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1614 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1615 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1616 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1617 Op0.getOperand(1)));
1618 }
1619 }
1620 }
1621 }
1622 }
1623
1624 // If we can't turn this into a 'not', try to shrink the constant.
1625 if (!C || !C->isAllOnes())
1626 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1627 return true;
1628
1629 // Attempt to avoid multi-use ops if we don't need anything from them.
1630 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1631 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1632 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1633 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1634 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1635 if (DemandedOp0 || DemandedOp1) {
1636 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1637 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1638 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1639 return TLO.CombineTo(Op, NewOp);
1640 }
1641 }
1642
1643 Known ^= Known2;
1644 break;
1645 }
1646 case ISD::SELECT:
1647 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1648 Known, TLO, Depth + 1))
1649 return true;
1650 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1651 Known2, TLO, Depth + 1))
1652 return true;
1653 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1654 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1655
1656 // If the operands are constants, see if we can simplify them.
1657 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1658 return true;
1659
1660 // Only known if known in both the LHS and RHS.
1661 Known = Known.intersectWith(Known2);
1662 break;
1663 case ISD::VSELECT:
1664 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1665 Known, TLO, Depth + 1))
1666 return true;
1667 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1668 Known2, TLO, Depth + 1))
1669 return true;
1670 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1671 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1672
1673 // Only known if known in both the LHS and RHS.
1674 Known = Known.intersectWith(Known2);
1675 break;
1676 case ISD::SELECT_CC:
1677 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1678 Known, TLO, Depth + 1))
1679 return true;
1680 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1681 Known2, TLO, Depth + 1))
1682 return true;
1683 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1684 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1685
1686 // If the operands are constants, see if we can simplify them.
1687 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1688 return true;
1689
1690 // Only known if known in both the LHS and RHS.
1691 Known = Known.intersectWith(Known2);
1692 break;
1693 case ISD::SETCC: {
1694 SDValue Op0 = Op.getOperand(0);
1695 SDValue Op1 = Op.getOperand(1);
1696 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1697 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1698 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1699 // -1, we may be able to bypass the setcc.
1700 if (DemandedBits.isSignMask() &&
1701 Op0.getScalarValueSizeInBits() == BitWidth &&
1702 getBooleanContents(Op0.getValueType()) ==
1703 BooleanContent::ZeroOrNegativeOneBooleanContent) {
1704 // If we're testing X < 0, then this compare isn't needed - just use X!
1705 // FIXME: We're limiting to integer types here, but this should also work
1706 // if we don't care about FP signed-zero. The use of SETLT with FP means
1707 // that we don't care about NaNs.
1708 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1709 (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1710 return TLO.CombineTo(Op, Op0);
1711
1712 // TODO: Should we check for other forms of sign-bit comparisons?
1713 // Examples: X <= -1, X >= 0
1714 }
1715 if (getBooleanContents(Op0.getValueType()) ==
1716 TargetLowering::ZeroOrOneBooleanContent &&
1717 BitWidth > 1)
1718 Known.Zero.setBitsFrom(1);
1719 break;
1720 }
1721 case ISD::SHL: {
1722 SDValue Op0 = Op.getOperand(0);
1723 SDValue Op1 = Op.getOperand(1);
1724 EVT ShiftVT = Op1.getValueType();
1725
1726 if (const APInt *SA =
1727 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1728 unsigned ShAmt = SA->getZExtValue();
1729 if (ShAmt == 0)
1730 return TLO.CombineTo(Op, Op0);
1731
1732 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1733 // single shift. We can do this if the bottom bits (which are shifted
1734 // out) are never demanded.
1735 // TODO - support non-uniform vector amounts.
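// Worked example (illustrative): for i32 with the low 3 bits not demanded,
// ((X >>u 8) << 3) --> (X >>u 5) - both forms agree on every demanded bit.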
1736 if (Op0.getOpcode() == ISD::SRL) {
1737 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1738 if (const APInt *SA2 =
1739 TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1740 unsigned C1 = SA2->getZExtValue();
1741 unsigned Opc = ISD::SHL;
1742 int Diff = ShAmt - C1;
1743 if (Diff < 0) {
1744 Diff = -Diff;
1745 Opc = ISD::SRL;
1746 }
1747 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1748 return TLO.CombineTo(
1749 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1750 }
1751 }
1752 }
1753
1754 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1755 // are not demanded. This will likely allow the anyext to be folded away.
1756 // TODO - support non-uniform vector amounts.
1757 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1758 SDValue InnerOp = Op0.getOperand(0);
1759 EVT InnerVT = InnerOp.getValueType();
1760 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1761 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1762 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1763 SDValue NarrowShl = TLO.DAG.getNode(
1764 ISD::SHL, dl, InnerVT, InnerOp,
1765 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1766 return TLO.CombineTo(
1767 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1768 }
1769
1770 // Repeat the SHL optimization above in cases where an extension
1771 // intervenes: (shl (anyext (shr x, c1)), c2) to
1772 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1773 // aren't demanded (as above) and that the shifted upper c1 bits of
1774 // x aren't demanded.
1775 // TODO - support non-uniform vector amounts.
1776 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1777 InnerOp.hasOneUse()) {
1778 if (const APInt *SA2 =
1779 TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
1780 unsigned InnerShAmt = SA2->getZExtValue();
1781 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1782 DemandedBits.getActiveBits() <=
1783 (InnerBits - InnerShAmt + ShAmt) &&
1784 DemandedBits.countr_zero() >= ShAmt) {
1785 SDValue NewSA =
1786 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1787 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1788 InnerOp.getOperand(0));
1789 return TLO.CombineTo(
1790 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1791 }
1792 }
1793 }
1794 }
1795
1796 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1797 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1798 Depth + 1)) {
1799 SDNodeFlags Flags = Op.getNode()->getFlags();
1800 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1801 // Disable the nsw and nuw flags. We can no longer guarantee that we
1802 // won't wrap after simplification.
1803 Flags.setNoSignedWrap(false);
1804 Flags.setNoUnsignedWrap(false);
1805 Op->setFlags(Flags);
1806 }
1807 return true;
1808 }
1809 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1810 Known.Zero <<= ShAmt;
1811 Known.One <<= ShAmt;
1812 // low bits known zero.
1813 Known.Zero.setLowBits(ShAmt);
1814
1815 // Attempt to avoid multi-use ops if we don't need anything from them.
1816 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1817 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1818 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1819 if (DemandedOp0) {
1820 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1821 return TLO.CombineTo(Op, NewOp);
1822 }
1823 }
1824
1825 // Try shrinking the operation as long as the shift amount will still be
1826 // in range.
1827 if ((ShAmt < DemandedBits.getActiveBits()) &&
1828 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1829 return true;
1830
1831 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1832 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1833 // Only do this if we demand the upper half so the knownbits are correct.
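// Illustrative example: if X is known to use only its low 28 bits, then
// (shl i64 X, 4) has a zero upper half, so it can be computed as
// (zero_extend (shl (trunc X to i32), 4)) when the narrower ops are cheap.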
1834 unsigned HalfWidth = BitWidth / 2;
1835 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1836 DemandedBits.countLeadingOnes() >= HalfWidth) {
1837 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1838 if (isNarrowingProfitable(VT, HalfVT) &&
1839 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1840 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1841 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1842 // If we're demanding the upper bits at all, we must ensure
1843 // that the upper bits of the shift result are known to be zero,
1844 // which is equivalent to the narrow shift being NUW.
1845 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1846 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1847 SDNodeFlags Flags;
1848 Flags.setNoSignedWrap(IsNSW);
1849 Flags.setNoUnsignedWrap(IsNUW);
1850 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1851 SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
1852 ShAmt, HalfVT, dl, TLO.LegalTypes());
1853 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1854 NewShiftAmt, Flags);
1855 SDValue NewExt =
1856 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1857 return TLO.CombineTo(Op, NewExt);
1858 }
1859 }
1860 }
1861 } else {
1862 // This is a variable shift, so we can't shift the demand mask by a known
1863 // amount. But if we are not demanding high bits, then we are not
1864 // demanding those bits from the pre-shifted operand either.
1865 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1866 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1867 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1868 Depth + 1)) {
1869 SDNodeFlags Flags = Op.getNode()->getFlags();
1870 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1871 // Disable the nsw and nuw flags. We can no longer guarantee that we
1872 // won't wrap after simplification.
1873 Flags.setNoSignedWrap(false);
1874 Flags.setNoUnsignedWrap(false);
1875 Op->setFlags(Flags);
1876 }
1877 return true;
1878 }
1879 Known.resetAll();
1880 }
1881 }
1882
1883 // If we are only demanding sign bits then we can use the shift source
1884 // directly.
1885 if (const APInt *MaxSA =
1886 TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
1887 unsigned ShAmt = MaxSA->getZExtValue();
1888 unsigned NumSignBits =
1889 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1890 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1891 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1892 return TLO.CombineTo(Op, Op0);
1893 }
1894 break;
1895 }
1896 case ISD::SRL: {
1897 SDValue Op0 = Op.getOperand(0);
1898 SDValue Op1 = Op.getOperand(1);
1899 EVT ShiftVT = Op1.getValueType();
1900
1901 // Try to match AVG patterns.
1902 if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
1903 DemandedElts, Depth + 1))
1904 return TLO.CombineTo(Op, AVG);
1905
1906 if (const APInt *SA =
1907 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1908 unsigned ShAmt = SA->getZExtValue();
1909 if (ShAmt == 0)
1910 return TLO.CombineTo(Op, Op0);
1911
1912 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1913 // single shift. We can do this if the top bits (which are shifted out)
1914 // are never demanded.
1915 // TODO - support non-uniform vector amounts.
1916 if (Op0.getOpcode() == ISD::SHL) {
1917 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1918 if (const APInt *SA2 =
1919 TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1920 unsigned C1 = SA2->getZExtValue();
1921 unsigned Opc = ISD::SRL;
1922 int Diff = ShAmt - C1;
1923 if (Diff < 0) {
1924 Diff = -Diff;
1925 Opc = ISD::SHL;
1926 }
1927 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1928 return TLO.CombineTo(
1929 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1930 }
1931 }
1932 }
1933
1934 APInt InDemandedMask = (DemandedBits << ShAmt);
1935
1936 // If the shift is exact, then it does demand the low bits (and knows that
1937 // they are zero).
1938 if (Op->getFlags().hasExact())
1939 InDemandedMask.setLowBits(ShAmt);
1940
1941 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1942 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
1943 if ((BitWidth % 2) == 0 && !VT.isVector()) {
1943 APInt HiBits = APInt::getHighBitsSet(BitWidth, BitWidth / 2);
1945 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
1946 if (isNarrowingProfitable(VT, HalfVT) &&
1947 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
1948 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1949 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
1950 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
1951 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
1952 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1953 SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
1954 ShAmt, HalfVT, dl, TLO.LegalTypes());
1955 SDValue NewShift =
1956 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
1957 return TLO.CombineTo(
1958 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
1959 }
1960 }
1961
1962 // Compute the new bits that are at the top now.
1963 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1964 Depth + 1))
1965 return true;
1966 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1967 Known.Zero.lshrInPlace(ShAmt);
1968 Known.One.lshrInPlace(ShAmt);
1969 // High bits known zero.
1970 Known.Zero.setHighBits(ShAmt);
1971
1972 // Attempt to avoid multi-use ops if we don't need anything from them.
1973 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1974 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1975 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1976 if (DemandedOp0) {
1977 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
1978 return TLO.CombineTo(Op, NewOp);
1979 }
1980 }
1981 } else {
1982 // Use generic knownbits computation as it has support for non-uniform
1983 // shift amounts.
1984 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1985 }
1986 break;
1987 }
1988 case ISD::SRA: {
1989 SDValue Op0 = Op.getOperand(0);
1990 SDValue Op1 = Op.getOperand(1);
1991 EVT ShiftVT = Op1.getValueType();
1992
1993 // If we only want bits that already match the signbit then we don't need
1994 // to shift.
1995 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
1996 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
1997 NumHiDemandedBits)
1998 return TLO.CombineTo(Op, Op0);
1999
2000 // If this is an arithmetic shift right and only the low-bit is set, we can
2001 // always convert this into a logical shr, even if the shift amount is
2002 // variable. The low bit of the shift cannot be an input sign bit unless
2003 // the shift amount is >= the size of the datatype, which is undefined.
2004 if (DemandedBits.isOne())
2005 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2006
2007 // Try to match AVG patterns.
2008 if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
2009 DemandedElts, Depth + 1))
2010 return TLO.CombineTo(Op, AVG);
2011
2012 if (const APInt *SA =
2013 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
2014 unsigned ShAmt = SA->getZExtValue();
2015 if (ShAmt == 0)
2016 return TLO.CombineTo(Op, Op0);
2017
2018 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2019 // supports sext_inreg.
2020 if (Op0.getOpcode() == ISD::SHL) {
2021 if (const APInt *InnerSA =
2022 TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
2023 unsigned LowBits = BitWidth - ShAmt;
2024 EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
2025 if (VT.isVector())
2026 ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
2027 VT.getVectorElementCount());
2028
2029 if (*InnerSA == ShAmt) {
2030 if (!TLO.LegalOperations() ||
2031 isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT))
2032 return TLO.CombineTo(
2033 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2034 Op0.getOperand(0),
2035 TLO.DAG.getValueType(ExtVT)));
2036
2037 // Even if we can't convert to sext_inreg, we might be able to
2038 // remove this shift pair if the input is already sign extended.
2039 unsigned NumSignBits =
2040 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2041 if (NumSignBits > ShAmt)
2042 return TLO.CombineTo(Op, Op0.getOperand(0));
2043 }
2044 }
2045 }
2046
2047 APInt InDemandedMask = (DemandedBits << ShAmt);
2048
2049 // If the shift is exact, then it does demand the low bits (and knows that
2050 // they are zero).
2051 if (Op->getFlags().hasExact())
2052 InDemandedMask.setLowBits(ShAmt);
2053
2054 // If any of the demanded bits are produced by the sign extension, we also
2055 // demand the input sign bit.
2056 if (DemandedBits.countl_zero() < ShAmt)
2057 InDemandedMask.setSignBit();
2058
2059 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2060 Depth + 1))
2061 return true;
2062 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2063 Known.Zero.lshrInPlace(ShAmt);
2064 Known.One.lshrInPlace(ShAmt);
2065
2066 // If the input sign bit is known to be zero, or if none of the top bits
2067 // are demanded, turn this into an unsigned shift right.
2068 if (Known.Zero[BitWidth - ShAmt - 1] ||
2069 DemandedBits.countl_zero() >= ShAmt) {
2070 SDNodeFlags Flags;
2071 Flags.setExact(Op->getFlags().hasExact());
2072 return TLO.CombineTo(
2073 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2074 }
2075
2076 int Log2 = DemandedBits.exactLogBase2();
2077 if (Log2 >= 0) {
2078 // The bit must come from the sign.
2079 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2080 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2081 }
2082
2083 if (Known.One[BitWidth - ShAmt - 1])
2084 // New bits are known one.
2085 Known.One.setHighBits(ShAmt);
2086
2087 // Attempt to avoid multi-use ops if we don't need anything from them.
2088 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2089 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2090 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2091 if (DemandedOp0) {
2092 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2093 return TLO.CombineTo(Op, NewOp);
2094 }
2095 }
2096 }
2097 break;
2098 }
2099 case ISD::FSHL:
2100 case ISD::FSHR: {
2101 SDValue Op0 = Op.getOperand(0);
2102 SDValue Op1 = Op.getOperand(1);
2103 SDValue Op2 = Op.getOperand(2);
2104 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2105
2106 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2107 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2108
2109 // For fshl, 0-shift returns the 1st arg.
2110 // For fshr, 0-shift returns the 2nd arg.
2111 if (Amt == 0) {
2112 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2113 Known, TLO, Depth + 1))
2114 return true;
2115 break;
2116 }
2117
2118 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2119 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2120 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2121 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
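// Worked example (illustrative): fshl(i8 A, B, 3) = (A << 3) | (B >> 5), so
// if only the low 3 bits are demanded, nothing is needed from A
// (Demanded0 == 0) and only the top 3 bits are needed from B
// (Demanded1 == 0xE0).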
2122 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2123 Depth + 1))
2124 return true;
2125 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2126 Depth + 1))
2127 return true;
2128
2129 Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2130 Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2131 Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2132 Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2133 Known = Known.unionWith(Known2);
2134
2135 // Attempt to avoid multi-use ops if we don't need anything from them.
2136 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2137 !DemandedElts.isAllOnes()) {
2138 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2139 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2140 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2141 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2142 if (DemandedOp0 || DemandedOp1) {
2143 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2144 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2145 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2146 DemandedOp1, Op2);
2147 return TLO.CombineTo(Op, NewOp);
2148 }
2149 }
2150 }
2151
2152 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2153 if (isPowerOf2_32(BitWidth)) {
2154 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2155 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2156 Known2, TLO, Depth + 1))
2157 return true;
2158 }
2159 break;
2160 }
2161 case ISD::ROTL:
2162 case ISD::ROTR: {
2163 SDValue Op0 = Op.getOperand(0);
2164 SDValue Op1 = Op.getOperand(1);
2165 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2166
2167 // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
2168 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2169 return TLO.CombineTo(Op, Op0);
2170
2171 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2172 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2173 unsigned RevAmt = BitWidth - Amt;
2174
2175 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2176 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2177 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2178 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2179 Depth + 1))
2180 return true;
2181
2182 // rot*(x, 0) --> x
2183 if (Amt == 0)
2184 return TLO.CombineTo(Op, Op0);
2185
2186 // See if we don't demand either half of the rotated bits.
2187 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2188 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2189 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2190 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2191 }
2192 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2193 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2194 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2195 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2196 }
2197 }
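// Worked example (illustrative): rotl(i8 X, 3) = (X << 3) | (X >> 5); if the
// low 3 bits are not demanded, the (X >> 5) half contributes nothing that is
// used, so the rotate simplifies to (shl X, 3).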
2198
2199 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2200 if (isPowerOf2_32(BitWidth)) {
2201 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2202 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2203 Depth + 1))
2204 return true;
2205 }
2206 break;
2207 }
2208 case ISD::SMIN:
2209 case ISD::SMAX:
2210 case ISD::UMIN:
2211 case ISD::UMAX: {
2212 unsigned Opc = Op.getOpcode();
2213 SDValue Op0 = Op.getOperand(0);
2214 SDValue Op1 = Op.getOperand(1);
2215
2216 // If we're only demanding signbits, then we can simplify to OR/AND node.
2217 unsigned BitOp =
2218 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2219 unsigned NumSignBits =
2220 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2221 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2222 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2223 if (NumSignBits >= NumDemandedUpperBits)
2224 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
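// Illustrative example: if both operands are known to be 0 or -1 (splat sign
// bits), umax/smin is -1 iff either operand is, i.e. (A | B), while
// umin/smax is -1 only if both are, i.e. (A & B).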
2225
2226 // Check if one arg is always less/greater than (or equal) to the other arg.
2227 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2228 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2229 switch (Opc) {
2230 case ISD::SMIN:
2231 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2232 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2233 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2234 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2235 Known = KnownBits::smin(Known0, Known1);
2236 break;
2237 case ISD::SMAX:
2238 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2239 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2240 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2241 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2242 Known = KnownBits::smax(Known0, Known1);
2243 break;
2244 case ISD::UMIN:
2245 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2246 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2247 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2248 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2249 Known = KnownBits::umin(Known0, Known1);
2250 break;
2251 case ISD::UMAX:
2252 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2253 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2254 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2255 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2256 Known = KnownBits::umax(Known0, Known1);
2257 break;
2258 }
2259 break;
2260 }
2261 case ISD::BITREVERSE: {
2262 SDValue Src = Op.getOperand(0);
2263 APInt DemandedSrcBits = DemandedBits.reverseBits();
2264 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2265 Depth + 1))
2266 return true;
2267 Known.One = Known2.One.reverseBits();
2268 Known.Zero = Known2.Zero.reverseBits();
2269 break;
2270 }
2271 case ISD::BSWAP: {
2272 SDValue Src = Op.getOperand(0);
2273
2274 // If the only bits demanded come from one byte of the bswap result,
2275 // just shift the input byte into position to eliminate the bswap.
2276 unsigned NLZ = DemandedBits.countl_zero();
2277 unsigned NTZ = DemandedBits.countr_zero();
2278
2279 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2280 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2281 // have 14 leading zeros, round to 8.
2282 NLZ = alignDown(NLZ, 8);
2283 NTZ = alignDown(NTZ, 8);
2284 // If we need exactly one byte, we can do this transformation.
2285 if (BitWidth - NLZ - NTZ == 8) {
2286 // Replace this with either a left or right shift to get the byte into
2287 // the right place.
2288 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2289 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2290 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2291 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2292 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2293 return TLO.CombineTo(Op, NewOp);
2294 }
2295 }
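// Worked example (illustrative): demanding only bits 8..15 of (bswap i32 X)
// gives NLZ=16, NTZ=8; result byte 1 is source byte 2, which (srl X, 8) also
// places in byte 1, so the bswap is not needed.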
2296
2297 APInt DemandedSrcBits = DemandedBits.byteSwap();
2298 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2299 Depth + 1))
2300 return true;
2301 Known.One = Known2.One.byteSwap();
2302 Known.Zero = Known2.Zero.byteSwap();
2303 break;
2304 }
2305 case ISD::CTPOP: {
2306 // If only 1 bit is demanded, replace with PARITY as long as we're before
2307 // op legalization.
2308 // FIXME: Limit to scalars for now.
2309 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2310 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2311 Op.getOperand(0)));
2312
2313 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2314 break;
2315 }
2316 case ISD::SIGN_EXTEND_INREG: {
2317 SDValue Op0 = Op.getOperand(0);
2318 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2319 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2320
2321 // If we only care about the highest bit, don't bother shifting right.
2322 if (DemandedBits.isSignMask()) {
2323 unsigned MinSignedBits =
2324 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2325 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2326 // However if the input is already sign extended we expect the sign
2327 // extension to be dropped altogether later and do not simplify.
2328 if (!AlreadySignExtended) {
2329 // Compute the correct shift amount type, which must be getShiftAmountTy
2330 // for scalar types after legalization.
2331 SDValue ShiftAmt =
2332 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2333 return TLO.CombineTo(Op,
2334 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2335 }
2336 }
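// Illustrative example: for (sign_extend_inreg i32 X, i8) with only bit 31
// demanded, the result sign bit is bit 7 of X, so (shl X, 24) produces it
// directly.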
2337
2338 // If none of the extended bits are demanded, eliminate the sextinreg.
2339 if (DemandedBits.getActiveBits() <= ExVTBits)
2340 return TLO.CombineTo(Op, Op0);
2341
2342 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2343
2344 // Since the sign extended bits are demanded, we know that the sign
2345 // bit is demanded.
2346 InputDemandedBits.setBit(ExVTBits - 1);
2347
2348 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2349 Depth + 1))
2350 return true;
2351 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2352
2353 // If the sign bit of the input is known set or clear, then we know the
2354 // top bits of the result.
2355
2356 // If the input sign bit is known zero, convert this into a zero extension.
2357 if (Known.Zero[ExVTBits - 1])
2358 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2359
2360 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2361 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2362 Known.One.setBitsFrom(ExVTBits);
2363 Known.Zero &= Mask;
2364 } else { // Input sign bit unknown
2365 Known.Zero &= Mask;
2366 Known.One &= Mask;
2367 }
2368 break;
2369 }
2370 case ISD::BUILD_PAIR: {
2371 EVT HalfVT = Op.getOperand(0).getValueType();
2372 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2373
2374 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2375 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2376
2377 KnownBits KnownLo, KnownHi;
2378
2379 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2380 return true;
2381
2382 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2383 return true;
2384
2385 Known = KnownHi.concat(KnownLo);
2386 break;
2387 }
2388 case ISD::ZERO_EXTEND_VECTOR_INREG:
2389 if (VT.isScalableVector())
2390 return false;
2391 [[fallthrough]];
2392 case ISD::ZERO_EXTEND: {
2393 SDValue Src = Op.getOperand(0);
2394 EVT SrcVT = Src.getValueType();
2395 unsigned InBits = SrcVT.getScalarSizeInBits();
2396 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2397 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2398
2399 // If none of the top bits are demanded, convert this into an any_extend.
2400 if (DemandedBits.getActiveBits() <= InBits) {
2401 // If we only need the non-extended bits of the bottom element
2402 // then we can just bitcast to the result.
2403 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2404 VT.getSizeInBits() == SrcVT.getSizeInBits())
2405 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2406
2407 unsigned Opc =
2408 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2409 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2410 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2411 }
2412
2413 SDNodeFlags Flags = Op->getFlags();
2414 APInt InDemandedBits = DemandedBits.trunc(InBits);
2415 APInt InDemandedElts = DemandedElts.zext(InElts);
2416 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2417 Depth + 1)) {
2418 if (Flags.hasNonNeg()) {
2419 Flags.setNonNeg(false);
2420 Op->setFlags(Flags);
2421 }
2422 return true;
2423 }
2424 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2425 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2426 Known = Known.zext(BitWidth);
2427
2428 // Attempt to avoid multi-use ops if we don't need anything from them.
2429 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2430 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2431 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2432 break;
2433 }
2434 case ISD::SIGN_EXTEND_VECTOR_INREG:
2435 if (VT.isScalableVector())
2436 return false;
2437 [[fallthrough]];
2438 case ISD::SIGN_EXTEND: {
2439 SDValue Src = Op.getOperand(0);
2440 EVT SrcVT = Src.getValueType();
2441 unsigned InBits = SrcVT.getScalarSizeInBits();
2442 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2443 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2444
2445 APInt InDemandedElts = DemandedElts.zext(InElts);
2446 APInt InDemandedBits = DemandedBits.trunc(InBits);
2447
2448 // Since some of the sign extended bits are demanded, we know that the sign
2449 // bit is demanded.
2450 InDemandedBits.setBit(InBits - 1);
2451
2452 // If none of the top bits are demanded, convert this into an any_extend.
2453 if (DemandedBits.getActiveBits() <= InBits) {
2454 // If we only need the non-extended bits of the bottom element
2455 // then we can just bitcast to the result.
2456 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2457 VT.getSizeInBits() == SrcVT.getSizeInBits())
2458 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2459
2460 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2461 if (getBooleanContents(VT) != ZeroOrNegativeOneBooleanContent ||
2462 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2463 InBits) {
2464 unsigned Opc =
2465 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2466 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2467 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2468 }
2469 }
2470
2471 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2472 Depth + 1))
2473 return true;
2474 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2475 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2476
2477 // If the sign bit is known one, the top bits match.
2478 Known = Known.sext(BitWidth);
2479
2480 // If the sign bit is known zero, convert this to a zero extend.
2481 if (Known.isNonNegative()) {
2482 unsigned Opc =
2483 IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2484 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2485 SDNodeFlags Flags;
2486 if (!IsVecInReg)
2487 Flags.setNonNeg(true);
2488 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2489 }
2490 }
2491
2492 // Attempt to avoid multi-use ops if we don't need anything from them.
2493 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2494 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2495 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2496 break;
2497 }
2498 case ISD::ANY_EXTEND_VECTOR_INREG:
2499 if (VT.isScalableVector())
2500 return false;
2501 [[fallthrough]];
2502 case ISD::ANY_EXTEND: {
2503 SDValue Src = Op.getOperand(0);
2504 EVT SrcVT = Src.getValueType();
2505 unsigned InBits = SrcVT.getScalarSizeInBits();
2506 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2507 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2508
2509 // If we only need the bottom element then we can just bitcast.
2510 // TODO: Handle ANY_EXTEND?
2511 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2512 VT.getSizeInBits() == SrcVT.getSizeInBits())
2513 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2514
2515 APInt InDemandedBits = DemandedBits.trunc(InBits);
2516 APInt InDemandedElts = DemandedElts.zext(InElts);
2517 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2518 Depth + 1))
2519 return true;
2520 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2521 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2522 Known = Known.anyext(BitWidth);
2523
2524 // Attempt to avoid multi-use ops if we don't need anything from them.
2525 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2526 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2527 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2528 break;
2529 }
2530 case ISD::TRUNCATE: {
2531 SDValue Src = Op.getOperand(0);
2532
2533 // Simplify the input, using demanded bit information, and compute the known
2534 // zero/one bits live out.
2535 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2536 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2537 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2538 Depth + 1))
2539 return true;
2540 Known = Known.trunc(BitWidth);
2541
2542 // Attempt to avoid multi-use ops if we don't need anything from them.
2543 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2544 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2545 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2546
2547 // If the input is only used by this truncate, see if we can shrink it based
2548 // on the known demanded bits.
2549 switch (Src.getOpcode()) {
2550 default:
2551 break;
2552 case ISD::SRL:
2553 // Shrink SRL by a constant if none of the high bits shifted in are
2554 // demanded.
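// Worked example (illustrative): (trunc i64 (srl X, 16) to i32) where only
// the low 16 bits are demanded becomes (srl (trunc X to i32), 16) - none of
// the shifted-in high bits of X are used.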
2555 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2556 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2557 // undesirable.
2558 break;
2559
2560 if (Src.getNode()->hasOneUse()) {
2561 const APInt *ShAmtC =
2562 TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
2563 if (!ShAmtC || ShAmtC->uge(BitWidth))
2564 break;
2565 uint64_t ShVal = ShAmtC->getZExtValue();
2566
2567 APInt HighBits =
2568 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2569 HighBits.lshrInPlace(ShVal);
2570 HighBits = HighBits.trunc(BitWidth);
2571
2572 if (!(HighBits & DemandedBits)) {
2573 // None of the shifted in bits are needed. Add a truncate of the
2574 // shift input, then shift it.
2575 SDValue NewShAmt =
2576 TLO.DAG.getShiftAmountConstant(ShVal, VT, dl, TLO.LegalTypes());
2577 SDValue NewTrunc =
2578 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2579 return TLO.CombineTo(
2580 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2581 }
2582 }
2583 break;
2584 }
2585
2586 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2587 break;
2588 }
2589 case ISD::AssertZext: {
2590 // AssertZext demands all of the high bits, plus any of the low bits
2591 // demanded by its users.
2592 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2593 APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2594 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2595 TLO, Depth + 1))
2596 return true;
2597 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2598
2599 Known.Zero |= ~InMask;
2600 Known.One &= (~Known.Zero);
2601 break;
2602 }
2603 case ISD::EXTRACT_VECTOR_ELT: {
2604 SDValue Src = Op.getOperand(0);
2605 SDValue Idx = Op.getOperand(1);
2606 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2607 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2608
2609 if (SrcEltCnt.isScalable())
2610 return false;
2611
2612 // Demand the bits from every vector element without a constant index.
2613 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2614 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2615 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2616 if (CIdx->getAPIntValue().ult(NumSrcElts))
2617 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2618
2619 // If BitWidth > EltBitWidth the value is any-extended, so we do not know
2620 // anything about the extended bits.
2621 APInt DemandedSrcBits = DemandedBits;
2622 if (BitWidth > EltBitWidth)
2623 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2624
2625 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2626 Depth + 1))
2627 return true;
2628
2629 // Attempt to avoid multi-use ops if we don't need anything from them.
2630 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2631 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2632 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2633 SDValue NewOp =
2634 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2635 return TLO.CombineTo(Op, NewOp);
2636 }
2637 }
2638
2639 Known = Known2;
2640 if (BitWidth > EltBitWidth)
2641 Known = Known.anyext(BitWidth);
2642 break;
2643 }
2644 case ISD::BITCAST: {
2645 if (VT.isScalableVector())
2646 return false;
2647 SDValue Src = Op.getOperand(0);
2648 EVT SrcVT = Src.getValueType();
2649 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2650
2651 // If this is an FP->Int bitcast and if the sign bit is the only
2652 // thing demanded, turn this into a FGETSIGN.
2653 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2654 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2655 SrcVT.isFloatingPoint()) {
2656 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2657 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2658 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2659 SrcVT != MVT::f128) {
2660 // Cannot eliminate/lower SHL for f128 yet.
2661 EVT Ty = OpVTLegal ? VT : MVT::i32;
2662 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2663 // place. We expect the SHL to be eliminated by other optimizations.
2664 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2665 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2666 if (!OpVTLegal && OpVTSizeInBits > 32)
2667 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2668 unsigned ShVal = Op.getValueSizeInBits() - 1;
2669 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2670 return TLO.CombineTo(Op,
2671 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2672 }
2673 }
2674
2675 // Bitcast from a vector using SimplifyDemandedBits/SimplifyDemandedVectorElts.
2676 // Demand the elt/bit if any of the original elts/bits are demanded.
2677 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2678 unsigned Scale = BitWidth / NumSrcEltBits;
2679 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2680 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2681 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2682 for (unsigned i = 0; i != Scale; ++i) {
2683 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2684 unsigned BitOffset = EltOffset * NumSrcEltBits;
2685 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2686 if (!Sub.isZero()) {
2687 DemandedSrcBits |= Sub;
2688 for (unsigned j = 0; j != NumElts; ++j)
2689 if (DemandedElts[j])
2690 DemandedSrcElts.setBit((j * Scale) + i);
2691 }
2692 }
2693
2694 APInt KnownSrcUndef, KnownSrcZero;
2695 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2696 KnownSrcZero, TLO, Depth + 1))
2697 return true;
2698
2699 KnownBits KnownSrcBits;
2700 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2701 KnownSrcBits, TLO, Depth + 1))
2702 return true;
2703 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2704 // TODO - bigendian once we have test coverage.
2705 unsigned Scale = NumSrcEltBits / BitWidth;
2706 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2707 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2708 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2709 for (unsigned i = 0; i != NumElts; ++i)
2710 if (DemandedElts[i]) {
2711 unsigned Offset = (i % Scale) * BitWidth;
2712 DemandedSrcBits.insertBits(DemandedBits, Offset);
2713 DemandedSrcElts.setBit(i / Scale);
2714 }
2715
2716 if (SrcVT.isVector()) {
2717 APInt KnownSrcUndef, KnownSrcZero;
2718 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2719 KnownSrcZero, TLO, Depth + 1))
2720 return true;
2721 }
2722
2723 KnownBits KnownSrcBits;
2724 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2725 KnownSrcBits, TLO, Depth + 1))
2726 return true;
2727
2728 // Attempt to avoid multi-use ops if we don't need anything from them.
2729 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2730 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2731 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2732 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2733 return TLO.CombineTo(Op, NewOp);
2734 }
2735 }
2736 }
2737
2738 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2739 // recursive call where Known may be useful to the caller.
2740 if (Depth > 0) {
2741 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2742 return false;
2743 }
2744 break;
2745 }
2746 case ISD::MUL:
2747 if (DemandedBits.isPowerOf2()) {
2748 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2749 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2750 // odd (has LSB set), then the left-shifted low bit of X is the answer.
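// Worked example (illustrative): demanding only bit 3 of (X * 24): since
// 24 == 3 << 3 with 3 odd, bit 3 of the product equals bit 0 of X, so
// (shl X, 3) is enough.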
2751 unsigned CTZ = DemandedBits.countr_zero();
2752 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2753 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2754 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2755 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2756 return TLO.CombineTo(Op, Shl);
2757 }
2758 }
2759 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2760 // X * X is odd iff X is odd.
2761 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2762 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2763 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2764 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2765 return TLO.CombineTo(Op, And1);
2766 }
2767 [[fallthrough]];
2768 case ISD::ADD:
2769 case ISD::SUB: {
2770 // Add, Sub, and Mul don't demand any bits in positions beyond that
2771 // of the highest bit demanded of them.
2772 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2773 SDNodeFlags Flags = Op.getNode()->getFlags();
2774 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2775 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2776 KnownBits KnownOp0, KnownOp1;
2777 if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, KnownOp0, TLO,
2778 Depth + 1) ||
2779 SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2780 Depth + 1) ||
2781 // See if the operation should be performed at a smaller bit width.
2782 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2783 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2784 // Disable the nsw and nuw flags. We can no longer guarantee that we
2785 // won't wrap after simplification.
2786 Flags.setNoSignedWrap(false);
2787 Flags.setNoUnsignedWrap(false);
2788 Op->setFlags(Flags);
2789 }
2790 return true;
2791 }
2792
2793 // neg x with only low bit demanded is simply x.
2794 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2795 isNullConstant(Op0))
2796 return TLO.CombineTo(Op, Op1);
2797
2798 // Attempt to avoid multi-use ops if we don't need anything from them.
2799 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2800 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2801 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2802 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2803 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2804 if (DemandedOp0 || DemandedOp1) {
2805 Flags.setNoSignedWrap(false);
2806 Flags.setNoUnsignedWrap(false);
2807 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2808 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2809 SDValue NewOp =
2810 TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2811 return TLO.CombineTo(Op, NewOp);
2812 }
2813 }
2814
2815 // If we have a constant operand, we may be able to turn it into -1 if we
2816 // do not demand the high bits. This can make the constant smaller to
2817 // encode, allow more general folding, or match specialized instruction
2818 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2819 // is probably not useful (and could be detrimental).
2820 ConstantSDNode *C = isConstOrConstSplat(Op1);
2821 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2822 if (C && !C->isAllOnes() && !C->isOne() &&
2823 (C->getAPIntValue() | HighMask).isAllOnes()) {
2824 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2825 // Disable the nsw and nuw flags. We can no longer guarantee that we
2826 // won't wrap after simplification.
2827 Flags.setNoSignedWrap(false);
2828 Flags.setNoUnsignedWrap(false);
2829 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
2830 return TLO.CombineTo(Op, NewOp);
2831 }
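// Illustrative example: with only the low 12 bits demanded, (add X, 0xFFF)
// can use the constant -1 instead - both agree on the demanded bits and -1
// is typically cheaper to materialize.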
2832
2833 // Match a multiply with a disguised negated-power-of-2 and convert to
2834 // an equivalent shift-left amount.
2835 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2836 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2837 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2838 return 0;
2839
2840 // Don't touch opaque constants. Also, ignore zero and power-of-2
2841 // multiplies. Those will get folded later.
2842 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2843 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2844 !MulC->getAPIntValue().isPowerOf2()) {
2845 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2846 if (UnmaskedC.isNegatedPowerOf2())
2847 return (-UnmaskedC).logBase2();
2848 }
2849 return 0;
2850 };
2851
2852 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2853 unsigned ShlAmt) {
2854 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2855 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2856 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2857 return TLO.CombineTo(Op, Res);
2858 };
2859
2859
2860 if (isOperationLegalOrCustom(ISD::SHL, VT)) {
2861 if (Op.getOpcode() == ISD::ADD) {
2862 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2863 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2864 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2865 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2866 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2867 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2868 }
2869 if (Op.getOpcode() == ISD::SUB) {
2870 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2871 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2872 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2873 }
2874 }
2875
2876 if (Op.getOpcode() == ISD::MUL) {
2877 Known = KnownBits::mul(KnownOp0, KnownOp1);
2878 } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2879 Known = KnownBits::computeForAddSub(Op.getOpcode() == ISD::ADD,
2880 Flags.hasNoSignedWrap(), KnownOp0,
2881 KnownOp1);
2882 }
2883 break;
2884 }
2885 default:
2886 // We also ask the target about intrinsics (which could be specific to it).
2887 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2888 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2889 // TODO: Probably okay to remove after audit; here to reduce change size
2890 // in initial enablement patch for scalable vectors
2891 if (Op.getValueType().isScalableVector())
2892 break;
2893 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2894 Known, TLO, Depth))
2895 return true;
2896 break;
2897 }
2898
2899 // Just use computeKnownBits to compute output bits.
2900 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2901 break;
2902 }
2903
2904 // If we know the value of all of the demanded bits, return this as a
2905 // constant.
2906 if (!isTargetCanonicalConstantNode(Op) &&
2907 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2908 // Avoid folding to a constant if any OpaqueConstant is involved.
2909 const SDNode *N = Op.getNode();
2910 for (SDNode *Op :
2911 llvm::make_range(N->op_begin(), N->op_end())) {
2912 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
2913 if (C->isOpaque())
2914 return false;
2915 }
2916 if (VT.isInteger())
2917 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2918 if (VT.isFloatingPoint())
2919 return TLO.CombineTo(
2920 Op,
2921 TLO.DAG.getConstantFP(
2922 APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
2923 }
2924
2925 // A multi-use 'all demanded elts' simplify failed to find any known bits.
2926 // Try again just for the original demanded elts.
2927 // Ensure we do this AFTER constant folding above.
2928 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2929 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
2930
2931 return false;
2932}
2933
2934 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2935 const APInt &DemandedElts,
2936 DAGCombinerInfo &DCI) const {
2937 SelectionDAG &DAG = DCI.DAG;
2938 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2939 !DCI.isBeforeLegalizeOps());
2940
2941 APInt KnownUndef, KnownZero;
2942 bool Simplified =
2943 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2944 if (Simplified) {
2945 DCI.AddToWorklist(Op.getNode());
2946 DCI.CommitTargetLoweringOpt(TLO);
2947 }
2948
2949 return Simplified;
2950}
2951
2952/// Given a vector binary operation and known undefined elements for each input
2953/// operand, compute whether each element of the output is undefined.
2954 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2955 const APInt &UndefOp0,
2956 const APInt &UndefOp1) {
2957 EVT VT = BO.getValueType();
2959 "Vector binop only");
2960
2961 EVT EltVT = VT.getVectorElementType();
2962 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
2963 assert(UndefOp0.getBitWidth() == NumElts &&
2964 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2965
2966 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2967 const APInt &UndefVals) {
2968 if (UndefVals[Index])
2969 return DAG.getUNDEF(EltVT);
2970
2971 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
2972 // Try hard to make sure that the getNode() call is not creating temporary
2973 // nodes. Ignore opaque integers because they do not constant fold.
2974 SDValue Elt = BV->getOperand(Index);
2975 auto *C = dyn_cast<ConstantSDNode>(Elt);
2976 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2977 return Elt;
2978 }
2979
2980 return SDValue();
2981 };
2982
2983 APInt KnownUndef = APInt::getZero(NumElts);
2984 for (unsigned i = 0; i != NumElts; ++i) {
2985 // If both inputs for this element are either constant or undef and match
2986 // the element type, compute the constant/undef result for this element of
2987 // the vector.
2988 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2989 // not handle FP constants. The code within getNode() should be refactored
2990 // to avoid the danger of creating a bogus temporary node here.
2991 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
2992 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
2993 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
2994 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
2995 KnownUndef.setBit(i);
2996 }
2997 return KnownUndef;
2998}
2999
3000 bool TargetLowering::SimplifyDemandedVectorElts(
3001 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3002 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3003 bool AssumeSingleUse) const {
3004 EVT VT = Op.getValueType();
3005 unsigned Opcode = Op.getOpcode();
3006 APInt DemandedElts = OriginalDemandedElts;
3007 unsigned NumElts = DemandedElts.getBitWidth();
3008 assert(VT.isVector() && "Expected vector op");
3009
3010 KnownUndef = KnownZero = APInt::getZero(NumElts);
3011
3012 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
3013 if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
3014 return false;
3015
3016 // TODO: For now we assume we know nothing about scalable vectors.
3017 if (VT.isScalableVector())
3018 return false;
3019
3020 assert(VT.getVectorNumElements() == NumElts &&
3021 "Mask size mismatches value type element count!");
3022
3023 // Undef operand.
3024 if (Op.isUndef()) {
3025 KnownUndef.setAllBits();
3026 return false;
3027 }
3028
3029 // If Op has other users, assume that all elements are needed.
3030 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3031 DemandedElts.setAllBits();
3032
3033 // Not demanding any elements from Op.
3034 if (DemandedElts == 0) {
3035 KnownUndef.setAllBits();
3036 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3037 }
3038
3039 // Limit search depth.
3040 if (Depth >= SelectionDAG::MaxRecursionDepth)
3041 return false;
3042
3043 SDLoc DL(Op);
3044 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3045 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3046
3047 // Helper for demanding the specified elements and all the bits of both binary
3048 // operands.
3049 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3050 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3051 TLO.DAG, Depth + 1);
3052 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3053 TLO.DAG, Depth + 1);
3054 if (NewOp0 || NewOp1) {
3055 SDValue NewOp =
3056 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3057 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3058 return TLO.CombineTo(Op, NewOp);
3059 }
3060 return false;
3061 };
3062
3063 switch (Opcode) {
3064 case ISD::SCALAR_TO_VECTOR: {
3065 if (!DemandedElts[0]) {
3066 KnownUndef.setAllBits();
3067 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3068 }
3069 SDValue ScalarSrc = Op.getOperand(0);
3070 if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3071 SDValue Src = ScalarSrc.getOperand(0);
3072 SDValue Idx = ScalarSrc.getOperand(1);
3073 EVT SrcVT = Src.getValueType();
3074
3075 ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3076
3077 if (SrcEltCnt.isScalable())
3078 return false;
3079
3080 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3081 if (isNullConstant(Idx)) {
3082 APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
3083 APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
3084 APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
3085 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3086 TLO, Depth + 1))
3087 return true;
3088 }
3089 }
3090 KnownUndef.setHighBits(NumElts - 1);
3091 break;
3092 }
3093 case ISD::BITCAST: {
3094 SDValue Src = Op.getOperand(0);
3095 EVT SrcVT = Src.getValueType();
3096
3097 // We only handle vectors here.
3098 // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
3099 if (!SrcVT.isVector())
3100 break;
3101
3102 // Fast handling of 'identity' bitcasts.
3103 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3104 if (NumSrcElts == NumElts)
3105 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3106 KnownZero, TLO, Depth + 1);
3107
3108 APInt SrcDemandedElts, SrcZero, SrcUndef;
3109
3110 // Bitcast from 'large element' src vector to 'small element' vector, we
3111 // must demand a source element if any DemandedElt maps to it.
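// For example (illustrative): bitcasting v2i64 -> v4i32 on a little-endian
// target gives Scale == 2, so demanding v4i32 element 2 demands v2i64
// element 1, and only bits [0,32) of that wide element.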
3112 if ((NumElts % NumSrcElts) == 0) {
3113 unsigned Scale = NumElts / NumSrcElts;
3114 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3115 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3116 TLO, Depth + 1))
3117 return true;
3118
3119 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3120 // of the large element.
3121 // TODO - bigendian once we have test coverage.
3122 if (IsLE) {
3123 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3124 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3125 for (unsigned i = 0; i != NumElts; ++i)
3126 if (DemandedElts[i]) {
3127 unsigned Ofs = (i % Scale) * EltSizeInBits;
3128 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3129 }
3130
3131 KnownBits Known;
3132 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3133 TLO, Depth + 1))
3134 return true;
3135
3136 // The bitcast has split each wide element into a number of
3137 // narrow subelements. We have just computed the Known bits
3138 // for wide elements. See if element splitting results in
3139 // some subelements being zero. Only for demanded elements!
3140 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3141 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3142 .isAllOnes())
3143 continue;
3144 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3145 unsigned Elt = Scale * SrcElt + SubElt;
3146 if (DemandedElts[Elt])
3147 KnownZero.setBit(Elt);
3148 }
3149 }
3150 }
3151
3152 // If the src element is zero/undef then all the output elements it covers
3153 // will be as well - but only demanded elements are guaranteed to be correct.
3154 for (unsigned i = 0; i != NumSrcElts; ++i) {
3155 if (SrcDemandedElts[i]) {
3156 if (SrcZero[i])
3157 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3158 if (SrcUndef[i])
3159 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3160 }
3161 }
3162 }
3163
3164 // Bitcast from 'small element' src vector to 'large element' vector, we
3165 // demand all smaller source elements covered by the larger demanded element
3166 // of this vector.
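// For example (illustrative): bitcasting v8i16 -> v4i32 gives Scale == 2, so
// demanding v4i32 element 1 demands v8i16 elements 2 and 3.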
3167 if ((NumSrcElts % NumElts) == 0) {
3168 unsigned Scale = NumSrcElts / NumElts;
3169 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3170 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3171 TLO, Depth + 1))
3172 return true;
3173
3174 // If all the src elements covering an output element are zero/undef, then
3175 // the output element will be as well, assuming it was demanded.
3176 for (unsigned i = 0; i != NumElts; ++i) {
3177 if (DemandedElts[i]) {
3178 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3179 KnownZero.setBit(i);
3180 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3181 KnownUndef.setBit(i);
3182 }
3183 }
3184 }
3185 break;
3186 }
3187 case ISD::BUILD_VECTOR: {
3188 // Check all elements and simplify any unused elements with UNDEF.
3189 if (!DemandedElts.isAllOnes()) {
3190 // Don't simplify BROADCASTS.
3191 if (llvm::any_of(Op->op_values(),
3192 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3193 SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
3194 bool Updated = false;
3195 for (unsigned i = 0; i != NumElts; ++i) {
3196 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3197 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3198 KnownUndef.setBit(i);
3199 Updated = true;
3200 }
3201 }
3202 if (Updated)
3203 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3204 }
3205 }
3206 for (unsigned i = 0; i != NumElts; ++i) {
3207 SDValue SrcOp = Op.getOperand(i);
3208 if (SrcOp.isUndef()) {
3209 KnownUndef.setBit(i);
3210 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3211 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
3212 KnownZero.setBit(i);
3213 }
3214 }
3215 break;
3216 }
3217 case ISD::CONCAT_VECTORS: {
3218 EVT SubVT = Op.getOperand(0).getValueType();
3219 unsigned NumSubVecs = Op.getNumOperands();
3220 unsigned NumSubElts = SubVT.getVectorNumElements();
3221 for (unsigned i = 0; i != NumSubVecs; ++i) {
3222 SDValue SubOp = Op.getOperand(i);
3223 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3224 APInt SubUndef, SubZero;
3225 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3226 Depth + 1))
3227 return true;
3228 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3229 KnownZero.insertBits(SubZero, i * NumSubElts);
3230 }
3231
3232 // Attempt to avoid multi-use ops if we don't need anything from them.
3233 if (!DemandedElts.isAllOnes()) {
3234 bool FoundNewSub = false;
3235 SmallVector<SDValue, 2> DemandedSubOps;
3236 for (unsigned i = 0; i != NumSubVecs; ++i) {
3237 SDValue SubOp = Op.getOperand(i);
3238 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3239 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3240 SubOp, SubElts, TLO.DAG, Depth + 1);
3241 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3242 FoundNewSub = NewSubOp ? true : FoundNewSub;
3243 }
3244 if (FoundNewSub) {
3245 SDValue NewOp =
3246 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3247 return TLO.CombineTo(Op, NewOp);
3248 }
3249 }
3250 break;
3251 }
3252 case ISD::INSERT_SUBVECTOR: {
3253 // Demand any elements from the subvector and the remainder from the src it's
3254 // inserted into.
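// For example (illustrative): inserting a v2i32 subvector at index 2 of a
// v8i32 takes elements [2,4) from Sub and all remaining elements from Src.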
3255 SDValue Src = Op.getOperand(0);
3256 SDValue Sub = Op.getOperand(1);
3257 uint64_t Idx = Op.getConstantOperandVal(2);
3258 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3259 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3260 APInt DemandedSrcElts = DemandedElts;
3261 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
3262
3263 APInt SubUndef, SubZero;
3264 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3265 Depth + 1))
3266 return true;
3267
3268 // If none of the src operand elements are demanded, replace it with undef.
3269 if (!DemandedSrcElts && !Src.isUndef())
3270 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3271 TLO.DAG.getUNDEF(VT), Sub,
3272 Op.getOperand(2)));
3273
3274 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3275 TLO, Depth + 1))
3276 return true;
3277 KnownUndef.insertBits(SubUndef, Idx);
3278 KnownZero.insertBits(SubZero, Idx);
3279
3280 // Attempt to avoid multi-use ops if we don't need anything from them.
3281 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3282 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3283 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3284 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3285 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3286 if (NewSrc || NewSub) {
3287 NewSrc = NewSrc ? NewSrc : Src;
3288 NewSub = NewSub ? NewSub : Sub;
3289 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3290 NewSub, Op.getOperand(2));
3291 return TLO.CombineTo(Op, NewOp);
3292 }
3293 }
3294 break;
3295 }
3296 case ISD::EXTRACT_SUBVECTOR: {
3297 // Offset the demanded elts by the subvector index.
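// For example (illustrative): extracting a v4i32 subvector at index 4 of a
// v8i32 source means result element i corresponds to source element i + 4,
// hence the shl by Idx below.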
3298 SDValue Src = Op.getOperand(0);
3299 if (Src.getValueType().isScalableVector())
3300 break;
3301 uint64_t Idx = Op.getConstantOperandVal(1);
3302 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3303 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3304
3305 APInt SrcUndef, SrcZero;
3306 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3307 Depth + 1))
3308 return true;
3309 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3310 KnownZero = SrcZero.extractBits(NumElts, Idx);
3311
3312 // Attempt to avoid multi-use ops if we don't need anything from them.
3313 if (!DemandedElts.isAllOnes()) {
3314 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3315 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3316 if (NewSrc) {
3317 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3318 Op.getOperand(1));
3319 return TLO.CombineTo(Op, NewOp);
3320 }
3321 }
3322 break;
3323 }
3324 case ISD::INSERT_VECTOR_ELT: {
3325 SDValue Vec = Op.getOperand(0);
3326 SDValue Scl = Op.getOperand(1);
3327 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3328
3329 // For a legal, constant insertion index, if we don't need this insertion
3330 // then strip it, else remove it from the demanded elts.
3331 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3332 unsigned Idx = CIdx->getZExtValue();
3333 if (!DemandedElts[Idx])
3334 return TLO.CombineTo(Op, Vec);
3335
3336 APInt DemandedVecElts(DemandedElts);
3337 DemandedVecElts.clearBit(Idx);
3338 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3339 KnownZero, TLO, Depth + 1))
3340 return true;
3341
3342 KnownUndef.setBitVal(Idx, Scl.isUndef());
3343
3344 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3345 break;
3346 }
3347
3348 APInt VecUndef, VecZero;
3349 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3350 Depth + 1))
3351 return true;
3352 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3353 break;
3354 }
3355 case ISD::VSELECT: {
3356 SDValue Sel = Op.getOperand(0);
3357 SDValue LHS = Op.getOperand(1);
3358 SDValue RHS = Op.getOperand(2);
3359
3360 // Try to transform the select condition based on the current demanded
3361 // elements.
3362 APInt UndefSel, ZeroSel;
3363 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3364 Depth + 1))
3365 return true;
3366
3367 // See if we can simplify either vselect operand.
3368 APInt DemandedLHS(DemandedElts);
3369 APInt DemandedRHS(DemandedElts);
3370 APInt UndefLHS, ZeroLHS;
3371 APInt UndefRHS, ZeroRHS;
3372 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3373 Depth + 1))
3374 return true;
3375 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3376 Depth + 1))
3377 return true;
3378
3379 KnownUndef = UndefLHS & UndefRHS;
3380 KnownZero = ZeroLHS & ZeroRHS;
3381
3382 // If we know that the selected element is always zero, we don't need the
3383 // select value element.
3384 APInt DemandedSel = DemandedElts & ~KnownZero;
3385 if (DemandedSel != DemandedElts)
3386 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3387 Depth + 1))
3388 return true;
3389
3390 break;
3391 }
3392 case ISD::VECTOR_SHUFFLE: {
3393 SDValue LHS = Op.getOperand(0);
3394 SDValue RHS = Op.getOperand(1);
3395 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3396
3397 // Collect demanded elements from shuffle operands.
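// (Mask entries in [0, NumElts) select lanes from LHS, entries in
// [NumElts, 2*NumElts) select lanes from RHS, and negative entries are undef.)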
3398 APInt DemandedLHS(NumElts, 0);
3399 APInt DemandedRHS(NumElts, 0);
3400 for (unsigned i = 0; i != NumElts; ++i) {
3401 int M = ShuffleMask[i];
3402 if (M < 0 || !DemandedElts[i])
3403 continue;
3404 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3405 if (M < (int)NumElts)
3406 DemandedLHS.setBit(M);
3407 else
3408 DemandedRHS.setBit(M - NumElts);
3409 }
3410
3411 // See if we can simplify either shuffle operand.
3412 APInt UndefLHS, ZeroLHS;
3413 APInt UndefRHS, ZeroRHS;
3414 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3415 Depth + 1))
3416 return true;
3417 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3418 Depth + 1))
3419 return true;
3420
3421 // Simplify mask using undef elements from LHS/RHS.
3422 bool Updated = false;
3423 bool IdentityLHS = true, IdentityRHS = true;
3424 SmallVector<int, 32> NewMask(ShuffleMask);
3425 for (unsigned i = 0; i != NumElts; ++i) {
3426 int &M = NewMask[i];
3427 if (M < 0)
3428 continue;
3429 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3430 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3431 Updated = true;
3432 M = -1;
3433 }
3434 IdentityLHS &= (M < 0) || (M == (int)i);
3435 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3436 }
3437
3438 // Update legal shuffle masks based on demanded elements if it won't reduce
3439 // to Identity, which can cause premature removal of the shuffle mask.
3440 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3441 SDValue LegalShuffle =
3442 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3443 if (LegalShuffle)
3444 return TLO.CombineTo(Op, LegalShuffle);
3445 }
3446
3447 // Propagate undef/zero elements from LHS/RHS.
3448 for (unsigned i = 0; i != NumElts; ++i) {
3449 int M = ShuffleMask[i];
3450 if (M < 0) {
3451 KnownUndef.setBit(i);
3452 } else if (M < (int)NumElts) {
3453 if (UndefLHS[M])
3454 KnownUndef.setBit(i);
3455 if (ZeroLHS[M])
3456 KnownZero.setBit(i);
3457 } else {
3458 if (UndefRHS[M - NumElts])
3459 KnownUndef.setBit(i);
3460 if (ZeroRHS[M - NumElts])
3461 KnownZero.setBit(i);
3462 }
3463 }
3464 break;
3465 }
3466 case ISD::ANY_EXTEND_VECTOR_INREG:
3467 case ISD::SIGN_EXTEND_VECTOR_INREG:
3468 case ISD::ZERO_EXTEND_VECTOR_INREG: {
3469 APInt SrcUndef, SrcZero;
3470 SDValue Src = Op.getOperand(0);
3471 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3472 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3473 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3474 Depth + 1))
3475 return true;
3476 KnownZero = SrcZero.zextOrTrunc(NumElts);
3477 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3478
3479 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3480 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3481 DemandedSrcElts == 1) {
3482 // aext - if we just need the bottom element then we can bitcast.
3483 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3484 }
3485
3486 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3487 // zext(undef) upper bits are guaranteed to be zero.
3488 if (DemandedElts.isSubsetOf(KnownUndef))
3489 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3490 KnownUndef.clearAllBits();
3491
3492 // zext - if we just need the bottom element then we can mask:
3493 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3494 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3495 Op->isOnlyUserOf(Src.getNode()) &&
3496 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3497 SDLoc DL(Op);
3498 EVT SrcVT = Src.getValueType();
3499 EVT SrcSVT = SrcVT.getScalarType();
3500 SmallVector<SDValue> MaskElts;
3501 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3502 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3503 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3504 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3505 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3506 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3507 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3508 }
3509 }
3510 }
3511 break;
3512 }
3513
3514 // TODO: There are more binop opcodes that could be handled here - MIN,
3515 // MAX, saturated math, etc.
3516 case ISD::ADD: {
3517 SDValue Op0 = Op.getOperand(0);
3518 SDValue Op1 = Op.getOperand(1);
3519 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3520 APInt UndefLHS, ZeroLHS;
3521 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3522 Depth + 1, /*AssumeSingleUse*/ true))
3523 return true;
3524 }
3525 [[fallthrough]];
3526 }
3527 case ISD::OR:
3528 case ISD::XOR:
3529 case ISD::SUB:
3530 case ISD::FADD:
3531 case ISD::FSUB:
3532 case ISD::FMUL:
3533 case ISD::FDIV:
3534 case ISD::FREM: {
3535 SDValue Op0 = Op.getOperand(0);
3536 SDValue Op1 = Op.getOperand(1);
3537
3538 APInt UndefRHS, ZeroRHS;
3539 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3540 Depth + 1))
3541 return true;
3542 APInt UndefLHS, ZeroLHS;
3543 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3544 Depth + 1))
3545 return true;
3546
3547 KnownZero = ZeroLHS & ZeroRHS;
3548 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3549
3550 // Attempt to avoid multi-use ops if we don't need anything from them.
3551 // TODO - use KnownUndef to relax the demandedelts?
3552 if (!DemandedElts.isAllOnes())
3553 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3554 return true;
3555 break;
3556 }
3557 case ISD::SHL:
3558 case ISD::SRL:
3559 case ISD::SRA:
3560 case ISD::ROTL:
3561 case ISD::ROTR: {
3562 SDValue Op0 = Op.getOperand(0);
3563 SDValue Op1 = Op.getOperand(1);
3564
3565 APInt UndefRHS, ZeroRHS;
3566 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3567 Depth + 1))
3568 return true;
3569 APInt UndefLHS, ZeroLHS;
3570 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3571 Depth + 1))
3572 return true;
3573
3574 KnownZero = ZeroLHS;
3575 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3576
3577 // Attempt to avoid multi-use ops if we don't need anything from them.
3578 // TODO - use KnownUndef to relax the demandedelts?
3579 if (!DemandedElts.isAllOnes())
3580 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3581 return true;
3582 break;
3583 }
3584 case ISD::MUL:
3585 case ISD::MULHU:
3586 case ISD::MULHS:
3587 case ISD::AND: {
3588 SDValue Op0 = Op.getOperand(0);
3589 SDValue Op1 = Op.getOperand(1);
3590
3591 APInt SrcUndef, SrcZero;
3592 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3593 Depth + 1))
3594 return true;
3595 // If we know that a demanded element was zero in Op1 we don't need to
3596 // demand it in Op0 - it's guaranteed to be zero.
3597 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3598 if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3599 TLO, Depth + 1))
3600 return true;
3601
3602 KnownUndef &= DemandedElts0;
3603 KnownZero &= DemandedElts0;
3604
3605 // If every element pair has a zero/undef then just fold to zero.
3606 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3607 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3608 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3609 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3610
3611 // If either side has a zero element, then the result element is zero, even
3612 // if the other is an UNDEF.
3613 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3614 // and then handle 'and' nodes with the rest of the binop opcodes.
3615 KnownZero |= SrcZero;
3616 KnownUndef &= SrcUndef;
3617 KnownUndef &= ~KnownZero;
3618
3619 // Attempt to avoid multi-use ops if we don't need anything from them.
3620 if (!DemandedElts.isAllOnes())
3621 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3622 return true;
3623 break;
3624 }
3625 case ISD::TRUNCATE:
3626 case ISD::SIGN_EXTEND:
3627 case ISD::ZERO_EXTEND:
3628 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3629 KnownZero, TLO, Depth + 1))
3630 return true;
3631
3632 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3633 // zext(undef) upper bits are guaranteed to be zero.
3634 if (DemandedElts.isSubsetOf(KnownUndef))
3635 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3636 KnownUndef.clearAllBits();
3637 }
3638 break;
3639 default: {
3640 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3641 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3642 KnownZero, TLO, Depth))
3643 return true;
3644 } else {
3645 KnownBits Known;
3646 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3647 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3648 TLO, Depth, AssumeSingleUse))
3649 return true;
3650 }
3651 break;
3652 }
3653 }
3654 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3655
3656 // Constant fold all undef cases.
3657 // TODO: Handle zero cases as well.
3658 if (DemandedElts.isSubsetOf(KnownUndef))
3659 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3660
3661 return false;
3662}
3663
3664/// Determine which of the bits specified in Mask are known to be either zero or
3665/// one and return them in the Known.
3666void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3667 KnownBits &Known,
3668 const APInt &DemandedElts,
3669 const SelectionDAG &DAG,
3670 unsigned Depth) const {
3671 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3672 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3673 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3674 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3675 "Should use MaskedValueIsZero if you don't know whether Op"
3676 " is a target node!");
3677 Known.resetAll();
3678}
3679
3680void TargetLowering::computeKnownBitsForTargetInstr(
3681 GISelKnownBits &Analysis, Register R, KnownBits &Known,
3682 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3683 unsigned Depth) const {
3684 Known.resetAll();
3685}
3686
3687void TargetLowering::computeKnownBitsForFrameIndex(
3688 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3689 // The low bits are known zero if the pointer is aligned.
3690 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3691}
3692
3693Align TargetLowering::computeKnownAlignForTargetInstr(
3694 GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
3695 unsigned Depth) const {
3696 return Align(1);
3697}
3698
3699/// This method can be implemented by targets that want to expose additional
3700/// information about sign bits to the DAG Combiner.
3701unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3702 const APInt &,
3703 const SelectionDAG &,
3704 unsigned Depth) const {
3705 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3706 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3707 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3708 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3709 "Should use ComputeNumSignBits if you don't know whether Op"
3710 " is a target node!");
3711 return 1;
3712}
3713
3714unsigned TargetLowering::computeNumSignBitsForTargetInstr(
3715 GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
3716 const MachineRegisterInfo &MRI, unsigned Depth) const {
3717 return 1;
3718}
3719
3720bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3721 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3722 TargetLoweringOpt &TLO, unsigned Depth) const {
3723 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3724 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3725 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3726 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3727 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3728 " is a target node!");
3729 return false;
3730}
3731
3732bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3733 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3734 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3735 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3736 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3737 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3738 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3739 "Should use SimplifyDemandedBits if you don't know whether Op"
3740 " is a target node!");
3741 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3742 return false;
3743}
3744
3745SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3746 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3747 SelectionDAG &DAG, unsigned Depth) const {
3748 assert(
3749 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3750 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3751 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3752 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3753 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3754 " is a target node!");
3755 return SDValue();
3756}
3757
3758SDValue
3759TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3760 SDValue N1, MutableArrayRef<int> Mask,
3761 SelectionDAG &DAG) const {
3762 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3763 if (!LegalMask) {
3764 std::swap(N0, N1);
3765 ShuffleVectorSDNode::commuteMask(Mask);
3766 LegalMask = isShuffleMaskLegal(Mask, VT);
3767 }
3768
3769 if (!LegalMask)
3770 return SDValue();
3771
3772 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3773}
3774
3775const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode *) const {
3776 return nullptr;
3777}
3778
3779bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3780 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3781 bool PoisonOnly, unsigned Depth) const {
3782 assert(
3783 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3784 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3785 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3786 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3787 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3788 " is a target node!");
3789 return false;
3790}
3791
3792bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
3793 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3794 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
3795 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3796 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3797 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3798 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3799 "Should use canCreateUndefOrPoison if you don't know whether Op"
3800 " is a target node!");
3801 // Be conservative and return true.
3802 return true;
3803}
3804
3805bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
3806 const SelectionDAG &DAG,
3807 bool SNaN,
3808 unsigned Depth) const {
3809 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3810 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3811 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3812 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3813 "Should use isKnownNeverNaN if you don't know whether Op"
3814 " is a target node!");
3815 return false;
3816}
3817
3818bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
3819 const APInt &DemandedElts,
3820 APInt &UndefElts,
3821 const SelectionDAG &DAG,
3822 unsigned Depth) const {
3823 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3824 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3825 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3826 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3827 "Should use isSplatValue if you don't know whether Op"
3828 " is a target node!");
3829 return false;
3830}
3831
3832// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3833// work with truncating build vectors and vectors with elements of less than
3834// 8 bits.
3835bool TargetLowering::isConstTrueVal(SDValue N) const {
3836 if (!N)
3837 return false;
3838
3839 unsigned EltWidth;
3840 APInt CVal;
3841 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3842 /*AllowTruncation=*/true)) {
3843 CVal = CN->getAPIntValue();
3844 EltWidth = N.getValueType().getScalarSizeInBits();
3845 } else
3846 return false;
3847
3848 // If this is a truncating splat, truncate the splat value.
3849 // Otherwise, we may fail to match the expected values below.
3850 if (EltWidth < CVal.getBitWidth())
3851 CVal = CVal.trunc(EltWidth);
3852
3853 switch (getBooleanContents(N.getValueType())) {
3854 case UndefinedBooleanContent:
3855 return CVal[0];
3856 case ZeroOrOneBooleanContent:
3857 return CVal.isOne();
3858 case ZeroOrNegativeOneBooleanContent:
3859 return CVal.isAllOnes();
3860 }
3861
3862 llvm_unreachable("Invalid boolean contents");
3863}
3864
3865bool TargetLowering::isConstFalseVal(SDValue N) const {
3866 if (!N)
3867 return false;
3868
3869 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3870 if (!CN) {
3871 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3872 if (!BV)
3873 return false;
3874
3875 // Only interested in constant splats; we don't care about undef
3876 // elements in identifying boolean constants, and getConstantSplatNode
3877 // returns NULL if all ops are undef.
3878 CN = BV->getConstantSplatNode();
3879 if (!CN)
3880 return false;
3881 }
3882
3883 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3884 return !CN->getAPIntValue()[0];
3885
3886 return CN->isZero();
3887}
3888
3889bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
3890 bool SExt) const {
3891 if (VT == MVT::i1)
3892 return N->isOne();
3893
3894 TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
3895 switch (Cnt) {
3896 case TargetLowering::ZeroOrOneBooleanContent:
3897 // An extended value of 1 is always true, unless its original type is i1,
3898 // in which case it will be sign extended to -1.
3899 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3900 case TargetLowering::UndefinedBooleanContent:
3901 case TargetLowering::ZeroOrNegativeOneBooleanContent:
3902 return N->isAllOnes() && SExt;
3903 }
3904 llvm_unreachable("Unexpected enumeration.");
3905}
3906
3907/// This helper function of SimplifySetCC tries to optimize the comparison when
3908/// either operand of the SetCC node is a bitwise-and instruction.
3909SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
3910 ISD::CondCode Cond, const SDLoc &DL,
3911 DAGCombinerInfo &DCI) const {
3912 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
3913 std::swap(N0, N1);
3914
3915 SelectionDAG &DAG = DCI.DAG;
3916 EVT OpVT = N0.getValueType();
3917 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
3918 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
3919 return SDValue();
3920
3921 // (X & Y) != 0 --> zextOrTrunc(X & Y)
3922 // iff everything but LSB is known zero:
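// (Illustrative: if every bit of X & Y above bit 0 is known zero, the
// zext/trunc of X & Y to the setcc result type already equals the '!= 0'
// result, so no compare is needed.)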
3923 if (Cond == ISD::SETNE && isNullConstant(N1) &&
3924 (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent ||
3925 getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
3926 unsigned NumEltBits = OpVT.getScalarSizeInBits();
3927 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
3928 if (DAG.MaskedValueIsZero(N0, UpperBits))
3929 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
3930 }
3931
3932 // Try to eliminate a power-of-2 mask constant by converting to a signbit
3933 // test in a narrow type that we can truncate to with no cost. Examples:
3934 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
3935 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
3936 // TODO: This conservatively checks for type legality on the source and
3937 // destination types. That may inhibit optimizations, but it also
3938 // allows setcc->shift transforms that may be more beneficial.
3939 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3940 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
3941 isTypeLegal(OpVT) && N0.hasOneUse()) {
3942 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
3943 AndC->getAPIntValue().getActiveBits());
3944 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
3945 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
3946 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
3947 return DAG.getSetCC(DL, VT, Trunc, Zero,
3948 Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
3949 }
3950 }
3951
3952 // Match these patterns in any of their permutations:
3953 // (X & Y) == Y
3954 // (X & Y) != Y
3955 SDValue X, Y;
3956 if (N0.getOperand(0) == N1) {
3957 X = N0.getOperand(1);
3958 Y = N0.getOperand(0);
3959 } else if (N0.getOperand(1) == N1) {
3960 X = N0.getOperand(0);
3961 Y = N0.getOperand(1);
3962 } else {
3963 return SDValue();
3964 }
3965
3966 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
3967 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
3968 // it's liable to create an infinite loop.
3969 SDValue Zero = DAG.getConstant(0, DL, OpVT);
3970 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
3971 DAG.isKnownToBeAPowerOfTwo(Y)) {
3972 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
3973 // Note that where Y is variable and is known to have at most one bit set
3974 // (for example, if it is Z & 1) we cannot do this; the expressions are not
3975 // equivalent when Y == 0.
3976 assert(OpVT.isInteger());
3977 Cond = ISD::getSetCCInverse(Cond, OpVT);
3978 if (DCI.isBeforeLegalizeOps() ||
3979 isCondCodeLegal(Cond, N0.getSimpleValueType()))
3980 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
3981 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
3982 // If the target supports an 'and-not' or 'and-complement' logic operation,
3983 // try to use that to make a comparison operation more efficient.
3984 // But don't do this transform if the mask is a single bit because there are
3985 // more efficient ways to deal with that case (for example, 'bt' on x86 or
3986 // 'rlwinm' on PPC).
3987
3988 // Bail out if the compare operand that we want to turn into a zero is
3989 // already a zero (otherwise, infinite loop).
3990 if (isNullConstant(Y))
3991 return SDValue();
3992
3993 // Transform this into: ~X & Y == 0.
3994 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
3995 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
3996 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
3997 }
3998
3999 return SDValue();
4000}
4001
4002/// There are multiple IR patterns that could be checking whether certain
4003/// truncation of a signed number would be lossy or not. The pattern which is
4004 /// best at IR level may not lower optimally. Thus, we want to unfold it.
4005/// We are looking for the following pattern: (KeptBits is a constant)
4006/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4007 /// KeptBits won't be bitwidth(x), as that would be constant-folded to true/false.
4008 /// KeptBits also can't be 1, as that would have been folded to %x dstcond 0.
4009/// We will unfold it into the natural trunc+sext pattern:
4010/// ((%x << C) a>> C) dstcond %x
4011/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
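/// For example (illustrative), with i16 %x and KeptBits == 8:
///   (add %x, 128) u< 256 --> ((%x << 8) a>> 8) == %x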
4012SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4013 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4014 const SDLoc &DL) const {
4015 // We must be comparing with a constant.
4016 ConstantSDNode *C1;
4017 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4018 return SDValue();
4019
4020 // N0 should be: add %x, (1 << (KeptBits-1))
4021 if (N0->getOpcode() != ISD::ADD)
4022 return SDValue();
4023
4024 // And we must be 'add'ing a constant.
4025 ConstantSDNode *C01;
4026 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4027 return SDValue();
4028
4029 SDValue X = N0->getOperand(0);
4030 EVT XVT = X.getValueType();
4031
4032 // Validate constants ...
4033
4034 APInt I1 = C1->getAPIntValue();
4035
4036 ISD::CondCode NewCond;
4037 if (Cond == ISD::CondCode::SETULT) {
4038 NewCond = ISD::CondCode::SETEQ;
4039 } else if (Cond == ISD::CondCode::SETULE) {
4040 NewCond = ISD::CondCode::SETEQ;
4041 // But need to 'canonicalize' the constant.
4042 I1 += 1;
4043 } else if (Cond == ISD::CondCode::SETUGT) {
4044 NewCond = ISD::CondCode::SETNE;
4045 // But need to 'canonicalize' the constant.
4046 I1 += 1;
4047 } else if (Cond == ISD::CondCode::SETUGE) {
4048 NewCond = ISD::CondCode::SETNE;
4049 } else
4050 return SDValue();
4051
4052 APInt I01 = C01->getAPIntValue();
4053
4054 auto checkConstants = [&I1, &I01]() -> bool {
4055 // Both of them must be power-of-two, and the constant from setcc is bigger.
4056 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4057 };
4058
4059 if (checkConstants()) {
4060 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4061 } else {
4062 // What if we invert constants? (and the target predicate)
4063 I1.negate();
4064 I01.negate();
4065 assert(XVT.isInteger());
4066 NewCond = getSetCCInverse(NewCond, XVT);
4067 if (!checkConstants())
4068 return SDValue();
4069 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4070 }
4071
4072 // They are power-of-two, so which bit is set?
4073 const unsigned KeptBits = I1.logBase2();
4074 const unsigned KeptBitsMinusOne = I01.logBase2();
4075
4076 // Magic!
4077 if (KeptBits != (KeptBitsMinusOne + 1))
4078 return SDValue();
4079 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4080
4081 // We don't want to do this in every single case.
4082 SelectionDAG &DAG = DCI.DAG;
4083 if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
4084 XVT, KeptBits))
4085 return SDValue();
4086
4087 // Unfold into: sext_inreg(%x) cond %x
4088 // Where 'cond' will be either 'eq' or 'ne'.
4089 SDValue SExtInReg = DAG.getNode(
4090 ISD::SIGN_EXTEND_INREG, DL, XVT, X,
4091 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
4092 return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
4093}
4094
4095// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
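// For example (illustrative): (X & (0x8000 l>> Y)) != 0 becomes
// ((X << Y) & 0x8000) != 0, hoisting the constant out of the variable shift.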
4096SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4097 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4098 DAGCombinerInfo &DCI, const SDLoc &DL) const {
4100 "Should be a comparison with 0.");
4101 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4102 "Valid only for [in]equality comparisons.");
4103
4104 unsigned NewShiftOpcode;
4105 SDValue X, C, Y;
4106
4107 SelectionDAG &DAG = DCI.DAG;
4108 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4109
4110 // Look for '(C l>>/<< Y)'.
4111 auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
4112 // The shift should be one-use.
4113 if (!V.hasOneUse())
4114 return false;
4115 unsigned OldShiftOpcode = V.getOpcode();
4116 switch (OldShiftOpcode) {
4117 case ISD::SHL:
4118 NewShiftOpcode = ISD::SRL;
4119 break;
4120 case ISD::SRL:
4121 NewShiftOpcode = ISD::SHL;
4122 break;
4123 default:
4124 return false; // must be a logical shift.
4125 }
4126 // We should be shifting a constant.
4127 // FIXME: best to use isConstantOrConstantVector().
4128 C = V.getOperand(0);
4129 ConstantSDNode *CC =
4130 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4131 if (!CC)
4132 return false;
4133 Y = V.getOperand(1);
4134
4135 ConstantSDNode *XC =
4136 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4137 return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
4138 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4139 };
4140
4141 // LHS of comparison should be a one-use 'and'.
4142 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4143 return SDValue();
4144
4145 X = N0.getOperand(0);
4146 SDValue Mask = N0.getOperand(1);
4147
4148 // 'and' is commutative!
4149 if (!Match(Mask)) {
4150 std::swap(X, Mask);
4151 if (!Match(Mask))
4152 return SDValue();
4153 }
4154
4155 EVT VT = X.getValueType();
4156
4157 // Produce:
4158 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4159 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4160 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4161 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4162 return T2;
4163}
4164
4165/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4166/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4167/// handle the commuted versions of these patterns.
4168SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4169 ISD::CondCode Cond, const SDLoc &DL,
4170 DAGCombinerInfo &DCI) const {
4171 unsigned BOpcode = N0.getOpcode();
4172 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4173 "Unexpected binop");
4174 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4175
4176 // (X + Y) == X --> Y == 0
4177 // (X - Y) == X --> Y == 0
4178 // (X ^ Y) == X --> Y == 0
4179 SelectionDAG &DAG = DCI.DAG;
4180 EVT OpVT = N0.getValueType();
4181 SDValue X = N0.getOperand(0);
4182 SDValue Y = N0.getOperand(1);
4183 if (X == N1)
4184 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4185
4186 if (Y != N1)
4187 return SDValue();
4188
4189 // (X + Y) == Y --> X == 0
4190 // (X ^ Y) == Y --> X == 0
4191 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4192 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4193
4194 // The shift would not be valid if the operands are boolean (i1).
4195 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4196 return SDValue();
4197
4198 // (X - Y) == Y --> X == Y << 1
4199 SDValue One =
4200 DAG.getShiftAmountConstant(1, OpVT, DL, !DCI.isBeforeLegalize());
4201 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4202 if (!DCI.isCalledByLegalizer())
4203 DCI.AddToWorklist(YShl1.getNode());
4204 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4205}
4206
4208 SDValue N0, const APInt &C1,
4209 ISD::CondCode Cond, const SDLoc &dl,
4210 SelectionDAG &DAG) {
4211 // Look through truncs that don't change the value of a ctpop.
4212 // FIXME: Add vector support? Need to be careful with setcc result type below.
4213 SDValue CTPOP = N0;
4214 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4216 CTPOP = N0.getOperand(0);
4217
4218 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4219 return SDValue();
4220
4221 EVT CTVT = CTPOP.getValueType();
4222 SDValue CTOp = CTPOP.getOperand(0);
4223
4224 // Expand a power-of-2-or-zero comparison based on ctpop:
4225 // (ctpop x) u< 2 -> (x & x-1) == 0
4226 // (ctpop x) u> 1 -> (x & x-1) != 0
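// (Each pass of the loop below clears the lowest set bit, so e.g.
// (ctpop x) u< 3 becomes ((x & (x - 1)) & ((x & (x - 1)) - 1)) == 0.)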
4227 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4228 // Keep the CTPOP if it is a cheap vector op.
4229 if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4230 return SDValue();
4231
4232 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4233 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4234 return SDValue();
4235 if (C1 == 0 && (Cond == ISD::SETULT))
4236 return SDValue(); // This is handled elsewhere.
4237
4238 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4239
4240 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4241 SDValue Result = CTOp;
4242 for (unsigned i = 0; i < Passes; i++) {
4243 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4244 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4245 }
4246 ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
4247 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4248 }
4249
4250 // Expand a power-of-2 comparison based on ctpop
4251 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4252 // Keep the CTPOP if it is cheap.
4253 if (TLI.isCtpopFast(CTVT))
4254 return SDValue();
4255
4256 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4257 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4258 assert(CTVT.isInteger());
4259 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4260
4261 // It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4262 // check before emitting a potentially unnecessary op.
4263 if (DAG.isKnownNeverZero(CTOp)) {
4264 // (ctpop x) == 1 --> (x & x-1) == 0
4265 // (ctpop x) != 1 --> (x & x-1) != 0
4266 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4267 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4268 return RHS;
4269 }
4270
4271 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4272 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
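// (Illustrative: if x has exactly one set bit, x ^ (x - 1) sets that bit and
// all bits below it, which is unsigned-greater than x - 1; if x is zero or
// has more than one set bit, it is not.)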
4273 SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
4274 ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
4275 return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4276 }
4277
4278 return SDValue();
4279}
4280
4282 ISD::CondCode Cond, const SDLoc &dl,
4283 SelectionDAG &DAG) {
4284 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4285 return SDValue();
4286
4287 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4288 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4289 return SDValue();
4290
4291 auto getRotateSource = [](SDValue X) {
4292 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4293 return X.getOperand(0);
4294 return SDValue();
4295 };
4296
4297 // Peek through a rotated value compared against 0 or -1:
4298 // (rot X, Y) == 0/-1 --> X == 0/-1
4299 // (rot X, Y) != 0/-1 --> X != 0/-1
4300 if (SDValue R = getRotateSource(N0))
4301 return DAG.getSetCC(dl, VT, R, N1, Cond);
4302
4303 // Peek through an 'or' of a rotated value compared against 0:
4304 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4305 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4306 //
4307 // TODO: Add the 'and' with -1 sibling.
4308 // TODO: Recurse through a series of 'or' ops to find the rotate.
4309 EVT OpVT = N0.getValueType();
4310 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4311 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4312 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4313 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4314 }
4315 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4316 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4317 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4318 }
4319 }
4320
4321 return SDValue();
4322}
4323
4325 ISD::CondCode Cond, const SDLoc &dl,
4326 SelectionDAG &DAG) {
4327 // If we are testing for all-bits-clear, we might be able to do that with
4328 // less shifting since bit-order does not matter.
4329 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4330 return SDValue();
4331
4332 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4333 if (!C1 || !C1->isZero())
4334 return SDValue();
4335
4336 if (!N0.hasOneUse() ||
4337 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4338 return SDValue();
4339
4340 unsigned BitWidth = N0.getScalarValueSizeInBits();
4341 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4342 if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
4343 return SDValue();
4344
4345 // Canonicalize fshr as fshl to reduce pattern-matching.
4346 unsigned ShAmt =