1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/STLExtras.h"
25#include "llvm/IR/DataLayout.h"
28#include "llvm/IR/LLVMContext.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCExpr.h"
36#include <cctype>
37using namespace llvm;
38
/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}
42
43const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
44 return nullptr;
45}

bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}
50
51/// Check whether a given call node is in tail position within its function. If
52/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &F = DAG.getMachineFunction().getFunction();

57 // First, check if tail calls have been disabled in this function.
58 if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
59 return false;
60
  // Conservatively require the attributes of the call to match those of
  // the return. Ignore the following attributes because they don't affect the
  // call sequence.
64 AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
65 for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
66 Attribute::DereferenceableOrNull, Attribute::NoAlias,
67 Attribute::NonNull, Attribute::NoUndef})
68 CallerAttrs.removeAttribute(Attr);
69
70 if (CallerAttrs.hasAttributes())
71 return false;
72
73 // It's not safe to eliminate the sign / zero extension of the return value.
74 if (CallerAttrs.contains(Attribute::ZExt) ||
75 CallerAttrs.contains(Attribute::SExt))
76 return false;
77
78 // Check if the only use is a function return node.
79 return isUsedByReturnOnly(Node, Chain);
80}
81
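/// Check whether the outgoing argument values that end up in callee-saved
/// registers are exactly the values the caller already holds in those
/// registers (each such value must be a CopyFromReg of the corresponding
/// function live-in), so that a tail call does not need to restore them.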
bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
    const uint32_t *CallerPreservedMask,
84 const SmallVectorImpl<CCValAssign> &ArgLocs,
85 const SmallVectorImpl<SDValue> &OutVals) const {
86 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
87 const CCValAssign &ArgLoc = ArgLocs[I];
88 if (!ArgLoc.isRegLoc())
89 continue;
90 MCRegister Reg = ArgLoc.getLocReg();
91 // Only look at callee saved registers.
92 if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
93 continue;
94 // Check that we pass the value used for the caller.
95 // (We look for a CopyFromReg reading a virtual register that is used
96 // for the function live-in value of register Reg)
97 SDValue Value = OutVals[I];
98 if (Value->getOpcode() == ISD::AssertZext)
99 Value = Value.getOperand(0);
100 if (Value->getOpcode() != ISD::CopyFromReg)
101 return false;
102 Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
103 if (MRI.getLiveInPhysReg(ArgReg) != Reg)
104 return false;
105 }
106 return true;
107}
108
109/// Set CallLoweringInfo attribute flags based on a call instruction
110/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
113 IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
114 IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
115 IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
116 IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
117 IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
118 IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
119 IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
120 IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
121 IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
122 IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
123 IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
124 IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
125 Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
129 if (IsByVal) {
130 IndirectType = Call->getParamByValType(ArgIdx);
131 if (!Alignment)
132 Alignment = Call->getParamAlign(ArgIdx);
133 }
134 if (IsPreallocated)
135 IndirectType = Call->getParamPreallocatedType(ArgIdx);
136 if (IsInAlloca)
137 IndirectType = Call->getParamInAllocaType(ArgIdx);
138 if (IsSRet)
139 IndirectType = Call->getParamStructRetType(ArgIdx);
140}
141
142/// Generate a libcall taking the given operands as arguments and returning a
143/// result of type RetVT.
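///
/// Illustrative usage sketch (not from the original source): a target's custom
/// lowering hook might expand a 32-bit FREM through this helper roughly as
/// follows, where Op and DAG come from the caller's context:
///
///   TargetLowering::MakeLibCallOptions CallOptions;
///   SDValue Ops[2] = {Op.getOperand(0), Op.getOperand(1)};
///   SDValue Lowered =
///       makeLibCall(DAG, RTLIB::REM_F32, MVT::f32, Ops, CallOptions,
///                   SDLoc(Op)).first;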
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
                            ArrayRef<SDValue> Ops,
                            MakeLibCallOptions CallOptions,
                            const SDLoc &dl,
                            SDValue InChain) const {
150 if (!InChain)
151 InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  TargetLowering::ArgListEntry Entry;
  for (unsigned i = 0; i < Ops.size(); ++i) {
158 SDValue NewOp = Ops[i];
159 Entry.Node = NewOp;
160 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
161 Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
162 CallOptions.IsSExt);
163 Entry.IsZExt = !Entry.IsSExt;
164
    if (CallOptions.IsSoften &&
        !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
      Entry.IsSExt = Entry.IsZExt = false;
168 }
169 Args.push_back(Entry);
170 }
171
172 if (LC == RTLIB::UNKNOWN_LIBCALL)
    report_fatal_error("Unsupported library call operation!");
  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                         getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  TargetLowering::CallLoweringInfo CLI(DAG);
  bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
180 bool zeroExtend = !signExtend;
181
  if (CallOptions.IsSoften &&
      !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
    signExtend = zeroExtend = false;
185 }
186
187 CLI.setDebugLoc(dl)
188 .setChain(InChain)
189 .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setDiscardResult(!CallOptions.IsReturnValueUsed)
      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
      .setSExtResult(signExtend)
194 .setZExtResult(zeroExtend);
195 return LowerCallTo(CLI);
196}
197
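// Worked example (illustrative): on a target whose widest legal integer type
// is i64 and where misaligned 8-byte accesses are cheap, a 15-byte memcpy with
// overlap allowed is answered with MemOps = {i64, i64}; the second i64 store is
// placed to cover the final 7 bytes (overlapping the first store by one byte)
// rather than being split into i32+i16+i8 pieces.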
bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
200 unsigned SrcAS, const AttributeList &FuncAttributes) const {
201 if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
202 Op.getSrcAlign() < Op.getDstAlign())
203 return false;
204
205 EVT VT = getOptimalMemOpType(Op, FuncAttributes);
206
207 if (VT == MVT::Other) {
208 // Use the largest integer type whose alignment constraints are satisfied.
209 // We only need to check DstAlign here as SrcAlign is always greater or
210 // equal to DstAlign (or zero).
211 VT = MVT::i64;
212 if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());
217
218 // Find the largest legal integer type.
219 MVT LVT = MVT::i64;
220 while (!isTypeLegal(LVT))
221 LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
222 assert(LVT.isInteger());
223
224 // If the type we've chosen is larger than the largest legal integer type
225 // then use that instead.
226 if (VT.bitsGT(LVT))
227 VT = LVT;
228 }
229
230 unsigned NumMemOps = 0;
231 uint64_t Size = Op.size();
232 while (Size) {
233 unsigned VTSize = VT.getSizeInBits() / 8;
234 while (VTSize > Size) {
      // For now, only use non-vector loads / stores for the left-over pieces.
236 EVT NewVT = VT;
237 unsigned NewVTSize;
238
239 bool Found = false;
240 if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
248 // i64 is usually not legal on 32-bit targets, but f64 may be.
249 NewVT = MVT::f64;
250 Found = true;
251 }
252 }
253
254 if (!Found) {
255 do {
256 NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
257 if (NewVT == MVT::i8)
258 break;
259 } while (!isSafeMemOpType(NewVT.getSimpleVT()));
260 }
261 NewVTSize = NewVT.getSizeInBits() / 8;
262
263 // If the new VT cannot cover all of the remaining bits, then consider
264 // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
272 else {
273 VT = NewVT;
274 VTSize = NewVTSize;
275 }
276 }
277
278 if (++NumMemOps > Limit)
279 return false;
280
281 MemOps.push_back(VT);
282 Size -= VTSize;
283 }
284
285 return true;
286}
287
288/// Soften the operands of a comparison. This code is shared among BR_CC,
289/// SELECT_CC, and SETCC handlers.
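/// For example, softening an f32 SETOLT on a typical soft-float target emits a
/// call to the OLT_F32 libcall (__ltsf2 with libgcc) and rewrites the
/// comparison to test the call's integer result against zero using the
/// condition code reported by getCmpLibcallCC(OLT_F32).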
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
292 ISD::CondCode &CCCode,
293 const SDLoc &dl, const SDValue OldLHS,
294 const SDValue OldRHS) const {
295 SDValue Chain;
296 return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
297 OldRHS, Chain);
298}
299
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
302 ISD::CondCode &CCCode,
303 const SDLoc &dl, const SDValue OldLHS,
304 const SDValue OldRHS,
305 SDValue &Chain,
306 bool IsSignaling) const {
307 // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
308 // not supporting it. We can update this code when libgcc provides such
309 // functions.
310
311 assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
312 && "Unsupported setcc type!");
313
314 // Expand into one or more soft-fp libcall(s).
315 RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
316 bool ShouldInvertCC = false;
317 switch (CCCode) {
318 case ISD::SETEQ:
319 case ISD::SETOEQ:
320 LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
321 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
322 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
323 break;
324 case ISD::SETNE:
325 case ISD::SETUNE:
326 LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
327 (VT == MVT::f64) ? RTLIB::UNE_F64 :
328 (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
329 break;
330 case ISD::SETGE:
331 case ISD::SETOGE:
332 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
333 (VT == MVT::f64) ? RTLIB::OGE_F64 :
334 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
335 break;
336 case ISD::SETLT:
337 case ISD::SETOLT:
338 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
339 (VT == MVT::f64) ? RTLIB::OLT_F64 :
340 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
341 break;
342 case ISD::SETLE:
343 case ISD::SETOLE:
344 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
345 (VT == MVT::f64) ? RTLIB::OLE_F64 :
346 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
347 break;
348 case ISD::SETGT:
349 case ISD::SETOGT:
350 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
351 (VT == MVT::f64) ? RTLIB::OGT_F64 :
352 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
353 break;
354 case ISD::SETO:
355 ShouldInvertCC = true;
356 [[fallthrough]];
357 case ISD::SETUO:
358 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
359 (VT == MVT::f64) ? RTLIB::UO_F64 :
360 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
361 break;
362 case ISD::SETONE:
363 // SETONE = O && UNE
364 ShouldInvertCC = true;
365 [[fallthrough]];
366 case ISD::SETUEQ:
367 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
368 (VT == MVT::f64) ? RTLIB::UO_F64 :
369 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
370 LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
371 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
372 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
373 break;
374 default:
375 // Invert CC for unordered comparisons
376 ShouldInvertCC = true;
377 switch (CCCode) {
378 case ISD::SETULT:
379 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
380 (VT == MVT::f64) ? RTLIB::OGE_F64 :
381 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
382 break;
383 case ISD::SETULE:
384 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
385 (VT == MVT::f64) ? RTLIB::OGT_F64 :
386 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
387 break;
388 case ISD::SETUGT:
389 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
390 (VT == MVT::f64) ? RTLIB::OLE_F64 :
391 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
392 break;
393 case ISD::SETUGE:
394 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
395 (VT == MVT::f64) ? RTLIB::OLT_F64 :
396 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
397 break;
398 default: llvm_unreachable("Do not know how to soften this setcc!");
399 }
400 }
401
402 // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = { OldLHS.getValueType(),
407 OldRHS.getValueType() };
408 CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
409 auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
410 NewLHS = Call.first;
411 NewRHS = DAG.getConstant(0, dl, RetVT);
412
413 CCCode = getCmpLibcallCC(LC1);
414 if (ShouldInvertCC) {
415 assert(RetVT.isInteger());
416 CCCode = getSetCCInverse(CCCode, RetVT);
417 }
418
419 if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
420 // Update Chain.
421 Chain = Call.second;
422 } else {
423 EVT SetCCVT =
424 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
425 SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
426 auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
427 CCCode = getCmpLibcallCC(LC2);
428 if (ShouldInvertCC)
429 CCCode = getSetCCInverse(CCCode, RetVT);
430 NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
431 if (Chain)
432 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
433 Call2.second);
434 NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
435 Tmp.getValueType(), Tmp, NewLHS);
436 NewRHS = SDValue();
437 }
438}
439
/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
  // In non-pic modes, just use the address of a block.
  if (!isPositionIndependent())
    return MachineJumpTableInfo::EK_BlockAddress;

  // In PIC mode, if the target supports a GPRel32 directive, use it.
  if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
    return MachineJumpTableInfo::EK_GPRel32BlockAddress;

  // Otherwise, use a label difference.
  return MachineJumpTableInfo::EK_LabelDifference32;
}
454
SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  // If our PIC model is GP relative, use the global offset table as the base.
  unsigned JTEncoding = getJumpTableEncoding();

  if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
      (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));

  return Table;
}
466
467/// This returns the relocation base for the given PIC jumptable, the same as
468/// getPICJumpTableRelocBase, but as an MCExpr.
469const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI, MCContext &Ctx) const {
472 // The normal PIC reloc base is the label at the start of the jump table.
473 return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
474}
475
SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                               SDValue Addr, int JTI,
478 SelectionDAG &DAG) const {
479 SDValue Chain = Value;
  // Jump table debug info is only needed if CodeView is enabled.
  if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
    Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
483 }
484 return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
485}
486
bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  const TargetMachine &TM = getTargetMachine();
490 const GlobalValue *GV = GA->getGlobal();
491
492 // If the address is not even local to this DSO we will have to load it from
493 // a got and then add the offset.
494 if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
495 return false;
496
497 // If the code is position independent we will have to add a base register.
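  // (For example, with -fPIC on a typical ELF target the address is formed via
  // a GOT load or a PC-relative sequence, so a constant offset cannot simply
  // be folded into the global address node.)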
498 if (isPositionIndependent())
499 return false;
500
501 // Otherwise we can do it.
502 return true;
503}
504
505//===----------------------------------------------------------------------===//
506// Optimization Methods
507//===----------------------------------------------------------------------===//
508
509/// If the specified instruction has a constant integer operand and there are
510/// bits set in that constant that are not demanded, then clear those bits and
511/// return true.
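/// For example, if Op is (or X, 0xF0F0) and only the low 8 bits are demanded,
/// the constant is not a subset of the demanded bits, so it is shrunk to
/// 0x00F0 and the node is rebuilt with the smaller constant.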
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
514 const APInt &DemandedElts,
515 TargetLoweringOpt &TLO) const {
516 SDLoc DL(Op);
517 unsigned Opcode = Op.getOpcode();
518
519 // Early-out if we've ended up calling an undemanded node, leave this to
520 // constant folding.
521 if (DemandedBits.isZero() || DemandedElts.isZero())
522 return false;
523
524 // Do target-specific constant optimization.
525 if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
526 return TLO.New.getNode();
527
528 // FIXME: ISD::SELECT, ISD::SELECT_CC
529 switch (Opcode) {
530 default:
531 break;
532 case ISD::XOR:
533 case ISD::AND:
534 case ISD::OR: {
535 auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
536 if (!Op1C || Op1C->isOpaque())
537 return false;
538
539 // If this is a 'not' op, don't touch it because that's a canonical form.
540 const APInt &C = Op1C->getAPIntValue();
541 if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
542 return false;
543
544 if (!C.isSubsetOf(DemandedBits)) {
545 EVT VT = Op.getValueType();
546 SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
547 SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
548 return TLO.CombineTo(Op, NewOp);
549 }
550
551 break;
552 }
553 }
554
555 return false;
556}
557
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
560 TargetLoweringOpt &TLO) const {
561 EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
565 return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
566}
567
568/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
569/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
570/// generalized for targets with other types of implicit widening casts.
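/// For example, an i64 add whose users only demand the low 16 bits can be
/// rebuilt as (any_extend (add (trunc x), (trunc y))) in i16 when truncation
/// to i16 and zero-extension back to i64 are both free on the target.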
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &DemandedBits,
573 TargetLoweringOpt &TLO) const {
574 assert(Op.getNumOperands() == 2 &&
575 "ShrinkDemandedOp only supports binary operators!");
576 assert(Op.getNode()->getNumValues() == 1 &&
577 "ShrinkDemandedOp only supports nodes with one result!");
578
579 EVT VT = Op.getValueType();
580 SelectionDAG &DAG = TLO.DAG;
581 SDLoc dl(Op);
582
583 // Early return, as this function cannot handle vector types.
584 if (VT.isVector())
585 return false;
586
587 // Don't do this if the node has another user, which may require the
588 // full value.
589 if (!Op.getNode()->hasOneUse())
590 return false;
591
592 // Search for the smallest integer type with free casts to and from
593 // Op's type. For expedience, just check power-of-2 integer types.
594 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
595 unsigned DemandedSize = DemandedBits.getActiveBits();
596 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
597 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
598 EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
599 if (TLI.isTruncateFree(VT, SmallVT) && TLI.isZExtFree(SmallVT, VT)) {
600 // We found a type with free casts.
601 SDValue X = DAG.getNode(
602 Op.getOpcode(), dl, SmallVT,
603 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
604 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
605 assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
606 SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
607 return TLO.CombineTo(Op, Z);
608 }
609 }
610 return false;
611}
612
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          DAGCombinerInfo &DCI) const {
615 SelectionDAG &DAG = DCI.DAG;
616 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
617 !DCI.isBeforeLegalizeOps());
618 KnownBits Known;
619
620 bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
621 if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
625 return Simplified;
626}
627
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          const APInt &DemandedElts,
630 DAGCombinerInfo &DCI) const {
631 SelectionDAG &DAG = DCI.DAG;
632 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
633 !DCI.isBeforeLegalizeOps());
634 KnownBits Known;
635
636 bool Simplified =
637 SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
638 if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
642 return Simplified;
643}
644
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
650 EVT VT = Op.getValueType();
651
652 // Since the number of lanes in a scalable vector is unknown at compile time,
653 // we track one bit which is implicitly broadcast to all lanes. This means
654 // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
658 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
659 AssumeSingleUse);
660}
661
// TODO: Under what circumstances can we create nodes? Constant folding?
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
665 SelectionDAG &DAG, unsigned Depth) const {
666 EVT VT = Op.getValueType();
667
  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();
671
672 // Ignore UNDEFs.
673 if (Op.isUndef())
674 return SDValue();
675
676 // Not demanding any bits/elts from Op.
677 if (DemandedBits == 0 || DemandedElts == 0)
678 return DAG.getUNDEF(VT);
679
680 bool IsLE = DAG.getDataLayout().isLittleEndian();
681 unsigned NumElts = DemandedElts.getBitWidth();
682 unsigned BitWidth = DemandedBits.getBitWidth();
683 KnownBits LHSKnown, RHSKnown;
684 switch (Op.getOpcode()) {
685 case ISD::BITCAST: {
686 if (VT.isScalableVector())
687 return SDValue();
688
689 SDValue Src = peekThroughBitcasts(Op.getOperand(0));
690 EVT SrcVT = Src.getValueType();
691 EVT DstVT = Op.getValueType();
692 if (SrcVT == DstVT)
693 return Src;
694
695 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
696 unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
697 if (NumSrcEltBits == NumDstEltBits)
698 if (SDValue V = SimplifyMultipleUseDemandedBits(
699 Src, DemandedBits, DemandedElts, DAG, Depth + 1))
700 return DAG.getBitcast(DstVT, V);
701
702 if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
703 unsigned Scale = NumDstEltBits / NumSrcEltBits;
704 unsigned NumSrcElts = SrcVT.getVectorNumElements();
705 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
706 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
707 for (unsigned i = 0; i != Scale; ++i) {
708 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
709 unsigned BitOffset = EltOffset * NumSrcEltBits;
710 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
711 if (!Sub.isZero()) {
712 DemandedSrcBits |= Sub;
713 for (unsigned j = 0; j != NumElts; ++j)
714 if (DemandedElts[j])
715 DemandedSrcElts.setBit((j * Scale) + i);
716 }
717 }
718
719 if (SDValue V = SimplifyMultipleUseDemandedBits(
720 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
721 return DAG.getBitcast(DstVT, V);
722 }
723
724 // TODO - bigendian once we have test coverage.
725 if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
726 unsigned Scale = NumSrcEltBits / NumDstEltBits;
727 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
728 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
729 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
730 for (unsigned i = 0; i != NumElts; ++i)
731 if (DemandedElts[i]) {
732 unsigned Offset = (i % Scale) * NumDstEltBits;
733 DemandedSrcBits.insertBits(DemandedBits, Offset);
734 DemandedSrcElts.setBit(i / Scale);
735 }
736
737 if (SDValue V = SimplifyMultipleUseDemandedBits(
738 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
739 return DAG.getBitcast(DstVT, V);
740 }
741
742 break;
743 }
744 case ISD::AND: {
745 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
746 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
747
748 // If all of the demanded bits are known 1 on one side, return the other.
749 // These bits cannot contribute to the result of the 'and' in this
750 // context.
751 if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
752 return Op.getOperand(0);
753 if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
754 return Op.getOperand(1);
755 break;
756 }
757 case ISD::OR: {
758 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
759 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
760
761 // If all of the demanded bits are known zero on one side, return the
762 // other. These bits cannot contribute to the result of the 'or' in this
763 // context.
764 if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
765 return Op.getOperand(0);
766 if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
767 return Op.getOperand(1);
768 break;
769 }
770 case ISD::XOR: {
771 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
772 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
773
774 // If all of the demanded bits are known zero on one side, return the
775 // other.
776 if (DemandedBits.isSubsetOf(RHSKnown.Zero))
777 return Op.getOperand(0);
778 if (DemandedBits.isSubsetOf(LHSKnown.Zero))
779 return Op.getOperand(1);
780 break;
781 }
782 case ISD::SHL: {
783 // If we are only demanding sign bits then we can use the shift source
784 // directly.
785 if (const APInt *MaxSA =
786 DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
787 SDValue Op0 = Op.getOperand(0);
788 unsigned ShAmt = MaxSA->getZExtValue();
789 unsigned NumSignBits =
790 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
791 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
792 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
793 return Op0;
794 }
795 break;
796 }
797 case ISD::SETCC: {
798 SDValue Op0 = Op.getOperand(0);
799 SDValue Op1 = Op.getOperand(1);
800 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
801 // If (1) we only need the sign-bit, (2) the setcc operands are the same
802 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
803 // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
809 // FIXME: We're limiting to integer types here, but this should also work
810 // if we don't care about FP signed-zero. The use of SETLT with FP means
811 // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return Op0;
815 }
816 break;
817 }
  case ISD::SIGN_EXTEND_INREG: {
    // If none of the extended bits are demanded, eliminate the sextinreg.
820 SDValue Op0 = Op.getOperand(0);
821 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
822 unsigned ExBits = ExVT.getScalarSizeInBits();
    if (DemandedBits.getActiveBits() <= ExBits &&
        shouldRemoveRedundantExtend(Op))
      return Op0;
826 // If the input is already sign extended, just drop the extension.
827 unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
828 if (NumSignBits >= (BitWidth - ExBits + 1))
829 return Op0;
830 break;
831 }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    if (VT.isScalableVector())
836 return SDValue();
837
838 // If we only want the lowest element and none of extended bits, then we can
839 // return the bitcasted source vector.
840 SDValue Src = Op.getOperand(0);
841 EVT SrcVT = Src.getValueType();
842 EVT DstVT = Op.getValueType();
843 if (IsLE && DemandedElts == 1 &&
844 DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
845 DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
846 return DAG.getBitcast(DstVT, Src);
847 }
848 break;
849 }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
852 return SDValue();
853
854 // If we don't demand the inserted element, return the base vector.
855 SDValue Vec = Op.getOperand(0);
856 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
857 EVT VecVT = Vec.getValueType();
858 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
859 !DemandedElts[CIdx->getZExtValue()])
860 return Vec;
861 break;
862 }
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
865 return SDValue();
866
867 SDValue Vec = Op.getOperand(0);
868 SDValue Sub = Op.getOperand(1);
869 uint64_t Idx = Op.getConstantOperandVal(2);
870 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
871 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
872 // If we don't demand the inserted subvector, return the base vector.
873 if (DemandedSubElts == 0)
874 return Vec;
875 break;
876 }
877 case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
880
881 // If all the demanded elts are from one operand and are inline,
882 // then we can use the operand directly.
883 bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
884 for (unsigned i = 0; i != NumElts; ++i) {
885 int M = ShuffleMask[i];
886 if (M < 0 || !DemandedElts[i])
887 continue;
888 AllUndef = false;
889 IdentityLHS &= (M == (int)i);
890 IdentityRHS &= ((M - NumElts) == i);
891 }
892
893 if (AllUndef)
894 return DAG.getUNDEF(Op.getValueType());
895 if (IdentityLHS)
896 return Op.getOperand(0);
897 if (IdentityRHS)
898 return Op.getOperand(1);
899 break;
900 }
901 default:
902 // TODO: Probably okay to remove after audit; here to reduce change size
903 // in initial enablement patch for scalable vectors
904 if (VT.isScalableVector())
905 return SDValue();
906
907 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
908 if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
909 Op, DemandedBits, DemandedElts, DAG, Depth))
910 return V;
911 break;
912 }
913 return SDValue();
914}
915
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
    unsigned Depth) const {
919 EVT VT = Op.getValueType();
920 // Since the number of lanes in a scalable vector is unknown at compile time,
921 // we track one bit which is implicitly broadcast to all lanes. This means
922 // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
926 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
927 Depth);
928}
929
SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
    SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
932 unsigned Depth) const {
933 APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
934 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
935 Depth);
936}
937
938// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
939// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
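// For example, (sra (add (sext i8 A to i32), (sext i8 B to i32)), 1) is the
// signed floor-average of A and B; both add operands have far more than the
// required sign bits, so when AVGFLOORS is legal for i8 this becomes
// (sign_extend (avgfloors (trunc extA), (trunc extB))), with the truncates
// recovering the original narrow values.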
static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
                                 const TargetLowering &TLI,
942 const APInt &DemandedBits,
943 const APInt &DemandedElts,
944 unsigned Depth) {
945 assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
946 "SRL or SRA node is required here!");
947 // Is the right shift using an immediate value of 1?
948 ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
949 if (!N1C || !N1C->isOne())
950 return SDValue();
951
952 // We are looking for an avgfloor
953 // add(ext, ext)
954 // or one of these as a avgceil
955 // add(add(ext, ext), 1)
956 // add(add(ext, 1), ext)
957 // add(ext, add(ext, 1))
958 SDValue Add = Op.getOperand(0);
959 if (Add.getOpcode() != ISD::ADD)
960 return SDValue();
961
962 SDValue ExtOpA = Add.getOperand(0);
963 SDValue ExtOpB = Add.getOperand(1);
964 SDValue Add2;
965 auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
966 ConstantSDNode *ConstOp;
967 if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
968 ConstOp->isOne()) {
969 ExtOpA = Op1;
970 ExtOpB = Op3;
971 Add2 = A;
972 return true;
973 }
974 if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
975 ConstOp->isOne()) {
976 ExtOpA = Op1;
977 ExtOpB = Op2;
978 Add2 = A;
979 return true;
980 }
981 return false;
982 };
983 bool IsCeil =
984 (ExtOpA.getOpcode() == ISD::ADD &&
985 MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
986 (ExtOpB.getOpcode() == ISD::ADD &&
987 MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
988
989 // If the shift is signed (sra):
990 // - Needs >= 2 sign bit for both operands.
991 // - Needs >= 2 zero bits.
992 // If the shift is unsigned (srl):
993 // - Needs >= 1 zero bit for both operands.
994 // - Needs 1 demanded bit zero and >= 2 sign bits.
995 unsigned ShiftOpc = Op.getOpcode();
996 bool IsSigned = false;
997 unsigned KnownBits;
998 unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
999 unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
1000 unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
1001 unsigned NumZeroA =
1002 DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
1003 unsigned NumZeroB =
1004 DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
1005 unsigned NumZero = std::min(NumZeroA, NumZeroB);
1006
1007 switch (ShiftOpc) {
1008 default:
1009 llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
1010 case ISD::SRA: {
1011 if (NumZero >= 2 && NumSigned < NumZero) {
1012 IsSigned = false;
1013 KnownBits = NumZero;
1014 break;
1015 }
1016 if (NumSigned >= 1) {
1017 IsSigned = true;
1018 KnownBits = NumSigned;
1019 break;
1020 }
1021 return SDValue();
1022 }
1023 case ISD::SRL: {
1024 if (NumZero >= 1 && NumSigned < NumZero) {
1025 IsSigned = false;
1026 KnownBits = NumZero;
1027 break;
1028 }
1029 if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
1030 IsSigned = true;
1031 KnownBits = NumSigned;
1032 break;
1033 }
1034 return SDValue();
1035 }
1036 }
1037
1038 unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1039 : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1040
1041 // Find the smallest power-2 type that is legal for this vector size and
1042 // operation, given the original type size and the number of known sign/zero
1043 // bits.
1044 EVT VT = Op.getValueType();
1045 unsigned MinWidth =
1046 std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
1047 EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
1048 if (VT.isVector())
1049 NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
1050 if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT)) {
1051 // If we could not transform, and (both) adds are nuw/nsw, we can use the
1052 // larger type size to do the transform.
1053 if (!TLI.isOperationLegalOrCustom(AVGOpc, VT))
1054 return SDValue();
1055 if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
1056 Add.getOperand(1)) &&
1057 (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
1058 Add2.getOperand(1))))
1059 NVT = VT;
1060 else
1061 return SDValue();
1062 }
1063
1064 SDLoc DL(Op);
1065 SDValue ResultAVG =
1066 DAG.getNode(AVGOpc, DL, NVT, DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpA),
1067 DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpB));
1068 return DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT,
1069 ResultAVG);
1070}
1071
1072/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1073/// result of Op are ever used downstream. If we can use this information to
1074/// simplify Op, create a new simplified DAG node and return true, returning the
1075/// original and new nodes in Old and New. Otherwise, analyze the expression and
1076/// return a mask of Known bits for the expression (used to simplify the
1077/// caller). The Known bits may only be accurate for those bits in the
1078/// OriginalDemandedBits and OriginalDemandedElts.
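/// For example, if Op is (and X, 0xFF00) and a caller only demands the low
/// byte, every demanded bit of the result is known zero, so the node can be
/// replaced with a zero constant.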
bool TargetLowering::SimplifyDemandedBits(
    SDValue Op, const APInt &OriginalDemandedBits,
1081 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1082 unsigned Depth, bool AssumeSingleUse) const {
1083 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1084 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1085 "Mask size mismatches value type size!");
1086
1087 // Don't know anything.
1088 Known = KnownBits(BitWidth);
1089
1090 EVT VT = Op.getValueType();
1091 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1092 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1093 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1094 "Unexpected vector size");
1095
1096 APInt DemandedBits = OriginalDemandedBits;
1097 APInt DemandedElts = OriginalDemandedElts;
1098 SDLoc dl(Op);
1099 auto &DL = TLO.DAG.getDataLayout();
1100
1101 // Undef operand.
1102 if (Op.isUndef())
1103 return false;
1104
1105 // We can't simplify target constants.
1106 if (Op.getOpcode() == ISD::TargetConstant)
1107 return false;
1108
1109 if (Op.getOpcode() == ISD::Constant) {
1110 // We know all of the bits for a constant!
1111 Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
1112 return false;
1113 }
1114
1115 if (Op.getOpcode() == ISD::ConstantFP) {
1116 // We know all of the bits for a floating point constant!
    Known = KnownBits::makeConstant(
        cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1119 return false;
1120 }
1121
1122 // Other users may use these bits.
1123 bool HasMultiUse = false;
  if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
    if (Depth >= SelectionDAG::MaxRecursionDepth) {
      // Limit search depth.
      return false;
    }
    // Allow multiple uses, just set the DemandedBits/Elts to all bits.
    DemandedBits = APInt::getAllOnes(BitWidth);
    DemandedElts = APInt::getAllOnes(NumElts);
1132 HasMultiUse = true;
1133 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1134 // Not demanding any bits/elts from Op.
1135 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1136 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1137 // Limit search depth.
1138 return false;
1139 }
1140
1141 KnownBits Known2;
1142 switch (Op.getOpcode()) {
1143 case ISD::SCALAR_TO_VECTOR: {
1144 if (VT.isScalableVector())
1145 return false;
1146 if (!DemandedElts[0])
1147 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1148
1149 KnownBits SrcKnown;
1150 SDValue Src = Op.getOperand(0);
1151 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1152 APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1153 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1154 return true;
1155
1156 // Upper elements are undef, so only get the knownbits if we just demand
1157 // the bottom element.
1158 if (DemandedElts == 1)
1159 Known = SrcKnown.anyextOrTrunc(BitWidth);
1160 break;
1161 }
1162 case ISD::BUILD_VECTOR:
1163 // Collect the known bits that are shared by every demanded element.
1164 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1165 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1166 return false; // Don't fall through, will infinitely loop.
1167 case ISD::SPLAT_VECTOR: {
1168 SDValue Scl = Op.getOperand(0);
1169 APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1170 KnownBits KnownScl;
1171 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1172 return true;
1173
1174 // Implicitly truncate the bits to match the official semantics of
1175 // SPLAT_VECTOR.
1176 Known = KnownScl.trunc(BitWidth);
1177 break;
1178 }
1179 case ISD::LOAD: {
1180 auto *LD = cast<LoadSDNode>(Op);
1181 if (getTargetConstantFromLoad(LD)) {
1182 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1183 return false; // Don't fall through, will infinitely loop.
1184 }
1185 if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1186 // If this is a ZEXTLoad and we are looking at the loaded value.
1187 EVT MemVT = LD->getMemoryVT();
1188 unsigned MemBits = MemVT.getScalarSizeInBits();
1189 Known.Zero.setBitsFrom(MemBits);
1190 return false; // Don't fall through, will infinitely loop.
1191 }
1192 break;
1193 }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
1196 return false;
1197 SDValue Vec = Op.getOperand(0);
1198 SDValue Scl = Op.getOperand(1);
1199 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1200 EVT VecVT = Vec.getValueType();
1201
1202 // If index isn't constant, assume we need all vector elements AND the
1203 // inserted element.
1204 APInt DemandedVecElts(DemandedElts);
1205 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1206 unsigned Idx = CIdx->getZExtValue();
1207 DemandedVecElts.clearBit(Idx);
1208
1209 // Inserted element is not required.
1210 if (!DemandedElts[Idx])
1211 return TLO.CombineTo(Op, Vec);
1212 }
1213
1214 KnownBits KnownScl;
1215 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1216 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1217 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1218 return true;
1219
1220 Known = KnownScl.anyextOrTrunc(BitWidth);
1221
1222 KnownBits KnownVec;
1223 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1224 Depth + 1))
1225 return true;
1226
1227 if (!!DemandedVecElts)
1228 Known = Known.intersectWith(KnownVec);
1229
1230 return false;
1231 }
1232 case ISD::INSERT_SUBVECTOR: {
1233 if (VT.isScalableVector())
1234 return false;
    // Demand any elements from the subvector and the remainder from the src it
    // is inserted into.
1237 SDValue Src = Op.getOperand(0);
1238 SDValue Sub = Op.getOperand(1);
1239 uint64_t Idx = Op.getConstantOperandVal(2);
1240 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1241 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1242 APInt DemandedSrcElts = DemandedElts;
1243 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
1244
1245 KnownBits KnownSub, KnownSrc;
1246 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1247 Depth + 1))
1248 return true;
1249 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1250 Depth + 1))
1251 return true;
1252
1253 Known.Zero.setAllBits();
1254 Known.One.setAllBits();
1255 if (!!DemandedSubElts)
1256 Known = Known.intersectWith(KnownSub);
1257 if (!!DemandedSrcElts)
1258 Known = Known.intersectWith(KnownSrc);
1259
1260 // Attempt to avoid multi-use src if we don't need anything from it.
1261 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1262 !DemandedSrcElts.isAllOnes()) {
1263 SDValue NewSub = SimplifyMultipleUseDemandedBits(
1264 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1265 SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1266 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1267 if (NewSub || NewSrc) {
1268 NewSub = NewSub ? NewSub : Sub;
1269 NewSrc = NewSrc ? NewSrc : Src;
1270 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1271 Op.getOperand(2));
1272 return TLO.CombineTo(Op, NewOp);
1273 }
1274 }
1275 break;
1276 }
  case ISD::EXTRACT_SUBVECTOR: {
    if (VT.isScalableVector())
1279 return false;
1280 // Offset the demanded elts by the subvector index.
1281 SDValue Src = Op.getOperand(0);
1282 if (Src.getValueType().isScalableVector())
1283 break;
1284 uint64_t Idx = Op.getConstantOperandVal(1);
1285 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1286 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1287
1288 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1289 Depth + 1))
1290 return true;
1291
1292 // Attempt to avoid multi-use src if we don't need anything from it.
1293 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1294 SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1295 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1296 if (DemandedSrc) {
1297 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1298 Op.getOperand(1));
1299 return TLO.CombineTo(Op, NewOp);
1300 }
1301 }
1302 break;
1303 }
1304 case ISD::CONCAT_VECTORS: {
1305 if (VT.isScalableVector())
1306 return false;
1307 Known.Zero.setAllBits();
1308 Known.One.setAllBits();
1309 EVT SubVT = Op.getOperand(0).getValueType();
1310 unsigned NumSubVecs = Op.getNumOperands();
1311 unsigned NumSubElts = SubVT.getVectorNumElements();
1312 for (unsigned i = 0; i != NumSubVecs; ++i) {
1313 APInt DemandedSubElts =
1314 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1315 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1316 Known2, TLO, Depth + 1))
1317 return true;
1318 // Known bits are shared by every demanded subvector element.
1319 if (!!DemandedSubElts)
1320 Known = Known.intersectWith(Known2);
1321 }
1322 break;
1323 }
1324 case ISD::VECTOR_SHUFFLE: {
1325 assert(!VT.isScalableVector());
1326 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1327
    // Collect demanded elements from shuffle operands.
1329 APInt DemandedLHS, DemandedRHS;
1330 if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1331 DemandedRHS))
1332 break;
1333
1334 if (!!DemandedLHS || !!DemandedRHS) {
1335 SDValue Op0 = Op.getOperand(0);
1336 SDValue Op1 = Op.getOperand(1);
1337
1338 Known.Zero.setAllBits();
1339 Known.One.setAllBits();
1340 if (!!DemandedLHS) {
1341 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1342 Depth + 1))
1343 return true;
1344 Known = Known.intersectWith(Known2);
1345 }
1346 if (!!DemandedRHS) {
1347 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1348 Depth + 1))
1349 return true;
1350 Known = Known.intersectWith(Known2);
1351 }
1352
1353 // Attempt to avoid multi-use ops if we don't need anything from them.
1354 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1355 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1356 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1357 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1358 if (DemandedOp0 || DemandedOp1) {
1359 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1360 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1361 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1362 return TLO.CombineTo(Op, NewOp);
1363 }
1364 }
1365 break;
1366 }
1367 case ISD::AND: {
1368 SDValue Op0 = Op.getOperand(0);
1369 SDValue Op1 = Op.getOperand(1);
1370
1371 // If the RHS is a constant, check to see if the LHS would be zero without
1372 // using the bits from the RHS. Below, we use knowledge about the RHS to
1373 // simplify the LHS, here we're using information from the LHS to simplify
1374 // the RHS.
1375 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
1376 // Do not increment Depth here; that can cause an infinite loop.
1377 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1378 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1379 if ((LHSKnown.Zero & DemandedBits) ==
1380 (~RHSC->getAPIntValue() & DemandedBits))
1381 return TLO.CombineTo(Op, Op0);
1382
1383 // If any of the set bits in the RHS are known zero on the LHS, shrink
1384 // the constant.
1385 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1386 DemandedElts, TLO))
1387 return true;
1388
1389 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1390 // constant, but if this 'and' is only clearing bits that were just set by
1391 // the xor, then this 'and' can be eliminated by shrinking the mask of
1392 // the xor. For example, for a 32-bit X:
1393 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1394 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1395 LHSKnown.One == ~RHSC->getAPIntValue()) {
1396 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1397 return TLO.CombineTo(Op, Xor);
1398 }
1399 }
1400
1401 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1402 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1403 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
        (Op0.getOperand(0).isUndef() ||
         ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
        Op0->hasOneUse()) {
      unsigned NumSubElts =
          Op0.getOperand(1).getValueType().getVectorNumElements();
      unsigned SubIdx = Op0.getConstantOperandVal(2);
1410 APInt DemandedSub =
1411 APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1412 KnownBits KnownSubMask =
1413 TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1414 if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1415 SDValue NewAnd =
1416 TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1417 SDValue NewInsert =
1418 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1419 Op0.getOperand(1), Op0.getOperand(2));
1420 return TLO.CombineTo(Op, NewInsert);
1421 }
1422 }
1423
1424 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1425 Depth + 1))
1426 return true;
1427 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1428 if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1429 Known2, TLO, Depth + 1))
1430 return true;
1431 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1432
1433 // If all of the demanded bits are known one on one side, return the other.
1434 // These bits cannot contribute to the result of the 'and'.
1435 if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1436 return TLO.CombineTo(Op, Op0);
1437 if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1438 return TLO.CombineTo(Op, Op1);
1439 // If all of the demanded bits in the inputs are known zeros, return zero.
1440 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1441 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1442 // If the RHS is a constant, see if we can simplify it.
1443 if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1444 TLO))
1445 return true;
1446 // If the operation can be done in a smaller type, do so.
1447 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1448 return true;
1449
1450 // Attempt to avoid multi-use ops if we don't need anything from them.
1451 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1452 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1453 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1454 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1455 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1456 if (DemandedOp0 || DemandedOp1) {
1457 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1458 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1459 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1460 return TLO.CombineTo(Op, NewOp);
1461 }
1462 }
1463
1464 Known &= Known2;
1465 break;
1466 }
1467 case ISD::OR: {
1468 SDValue Op0 = Op.getOperand(0);
1469 SDValue Op1 = Op.getOperand(1);
1470
1471 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1472 Depth + 1))
1473 return true;
1474 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1475 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1476 Known2, TLO, Depth + 1))
1477 return true;
1478 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1479
1480 // If all of the demanded bits are known zero on one side, return the other.
1481 // These bits cannot contribute to the result of the 'or'.
1482 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1483 return TLO.CombineTo(Op, Op0);
1484 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1485 return TLO.CombineTo(Op, Op1);
1486 // If the RHS is a constant, see if we can simplify it.
1487 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1488 return true;
1489 // If the operation can be done in a smaller type, do so.
1490 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1491 return true;
1492
1493 // Attempt to avoid multi-use ops if we don't need anything from them.
1494 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1495 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1496 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1497 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1498 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1499 if (DemandedOp0 || DemandedOp1) {
1500 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1501 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1502 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1503 return TLO.CombineTo(Op, NewOp);
1504 }
1505 }
1506
1507 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1508 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1509 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1510 Op0->hasOneUse() && Op1->hasOneUse()) {
1511 // Attempt to match all commutations - m_c_Or would've been useful!
1512 for (int I = 0; I != 2; ++I) {
1513 SDValue X = Op.getOperand(I).getOperand(0);
1514 SDValue C1 = Op.getOperand(I).getOperand(1);
1515 SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1516 SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1517 if (Alt.getOpcode() == ISD::OR) {
1518 for (int J = 0; J != 2; ++J) {
1519 if (X == Alt.getOperand(J)) {
1520 SDValue Y = Alt.getOperand(1 - J);
1521 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1522 {C1, C2})) {
1523 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1524 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1525 return TLO.CombineTo(
1526 Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1527 }
1528 }
1529 }
1530 }
1531 }
1532 }
1533
1534 Known |= Known2;
1535 break;
1536 }
1537 case ISD::XOR: {
1538 SDValue Op0 = Op.getOperand(0);
1539 SDValue Op1 = Op.getOperand(1);
1540
1541 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1542 Depth + 1))
1543 return true;
1544 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1545 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1546 Depth + 1))
1547 return true;
1548 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1549
1550 // If all of the demanded bits are known zero on one side, return the other.
1551 // These bits cannot contribute to the result of the 'xor'.
1552 if (DemandedBits.isSubsetOf(Known.Zero))
1553 return TLO.CombineTo(Op, Op0);
1554 if (DemandedBits.isSubsetOf(Known2.Zero))
1555 return TLO.CombineTo(Op, Op1);
1556 // If the operation can be done in a smaller type, do so.
1557 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1558 return true;
1559
1560 // If all of the unknown bits are known to be zero on one side or the other
1561 // turn this into an *inclusive* or.
1562 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1563 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1564 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1565
1566 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1567 if (C) {
1568 // If one side is a constant, and all of the set bits in the constant are
1569 // also known set on the other side, turn this into an AND, as we know
1570 // the bits will be cleared.
1571 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1572 // NB: it is okay if more bits are known than are requested
1573 if (C->getAPIntValue() == Known2.One) {
1574 SDValue ANDC =
1575 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1576 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1577 }
1578
1579 // If the RHS is a constant, see if we can change it. Don't alter a -1
1580 // constant because that's a 'not' op, and that is better for combining
1581 // and codegen.
1582 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1583 // We're flipping all demanded bits. Flip the undemanded bits too.
1584 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1585 return TLO.CombineTo(Op, New);
1586 }
1587
1588 unsigned Op0Opcode = Op0.getOpcode();
1589 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1590 if (ConstantSDNode *ShiftC =
1591 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1592 // Don't crash on an oversized shift. We can not guarantee that a
1593 // bogus shift has been simplified to undef.
1594 if (ShiftC->getAPIntValue().ult(BitWidth)) {
            uint64_t ShiftAmt = ShiftC->getZExtValue();
            APInt Ones = APInt::getAllOnes(BitWidth);
            Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1598 : Ones.lshr(ShiftAmt);
1599 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
1600 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1601 TLI.isDesirableToCommuteXorWithShift(Op.getNode())) {
1602 // If the xor constant is a demanded mask, do a 'not' before the
1603 // shift:
1604 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1605 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1606 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1607 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1608 Op0.getOperand(1)));
1609 }
1610 }
1611 }
1612 }
1613 }
1614
1615 // If we can't turn this into a 'not', try to shrink the constant.
1616 if (!C || !C->isAllOnes())
1617 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1618 return true;
1619
1620 // Attempt to avoid multi-use ops if we don't need anything from them.
1621 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1622 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1623 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1624 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1625 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1626 if (DemandedOp0 || DemandedOp1) {
1627 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1628 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1629 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1630 return TLO.CombineTo(Op, NewOp);
1631 }
1632 }
1633
1634 Known ^= Known2;
1635 break;
1636 }
1637 case ISD::SELECT:
1638 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
1639 Depth + 1))
1640 return true;
1641 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
1642 Depth + 1))
1643 return true;
1644 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1645 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1646
1647 // If the operands are constants, see if we can simplify them.
1648 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1649 return true;
1650
1651 // Only known if known in both the LHS and RHS.
1652 Known = Known.intersectWith(Known2);
1653 break;
1654 case ISD::VSELECT:
1655 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1656 Known, TLO, Depth + 1))
1657 return true;
1658 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1659 Known2, TLO, Depth + 1))
1660 return true;
1661 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1662 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1663
1664 // Only known if known in both the LHS and RHS.
1665 Known = Known.intersectWith(Known2);
1666 break;
1667 case ISD::SELECT_CC:
1668 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
1669 Depth + 1))
1670 return true;
1671 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
1672 Depth + 1))
1673 return true;
1674 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1675 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1676
1677 // If the operands are constants, see if we can simplify them.
1678 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1679 return true;
1680
1681 // Only known if known in both the LHS and RHS.
1682 Known = Known.intersectWith(Known2);
1683 break;
1684 case ISD::SETCC: {
1685 SDValue Op0 = Op.getOperand(0);
1686 SDValue Op1 = Op.getOperand(1);
1687 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1688 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1689 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1690 // -1, we may be able to bypass the setcc.
1691 if (DemandedBits.isSignMask() &&
1692 Op0.getScalarValueSizeInBits() == BitWidth &&
1693 getBooleanContents(Op0.getValueType()) ==
1694 BooleanContent::ZeroOrNegativeOneBooleanContent) {
1695 // If we're testing X < 0, then this compare isn't needed - just use X!
1696 // FIXME: We're limiting to integer types here, but this should also work
1697 // if we don't care about FP signed-zero. The use of SETLT with FP means
1698 // that we don't care about NaNs.
1699 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1700 (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1701 return TLO.CombineTo(Op, Op0);
1702
1703 // TODO: Should we check for other forms of sign-bit comparisons?
1704 // Examples: X <= -1, X >= 0
1705 }
1706 if (getBooleanContents(Op0.getValueType()) ==
1707 TargetLowering::ZeroOrOneBooleanContent &&
1708 BitWidth > 1)
1709 Known.Zero.setBitsFrom(1);
1710 break;
1711 }
1712 case ISD::SHL: {
1713 SDValue Op0 = Op.getOperand(0);
1714 SDValue Op1 = Op.getOperand(1);
1715 EVT ShiftVT = Op1.getValueType();
1716
1717 if (const APInt *SA =
1718 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1719 unsigned ShAmt = SA->getZExtValue();
1720 if (ShAmt == 0)
1721 return TLO.CombineTo(Op, Op0);
1722
1723 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1724 // single shift. We can do this if the bottom bits (which are shifted
1725 // out) are never demanded.
1726 // TODO - support non-uniform vector amounts.
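// e.g. ((X >>u 2) << 5) with the low 5 bits not demanded --> (X << 3):
// both place X[n] at bit n+3 for every demanded position.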
1727 if (Op0.getOpcode() == ISD::SRL) {
1728 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1729 if (const APInt *SA2 =
1730 TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1731 unsigned C1 = SA2->getZExtValue();
1732 unsigned Opc = ISD::SHL;
1733 int Diff = ShAmt - C1;
1734 if (Diff < 0) {
1735 Diff = -Diff;
1736 Opc = ISD::SRL;
1737 }
1738 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1739 return TLO.CombineTo(
1740 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1741 }
1742 }
1743 }
1744
1745 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1746 // are not demanded. This will likely allow the anyext to be folded away.
1747 // TODO - support non-uniform vector amounts.
1748 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1749 SDValue InnerOp = Op0.getOperand(0);
1750 EVT InnerVT = InnerOp.getValueType();
1751 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1752 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1753 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1754 SDValue NarrowShl = TLO.DAG.getNode(
1755 ISD::SHL, dl, InnerVT, InnerOp,
1756 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1757 return TLO.CombineTo(
1758 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1759 }
1760
1761 // Repeat the SHL optimization above in cases where an extension
1762 // intervenes: (shl (anyext (shr x, c1)), c2) to
1763 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1764 // aren't demanded (as above) and that the shifted upper c1 bits of
1765 // x aren't demanded.
1766 // TODO - support non-uniform vector amounts.
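// e.g. (shl (anyext (srl i16:X, 4)), 8) into i32, with only bits 8..19
// demanded, becomes (shl (anyext X), 4).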
1767 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1768 InnerOp.hasOneUse()) {
1769 if (const APInt *SA2 =
1770 TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
1771 unsigned InnerShAmt = SA2->getZExtValue();
1772 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1773 DemandedBits.getActiveBits() <=
1774 (InnerBits - InnerShAmt + ShAmt) &&
1775 DemandedBits.countr_zero() >= ShAmt) {
1776 SDValue NewSA =
1777 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1778 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1779 InnerOp.getOperand(0));
1780 return TLO.CombineTo(
1781 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1782 }
1783 }
1784 }
1785 }
1786
1787 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1788 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1789 Depth + 1))
1790 return true;
1791 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1792 Known.Zero <<= ShAmt;
1793 Known.One <<= ShAmt;
1794 // low bits known zero.
1795 Known.Zero.setLowBits(ShAmt);
1796
1797 // Attempt to avoid multi-use ops if we don't need anything from them.
1798 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1799 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1800 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1801 if (DemandedOp0) {
1802 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1803 return TLO.CombineTo(Op, NewOp);
1804 }
1805 }
1806
1807 // Try shrinking the operation as long as the shift amount will still be
1808 // in range.
1809 if ((ShAmt < DemandedBits.getActiveBits()) &&
1810 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1811 return true;
1812 } else {
1813 // This is a variable shift, so we can't shift the demand mask by a known
1814 // amount. But if we are not demanding high bits, then we are not
1815 // demanding those bits from the pre-shifted operand either.
1816 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1817 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1818 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1819 Depth + 1)) {
1820 SDNodeFlags Flags = Op.getNode()->getFlags();
1821 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1822 // Disable the nsw and nuw flags. We can no longer guarantee that we
1823 // won't wrap after simplification.
1824 Flags.setNoSignedWrap(false);
1825 Flags.setNoUnsignedWrap(false);
1826 Op->setFlags(Flags);
1827 }
1828 return true;
1829 }
1830 Known.resetAll();
1831 }
1832 }
1833
1834 // If we are only demanding sign bits then we can use the shift source
1835 // directly.
1836 if (const APInt *MaxSA =
1837 TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
1838 unsigned ShAmt = MaxSA->getZExtValue();
1839 unsigned NumSignBits =
1840 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1841 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1842 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1843 return TLO.CombineTo(Op, Op0);
1844 }
1845 break;
1846 }
1847 case ISD::SRL: {
1848 SDValue Op0 = Op.getOperand(0);
1849 SDValue Op1 = Op.getOperand(1);
1850 EVT ShiftVT = Op1.getValueType();
1851
1852 // Try to match AVG patterns.
1853 if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
1854 DemandedElts, Depth + 1))
1855 return TLO.CombineTo(Op, AVG);
1856
1857 if (const APInt *SA =
1858 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1859 unsigned ShAmt = SA->getZExtValue();
1860 if (ShAmt == 0)
1861 return TLO.CombineTo(Op, Op0);
1862
1863 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1864 // single shift. We can do this if the top bits (which are shifted out)
1865 // are never demanded.
1866 // TODO - support non-uniform vector amounts.
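// e.g. ((X << 3) >>u 6) with the top 6 bits not demanded --> (X >>u 3).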
1867 if (Op0.getOpcode() == ISD::SHL) {
1868 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1869 if (const APInt *SA2 =
1870 TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1871 unsigned C1 = SA2->getZExtValue();
1872 unsigned Opc = ISD::SRL;
1873 int Diff = ShAmt - C1;
1874 if (Diff < 0) {
1875 Diff = -Diff;
1876 Opc = ISD::SHL;
1877 }
1878 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1879 return TLO.CombineTo(
1880 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1881 }
1882 }
1883 }
1884
1885 APInt InDemandedMask = (DemandedBits << ShAmt);
1886
1887 // If the shift is exact, then it does demand the low bits (and knows that
1888 // they are zero).
1889 if (Op->getFlags().hasExact())
1890 InDemandedMask.setLowBits(ShAmt);
1891
1892 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1893 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
1894 if ((BitWidth % 2) == 0 && !VT.isVector()) {
1895 APInt HiBits = APInt::getHighBitsSet(BitWidth, BitWidth / 2);
1896 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
1897 if (isNarrowingProfitable(VT, HalfVT) &&
1898 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
1899 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1900 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
1901 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
1902 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
1903 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1904 SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
1905 ShAmt, HalfVT, dl, TLO.LegalTypes());
1906 SDValue NewShift =
1907 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
1908 return TLO.CombineTo(
1909 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
1910 }
1911 }
1912
1913 // Compute the new bits that are at the top now.
1914 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1915 Depth + 1))
1916 return true;
1917 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1918 Known.Zero.lshrInPlace(ShAmt);
1919 Known.One.lshrInPlace(ShAmt);
1920 // High bits known zero.
1921 Known.Zero.setHighBits(ShAmt);
1922
1923 // Attempt to avoid multi-use ops if we don't need anything from them.
1924 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1925 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1926 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1927 if (DemandedOp0) {
1928 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
1929 return TLO.CombineTo(Op, NewOp);
1930 }
1931 }
1932 } else {
1933 // Use generic knownbits computation as it has support for non-uniform
1934 // shift amounts.
1935 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1936 }
1937 break;
1938 }
1939 case ISD::SRA: {
1940 SDValue Op0 = Op.getOperand(0);
1941 SDValue Op1 = Op.getOperand(1);
1942 EVT ShiftVT = Op1.getValueType();
1943
1944 // If we only want bits that already match the signbit then we don't need
1945 // to shift.
1946 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
1947 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
1948 NumHiDemandedBits)
1949 return TLO.CombineTo(Op, Op0);
1950
1951 // If this is an arithmetic shift right and only the low-bit is set, we can
1952 // always convert this into a logical shr, even if the shift amount is
1953 // variable. The low bit of the shift cannot be an input sign bit unless
1954 // the shift amount is >= the size of the datatype, which is undefined.
1955 if (DemandedBits.isOne())
1956 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
1957
1958 // Try to match AVG patterns.
1959 if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
1960 DemandedElts, Depth + 1))
1961 return TLO.CombineTo(Op, AVG);
1962
1963 if (const APInt *SA =
1964 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1965 unsigned ShAmt = SA->getZExtValue();
1966 if (ShAmt == 0)
1967 return TLO.CombineTo(Op, Op0);
1968
1969 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
1970 // supports sext_inreg.
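// e.g. (sra (shl i32:X, 24), 24) --> (sext_inreg X, i8).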
1971 if (Op0.getOpcode() == ISD::SHL) {
1972 if (const APInt *InnerSA =
1973 TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1974 unsigned LowBits = BitWidth - ShAmt;
1975 EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
1976 if (VT.isVector())
1977 ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
1978 VT.getVectorElementCount());
1979
1980 if (*InnerSA == ShAmt) {
1981 if (!TLO.LegalOperations() ||
1982 isOperationLegal(ISD::SIGN_EXTEND_INREG, VT))
1983 return TLO.CombineTo(
1984 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
1985 Op0.getOperand(0),
1986 TLO.DAG.getValueType(ExtVT)));
1987
1988 // Even if we can't convert to sext_inreg, we might be able to
1989 // remove this shift pair if the input is already sign extended.
1990 unsigned NumSignBits =
1991 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
1992 if (NumSignBits > ShAmt)
1993 return TLO.CombineTo(Op, Op0.getOperand(0));
1994 }
1995 }
1996 }
1997
1998 APInt InDemandedMask = (DemandedBits << ShAmt);
1999
2000 // If the shift is exact, then it does demand the low bits (and knows that
2001 // they are zero).
2002 if (Op->getFlags().hasExact())
2003 InDemandedMask.setLowBits(ShAmt);
2004
2005 // If any of the demanded bits are produced by the sign extension, we also
2006 // demand the input sign bit.
2007 if (DemandedBits.countl_zero() < ShAmt)
2008 InDemandedMask.setSignBit();
2009
2010 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2011 Depth + 1))
2012 return true;
2013 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2014 Known.Zero.lshrInPlace(ShAmt);
2015 Known.One.lshrInPlace(ShAmt);
2016
2017 // If the input sign bit is known to be zero, or if none of the top bits
2018 // are demanded, turn this into an unsigned shift right.
2019 if (Known.Zero[BitWidth - ShAmt - 1] ||
2020 DemandedBits.countl_zero() >= ShAmt) {
2021 SDNodeFlags Flags;
2022 Flags.setExact(Op->getFlags().hasExact());
2023 return TLO.CombineTo(
2024 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2025 }
2026
2027 int Log2 = DemandedBits.exactLogBase2();
2028 if (Log2 >= 0) {
2029 // The bit must come from the sign.
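// e.g. i8 with only bit 6 demanded: that bit is a copy of the sign here,
// so (X sra ShAmt) --> (X srl 1), moving X[7] into bit 6.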
2030 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2031 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2032 }
2033
2034 if (Known.One[BitWidth - ShAmt - 1])
2035 // New bits are known one.
2036 Known.One.setHighBits(ShAmt);
2037
2038 // Attempt to avoid multi-use ops if we don't need anything from them.
2039 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2040 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2041 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2042 if (DemandedOp0) {
2043 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2044 return TLO.CombineTo(Op, NewOp);
2045 }
2046 }
2047 }
2048 break;
2049 }
2050 case ISD::FSHL:
2051 case ISD::FSHR: {
2052 SDValue Op0 = Op.getOperand(0);
2053 SDValue Op1 = Op.getOperand(1);
2054 SDValue Op2 = Op.getOperand(2);
2055 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2056
2057 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2058 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2059
2060 // For fshl, 0-shift returns the 1st arg.
2061 // For fshr, 0-shift returns the 2nd arg.
2062 if (Amt == 0) {
2063 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2064 Known, TLO, Depth + 1))
2065 return true;
2066 break;
2067 }
2068
2069 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2070 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
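// e.g. fshl(i8 X, i8 Y, 3) == (X << 3) | (Y >> 5): bits 7..3 come from the
// low bits of X and bits 2..0 from the high bits of Y.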
2071 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2072 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2073 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2074 Depth + 1))
2075 return true;
2076 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2077 Depth + 1))
2078 return true;
2079
2080 Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2081 Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2082 Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2083 Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2084 Known = Known.unionWith(Known2);
2085
2086 // Attempt to avoid multi-use ops if we don't need anything from them.
2087 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2088 !DemandedElts.isAllOnes()) {
2089 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2090 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2091 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2092 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2093 if (DemandedOp0 || DemandedOp1) {
2094 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2095 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2096 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2097 DemandedOp1, Op2);
2098 return TLO.CombineTo(Op, NewOp);
2099 }
2100 }
2101 }
2102
2103 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2104 if (isPowerOf2_32(BitWidth)) {
2105 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2106 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2107 Known2, TLO, Depth + 1))
2108 return true;
2109 }
2110 break;
2111 }
2112 case ISD::ROTL:
2113 case ISD::ROTR: {
2114 SDValue Op0 = Op.getOperand(0);
2115 SDValue Op1 = Op.getOperand(1);
2116 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2117
2118 // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
2119 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2120 return TLO.CombineTo(Op, Op0);
2121
2122 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2123 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2124 unsigned RevAmt = BitWidth - Amt;
2125
2126 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2127 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
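// e.g. for rotl(i8 X, 3), result bit i is X[(i - 3) mod 8], so rotating
// DemandedBits right by 3 gives the bits demanded from X.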
2128 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2129 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2130 Depth + 1))
2131 return true;
2132
2133 // rot*(x, 0) --> x
2134 if (Amt == 0)
2135 return TLO.CombineTo(Op, Op0);
2136
2137 // See if we don't demand either half of the rotated bits.
2138 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2139 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2140 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2141 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2142 }
2143 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2144 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2145 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2146 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2147 }
2148 }
2149
2150 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2151 if (isPowerOf2_32(BitWidth)) {
2152 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2153 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2154 Depth + 1))
2155 return true;
2156 }
2157 break;
2158 }
2159 case ISD::SMIN:
2160 case ISD::SMAX:
2161 case ISD::UMIN:
2162 case ISD::UMAX: {
2163 unsigned Opc = Op.getOpcode();
2164 SDValue Op0 = Op.getOperand(0);
2165 SDValue Op1 = Op.getOperand(1);
2166
2167 // If we're only demanding signbits, then we can simplify to OR/AND node.
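// When each operand is effectively 0 or -1 in the demanded bits, smin and
// umax yield -1 there if either operand does (OR), while smax and umin
// yield -1 only if both do (AND).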
2168 unsigned BitOp =
2169 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2170 unsigned NumSignBits =
2171 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2172 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2173 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2174 if (NumSignBits >= NumDemandedUpperBits)
2175 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2176
2177 // Check if one arg is always less/greater than (or equal) to the other arg.
2178 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2179 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2180 switch (Opc) {
2181 case ISD::SMIN:
2182 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2183 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2184 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2185 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2186 Known = KnownBits::smin(Known0, Known1);
2187 break;
2188 case ISD::SMAX:
2189 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2190 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2191 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2192 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2193 Known = KnownBits::smax(Known0, Known1);
2194 break;
2195 case ISD::UMIN:
2196 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2197 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2198 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2199 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2200 Known = KnownBits::umin(Known0, Known1);
2201 break;
2202 case ISD::UMAX:
2203 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2204 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2205 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2206 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2207 Known = KnownBits::umax(Known0, Known1);
2208 break;
2209 }
2210 break;
2211 }
2212 case ISD::BITREVERSE: {
2213 SDValue Src = Op.getOperand(0);
2214 APInt DemandedSrcBits = DemandedBits.reverseBits();
2215 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2216 Depth + 1))
2217 return true;
2218 Known.One = Known2.One.reverseBits();
2219 Known.Zero = Known2.Zero.reverseBits();
2220 break;
2221 }
2222 case ISD::BSWAP: {
2223 SDValue Src = Op.getOperand(0);
2224
2225 // If the only bits demanded come from one byte of the bswap result,
2226 // just shift the input byte into position to eliminate the bswap.
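// e.g. demanding only bits 15..8 of (bswap i32:X): that byte is X[23:16],
// so the bswap can be replaced by (X >> 8).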
2227 unsigned NLZ = DemandedBits.countl_zero();
2228 unsigned NTZ = DemandedBits.countr_zero();
2229
2230 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2231 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2232 // have 14 leading zeros, round to 8.
2233 NLZ = alignDown(NLZ, 8);
2234 NTZ = alignDown(NTZ, 8);
2235 // If we need exactly one byte, we can do this transformation.
2236 if (BitWidth - NLZ - NTZ == 8) {
2237 // Replace this with either a left or right shift to get the byte into
2238 // the right place.
2239 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2240 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2241 EVT ShiftAmtTy = getShiftAmountTy(VT, DL);
2242 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2243 SDValue ShAmt = TLO.DAG.getConstant(ShiftAmount, dl, ShiftAmtTy);
2244 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2245 return TLO.CombineTo(Op, NewOp);
2246 }
2247 }
2248
2249 APInt DemandedSrcBits = DemandedBits.byteSwap();
2250 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2251 Depth + 1))
2252 return true;
2253 Known.One = Known2.One.byteSwap();
2254 Known.Zero = Known2.Zero.byteSwap();
2255 break;
2256 }
2257 case ISD::CTPOP: {
2258 // If only 1 bit is demanded, replace with PARITY as long as we're before
2259 // op legalization.
2260 // FIXME: Limit to scalars for now.
2261 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2262 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2263 Op.getOperand(0)));
2264
2265 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2266 break;
2267 }
2268 case ISD::SIGN_EXTEND_INREG: {
2269 SDValue Op0 = Op.getOperand(0);
2270 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2271 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2272
2273 // If we only care about the highest bit, don't bother shifting right.
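// e.g. (sext_inreg i32:X, i8) with only bit 31 demanded --> (shl X, 24),
// which moves the i8 sign bit X[7] into bit 31.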
2274 if (DemandedBits.isSignMask()) {
2275 unsigned MinSignedBits =
2276 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2277 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2278 // However if the input is already sign extended we expect the sign
2279 // extension to be dropped altogether later and do not simplify.
2280 if (!AlreadySignExtended) {
2281 // Compute the correct shift amount type, which must be getShiftAmountTy
2282 // for scalar types after legalization.
2283 SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
2284 getShiftAmountTy(VT, DL));
2285 return TLO.CombineTo(Op,
2286 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2287 }
2288 }
2289
2290 // If none of the extended bits are demanded, eliminate the sextinreg.
2291 if (DemandedBits.getActiveBits() <= ExVTBits)
2292 return TLO.CombineTo(Op, Op0);
2293
2294 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2295
2296 // Since the sign extended bits are demanded, we know that the sign
2297 // bit is demanded.
2298 InputDemandedBits.setBit(ExVTBits - 1);
2299
2300 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2301 Depth + 1))
2302 return true;
2303 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2304
2305 // If the sign bit of the input is known set or clear, then we know the
2306 // top bits of the result.
2307
2308 // If the input sign bit is known zero, convert this into a zero extension.
2309 if (Known.Zero[ExVTBits - 1])
2310 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2311
2312 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2313 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2314 Known.One.setBitsFrom(ExVTBits);
2315 Known.Zero &= Mask;
2316 } else { // Input sign bit unknown
2317 Known.Zero &= Mask;
2318 Known.One &= Mask;
2319 }
2320 break;
2321 }
2322 case ISD::BUILD_PAIR: {
2323 EVT HalfVT = Op.getOperand(0).getValueType();
2324 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2325
2326 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2327 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2328
2329 KnownBits KnownLo, KnownHi;
2330
2331 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2332 return true;
2333
2334 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2335 return true;
2336
2337 Known = KnownHi.concat(KnownLo);
2338 break;
2339 }
2340 case ISD::ZERO_EXTEND_VECTOR_INREG:
2341 if (VT.isScalableVector())
2342 return false;
2343 [[fallthrough]];
2344 case ISD::ZERO_EXTEND: {
2345 SDValue Src = Op.getOperand(0);
2346 EVT SrcVT = Src.getValueType();
2347 unsigned InBits = SrcVT.getScalarSizeInBits();
2348 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2349 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2350
2351 // If none of the top bits are demanded, convert this into an any_extend.
2352 if (DemandedBits.getActiveBits() <= InBits) {
2353 // If we only need the non-extended bits of the bottom element
2354 // then we can just bitcast to the result.
2355 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2356 VT.getSizeInBits() == SrcVT.getSizeInBits())
2357 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2358
2359 unsigned Opc =
2360 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2361 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2362 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2363 }
2364
2365 APInt InDemandedBits = DemandedBits.trunc(InBits);
2366 APInt InDemandedElts = DemandedElts.zext(InElts);
2367 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2368 Depth + 1))
2369 return true;
2370 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2371 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2372 Known = Known.zext(BitWidth);
2373
2374 // Attempt to avoid multi-use ops if we don't need anything from them.
2375 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2376 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2377 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2378 break;
2379 }
2380 case ISD::SIGN_EXTEND_VECTOR_INREG:
2381 if (VT.isScalableVector())
2382 return false;
2383 [[fallthrough]];
2384 case ISD::SIGN_EXTEND: {
2385 SDValue Src = Op.getOperand(0);
2386 EVT SrcVT = Src.getValueType();
2387 unsigned InBits = SrcVT.getScalarSizeInBits();
2388 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2389 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2390
2391 // If none of the top bits are demanded, convert this into an any_extend.
2392 if (DemandedBits.getActiveBits() <= InBits) {
2393 // If we only need the non-extended bits of the bottom element
2394 // then we can just bitcast to the result.
2395 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2396 VT.getSizeInBits() == SrcVT.getSizeInBits())
2397 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2398
2399 unsigned Opc =
2400 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2401 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2402 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2403 }
2404
2405 APInt InDemandedBits = DemandedBits.trunc(InBits);
2406 APInt InDemandedElts = DemandedElts.zext(InElts);
2407
2408 // Since some of the sign extended bits are demanded, we know that the sign
2409 // bit is demanded.
2410 InDemandedBits.setBit(InBits - 1);
2411
2412 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2413 Depth + 1))
2414 return true;
2415 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2416 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2417
2418 // If the sign bit is known one, the top bits match.
2419 Known = Known.sext(BitWidth);
2420
2421 // If the sign bit is known zero, convert this to a zero extend.
2422 if (Known.isNonNegative()) {
2423 unsigned Opc =
2424 IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2425 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2426 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2427 }
2428
2429 // Attempt to avoid multi-use ops if we don't need anything from them.
2430 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2431 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2432 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2433 break;
2434 }
2435 case ISD::ANY_EXTEND_VECTOR_INREG:
2436 if (VT.isScalableVector())
2437 return false;
2438 [[fallthrough]];
2439 case ISD::ANY_EXTEND: {
2440 SDValue Src = Op.getOperand(0);
2441 EVT SrcVT = Src.getValueType();
2442 unsigned InBits = SrcVT.getScalarSizeInBits();
2443 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2444 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2445
2446 // If we only need the bottom element then we can just bitcast.
2447 // TODO: Handle ANY_EXTEND?
2448 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2449 VT.getSizeInBits() == SrcVT.getSizeInBits())
2450 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2451
2452 APInt InDemandedBits = DemandedBits.trunc(InBits);
2453 APInt InDemandedElts = DemandedElts.zext(InElts);
2454 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2455 Depth + 1))
2456 return true;
2457 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2458 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2459 Known = Known.anyext(BitWidth);
2460
2461 // Attempt to avoid multi-use ops if we don't need anything from them.
2462 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2463 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2464 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2465 break;
2466 }
2467 case ISD::TRUNCATE: {
2468 SDValue Src = Op.getOperand(0);
2469
2470 // Simplify the input, using demanded bit information, and compute the known
2471 // zero/one bits live out.
2472 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2473 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2474 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2475 Depth + 1))
2476 return true;
2477 Known = Known.trunc(BitWidth);
2478
2479 // Attempt to avoid multi-use ops if we don't need anything from them.
2480 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2481 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2482 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2483
2484 // If the input is only used by this truncate, see if we can shrink it based
2485 // on the known demanded bits.
2486 switch (Src.getOpcode()) {
2487 default:
2488 break;
2489 case ISD::SRL:
2490 // Shrink SRL by a constant if none of the high bits shifted in are
2491 // demanded.
2492 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2493 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2494 // undesirable.
2495 break;
2496
2497 if (Src.getNode()->hasOneUse()) {
2498 const APInt *ShAmtC =
2499 TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
2500 if (!ShAmtC || ShAmtC->uge(BitWidth))
2501 break;
2502 uint64_t ShVal = ShAmtC->getZExtValue();
2503
2504 APInt HighBits =
2505 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2506 HighBits.lshrInPlace(ShVal);
2507 HighBits = HighBits.trunc(BitWidth);
2508
2509 if (!(HighBits & DemandedBits)) {
2510 // None of the shifted in bits are needed. Add a truncate of the
2511 // shift input, then shift it.
2512 SDValue NewShAmt = TLO.DAG.getConstant(
2513 ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
2514 SDValue NewTrunc =
2515 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2516 return TLO.CombineTo(
2517 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2518 }
2519 }
2520 break;
2521 }
2522
2523 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2524 break;
2525 }
2526 case ISD::AssertZext: {
2527 // AssertZext demands all of the high bits, plus any of the low bits
2528 // demanded by its users.
2529 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2530 APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2531 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2532 TLO, Depth + 1))
2533 return true;
2534 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2535
2536 Known.Zero |= ~InMask;
2537 Known.One &= (~Known.Zero);
2538 break;
2539 }
2540 case ISD::EXTRACT_VECTOR_ELT: {
2541 SDValue Src = Op.getOperand(0);
2542 SDValue Idx = Op.getOperand(1);
2543 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2544 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2545
2546 if (SrcEltCnt.isScalable())
2547 return false;
2548
2549 // Demand the bits from every vector element without a constant index.
2550 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2551 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2552 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2553 if (CIdx->getAPIntValue().ult(NumSrcElts))
2554 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2555
2556 // If BitWidth > EltBitWidth the value is any-extended. So we do not know
2557 // anything about the extended bits.
2558 APInt DemandedSrcBits = DemandedBits;
2559 if (BitWidth > EltBitWidth)
2560 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2561
2562 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2563 Depth + 1))
2564 return true;
2565
2566 // Attempt to avoid multi-use ops if we don't need anything from them.
2567 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2568 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2569 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2570 SDValue NewOp =
2571 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2572 return TLO.CombineTo(Op, NewOp);
2573 }
2574 }
2575
2576 Known = Known2;
2577 if (BitWidth > EltBitWidth)
2578 Known = Known.anyext(BitWidth);
2579 break;
2580 }
2581 case ISD::BITCAST: {
2582 if (VT.isScalableVector())
2583 return false;
2584 SDValue Src = Op.getOperand(0);
2585 EVT SrcVT = Src.getValueType();
2586 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2587
2588 // If this is an FP->Int bitcast and if the sign bit is the only
2589 // thing demanded, turn this into a FGETSIGN.
2590 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2591 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2592 SrcVT.isFloatingPoint()) {
2593 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2594 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2595 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2596 SrcVT != MVT::f128) {
2597 // Cannot eliminate/lower SHL for f128 yet.
2598 EVT Ty = OpVTLegal ? VT : MVT::i32;
2599 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2600 // place. We expect the SHL to be eliminated by other optimizations.
2601 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2602 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2603 if (!OpVTLegal && OpVTSizeInBits > 32)
2604 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2605 unsigned ShVal = Op.getValueSizeInBits() - 1;
2606 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2607 return TLO.CombineTo(Op,
2608 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2609 }
2610 }
2611
2612 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2613 // Demand the elt/bit if any of the original elts/bits are demanded.
2614 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2615 unsigned Scale = BitWidth / NumSrcEltBits;
2616 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2617 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2618 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2619 for (unsigned i = 0; i != Scale; ++i) {
2620 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2621 unsigned BitOffset = EltOffset * NumSrcEltBits;
2622 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2623 if (!Sub.isZero()) {
2624 DemandedSrcBits |= Sub;
2625 for (unsigned j = 0; j != NumElts; ++j)
2626 if (DemandedElts[j])
2627 DemandedSrcElts.setBit((j * Scale) + i);
2628 }
2629 }
2630
2631 APInt KnownSrcUndef, KnownSrcZero;
2632 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2633 KnownSrcZero, TLO, Depth + 1))
2634 return true;
2635
2636 KnownBits KnownSrcBits;
2637 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2638 KnownSrcBits, TLO, Depth + 1))
2639 return true;
2640 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2641 // TODO - bigendian once we have test coverage.
2642 unsigned Scale = NumSrcEltBits / BitWidth;
2643 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2644 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2645 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2646 for (unsigned i = 0; i != NumElts; ++i)
2647 if (DemandedElts[i]) {
2648 unsigned Offset = (i % Scale) * BitWidth;
2649 DemandedSrcBits.insertBits(DemandedBits, Offset);
2650 DemandedSrcElts.setBit(i / Scale);
2651 }
2652
2653 if (SrcVT.isVector()) {
2654 APInt KnownSrcUndef, KnownSrcZero;
2655 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2656 KnownSrcZero, TLO, Depth + 1))
2657 return true;
2658 }
2659
2660 KnownBits KnownSrcBits;
2661 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2662 KnownSrcBits, TLO, Depth + 1))
2663 return true;
2664
2665 // Attempt to avoid multi-use ops if we don't need anything from them.
2666 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2667 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2668 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2669 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2670 return TLO.CombineTo(Op, NewOp);
2671 }
2672 }
2673 }
2674
2675 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2676 // recursive call where Known may be useful to the caller.
2677 if (Depth > 0) {
2678 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2679 return false;
2680 }
2681 break;
2682 }
2683 case ISD::MUL:
2684 if (DemandedBits.isPowerOf2()) {
2685 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2686 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2687 // odd (has LSB set), then the left-shifted low bit of X is the answer.
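// e.g. demanding only bit 3 of (X * 24): 24 == 3 << 3 with 3 odd, so bit 3
// of the product equals X[0] and (X << 3) suffices.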
2688 unsigned CTZ = DemandedBits.countr_zero();
2689 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2690 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2691 EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
2692 SDValue AmtC = TLO.DAG.getConstant(CTZ, dl, ShiftAmtTy);
2693 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2694 return TLO.CombineTo(Op, Shl);
2695 }
2696 }
2697 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2698 // X * X is odd iff X is odd.
2699 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
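// e.g. 3 * 3 == 9 == 0b1001: bit 0 equals X[0] and bit 1 is 0.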
2700 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2701 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2702 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2703 return TLO.CombineTo(Op, And1);
2704 }
2705 [[fallthrough]];
2706 case ISD::ADD:
2707 case ISD::SUB: {
2708 // Add, Sub, and Mul don't demand any bits in positions beyond that
2709 // of the highest bit demanded of them.
2710 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2711 SDNodeFlags Flags = Op.getNode()->getFlags();
2712 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2713 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2714 KnownBits KnownOp0, KnownOp1;
2715 if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, KnownOp0, TLO,
2716 Depth + 1) ||
2717 SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2718 Depth + 1) ||
2719 // See if the operation should be performed at a smaller bit width.
2720 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2721 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2722 // Disable the nsw and nuw flags. We can no longer guarantee that we
2723 // won't wrap after simplification.
2724 Flags.setNoSignedWrap(false);
2725 Flags.setNoUnsignedWrap(false);
2726 Op->setFlags(Flags);
2727 }
2728 return true;
2729 }
2730
2731 // neg x with only low bit demanded is simply x.
2732 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2733 isNullConstant(Op0))
2734 return TLO.CombineTo(Op, Op1);
2735
2736 // Attempt to avoid multi-use ops if we don't need anything from them.
2737 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2738 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2739 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2740 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2741 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2742 if (DemandedOp0 || DemandedOp1) {
2743 Flags.setNoSignedWrap(false);
2744 Flags.setNoUnsignedWrap(false);
2745 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2746 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2747 SDValue NewOp =
2748 TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2749 return TLO.CombineTo(Op, NewOp);
2750 }
2751 }
2752
2753 // If we have a constant operand, we may be able to turn it into -1 if we
2754 // do not demand the high bits. This can make the constant smaller to
2755 // encode, allow more general folding, or match specialized instruction
2756 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2757 // is probably not useful (and could be detrimental).
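// e.g. (X + 127) with only the low 7 bits demanded can become (X + -1),
// since 127 and -1 agree modulo 128.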
2758 ConstantSDNode *C = isConstOrConstSplat(Op1);
2759 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2760 if (C && !C->isAllOnes() && !C->isOne() &&
2761 (C->getAPIntValue() | HighMask).isAllOnes()) {
2762 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2763 // Disable the nsw and nuw flags. We can no longer guarantee that we
2764 // won't wrap after simplification.
2765 Flags.setNoSignedWrap(false);
2766 Flags.setNoUnsignedWrap(false);
2767 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
2768 return TLO.CombineTo(Op, NewOp);
2769 }
2770
2771 // Match a multiply with a disguised negated-power-of-2 and convert it to
2772 // an equivalent shift-left amount.
2773 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
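// e.g. (X * -8) + Y --> Y - (X << 3).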
2774 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2775 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2776 return 0;
2777
2778 // Don't touch opaque constants. Also, ignore zero and power-of-2
2779 // multiplies. Those will get folded later.
2780 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2781 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2782 !MulC->getAPIntValue().isPowerOf2()) {
2783 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2784 if (UnmaskedC.isNegatedPowerOf2())
2785 return (-UnmaskedC).logBase2();
2786 }
2787 return 0;
2788 };
2789
2790 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y, unsigned ShlAmt) {
2791 EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
2792 SDValue ShlAmtC = TLO.DAG.getConstant(ShlAmt, dl, ShiftAmtTy);
2793 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2794 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2795 return TLO.CombineTo(Op, Res);
2796 };
2797
2798 if (isOperationLegalOrCustom(ISD::SHL, VT)) {
2799 if (Op.getOpcode() == ISD::ADD) {
2800 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2801 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2802 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2803 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2804 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2805 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2806 }
2807 if (Op.getOpcode() == ISD::SUB) {
2808 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2809 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2810 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2811 }
2812 }
2813
2814 if (Op.getOpcode() == ISD::MUL) {
2815 Known = KnownBits::mul(KnownOp0, KnownOp1);
2816 } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2817 Known = KnownBits::computeForAddSub(Op.getOpcode() == ISD::ADD,
2818 Flags.hasNoSignedWrap(), KnownOp0,
2819 KnownOp1);
2820 }
2821 break;
2822 }
2823 default:
2824 // We also ask the target about intrinsics (which could be specific to it).
2825 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2826 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2827 // TODO: Probably okay to remove after audit; here to reduce change size
2828 // in initial enablement patch for scalable vectors
2829 if (Op.getValueType().isScalableVector())
2830 break;
2831 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2832 Known, TLO, Depth))
2833 return true;
2834 break;
2835 }
2836
2837 // Just use computeKnownBits to compute output bits.
2838 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2839 break;
2840 }
2841
2842 // If we know the value of all of the demanded bits, return this as a
2843 // constant.
2844 if (!isTargetCanonicalConstantNode(Op) &&
2845 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2846 // Avoid folding to a constant if any OpaqueConstant is involved.
2847 const SDNode *N = Op.getNode();
2848 for (SDNode *Op :
2849 llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) {
2850 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
2851 if (C->isOpaque())
2852 return false;
2853 }
2854 if (VT.isInteger())
2855 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2856 if (VT.isFloatingPoint())
2857 return TLO.CombineTo(
2858 Op,
2859 TLO.DAG.getConstantFP(
2860 APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
2861 }
2862
2863 // A multi use 'all demanded elts' simplify failed to find any knownbits.
2864 // Try again just for the original demanded elts.
2865 // Ensure we do this AFTER constant folding above.
2866 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2867 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
2868
2869 return false;
2870}
2871
2872 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2873 const APInt &DemandedElts,
2874 DAGCombinerInfo &DCI) const {
2875 SelectionDAG &DAG = DCI.DAG;
2876 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2877 !DCI.isBeforeLegalizeOps());
2878
2879 APInt KnownUndef, KnownZero;
2880 bool Simplified =
2881 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2882 if (Simplified) {
2883 DCI.AddToWorklist(Op.getNode());
2884 DCI.CommitTargetLoweringOpt(TLO);
2885 }
2886
2887 return Simplified;
2888}
2889
2890/// Given a vector binary operation and known undefined elements for each input
2891/// operand, compute whether each element of the output is undefined.
2892 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2893 const APInt &UndefOp0,
2894 const APInt &UndefOp1) {
2895 EVT VT = BO.getValueType();
2897 "Vector binop only");
2898
2899 EVT EltVT = VT.getVectorElementType();
2900 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
2901 assert(UndefOp0.getBitWidth() == NumElts &&
2902 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2903
2904 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2905 const APInt &UndefVals) {
2906 if (UndefVals[Index])
2907 return DAG.getUNDEF(EltVT);
2908
2909 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
2910 // Try hard to make sure that the getNode() call is not creating temporary
2911 // nodes. Ignore opaque integers because they do not constant fold.
2912 SDValue Elt = BV->getOperand(Index);
2913 auto *C = dyn_cast<ConstantSDNode>(Elt);
2914 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2915 return Elt;
2916 }
2917
2918 return SDValue();
2919 };
2920
2921 APInt KnownUndef = APInt::getZero(NumElts);
2922 for (unsigned i = 0; i != NumElts; ++i) {
2923 // If both inputs for this element are either constant or undef and match
2924 // the element type, compute the constant/undef result for this element of
2925 // the vector.
2926 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2927 // not handle FP constants. The code within getNode() should be refactored
2928 // to avoid the danger of creating a bogus temporary node here.
2929 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
2930 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
2931 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
2932 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
2933 KnownUndef.setBit(i);
2934 }
2935 return KnownUndef;
2936}
2937
2938 bool TargetLowering::SimplifyDemandedVectorElts(
2939 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
2940 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
2941 bool AssumeSingleUse) const {
2942 EVT VT = Op.getValueType();
2943 unsigned Opcode = Op.getOpcode();
2944 APInt DemandedElts = OriginalDemandedElts;
2945 unsigned NumElts = DemandedElts.getBitWidth();
2946 assert(VT.isVector() && "Expected vector op");
2947
2948 KnownUndef = KnownZero = APInt::getZero(NumElts);
2949
2950 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
2951 if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
2952 return false;
2953
2954 // TODO: For now we assume we know nothing about scalable vectors.
2955 if (VT.isScalableVector())
2956 return false;
2957
2958 assert(VT.getVectorNumElements() == NumElts &&
2959 "Mask size mismatches value type element count!");
2960
2961 // Undef operand.
2962 if (Op.isUndef()) {
2963 KnownUndef.setAllBits();
2964 return false;
2965 }
2966
2967 // If Op has other users, assume that all elements are needed.
2968 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
2969 DemandedElts.setAllBits();
2970
2971 // Not demanding any elements from Op.
2972 if (DemandedElts == 0) {
2973 KnownUndef.setAllBits();
2974 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2975 }
2976
2977 // Limit search depth.
2978 if (Depth >= SelectionDAG::MaxRecursionDepth)
2979 return false;
2980
2981 SDLoc DL(Op);
2982 unsigned EltSizeInBits = VT.getScalarSizeInBits();
2983 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
2984
2985 // Helper for demanding the specified elements and all the bits of both binary
2986 // operands.
2987 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
2988 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
2989 TLO.DAG, Depth + 1);
2990 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
2991 TLO.DAG, Depth + 1);
2992 if (NewOp0 || NewOp1) {
2993 SDValue NewOp =
2994 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
2995 NewOp1 ? NewOp1 : Op1, Op->getFlags());
2996 return TLO.CombineTo(Op, NewOp);
2997 }
2998 return false;
2999 };
3000
3001 switch (Opcode) {
3002 case ISD::SCALAR_TO_VECTOR: {
3003 if (!DemandedElts[0]) {
3004 KnownUndef.setAllBits();
3005 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3006 }
3007 SDValue ScalarSrc = Op.getOperand(0);
3008 if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3009 SDValue Src = ScalarSrc.getOperand(0);
3010 SDValue Idx = ScalarSrc.getOperand(1);
3011 EVT SrcVT = Src.getValueType();
3012
3013 ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3014
3015 if (SrcEltCnt.isScalable())
3016 return false;
3017
3018 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3019 if (isNullConstant(Idx)) {
3020 APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
3021 APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
3022 APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
3023 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3024 TLO, Depth + 1))
3025 return true;
3026 }
3027 }
3028 KnownUndef.setHighBits(NumElts - 1);
3029 break;
3030 }
3031 case ISD::BITCAST: {
3032 SDValue Src = Op.getOperand(0);
3033 EVT SrcVT = Src.getValueType();
3034
3035 // We only handle vectors here.
3036 // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
3037 if (!SrcVT.isVector())
3038 break;
3039
3040 // Fast handling of 'identity' bitcasts.
3041 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3042 if (NumSrcElts == NumElts)
3043 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3044 KnownZero, TLO, Depth + 1);
3045
3046 APInt SrcDemandedElts, SrcZero, SrcUndef;
3047
3048 // Bitcast from 'large element' src vector to 'small element' vector, we
3049 // must demand a source element if any DemandedElt maps to it.
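// e.g. for a v2i32 -> v4i16 bitcast, demanding v4i16 element 2 requires
// v2i32 source element 1.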
3050 if ((NumElts % NumSrcElts) == 0) {
3051 unsigned Scale = NumElts / NumSrcElts;
3052 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3053 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3054 TLO, Depth + 1))
3055 return true;
3056
3057 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3058 // of the large element.
3059 // TODO - bigendian once we have test coverage.
3060 if (IsLE) {
3061 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3062 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3063 for (unsigned i = 0; i != NumElts; ++i)
3064 if (DemandedElts[i]) {
3065 unsigned Ofs = (i % Scale) * EltSizeInBits;
3066 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3067 }
3068
3069 KnownBits Known;
3070 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3071 TLO, Depth + 1))
3072 return true;
3073
3074 // The bitcast has split each wide element into a number of
3075 // narrow subelements. We have just computed the Known bits
3076 // for wide elements. See if element splitting results in
3077 // some subelements being zero. Only for demanded elements!
3078 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3079 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3080 .isAllOnes())
3081 continue;
3082 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3083 unsigned Elt = Scale * SrcElt + SubElt;
3084 if (DemandedElts[Elt])
3085 KnownZero.setBit(Elt);
3086 }
3087 }
3088 }
3089
3090 // If the src element is zero/undef then all the output elements will be -
3091 // only demanded elements are guaranteed to be correct.
3092 for (unsigned i = 0; i != NumSrcElts; ++i) {
3093 if (SrcDemandedElts[i]) {
3094 if (SrcZero[i])
3095 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3096 if (SrcUndef[i])
3097 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3098 }
3099 }
3100 }
3101
3102 // Bitcast from a 'small element' src vector to a 'large element' vector: we
3103 // demand all smaller source elements covered by the larger demanded element
3104 // of this vector.
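// E.g. for a v4i32 -> v2i64 bitcast (Scale = 2), demanding v2i64 element 1
// requires demanding v4i32 elements 2 and 3.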
3105 if ((NumSrcElts % NumElts) == 0) {
3106 unsigned Scale = NumSrcElts / NumElts;
3107 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3108 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3109 TLO, Depth + 1))
3110 return true;
3111
3112 // If all the src elements covering an output element are zero/undef, then
3113 // the output element will be as well, assuming it was demanded.
3114 for (unsigned i = 0; i != NumElts; ++i) {
3115 if (DemandedElts[i]) {
3116 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3117 KnownZero.setBit(i);
3118 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3119 KnownUndef.setBit(i);
3120 }
3121 }
3122 }
3123 break;
3124 }
3125 case ISD::BUILD_VECTOR: {
3126 // Check all elements and simplify any unused elements with UNDEF.
3127 if (!DemandedElts.isAllOnes()) {
3128 // Don't simplify BROADCASTS.
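// (i.e. only rewrite build vectors whose operands are not all the same node)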
3129 if (llvm::any_of(Op->op_values(),
3130 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3131 SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
3132 bool Updated = false;
3133 for (unsigned i = 0; i != NumElts; ++i) {
3134 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3135 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3136 KnownUndef.setBit(i);
3137 Updated = true;
3138 }
3139 }
3140 if (Updated)
3141 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3142 }
3143 }
3144 for (unsigned i = 0; i != NumElts; ++i) {
3145 SDValue SrcOp = Op.getOperand(i);
3146 if (SrcOp.isUndef()) {
3147 KnownUndef.setBit(i);
3148 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3149 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
3150 KnownZero.setBit(i);
3151 }
3152 }
3153 break;
3154 }
3155 case ISD::CONCAT_VECTORS: {
3156 EVT SubVT = Op.getOperand(0).getValueType();
3157 unsigned NumSubVecs = Op.getNumOperands();
3158 unsigned NumSubElts = SubVT.getVectorNumElements();
3159 for (unsigned i = 0; i != NumSubVecs; ++i) {
3160 SDValue SubOp = Op.getOperand(i);
3161 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3162 APInt SubUndef, SubZero;
3163 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3164 Depth + 1))
3165 return true;
3166 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3167 KnownZero.insertBits(SubZero, i * NumSubElts);
3168 }
3169
3170 // Attempt to avoid multi-use ops if we don't need anything from them.
3171 if (!DemandedElts.isAllOnes()) {
3172 bool FoundNewSub = false;
3173 SmallVector<SDValue, 2> DemandedSubOps;
3174 for (unsigned i = 0; i != NumSubVecs; ++i) {
3175 SDValue SubOp = Op.getOperand(i);
3176 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3177 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3178 SubOp, SubElts, TLO.DAG, Depth + 1);
3179 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3180 FoundNewSub = NewSubOp ? true : FoundNewSub;
3181 }
3182 if (FoundNewSub) {
3183 SDValue NewOp =
3184 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3185 return TLO.CombineTo(Op, NewOp);
3186 }
3187 }
3188 break;
3189 }
3190 case ISD::INSERT_SUBVECTOR: {
3191 // Demand any elements from the subvector and the remainder from the src it's
3192 // inserted into.
3193 SDValue Src = Op.getOperand(0);
3194 SDValue Sub = Op.getOperand(1);
3195 uint64_t Idx = Op.getConstantOperandVal(2);
3196 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3197 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3198 APInt DemandedSrcElts = DemandedElts;
3199 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
3200
3201 APInt SubUndef, SubZero;
3202 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3203 Depth + 1))
3204 return true;
3205
3206 // If none of the src operand elements are demanded, replace it with undef.
3207 if (!DemandedSrcElts && !Src.isUndef())
3208 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3209 TLO.DAG.getUNDEF(VT), Sub,
3210 Op.getOperand(2)));
3211
3212 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3213 TLO, Depth + 1))
3214 return true;
3215 KnownUndef.insertBits(SubUndef, Idx);
3216 KnownZero.insertBits(SubZero, Idx);
3217
3218 // Attempt to avoid multi-use ops if we don't need anything from them.
3219 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3220 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3221 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3222 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3223 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3224 if (NewSrc || NewSub) {
3225 NewSrc = NewSrc ? NewSrc : Src;
3226 NewSub = NewSub ? NewSub : Sub;
3227 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3228 NewSub, Op.getOperand(2));
3229 return TLO.CombineTo(Op, NewOp);
3230 }
3231 }
3232 break;
3233 }
3234 case ISD::EXTRACT_SUBVECTOR: {
3235 // Offset the demanded elts by the subvector index.
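// E.g. extracting a v2i32 at index 2 from a v8i32: demanding result element i
// demands source element i + 2.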
3236 SDValue Src = Op.getOperand(0);
3237 if (Src.getValueType().isScalableVector())
3238 break;
3239 uint64_t Idx = Op.getConstantOperandVal(1);
3240 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3241 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3242
3243 APInt SrcUndef, SrcZero;
3244 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3245 Depth + 1))
3246 return true;
3247 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3248 KnownZero = SrcZero.extractBits(NumElts, Idx);
3249
3250 // Attempt to avoid multi-use ops if we don't need anything from them.
3251 if (!DemandedElts.isAllOnes()) {
3252 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3253 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3254 if (NewSrc) {
3255 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3256 Op.getOperand(1));
3257 return TLO.CombineTo(Op, NewOp);
3258 }
3259 }
3260 break;
3261 }
3262 case ISD::INSERT_VECTOR_ELT: {
3263 SDValue Vec = Op.getOperand(0);
3264 SDValue Scl = Op.getOperand(1);
3265 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3266
3267 // For a legal, constant insertion index, if we don't need this insertion
3268 // then strip it, else remove it from the demanded elts.
3269 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3270 unsigned Idx = CIdx->getZExtValue();
3271 if (!DemandedElts[Idx])
3272 return TLO.CombineTo(Op, Vec);
3273
3274 APInt DemandedVecElts(DemandedElts);
3275 DemandedVecElts.clearBit(Idx);
3276 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3277 KnownZero, TLO, Depth + 1))
3278 return true;
3279
3280 KnownUndef.setBitVal(Idx, Scl.isUndef());
3281
3282 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3283 break;
3284 }
3285
3286 APInt VecUndef, VecZero;
3287 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3288 Depth + 1))
3289 return true;
3290 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3291 break;
3292 }
3293 case ISD::VSELECT: {
3294 SDValue Sel = Op.getOperand(0);
3295 SDValue LHS = Op.getOperand(1);
3296 SDValue RHS = Op.getOperand(2);
3297
3298 // Try to transform the select condition based on the current demanded
3299 // elements.
3300 APInt UndefSel, UndefZero;
3301 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, UndefZero, TLO,
3302 Depth + 1))
3303 return true;
3304
3305 // See if we can simplify either vselect operand.
3306 APInt DemandedLHS(DemandedElts);
3307 APInt DemandedRHS(DemandedElts);
3308 APInt UndefLHS, ZeroLHS;
3309 APInt UndefRHS, ZeroRHS;
3310 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3311 Depth + 1))
3312 return true;
3313 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3314 Depth + 1))
3315 return true;
3316
3317 KnownUndef = UndefLHS & UndefRHS;
3318 KnownZero = ZeroLHS & ZeroRHS;
3319
3320 // If we know that the selected element is always zero, we don't need the
3321 // select value element.
3322 APInt DemandedSel = DemandedElts & ~KnownZero;
3323 if (DemandedSel != DemandedElts)
3324 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, UndefZero, TLO,
3325 Depth + 1))
3326 return true;
3327
3328 break;
3329 }
3330 case ISD::VECTOR_SHUFFLE: {
3331 SDValue LHS = Op.getOperand(0);
3332 SDValue RHS = Op.getOperand(1);
3333 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3334
3335 // Collect demanded elements from shuffle operands.
3336 APInt DemandedLHS(NumElts, 0);
3337 APInt DemandedRHS(NumElts, 0);
3338 for (unsigned i = 0; i != NumElts; ++i) {
3339 int M = ShuffleMask[i];
3340 if (M < 0 || !DemandedElts[i])
3341 continue;
3342 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3343 if (M < (int)NumElts)
3344 DemandedLHS.setBit(M);
3345 else
3346 DemandedRHS.setBit(M - NumElts);
3347 }
3348
3349 // See if we can simplify either shuffle operand.
3350 APInt UndefLHS, ZeroLHS;
3351 APInt UndefRHS, ZeroRHS;
3352 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3353 Depth + 1))
3354 return true;
3355 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3356 Depth + 1))
3357 return true;
3358
3359 // Simplify mask using undef elements from LHS/RHS.
3360 bool Updated = false;
3361 bool IdentityLHS = true, IdentityRHS = true;
3362 SmallVector<int, 32> NewMask(ShuffleMask);
3363 for (unsigned i = 0; i != NumElts; ++i) {
3364 int &M = NewMask[i];
3365 if (M < 0)
3366 continue;
3367 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3368 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3369 Updated = true;
3370 M = -1;
3371 }
3372 IdentityLHS &= (M < 0) || (M == (int)i);
3373 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3374 }
3375
3376 // Update legal shuffle masks based on demanded elements if it won't reduce
3377 // to Identity, which can cause premature removal of the shuffle mask.
3378 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3379 SDValue LegalShuffle =
3380 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3381 if (LegalShuffle)
3382 return TLO.CombineTo(Op, LegalShuffle);
3383 }
3384
3385 // Propagate undef/zero elements from LHS/RHS.
3386 for (unsigned i = 0; i != NumElts; ++i) {
3387 int M = ShuffleMask[i];
3388 if (M < 0) {
3389 KnownUndef.setBit(i);
3390 } else if (M < (int)NumElts) {
3391 if (UndefLHS[M])
3392 KnownUndef.setBit(i);
3393 if (ZeroLHS[M])
3394 KnownZero.setBit(i);
3395 } else {
3396 if (UndefRHS[M - NumElts])
3397 KnownUndef.setBit(i);
3398 if (ZeroRHS[M - NumElts])
3399 KnownZero.setBit(i);
3400 }
3401 }
3402 break;
3403 }
3404 case ISD::ANY_EXTEND_VECTOR_INREG:
3405 case ISD::SIGN_EXTEND_VECTOR_INREG:
3406 case ISD::ZERO_EXTEND_VECTOR_INREG: {
3407 APInt SrcUndef, SrcZero;
3408 SDValue Src = Op.getOperand(0);
3409 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3410 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3411 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3412 Depth + 1))
3413 return true;
3414 KnownZero = SrcZero.zextOrTrunc(NumElts);
3415 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3416
3417 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3418 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3419 DemandedSrcElts == 1) {
3420 // aext - if we just need the bottom element then we can bitcast.
3421 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3422 }
3423
3424 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3425 // zext(undef) upper bits are guaranteed to be zero.
3426 if (DemandedElts.isSubsetOf(KnownUndef))
3427 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3428 KnownUndef.clearAllBits();
3429
3430 // zext - if we just need the bottom element then we can mask:
3431 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
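// (c' keeps only the low source element of c and zeroes the remaining elements)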
3432 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3433 Op->isOnlyUserOf(Src.getNode()) &&
3434 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3435 SDLoc DL(Op);
3436 EVT SrcVT = Src.getValueType();
3437 EVT SrcSVT = SrcVT.getScalarType();
3438 SmallVector<SDValue> MaskElts;
3439 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3440 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3441 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3442 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3443 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3444 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3445 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3446 }
3447 }
3448 }
3449 break;
3450 }
3451
3452 // TODO: There are more binop opcodes that could be handled here - MIN,
3453 // MAX, saturated math, etc.
3454 case ISD::ADD: {
3455 SDValue Op0 = Op.getOperand(0);
3456 SDValue Op1 = Op.getOperand(1);
3457 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3458 APInt UndefLHS, ZeroLHS;
3459 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3460 Depth + 1, /*AssumeSingleUse*/ true))
3461 return true;
3462 }
3463 [[fallthrough]];
3464 }
3465 case ISD::OR:
3466 case ISD::XOR:
3467 case ISD::SUB:
3468 case ISD::FADD:
3469 case ISD::FSUB:
3470 case ISD::FMUL:
3471 case ISD::FDIV:
3472 case ISD::FREM: {
3473 SDValue Op0 = Op.getOperand(0);
3474 SDValue Op1 = Op.getOperand(1);
3475
3476 APInt UndefRHS, ZeroRHS;
3477 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3478 Depth + 1))
3479 return true;
3480 APInt UndefLHS, ZeroLHS;
3481 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3482 Depth + 1))
3483 return true;
3484
3485 KnownZero = ZeroLHS & ZeroRHS;
3486 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3487
3488 // Attempt to avoid multi-use ops if we don't need anything from them.
3489 // TODO - use KnownUndef to relax the demandedelts?
3490 if (!DemandedElts.isAllOnes())
3491 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3492 return true;
3493 break;
3494 }
3495 case ISD::SHL:
3496 case ISD::SRL:
3497 case ISD::SRA:
3498 case ISD::ROTL:
3499 case ISD::ROTR: {
3500 SDValue Op0 = Op.getOperand(0);
3501 SDValue Op1 = Op.getOperand(1);
3502
3503 APInt UndefRHS, ZeroRHS;
3504 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3505 Depth + 1))
3506 return true;
3507 APInt UndefLHS, ZeroLHS;
3508 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3509 Depth + 1))
3510 return true;
3511
3512 KnownZero = ZeroLHS;
3513 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3514
3515 // Attempt to avoid multi-use ops if we don't need anything from them.
3516 // TODO - use KnownUndef to relax the demandedelts?
3517 if (!DemandedElts.isAllOnes())
3518 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3519 return true;
3520 break;
3521 }
3522 case ISD::MUL:
3523 case ISD::MULHU:
3524 case ISD::MULHS:
3525 case ISD::AND: {
3526 SDValue Op0 = Op.getOperand(0);
3527 SDValue Op1 = Op.getOperand(1);
3528
3529 APInt SrcUndef, SrcZero;
3530 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3531 Depth + 1))
3532 return true;
3533 // If we know that a demanded element was zero in Op1 we don't need to
3534 // demand it in Op0 - it's guaranteed to be zero.
3535 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3536 if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3537 TLO, Depth + 1))
3538 return true;
3539
3540 KnownUndef &= DemandedElts0;
3541 KnownZero &= DemandedElts0;
3542
3543 // If every element pair has a zero/undef then just fold to zero.
3544 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3545 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
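// (an undef element may be chosen to be zero, so the result element is zero either way)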
3546 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3547 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3548
3549 // If either side has a zero element, then the result element is zero, even
3550 // if the other is an UNDEF.
3551 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3552 // and then handle 'and' nodes with the rest of the binop opcodes.
3553 KnownZero |= SrcZero;
3554 KnownUndef &= SrcUndef;
3555 KnownUndef &= ~KnownZero;
3556
3557 // Attempt to avoid multi-use ops if we don't need anything from them.
3558 if (!DemandedElts.isAllOnes())
3559 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3560 return true;
3561 break;
3562 }
3563 case ISD::TRUNCATE:
3564 case ISD::SIGN_EXTEND:
3565 case ISD::ZERO_EXTEND:
3566 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3567 KnownZero, TLO, Depth + 1))
3568 return true;
3569
3570 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3571 // zext(undef) upper bits are guaranteed to be zero.
3572 if (DemandedElts.isSubsetOf(KnownUndef))
3573 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3574 KnownUndef.clearAllBits();
3575 }
3576 break;
3577 default: {
3578 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3579 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3580 KnownZero, TLO, Depth))
3581 return true;
3582 } else {
3583 KnownBits Known;
3584 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3585 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3586 TLO, Depth, AssumeSingleUse))
3587 return true;
3588 }
3589 break;
3590 }
3591 }
3592 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3593
3594 // Constant fold all undef cases.
3595 // TODO: Handle zero cases as well.
3596 if (DemandedElts.isSubsetOf(KnownUndef))
3597 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3598
3599 return false;
3600}
3601
3602/// Determine which of the bits specified in Mask are known to be either zero or
3603 /// one and return them in the Known.
3604 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3605 KnownBits &Known,
3606 const APInt &DemandedElts,
3607 const SelectionDAG &DAG,
3608 unsigned Depth) const {
3609 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3610 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3611 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3612 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3613 "Should use MaskedValueIsZero if you don't know whether Op"
3614 " is a target node!");
3615 Known.resetAll();
3616}
3617
3618 void TargetLowering::computeKnownBitsForTargetInstr(
3619 GISelKnownBits &Analysis, Register R, KnownBits &Known,
3620 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3621 unsigned Depth) const {
3622 Known.resetAll();
3623}
3624
3625 void TargetLowering::computeKnownBitsForFrameIndex(
3626 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3627 // The low bits are known zero if the pointer is aligned.
3628 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3629}
3630
3631 Align TargetLowering::computeKnownAlignForTargetInstr(
3632 GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
3633 unsigned Depth) const {
3634 return Align(1);
3635}
3636
3637/// This method can be implemented by targets that want to expose additional
3638 /// information about sign bits to the DAG Combiner.
3639 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3640 const APInt &,
3641 const SelectionDAG &,
3642 unsigned Depth) const {
3643 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3644 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3645 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3646 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3647 "Should use ComputeNumSignBits if you don't know whether Op"
3648 " is a target node!");
3649 return 1;
3650}
3651
3652 unsigned TargetLowering::computeNumSignBitsForTargetInstr(
3653 GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
3654 const MachineRegisterInfo &MRI, unsigned Depth) const {
3655 return 1;
3656}
3657
3658 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3659 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3660 TargetLoweringOpt &TLO, unsigned Depth) const {
3661 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3662 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3663 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3664 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3665 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3666 " is a target node!");
3667 return false;
3668}
3669
3670 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3671 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3672 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3673 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3674 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3675 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3676 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3677 "Should use SimplifyDemandedBits if you don't know whether Op"
3678 " is a target node!");
3679 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3680 return false;
3681}
3682
3683 SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3684 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3685 SelectionDAG &DAG, unsigned Depth) const {
3686 assert(
3687 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3688 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3689 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3690 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3691 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3692 " is a target node!");
3693 return SDValue();
3694}
3695
3696 SDValue
3697 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3698 SDValue N1, MutableArrayRef<int> Mask,
3699 SelectionDAG &DAG) const {
3700 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3701 if (!LegalMask) {
3702 std::swap(N0, N1);
3703 ShuffleVectorSDNode::commuteMask(Mask);
3704 LegalMask = isShuffleMaskLegal(Mask, VT);
3705 }
3706
3707 if (!LegalMask)
3708 return SDValue();
3709
3710 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3711}
3712
3713 const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode *) const {
3714 return nullptr;
3715}
3716
3717 bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3718 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3719 bool PoisonOnly, unsigned Depth) const {
3720 assert(
3721 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3722 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3723 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3724 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3725 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3726 " is a target node!");
3727 return false;
3728}
3729
3730 bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
3731 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3732 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
3733 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3734 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3735 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3736 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3737 "Should use canCreateUndefOrPoison if you don't know whether Op"
3738 " is a target node!");
3739 // Be conservative and return true.
3740 return true;
3741}
3742
3743 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
3744 const SelectionDAG &DAG,
3745 bool SNaN,
3746 unsigned Depth) const {
3747 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3748 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3749 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3750 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3751 "Should use isKnownNeverNaN if you don't know whether Op"
3752 " is a target node!");
3753 return false;
3754}
3755
3756 bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
3757 const APInt &DemandedElts,
3758 APInt &UndefElts,
3759 const SelectionDAG &DAG,
3760 unsigned Depth) const {
3761 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3762 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3763 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3764 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3765 "Should use isSplatValue if you don't know whether Op"
3766 " is a target node!");
3767 return false;
3768}
3769
3770// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3771// work with truncating build vectors and vectors with elements of less than
3772 // 8 bits.
3773 bool TargetLowering::isConstTrueVal(SDValue N) const {
3774 if (!N)
3775 return false;
3776
3777 unsigned EltWidth;
3778 APInt CVal;
3779 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3780 /*AllowTruncation=*/true)) {
3781 CVal = CN->getAPIntValue();
3782 EltWidth = N.getValueType().getScalarSizeInBits();
3783 } else
3784 return false;
3785
3786 // If this is a truncating splat, truncate the splat value.
3787 // Otherwise, we may fail to match the expected values below.
3788 if (EltWidth < CVal.getBitWidth())
3789 CVal = CVal.trunc(EltWidth);
3790
3791 switch (getBooleanContents(N.getValueType())) {
3792 case UndefinedBooleanContent:
3793 return CVal[0];
3794 case ZeroOrOneBooleanContent:
3795 return CVal.isOne();
3796 case ZeroOrNegativeOneBooleanContent:
3797 return CVal.isAllOnes();
3798 }
3799
3800 llvm_unreachable("Invalid boolean contents");
3801}
3802
3803 bool TargetLowering::isConstFalseVal(SDValue N) const {
3804 if (!N)
3805 return false;
3806
3807 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3808 if (!CN) {
3809 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3810 if (!BV)
3811 return false;
3812
3813 // Only interested in constant splats; we don't care about undef
3814 // elements when identifying boolean constants, and getConstantSplatNode
3815 // returns NULL if all ops are undef.
3816 CN = BV->getConstantSplatNode();
3817 if (!CN)
3818 return false;
3819 }
3820
3821 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3822 return !CN->getAPIntValue()[0];
3823
3824 return CN->isZero();
3825}
3826
3827 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
3828 bool SExt) const {
3829 if (VT == MVT::i1)
3830 return N->isOne();
3831
3832 TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
3833 switch (Cnt) {
3834 case TargetLowering::ZeroOrOneBooleanContent:
3835 // An extended value of 1 is always true, unless its original type is i1,
3836 // in which case it will be sign extended to -1.
3837 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3838 case TargetLowering::UndefinedBooleanContent:
3839 case TargetLowering::ZeroOrNegativeOneBooleanContent:
3840 return N->isAllOnes() && SExt;
3841 }
3842 llvm_unreachable("Unexpected enumeration.");
3843}
3844
3845/// This helper function of SimplifySetCC tries to optimize the comparison when
3846/// either operand of the SetCC node is a bitwise-and instruction.
3847SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
3848 ISD::CondCode Cond, const SDLoc &DL,
3849 DAGCombinerInfo &DCI) const {
3850 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
3851 std::swap(N0, N1);
3852
3853 SelectionDAG &DAG = DCI.DAG;
3854 EVT OpVT = N0.getValueType();
3855 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
3856 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
3857 return SDValue();
3858
3859 // (X & Y) != 0 --> zextOrTrunc(X & Y)
3860 // iff everything but LSB is known zero:
3861 if (Cond == ISD::SETNE && isNullConstant(N1) &&
3864 unsigned NumEltBits = OpVT.getScalarSizeInBits();
3865 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
3866 if (DAG.MaskedValueIsZero(N0, UpperBits))
3867 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
3868 }
3869
3870 // Try to eliminate a power-of-2 mask constant by converting to a signbit
3871 // test in a narrow type that we can truncate to with no cost. Examples:
3872 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
3873 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
3874 // TODO: This conservatively checks for type legality on the source and
3875 // destination types. That may inhibit optimizations, but it also
3876 // allows setcc->shift transforms that may be more beneficial.
3877 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3878 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
3879 isTypeLegal(OpVT) && N0.hasOneUse()) {
3880 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
3881 AndC->getAPIntValue().getActiveBits());
3882 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
3883 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
3884 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
3885 return DAG.getSetCC(DL, VT, Trunc, Zero,
3886 Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
3887 }
3888 }
3889
3890 // Match these patterns in any of their permutations:
3891 // (X & Y) == Y
3892 // (X & Y) != Y
3893 SDValue X, Y;
3894 if (N0.getOperand(0) == N1) {
3895 X = N0.getOperand(1);
3896 Y = N0.getOperand(0);
3897 } else if (N0.getOperand(1) == N1) {
3898 X = N0.getOperand(0);
3899 Y = N0.getOperand(1);
3900 } else {
3901 return SDValue();
3902 }
3903
3904 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
3905 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
3906 // it's liable to create an infinite loop.
3907 SDValue Zero = DAG.getConstant(0, DL, OpVT);
3908 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
3909 valueHasExactlyOneBitSet(Y, DAG)) {
3910 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
3911 // Note that where Y is variable and is known to have at most one bit set
3912 // (for example, if it is Z & 1) we cannot do this; the expressions are not
3913 // equivalent when Y == 0.
3914 assert(OpVT.isInteger());
3915 Cond = ISD::getSetCCInverse(Cond, OpVT);
3916 if (DCI.isBeforeLegalizeOps() ||
3917 isCondCodeLegal(Cond, N0.getSimpleValueType()))
3918 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
3919 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
3920 // If the target supports an 'and-not' or 'and-complement' logic operation,
3921 // try to use that to make a comparison operation more efficient.
3922 // But don't do this transform if the mask is a single bit because there are
3923 // more efficient ways to deal with that case (for example, 'bt' on x86 or
3924 // 'rlwinm' on PPC).
3925
3926 // Bail out if the compare operand that we want to turn into a zero is
3927 // already a zero (otherwise, infinite loop).
3928 if (isNullConstant(Y))
3929 return SDValue();
3930
3931 // Transform this into: ~X & Y == 0.
3932 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
3933 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
3934 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
3935 }
3936
3937 return SDValue();
3938}
3939
3940/// There are multiple IR patterns that could be checking whether certain
3941 /// truncation of a signed number would be lossy or not. The pattern that is
3942 /// best at the IR level may not lower optimally. Thus, we want to unfold it.
3943/// We are looking for the following pattern: (KeptBits is a constant)
3944/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
3945/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
3946/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
3947/// We will unfold it into the natural trunc+sext pattern:
3948/// ((%x << C) a>> C) dstcond %x
3949/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
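/// For example, with i16 %x and KeptBits == 8 ("does %x fit in i8"):
/// (add %x, 128) u< 256 unfolds to ((%x << 8) a>> 8) == %x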
3950SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
3951 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
3952 const SDLoc &DL) const {
3953 // We must be comparing with a constant.
3954 ConstantSDNode *C1;
3955 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
3956 return SDValue();
3957
3958 // N0 should be: add %x, (1 << (KeptBits-1))
3959 if (N0->getOpcode() != ISD::ADD)
3960 return SDValue();
3961
3962 // And we must be 'add'ing a constant.
3963 ConstantSDNode *C01;
3964 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
3965 return SDValue();
3966
3967 SDValue X = N0->getOperand(0);
3968 EVT XVT = X.getValueType();
3969
3970 // Validate constants ...
3971
3972 APInt I1 = C1->getAPIntValue();
3973
3974 ISD::CondCode NewCond;
3975 if (Cond == ISD::CondCode::SETULT) {
3976 NewCond = ISD::CondCode::SETEQ;
3977 } else if (Cond == ISD::CondCode::SETULE) {
3978 NewCond = ISD::CondCode::SETEQ;
3979 // But need to 'canonicalize' the constant.
3980 I1 += 1;
3981 } else if (Cond == ISD::CondCode::SETUGT) {
3982 NewCond = ISD::CondCode::SETNE;
3983 // But need to 'canonicalize' the constant.
3984 I1 += 1;
3985 } else if (Cond == ISD::CondCode::SETUGE) {
3986 NewCond = ISD::CondCode::SETNE;
3987 } else
3988 return SDValue();
3989
3990 APInt I01 = C01->getAPIntValue();
3991
3992 auto checkConstants = [&I1, &I01]() -> bool {
3993 // Both of them must be power-of-two, and the constant from setcc is bigger.
3994 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
3995 };
3996
3997 if (checkConstants()) {
3998 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
3999 } else {
4000 // What if we invert constants? (and the target predicate)
4001 I1.negate();
4002 I01.negate();
4003 assert(XVT.isInteger());
4004 NewCond = getSetCCInverse(NewCond, XVT);
4005 if (!checkConstants())
4006 return SDValue();
4007 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4008 }
4009
4010 // They are power-of-two, so which bit is set?
4011 const unsigned KeptBits = I1.logBase2();
4012 const unsigned KeptBitsMinusOne = I01.logBase2();
4013
4014 // Magic!
4015 if (KeptBits != (KeptBitsMinusOne + 1))
4016 return SDValue();
4017 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4018
4019 // We don't want to do this in every single case.
4020 SelectionDAG &DAG = DCI.DAG;
4021 if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
4022 XVT, KeptBits))
4023 return SDValue();
4024
4025 const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
4026 assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
4027
4028 // Unfold into: ((%x << C) a>> C) cond %x
4029 // Where 'cond' will be either 'eq' or 'ne'.
4030 SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
4031 SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
4032 SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
4033 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
4034
4035 return T2;
4036}
4037
4038// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
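// E.g. (X & (1 << Y)) != 0 --> ((X l>> Y) & 1) != 0, shifting X by the variable
// amount instead so the 'and' keeps a constant mask.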
4039SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4040 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4041 DAGCombinerInfo &DCI, const SDLoc &DL) const {
4042 assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
4043 "Should be a comparison with 0.");
4044 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4045 "Valid only for [in]equality comparisons.");
4046
4047 unsigned NewShiftOpcode;
4048 SDValue X, C, Y;
4049
4050 SelectionDAG &DAG = DCI.DAG;
4051 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4052
4053 // Look for '(C l>>/<< Y)'.
4054 auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
4055 // The shift should be one-use.
4056 if (!V.hasOneUse())
4057 return false;
4058 unsigned OldShiftOpcode = V.getOpcode();
4059 switch (OldShiftOpcode) {
4060 case ISD::SHL:
4061 NewShiftOpcode = ISD::SRL;
4062 break;
4063 case ISD::SRL:
4064 NewShiftOpcode = ISD::SHL;
4065 break;
4066 default:
4067 return false; // must be a logical shift.
4068 }
4069 // We should be shifting a constant.
4070 // FIXME: best to use isConstantOrConstantVector().
4071 C = V.getOperand(0);
4072 ConstantSDNode *CC =
4073 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4074 if (!CC)
4075 return false;
4076 Y = V.getOperand(1);
4077
4078 ConstantSDNode *XC =
4079 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4080 return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
4081 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4082 };
4083
4084 // LHS of comparison should be a one-use 'and'.
4085 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4086 return SDValue();
4087
4088 X = N0.getOperand(0);
4089 SDValue Mask = N0.getOperand(1);
4090
4091 // 'and' is commutative!
4092 if (!Match(Mask)) {
4093 std::swap(X, Mask);
4094 if (!Match(Mask))
4095 return SDValue();
4096 }
4097
4098 EVT VT = X.getValueType();
4099
4100 // Produce:
4101 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4102 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4103 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4104 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4105 return T2;
4106}
4107
4108/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4109/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4110/// handle the commuted versions of these patterns.
4111SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4112 ISD::CondCode Cond, const SDLoc &DL,
4113 DAGCombinerInfo &DCI) const {
4114 unsigned BOpcode = N0.getOpcode();
4115 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4116 "Unexpected binop");
4117 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4118
4119 // (X + Y) == X --> Y == 0
4120 // (X - Y) == X --> Y == 0
4121 // (X ^ Y) == X --> Y == 0
4122 SelectionDAG &DAG = DCI.DAG;
4123 EVT OpVT = N0.getValueType();
4124 SDValue X = N0.getOperand(0);
4125 SDValue Y = N0.getOperand(1);
4126 if (X == N1)
4127 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4128
4129 if (Y != N1)
4130 return SDValue();
4131
4132 // (X + Y) == Y --> X == 0
4133 // (X ^ Y) == Y --> X == 0
4134 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4135 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4136
4137 // The shift would not be valid if the operands are boolean (i1).
4138 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4139 return SDValue();
4140
4141 // (X - Y) == Y --> X == Y << 1
4142 EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
4143 !DCI.isBeforeLegalize());
4144 SDValue One = DAG.getConstant(1, DL, ShiftVT);
4145 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4146 if (!DCI.isCalledByLegalizer())
4147 DCI.AddToWorklist(YShl1.getNode());
4148 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4149}
4150
4151 static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
4152 SDValue N0, const APInt &C1,
4153 ISD::CondCode Cond, const SDLoc &dl,
4154 SelectionDAG &DAG) {
4155 // Look through truncs that don't change the value of a ctpop.
4156 // FIXME: Add vector support? Need to be careful with setcc result type below.
4157 SDValue CTPOP = N0;
4158 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4160 CTPOP = N0.getOperand(0);
4161
4162 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4163 return SDValue();
4164
4165 EVT CTVT = CTPOP.getValueType();
4166 SDValue CTOp = CTPOP.getOperand(0);
4167
4168 // Expand a power-of-2-or-zero comparison based on ctpop:
4169 // (ctpop x) u< 2 -> (x & x-1) == 0
4170 // (ctpop x) u> 1 -> (x & x-1) != 0
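// More generally, each (x & (x - 1)) pass below clears the lowest set bit, so
// (ctpop x) u< N takes N-1 passes and (ctpop x) u> N takes N passes.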
4171 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4172 // Keep the CTPOP if it is a cheap vector op.
4173 if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4174 return SDValue();
4175
4176 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4177 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4178 return SDValue();
4179 if (C1 == 0 && (Cond == ISD::SETULT))
4180 return SDValue(); // This is handled elsewhere.
4181
4182 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4183
4184 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4185 SDValue Result = CTOp;
4186 for (unsigned i = 0; i < Passes; i++) {
4187 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4188 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4189 }
4190 ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
4191 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4192 }
4193
4194 // Expand a power-of-2 comparison based on ctpop:
4195 // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
4196 // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
4197 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4198 // Keep the CTPOP if it is cheap.
4199 if (TLI.isCtpopFast(CTVT))
4200 return SDValue();
4201
4202 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4203 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4204 assert(CTVT.isInteger());
4205 ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
4206 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4207 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4208 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4209 // It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4210 // check before emitting a potentially unnecessary op.
4211 if (DAG.isKnownNeverZero(CTOp))
4212 return RHS;
4213 SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
4214 unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
4215 return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
4216 }
4217
4218 return SDValue();
4219}
4220
4221 static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4222 ISD::CondCode Cond, const SDLoc &dl,
4223 SelectionDAG &DAG) {
4224 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4225 return SDValue();
4226
4227 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4228 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4229 return SDValue();
4230
4231 auto getRotateSource = [](SDValue X) {
4232 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4233 return X.getOperand(0);
4234 return SDValue();
4235 };
4236
4237 // Peek through a rotated value compared against 0 or -1:
4238 // (rot X, Y) == 0/-1 --> X == 0/-1
4239 // (rot X, Y) != 0/-1 --> X != 0/-1
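// (a rotate only permutes bits, so the all-zeros and all-ones values are preserved)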
4240 if (SDValue R = getRotateSource(N0))
4241 return DAG.getSetCC(dl, VT, R, N1, Cond);
4242
4243 // Peek through an 'or' of a rotated value compared against 0:
4244 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4245 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4246 //
4247 // TODO: Add the 'and' with -1 sibling.
4248 // TODO: Recurse through a series of 'or' ops to find the rotate.
4249 EVT OpVT = N0.getValueType();
4250 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4251 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4252 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4253 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4254 }
4255 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4256 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4257 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4258 }
4259 }
4260
4261 return SDValue();
4262}
4263
4264 static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4265 ISD::CondCode Cond, const SDLoc &dl,
4266 SelectionDAG &DAG) {
4267 // If we are testing for all-bits-clear, we might be able to do that with
4268 // less shifting since bit-order does not matter.
4269 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4270 return SDValue();
4271
4272 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4273 if (!C1 || !C1->isZero())
4274 return SDValue();
4275
4276 if (!N0.hasOneUse() ||
4277 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4278 return SDValue();
4279
4280 unsigned BitWidth = N0.getScalarValueSizeInBits();
4281 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4282 if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
4283 return SDValue();
4284
4285 // Canonicalize fshr as fshl to reduce pattern-matching.
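// (for a non-zero shift amount C, fshr X, Y, C computes the same value as
// fshl X, Y, BitWidth - C)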
4286 unsigned ShAmt = ShAmtC->getZExtValue();
4287 if (N0.getOpcode() == ISD::FSHR)
4288 ShAmt = BitWidth - ShAmt;
4289
4290 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4291 SDValue X, Y;
4292 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4293 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4294 return false;
4295 if (Or.getOperand(0) == Other) {
4296 X = Or.getOperand(0);
4297 Y = Or.getOperand(1);
4298 return true;
4299 }
4300 if (Or.getOperand(1) == Other) {
4301 X = Or.getOperand(1);
4302 Y = Or.getOperand(0);
4303 return true;
4304 }
4305 return false;
4306 };
4307
4308 EVT OpVT = N0.getValueType();
4309 EVT ShAmtVT = N0.getOperand(2).getValueType();
4310 SDValue F0 = N0.getOperand(0);
4311 SDValue F1 = N0.getOperand(1);
4312 if (matchOr(F0, F1)) {
4313 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4314 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4315 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4316 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4317 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4318 }
4319 if (matchOr(F1, F0)) {
4320 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4321 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4322 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);