1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/STLExtras.h"
24#include "llvm/IR/DataLayout.h"
27#include "llvm/IR/LLVMContext.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCExpr.h"
35#include <cctype>
36using namespace llvm;
37
38/// NOTE: The TargetMachine owns TLOF.
39TargetLowering::TargetLowering(const TargetMachine &tm)
40 : TargetLoweringBase(tm) {}
41
42const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
43 return nullptr;
44}
45
46bool TargetLowering::isPositionIndependent() const {
47 return getTargetMachine().isPositionIndependent();
48}
49
50/// Check whether a given call node is in tail position within its function. If
51/// so, it sets Chain to the input chain of the tail call.
52bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
53 SDValue &Chain) const {
54 const Function &F = DAG.getMachineFunction().getFunction();
55
56 // First, check if tail calls have been disabled in this function.
57 if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
58 return false;
59
60 // Conservatively require the attributes of the call to match those of
61 // the return. Ignore following attributes because they don't affect the
62 // call sequence.
63 AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
64 for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
65 Attribute::DereferenceableOrNull, Attribute::NoAlias,
66 Attribute::NonNull, Attribute::NoUndef})
67 CallerAttrs.removeAttribute(Attr);
68
69 if (CallerAttrs.hasAttributes())
70 return false;
71
72 // It's not safe to eliminate the sign / zero extension of the return value.
73 if (CallerAttrs.contains(Attribute::ZExt) ||
74 CallerAttrs.contains(Attribute::SExt))
75 return false;
76
77 // Check if the only use is a function return node.
78 return isUsedByReturnOnly(Node, Chain);
79}
80
81bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
82 const uint32_t *CallerPreservedMask,
83 const SmallVectorImpl<CCValAssign> &ArgLocs,
84 const SmallVectorImpl<SDValue> &OutVals) const {
85 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
86 const CCValAssign &ArgLoc = ArgLocs[I];
87 if (!ArgLoc.isRegLoc())
88 continue;
89 MCRegister Reg = ArgLoc.getLocReg();
90 // Only look at callee saved registers.
91 if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
92 continue;
93 // Check that we pass the value used for the caller.
94 // (We look for a CopyFromReg reading a virtual register that is used
95 // for the function live-in value of register Reg)
96 SDValue Value = OutVals[I];
97 if (Value->getOpcode() == ISD::AssertZext)
98 Value = Value.getOperand(0);
99 if (Value->getOpcode() != ISD::CopyFromReg)
100 return false;
101 Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
102 if (MRI.getLiveInPhysReg(ArgReg) != Reg)
103 return false;
104 }
105 return true;
106}
107
108/// Set CallLoweringInfo attribute flags based on a call instruction
109/// and called function attributes.
110void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
111 unsigned ArgIdx) {
112 IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
113 IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
114 IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
115 IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
116 IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
117 IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
118 IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
119 IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
120 IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
121 IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
122 IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
123 IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
124 Alignment = Call->getParamStackAlign(ArgIdx);
125 IndirectType = nullptr;
127 "multiple ABI attributes?");
128 if (IsByVal) {
129 IndirectType = Call->getParamByValType(ArgIdx);
130 if (!Alignment)
131 Alignment = Call->getParamAlign(ArgIdx);
132 }
133 if (IsPreallocated)
134 IndirectType = Call->getParamPreallocatedType(ArgIdx);
135 if (IsInAlloca)
136 IndirectType = Call->getParamInAllocaType(ArgIdx);
137 if (IsSRet)
138 IndirectType = Call->getParamStructRetType(ArgIdx);
139}
140
141/// Generate a libcall taking the given operands as arguments and returning a
142/// result of type RetVT.
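/// The returned pair is the call's result value and its output chain; callers
/// that must thread a chain (e.g. strict FP lowering) should continue from the
/// second element.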
143std::pair<SDValue, SDValue>
144TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
145 ArrayRef<SDValue> Ops,
146 MakeLibCallOptions CallOptions,
147 const SDLoc &dl,
148 SDValue InChain) const {
149 if (!InChain)
150 InChain = DAG.getEntryNode();
151
152 TargetLowering::ArgListTy Args;
153 Args.reserve(Ops.size());
154
155 TargetLowering::ArgListEntry Entry;
156 for (unsigned i = 0; i < Ops.size(); ++i) {
157 SDValue NewOp = Ops[i];
158 Entry.Node = NewOp;
159 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
160 Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
161 CallOptions.IsSExt);
162 Entry.IsZExt = !Entry.IsSExt;
163
164 if (CallOptions.IsSoften &&
165 !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
166 Entry.IsSExt = Entry.IsZExt = false;
167 }
168 Args.push_back(Entry);
169 }
170
171 if (LC == RTLIB::UNKNOWN_LIBCALL)
172 report_fatal_error("Unsupported library call operation!");
173 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
174 getPointerTy(DAG.getDataLayout()));
175
176 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
177 TargetLowering::CallLoweringInfo CLI(DAG);
178 bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
179 bool zeroExtend = !signExtend;
180
181 if (CallOptions.IsSoften &&
182 !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
183 signExtend = zeroExtend = false;
184 }
185
186 CLI.setDebugLoc(dl)
187 .setChain(InChain)
188 .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
189 .setNoReturn(CallOptions.DoesNotReturn)
190 .setDiscardResult(!CallOptions.IsReturnValueUsed)
191 .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
192 .setSExtResult(signExtend)
193 .setZExtResult(zeroExtend);
194 return LowerCallTo(CLI);
195}
196
197bool TargetLowering::findOptimalMemOpLowering(
198 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
199 unsigned SrcAS, const AttributeList &FuncAttributes) const {
200 if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
201 Op.getSrcAlign() < Op.getDstAlign())
202 return false;
203
204 EVT VT = getOptimalMemOpType(Op, FuncAttributes);
205
206 if (VT == MVT::Other) {
207 // Use the largest integer type whose alignment constraints are satisfied.
208 // We only need to check DstAlign here as SrcAlign is always greater or
209 // equal to DstAlign (or zero).
210 VT = MVT::i64;
211 if (Op.isFixedDstAlign())
212 while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
213 !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
214 VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
215 assert(VT.isInteger());
216
217 // Find the largest legal integer type.
218 MVT LVT = MVT::i64;
219 while (!isTypeLegal(LVT))
220 LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
221 assert(LVT.isInteger());
222
223 // If the type we've chosen is larger than the largest legal integer type
224 // then use that instead.
225 if (VT.bitsGT(LVT))
226 VT = LVT;
227 }
228
229 unsigned NumMemOps = 0;
230 uint64_t Size = Op.size();
231 while (Size) {
232 unsigned VTSize = VT.getSizeInBits() / 8;
233 while (VTSize > Size) {
234 // For now, only use non-vector load / store's for the left-over pieces.
235 EVT NewVT = VT;
236 unsigned NewVTSize;
237
238 bool Found = false;
239 if (VT.isVector() || VT.isFloatingPoint()) {
240 NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
241 if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
242 isSafeMemOpType(NewVT.getSimpleVT()))
243 Found = true;
244 else if (NewVT == MVT::i64 &&
245 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
246 isSafeMemOpType(MVT::f64)) {
247 // i64 is usually not legal on 32-bit targets, but f64 may be.
248 NewVT = MVT::f64;
249 Found = true;
250 }
251 }
252
253 if (!Found) {
254 do {
255 NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
256 if (NewVT == MVT::i8)
257 break;
258 } while (!isSafeMemOpType(NewVT.getSimpleVT()));
259 }
260 NewVTSize = NewVT.getSizeInBits() / 8;
261
262 // If the new VT cannot cover all of the remaining bits, then consider
263 // issuing a (or a pair of) unaligned and overlapping load / store.
264 unsigned Fast;
265 if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
266 allowsMisalignedMemoryAccesses(
267 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
268 MachineMemOperand::MONone, &Fast) &&
269 Fast)
270 VTSize = Size;
271 else {
272 VT = NewVT;
273 VTSize = NewVTSize;
274 }
275 }
276
277 if (++NumMemOps > Limit)
278 return false;
279
280 MemOps.push_back(VT);
281 Size -= VTSize;
282 }
283
284 return true;
285}
286
287/// Soften the operands of a comparison. This code is shared among BR_CC,
288/// SELECT_CC, and SETCC handlers.
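/// For example, an f32 SETOLT is typically lowered to a call to the soft-float
/// comparison routine for RTLIB::OLT_F32 (usually __ltsf2), and CCCode is
/// rewritten so the caller compares the integer result against zero.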
289void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
290 SDValue &NewLHS, SDValue &NewRHS,
291 ISD::CondCode &CCCode,
292 const SDLoc &dl, const SDValue OldLHS,
293 const SDValue OldRHS) const {
294 SDValue Chain;
295 return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
296 OldRHS, Chain);
297}
298
299void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
300 SDValue &NewLHS, SDValue &NewRHS,
301 ISD::CondCode &CCCode,
302 const SDLoc &dl, const SDValue OldLHS,
303 const SDValue OldRHS,
304 SDValue &Chain,
305 bool IsSignaling) const {
306 // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
307 // not supporting it. We can update this code when libgcc provides such
308 // functions.
309
310 assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
311 && "Unsupported setcc type!");
312
313 // Expand into one or more soft-fp libcall(s).
314 RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
315 bool ShouldInvertCC = false;
316 switch (CCCode) {
317 case ISD::SETEQ:
318 case ISD::SETOEQ:
319 LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
320 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
321 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
322 break;
323 case ISD::SETNE:
324 case ISD::SETUNE:
325 LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
326 (VT == MVT::f64) ? RTLIB::UNE_F64 :
327 (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
328 break;
329 case ISD::SETGE:
330 case ISD::SETOGE:
331 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
332 (VT == MVT::f64) ? RTLIB::OGE_F64 :
333 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
334 break;
335 case ISD::SETLT:
336 case ISD::SETOLT:
337 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
338 (VT == MVT::f64) ? RTLIB::OLT_F64 :
339 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
340 break;
341 case ISD::SETLE:
342 case ISD::SETOLE:
343 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
344 (VT == MVT::f64) ? RTLIB::OLE_F64 :
345 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
346 break;
347 case ISD::SETGT:
348 case ISD::SETOGT:
349 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
350 (VT == MVT::f64) ? RTLIB::OGT_F64 :
351 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
352 break;
353 case ISD::SETO:
354 ShouldInvertCC = true;
355 [[fallthrough]];
356 case ISD::SETUO:
357 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
358 (VT == MVT::f64) ? RTLIB::UO_F64 :
359 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
360 break;
361 case ISD::SETONE:
362 // SETONE = O && UNE
363 ShouldInvertCC = true;
364 [[fallthrough]];
365 case ISD::SETUEQ:
366 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
367 (VT == MVT::f64) ? RTLIB::UO_F64 :
368 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
369 LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
370 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
371 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
372 break;
373 default:
374 // Invert CC for unordered comparisons
375 ShouldInvertCC = true;
376 switch (CCCode) {
377 case ISD::SETULT:
378 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
379 (VT == MVT::f64) ? RTLIB::OGE_F64 :
380 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
381 break;
382 case ISD::SETULE:
383 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
384 (VT == MVT::f64) ? RTLIB::OGT_F64 :
385 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
386 break;
387 case ISD::SETUGT:
388 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
389 (VT == MVT::f64) ? RTLIB::OLE_F64 :
390 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
391 break;
392 case ISD::SETUGE:
393 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
394 (VT == MVT::f64) ? RTLIB::OLT_F64 :
395 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
396 break;
397 default: llvm_unreachable("Do not know how to soften this setcc!");
398 }
399 }
400
401 // Use the target specific return value for comparison lib calls.
402 EVT RetVT = getCmpLibcallReturnType();
403 SDValue Ops[2] = {NewLHS, NewRHS};
404 TargetLowering::MakeLibCallOptions CallOptions;
405 EVT OpsVT[2] = { OldLHS.getValueType(),
406 OldRHS.getValueType() };
407 CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
408 auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
409 NewLHS = Call.first;
410 NewRHS = DAG.getConstant(0, dl, RetVT);
411
412 CCCode = getCmpLibcallCC(LC1);
413 if (ShouldInvertCC) {
414 assert(RetVT.isInteger());
415 CCCode = getSetCCInverse(CCCode, RetVT);
416 }
417
418 if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
419 // Update Chain.
420 Chain = Call.second;
421 } else {
422 EVT SetCCVT =
423 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
424 SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
425 auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
426 CCCode = getCmpLibcallCC(LC2);
427 if (ShouldInvertCC)
428 CCCode = getSetCCInverse(CCCode, RetVT);
429 NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
430 if (Chain)
431 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
432 Call2.second);
433 NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
434 Tmp.getValueType(), Tmp, NewLHS);
435 NewRHS = SDValue();
436 }
437}
438
439/// Return the entry encoding for a jump table in the current function. The
440/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
441unsigned TargetLowering::getJumpTableEncoding() const {
442 // In non-pic modes, just use the address of a block.
443 if (!isPositionIndependent())
444 return MachineJumpTableInfo::EK_BlockAddress;
445
446 // In PIC mode, if the target supports a GPRel32 directive, use it.
447 if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
448 return MachineJumpTableInfo::EK_GPRel32BlockAddress;
449
450 // Otherwise, use a label difference.
451 return MachineJumpTableInfo::EK_LabelDifference32;
452}
453
454SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
455 SelectionDAG &DAG) const {
456 // If our PIC model is GP relative, use the global offset table as the base.
457 unsigned JTEncoding = getJumpTableEncoding();
458
459 if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
460 (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
461 return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
462
463 return Table;
464}
465
466/// This returns the relocation base for the given PIC jumptable, the same as
467/// getPICJumpTableRelocBase, but as an MCExpr.
468const MCExpr *
469TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
470 unsigned JTI,MCContext &Ctx) const{
471 // The normal PIC reloc base is the label at the start of the jump table.
472 return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
473}
474
475bool
476TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
477 const TargetMachine &TM = getTargetMachine();
478 const GlobalValue *GV = GA->getGlobal();
479
480 // If the address is not even local to this DSO we will have to load it from
481 // a got and then add the offset.
482 if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
483 return false;
484
485 // If the code is position independent we will have to add a base register.
486 if (isPositionIndependent())
487 return false;
488
489 // Otherwise we can do it.
490 return true;
491}
492
493//===----------------------------------------------------------------------===//
494// Optimization Methods
495//===----------------------------------------------------------------------===//
496
497/// If the specified instruction has a constant integer operand and there are
498/// bits set in that constant that are not demanded, then clear those bits and
499/// return true.
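/// For example, if only bits 11:8 of (and X, 0xFF00) are demanded, the constant
/// can be shrunk to 0x0F00 without changing the observable result.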
500bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
501 const APInt &DemandedBits,
502 const APInt &DemandedElts,
503 TargetLoweringOpt &TLO) const {
504 SDLoc DL(Op);
505 unsigned Opcode = Op.getOpcode();
506
507 // Do target-specific constant optimization.
508 if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
509 return TLO.New.getNode();
510
511 // FIXME: ISD::SELECT, ISD::SELECT_CC
512 switch (Opcode) {
513 default:
514 break;
515 case ISD::XOR:
516 case ISD::AND:
517 case ISD::OR: {
518 auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
519 if (!Op1C || Op1C->isOpaque())
520 return false;
521
522 // If this is a 'not' op, don't touch it because that's a canonical form.
523 const APInt &C = Op1C->getAPIntValue();
524 if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
525 return false;
526
527 if (!C.isSubsetOf(DemandedBits)) {
528 EVT VT = Op.getValueType();
529 SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
530 SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
531 return TLO.CombineTo(Op, NewOp);
532 }
533
534 break;
535 }
536 }
537
538 return false;
539}
540
541bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
542 const APInt &DemandedBits,
543 TargetLoweringOpt &TLO) const {
544 EVT VT = Op.getValueType();
545 APInt DemandedElts = VT.isVector()
546 ? APInt::getAllOnes(VT.getVectorNumElements())
547 : APInt(1, 1);
548 return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
549}
550
551/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
552/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
553/// generalized for targets with other types of implicit widening casts.
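/// For example, if only the low 16 bits of an i64 add are demanded and the
/// target can truncate to and zero-extend from i16 for free, the add can be
/// performed in i16 and the result extended back to i64.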
554bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
555 const APInt &Demanded,
556 TargetLoweringOpt &TLO) const {
557 assert(Op.getNumOperands() == 2 &&
558 "ShrinkDemandedOp only supports binary operators!");
559 assert(Op.getNode()->getNumValues() == 1 &&
560 "ShrinkDemandedOp only supports nodes with one result!");
561
562 SelectionDAG &DAG = TLO.DAG;
563 SDLoc dl(Op);
564
565 // Early return, as this function cannot handle vector types.
566 if (Op.getValueType().isVector())
567 return false;
568
569 // Don't do this if the node has another user, which may require the
570 // full value.
571 if (!Op.getNode()->hasOneUse())
572 return false;
573
574 // Search for the smallest integer type with free casts to and from
575 // Op's type. For expedience, just check power-of-2 integer types.
576 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
577 unsigned DemandedSize = Demanded.getActiveBits();
578 unsigned SmallVTBits = DemandedSize;
579 SmallVTBits = llvm::bit_ceil(SmallVTBits);
580 for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
581 EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
582 if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
583 TLI.isZExtFree(SmallVT, Op.getValueType())) {
584 // We found a type with free casts.
585 SDValue X = DAG.getNode(
586 Op.getOpcode(), dl, SmallVT,
587 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
588 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
589 assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
590 SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
591 return TLO.CombineTo(Op, Z);
592 }
593 }
594 return false;
595}
596
597bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
598 DAGCombinerInfo &DCI) const {
599 SelectionDAG &DAG = DCI.DAG;
600 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
601 !DCI.isBeforeLegalizeOps());
602 KnownBits Known;
603
604 bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
605 if (Simplified) {
606 DCI.AddToWorklist(Op.getNode());
607 DCI.CommitTargetLoweringOpt(TLO);
608 }
609 return Simplified;
610}
611
612bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
613 const APInt &DemandedElts,
614 DAGCombinerInfo &DCI) const {
615 SelectionDAG &DAG = DCI.DAG;
616 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
617 !DCI.isBeforeLegalizeOps());
618 KnownBits Known;
619
620 bool Simplified =
621 SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
622 if (Simplified) {
623 DCI.AddToWorklist(Op.getNode());
624 DCI.CommitTargetLoweringOpt(TLO);
625 }
626 return Simplified;
627}
628
629bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
630 KnownBits &Known,
631 TargetLoweringOpt &TLO,
632 unsigned Depth,
633 bool AssumeSingleUse) const {
634 EVT VT = Op.getValueType();
635
636 // Since the number of lanes in a scalable vector is unknown at compile time,
637 // we track one bit which is implicitly broadcast to all lanes. This means
638 // that all lanes in a scalable vector are considered demanded.
639 APInt DemandedElts = VT.isFixedLengthVector()
640 ? APInt::getAllOnes(VT.getVectorNumElements())
641 : APInt(1, 1);
642 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
643 AssumeSingleUse);
644}
645
646// TODO: Under what circumstances can we create nodes? Constant folding?
647SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
648 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
649 SelectionDAG &DAG, unsigned Depth) const {
650 EVT VT = Op.getValueType();
651
652 // Limit search depth.
653 if (Depth >= SelectionDAG::MaxRecursionDepth)
654 return SDValue();
655
656 // Ignore UNDEFs.
657 if (Op.isUndef())
658 return SDValue();
659
660 // Not demanding any bits/elts from Op.
661 if (DemandedBits == 0 || DemandedElts == 0)
662 return DAG.getUNDEF(VT);
663
664 bool IsLE = DAG.getDataLayout().isLittleEndian();
665 unsigned NumElts = DemandedElts.getBitWidth();
666 unsigned BitWidth = DemandedBits.getBitWidth();
667 KnownBits LHSKnown, RHSKnown;
668 switch (Op.getOpcode()) {
669 case ISD::BITCAST: {
670 if (VT.isScalableVector())
671 return SDValue();
672
673 SDValue Src = peekThroughBitcasts(Op.getOperand(0));
674 EVT SrcVT = Src.getValueType();
675 EVT DstVT = Op.getValueType();
676 if (SrcVT == DstVT)
677 return Src;
678
679 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
680 unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
681 if (NumSrcEltBits == NumDstEltBits)
682 if (SDValue V = SimplifyMultipleUseDemandedBits(
683 Src, DemandedBits, DemandedElts, DAG, Depth + 1))
684 return DAG.getBitcast(DstVT, V);
685
686 if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
687 unsigned Scale = NumDstEltBits / NumSrcEltBits;
688 unsigned NumSrcElts = SrcVT.getVectorNumElements();
689 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
690 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
691 for (unsigned i = 0; i != Scale; ++i) {
692 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
693 unsigned BitOffset = EltOffset * NumSrcEltBits;
694 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
695 if (!Sub.isZero()) {
696 DemandedSrcBits |= Sub;
697 for (unsigned j = 0; j != NumElts; ++j)
698 if (DemandedElts[j])
699 DemandedSrcElts.setBit((j * Scale) + i);
700 }
701 }
702
703 if (SDValue V = SimplifyMultipleUseDemandedBits(
704 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
705 return DAG.getBitcast(DstVT, V);
706 }
707
708 // TODO - bigendian once we have test coverage.
709 if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
710 unsigned Scale = NumSrcEltBits / NumDstEltBits;
711 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
712 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
713 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
714 for (unsigned i = 0; i != NumElts; ++i)
715 if (DemandedElts[i]) {
716 unsigned Offset = (i % Scale) * NumDstEltBits;
717 DemandedSrcBits.insertBits(DemandedBits, Offset);
718 DemandedSrcElts.setBit(i / Scale);
719 }
720
721 if (SDValue V = SimplifyMultipleUseDemandedBits(
722 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
723 return DAG.getBitcast(DstVT, V);
724 }
725
726 break;
727 }
728 case ISD::AND: {
729 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
730 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
731
732 // If all of the demanded bits are known 1 on one side, return the other.
733 // These bits cannot contribute to the result of the 'and' in this
734 // context.
735 if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
736 return Op.getOperand(0);
737 if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
738 return Op.getOperand(1);
739 break;
740 }
741 case ISD::OR: {
742 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
743 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
744
745 // If all of the demanded bits are known zero on one side, return the
746 // other. These bits cannot contribute to the result of the 'or' in this
747 // context.
748 if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
749 return Op.getOperand(0);
750 if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
751 return Op.getOperand(1);
752 break;
753 }
754 case ISD::XOR: {
755 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
756 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
757
758 // If all of the demanded bits are known zero on one side, return the
759 // other.
760 if (DemandedBits.isSubsetOf(RHSKnown.Zero))
761 return Op.getOperand(0);
762 if (DemandedBits.isSubsetOf(LHSKnown.Zero))
763 return Op.getOperand(1);
764 break;
765 }
766 case ISD::SHL: {
767 // If we are only demanding sign bits then we can use the shift source
768 // directly.
769 if (const APInt *MaxSA =
770 DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
771 SDValue Op0 = Op.getOperand(0);
772 unsigned ShAmt = MaxSA->getZExtValue();
773 unsigned NumSignBits =
774 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
775 unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
776 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
777 return Op0;
778 }
779 break;
780 }
781 case ISD::SETCC: {
782 SDValue Op0 = Op.getOperand(0);
783 SDValue Op1 = Op.getOperand(1);
784 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
785 // If (1) we only need the sign-bit, (2) the setcc operands are the same
786 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
787 // -1, we may be able to bypass the setcc.
788 if (DemandedBits.isSignMask() &&
789 Op0.getScalarValueSizeInBits() == BitWidth &&
790 getBooleanContents(Op0.getValueType()) ==
791 BooleanContent::ZeroOrNegativeOneBooleanContent) {
792 // If we're testing X < 0, then this compare isn't needed - just use X!
793 // FIXME: We're limiting to integer types here, but this should also work
794 // if we don't care about FP signed-zero. The use of SETLT with FP means
795 // that we don't care about NaNs.
796 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
797 (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
798 return Op0;
799 }
800 break;
801 }
802 case ISD::SIGN_EXTEND_INREG: {
803 // If none of the extended bits are demanded, eliminate the sextinreg.
804 SDValue Op0 = Op.getOperand(0);
805 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
806 unsigned ExBits = ExVT.getScalarSizeInBits();
807 if (DemandedBits.getActiveBits() <= ExBits)
808 return Op0;
809 // If the input is already sign extended, just drop the extension.
810 unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
811 if (NumSignBits >= (BitWidth - ExBits + 1))
812 return Op0;
813 break;
814 }
815 case ISD::ANY_EXTEND_VECTOR_INREG:
816 case ISD::SIGN_EXTEND_VECTOR_INREG:
817 case ISD::ZERO_EXTEND_VECTOR_INREG: {
818 if (VT.isScalableVector())
819 return SDValue();
820
821 // If we only want the lowest element and none of extended bits, then we can
822 // return the bitcasted source vector.
823 SDValue Src = Op.getOperand(0);
824 EVT SrcVT = Src.getValueType();
825 EVT DstVT = Op.getValueType();
826 if (IsLE && DemandedElts == 1 &&
827 DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
828 DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
829 return DAG.getBitcast(DstVT, Src);
830 }
831 break;
832 }
833 case ISD::INSERT_VECTOR_ELT: {
834 if (VT.isScalableVector())
835 return SDValue();
836
837 // If we don't demand the inserted element, return the base vector.
838 SDValue Vec = Op.getOperand(0);
839 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
840 EVT VecVT = Vec.getValueType();
841 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
842 !DemandedElts[CIdx->getZExtValue()])
843 return Vec;
844 break;
845 }
846 case ISD::INSERT_SUBVECTOR: {
847 if (VT.isScalableVector())
848 return SDValue();
849
850 SDValue Vec = Op.getOperand(0);
851 SDValue Sub = Op.getOperand(1);
852 uint64_t Idx = Op.getConstantOperandVal(2);
853 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
854 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
855 // If we don't demand the inserted subvector, return the base vector.
856 if (DemandedSubElts == 0)
857 return Vec;
858 break;
859 }
860 case ISD::VECTOR_SHUFFLE: {
861 assert(!VT.isScalableVector());
862 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
863
864 // If all the demanded elts are from one operand and are inline,
865 // then we can use the operand directly.
866 bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
867 for (unsigned i = 0; i != NumElts; ++i) {
868 int M = ShuffleMask[i];
869 if (M < 0 || !DemandedElts[i])
870 continue;
871 AllUndef = false;
872 IdentityLHS &= (M == (int)i);
873 IdentityRHS &= ((M - NumElts) == i);
874 }
875
876 if (AllUndef)
877 return DAG.getUNDEF(Op.getValueType());
878 if (IdentityLHS)
879 return Op.getOperand(0);
880 if (IdentityRHS)
881 return Op.getOperand(1);
882 break;
883 }
884 default:
885 // TODO: Probably okay to remove after audit; here to reduce change size
886 // in initial enablement patch for scalable vectors
887 if (VT.isScalableVector())
888 return SDValue();
889
890 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
891 if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
892 Op, DemandedBits, DemandedElts, DAG, Depth))
893 return V;
894 break;
895 }
896 return SDValue();
897}
898
899SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
900 SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
901 unsigned Depth) const {
902 EVT VT = Op.getValueType();
903 // Since the number of lanes in a scalable vector is unknown at compile time,
904 // we track one bit which is implicitly broadcast to all lanes. This means
905 // that all lanes in a scalable vector are considered demanded.
906 APInt DemandedElts = VT.isFixedLengthVector()
907 ? APInt::getAllOnes(VT.getVectorNumElements())
908 : APInt(1, 1);
909 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
910 Depth);
911}
912
913SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
914 SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
915 unsigned Depth) const {
916 APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
917 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
918 Depth);
919}
920
921// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
922// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
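// For example, with i8 values A and B zero-extended to i16:
//   srl (add (zext A), (zext B)), 1          --> zext (avgflooru A, B)
//   srl (add (add (zext A), (zext B)), 1), 1 --> zext (avgceilu A, B)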
923static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
924 const TargetLowering &TLI,
925 const APInt &DemandedBits,
926 const APInt &DemandedElts,
927 unsigned Depth) {
928 assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
929 "SRL or SRA node is required here!");
930 // Is the right shift using an immediate value of 1?
931 ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
932 if (!N1C || !N1C->isOne())
933 return SDValue();
934
935 // We are looking for an avgfloor
936 // add(ext, ext)
937 // or one of these as a avgceil
938 // add(add(ext, ext), 1)
939 // add(add(ext, 1), ext)
940 // add(ext, add(ext, 1))
941 SDValue Add = Op.getOperand(0);
942 if (Add.getOpcode() != ISD::ADD)
943 return SDValue();
944
945 SDValue ExtOpA = Add.getOperand(0);
946 SDValue ExtOpB = Add.getOperand(1);
947 SDValue Add2;
948 auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
949 ConstantSDNode *ConstOp;
950 if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
951 ConstOp->isOne()) {
952 ExtOpA = Op1;
953 ExtOpB = Op3;
954 Add2 = A;
955 return true;
956 }
957 if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
958 ConstOp->isOne()) {
959 ExtOpA = Op1;
960 ExtOpB = Op2;
961 Add2 = A;
962 return true;
963 }
964 return false;
965 };
966 bool IsCeil =
967 (ExtOpA.getOpcode() == ISD::ADD &&
968 MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
969 (ExtOpB.getOpcode() == ISD::ADD &&
970 MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
971
972 // If the shift is signed (sra):
973 // - Needs >= 2 sign bit for both operands.
974 // - Needs >= 2 zero bits.
975 // If the shift is unsigned (srl):
976 // - Needs >= 1 zero bit for both operands.
977 // - Needs 1 demanded bit zero and >= 2 sign bits.
978 unsigned ShiftOpc = Op.getOpcode();
979 bool IsSigned = false;
980 unsigned KnownBits;
981 unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
982 unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
983 unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
984 unsigned NumZeroA =
985 DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
986 unsigned NumZeroB =
987 DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
988 unsigned NumZero = std::min(NumZeroA, NumZeroB);
989
990 switch (ShiftOpc) {
991 default:
992 llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
993 case ISD::SRA: {
994 if (NumZero >= 2 && NumSigned < NumZero) {
995 IsSigned = false;
996 KnownBits = NumZero;
997 break;
998 }
999 if (NumSigned >= 1) {
1000 IsSigned = true;
1001 KnownBits = NumSigned;
1002 break;
1003 }
1004 return SDValue();
1005 }
1006 case ISD::SRL: {
1007 if (NumZero >= 1 && NumSigned < NumZero) {
1008 IsSigned = false;
1009 KnownBits = NumZero;
1010 break;
1011 }
1012 if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
1013 IsSigned = true;
1014 KnownBits = NumSigned;
1015 break;
1016 }
1017 return SDValue();
1018 }
1019 }
1020
1021 unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1022 : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1023
1024 // Find the smallest power-2 type that is legal for this vector size and
1025 // operation, given the original type size and the number of known sign/zero
1026 // bits.
1027 EVT VT = Op.getValueType();
1028 unsigned MinWidth =
1029 std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
1030 EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
1031 if (VT.isVector())
1032 NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
1033 if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT)) {
1034 // If we could not transform, and (both) adds are nuw/nsw, we can use the
1035 // larger type size to do the transform.
1036 if (((!IsSigned && Add->getFlags().hasNoUnsignedWrap() &&
1037 (!Add2 || Add2->getFlags().hasNoUnsignedWrap())) ||
1038 (IsSigned && Add->getFlags().hasNoSignedWrap() &&
1039 (!Add2 || Add2->getFlags().hasNoSignedWrap()))) &&
1040 TLI.isOperationLegalOrCustom(AVGOpc, VT)) {
1041 NVT = VT;
1042 } else
1043 return SDValue();
1044 }
1045
1046 SDLoc DL(Op);
1047 SDValue ResultAVG =
1048 DAG.getNode(AVGOpc, DL, NVT, DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpA),
1049 DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpB));
1050 return DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT,
1051 ResultAVG);
1052}
1053
1054/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1055/// result of Op are ever used downstream. If we can use this information to
1056/// simplify Op, create a new simplified DAG node and return true, returning the
1057/// original and new nodes in Old and New. Otherwise, analyze the expression and
1058/// return a mask of Known bits for the expression (used to simplify the
1059/// caller). The Known bits may only be accurate for those bits in the
1060/// OriginalDemandedBits and OriginalDemandedElts.
1061bool TargetLowering::SimplifyDemandedBits(
1062 SDValue Op, const APInt &OriginalDemandedBits,
1063 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1064 unsigned Depth, bool AssumeSingleUse) const {
1065 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1066 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1067 "Mask size mismatches value type size!");
1068
1069 // Don't know anything.
1070 Known = KnownBits(BitWidth);
1071
1072 EVT VT = Op.getValueType();
1073 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1074 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1075 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1076 "Unexpected vector size");
1077
1078 APInt DemandedBits = OriginalDemandedBits;
1079 APInt DemandedElts = OriginalDemandedElts;
1080 SDLoc dl(Op);
1081 auto &DL = TLO.DAG.getDataLayout();
1082
1083 // Undef operand.
1084 if (Op.isUndef())
1085 return false;
1086
1087 // We can't simplify target constants.
1088 if (Op.getOpcode() == ISD::TargetConstant)
1089 return false;
1090
1091 if (Op.getOpcode() == ISD::Constant) {
1092 // We know all of the bits for a constant!
1093 Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
1094 return false;
1095 }
1096
1097 if (Op.getOpcode() == ISD::ConstantFP) {
1098 // We know all of the bits for a floating point constant!
1099 Known = KnownBits::makeConstant(
1100 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1101 return false;
1102 }
1103
1104 // Other users may use these bits.
1105 bool HasMultiUse = false;
1106 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1107 if (Depth >= SelectionDAG::MaxRecursionDepth) {
1108 // Limit search depth.
1109 return false;
1110 }
1111 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1112 DemandedBits = APInt::getAllOnes(BitWidth);
1113 DemandedElts = APInt::getAllOnes(NumElts);
1114 HasMultiUse = true;
1115 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1116 // Not demanding any bits/elts from Op.
1117 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1118 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1119 // Limit search depth.
1120 return false;
1121 }
1122
1123 KnownBits Known2;
1124 switch (Op.getOpcode()) {
1125 case ISD::SCALAR_TO_VECTOR: {
1126 if (VT.isScalableVector())
1127 return false;
1128 if (!DemandedElts[0])
1129 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1130
1131 KnownBits SrcKnown;
1132 SDValue Src = Op.getOperand(0);
1133 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1134 APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1135 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1136 return true;
1137
1138 // Upper elements are undef, so only get the knownbits if we just demand
1139 // the bottom element.
1140 if (DemandedElts == 1)
1141 Known = SrcKnown.anyextOrTrunc(BitWidth);
1142 break;
1143 }
1144 case ISD::BUILD_VECTOR:
1145 // Collect the known bits that are shared by every demanded element.
1146 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1147 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1148 return false; // Don't fall through, will infinitely loop.
1149 case ISD::LOAD: {
1150 auto *LD = cast<LoadSDNode>(Op);
1151 if (getTargetConstantFromLoad(LD)) {
1152 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1153 return false; // Don't fall through, will infinitely loop.
1154 }
1155 if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1156 // If this is a ZEXTLoad and we are looking at the loaded value.
1157 EVT MemVT = LD->getMemoryVT();
1158 unsigned MemBits = MemVT.getScalarSizeInBits();
1159 Known.Zero.setBitsFrom(MemBits);
1160 return false; // Don't fall through, will infinitely loop.
1161 }
1162 break;
1163 }
1164 case ISD::INSERT_VECTOR_ELT: {
1165 if (VT.isScalableVector())
1166 return false;
1167 SDValue Vec = Op.getOperand(0);
1168 SDValue Scl = Op.getOperand(1);
1169 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1170 EVT VecVT = Vec.getValueType();
1171
1172 // If index isn't constant, assume we need all vector elements AND the
1173 // inserted element.
1174 APInt DemandedVecElts(DemandedElts);
1175 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1176 unsigned Idx = CIdx->getZExtValue();
1177 DemandedVecElts.clearBit(Idx);
1178
1179 // Inserted element is not required.
1180 if (!DemandedElts[Idx])
1181 return TLO.CombineTo(Op, Vec);
1182 }
1183
1184 KnownBits KnownScl;
1185 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1186 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1187 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1188 return true;
1189
1190 Known = KnownScl.anyextOrTrunc(BitWidth);
1191
1192 KnownBits KnownVec;
1193 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1194 Depth + 1))
1195 return true;
1196
1197 if (!!DemandedVecElts)
1198 Known = KnownBits::commonBits(Known, KnownVec);
1199
1200 return false;
1201 }
1202 case ISD::INSERT_SUBVECTOR: {
1203 if (VT.isScalableVector())
1204 return false;
1205 // Demand any elements from the subvector and the remainder from the src it's
1206 // inserted into.
1207 SDValue Src = Op.getOperand(0);
1208 SDValue Sub = Op.getOperand(1);
1209 uint64_t Idx = Op.getConstantOperandVal(2);
1210 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1211 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1212 APInt DemandedSrcElts = DemandedElts;
1213 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
1214
1215 KnownBits KnownSub, KnownSrc;
1216 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1217 Depth + 1))
1218 return true;
1219 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1220 Depth + 1))
1221 return true;
1222
1223 Known.Zero.setAllBits();
1224 Known.One.setAllBits();
1225 if (!!DemandedSubElts)
1226 Known = KnownBits::commonBits(Known, KnownSub);
1227 if (!!DemandedSrcElts)
1228 Known = KnownBits::commonBits(Known, KnownSrc);
1229
1230 // Attempt to avoid multi-use src if we don't need anything from it.
1231 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1232 !DemandedSrcElts.isAllOnes()) {
1233 SDValue NewSub = SimplifyMultipleUseDemandedBits(
1234 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1235 SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1236 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1237 if (NewSub || NewSrc) {
1238 NewSub = NewSub ? NewSub : Sub;
1239 NewSrc = NewSrc ? NewSrc : Src;
1240 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1241 Op.getOperand(2));
1242 return TLO.CombineTo(Op, NewOp);
1243 }
1244 }
1245 break;
1246 }
1247 case ISD::EXTRACT_SUBVECTOR: {
1248 if (VT.isScalableVector())
1249 return false;
1250 // Offset the demanded elts by the subvector index.
1251 SDValue Src = Op.getOperand(0);
1252 if (Src.getValueType().isScalableVector())
1253 break;
1254 uint64_t Idx = Op.getConstantOperandVal(1);
1255 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1256 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1257
1258 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1259 Depth + 1))
1260 return true;
1261
1262 // Attempt to avoid multi-use src if we don't need anything from it.
1263 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1264 SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1265 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1266 if (DemandedSrc) {
1267 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1268 Op.getOperand(1));
1269 return TLO.CombineTo(Op, NewOp);
1270 }
1271 }
1272 break;
1273 }
1274 case ISD::CONCAT_VECTORS: {
1275 if (VT.isScalableVector())
1276 return false;
1277 Known.Zero.setAllBits();
1278 Known.One.setAllBits();
1279 EVT SubVT = Op.getOperand(0).getValueType();
1280 unsigned NumSubVecs = Op.getNumOperands();
1281 unsigned NumSubElts = SubVT.getVectorNumElements();
1282 for (unsigned i = 0; i != NumSubVecs; ++i) {
1283 APInt DemandedSubElts =
1284 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1285 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1286 Known2, TLO, Depth + 1))
1287 return true;
1288 // Known bits are shared by every demanded subvector element.
1289 if (!!DemandedSubElts)
1290 Known = KnownBits::commonBits(Known, Known2);
1291 }
1292 break;
1293 }
1294 case ISD::VECTOR_SHUFFLE: {
1295 assert(!VT.isScalableVector());
1296 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1297
1298 // Collect demanded elements from shuffle operands..
1299 APInt DemandedLHS, DemandedRHS;
1300 if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1301 DemandedRHS))
1302 break;
1303
1304 if (!!DemandedLHS || !!DemandedRHS) {
1305 SDValue Op0 = Op.getOperand(0);
1306 SDValue Op1 = Op.getOperand(1);
1307
1308 Known.Zero.setAllBits();
1309 Known.One.setAllBits();
1310 if (!!DemandedLHS) {
1311 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1312 Depth + 1))
1313 return true;
1314 Known = KnownBits::commonBits(Known, Known2);
1315 }
1316 if (!!DemandedRHS) {
1317 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1318 Depth + 1))
1319 return true;
1320 Known = KnownBits::commonBits(Known, Known2);
1321 }
1322
1323 // Attempt to avoid multi-use ops if we don't need anything from them.
1324 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1325 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1326 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1327 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1328 if (DemandedOp0 || DemandedOp1) {
1329 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1330 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1331 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1332 return TLO.CombineTo(Op, NewOp);
1333 }
1334 }
1335 break;
1336 }
1337 case ISD::AND: {
1338 SDValue Op0 = Op.getOperand(0);
1339 SDValue Op1 = Op.getOperand(1);
1340
1341 // If the RHS is a constant, check to see if the LHS would be zero without
1342 // using the bits from the RHS. Below, we use knowledge about the RHS to
1343 // simplify the LHS, here we're using information from the LHS to simplify
1344 // the RHS.
1345 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
1346 // Do not increment Depth here; that can cause an infinite loop.
1347 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1348 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1349 if ((LHSKnown.Zero & DemandedBits) ==
1350 (~RHSC->getAPIntValue() & DemandedBits))
1351 return TLO.CombineTo(Op, Op0);
1352
1353 // If any of the set bits in the RHS are known zero on the LHS, shrink
1354 // the constant.
1355 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1356 DemandedElts, TLO))
1357 return true;
1358
1359 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1360 // constant, but if this 'and' is only clearing bits that were just set by
1361 // the xor, then this 'and' can be eliminated by shrinking the mask of
1362 // the xor. For example, for a 32-bit X:
1363 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1364 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1365 LHSKnown.One == ~RHSC->getAPIntValue()) {
1366 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1367 return TLO.CombineTo(Op, Xor);
1368 }
1369 }
1370
1371 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1372 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1373 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1374 (Op0.getOperand(0).isUndef() ||
1375 ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
1376 Op0->hasOneUse()) {
1377 unsigned NumSubElts =
1378 Op0.getOperand(1).getValueType().getVectorNumElements();
1379 unsigned SubIdx = Op0.getConstantOperandVal(2);
1380 APInt DemandedSub =
1381 APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1382 KnownBits KnownSubMask =
1383 TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1384 if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1385 SDValue NewAnd =
1386 TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1387 SDValue NewInsert =
1388 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1389 Op0.getOperand(1), Op0.getOperand(2));
1390 return TLO.CombineTo(Op, NewInsert);
1391 }
1392 }
1393
1394 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1395 Depth + 1))
1396 return true;
1397 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1398 if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1399 Known2, TLO, Depth + 1))
1400 return true;
1401 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1402
1403 // If all of the demanded bits are known one on one side, return the other.
1404 // These bits cannot contribute to the result of the 'and'.
1405 if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1406 return TLO.CombineTo(Op, Op0);
1407 if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1408 return TLO.CombineTo(Op, Op1);
1409 // If all of the demanded bits in the inputs are known zeros, return zero.
1410 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1411 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1412 // If the RHS is a constant, see if we can simplify it.
1413 if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1414 TLO))
1415 return true;
1416 // If the operation can be done in a smaller type, do so.
1417 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1418 return true;
1419
1420 // Attempt to avoid multi-use ops if we don't need anything from them.
1421 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1422 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1423 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1424 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1425 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1426 if (DemandedOp0 || DemandedOp1) {
1427 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1428 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1429 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1430 return TLO.CombineTo(Op, NewOp);
1431 }
1432 }
1433
1434 Known &= Known2;
1435 break;
1436 }
1437 case ISD::OR: {
1438 SDValue Op0 = Op.getOperand(0);
1439 SDValue Op1 = Op.getOperand(1);
1440
1441 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1442 Depth + 1))
1443 return true;
1444 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1445 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1446 Known2, TLO, Depth + 1))
1447 return true;
1448 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1449
1450 // If all of the demanded bits are known zero on one side, return the other.
1451 // These bits cannot contribute to the result of the 'or'.
1452 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1453 return TLO.CombineTo(Op, Op0);
1454 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1455 return TLO.CombineTo(Op, Op1);
1456 // If the RHS is a constant, see if we can simplify it.
1457 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1458 return true;
1459 // If the operation can be done in a smaller type, do so.
1460 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1461 return true;
1462
1463 // Attempt to avoid multi-use ops if we don't need anything from them.
1464 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1465 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1466 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1467 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1468 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1469 if (DemandedOp0 || DemandedOp1) {
1470 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1471 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1472 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1473 return TLO.CombineTo(Op, NewOp);
1474 }
1475 }
1476
1477 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1478 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1479 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1480 Op0->hasOneUse() && Op1->hasOneUse()) {
1481 // Attempt to match all commutations - m_c_Or would've been useful!
1482 for (int I = 0; I != 2; ++I) {
1483 SDValue X = Op.getOperand(I).getOperand(0);
1484 SDValue C1 = Op.getOperand(I).getOperand(1);
1485 SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1486 SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1487 if (Alt.getOpcode() == ISD::OR) {
1488 for (int J = 0; J != 2; ++J) {
1489 if (X == Alt.getOperand(J)) {
1490 SDValue Y = Alt.getOperand(1 - J);
1491 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1492 {C1, C2})) {
1493 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1494 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1495 return TLO.CombineTo(
1496 Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1497 }
1498 }
1499 }
1500 }
1501 }
1502 }
1503
1504 Known |= Known2;
1505 break;
1506 }
1507 case ISD::XOR: {
1508 SDValue Op0 = Op.getOperand(0);
1509 SDValue Op1 = Op.getOperand(1);
1510
1511 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1512 Depth + 1))
1513 return true;
1514 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1515 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1516 Depth + 1))
1517 return true;
1518 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1519
1520 // If all of the demanded bits are known zero on one side, return the other.
1521 // These bits cannot contribute to the result of the 'xor'.
1522 if (DemandedBits.isSubsetOf(Known.Zero))
1523 return TLO.CombineTo(Op, Op0);
1524 if (DemandedBits.isSubsetOf(Known2.Zero))
1525 return TLO.CombineTo(Op, Op1);
1526 // If the operation can be done in a smaller type, do so.
1527 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1528 return true;
1529
1530 // If all of the unknown bits are known to be zero on one side or the other
1531 // turn this into an *inclusive* or.
1532 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1533 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1534 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1535
1536 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1537 if (C) {
1538 // If one side is a constant, and all of the set bits in the constant are
1539 // also known set on the other side, turn this into an AND, as we know
1540 // the bits will be cleared.
1541 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1542 // NB: it is okay if more bits are known than are requested
1543 if (C->getAPIntValue() == Known2.One) {
1544 SDValue ANDC =
1545 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1546 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1547 }
1548
1549 // If the RHS is a constant, see if we can change it. Don't alter a -1
1550 // constant because that's a 'not' op, and that is better for combining
1551 // and codegen.
1552 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1553 // We're flipping all demanded bits. Flip the undemanded bits too.
1554 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1555 return TLO.CombineTo(Op, New);
1556 }
1557
1558 unsigned Op0Opcode = Op0.getOpcode();
1559 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1560 if (ConstantSDNode *ShiftC =
1561 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1562 // Don't crash on an oversized shift. We can not guarantee that a
1563 // bogus shift has been simplified to undef.
1564 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1565 uint64_t ShiftAmt = ShiftC->getZExtValue();
1566 APInt Ones = APInt::getAllOnes(BitWidth);
1567 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1568 : Ones.lshr(ShiftAmt);
1569 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
1570 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1571 TLI.isDesirableToCommuteXorWithShift(Op.getNode())) {
1572 // If the xor constant is a demanded mask, do a 'not' before the
1573 // shift:
1574 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1575 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1576 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1577 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1578 Op0.getOperand(1)));
1579 }
1580 }
1581 }
1582 }
1583 }
1584
1585 // If we can't turn this into a 'not', try to shrink the constant.
1586 if (!C || !C->isAllOnes())
1587 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1588 return true;
1589
1590 // Attempt to avoid multi-use ops if we don't need anything from them.
1591 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1592 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1593 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1594 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1595 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1596 if (DemandedOp0 || DemandedOp1) {
1597 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1598 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1599 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1600 return TLO.CombineTo(Op, NewOp);
1601 }
1602 }
1603
1604 Known ^= Known2;
1605 break;
1606 }
1607 case ISD::SELECT:
1608 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
1609 Depth + 1))
1610 return true;
1611 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
1612 Depth + 1))
1613 return true;
1614 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1615 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1616
1617 // If the operands are constants, see if we can simplify them.
1618 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1619 return true;
1620
1621 // Only known if known in both the LHS and RHS.
1622 Known = KnownBits::commonBits(Known, Known2);
1623 break;
1624 case ISD::VSELECT:
1625 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1626 Known, TLO, Depth + 1))
1627 return true;
1628 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1629 Known2, TLO, Depth + 1))
1630 return true;
1631 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1632 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1633
1634 // Only known if known in both the LHS and RHS.
1635 Known = KnownBits::commonBits(Known, Known2);
1636 break;
1637 case ISD::SELECT_CC:
1638 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
1639 Depth + 1))
1640 return true;
1641 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
1642 Depth + 1))
1643 return true;
1644 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1645 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1646
1647 // If the operands are constants, see if we can simplify them.
1648 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1649 return true;
1650
1651 // Only known if known in both the LHS and RHS.
1652 Known = KnownBits::commonBits(Known, Known2);
1653 break;
1654 case ISD::SETCC: {
1655 SDValue Op0 = Op.getOperand(0);
1656 SDValue Op1 = Op.getOperand(1);
1657 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1658 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1659 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1660 // -1, we may be able to bypass the setcc.
1661 if (DemandedBits.isSignMask() &&
1662 Op0.getScalarValueSizeInBits() == BitWidth &&
1663 getBooleanContents(Op0.getValueType()) ==
1664 BooleanContent::ZeroOrNegativeOneBooleanContent) {
1665 // If we're testing X < 0, then this compare isn't needed - just use X!
1666 // FIXME: We're limiting to integer types here, but this should also work
1667 // if we don't care about FP signed-zero. The use of SETLT with FP means
1668 // that we don't care about NaNs.
1669 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1670 (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1671 return TLO.CombineTo(Op, Op0);
1672
1673 // TODO: Should we check for other forms of sign-bit comparisons?
1674 // Examples: X <= -1, X >= 0
1675 }
1676 if (getBooleanContents(Op0.getValueType()) ==
1677 TargetLowering::ZeroOrOneBooleanContent &&
1678 BitWidth > 1)
1679 Known.Zero.setBitsFrom(1);
1680 break;
1681 }
1682 case ISD::SHL: {
1683 SDValue Op0 = Op.getOperand(0);
1684 SDValue Op1 = Op.getOperand(1);
1685 EVT ShiftVT = Op1.getValueType();
1686
1687 if (const APInt *SA =
1688 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1689 unsigned ShAmt = SA->getZExtValue();
1690 if (ShAmt == 0)
1691 return TLO.CombineTo(Op, Op0);
1692
1693 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1694 // single shift. We can do this if the bottom bits (which are shifted
1695 // out) are never demanded.
1696 // TODO - support non-uniform vector amounts.
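// For example, ((X >>u 3) << 5) with the low 5 bits not demanded can be
// replaced by (X << 2); when C1 > ShAmt the merged form is a right shift.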
1697 if (Op0.getOpcode() == ISD::SRL) {
1698 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1699 if (const APInt *SA2 =
1700 TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1701 unsigned C1 = SA2->getZExtValue();
1702 unsigned Opc = ISD::SHL;
1703 int Diff = ShAmt - C1;
1704 if (Diff < 0) {
1705 Diff = -Diff;
1706 Opc = ISD::SRL;
1707 }
1708 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1709 return TLO.CombineTo(
1710 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1711 }
1712 }
1713 }
1714
1715 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1716 // are not demanded. This will likely allow the anyext to be folded away.
1717 // TODO - support non-uniform vector amounts.
1718 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1719 SDValue InnerOp = Op0.getOperand(0);
1720 EVT InnerVT = InnerOp.getValueType();
1721 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1722 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1723 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1724 EVT ShTy = getShiftAmountTy(InnerVT, DL);
1725 if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
1726 ShTy = InnerVT;
1727 SDValue NarrowShl =
1728 TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
1729 TLO.DAG.getConstant(ShAmt, dl, ShTy));
1730 return TLO.CombineTo(
1731 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1732 }
1733
1734 // Repeat the SHL optimization above in cases where an extension
1735 // intervenes: (shl (anyext (shr x, c1)), c2) to
1736 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1737 // aren't demanded (as above) and that the shifted upper c1 bits of
1738 // x aren't demanded.
1739 // TODO - support non-uniform vector amounts.
1740 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1741 InnerOp.hasOneUse()) {
1742 if (const APInt *SA2 =
1743 TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
1744 unsigned InnerShAmt = SA2->getZExtValue();
1745 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1746 DemandedBits.getActiveBits() <=
1747 (InnerBits - InnerShAmt + ShAmt) &&
1748 DemandedBits.countTrailingZeros() >= ShAmt) {
1749 SDValue NewSA =
1750 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1751 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1752 InnerOp.getOperand(0));
1753 return TLO.CombineTo(
1754 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1755 }
1756 }
1757 }
1758 }
1759
1760 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1761 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1762 Depth + 1))
1763 return true;
1764 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1765 Known.Zero <<= ShAmt;
1766 Known.One <<= ShAmt;
1767 // low bits known zero.
1768 Known.Zero.setLowBits(ShAmt);
1769
1770 // Attempt to avoid multi-use ops if we don't need anything from them.
1771 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1772 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1773 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1774 if (DemandedOp0) {
1775 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1776 return TLO.CombineTo(Op, NewOp);
1777 }
1778 }
1779
1780 // Try shrinking the operation as long as the shift amount will still be
1781 // in range.
1782 if ((ShAmt < DemandedBits.getActiveBits()) &&
1783 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1784 return true;
1785 } else {
1786 // This is a variable shift, so we can't shift the demand mask by a known
1787 // amount. But if we are not demanding high bits, then we are not
1788 // demanding those bits from the pre-shifted operand either.
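// For example, if only the low 16 bits of a 32-bit shl result are demanded,
// only the low 16 bits of the pre-shifted operand can ever reach them.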
1789 if (unsigned CTLZ = DemandedBits.countLeadingZeros()) {
1790 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1791 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1792 Depth + 1)) {
1793 SDNodeFlags Flags = Op.getNode()->getFlags();
1794 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1795 // Disable the nsw and nuw flags. We can no longer guarantee that we
1796 // won't wrap after simplification.
1797 Flags.setNoSignedWrap(false);
1798 Flags.setNoUnsignedWrap(false);
1799 Op->setFlags(Flags);
1800 }
1801 return true;
1802 }
1803 Known.resetAll();
1804 }
1805 }
1806
1807 // If we are only demanding sign bits then we can use the shift source
1808 // directly.
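// For example, (X << 2) where X has at least 3 sign bits and only the sign
// bit of the result is demanded: that bit equals X's own sign bit, so X can
// be used directly.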
1809 if (const APInt *MaxSA =
1810 TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
1811 unsigned ShAmt = MaxSA->getZExtValue();
1812 unsigned NumSignBits =
1813 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1814 unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
1815 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1816 return TLO.CombineTo(Op, Op0);
1817 }
1818 break;
1819 }
1820 case ISD::SRL: {
1821 SDValue Op0 = Op.getOperand(0);
1822 SDValue Op1 = Op.getOperand(1);
1823 EVT ShiftVT = Op1.getValueType();
1824
1825 // Try to match AVG patterns.
1826 if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
1827 DemandedElts, Depth + 1))
1828 return TLO.CombineTo(Op, AVG);
1829
1830 if (const APInt *SA =
1831 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1832 unsigned ShAmt = SA->getZExtValue();
1833 if (ShAmt == 0)
1834 return TLO.CombineTo(Op, Op0);
1835
1836 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1837 // single shift. We can do this if the top bits (which are shifted out)
1838 // are never demanded.
1839 // TODO - support non-uniform vector amounts.
1840 if (Op0.getOpcode() == ISD::SHL) {
1841 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1842 if (const APInt *SA2 =
1843 TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1844 unsigned C1 = SA2->getZExtValue();
1845 unsigned Opc = ISD::SRL;
1846 int Diff = ShAmt - C1;
1847 if (Diff < 0) {
1848 Diff = -Diff;
1849 Opc = ISD::SHL;
1850 }
1851 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1852 return TLO.CombineTo(
1853 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1854 }
1855 }
1856 }
1857
1858 APInt InDemandedMask = (DemandedBits << ShAmt);
1859
1860 // If the shift is exact, then it does demand the low bits (and knows that
1861 // they are zero).
1862 if (Op->getFlags().hasExact())
1863 InDemandedMask.setLowBits(ShAmt);
1864
1865 // Compute the new bits that are at the top now.
1866 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1867 Depth + 1))
1868 return true;
1869 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1870 Known.Zero.lshrInPlace(ShAmt);
1871 Known.One.lshrInPlace(ShAmt);
1872 // High bits known zero.
1873 Known.Zero.setHighBits(ShAmt);
1874
1875 // Attempt to avoid multi-use ops if we don't need anything from them.
1876 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1877 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1878 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1879 if (DemandedOp0) {
1880 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
1881 return TLO.CombineTo(Op, NewOp);
1882 }
1883 }
1884 }
1885 break;
1886 }
1887 case ISD::SRA: {
1888 SDValue Op0 = Op.getOperand(0);
1889 SDValue Op1 = Op.getOperand(1);
1890 EVT ShiftVT = Op1.getValueType();
1891
1892 // If we only want bits that already match the signbit then we don't need
1893 // to shift.
1894 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
1895 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
1896 NumHiDemandedBits)
1897 return TLO.CombineTo(Op, Op0);
1898
1899 // If this is an arithmetic shift right and only the low-bit is set, we can
1900 // always convert this into a logical shr, even if the shift amount is
1901 // variable. The low bit of the shift cannot be an input sign bit unless
1902 // the shift amount is >= the size of the datatype, which is undefined.
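// For example, if only bit 0 of (X >>s Y) is demanded, that bit is X[Y] for
// any in-range Y, which is exactly what (X >>u Y) produces.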
1903 if (DemandedBits.isOne())
1904 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
1905
1906 // Try to match AVG patterns.
1907 if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
1908 DemandedElts, Depth + 1))
1909 return TLO.CombineTo(Op, AVG);
1910
1911 if (const APInt *SA =
1912 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1913 unsigned ShAmt = SA->getZExtValue();
1914 if (ShAmt == 0)
1915 return TLO.CombineTo(Op, Op0);
1916
1917 APInt InDemandedMask = (DemandedBits << ShAmt);
1918
1919 // If the shift is exact, then it does demand the low bits (and knows that
1920 // they are zero).
1921 if (Op->getFlags().hasExact())
1922 InDemandedMask.setLowBits(ShAmt);
1923
1924 // If any of the demanded bits are produced by the sign extension, we also
1925 // demand the input sign bit.
1926 if (DemandedBits.countLeadingZeros() < ShAmt)
1927 InDemandedMask.setSignBit();
1928
1929 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1930 Depth + 1))
1931 return true;
1932 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1933 Known.Zero.lshrInPlace(ShAmt);
1934 Known.One.lshrInPlace(ShAmt);
1935
1936 // If the input sign bit is known to be zero, or if none of the top bits
1937 // are demanded, turn this into an unsigned shift right.
1938 if (Known.Zero[BitWidth - ShAmt - 1] ||
1939 DemandedBits.countLeadingZeros() >= ShAmt) {
1940 SDNodeFlags Flags;
1941 Flags.setExact(Op->getFlags().hasExact());
1942 return TLO.CombineTo(
1943 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
1944 }
1945
1946 int Log2 = DemandedBits.exactLogBase2();
1947 if (Log2 >= 0) {
1948 // The bit must come from the sign.
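// For example, i8 (X >>s 5) with only bit 6 demanded: bit 6 is a copy of
// X's sign bit, and (X >>u 1) moves that sign bit to position 6.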
1949 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
1950 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
1951 }
1952
1953 if (Known.One[BitWidth - ShAmt - 1])
1954 // New bits are known one.
1955 Known.One.setHighBits(ShAmt);
1956
1957 // Attempt to avoid multi-use ops if we don't need anything from them.
1958 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1959 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1960 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1961 if (DemandedOp0) {
1962 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
1963 return TLO.CombineTo(Op, NewOp);
1964 }
1965 }
1966 }
1967 break;
1968 }
1969 case ISD::FSHL:
1970 case ISD::FSHR: {
1971 SDValue Op0 = Op.getOperand(0);
1972 SDValue Op1 = Op.getOperand(1);
1973 SDValue Op2 = Op.getOperand(2);
1974 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
1975
1976 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
1977 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
1978
1979 // For fshl, 0-shift returns the 1st arg.
1980 // For fshr, 0-shift returns the 2nd arg.
1981 if (Amt == 0) {
1982 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
1983 Known, TLO, Depth + 1))
1984 return true;
1985 break;
1986 }
1987
1988 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
1989 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
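// For example, i8 fshl(A, B, 3) == (A << 3) | (B >> 5): result bits 7..3
// come from A bits 4..0 and result bits 2..0 come from B bits 7..5.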
1990 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
1991 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
1992 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
1993 Depth + 1))
1994 return true;
1995 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
1996 Depth + 1))
1997 return true;
1998
1999 Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2000 Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2001 Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2002 Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2003 Known.One |= Known2.One;
2004 Known.Zero |= Known2.Zero;
2005
2006 // Attempt to avoid multi-use ops if we don't need anything from them.
2007 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2008 !DemandedElts.isAllOnes()) {
2009 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2010 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2011 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2012 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2013 if (DemandedOp0 || DemandedOp1) {
2014 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2015 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2016 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2017 DemandedOp1, Op2);
2018 return TLO.CombineTo(Op, NewOp);
2019 }
2020 }
2021 }
2022
2023 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2024 if (isPowerOf2_32(BitWidth)) {
2025 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2026 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2027 Known2, TLO, Depth + 1))
2028 return true;
2029 }
2030 break;
2031 }
2032 case ISD::ROTL:
2033 case ISD::ROTR: {
2034 SDValue Op0 = Op.getOperand(0);
2035 SDValue Op1 = Op.getOperand(1);
2036 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2037
2038 // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
2039 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2040 return TLO.CombineTo(Op, Op0);
2041
2042 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2043 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2044 unsigned RevAmt = BitWidth - Amt;
2045
2046 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2047 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
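// For example, i32 rotl(X, 8) with the low 8 result bits not demanded can
// act as (X << 8); with the high 24 bits not demanded it can act as (X >> 24).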
2048 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2049 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2050 Depth + 1))
2051 return true;
2052
2053 // rot*(x, 0) --> x
2054 if (Amt == 0)
2055 return TLO.CombineTo(Op, Op0);
2056
2057 // See if we don't demand either half of the rotated bits.
2058 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2059 DemandedBits.countTrailingZeros() >= (IsROTL ? Amt : RevAmt)) {
2060 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2061 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2062 }
2063 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2064 DemandedBits.countLeadingZeros() >= (IsROTL ? RevAmt : Amt)) {
2065 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2066 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2067 }
2068 }
2069
2070 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2071 if (isPowerOf2_32(BitWidth)) {
2072 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2073 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2074 Depth + 1))
2075 return true;
2076 }
2077 break;
2078 }
2079 case ISD::UMIN: {
2080 // Check if one arg is always less than (or equal) to the other arg.
2081 SDValue Op0 = Op.getOperand(0);
2082 SDValue Op1 = Op.getOperand(1);
2083 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2084 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2085 Known = KnownBits::umin(Known0, Known1);
2086 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2087 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2088 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2089 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2090 break;
2091 }
2092 case ISD::UMAX: {
2093 // Check if one arg is always greater than (or equal) to the other arg.
2094 SDValue Op0 = Op.getOperand(0);
2095 SDValue Op1 = Op.getOperand(1);
2096 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2097 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2098 Known = KnownBits::umax(Known0, Known1);
2099 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2100 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2101 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2102 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2103 break;
2104 }
2105 case ISD::BITREVERSE: {
2106 SDValue Src = Op.getOperand(0);
2107 APInt DemandedSrcBits = DemandedBits.reverseBits();
2108 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2109 Depth + 1))
2110 return true;
2111 Known.One = Known2.One.reverseBits();
2112 Known.Zero = Known2.Zero.reverseBits();
2113 break;
2114 }
2115 case ISD::BSWAP: {
2116 SDValue Src = Op.getOperand(0);
2117
2118 // If the only bits demanded come from one byte of the bswap result,
2119 // just shift the input byte into position to eliminate the bswap.
2120 unsigned NLZ = DemandedBits.countLeadingZeros();
2121 unsigned NTZ = DemandedBits.countTrailingZeros();
2122
2123 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2124 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2125 // have 14 leading zeros, round to 8.
2126 NLZ = alignDown(NLZ, 8);
2127 NTZ = alignDown(NTZ, 8);
2128 // If we need exactly one byte, we can do this transformation.
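// For example, i32 bswap(X) with only bits 23:16 demanded: those bits are
// X's bits 15:8, so (X << 8) produces them (NLZ = 8, NTZ = 16).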
2129 if (BitWidth - NLZ - NTZ == 8) {
2130 // Replace this with either a left or right shift to get the byte into
2131 // the right place.
2132 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2133 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2134 EVT ShiftAmtTy = getShiftAmountTy(VT, DL);
2135 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2136 SDValue ShAmt = TLO.DAG.getConstant(ShiftAmount, dl, ShiftAmtTy);
2137 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2138 return TLO.CombineTo(Op, NewOp);
2139 }
2140 }
2141
2142 APInt DemandedSrcBits = DemandedBits.byteSwap();
2143 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2144 Depth + 1))
2145 return true;
2146 Known.One = Known2.One.byteSwap();
2147 Known.Zero = Known2.Zero.byteSwap();
2148 break;
2149 }
2150 case ISD::CTPOP: {
2151 // If only 1 bit is demanded, replace with PARITY as long as we're before
2152 // op legalization.
2153 // FIXME: Limit to scalars for now.
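// For example, if only bit 0 of ctpop(X) is demanded, that bit is simply
// the parity of X.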
2154 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2155 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2156 Op.getOperand(0)));
2157
2158 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2159 break;
2160 }
2161 case ISD::SIGN_EXTEND_INREG: {
2162 SDValue Op0 = Op.getOperand(0);
2163 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2164 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2165
2166 // If we only care about the highest bit, don't bother shifting right.
2167 if (DemandedBits.isSignMask()) {
2168 unsigned MinSignedBits =
2169 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2170 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2171 // However if the input is already sign extended we expect the sign
2172 // extension to be dropped altogether later and do not simplify.
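// For example, sign_extend_inreg from i8 inside an i32 with only bit 31
// demanded: that bit is a copy of bit 7 of the input, so (X << 24) produces it.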
2173 if (!AlreadySignExtended) {
2174 // Compute the correct shift amount type, which must be getShiftAmountTy
2175 // for scalar types after legalization.
2176 SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
2177 getShiftAmountTy(VT, DL));
2178 return TLO.CombineTo(Op,
2179 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2180 }
2181 }
2182
2183 // If none of the extended bits are demanded, eliminate the sextinreg.
2184 if (DemandedBits.getActiveBits() <= ExVTBits)
2185 return TLO.CombineTo(Op, Op0);
2186
2187 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2188
2189 // Since the sign extended bits are demanded, we know that the sign
2190 // bit is demanded.
2191 InputDemandedBits.setBit(ExVTBits - 1);
2192
2193 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2194 Depth + 1))
2195 return true;
2196 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2197
2198 // If the sign bit of the input is known set or clear, then we know the
2199 // top bits of the result.
2200
2201 // If the input sign bit is known zero, convert this into a zero extension.
2202 if (Known.Zero[ExVTBits - 1])
2203 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2204
2205 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2206 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2207 Known.One.setBitsFrom(ExVTBits);
2208 Known.Zero &= Mask;
2209 } else { // Input sign bit unknown
2210 Known.Zero &= Mask;
2211 Known.One &= Mask;
2212 }
2213 break;
2214 }
2215 case ISD::BUILD_PAIR: {
2216 EVT HalfVT = Op.getOperand(0).getValueType();
2217 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2218
2219 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2220 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2221
2222 KnownBits KnownLo, KnownHi;
2223
2224 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2225 return true;
2226
2227 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2228 return true;
2229
2230 Known = KnownHi.concat(KnownLo);
2231 break;
2232 }
2233 case ISD::ZERO_EXTEND_VECTOR_INREG:
2234 if (VT.isScalableVector())
2235 return false;
2236 [[fallthrough]];
2237 case ISD::ZERO_EXTEND: {
2238 SDValue Src = Op.getOperand(0);
2239 EVT SrcVT = Src.getValueType();
2240 unsigned InBits = SrcVT.getScalarSizeInBits();
2241 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2242 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2243
2244 // If none of the top bits are demanded, convert this into an any_extend.
2245 if (DemandedBits.getActiveBits() <= InBits) {
2246 // If we only need the non-extended bits of the bottom element
2247 // then we can just bitcast to the result.
2248 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2249 VT.getSizeInBits() == SrcVT.getSizeInBits())
2250 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2251
2252 unsigned Opc =
2253 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2254 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2255 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2256 }
2257
2258 APInt InDemandedBits = DemandedBits.trunc(InBits);
2259 APInt InDemandedElts = DemandedElts.zext(InElts);
2260 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2261 Depth + 1))
2262 return true;
2263 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2264 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2265 Known = Known.zext(BitWidth);
2266
2267 // Attempt to avoid multi-use ops if we don't need anything from them.
2268 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2269 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2270 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2271 break;
2272 }
2273 case ISD::SIGN_EXTEND_VECTOR_INREG:
2274 if (VT.isScalableVector())
2275 return false;
2276 [[fallthrough]];
2277 case ISD::SIGN_EXTEND: {
2278 SDValue Src = Op.getOperand(0);
2279 EVT SrcVT = Src.getValueType();
2280 unsigned InBits = SrcVT.getScalarSizeInBits();
2281 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2282 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2283
2284 // If none of the top bits are demanded, convert this into an any_extend.
2285 if (DemandedBits.getActiveBits() <= InBits) {
2286 // If we only need the non-extended bits of the bottom element
2287 // then we can just bitcast to the result.
2288 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2289 VT.getSizeInBits() == SrcVT.getSizeInBits())
2290 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2291
2292 unsigned Opc =
2293 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2294 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2295 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2296 }
2297
2298 APInt InDemandedBits = DemandedBits.trunc(InBits);
2299 APInt InDemandedElts = DemandedElts.zext(InElts);
2300
2301 // Since some of the sign extended bits are demanded, we know that the sign
2302 // bit is demanded.
2303 InDemandedBits.setBit(InBits - 1);
2304
2305 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2306 Depth + 1))
2307 return true;
2308 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2309 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2310
2311 // If the sign bit is known one, the top bits match.
2312 Known = Known.sext(BitWidth);
2313
2314 // If the sign bit is known zero, convert this to a zero extend.
2315 if (Known.isNonNegative()) {
2316 unsigned Opc =
2317 IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2318 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2319 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2320 }
2321
2322 // Attempt to avoid multi-use ops if we don't need anything from them.
2323 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2324 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2325 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2326 break;
2327 }
2328 case ISD::ANY_EXTEND_VECTOR_INREG:
2329 if (VT.isScalableVector())
2330 return false;
2331 [[fallthrough]];
2332 case ISD::ANY_EXTEND: {
2333 SDValue Src = Op.getOperand(0);
2334 EVT SrcVT = Src.getValueType();
2335 unsigned InBits = SrcVT.getScalarSizeInBits();
2336 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2337 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2338
2339 // If we only need the bottom element then we can just bitcast.
2340 // TODO: Handle ANY_EXTEND?
2341 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2342 VT.getSizeInBits() == SrcVT.getSizeInBits())
2343 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2344
2345 APInt InDemandedBits = DemandedBits.trunc(InBits);
2346 APInt InDemandedElts = DemandedElts.zext(InElts);
2347 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2348 Depth + 1))
2349 return true;
2350 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2351 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2352 Known = Known.anyext(BitWidth);
2353
2354 // Attempt to avoid multi-use ops if we don't need anything from them.
2355 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2356 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2357 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2358 break;
2359 }
2360 case ISD::TRUNCATE: {
2361 SDValue Src = Op.getOperand(0);
2362
2363 // Simplify the input, using demanded bit information, and compute the known
2364 // zero/one bits live out.
2365 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2366 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2367 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2368 Depth + 1))
2369 return true;
2370 Known = Known.trunc(BitWidth);
2371
2372 // Attempt to avoid multi-use ops if we don't need anything from them.
2373 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2374 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2375 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2376
2377 // If the input is only used by this truncate, see if we can shrink it based
2378 // on the known demanded bits.
2379 switch (Src.getOpcode()) {
2380 default:
2381 break;
2382 case ISD::SRL:
2383 // Shrink SRL by a constant if none of the high bits shifted in are
2384 // demanded.
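// For example, truncating (X >>u 8) from i32 to i16: when all demanded bits
// lie in the low 8 bits of the result, this can become ((trunc X) >>u 8).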
2385 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2386 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2387 // undesirable.
2388 break;
2389
2390 if (Src.getNode()->hasOneUse()) {
2391 const APInt *ShAmtC =
2392 TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
2393 if (!ShAmtC || ShAmtC->uge(BitWidth))
2394 break;
2395 uint64_t ShVal = ShAmtC->getZExtValue();
2396
2397 APInt HighBits =
2398 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2399 HighBits.lshrInPlace(ShVal);
2400 HighBits = HighBits.trunc(BitWidth);
2401
2402 if (!(HighBits & DemandedBits)) {
2403 // None of the shifted in bits are needed. Add a truncate of the
2404 // shift input, then shift it.
2405 SDValue NewShAmt = TLO.DAG.getConstant(
2406 ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
2407 SDValue NewTrunc =
2408 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2409 return TLO.CombineTo(
2410 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2411 }
2412 }
2413 break;
2414 }
2415
2416 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2417 break;
2418 }
2419 case ISD::AssertZext: {
2420 // AssertZext demands all of the high bits, plus any of the low bits
2421 // demanded by its users.
2422 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2423 APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2424 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2425 TLO, Depth + 1))
2426 return true;
2427 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2428
2429 Known.Zero |= ~InMask;
2430 Known.One &= (~Known.Zero);
2431 break;
2432 }
2433 case ISD::EXTRACT_VECTOR_ELT: {
2434 SDValue Src = Op.getOperand(0);
2435 SDValue Idx = Op.getOperand(1);
2436 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2437 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2438
2439 if (SrcEltCnt.isScalable())
2440 return false;
2441
2442 // Demand the bits from every vector element without a constant index.
2443 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2444 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2445 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2446 if (CIdx->getAPIntValue().ult(NumSrcElts))
2447 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2448
2449 // If BitWidth > EltBitWidth the value is any-extended, so we do not know
2450 // anything about the extended bits.
2451 APInt DemandedSrcBits = DemandedBits;
2452 if (BitWidth > EltBitWidth)
2453 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2454
2455 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2456 Depth + 1))
2457 return true;
2458
2459 // Attempt to avoid multi-use ops if we don't need anything from them.
2460 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2461 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2462 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2463 SDValue NewOp =
2464 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2465 return TLO.CombineTo(Op, NewOp);
2466 }
2467 }
2468
2469 Known = Known2;
2470 if (BitWidth > EltBitWidth)
2471 Known = Known.anyext(BitWidth);
2472 break;
2473 }
2474 case ISD::BITCAST: {
2475 if (VT.isScalableVector())
2476 return false;
2477 SDValue Src = Op.getOperand(0);
2478 EVT SrcVT = Src.getValueType();
2479 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2480
2481 // If this is an FP->Int bitcast and if the sign bit is the only
2482 // thing demanded, turn this into a FGETSIGN.
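// For example, bitcasting an f32 to i32 where only bit 31 is demanded:
// FGETSIGN yields 0 or 1, and shifting it left by 31 recreates the sign bit.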
2483 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2484 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2485 SrcVT.isFloatingPoint()) {
2486 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2487 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2488 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2489 SrcVT != MVT::f128) {
2490 // Cannot eliminate/lower SHL for f128 yet.
2491 EVT Ty = OpVTLegal ? VT : MVT::i32;
2492 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2493 // place. We expect the SHL to be eliminated by other optimizations.
2494 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2495 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2496 if (!OpVTLegal && OpVTSizeInBits > 32)
2497 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2498 unsigned ShVal = Op.getValueSizeInBits() - 1;
2499 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2500 return TLO.CombineTo(Op,
2501 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2502 }
2503 }
2504
2505 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2506 // Demand the elt/bit if any of the original elts/bits are demanded.
2507 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2508 unsigned Scale = BitWidth / NumSrcEltBits;
2509 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2510 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2511 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2512 for (unsigned i = 0; i != Scale; ++i) {
2513 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2514 unsigned BitOffset = EltOffset * NumSrcEltBits;
2515 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2516 if (!Sub.isZero()) {
2517 DemandedSrcBits |= Sub;
2518 for (unsigned j = 0; j != NumElts; ++j)
2519 if (DemandedElts[j])
2520 DemandedSrcElts.setBit((j * Scale) + i);
2521 }
2522 }
2523
2524 APInt KnownSrcUndef, KnownSrcZero;
2525 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2526 KnownSrcZero, TLO, Depth + 1))
2527 return true;
2528
2529 KnownBits KnownSrcBits;
2530 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2531 KnownSrcBits, TLO, Depth + 1))
2532 return true;
2533 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2534 // TODO - bigendian once we have test coverage.
2535 unsigned Scale = NumSrcEltBits / BitWidth;
2536 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2537 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2538 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2539 for (unsigned i = 0; i != NumElts; ++i)
2540 if (DemandedElts[i]) {
2541 unsigned Offset = (i % Scale) * BitWidth;
2542 DemandedSrcBits.insertBits(DemandedBits, Offset);
2543 DemandedSrcElts.setBit(i / Scale);
2544 }
2545
2546 if (SrcVT.isVector()) {
2547 APInt KnownSrcUndef, KnownSrcZero;
2548 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2549 KnownSrcZero, TLO, Depth + 1))
2550 return true;
2551 }
2552
2553 KnownBits KnownSrcBits;
2554 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2555 KnownSrcBits, TLO, Depth + 1))
2556 return true;
2557 }
2558
2559 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2560 // recursive call where Known may be useful to the caller.
2561 if (Depth > 0) {
2562 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2563 return false;
2564 }
2565 break;
2566 }
2567 case ISD::MUL:
2568 if (DemandedBits.isPowerOf2()) {
2569 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2570 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2571 // odd (has LSB set), then the left-shifted low bit of X is the answer.
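// For example, demanding only bit 3 of (X * 24): 24 == 3 << 3 with 3 odd,
// so that bit equals bit 3 of (X << 3).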
2572 unsigned CTZ = DemandedBits.countTrailingZeros();
2573 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2574 if (C && C->getAPIntValue().countTrailingZeros() == CTZ) {
2575 EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
2576 SDValue AmtC = TLO.DAG.getConstant(CTZ, dl, ShiftAmtTy);
2577 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2578 return TLO.CombineTo(Op, Shl);
2579 }
2580 }
2581 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2582 // X * X is odd iff X is odd.
2583 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2584 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2585 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2586 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2587 return TLO.CombineTo(Op, And1);
2588 }
2589 [[fallthrough]];
2590 case ISD::ADD:
2591 case ISD::SUB: {
2592 // Add, Sub, and Mul don't demand any bits in positions beyond that
2593 // of the highest bit demanded of them.
2594 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2595 SDNodeFlags Flags = Op.getNode()->getFlags();
2596 unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
2597 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2598 if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
2599 Depth + 1) ||
2600 SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
2601 Depth + 1) ||
2602 // See if the operation should be performed at a smaller bit width.
2603 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2604 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2605 // Disable the nsw and nuw flags. We can no longer guarantee that we
2606 // won't wrap after simplification.
2607 Flags.setNoSignedWrap(false);
2608 Flags.setNoUnsignedWrap(false);
2609 Op->setFlags(Flags);
2610 }
2611 return true;
2612 }
2613
2614 // neg x with only low bit demanded is simply x.
2615 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2616 isa<ConstantSDNode>(Op0) && cast<ConstantSDNode>(Op0)->isZero())
2617 return TLO.CombineTo(Op, Op1);
2618
2619 // Attempt to avoid multi-use ops if we don't need anything from them.
2620 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2621 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2622 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2623 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2624 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2625 if (DemandedOp0 || DemandedOp1) {
2626 Flags.setNoSignedWrap(false);
2627 Flags.setNoUnsignedWrap(false);
2628 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2629 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2630 SDValue NewOp =
2631 TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2632 return TLO.CombineTo(Op, NewOp);
2633 }
2634 }
2635
2636 // If we have a constant operand, we may be able to turn it into -1 if we
2637 // do not demand the high bits. This can make the constant smaller to
2638 // encode, allow more general folding, or match specialized instruction
2639 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2640 // is probably not useful (and could be detrimental).
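// For example, i8 (add X, 0x1F) with only the low 4 bits demanded: the
// constant's demanded bits are all ones, so it can become (add X, -1).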
2641 ConstantSDNode *C = isConstOrConstSplat(Op1);
2642 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2643 if (C && !C->isAllOnes() && !C->isOne() &&
2644 (C->getAPIntValue() | HighMask).isAllOnes()) {
2645 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2646 // Disable the nsw and nuw flags. We can no longer guarantee that we
2647 // won't wrap after simplification.
2648 Flags.setNoSignedWrap(false);
2649 Flags.setNoUnsignedWrap(false);
2650 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
2651 return TLO.CombineTo(Op, NewOp);
2652 }
2653
2654 // Match a multiply with a disguised negated-power-of-2 and convert to
2655 // an equivalent shift-left amount.
2656 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2657 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2658 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2659 return 0;
2660
2661 // Don't touch opaque constants. Also, ignore zero and power-of-2
2662 // multiplies. Those will get folded later.
2663 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2664 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2665 !MulC->getAPIntValue().isPowerOf2()) {
2666 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2667 if (UnmaskedC.isNegatedPowerOf2())
2668 return (-UnmaskedC).logBase2();
2669 }
2670 return 0;
2671 };
2672
2673 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y, unsigned ShlAmt) {
2674 EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
2675 SDValue ShlAmtC = TLO.DAG.getConstant(ShlAmt, dl, ShiftAmtTy);
2676 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2677 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2678 return TLO.CombineTo(Op, Res);
2679 };
2680
2681 if (isOperationLegalOrCustom(ISD::SHL, VT)) {
2682 if (Op.getOpcode() == ISD::ADD) {
2683 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2684 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2685 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2686 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2687 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2688 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2689 }
2690 if (Op.getOpcode() == ISD::SUB) {
2691 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2692 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2693 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2694 }
2695 }
2696
2697 [[fallthrough]];
2698 }
2699 default:
2700 // We also ask the target about intrinsics (which could be specific to it).
2701 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2702 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2703 // TODO: Probably okay to remove after audit; here to reduce change size
2704 // in initial enablement patch for scalable vectors
2705 if (Op.getValueType().isScalableVector())
2706 break;
2707 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2708 Known, TLO, Depth))
2709 return true;
2710 break;
2711 }
2712
2713 // Just use computeKnownBits to compute output bits.
2714 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2715 break;
2716 }
2717
2718 // If we know the value of all of the demanded bits, return this as a
2719 // constant.
2720 if (!isTargetCanonicalConstantNode(Op) &&
2721 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2722 // Avoid folding to a constant if any OpaqueConstant is involved.
2723 const SDNode *N = Op.getNode();
2724 for (SDNode *Op :
2725 llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) {
2726 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
2727 if (C->isOpaque())
2728 return false;
2729 }
2730 if (VT.isInteger())
2731 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2732 if (VT.isFloatingPoint())
2733 return TLO.CombineTo(
2734 Op,
2735 TLO.DAG.getConstantFP(
2736 APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
2737 }
2738
2739 // A multi use 'all demanded elts' simplify failed to find any knownbits.
2740 // Try again just for the original demanded elts.
2741 // Ensure we do this AFTER constant folding above.
2742 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2743 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
2744
2745 return false;
2746}
2747
2748 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2749 const APInt &DemandedElts,
2750 DAGCombinerInfo &DCI) const {
2751 SelectionDAG &DAG = DCI.DAG;
2752 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2753 !DCI.isBeforeLegalizeOps());
2754
2755 APInt KnownUndef, KnownZero;
2756 bool Simplified =
2757 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2758 if (Simplified) {
2759 DCI.AddToWorklist(Op.getNode());
2760 DCI.CommitTargetLoweringOpt(TLO);
2761 }
2762
2763 return Simplified;
2764}
2765
2766/// Given a vector binary operation and known undefined elements for each input
2767/// operand, compute whether each element of the output is undefined.
2768 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2769 const APInt &UndefOp0,
2770 const APInt &UndefOp1) {
2771 EVT VT = BO.getValueType();
2773 "Vector binop only");
2774
2775 EVT EltVT = VT.getVectorElementType();
2776 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
2777 assert(UndefOp0.getBitWidth() == NumElts &&
2778 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2779
2780 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2781 const APInt &UndefVals) {
2782 if (UndefVals[Index])
2783 return DAG.getUNDEF(EltVT);
2784
2785 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
2786 // Try hard to make sure that the getNode() call is not creating temporary
2787 // nodes. Ignore opaque integers because they do not constant fold.
2788 SDValue Elt = BV->getOperand(Index);
2789 auto *C = dyn_cast<ConstantSDNode>(Elt);
2790 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2791 return Elt;
2792 }
2793
2794 return SDValue();
2795 };
2796
2797 APInt KnownUndef = APInt::getZero(NumElts);
2798 for (unsigned i = 0; i != NumElts; ++i) {
2799 // If both inputs for this element are either constant or undef and match
2800 // the element type, compute the constant/undef result for this element of
2801 // the vector.
2802 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2803 // not handle FP constants. The code within getNode() should be refactored
2804 // to avoid the danger of creating a bogus temporary node here.
2805 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
2806 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
2807 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
2808 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
2809 KnownUndef.setBit(i);
2810 }
2811 return KnownUndef;
2812}
2813
2814 bool TargetLowering::SimplifyDemandedVectorElts(
2815 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
2816 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
2817 bool AssumeSingleUse) const {
2818 EVT VT = Op.getValueType();
2819 unsigned Opcode = Op.getOpcode();
2820 APInt DemandedElts = OriginalDemandedElts;
2821 unsigned NumElts = DemandedElts.getBitWidth();
2822 assert(VT.isVector() && "Expected vector op");
2823
2824 KnownUndef = KnownZero = APInt::getZero(NumElts);
2825
2826 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
2827 if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
2828 return false;
2829
2830 // TODO: For now we assume we know nothing about scalable vectors.
2831 if (VT.isScalableVector())
2832 return false;
2833
2834 assert(VT.getVectorNumElements() == NumElts &&
2835 "Mask size mismatches value type element count!");
2836
2837 // Undef operand.
2838 if (Op.isUndef()) {
2839 KnownUndef.setAllBits();
2840 return false;
2841 }
2842
2843 // If Op has other users, assume that all elements are needed.
2844 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
2845 DemandedElts.setAllBits();
2846
2847 // Not demanding any elements from Op.
2848 if (DemandedElts == 0) {
2849 KnownUndef.setAllBits();
2850 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2851 }
2852
2853 // Limit search depth.
2854 if (Depth >= SelectionDAG::MaxRecursionDepth)
2855 return false;
2856
2857 SDLoc DL(Op);
2858 unsigned EltSizeInBits = VT.getScalarSizeInBits();
2859 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
2860
2861 // Helper for demanding the specified elements and all the bits of both binary
2862 // operands.
2863 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
2864 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
2865 TLO.DAG, Depth + 1);
2866 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
2867 TLO.DAG, Depth + 1);
2868 if (NewOp0 || NewOp1) {
2869 SDValue NewOp = TLO.DAG.getNode(
2870 Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1);
2871 return TLO.CombineTo(Op, NewOp);
2872 }
2873 return false;
2874 };
2875
2876 switch (Opcode) {
2877 case ISD::SCALAR_TO_VECTOR: {
2878 if (!DemandedElts[0]) {
2879 KnownUndef.setAllBits();
2880 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2881 }
2882 SDValue ScalarSrc = Op.getOperand(0);
2883 if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
2884 SDValue Src = ScalarSrc.getOperand(0);
2885 SDValue Idx = ScalarSrc.getOperand(1);
2886 EVT SrcVT = Src.getValueType();
2887
2888 ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
2889
2890 if (SrcEltCnt.isScalable())
2891 return false;
2892
2893 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2894 if (isNullConstant(Idx)) {
2895 APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
2896 APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
2897 APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
2898 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2899 TLO, Depth + 1))
2900 return true;
2901 }
2902 }
2903 KnownUndef.setHighBits(NumElts - 1);
2904 break;
2905 }
2906 case ISD::BITCAST: {
2907 SDValue Src = Op.getOperand(0);
2908 EVT SrcVT = Src.getValueType();
2909
2910 // We only handle vectors here.
2911 // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
2912 if (!SrcVT.isVector())
2913 break;
2914
2915 // Fast handling of 'identity' bitcasts.
2916 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2917 if (NumSrcElts == NumElts)
2918 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
2919 KnownZero, TLO, Depth + 1);
2920
2921 APInt SrcDemandedElts, SrcZero, SrcUndef;
2922
2923 // Bitcast from 'large element' src vector to 'small element' vector, we
2924 // must demand a source element if any DemandedElt maps to it.
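// For example, bitcasting v2i64 -> v4i32: demanding i32 element 1 requires
// i64 source element 0 (output element j maps to source element j / Scale).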
2925 if ((NumElts % NumSrcElts) == 0) {
2926 unsigned Scale = NumElts / NumSrcElts;
2927 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
2928 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2929 TLO, Depth + 1))
2930 return true;
2931
2932 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
2933 // of the large element.
2934 // TODO - bigendian once we have test coverage.
2935 if (IsLE) {
2936 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
2937 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
2938 for (unsigned i = 0; i != NumElts; ++i)
2939 if (DemandedElts[i]) {
2940 unsigned Ofs = (i % Scale) * EltSizeInBits;
2941 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
2942 }
2943
2944 KnownBits Known;
2945 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
2946 TLO, Depth + 1))
2947 return true;
2948
2949 // The bitcast has split each wide element into a number of
2950 // narrow subelements. We have just computed the Known bits
2951 // for wide elements. See if element splitting results in
2952 // some subelements being zero. Only for demanded elements!
2953 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
2954 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
2955 .isAllOnes())
2956 continue;
2957 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
2958 unsigned Elt = Scale * SrcElt + SubElt;
2959 if (DemandedElts[Elt])
2960 KnownZero.setBit(Elt);
2961 }
2962 }
2963 }
2964
2965 // If the src element is zero/undef then so are all of its output elements -
2966 // only demanded elements are guaranteed to be correct.
2967 for (unsigned i = 0; i != NumSrcElts; ++i) {
2968 if (SrcDemandedElts[i]) {
2969 if (SrcZero[i])
2970 KnownZero.setBits(i * Scale, (i + 1) * Scale);
2971 if (SrcUndef[i])
2972 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
2973 }
2974 }
2975 }
2976
2977 // Bitcast from 'small element' src vector to 'large element' vector, we
2978 // demand all smaller source elements covered by the larger demanded element
2979 // of this vector.
2980 if ((NumSrcElts % NumElts) == 0) {
2981 unsigned Scale = NumSrcElts / NumElts;
2982 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
2983 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2984 TLO, Depth + 1))
2985 return true;
2986
2987 // If all the src elements covering an output element are zero/undef, then
2988 // the output element will be as well, assuming it was demanded.
2989 for (unsigned i = 0; i != NumElts; ++i) {
2990 if (DemandedElts[i]) {
2991 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
2992 KnownZero.setBit(i);
2993 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
2994 KnownUndef.setBit(i);
2995 }
2996 }
2997 }
2998 break;
2999 }
3000 case ISD::BUILD_VECTOR: {
3001 // Check all elements and simplify any unused elements with UNDEF.
3002 if (!DemandedElts.isAllOnes()) {
3003 // Don't simplify BROADCASTS.
3004 if (llvm::any_of(Op->op_values(),
3005 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3006 SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
3007 bool Updated = false;
3008 for (unsigned i = 0; i != NumElts; ++i) {
3009 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3010 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3011 KnownUndef.setBit(i);
3012 Updated = true;
3013 }
3014 }
3015 if (Updated)
3016 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3017 }
3018 }
3019 for (unsigned i = 0; i != NumElts; ++i) {
3020 SDValue SrcOp = Op.getOperand(i);
3021 if (SrcOp.isUndef()) {
3022 KnownUndef.setBit(i);
3023 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3024 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
3025 KnownZero.setBit(i);
3026 }
3027 }
3028 break;
3029 }
3030 case ISD::CONCAT_VECTORS: {
3031 EVT SubVT = Op.getOperand(0).getValueType();
3032 unsigned NumSubVecs = Op.getNumOperands();
3033 unsigned NumSubElts = SubVT.getVectorNumElements();
3034 for (unsigned i = 0; i != NumSubVecs; ++i) {
3035 SDValue SubOp = Op.getOperand(i);
3036 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3037 APInt SubUndef, SubZero;
3038 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3039 Depth + 1))
3040 return true;
3041 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3042 KnownZero.insertBits(SubZero, i * NumSubElts);
3043 }
3044
3045 // Attempt to avoid multi-use ops if we don't need anything from them.
3046 if (!DemandedElts.isAllOnes()) {
3047 bool FoundNewSub = false;
3048 SmallVector<SDValue, 2> DemandedSubOps;
3049 for (unsigned i = 0; i != NumSubVecs; ++i) {
3050 SDValue SubOp = Op.getOperand(i);
3051 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3052 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3053 SubOp, SubElts, TLO.DAG, Depth + 1);
3054 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3055 FoundNewSub = NewSubOp ? true : FoundNewSub;
3056 }
3057 if (FoundNewSub) {
3058 SDValue NewOp =
3059 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3060 return TLO.CombineTo(Op, NewOp);
3061 }
3062 }
3063 break;
3064 }
3065 case ISD::INSERT_SUBVECTOR: {
3066 // Demand any elements from the subvector and the remainder from the src it's
3067 // inserted into.
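// For example, inserting a v2i32 subvector at index 2 into a v8i32: result
// elements 2-3 are demanded from the subvector, the rest from the source.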
3068 SDValue Src = Op.getOperand(0);
3069 SDValue Sub = Op.getOperand(1);
3070 uint64_t Idx = Op.getConstantOperandVal(2);
3071 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3072 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3073 APInt DemandedSrcElts = DemandedElts;
3074 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
3075
3076 APInt SubUndef, SubZero;
3077 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3078 Depth + 1))
3079 return true;
3080
3081 // If none of the src operand elements are demanded, replace it with undef.
3082 if (!DemandedSrcElts && !Src.isUndef())
3083 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3084 TLO.DAG.getUNDEF(VT), Sub,
3085 Op.getOperand(2)));
3086
3087 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3088 TLO, Depth + 1))
3089 return true;
3090 KnownUndef.insertBits(SubUndef, Idx);
3091 KnownZero.insertBits(SubZero, Idx);
3092
3093 // Attempt to avoid multi-use ops if we don't need anything from them.
3094 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3095 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3096 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3097 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3098 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3099 if (NewSrc || NewSub) {
3100 NewSrc = NewSrc ? NewSrc : Src;
3101 NewSub = NewSub ? NewSub : Sub;
3102 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3103 NewSub, Op.getOperand(2));
3104 return TLO.CombineTo(Op, NewOp);
3105 }
3106 }
3107 break;
3108 }
3109 case ISD::EXTRACT_SUBVECTOR: {
3110 // Offset the demanded elts by the subvector index.
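// E.g. extracting a v2i32 subvector at index 4 from a v8i32 source maps a
// demanded mask of 0b01 on the result to 0b00010000 on the source.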
3111 SDValue Src = Op.getOperand(0);
3112 if (Src.getValueType().isScalableVector())
3113 break;
3114 uint64_t Idx = Op.getConstantOperandVal(1);
3115 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3116 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3117
3118 APInt SrcUndef, SrcZero;
3119 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3120 Depth + 1))
3121 return true;
3122 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3123 KnownZero = SrcZero.extractBits(NumElts, Idx);
3124
3125 // Attempt to avoid multi-use ops if we don't need anything from them.
3126 if (!DemandedElts.isAllOnes()) {
3127 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3128 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3129 if (NewSrc) {
3130 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3131 Op.getOperand(1));
3132 return TLO.CombineTo(Op, NewOp);
3133 }
3134 }
3135 break;
3136 }
3137 case ISD::INSERT_VECTOR_ELT: {
3138 SDValue Vec = Op.getOperand(0);
3139 SDValue Scl = Op.getOperand(1);
3140 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3141
3142 // For a legal, constant insertion index, if we don't need this insertion
3143 // then strip it, else remove it from the demanded elts.
3144 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3145 unsigned Idx = CIdx->getZExtValue();
3146 if (!DemandedElts[Idx])
3147 return TLO.CombineTo(Op, Vec);
3148
3149 APInt DemandedVecElts(DemandedElts);
3150 DemandedVecElts.clearBit(Idx);
3151 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3152 KnownZero, TLO, Depth + 1))
3153 return true;
3154
3155 KnownUndef.setBitVal(Idx, Scl.isUndef());
3156
3157 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3158 break;
3159 }
3160
3161 APInt VecUndef, VecZero;
3162 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3163 Depth + 1))
3164 return true;
3165 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3166 break;
3167 }
3168 case ISD::VSELECT: {
3169 SDValue Sel = Op.getOperand(0);
3170 SDValue LHS = Op.getOperand(1);
3171 SDValue RHS = Op.getOperand(2);
3172
3173 // Try to transform the select condition based on the current demanded
3174 // elements.
3175 APInt UndefSel, UndefZero;
3176 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, UndefZero, TLO,
3177 Depth + 1))
3178 return true;
3179
3180 // See if we can simplify either vselect operand.
3181 APInt DemandedLHS(DemandedElts);
3182 APInt DemandedRHS(DemandedElts);
3183 APInt UndefLHS, ZeroLHS;
3184 APInt UndefRHS, ZeroRHS;
3185 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3186 Depth + 1))
3187 return true;
3188 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3189 Depth + 1))
3190 return true;
3191
3192 KnownUndef = UndefLHS & UndefRHS;
3193 KnownZero = ZeroLHS & ZeroRHS;
3194
3195 // If we know that the selected element is always zero, we don't need the
3196 // select value element.
3197 APInt DemandedSel = DemandedElts & ~KnownZero;
3198 if (DemandedSel != DemandedElts)
3199 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, UndefZero, TLO,
3200 Depth + 1))
3201 return true;
3202
3203 break;
3204 }
3205 case ISD::VECTOR_SHUFFLE: {
3206 SDValue LHS = Op.getOperand(0);
3207 SDValue RHS = Op.getOperand(1);
3208 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3209
3210 // Collect demanded elements from shuffle operands.
3211 APInt DemandedLHS(NumElts, 0);
3212 APInt DemandedRHS(NumElts, 0);
3213 for (unsigned i = 0; i != NumElts; ++i) {
3214 int M = ShuffleMask[i];
3215 if (M < 0 || !DemandedElts[i])
3216 continue;
3217 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3218 if (M < (int)NumElts)
3219 DemandedLHS.setBit(M);
3220 else
3221 DemandedRHS.setBit(M - NumElts);
3222 }
3223
3224 // See if we can simplify either shuffle operand.
3225 APInt UndefLHS, ZeroLHS;
3226 APInt UndefRHS, ZeroRHS;
3227 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3228 Depth + 1))
3229 return true;
3230 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3231 Depth + 1))
3232 return true;
3233
3234 // Simplify mask using undef elements from LHS/RHS.
3235 bool Updated = false;
3236 bool IdentityLHS = true, IdentityRHS = true;
3237 SmallVector<int, 32> NewMask(ShuffleMask);
3238 for (unsigned i = 0; i != NumElts; ++i) {
3239 int &M = NewMask[i];
3240 if (M < 0)
3241 continue;
3242 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3243 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3244 Updated = true;
3245 M = -1;
3246 }
3247 IdentityLHS &= (M < 0) || (M == (int)i);
3248 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3249 }
3250
3251 // Update legal shuffle masks based on demanded elements, but only if doing so
3252 // won't reduce to an identity mask, which could prematurely remove the shuffle.
3253 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3254 SDValue LegalShuffle =
3255 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3256 if (LegalShuffle)
3257 return TLO.CombineTo(Op, LegalShuffle);
3258 }
3259
3260 // Propagate undef/zero elements from LHS/RHS.
3261 for (unsigned i = 0; i != NumElts; ++i) {
3262 int M = ShuffleMask[i];
3263 if (M < 0) {
3264 KnownUndef.setBit(i);
3265 } else if (M < (int)NumElts) {
3266 if (UndefLHS[M])
3267 KnownUndef.setBit(i);
3268 if (ZeroLHS[M])
3269 KnownZero.setBit(i);
3270 } else {
3271 if (UndefRHS[M - NumElts])
3272 KnownUndef.setBit(i);
3273 if (ZeroRHS[M - NumElts])
3274 KnownZero.setBit(i);
3275 }
3276 }
3277 break;
3278 }
3279 case ISD::ANY_EXTEND_VECTOR_INREG:
3280 case ISD::SIGN_EXTEND_VECTOR_INREG:
3281 case ISD::ZERO_EXTEND_VECTOR_INREG: {
3282 APInt SrcUndef, SrcZero;
3283 SDValue Src = Op.getOperand(0);
3284 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3285 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3286 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3287 Depth + 1))
3288 return true;
3289 KnownZero = SrcZero.zextOrTrunc(NumElts);
3290 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3291
3292 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3293 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3294 DemandedSrcElts == 1) {
3295 // aext - if we just need the bottom element then we can bitcast.
3296 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3297 }
3298
3299 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3300 // zext(undef) upper bits are guaranteed to be zero.
3301 if (DemandedElts.isSubsetOf(KnownUndef))
3302 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3303 KnownUndef.clearAllBits();
3304
3305 // zext - if we just need the bottom element then we can mask:
3306 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3307 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3308 Op->isOnlyUserOf(Src.getNode()) &&
3309 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3310 SDLoc DL(Op);
3311 EVT SrcVT = Src.getValueType();
3312 EVT SrcSVT = SrcVT.getScalarType();
3313 SmallVector<SDValue> MaskElts;
3314 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3315 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3316 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3317 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3318 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3319 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3320 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3321 }
3322 }
3323 }
3324 break;
3325 }
3326
3327 // TODO: There are more binop opcodes that could be handled here - MIN,
3328 // MAX, saturated math, etc.
3329 case ISD::ADD: {
3330 SDValue Op0 = Op.getOperand(0);
3331 SDValue Op1 = Op.getOperand(1);
3332 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3333 APInt UndefLHS, ZeroLHS;
3334 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3335 Depth + 1, /*AssumeSingleUse*/ true))
3336 return true;
3337 }
3338 [[fallthrough]];
3339 }
3340 case ISD::OR:
3341 case ISD::XOR:
3342 case ISD::SUB:
3343 case ISD::FADD:
3344 case ISD::FSUB:
3345 case ISD::FMUL:
3346 case ISD::FDIV:
3347 case ISD::FREM: {
3348 SDValue Op0 = Op.getOperand(0);
3349 SDValue Op1 = Op.getOperand(1);
3350
3351 APInt UndefRHS, ZeroRHS;
3352 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3353 Depth + 1))
3354 return true;
3355 APInt UndefLHS, ZeroLHS;
3356 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3357 Depth + 1))
3358 return true;
3359
3360 KnownZero = ZeroLHS & ZeroRHS;
3361 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3362
3363 // Attempt to avoid multi-use ops if we don't need anything from them.
3364 // TODO - use KnownUndef to relax the demandedelts?
3365 if (!DemandedElts.isAllOnes())
3366 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3367 return true;
3368 break;
3369 }
3370 case ISD::SHL:
3371 case ISD::SRL:
3372 case ISD::SRA:
3373 case ISD::ROTL:
3374 case ISD::ROTR: {
3375 SDValue Op0 = Op.getOperand(0);
3376 SDValue Op1 = Op.getOperand(1);
3377
3378 APInt UndefRHS, ZeroRHS;
3379 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3380 Depth + 1))
3381 return true;
3382 APInt UndefLHS, ZeroLHS;
3383 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3384 Depth + 1))
3385 return true;
3386
3387 KnownZero = ZeroLHS;
3388 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3389
3390 // Attempt to avoid multi-use ops if we don't need anything from them.
3391 // TODO - use KnownUndef to relax the demandedelts?
3392 if (!DemandedElts.isAllOnes())
3393 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3394 return true;
3395 break;
3396 }
3397 case ISD::MUL:
3398 case ISD::MULHU:
3399 case ISD::MULHS:
3400 case ISD::AND: {
3401 SDValue Op0 = Op.getOperand(0);
3402 SDValue Op1 = Op.getOperand(1);
3403
3404 APInt SrcUndef, SrcZero;
3405 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3406 Depth + 1))
3407 return true;
3408 // If we know that a demanded element was zero in Op1 we don't need to
3409 // demand it in Op0 - it's guaranteed to be zero.
3410 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3411 if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3412 TLO, Depth + 1))
3413 return true;
3414
3415 KnownUndef &= DemandedElts0;
3416 KnownZero &= DemandedElts0;
3417
3418 // If every element pair has a zero/undef then just fold to zero.
3419 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3420 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3421 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3422 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3423
3424 // If either side has a zero element, then the result element is zero, even
3425 // if the other is an UNDEF.
3426 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3427 // and then handle 'and' nodes with the rest of the binop opcodes.
3428 KnownZero |= SrcZero;
3429 KnownUndef &= SrcUndef;
3430 KnownUndef &= ~KnownZero;
3431
3432 // Attempt to avoid multi-use ops if we don't need anything from them.
3433 if (!DemandedElts.isAllOnes())
3434 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3435 return true;
3436 break;
3437 }
3438 case ISD::TRUNCATE:
3439 case ISD::SIGN_EXTEND:
3440 case ISD::ZERO_EXTEND:
3441 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3442 KnownZero, TLO, Depth + 1))
3443 return true;
3444
3445 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3446 // zext(undef) upper bits are guaranteed to be zero.
3447 if (DemandedElts.isSubsetOf(KnownUndef))
3448 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3449 KnownUndef.clearAllBits();
3450 }
3451 break;
3452 default: {
3453 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3454 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3455 KnownZero, TLO, Depth))
3456 return true;
3457 } else {
3458 KnownBits Known;
3459 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3460 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3461 TLO, Depth, AssumeSingleUse))
3462 return true;
3463 }
3464 break;
3465 }
3466 }
3467 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3468
3469 // Constant fold all undef cases.
3470 // TODO: Handle zero cases as well.
3471 if (DemandedElts.isSubsetOf(KnownUndef))
3472 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3473
3474 return false;
3475}
3476
3477/// Determine which of the bits specified in Mask are known to be either zero or
3478/// one and return them in the Known.
3479 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3480 KnownBits &Known,
3481 const APInt &DemandedElts,
3482 const SelectionDAG &DAG,
3483 unsigned Depth) const {
3484 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3485 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3486 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3487 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3488 "Should use MaskedValueIsZero if you don't know whether Op"
3489 " is a target node!");
3490 Known.resetAll();
3491}
3492
3493 void TargetLowering::computeKnownBitsForTargetInstr(
3494 GISelKnownBits &Analysis, Register R, KnownBits &Known,
3495 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3496 unsigned Depth) const {
3497 Known.resetAll();
3498}
3499
3500 void TargetLowering::computeKnownBitsForFrameIndex(
3501 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3502 // The low bits are known zero if the pointer is aligned.
3503 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3504}
3505
3506 Align TargetLowering::computeKnownAlignForTargetInstr(
3507 GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
3508 unsigned Depth) const {
3509 return Align(1);
3510}
3511
3512/// This method can be implemented by targets that want to expose additional
3513/// information about sign bits to the DAG Combiner.
3514 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3515 const APInt &,
3516 const SelectionDAG &,
3517 unsigned Depth) const {
3518 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3519 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3520 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3521 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3522 "Should use ComputeNumSignBits if you don't know whether Op"
3523 " is a target node!");
3524 return 1;
3525}
3526
3527 unsigned TargetLowering::computeNumSignBitsForTargetInstr(
3528 GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
3529 const MachineRegisterInfo &MRI, unsigned Depth) const {
3530 return 1;
3531}
3532
3533 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3534 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3535 TargetLoweringOpt &TLO, unsigned Depth) const {
3536 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3537 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3538 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3539 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3540 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3541 " is a target node!");
3542 return false;
3543}
3544
3545 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3546 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3547 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3548 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3549 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3550 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3551 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3552 "Should use SimplifyDemandedBits if you don't know whether Op"
3553 " is a target node!");
3554 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3555 return false;
3556}
3557
3558 SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3559 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3560 SelectionDAG &DAG, unsigned Depth) const {
3561 assert(
3562 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3563 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3564 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3565 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3566 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3567 " is a target node!");
3568 return SDValue();
3569}
3570
3571 SDValue
3572 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3573 SDValue N1, MutableArrayRef<int> Mask,
3574 SelectionDAG &DAG) const {
3575 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3576 if (!LegalMask) {
3577 std::swap(N0, N1);
3578 ShuffleVectorSDNode::commuteMask(Mask);
3579 LegalMask = isShuffleMaskLegal(Mask, VT);
3580 }
3581
3582 if (!LegalMask)
3583 return SDValue();
3584
3585 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3586}
3587
3588 const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
3589 return nullptr;
3590}
3591
3592 bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3593 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3594 bool PoisonOnly, unsigned Depth) const {
3595 assert(
3596 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3597 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3598 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3599 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3600 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3601 " is a target node!");
3602 return false;
3603}
3604
3605 bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
3606 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3607 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
3608 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3609 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3610 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3611 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3612 "Should use canCreateUndefOrPoison if you don't know whether Op"
3613 " is a target node!");
3614 // Be conservative and return true.
3615 return true;
3616}
3617
3618 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
3619 const SelectionDAG &DAG,
3620 bool SNaN,
3621 unsigned Depth) const {
3622 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3623 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3624 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3625 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3626 "Should use isKnownNeverNaN if you don't know whether Op"
3627 " is a target node!");
3628 return false;
3629}
3630
3631 bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
3632 const APInt &DemandedElts,
3633 APInt &UndefElts,
3634 const SelectionDAG &DAG,
3635 unsigned Depth) const {
3636 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3637 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3638 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3639 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3640 "Should use isSplatValue if you don't know whether Op"
3641 " is a target node!");
3642 return false;
3643}
3644
3645// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3646// work with truncating build vectors and vectors with elements of less than
3647// 8 bits.
3648 bool TargetLowering::isConstTrueVal(SDValue N) const {
3649 if (!N)
3650 return false;
3651
3652 unsigned EltWidth;
3653 APInt CVal;
3654 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3655 /*AllowTruncation=*/true)) {
3656 CVal = CN->getAPIntValue();
3657 EltWidth = N.getValueType().getScalarSizeInBits();
3658 } else
3659 return false;
3660
3661 // If this is a truncating splat, truncate the splat value.
3662 // Otherwise, we may fail to match the expected values below.
3663 if (EltWidth < CVal.getBitWidth())
3664 CVal = CVal.trunc(EltWidth);
3665
3666 switch (getBooleanContents(N.getValueType())) {
3667 case UndefinedBooleanContent:
3668 return CVal[0];
3669 case ZeroOrOneBooleanContent:
3670 return CVal.isOne();
3671 case ZeroOrNegativeOneBooleanContent:
3672 return CVal.isAllOnes();
3673 }
3674
3675 llvm_unreachable("Invalid boolean contents");
3676}
3677
3678 bool TargetLowering::isConstFalseVal(SDValue N) const {
3679 if (!N)
3680 return false;
3681
3682 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3683 if (!CN) {
3684 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3685 if (!BV)
3686 return false;
3687
3688 // Only interested in constant splats; we don't care about undef
3689 // elements when identifying boolean constants, and getConstantSplatNode
3690 // returns null if all ops are undef.
3691 CN = BV->getConstantSplatNode();
3692 if (!CN)
3693 return false;
3694 }
3695
3696 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3697 return !CN->getAPIntValue()[0];
3698
3699 return CN->isZero();
3700}
3701
3702 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
3703 bool SExt) const {
3704 if (VT == MVT::i1)
3705 return N->isOne();
3706
3708 switch (Cnt) {
3710 // An extended value of 1 is always true, unless its original type is i1,
3711 // in which case it will be sign extended to -1.
3712 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3715 return N->isAllOnes() && SExt;
3716 }
3717 llvm_unreachable("Unexpected enumeration.");
3718}
3719
3720/// This helper function of SimplifySetCC tries to optimize the comparison when
3721/// either operand of the SetCC node is a bitwise-and instruction.
3722SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
3723 ISD::CondCode Cond, const SDLoc &DL,
3724 DAGCombinerInfo &DCI) const {
3725 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
3726 std::swap(N0, N1);
3727
3728 SelectionDAG &DAG = DCI.DAG;
3729 EVT OpVT = N0.getValueType();
3730 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
3731 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
3732 return SDValue();
3733
3734 // (X & Y) != 0 --> zextOrTrunc(X & Y)
3735 // iff everything but LSB is known zero:
3736 if (Cond == ISD::SETNE && isNullConstant(N1) &&
3737 (getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent ||
3738 getBooleanContents(OpVT) == TargetLowering::ZeroOrNegativeOneBooleanContent)) {
3739 unsigned NumEltBits = OpVT.getScalarSizeInBits();
3740 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
3741 if (DAG.MaskedValueIsZero(N0, UpperBits))
3742 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
3743 }
3744
3745 // Try to eliminate a power-of-2 mask constant by converting to a signbit
3746 // test in a narrow type that we can truncate to with no cost. Examples:
3747 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
3748 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
3749 // TODO: This conservatively checks for type legality on the source and
3750 // destination types. That may inhibit optimizations, but it also
3751 // allows setcc->shift transforms that may be more beneficial.
3752 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3753 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
3754 isTypeLegal(OpVT) && N0.hasOneUse()) {
3755 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
3756 AndC->getAPIntValue().getActiveBits());
3757 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
3758 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
3759 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
3760 return DAG.getSetCC(DL, VT, Trunc, Zero,
3761 Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
3762 }
3763 }
3764
3765 // Match these patterns in any of their permutations:
3766 // (X & Y) == Y
3767 // (X & Y) != Y
3768 SDValue X, Y;
3769 if (N0.getOperand(0) == N1) {
3770 X = N0.getOperand(1);
3771 Y = N0.getOperand(0);
3772 } else if (N0.getOperand(1) == N1) {
3773 X = N0.getOperand(0);
3774 Y = N0.getOperand(1);
3775 } else {
3776 return SDValue();
3777 }
3778
3779 SDValue Zero = DAG.getConstant(0, DL, OpVT);
3780 if (DAG.isKnownToBeAPowerOfTwo(Y)) {
3781 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
3782 // Note that where Y is variable and is known to have at most one bit set
3783 // (for example, if it is Z & 1) we cannot do this; the expressions are not
3784 // equivalent when Y == 0.
3785 assert(OpVT.isInteger());
3786 Cond = ISD::getSetCCInverse(Cond, OpVT);
3787 if (DCI.isBeforeLegalizeOps() ||
3788 isCondCodeLegal(Cond, N0.getSimpleValueType()))
3789 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
3790 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
3791 // If the target supports an 'and-not' or 'and-complement' logic operation,
3792 // try to use that to make a comparison operation more efficient.
3793 // But don't do this transform if the mask is a single bit because there are
3794 // more efficient ways to deal with that case (for example, 'bt' on x86 or
3795 // 'rlwinm' on PPC).
3796
3797 // Bail out if the compare operand that we want to turn into a zero is
3798 // already a zero (otherwise, infinite loop).
3799 auto *YConst = dyn_cast<ConstantSDNode>(Y);
3800 if (YConst && YConst->isZero())
3801 return SDValue();
3802
3803 // Transform this into: ~X & Y == 0.
3804 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
3805 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
3806 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
3807 }
3808
3809 return SDValue();
3810}
3811
3812/// There are multiple IR patterns that could be checking whether certain
3813/// truncation of a signed number would be lossy or not. The pattern that is
3814/// best at the IR level may not lower optimally. Thus, we want to unfold it.
3815/// We are looking for the following pattern: (KeptBits is a constant)
3816/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
3817/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
3818/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
3819/// We will unfold it into the natural trunc+sext pattern:
3820/// ((%x << C) a>> C) dstcond %x
3821/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
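/// For example, with %x : i16 and KeptBits == 8 the guarded pattern is
///   (add %x, 128) u< 256
/// which unfolds to
///   ((%x << 8) a>> 8) == %x
/// i.e. a check that %x is representable as a signed 8-bit value.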
3822SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
3823 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
3824 const SDLoc &DL) const {
3825 // We must be comparing with a constant.
3826 ConstantSDNode *C1;
3827 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
3828 return SDValue();
3829
3830 // N0 should be: add %x, (1 << (KeptBits-1))
3831 if (N0->getOpcode() != ISD::ADD)
3832 return SDValue();
3833
3834 // And we must be 'add'ing a constant.
3835 ConstantSDNode *C01;
3836 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
3837 return SDValue();
3838
3839 SDValue X = N0->getOperand(0);
3840 EVT XVT = X.getValueType();
3841
3842 // Validate constants ...
3843
3844 APInt I1 = C1->getAPIntValue();
3845
3846 ISD::CondCode NewCond;
3847 if (Cond == ISD::CondCode::SETULT) {
3848 NewCond = ISD::CondCode::SETEQ;
3849 } else if (Cond == ISD::CondCode::SETULE) {
3850 NewCond = ISD::CondCode::SETEQ;
3851 // But need to 'canonicalize' the constant.
3852 I1 += 1;
3853 } else if (Cond == ISD::CondCode::SETUGT) {
3854 NewCond = ISD::CondCode::SETNE;
3855 // But need to 'canonicalize' the constant.
3856 I1 += 1;
3857 } else if (Cond == ISD::CondCode::SETUGE) {
3858 NewCond = ISD::CondCode::SETNE;
3859 } else
3860 return SDValue();
3861
3862 APInt I01 = C01->getAPIntValue();
3863
3864 auto checkConstants = [&I1, &I01]() -> bool {
3865 // Both of them must be power-of-two, and the constant from setcc is bigger.
3866 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
3867 };
3868
3869 if (checkConstants()) {
3870 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
3871 } else {
3872 // What if we invert constants? (and the target predicate)
3873 I1.negate();
3874 I01.negate();
3875 assert(XVT.isInteger());
3876 NewCond = getSetCCInverse(NewCond, XVT);
3877 if (!checkConstants())
3878 return SDValue();
3879 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
3880 }
3881
3882 // They are power-of-two, so which bit is set?
3883 const unsigned KeptBits = I1.logBase2();
3884 const unsigned KeptBitsMinusOne = I01.logBase2();
3885
3886 // Magic!
3887 if (KeptBits != (KeptBitsMinusOne + 1))
3888 return SDValue();
3889 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
3890
3891 // We don't want to do this in every single case.
3892 SelectionDAG &DAG = DCI.DAG;
3893 if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
3894 XVT, KeptBits))
3895 return SDValue();
3896
3897 const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
3898 assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
3899
3900 // Unfold into: ((%x << C) a>> C) cond %x
3901 // Where 'cond' will be either 'eq' or 'ne'.
3902 SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
3903 SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
3904 SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
3905 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
3906
3907 return T2;
3908}
3909
3910// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
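// E.g. a variable single-bit test (X & (1 << Y)) != 0 becomes
// ((X l>> Y) & 1) != 0, keeping the constant operand on the 'and'.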
3911SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
3912 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
3913 DAGCombinerInfo &DCI, const SDLoc &DL) const {
3914 assert(isConstOrConstSplat(N1C) &&
3915 isConstOrConstSplat(N1C)->getAPIntValue().isZero() &&
3916 "Should be a comparison with 0.");
3917 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3918 "Valid only for [in]equality comparisons.");
3919
3920 unsigned NewShiftOpcode;
3921 SDValue X, C, Y;
3922
3923 SelectionDAG &DAG = DCI.DAG;
3924 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3925
3926 // Look for '(C l>>/<< Y)'.
3927 auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
3928 // The shift should be one-use.
3929 if (!V.hasOneUse())
3930 return false;
3931 unsigned OldShiftOpcode = V.getOpcode();
3932 switch (OldShiftOpcode) {
3933 case ISD::SHL:
3934 NewShiftOpcode = ISD::SRL;
3935 break;
3936 case ISD::SRL:
3937 NewShiftOpcode = ISD::SHL;
3938 break;
3939 default:
3940 return false; // must be a logical shift.
3941 }
3942 // We should be shifting a constant.
3943 // FIXME: best to use isConstantOrConstantVector().
3944 C = V.getOperand(0);
3945 ConstantSDNode *CC =
3946 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
3947 if (!CC)
3948 return false;
3949 Y = V.getOperand(1);
3950
3951 ConstantSDNode *XC =
3952 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
3953 return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
3954 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
3955 };
3956
3957 // LHS of comparison should be a one-use 'and'.
3958 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
3959 return SDValue();
3960
3961 X = N0.getOperand(0);
3962 SDValue Mask = N0.getOperand(1);
3963
3964 // 'and' is commutative!
3965 if (!Match(Mask)) {
3966 std::swap(X, Mask);
3967 if (!Match(Mask))
3968 return SDValue();
3969 }
3970
3971 EVT VT = X.getValueType();
3972
3973 // Produce:
3974 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
3975 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
3976 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
3977 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
3978 return T2;
3979}
3980
3981/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
3982/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
3983/// handle the commuted versions of these patterns.
3984SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
3985 ISD::CondCode Cond, const SDLoc &DL,
3986 DAGCombinerInfo &DCI) const {
3987 unsigned BOpcode = N0.getOpcode();
3988 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
3989 "Unexpected binop");
3990 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
3991
3992 // (X + Y) == X --> Y == 0
3993 // (X - Y) == X --> Y == 0
3994 // (X ^ Y) == X --> Y == 0
3995 SelectionDAG &DAG = DCI.DAG;
3996 EVT OpVT = N0.getValueType();
3997 SDValue X = N0.getOperand(0);
3998 SDValue Y = N0.getOperand(1);
3999 if (X == N1)
4000 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4001
4002 if (Y != N1)
4003 return SDValue();
4004
4005 // (X + Y) == Y --> X == 0
4006 // (X ^ Y) == Y --> X == 0
4007 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4008 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4009
4010 // The shift would not be valid if the operands are boolean (i1).
4011 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4012 return SDValue();
4013
4014 // (X - Y) == Y --> X == Y << 1
4015 EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
4016 !DCI.isBeforeLegalize());
4017 SDValue One = DAG.getConstant(1, DL, ShiftVT);
4018 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4019 if (!DCI.isCalledByLegalizer())
4020 DCI.AddToWorklist(YShl1.getNode());
4021 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4022}
4023
4024 static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
4025 SDValue N0, const APInt &C1,
4026 ISD::CondCode Cond, const SDLoc &dl,
4027 SelectionDAG &DAG) {
4028 // Look through truncs that don't change the value of a ctpop.
4029 // FIXME: Add vector support? Need to be careful with setcc result type below.
4030 SDValue CTPOP = N0;
4031 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4032 N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
4033 CTPOP = N0.getOperand(0);
4034
4035 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4036 return SDValue();
4037
4038 EVT CTVT = CTPOP.getValueType();
4039 SDValue CTOp = CTPOP.getOperand(0);
4040
4041 // Expand a power-of-2-or-zero comparison based on ctpop:
4042 // (ctpop x) u< 2 -> (x & x-1) == 0
4043 // (ctpop x) u> 1 -> (x & x-1) != 0
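// Each pass of the loop below clears the lowest set bit, so e.g.
// (ctpop x) u< 3 becomes ((x & (x - 1)) & ((x & (x - 1)) - 1)) == 0.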
4044 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4045 // Keep the CTPOP if it is a legal vector op.
4046 if (CTVT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
4047 return SDValue();
4048
4049 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4050 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4051 return SDValue();
4052 if (C1 == 0 && (Cond == ISD::SETULT))
4053 return SDValue(); // This is handled elsewhere.
4054
4055 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4056
4057 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4058 SDValue Result = CTOp;
4059 for (unsigned i = 0; i < Passes; i++) {
4060 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4061 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4062 }
4063 ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
4064 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4065 }
4066
4067 // Expand a power-of-2 comparison based on ctpop:
4068 // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
4069 // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
4070 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4071 // Keep the CTPOP if it is legal.
4072 if (TLI.isOperationLegal(ISD::CTPOP, CTVT))
4073 return SDValue();
4074
4075 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4076 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4077 assert(CTVT.isInteger());
4078 ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
4079 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4080 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4081 SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
4082 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4083 unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
4084 return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
4085 }
4086
4087 return SDValue();
4088}
4089
4090 static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4091 ISD::CondCode Cond, const SDLoc &dl,
4092 SelectionDAG &DAG) {
4093 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4094 return SDValue();
4095
4096 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4097 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4098 return SDValue();
4099
4100 auto getRotateSource = [](SDValue X) {
4101 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4102 return X.getOperand(0);
4103 return SDValue();
4104 };
4105
4106 // Peek through a rotated value compared against 0 or -1:
4107 // (rot X, Y) == 0/-1 --> X == 0/-1
4108 // (rot X, Y) != 0/-1 --> X != 0/-1
4109 if (SDValue R = getRotateSource(N0))
4110 return DAG.getSetCC(dl, VT, R, N1, Cond);
4111
4112 // Peek through an 'or' of a rotated value compared against 0:
4113 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4114 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4115 //
4116 // TODO: Add the 'and' with -1 sibling.
4117 // TODO: Recurse through a series of 'or' ops to find the rotate.
4118 EVT OpVT = N0.getValueType();
4119 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4120 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4121 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4122 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4123 }
4124 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4125 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4126 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4127 }
4128 }
4129
4130 return SDValue();
4131}
4132
4133 static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4134 ISD::CondCode Cond, const SDLoc &dl,
4135 SelectionDAG &DAG) {
4136 // If we are testing for all-bits-clear, we might be able to do that with
4137 // less shifting since bit-order does not matter.
4138 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4139 return SDValue();
4140
4141 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4142 if (!C1 || !C1->isZero())
4143 return SDValue();
4144
4145 if (!N0.hasOneUse() ||
4146 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4147 return SDValue();
4148
4149 unsigned BitWidth = N0.getScalarValueSizeInBits();
4150 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4151 if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
4152 return SDValue();
4153
4154 // Canonicalize fshr as fshl to reduce pattern-matching.
4155 unsigned ShAmt = ShAmtC->getZExtValue();
4156 if (N0.getOpcode() == ISD::FSHR)
4157 ShAmt = BitWidth - ShAmt;
4158
4159 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4160 SDValue X, Y;
4161 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4162 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4163 return false;
4164 if (Or.getOperand(0) == Other) {
4165 X = Or.getOperand(0);
4166 Y = Or.getOperand(1);
4167 return true;
4168 }
4169 if (Or.getOperand(1) == Other) {
4170 X = Or.getOperand(1);
4171 Y = Or.getOperand(0);
4172 return true;
4173 }
4174 return false;
4175 };
4176
4177 EVT OpVT = N0.getValueType();
4178 EVT ShAmtVT = N0.getOperand(2).getValueType();
4179 SDValue F0 = N0.getOperand(0);
4180 SDValue F1 = N0.getOperand(1);
4181 if (matchOr(F0, F1)) {
4182 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4183 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4184 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4185 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4186 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4187 }
4188 if (matchOr(F1, F0)) {
4189 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4190 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4191 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4192 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4193 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4194 }
4195
4196 return SDValue();
4197}
4198
4199/// Try to simplify a setcc built with the specified operands and cc. If it is
4200/// unable to simplify it, return a null SDValue.
4201 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4202 ISD::CondCode Cond, bool foldBooleans,
4203 DAGCombinerInfo &DCI,
4204 const SDLoc &dl) const {
4205 SelectionDAG &DAG = DCI.DAG;
4206 const DataLayout &Layout = DAG.getDataLayout();
4207 EVT OpVT = N0.getValueType();
4208 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4209
4210 // Constant fold or commute setcc.
4211 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4212 return Fold;
4213
4214 bool N0ConstOrSplat =
4215 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4216 bool N1ConstOrSplat =
4217 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4218
4219 // Ensure that the constant occurs on the RHS and fold constant comparisons.
4220 // TODO: Handle non-splat vector constants. All undef causes trouble.
4221 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4222 // infinite loop here when we encounter one.
4223 ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
4224 if (N0ConstOrSplat && (!OpVT.isScalableVector() || !N1ConstOrSplat) &&
4225 (DCI.isBeforeLegalizeOps() ||
4226 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4227 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4228
4229 // If we have a subtract with the same 2 non-constant operands as this setcc
4230 // -- but in reverse order -- then try to commute the operands of this setcc
4231 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4232 // instruction on some targets.
4233 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4234 (DCI.isBeforeLegalizeOps() ||
4235 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4236 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4237 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4238 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4239
4240 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4241 return V;
4242
4243 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4244 return V;
4245
4246 if (auto *N1C = isConstOrConstSplat(N1)) {
4247 const APInt &C1 = N1C->getAPIntValue();
4248
4249 // Optimize some CTPOP cases.
4250 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4251 return V;
4252
4253 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4254 // X * Y == 0 --> (X == 0) || (Y == 0)
4255 // X * Y != 0 --> (X != 0) && (Y != 0)
4256 // TODO: This bails out if minsize is set, but if the target doesn't have a
4257 // single instruction multiply for this type, it would likely be
4258 // smaller to decompose.
4259 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4260 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4261 (N0->getFlags().hasNoUnsignedWrap() ||
4262 N0->getFlags().hasNoSignedWrap()) &&
4263 !Attr.hasFnAttr(Attribute::MinSize)) {
4264 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4265 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4266 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4267 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4268 }
4269
4270 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4271 // equality comparison, then we're just comparing whether X itself is
4272 // zero.
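// E.g. for i32, (ctlz x) is 32 precisely when x == 0, and 32 is the only
// result with bit 5 set, so (srl (ctlz x), 5) is 1 iff x is zero.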
4273 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4274 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4275 isPowerOf2_32(N0.getScalarValueSizeInBits())) {
4276 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4277 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4278 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4279 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4280 // (srl (ctlz x), 5) == 0 -> X != 0
4281 // (srl (ctlz x), 5) != 1 -> X != 0
4282 Cond = ISD::SETNE;
4283 } else {
4284 // (srl (ctlz x), 5) != 0 -> X == 0
4285 // (srl (ctlz x), 5) == 1 -> X == 0
4286 Cond = ISD::SETEQ;
4287 }
4288 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4289 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4290 Cond);
4291 }
4292 }
4293 }
4294 }
4295
4296 // FIXME: Support vectors.
4297 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4298 const APInt &C1 = N1C->getAPIntValue();
4299
4300 // (zext x) == C --> x == (trunc C)
4301 // (sext x) == C --> x == (trunc C)
4302 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4303 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4304 unsigned MinBits = N0.getValueSizeInBits();
4305 SDValue PreExt;
4306 bool Signed = false;
4307 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4308 // ZExt
4309 MinBits = N0->getOperand(0).getValueSizeInBits();
4310 PreExt = N0->getOperand(0);
4311 } else if (N0->getOpcode() == ISD::AND) {
4312 // DAGCombine turns costly ZExts into ANDs
4313 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4314 if ((C->getAPIntValue()+1).isPowerOf2()) {
4315 MinBits = C->getAPIntValue().countTrailingOnes();
4316 PreExt = N0->getOperand(0);
4317 }
4318 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4319 // SExt
4320 MinBits = N0->getOperand(0).getValueSizeInBits();
4321 PreExt = N0->getOperand(0);
4322 Signed = true;
4323 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4324 // ZEXTLOAD / SEXTLOAD
4325 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4326 MinBits = LN0->getMemoryVT().getSizeInBits();
4327 PreExt = N0;
4328 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4329 Signed = true;
4330 MinBits = LN0->getMemoryVT().getSizeInBits();
4331 PreExt = N0;
4332 }
4333 }
4334
4335 // Figure out how many bits we need to preserve this constant.
4336 unsigned ReqdBits = Signed ? C1.getMinSignedBits() : C1.getActiveBits();
4337
4338 // Make sure we're not losing bits from the constant.
4339 if (MinBits > 0 &&
4340 MinBits < C1.getBitWidth() &&
4341 MinBits >= ReqdBits) {
4342 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4343 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4344 // Will get folded away.
4345 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4346 if (MinBits == 1 && C1 == 1)
4347 // Invert the condition.
4348 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4349 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4350 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4351 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4352 }
4353
4354 // If truncating the setcc operands is not desirable, we can still
4355 // simplify the expression in some cases:
4356 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4357 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4358 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4359 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4360 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4361 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4362 SDValue TopSetCC = N0->getOperand(0);
4363 unsigned N0Opc = N0->getOpcode();
4364 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4365 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4366 TopSetCC.getOpcode() == ISD::SETCC &&
4367 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4368 (isConstFalseVal(N1) ||
4369 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4370
4371 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4372 (!N1C->isZero() && Cond == ISD::SETNE);
4373
4374 if (!Inverse)
4375 return TopSetCC;
4376
4377 ISD::CondCode InvCond = ISD::getSetCCInverse(
4378 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4379 TopSetCC.getOperand(0).getValueType());
4380 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4381 TopSetCC.getOperand(1),
4382 InvCond);
4383 }
4384 }
4385 }
4386
4387 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4388 // equality or unsigned, and all 1 bits of the const are in the same
4389 // partial word, see if we can shorten the load.
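// E.g. on a little-endian target, (and (i32 load p), 0xFF00) == 0 can be
// narrowed to (and (i8 load (p + 1)), 0xFF) == 0.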
4390 if (DCI.isBeforeLegalize() &&
4391 !ISD::isSignedIntSetCC(Cond) &&
4392 N0.getOpcode() == ISD::AND && C1 == 0 &&
4393 N0.getNode()->hasOneUse() &&
4394 isa<LoadSDNode>(N0.getOperand(0)) &&
4395 N0.getOperand(0).getNode()->hasOneUse() &&
4396 isa<ConstantSDNode>(N0.getOperand(1))) {
4397 LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
4398 APInt bestMask;
4399 unsigned bestWidth = 0, bestOffset = 0;
4400 if (Lod->isSimple() && Lod->isUnindexed()) {
4401 unsigned origWidth = N0.getValueSizeInBits();
4402 unsigned maskWidth = origWidth;
4403 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4404 // 8 bits, but have to be careful...
4405 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4406 origWidth = Lod->getMemoryVT().getSizeInBits();
4407 const APInt &Mask = N0.getConstantOperandAPInt(1);
4408 for (unsigned width = origWidth / 2; width>=8; width /= 2) {
4409 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4410 for (unsigned offset=0; offset<origWidth/width; offset++) {
4411 if (Mask.isSubsetOf(newMask)) {
4412 if (Layout.isLittleEndian())
4413 bestOffset = (uint64_t)offset * (width/8);
4414 else
4415 bestOffset = (origWidth/width - offset - 1) * (width/8);
4416 bestMask = Mask.lshr(offset * (width/8) * 8);
4417 bestWidth = width;
4418 break;
4419 }
4420 newMask <<= width;
4421 }
4422 }
4423 }
4424 if (bestWidth) {
4425 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4426 if (newVT.isRound() &&
4427 shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
4428 SDValue Ptr = Lod->getBasePtr();
4429 if (bestOffset != 0)
4430 Ptr =
4431 DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset), dl);
4432 SDValue NewLoad =
4433 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4434 Lod->getPointerInfo().getWithOffset(bestOffset),
4435 Lod->getOriginalAlign());
4436 return DAG.getSetCC(dl, VT,
4437 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4438 DAG.getConstant(bestMask.trunc(bestWidth),
4439 dl, newVT)),
4440 DAG.getConstant(0LL, dl, newVT), Cond);
4441 }
4442 }
4443 }
4444
4445 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4446 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4447 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4448
4449 // If the comparison constant has bits in the upper part, the
4450 // zero-extended value could never match.